//! Streaming SIMD Extensions 2 (SSE2)

#[cfg(test)]
use stdarch_test::assert_instr;

use crate::{
    core_arch::{simd::*, simd_llvm::*, x86::*},
    intrinsics,
    mem::{self, transmute},
    ptr,
};

/// Provides a hint to the processor that the code sequence is a spin-wait loop.
///
/// This can help improve the performance and power consumption of spin-wait
/// loops.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_pause)
#[inline]
#[cfg_attr(all(test, target_feature = "sse2"), assert_instr(pause))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_pause() {
    // note: `pause` is guaranteed to be interpreted as a `nop` by CPUs without
    // the SSE2 target-feature - therefore it does not require any target features
    pause()
}

/// Invalidates and flushes the cache line that contains `p` from all levels of
/// the cache hierarchy.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_clflush)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(clflush))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_clflush(p: *const u8) {
    clflush(p)
}

/// Performs a serializing operation on all load-from-memory instructions
/// that were issued prior to this instruction.
///
/// Guarantees that every load instruction that precedes, in program order,
/// the fence is globally visible before any load instruction which follows
/// the fence in program order.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_lfence)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(lfence))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_lfence() {
    lfence()
}

/// Performs a serializing operation on all load-from-memory and store-to-memory
/// instructions that were issued prior to this instruction.
///
/// Guarantees that every memory access that precedes, in program order, the
/// memory fence instruction is globally visible before any memory instruction
/// which follows the fence in program order.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mfence)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(mfence))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_mfence() {
    mfence()
}

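// Editor's note: the `example_*` test modules below are illustrative sketches
// added for exposition; they are not part of upstream stdarch. They assume the
// crate's usual `stdarch_test::simd_test` test harness.

// A typical use of `_mm_pause`: a spin-wait loop polling an atomic flag. Here
// the flag starts set so the loop terminates immediately; in real code another
// thread would set it.
#[cfg(test)]
mod example_pause {
    use crate::core_arch::x86::*;
    use core::sync::atomic::{AtomicBool, Ordering};
    use stdarch_test::simd_test;

    #[simd_test(enable = "sse2")]
    unsafe fn spin_wait_sketch() {
        let ready = AtomicBool::new(true);
        while !ready.load(Ordering::Acquire) {
            // Hint to the CPU that we are spinning, easing pressure on the
            // sibling hyper-thread and saving power.
            _mm_pause();
        }
    }
}
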
/// Adds packed 8-bit integers in `a` and `b`.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_add_epi8)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(paddb))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_add_epi8(a: __m128i, b: __m128i) -> __m128i {
    transmute(simd_add(a.as_i8x16(), b.as_i8x16()))
}

/// Adds packed 16-bit integers in `a` and `b`.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_add_epi16)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(paddw))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_add_epi16(a: __m128i, b: __m128i) -> __m128i {
    transmute(simd_add(a.as_i16x8(), b.as_i16x8()))
}

/// Adds packed 32-bit integers in `a` and `b`.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_add_epi32)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(paddd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_add_epi32(a: __m128i, b: __m128i) -> __m128i {
    transmute(simd_add(a.as_i32x4(), b.as_i32x4()))
}

/// Adds packed 64-bit integers in `a` and `b`.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_add_epi64)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(paddq))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_add_epi64(a: __m128i, b: __m128i) -> __m128i {
    transmute(simd_add(a.as_i64x2(), b.as_i64x2()))
}

/// Adds packed 8-bit integers in `a` and `b` using saturation.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_adds_epi8)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(paddsb))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_adds_epi8(a: __m128i, b: __m128i) -> __m128i {
    transmute(simd_saturating_add(a.as_i8x16(), b.as_i8x16()))
}

/// Adds packed 16-bit integers in `a` and `b` using saturation.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_adds_epi16)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(paddsw))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_adds_epi16(a: __m128i, b: __m128i) -> __m128i {
    transmute(simd_saturating_add(a.as_i16x8(), b.as_i16x8()))
}

/// Adds packed unsigned 8-bit integers in `a` and `b` using saturation.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_adds_epu8)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(paddusb))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_adds_epu8(a: __m128i, b: __m128i) -> __m128i {
    transmute(simd_saturating_add(a.as_u8x16(), b.as_u8x16()))
}

/// Adds packed unsigned 16-bit integers in `a` and `b` using saturation.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_adds_epu16)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(paddusw))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_adds_epu16(a: __m128i, b: __m128i) -> __m128i {
    transmute(simd_saturating_add(a.as_u16x8(), b.as_u16x8()))
}

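// Illustrative sketch (editor's addition, not upstream): saturating adds
// clamp at the type's bounds instead of wrapping.
#[cfg(test)]
mod example_saturating_add {
    use crate::core_arch::x86::*;
    use stdarch_test::simd_test;

    #[simd_test(enable = "sse2")]
    unsafe fn adds_clamps_instead_of_wrapping() {
        // Signed: 127 + 1 saturates to 127 in every 8-bit lane.
        let r = _mm_adds_epi8(_mm_set1_epi8(127), _mm_set1_epi8(1));
        let all_max = _mm_cmpeq_epi8(r, _mm_set1_epi8(127));
        assert_eq!(_mm_movemask_epi8(all_max), 0xFFFF);

        // Unsigned: 0xFF + 1 saturates to 0xFF in every 8-bit lane.
        let r = _mm_adds_epu8(_mm_set1_epi8(-1), _mm_set1_epi8(1));
        let all_max = _mm_cmpeq_epi8(r, _mm_set1_epi8(-1));
        assert_eq!(_mm_movemask_epi8(all_max), 0xFFFF);
    }
}
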
/// Averages packed unsigned 8-bit integers in `a` and `b`.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_avg_epu8)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(pavgb))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_avg_epu8(a: __m128i, b: __m128i) -> __m128i {
    transmute(pavgb(a.as_u8x16(), b.as_u8x16()))
}

/// Averages packed unsigned 16-bit integers in `a` and `b`.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_avg_epu16)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(pavgw))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_avg_epu16(a: __m128i, b: __m128i) -> __m128i {
    transmute(pavgw(a.as_u16x8(), b.as_u16x8()))
}

/// Multiplies and then horizontally adds signed 16-bit integers in `a` and `b`.
///
/// Multiplies packed signed 16-bit integers in `a` and `b`, producing
/// intermediate signed 32-bit integers. Horizontally adds adjacent pairs of
/// intermediate 32-bit integers.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_madd_epi16)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(pmaddwd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_madd_epi16(a: __m128i, b: __m128i) -> __m128i {
    transmute(pmaddwd(a.as_i16x8(), b.as_i16x8()))
}

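// Illustrative sketch (editor's addition, not upstream): `pmaddwd` is a
// pairwise widening dot product; each output lane is
// a[2i]*b[2i] + a[2i+1]*b[2i+1] computed at 32-bit precision.
#[cfg(test)]
mod example_madd {
    use crate::core_arch::x86::*;
    use stdarch_test::simd_test;

    #[simd_test(enable = "sse2")]
    unsafe fn madd_is_a_pairwise_dot_product() {
        let a = _mm_setr_epi16(1, 2, 3, 4, 5, 6, 7, 8);
        let b = _mm_setr_epi16(10, 20, 30, 40, 50, 60, 70, 80);
        let r = _mm_madd_epi16(a, b);
        // 1*10 + 2*20 = 50, 3*30 + 4*40 = 250, 5*50 + 6*60 = 610, ...
        let e = _mm_setr_epi32(50, 250, 610, 1130);
        assert_eq!(_mm_movemask_epi8(_mm_cmpeq_epi32(r, e)), 0xFFFF);
    }
}
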
/// Compares packed 16-bit integers in `a` and `b`, and returns the packed
/// maximum values.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_max_epi16)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(pmaxsw))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_max_epi16(a: __m128i, b: __m128i) -> __m128i {
    transmute(pmaxsw(a.as_i16x8(), b.as_i16x8()))
}

/// Compares packed unsigned 8-bit integers in `a` and `b`, and returns the
/// packed maximum values.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_max_epu8)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(pmaxub))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_max_epu8(a: __m128i, b: __m128i) -> __m128i {
    transmute(pmaxub(a.as_u8x16(), b.as_u8x16()))
}

/// Compares packed 16-bit integers in `a` and `b`, and returns the packed
/// minimum values.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_min_epi16)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(pminsw))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_min_epi16(a: __m128i, b: __m128i) -> __m128i {
    transmute(pminsw(a.as_i16x8(), b.as_i16x8()))
}

/// Compares packed unsigned 8-bit integers in `a` and `b`, and returns the
/// packed minimum values.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_min_epu8)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(pminub))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_min_epu8(a: __m128i, b: __m128i) -> __m128i {
    transmute(pminub(a.as_u8x16(), b.as_u8x16()))
}

/// Multiplies the packed 16-bit integers in `a` and `b`.
///
/// The multiplication produces intermediate 32-bit integers, and returns the
/// high 16 bits of the intermediate integers.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mulhi_epi16)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(pmulhw))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_mulhi_epi16(a: __m128i, b: __m128i) -> __m128i {
    transmute(pmulhw(a.as_i16x8(), b.as_i16x8()))
}

/// Multiplies the packed unsigned 16-bit integers in `a` and `b`.
///
/// The multiplication produces intermediate 32-bit integers, and returns the
/// high 16 bits of the intermediate integers.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mulhi_epu16)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(pmulhuw))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_mulhi_epu16(a: __m128i, b: __m128i) -> __m128i {
    transmute(pmulhuw(a.as_u16x8(), b.as_u16x8()))
}

/// Multiplies the packed 16-bit integers in `a` and `b`.
///
/// The multiplication produces intermediate 32-bit integers, and returns the
/// low 16 bits of the intermediate integers.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mullo_epi16)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(pmullw))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_mullo_epi16(a: __m128i, b: __m128i) -> __m128i {
    transmute(simd_mul(a.as_i16x8(), b.as_i16x8()))
}

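// Illustrative sketch (editor's addition, not upstream): `pmulhw` and
// `pmullw` together recover the full 32-bit product of two 16-bit lanes.
#[cfg(test)]
mod example_widening_multiply {
    use crate::core_arch::x86::*;
    use stdarch_test::simd_test;

    #[simd_test(enable = "sse2")]
    unsafe fn mulhi_and_mullo_reassemble_the_full_product() {
        // 1000 * 1000 = 1_000_000 = 0x000F_4240: the high half is 0x000F,
        // the low half 0x4240.
        let a = _mm_set1_epi16(1000);
        let hi = _mm_mulhi_epi16(a, a);
        let lo = _mm_mullo_epi16(a, a);
        assert_eq!(
            _mm_movemask_epi8(_mm_cmpeq_epi16(hi, _mm_set1_epi16(0x000F))),
            0xFFFF
        );
        assert_eq!(
            _mm_movemask_epi8(_mm_cmpeq_epi16(lo, _mm_set1_epi16(0x4240))),
            0xFFFF
        );
    }
}
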
/// Multiplies the low unsigned 32-bit integers from each packed 64-bit element
/// in `a` and `b`.
///
/// Returns the unsigned 64-bit results.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mul_epu32)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(pmuludq))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_mul_epu32(a: __m128i, b: __m128i) -> __m128i {
    transmute(pmuludq(a.as_u32x4(), b.as_u32x4()))
}

/// Sums the absolute differences of packed unsigned 8-bit integers.
///
/// Computes the absolute differences of packed unsigned 8-bit integers in `a`
/// and `b`, then horizontally sums each consecutive 8 differences to produce
/// two unsigned 16-bit integers, and packs these unsigned 16-bit integers in
/// the low 16 bits of the returned 64-bit elements.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_sad_epu8)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(psadbw))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_sad_epu8(a: __m128i, b: __m128i) -> __m128i {
    transmute(psadbw(a.as_u8x16(), b.as_u8x16()))
}

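// Illustrative sketch (editor's addition, not upstream): `psadbw` sums the
// 16 absolute byte differences in two groups of eight.
#[cfg(test)]
mod example_sad {
    use crate::core_arch::x86::*;
    use stdarch_test::simd_test;

    #[simd_test(enable = "sse2")]
    unsafe fn sad_sums_absolute_differences_per_half() {
        // |3 - 1| = 2 in each byte lane; each 8-byte half sums to 16,
        // leaving (16, 0, 16, 0) when the result is viewed as i32x4.
        let r = _mm_sad_epu8(_mm_set1_epi8(3), _mm_set1_epi8(1));
        let e = _mm_setr_epi32(16, 0, 16, 0);
        assert_eq!(_mm_movemask_epi8(_mm_cmpeq_epi32(r, e)), 0xFFFF);
    }
}
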
/// Subtracts packed 8-bit integers in `b` from packed 8-bit integers in `a`.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_sub_epi8)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(psubb))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_sub_epi8(a: __m128i, b: __m128i) -> __m128i {
    transmute(simd_sub(a.as_i8x16(), b.as_i8x16()))
}

/// Subtracts packed 16-bit integers in `b` from packed 16-bit integers in `a`.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_sub_epi16)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(psubw))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_sub_epi16(a: __m128i, b: __m128i) -> __m128i {
    transmute(simd_sub(a.as_i16x8(), b.as_i16x8()))
}

/// Subtracts packed 32-bit integers in `b` from packed 32-bit integers in `a`.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_sub_epi32)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(psubd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_sub_epi32(a: __m128i, b: __m128i) -> __m128i {
    transmute(simd_sub(a.as_i32x4(), b.as_i32x4()))
}

/// Subtracts packed 64-bit integers in `b` from packed 64-bit integers in `a`.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_sub_epi64)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(psubq))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_sub_epi64(a: __m128i, b: __m128i) -> __m128i {
    transmute(simd_sub(a.as_i64x2(), b.as_i64x2()))
}

/// Subtracts packed 8-bit integers in `b` from packed 8-bit integers in `a`
/// using saturation.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_subs_epi8)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(psubsb))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_subs_epi8(a: __m128i, b: __m128i) -> __m128i {
    transmute(simd_saturating_sub(a.as_i8x16(), b.as_i8x16()))
}

/// Subtracts packed 16-bit integers in `b` from packed 16-bit integers in `a`
/// using saturation.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_subs_epi16)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(psubsw))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_subs_epi16(a: __m128i, b: __m128i) -> __m128i {
    transmute(simd_saturating_sub(a.as_i16x8(), b.as_i16x8()))
}

/// Subtracts packed unsigned 8-bit integers in `b` from packed unsigned 8-bit
/// integers in `a` using saturation.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_subs_epu8)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(psubusb))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_subs_epu8(a: __m128i, b: __m128i) -> __m128i {
    transmute(simd_saturating_sub(a.as_u8x16(), b.as_u8x16()))
}

/// Subtracts packed unsigned 16-bit integers in `b` from packed unsigned 16-bit
/// integers in `a` using saturation.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_subs_epu16)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(psubusw))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_subs_epu16(a: __m128i, b: __m128i) -> __m128i {
    transmute(simd_saturating_sub(a.as_u16x8(), b.as_u16x8()))
}

/// Shifts `a` left by `imm8` bytes while shifting in zeros.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_slli_si128)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(pslldq, imm8 = 1))]
#[rustc_args_required_const(1)]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_slli_si128(a: __m128i, imm8: i32) -> __m128i {
    _mm_slli_si128_impl(a, imm8)
}

/// Implementation detail: converts the immediate argument of the
/// `_mm_slli_si128` intrinsic into a compile-time constant.
#[inline]
#[target_feature(enable = "sse2")]
unsafe fn _mm_slli_si128_impl(a: __m128i, imm8: i32) -> __m128i {
    let (zero, imm8) = (_mm_set1_epi8(0).as_i8x16(), imm8 as u32);
    let a = a.as_i8x16();
    macro_rules! shuffle {
        ($shift:expr) => {
            simd_shuffle16::<i8x16, i8x16>(
                zero,
                a,
                [
                    16 - $shift,
                    17 - $shift,
                    18 - $shift,
                    19 - $shift,
                    20 - $shift,
                    21 - $shift,
                    22 - $shift,
                    23 - $shift,
                    24 - $shift,
                    25 - $shift,
                    26 - $shift,
                    27 - $shift,
                    28 - $shift,
                    29 - $shift,
                    30 - $shift,
                    31 - $shift,
                ],
            )
        };
    }
    let x = match imm8 {
        0 => shuffle!(0),
        1 => shuffle!(1),
        2 => shuffle!(2),
        3 => shuffle!(3),
        4 => shuffle!(4),
        5 => shuffle!(5),
        6 => shuffle!(6),
        7 => shuffle!(7),
        8 => shuffle!(8),
        9 => shuffle!(9),
        10 => shuffle!(10),
        11 => shuffle!(11),
        12 => shuffle!(12),
        13 => shuffle!(13),
        14 => shuffle!(14),
        15 => shuffle!(15),
        _ => shuffle!(16),
    };
    transmute(x)
}

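// Illustrative sketch (editor's addition, not upstream): the byte shift moves
// whole lanes; shifting left by 4 bytes moves each 32-bit lane up by one
// position and zero-fills the bottom lane.
#[cfg(test)]
mod example_byte_shift {
    use crate::core_arch::x86::*;
    use stdarch_test::simd_test;

    #[simd_test(enable = "sse2")]
    unsafe fn slli_si128_shifts_whole_bytes() {
        let a = _mm_setr_epi32(0x11111111, 0x22222222, 0x33333333, 0x44444444);
        let r = _mm_slli_si128(a, 4);
        let e = _mm_setr_epi32(0, 0x11111111, 0x22222222, 0x33333333);
        assert_eq!(_mm_movemask_epi8(_mm_cmpeq_epi32(r, e)), 0xFFFF);
    }
}
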
/// Shifts `a` left by `imm8` bytes while shifting in zeros.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_bslli_si128)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(pslldq, imm8 = 1))]
#[rustc_args_required_const(1)]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_bslli_si128(a: __m128i, imm8: i32) -> __m128i {
    _mm_slli_si128_impl(a, imm8)
}

/// Shifts `a` right by `imm8` bytes while shifting in zeros.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_bsrli_si128)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(psrldq, imm8 = 1))]
#[rustc_args_required_const(1)]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_bsrli_si128(a: __m128i, imm8: i32) -> __m128i {
    _mm_srli_si128_impl(a, imm8)
}

/// Shifts packed 16-bit integers in `a` left by `imm8` while shifting in zeros.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_slli_epi16)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(psllw, imm8 = 7))]
#[rustc_args_required_const(1)]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_slli_epi16(a: __m128i, imm8: i32) -> __m128i {
    let a = a.as_i16x8();
    macro_rules! call {
        ($imm8:expr) => {
            transmute(pslliw(a, $imm8))
        };
    }
    constify_imm8!(imm8, call)
}

/// Shifts packed 16-bit integers in `a` left by `count` while shifting in
/// zeros.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_sll_epi16)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(psllw))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_sll_epi16(a: __m128i, count: __m128i) -> __m128i {
    transmute(psllw(a.as_i16x8(), count.as_i16x8()))
}

/// Shifts packed 32-bit integers in `a` left by `imm8` while shifting in zeros.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_slli_epi32)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(pslld, imm8 = 7))]
#[rustc_args_required_const(1)]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_slli_epi32(a: __m128i, imm8: i32) -> __m128i {
    let a = a.as_i32x4();
    macro_rules! call {
        ($imm8:expr) => {
            transmute(psllid(a, $imm8))
        };
    }
    constify_imm8!(imm8, call)
}

/// Shifts packed 32-bit integers in `a` left by `count` while shifting in
/// zeros.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_sll_epi32)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(pslld))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_sll_epi32(a: __m128i, count: __m128i) -> __m128i {
    transmute(pslld(a.as_i32x4(), count.as_i32x4()))
}

/// Shifts packed 64-bit integers in `a` left by `imm8` while shifting in zeros.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_slli_epi64)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(psllq, imm8 = 7))]
#[rustc_args_required_const(1)]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_slli_epi64(a: __m128i, imm8: i32) -> __m128i {
    let a = a.as_i64x2();
    macro_rules! call {
        ($imm8:expr) => {
            transmute(pslliq(a, $imm8))
        };
    }
    constify_imm8!(imm8, call)
}

/// Shifts packed 64-bit integers in `a` left by `count` while shifting in
/// zeros.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_sll_epi64)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(psllq))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_sll_epi64(a: __m128i, count: __m128i) -> __m128i {
    transmute(psllq(a.as_i64x2(), count.as_i64x2()))
}

/// Shifts packed 16-bit integers in `a` right by `imm8` while shifting in sign
/// bits.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_srai_epi16)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(psraw, imm8 = 1))]
#[rustc_args_required_const(1)]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_srai_epi16(a: __m128i, imm8: i32) -> __m128i {
    let a = a.as_i16x8();
    macro_rules! call {
        ($imm8:expr) => {
            transmute(psraiw(a, $imm8))
        };
    }
    constify_imm8!(imm8, call)
}

/// Shifts packed 16-bit integers in `a` right by `count` while shifting in sign
/// bits.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_sra_epi16)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(psraw))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_sra_epi16(a: __m128i, count: __m128i) -> __m128i {
    transmute(psraw(a.as_i16x8(), count.as_i16x8()))
}

/// Shifts packed 32-bit integers in `a` right by `imm8` while shifting in sign
/// bits.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_srai_epi32)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(psrad, imm8 = 1))]
#[rustc_args_required_const(1)]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_srai_epi32(a: __m128i, imm8: i32) -> __m128i {
    let a = a.as_i32x4();
    macro_rules! call {
        ($imm8:expr) => {
            transmute(psraid(a, $imm8))
        };
    }
    constify_imm8!(imm8, call)
}

/// Shifts packed 32-bit integers in `a` right by `count` while shifting in sign
/// bits.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_sra_epi32)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(psrad))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_sra_epi32(a: __m128i, count: __m128i) -> __m128i {
    transmute(psrad(a.as_i32x4(), count.as_i32x4()))
}

/// Shifts `a` right by `imm8` bytes while shifting in zeros.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_srli_si128)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(psrldq, imm8 = 1))]
#[rustc_args_required_const(1)]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_srli_si128(a: __m128i, imm8: i32) -> __m128i {
    _mm_srli_si128_impl(a, imm8)
}

/// Implementation detail: converts the immediate argument of the
/// `_mm_srli_si128` intrinsic into a compile-time constant.
#[inline]
#[target_feature(enable = "sse2")]
unsafe fn _mm_srli_si128_impl(a: __m128i, imm8: i32) -> __m128i {
    let (zero, imm8) = (_mm_set1_epi8(0).as_i8x16(), imm8 as u32);
    let a = a.as_i8x16();
    macro_rules! shuffle {
        ($shift:expr) => {
            simd_shuffle16(
                a,
                zero,
                [
                    0 + $shift,
                    1 + $shift,
                    2 + $shift,
                    3 + $shift,
                    4 + $shift,
                    5 + $shift,
                    6 + $shift,
                    7 + $shift,
                    8 + $shift,
                    9 + $shift,
                    10 + $shift,
                    11 + $shift,
                    12 + $shift,
                    13 + $shift,
                    14 + $shift,
                    15 + $shift,
                ],
            )
        };
    }
    let x: i8x16 = match imm8 {
        0 => shuffle!(0),
        1 => shuffle!(1),
        2 => shuffle!(2),
        3 => shuffle!(3),
        4 => shuffle!(4),
        5 => shuffle!(5),
        6 => shuffle!(6),
        7 => shuffle!(7),
        8 => shuffle!(8),
        9 => shuffle!(9),
        10 => shuffle!(10),
        11 => shuffle!(11),
        12 => shuffle!(12),
        13 => shuffle!(13),
        14 => shuffle!(14),
        15 => shuffle!(15),
        _ => shuffle!(16),
    };
    transmute(x)
}

/// Shifts packed 16-bit integers in `a` right by `imm8` while shifting in
/// zeros.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_srli_epi16)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(psrlw, imm8 = 1))]
#[rustc_args_required_const(1)]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_srli_epi16(a: __m128i, imm8: i32) -> __m128i {
    let a = a.as_i16x8();
    macro_rules! call {
        ($imm8:expr) => {
            transmute(psrliw(a, $imm8))
        };
    }
    constify_imm8!(imm8, call)
}

/// Shifts packed 16-bit integers in `a` right by `count` while shifting in
/// zeros.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_srl_epi16)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(psrlw))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_srl_epi16(a: __m128i, count: __m128i) -> __m128i {
    transmute(psrlw(a.as_i16x8(), count.as_i16x8()))
}

/// Shifts packed 32-bit integers in `a` right by `imm8` while shifting in
/// zeros.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_srli_epi32)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(psrld, imm8 = 8))]
#[rustc_args_required_const(1)]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_srli_epi32(a: __m128i, imm8: i32) -> __m128i {
    let a = a.as_i32x4();
    macro_rules! call {
        ($imm8:expr) => {
            transmute(psrlid(a, $imm8))
        };
    }
    constify_imm8!(imm8, call)
}

/// Shifts packed 32-bit integers in `a` right by `count` while shifting in
/// zeros.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_srl_epi32)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(psrld))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_srl_epi32(a: __m128i, count: __m128i) -> __m128i {
    transmute(psrld(a.as_i32x4(), count.as_i32x4()))
}

/// Shifts packed 64-bit integers in `a` right by `imm8` while shifting in
/// zeros.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_srli_epi64)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(psrlq, imm8 = 1))]
#[rustc_args_required_const(1)]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_srli_epi64(a: __m128i, imm8: i32) -> __m128i {
    let a = a.as_i64x2();
    macro_rules! call {
        ($imm8:expr) => {
            transmute(psrliq(a, $imm8))
        };
    }
    constify_imm8!(imm8, call)
}

/// Shifts packed 64-bit integers in `a` right by `count` while shifting in
/// zeros.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_srl_epi64)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(psrlq))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_srl_epi64(a: __m128i, count: __m128i) -> __m128i {
    transmute(psrlq(a.as_i64x2(), count.as_i64x2()))
}

/// Computes the bitwise AND of 128 bits (representing integer data) in `a` and
/// `b`.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_and_si128)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(andps))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_and_si128(a: __m128i, b: __m128i) -> __m128i {
    simd_and(a, b)
}

/// Computes the bitwise NOT of 128 bits (representing integer data) in `a` and
/// then AND with `b`.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_andnot_si128)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(andnps))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_andnot_si128(a: __m128i, b: __m128i) -> __m128i {
    simd_and(simd_xor(_mm_set1_epi8(-1), a), b)
}

/// Computes the bitwise OR of 128 bits (representing integer data) in `a` and
/// `b`.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_or_si128)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(orps))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_or_si128(a: __m128i, b: __m128i) -> __m128i {
    simd_or(a, b)
}

/// Computes the bitwise XOR of 128 bits (representing integer data) in `a` and
/// `b`.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_xor_si128)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(xorps))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_xor_si128(a: __m128i, b: __m128i) -> __m128i {
    simd_xor(a, b)
}

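// Illustrative sketch (editor's addition, not upstream): the classic
// branchless select built from the bitwise trio:
// (mask AND a) OR (NOT mask AND b).
#[cfg(test)]
mod example_bitwise_select {
    use crate::core_arch::x86::*;
    use stdarch_test::simd_test;

    #[simd_test(enable = "sse2")]
    unsafe fn blend_with_and_andnot_or() {
        let mask = _mm_setr_epi32(-1, 0, -1, 0);
        let a = _mm_set1_epi32(0x00AA00AA);
        let b = _mm_set1_epi32(0x00550055);
        let r = _mm_or_si128(_mm_and_si128(mask, a), _mm_andnot_si128(mask, b));
        let e = _mm_setr_epi32(0x00AA00AA, 0x00550055, 0x00AA00AA, 0x00550055);
        assert_eq!(_mm_movemask_epi8(_mm_cmpeq_epi32(r, e)), 0xFFFF);
    }
}
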
/// Compares packed 8-bit integers in `a` and `b` for equality.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cmpeq_epi8)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(pcmpeqb))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_cmpeq_epi8(a: __m128i, b: __m128i) -> __m128i {
    transmute::<i8x16, _>(simd_eq(a.as_i8x16(), b.as_i8x16()))
}

/// Compares packed 16-bit integers in `a` and `b` for equality.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cmpeq_epi16)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(pcmpeqw))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_cmpeq_epi16(a: __m128i, b: __m128i) -> __m128i {
    transmute::<i16x8, _>(simd_eq(a.as_i16x8(), b.as_i16x8()))
}

/// Compares packed 32-bit integers in `a` and `b` for equality.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cmpeq_epi32)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(pcmpeqd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_cmpeq_epi32(a: __m128i, b: __m128i) -> __m128i {
    transmute::<i32x4, _>(simd_eq(a.as_i32x4(), b.as_i32x4()))
}

/// Compares packed 8-bit integers in `a` and `b` for greater-than.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cmpgt_epi8)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(pcmpgtb))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_cmpgt_epi8(a: __m128i, b: __m128i) -> __m128i {
    transmute::<i8x16, _>(simd_gt(a.as_i8x16(), b.as_i8x16()))
}

/// Compares packed 16-bit integers in `a` and `b` for greater-than.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cmpgt_epi16)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(pcmpgtw))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_cmpgt_epi16(a: __m128i, b: __m128i) -> __m128i {
    transmute::<i16x8, _>(simd_gt(a.as_i16x8(), b.as_i16x8()))
}

/// Compares packed 32-bit integers in `a` and `b` for greater-than.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cmpgt_epi32)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(pcmpgtd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_cmpgt_epi32(a: __m128i, b: __m128i) -> __m128i {
    transmute::<i32x4, _>(simd_gt(a.as_i32x4(), b.as_i32x4()))
}

/// Compares packed 8-bit integers in `a` and `b` for less-than.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cmplt_epi8)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(pcmpgtb))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_cmplt_epi8(a: __m128i, b: __m128i) -> __m128i {
    transmute::<i8x16, _>(simd_lt(a.as_i8x16(), b.as_i8x16()))
}

/// Compares packed 16-bit integers in `a` and `b` for less-than.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cmplt_epi16)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(pcmpgtw))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_cmplt_epi16(a: __m128i, b: __m128i) -> __m128i {
    transmute::<i16x8, _>(simd_lt(a.as_i16x8(), b.as_i16x8()))
}

/// Compares packed 32-bit integers in `a` and `b` for less-than.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cmplt_epi32)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(pcmpgtd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_cmplt_epi32(a: __m128i, b: __m128i) -> __m128i {
    transmute::<i32x4, _>(simd_lt(a.as_i32x4(), b.as_i32x4()))
}

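// Illustrative sketch (editor's addition, not upstream): integer comparisons
// return all-ones (-1) or all-zeros lanes, which is exactly the mask shape
// the select idiom above expects.
#[cfg(test)]
mod example_compare_masks {
    use crate::core_arch::x86::*;
    use stdarch_test::simd_test;

    #[simd_test(enable = "sse2")]
    unsafe fn comparisons_yield_lane_masks() {
        let a = _mm_setr_epi32(5, -3, 9, 0);
        let b = _mm_setr_epi32(1, 1, 7, 2);
        let gt = _mm_cmpgt_epi32(a, b);
        // a > b holds in lanes 0 and 2 only.
        let e = _mm_setr_epi32(-1, 0, -1, 0);
        assert_eq!(_mm_movemask_epi8(_mm_cmpeq_epi32(gt, e)), 0xFFFF);
    }
}
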
/// Converts the lower two packed 32-bit integers in `a` to packed
/// double-precision (64-bit) floating-point elements.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvtepi32_pd)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(cvtdq2pd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_cvtepi32_pd(a: __m128i) -> __m128d {
    let a = a.as_i32x4();
    simd_cast::<i32x2, __m128d>(simd_shuffle2(a, a, [0, 1]))
}

/// Returns `a` with its lower element replaced by `b` after converting it to
/// an `f64`.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvtsi32_sd)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(cvtsi2sd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_cvtsi32_sd(a: __m128d, b: i32) -> __m128d {
    simd_insert(a, 0, b as f64)
}

/// Converts packed 32-bit integers in `a` to packed single-precision (32-bit)
/// floating-point elements.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvtepi32_ps)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(cvtdq2ps))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_cvtepi32_ps(a: __m128i) -> __m128 {
    cvtdq2ps(a.as_i32x4())
}

/// Converts packed single-precision (32-bit) floating-point elements in `a`
/// to packed 32-bit integers.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvtps_epi32)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(cvtps2dq))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_cvtps_epi32(a: __m128) -> __m128i {
    transmute(cvtps2dq(a))
}

/// Returns a vector whose lowest element is `a` and all higher elements are
/// `0`.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvtsi32_si128)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(all(test, target_arch = "x86_64"), assert_instr(movd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_cvtsi32_si128(a: i32) -> __m128i {
    transmute(i32x4::new(a, 0, 0, 0))
}

/// Returns the lowest element of `a`.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvtsi128_si32)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(all(test, not(target_os = "windows")), assert_instr(movd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_cvtsi128_si32(a: __m128i) -> i32 {
    simd_extract(a.as_i32x4(), 0)
}

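// Illustrative sketch (editor's addition, not upstream): a scalar round trip
// through the low lane; `_mm_cvtsi32_si128` zeroes the upper lanes.
#[cfg(test)]
mod example_scalar_conversions {
    use crate::core_arch::x86::*;
    use stdarch_test::simd_test;

    #[simd_test(enable = "sse2")]
    unsafe fn si32_round_trips_through_the_low_lane() {
        let v = _mm_cvtsi32_si128(-42);
        assert_eq!(_mm_cvtsi128_si32(v), -42);
        let e = _mm_setr_epi32(-42, 0, 0, 0);
        assert_eq!(_mm_movemask_epi8(_mm_cmpeq_epi32(v, e)), 0xFFFF);
    }
}
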
/// Sets packed 64-bit integers with the supplied values, from highest to
/// lowest.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_set_epi64x)
#[inline]
#[target_feature(enable = "sse2")]
// no particular instruction to test
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_set_epi64x(e1: i64, e0: i64) -> __m128i {
    transmute(i64x2::new(e0, e1))
}

/// Sets packed 32-bit integers with the supplied values.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_set_epi32)
#[inline]
#[target_feature(enable = "sse2")]
// no particular instruction to test
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_set_epi32(e3: i32, e2: i32, e1: i32, e0: i32) -> __m128i {
    transmute(i32x4::new(e0, e1, e2, e3))
}

/// Sets packed 16-bit integers with the supplied values.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_set_epi16)
#[inline]
#[target_feature(enable = "sse2")]
// no particular instruction to test
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_set_epi16(
    e7: i16,
    e6: i16,
    e5: i16,
    e4: i16,
    e3: i16,
    e2: i16,
    e1: i16,
    e0: i16,
) -> __m128i {
    transmute(i16x8::new(e0, e1, e2, e3, e4, e5, e6, e7))
}

/// Sets packed 8-bit integers with the supplied values.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_set_epi8)
#[inline]
#[target_feature(enable = "sse2")]
// no particular instruction to test
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_set_epi8(
    e15: i8,
    e14: i8,
    e13: i8,
    e12: i8,
    e11: i8,
    e10: i8,
    e9: i8,
    e8: i8,
    e7: i8,
    e6: i8,
    e5: i8,
    e4: i8,
    e3: i8,
    e2: i8,
    e1: i8,
    e0: i8,
) -> __m128i {
    #[rustfmt::skip]
    transmute(i8x16::new(
        e0, e1, e2, e3, e4, e5, e6, e7, e8, e9, e10, e11, e12, e13, e14, e15,
    ))
}

/// Broadcasts 64-bit integer `a` to all elements.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_set1_epi64x)
#[inline]
#[target_feature(enable = "sse2")]
// no particular instruction to test
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_set1_epi64x(a: i64) -> __m128i {
    _mm_set_epi64x(a, a)
}

/// Broadcasts 32-bit integer `a` to all elements.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_set1_epi32)
#[inline]
#[target_feature(enable = "sse2")]
// no particular instruction to test
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_set1_epi32(a: i32) -> __m128i {
    _mm_set_epi32(a, a, a, a)
}

/// Broadcasts 16-bit integer `a` to all elements.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_set1_epi16)
#[inline]
#[target_feature(enable = "sse2")]
// no particular instruction to test
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_set1_epi16(a: i16) -> __m128i {
    _mm_set_epi16(a, a, a, a, a, a, a, a)
}

/// Broadcasts 8-bit integer `a` to all elements.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_set1_epi8)
#[inline]
#[target_feature(enable = "sse2")]
// no particular instruction to test
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_set1_epi8(a: i8) -> __m128i {
    _mm_set_epi8(a, a, a, a, a, a, a, a, a, a, a, a, a, a, a, a)
}

/// Sets packed 32-bit integers with the supplied values in reverse order.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_setr_epi32)
#[inline]
#[target_feature(enable = "sse2")]
// no particular instruction to test
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_setr_epi32(e3: i32, e2: i32, e1: i32, e0: i32) -> __m128i {
    _mm_set_epi32(e0, e1, e2, e3)
}

/// Sets packed 16-bit integers with the supplied values in reverse order.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_setr_epi16)
#[inline]
#[target_feature(enable = "sse2")]
// no particular instruction to test
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_setr_epi16(
    e7: i16,
    e6: i16,
    e5: i16,
    e4: i16,
    e3: i16,
    e2: i16,
    e1: i16,
    e0: i16,
) -> __m128i {
    _mm_set_epi16(e0, e1, e2, e3, e4, e5, e6, e7)
}

/// Sets packed 8-bit integers with the supplied values in reverse order.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_setr_epi8)
#[inline]
#[target_feature(enable = "sse2")]
// no particular instruction to test
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_setr_epi8(
    e15: i8,
    e14: i8,
    e13: i8,
    e12: i8,
    e11: i8,
    e10: i8,
    e9: i8,
    e8: i8,
    e7: i8,
    e6: i8,
    e5: i8,
    e4: i8,
    e3: i8,
    e2: i8,
    e1: i8,
    e0: i8,
) -> __m128i {
    #[rustfmt::skip]
    _mm_set_epi8(
        e0, e1, e2, e3, e4, e5, e6, e7, e8, e9, e10, e11, e12, e13, e14, e15,
    )
}

/// Returns a vector with all elements set to zero.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_setzero_si128)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(xorps))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_setzero_si128() -> __m128i {
    _mm_set1_epi64x(0)
}

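// Illustrative sketch (editor's addition, not upstream): `_mm_set_*` takes
// the highest element first while `_mm_setr_*` takes memory (lowest-first)
// order, so the two calls below build the same vector.
#[cfg(test)]
mod example_argument_order {
    use crate::core_arch::x86::*;
    use stdarch_test::simd_test;

    #[simd_test(enable = "sse2")]
    unsafe fn set_is_highest_first_setr_is_lowest_first() {
        let a = _mm_set_epi32(3, 2, 1, 0);
        let b = _mm_setr_epi32(0, 1, 2, 3);
        assert_eq!(_mm_movemask_epi8(_mm_cmpeq_epi32(a, b)), 0xFFFF);
        // Lane 0 is the *last* argument of `_mm_set_epi32`.
        assert_eq!(_mm_cvtsi128_si32(a), 0);
    }
}
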
/// Loads a 64-bit integer from memory into the first element of the returned
/// vector.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_loadl_epi64)
#[inline]
#[target_feature(enable = "sse2")]
// FIXME movsd on windows
#[cfg_attr(
    all(
        test,
        not(windows),
        not(all(target_os = "linux", target_arch = "x86_64")),
        target_arch = "x86_64"
    ),
    assert_instr(movq)
)]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_loadl_epi64(mem_addr: *const __m128i) -> __m128i {
    _mm_set_epi64x(0, ptr::read_unaligned(mem_addr as *const i64))
}

/// Loads 128 bits of integer data from memory into a new vector.
///
/// `mem_addr` must be aligned on a 16-byte boundary.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_load_si128)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(movaps))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_load_si128(mem_addr: *const __m128i) -> __m128i {
    *mem_addr
}

/// Loads 128 bits of integer data from memory into a new vector.
///
/// `mem_addr` does not need to be aligned on any particular boundary.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_loadu_si128)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(movups))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_loadu_si128(mem_addr: *const __m128i) -> __m128i {
    let mut dst: __m128i = _mm_undefined_si128();
    ptr::copy_nonoverlapping(
        mem_addr as *const u8,
        &mut dst as *mut __m128i as *mut u8,
        mem::size_of::<__m128i>(),
    );
    dst
}

/// Conditionally stores 8-bit integer elements from `a` into memory using
/// `mask`.
///
/// Elements are not stored when the highest bit is not set in the
/// corresponding element.
///
/// `mem_addr` should correspond to a 128-bit memory location and does not need
/// to be aligned on any particular boundary.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskmoveu_si128)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(maskmovdqu))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_maskmoveu_si128(a: __m128i, mask: __m128i, mem_addr: *mut i8) {
    maskmovdqu(a.as_i8x16(), mask.as_i8x16(), mem_addr)
}

/// Stores 128 bits of integer data from `a` into memory.
///
/// `mem_addr` must be aligned on a 16-byte boundary.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_store_si128)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(movaps))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_store_si128(mem_addr: *mut __m128i, a: __m128i) {
    *mem_addr = a;
}

/// Stores 128 bits of integer data from `a` into memory.
///
/// `mem_addr` does not need to be aligned on any particular boundary.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_storeu_si128)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(movups))] // FIXME movdqu expected
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_storeu_si128(mem_addr: *mut __m128i, a: __m128i) {
    storeudq(mem_addr as *mut i8, a);
}

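// Illustrative sketch (editor's addition, not upstream): the unaligned
// store/load pair works at any address; the +1 offset guarantees a
// misaligned pointer.
#[cfg(test)]
mod example_unaligned_round_trip {
    use crate::core_arch::x86::*;
    use stdarch_test::simd_test;

    #[simd_test(enable = "sse2")]
    unsafe fn storeu_then_loadu() {
        let mut buf = [0u8; 17];
        let v = _mm_set1_epi8(0x5A);
        _mm_storeu_si128(buf.as_mut_ptr().add(1) as *mut __m128i, v);
        let r = _mm_loadu_si128(buf.as_ptr().add(1) as *const __m128i);
        assert_eq!(_mm_movemask_epi8(_mm_cmpeq_epi8(r, v)), 0xFFFF);
    }
}
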
/// Stores the lower 64-bit integer `a` to a memory location.
///
/// `mem_addr` does not need to be aligned on any particular boundary.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_storel_epi64)
#[inline]
#[target_feature(enable = "sse2")]
// FIXME mov on windows, movlps on i686
#[cfg_attr(
    all(
        test,
        not(windows),
        not(all(target_os = "linux", target_arch = "x86_64")),
        target_arch = "x86_64"
    ),
    assert_instr(movq)
)]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_storel_epi64(mem_addr: *mut __m128i, a: __m128i) {
    ptr::copy_nonoverlapping(&a as *const _ as *const u8, mem_addr as *mut u8, 8);
}

/// Stores a 128-bit integer vector to a 128-bit aligned memory location.
/// To minimize caching, the data is flagged as non-temporal (unlikely to be
/// used again soon).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_stream_si128)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(movntps))] // FIXME movntdq
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_stream_si128(mem_addr: *mut __m128i, a: __m128i) {
    intrinsics::nontemporal_store(mem_addr, a);
}

/// Stores a 32-bit integer value in the specified memory location.
/// To minimize caching, the data is flagged as non-temporal (unlikely to be
/// used again soon).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_stream_si32)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(movnti))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_stream_si32(mem_addr: *mut i32, a: i32) {
    intrinsics::nontemporal_store(mem_addr, a);
}

/// Returns a vector where the low element is extracted from `a` and its upper
/// element is zero.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_move_epi64)
#[inline]
#[target_feature(enable = "sse2")]
// FIXME movd on windows, movd on i686
#[cfg_attr(all(test, not(windows), target_arch = "x86_64"), assert_instr(movq))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_move_epi64(a: __m128i) -> __m128i {
    let zero = _mm_setzero_si128();
    let r: i64x2 = simd_shuffle2(a.as_i64x2(), zero.as_i64x2(), [0, 2]);
    transmute(r)
}

/// Converts packed 16-bit integers from `a` and `b` to packed 8-bit integers
/// using signed saturation.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_packs_epi16)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(packsswb))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_packs_epi16(a: __m128i, b: __m128i) -> __m128i {
    transmute(packsswb(a.as_i16x8(), b.as_i16x8()))
}

/// Converts packed 32-bit integers from `a` and `b` to packed 16-bit integers
/// using signed saturation.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_packs_epi32)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(packssdw))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_packs_epi32(a: __m128i, b: __m128i) -> __m128i {
    transmute(packssdw(a.as_i32x4(), b.as_i32x4()))
}

/// Converts packed 16-bit integers from `a` and `b` to packed 8-bit integers
/// using unsigned saturation.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_packus_epi16)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(packuswb))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_packus_epi16(a: __m128i, b: __m128i) -> __m128i {
    transmute(packuswb(a.as_i16x8(), b.as_i16x8()))
}

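// Illustrative sketch (editor's addition, not upstream): unsigned packing
// clamps each 16-bit lane into 0..=255 before narrowing.
#[cfg(test)]
mod example_pack {
    use crate::core_arch::x86::*;
    use stdarch_test::simd_test;

    #[simd_test(enable = "sse2")]
    unsafe fn packus_clamps_to_the_u8_range() {
        // 300 saturates to 255 (0xFF) and -7 to 0 when narrowing i16 -> u8.
        let r = _mm_packus_epi16(_mm_set1_epi16(300), _mm_set1_epi16(-7));
        // The low 8 bytes come from `a` (all 0xFF), the high 8 from `b`
        // (all 0x00): viewed as i64x2 that is (-1, 0).
        let e = _mm_set_epi64x(0, -1);
        assert_eq!(_mm_movemask_epi8(_mm_cmpeq_epi8(r, e)), 0xFFFF);
    }
}
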
/// Returns the `imm8` element of `a`.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_extract_epi16)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(pextrw, imm8 = 9))]
#[rustc_args_required_const(1)]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_extract_epi16(a: __m128i, imm8: i32) -> i32 {
    let a = a.as_u16x8();
    macro_rules! call {
        ($imm3:expr) => {
            simd_extract::<_, u16>(a, $imm3) as i32
        };
    }
    constify_imm3!(imm8, call)
}

/// Returns a new vector where the `imm8` element of `a` is replaced with `i`.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_insert_epi16)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(pinsrw, imm8 = 9))]
#[rustc_args_required_const(2)]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_insert_epi16(a: __m128i, i: i32, imm8: i32) -> __m128i {
    let a = a.as_i16x8();
    macro_rules! call {
        ($imm3:expr) => {
            transmute(simd_insert(a, $imm3, i as i16))
        };
    }
    constify_imm3!(imm8, call)
}

/// Returns a mask of the most significant bit of each element in `a`.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_movemask_epi8)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(pmovmskb))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_movemask_epi8(a: __m128i) -> i32 {
    pmovmskb(a.as_i8x16())
}

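// Illustrative sketch (editor's addition, not upstream): the memchr-style
// search idiom; compare against the needle, collect the lane sign bits, and
// index the first match with `trailing_zeros`.
#[cfg(test)]
mod example_movemask_search {
    use crate::core_arch::x86::*;
    use stdarch_test::simd_test;

    #[simd_test(enable = "sse2")]
    unsafe fn find_first_matching_byte() {
        let hay = _mm_setr_epi8(1, 2, 3, 4, 5, 6, 7, 42, 9, 10, 11, 12, 13, 14, 15, 16);
        let eq = _mm_cmpeq_epi8(hay, _mm_set1_epi8(42));
        let mask = _mm_movemask_epi8(eq);
        assert_eq!(mask.trailing_zeros(), 7);
    }
}
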
1458 /// Shuffles 32-bit integers in `a` using the control in `imm8`.
1459 ///
1460 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_shuffle_epi32)
1461 #[inline]
1462 #[target_feature(enable = "sse2")]
1463 #[cfg_attr(test, assert_instr(pshufd, imm8 = 9))]
1464 #[rustc_args_required_const(1)]
1465 #[stable(feature = "simd_x86", since = "1.27.0")]
1466 pub unsafe fn _mm_shuffle_epi32(a: __m128i, imm8: i32) -> __m128i {
1467 // simd_shuffleX requires that its selector parameter be made up of
1468 // constant values, but we can't enforce that here. In spirit, we need
1469 // to write a `match` on all possible values of a byte, and for each value,
1470 // hard-code the correct `simd_shuffleX` call using only constants. We
1471 // then hope for LLVM to do the rest.
1472 //
1473 // Of course, that's... awful. So we try to use macros to do it for us.
1474 let imm8 = (imm8 & 0xFF) as u8;
1475 let a = a.as_i32x4();
1476
1477 macro_rules! shuffle_done {
1478 ($x01:expr, $x23:expr, $x45:expr, $x67:expr) => {
1479 simd_shuffle4(a, a, [$x01, $x23, $x45, $x67])
1480 };
1481 }
1482 macro_rules! shuffle_x67 {
1483 ($x01:expr, $x23:expr, $x45:expr) => {
1484 match (imm8 >> 6) & 0b11 {
1485 0b00 => shuffle_done!($x01, $x23, $x45, 0),
1486 0b01 => shuffle_done!($x01, $x23, $x45, 1),
1487 0b10 => shuffle_done!($x01, $x23, $x45, 2),
1488 _ => shuffle_done!($x01, $x23, $x45, 3),
1489 }
1490 };
1491 }
1492 macro_rules! shuffle_x45 {
1493 ($x01:expr, $x23:expr) => {
1494 match (imm8 >> 4) & 0b11 {
1495 0b00 => shuffle_x67!($x01, $x23, 0),
1496 0b01 => shuffle_x67!($x01, $x23, 1),
1497 0b10 => shuffle_x67!($x01, $x23, 2),
1498 _ => shuffle_x67!($x01, $x23, 3),
1499 }
1500 };
1501 }
1502 macro_rules! shuffle_x23 {
1503 ($x01:expr) => {
1504 match (imm8 >> 2) & 0b11 {
1505 0b00 => shuffle_x45!($x01, 0),
1506 0b01 => shuffle_x45!($x01, 1),
1507 0b10 => shuffle_x45!($x01, 2),
1508 _ => shuffle_x45!($x01, 3),
1509 }
1510 };
1511 }
1512 let x: i32x4 = match imm8 & 0b11 {
1513 0b00 => shuffle_x23!(0),
1514 0b01 => shuffle_x23!(1),
1515 0b10 => shuffle_x23!(2),
1516 _ => shuffle_x23!(3),
1517 };
1518 transmute(x)
1519 }
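
// Selector sketch for `_mm_shuffle_epi32` (`example_shuffle_epi32` is a
// hypothetical name): each 2-bit field of `imm8`, low to high, picks the
// source lane for the corresponding destination lane.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "sse2")]
unsafe fn example_shuffle_epi32() {
    let a = _mm_setr_epi32(10, 20, 30, 40);
    // 0b00_01_10_11 selects lanes 3, 2, 1, 0 -> reverses the vector.
    let r = _mm_shuffle_epi32(a, 0b00_01_10_11);
    assert_eq!(_mm_cvtsi128_si32(r), 40); // the new low lane came from lane 3
}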
1520
1521 /// Shuffles 16-bit integers in the high 64 bits of `a` using the control in
1522 /// `imm8`.
1523 ///
1524 /// Put the results in the high 64 bits of the returned vector, with the low 64
1525 /// bits being copied from `a`.
1526 ///
1527 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_shufflehi_epi16)
1528 #[inline]
1529 #[target_feature(enable = "sse2")]
1530 #[cfg_attr(test, assert_instr(pshufhw, imm8 = 9))]
1531 #[rustc_args_required_const(1)]
1532 #[stable(feature = "simd_x86", since = "1.27.0")]
1533 pub unsafe fn _mm_shufflehi_epi16(a: __m128i, imm8: i32) -> __m128i {
1534 // See _mm_shuffle_epi32.
1535 let imm8 = (imm8 & 0xFF) as u8;
1536 let a = a.as_i16x8();
1537 macro_rules! shuffle_done {
1538 ($x01:expr, $x23:expr, $x45:expr, $x67:expr) => {
1539 simd_shuffle8(a, a, [0, 1, 2, 3, $x01 + 4, $x23 + 4, $x45 + 4, $x67 + 4])
1540 };
1541 }
1542 macro_rules! shuffle_x67 {
1543 ($x01:expr, $x23:expr, $x45:expr) => {
1544 match (imm8 >> 6) & 0b11 {
1545 0b00 => shuffle_done!($x01, $x23, $x45, 0),
1546 0b01 => shuffle_done!($x01, $x23, $x45, 1),
1547 0b10 => shuffle_done!($x01, $x23, $x45, 2),
1548 _ => shuffle_done!($x01, $x23, $x45, 3),
1549 }
1550 };
1551 }
1552 macro_rules! shuffle_x45 {
1553 ($x01:expr, $x23:expr) => {
1554 match (imm8 >> 4) & 0b11 {
1555 0b00 => shuffle_x67!($x01, $x23, 0),
1556 0b01 => shuffle_x67!($x01, $x23, 1),
1557 0b10 => shuffle_x67!($x01, $x23, 2),
1558 _ => shuffle_x67!($x01, $x23, 3),
1559 }
1560 };
1561 }
1562 macro_rules! shuffle_x23 {
1563 ($x01:expr) => {
1564 match (imm8 >> 2) & 0b11 {
1565 0b00 => shuffle_x45!($x01, 0),
1566 0b01 => shuffle_x45!($x01, 1),
1567 0b10 => shuffle_x45!($x01, 2),
1568 _ => shuffle_x45!($x01, 3),
1569 }
1570 };
1571 }
1572 let x: i16x8 = match imm8 & 0b11 {
1573 0b00 => shuffle_x23!(0),
1574 0b01 => shuffle_x23!(1),
1575 0b10 => shuffle_x23!(2),
1576 _ => shuffle_x23!(3),
1577 };
1578 transmute(x)
1579 }
1580
1581 /// Shuffles 16-bit integers in the low 64 bits of `a` using the control in
1582 /// `imm8`.
1583 ///
1584 /// Put the results in the low 64 bits of the returned vector, with the high 64
1585 /// bits being copied from `a`.
1586 ///
1587 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_shufflelo_epi16)
1588 #[inline]
1589 #[target_feature(enable = "sse2")]
1590 #[cfg_attr(test, assert_instr(pshuflw, imm8 = 9))]
1591 #[rustc_args_required_const(1)]
1592 #[stable(feature = "simd_x86", since = "1.27.0")]
1593 pub unsafe fn _mm_shufflelo_epi16(a: __m128i, imm8: i32) -> __m128i {
1594 // See _mm_shuffle_epi32.
1595 let imm8 = (imm8 & 0xFF) as u8;
1596 let a = a.as_i16x8();
1597
1598 macro_rules! shuffle_done {
1599 ($x01:expr, $x23:expr, $x45:expr, $x67:expr) => {
1600 simd_shuffle8(a, a, [$x01, $x23, $x45, $x67, 4, 5, 6, 7])
1601 };
1602 }
1603 macro_rules! shuffle_x67 {
1604 ($x01:expr, $x23:expr, $x45:expr) => {
1605 match (imm8 >> 6) & 0b11 {
1606 0b00 => shuffle_done!($x01, $x23, $x45, 0),
1607 0b01 => shuffle_done!($x01, $x23, $x45, 1),
1608 0b10 => shuffle_done!($x01, $x23, $x45, 2),
1609 _ => shuffle_done!($x01, $x23, $x45, 3),
1610 }
1611 };
1612 }
1613 macro_rules! shuffle_x45 {
1614 ($x01:expr, $x23:expr) => {
1615 match (imm8 >> 4) & 0b11 {
1616 0b00 => shuffle_x67!($x01, $x23, 0),
1617 0b01 => shuffle_x67!($x01, $x23, 1),
1618 0b10 => shuffle_x67!($x01, $x23, 2),
1619 _ => shuffle_x67!($x01, $x23, 3),
1620 }
1621 };
1622 }
1623 macro_rules! shuffle_x23 {
1624 ($x01:expr) => {
1625 match (imm8 >> 2) & 0b11 {
1626 0b00 => shuffle_x45!($x01, 0),
1627 0b01 => shuffle_x45!($x01, 1),
1628 0b10 => shuffle_x45!($x01, 2),
1629 _ => shuffle_x45!($x01, 3),
1630 }
1631 };
1632 }
1633 let x: i16x8 = match imm8 & 0b11 {
1634 0b00 => shuffle_x23!(0),
1635 0b01 => shuffle_x23!(1),
1636 0b10 => shuffle_x23!(2),
1637 _ => shuffle_x23!(3),
1638 };
1639 transmute(x)
1640 }
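
// A sketch of the half-vector shuffles (`example_shufflelo` is a
// hypothetical name): `_mm_shufflelo_epi16` permutes only lanes 0..=3 and
// copies lanes 4..=7 verbatim; `_mm_shufflehi_epi16` is the mirror image.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "sse2")]
unsafe fn example_shufflelo() {
    let a = _mm_setr_epi16(0, 1, 2, 3, 4, 5, 6, 7);
    // Broadcast lane 3 across the low half.
    let r = _mm_shufflelo_epi16(a, 0b11_11_11_11);
    assert_eq!(_mm_extract_epi16(r, 0), 3);
    assert_eq!(_mm_extract_epi16(r, 7), 7); // high half untouched
}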
1641
1642 /// Unpacks and interleaves 8-bit integers from the high half of `a` and `b`.
1643 ///
1644 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_unpackhi_epi8)
1645 #[inline]
1646 #[target_feature(enable = "sse2")]
1647 #[cfg_attr(test, assert_instr(punpckhbw))]
1648 #[stable(feature = "simd_x86", since = "1.27.0")]
1649 pub unsafe fn _mm_unpackhi_epi8(a: __m128i, b: __m128i) -> __m128i {
1650 transmute::<i8x16, _>(simd_shuffle16(
1651 a.as_i8x16(),
1652 b.as_i8x16(),
1653 [8, 24, 9, 25, 10, 26, 11, 27, 12, 28, 13, 29, 14, 30, 15, 31],
1654 ))
1655 }
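
// Interleaving sketch (`example_unpackhi_epi8` is a hypothetical name):
// the result alternates bytes drawn from the high halves of `a` and `b`.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "sse2")]
unsafe fn example_unpackhi_epi8() {
    let a = _mm_set1_epi8(1);
    let b = _mm_set1_epi8(2);
    // r = [a[8], b[8], a[9], b[9], ...] = [1, 2, 1, 2, ...]
    let r = _mm_unpackhi_epi8(a, b);
    // The first 16-bit lane holds byte 1 (low) and byte 2 (high).
    assert_eq!(_mm_extract_epi16(r, 0), 0x0201);
}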
1656
1657 /// Unpacks and interleaves 16-bit integers from the high half of `a` and `b`.
1658 ///
1659 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_unpackhi_epi16)
1660 #[inline]
1661 #[target_feature(enable = "sse2")]
1662 #[cfg_attr(test, assert_instr(punpckhwd))]
1663 #[stable(feature = "simd_x86", since = "1.27.0")]
1664 pub unsafe fn _mm_unpackhi_epi16(a: __m128i, b: __m128i) -> __m128i {
1665 let x = simd_shuffle8(a.as_i16x8(), b.as_i16x8(), [4, 12, 5, 13, 6, 14, 7, 15]);
1666 transmute::<i16x8, _>(x)
1667 }
1668
1669 /// Unpacks and interleaves 32-bit integers from the high half of `a` and `b`.
1670 ///
1671 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_unpackhi_epi32)
1672 #[inline]
1673 #[target_feature(enable = "sse2")]
1674 #[cfg_attr(test, assert_instr(unpckhps))]
1675 #[stable(feature = "simd_x86", since = "1.27.0")]
1676 pub unsafe fn _mm_unpackhi_epi32(a: __m128i, b: __m128i) -> __m128i {
1677 transmute::<i32x4, _>(simd_shuffle4(a.as_i32x4(), b.as_i32x4(), [2, 6, 3, 7]))
1678 }
1679
1680 /// Unpacks and interleaves 64-bit integers from the high half of `a` and `b`.
1681 ///
1682 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_unpackhi_epi64)
1683 #[inline]
1684 #[target_feature(enable = "sse2")]
1685 #[cfg_attr(test, assert_instr(unpckhpd))]
1686 #[stable(feature = "simd_x86", since = "1.27.0")]
1687 pub unsafe fn _mm_unpackhi_epi64(a: __m128i, b: __m128i) -> __m128i {
1688 transmute::<i64x2, _>(simd_shuffle2(a.as_i64x2(), b.as_i64x2(), [1, 3]))
1689 }
1690
1691 /// Unpacks and interleaves 8-bit integers from the low half of `a` and `b`.
1692 ///
1693 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_unpacklo_epi8)
1694 #[inline]
1695 #[target_feature(enable = "sse2")]
1696 #[cfg_attr(test, assert_instr(punpcklbw))]
1697 #[stable(feature = "simd_x86", since = "1.27.0")]
1698 pub unsafe fn _mm_unpacklo_epi8(a: __m128i, b: __m128i) -> __m128i {
1699 transmute::<i8x16, _>(simd_shuffle16(
1700 a.as_i8x16(),
1701 b.as_i8x16(),
1702 [0, 16, 1, 17, 2, 18, 3, 19, 4, 20, 5, 21, 6, 22, 7, 23],
1703 ))
1704 }
1705
1706 /// Unpacks and interleaves 16-bit integers from the low half of `a` and `b`.
1707 ///
1708 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_unpacklo_epi16)
1709 #[inline]
1710 #[target_feature(enable = "sse2")]
1711 #[cfg_attr(test, assert_instr(punpcklwd))]
1712 #[stable(feature = "simd_x86", since = "1.27.0")]
1713 pub unsafe fn _mm_unpacklo_epi16(a: __m128i, b: __m128i) -> __m128i {
1714 let x = simd_shuffle8(a.as_i16x8(), b.as_i16x8(), [0, 8, 1, 9, 2, 10, 3, 11]);
1715 transmute::<i16x8, _>(x)
1716 }
1717
1718 /// Unpacks and interleaves 32-bit integers from the low half of `a` and `b`.
1719 ///
1720 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_unpacklo_epi32)
1721 #[inline]
1722 #[target_feature(enable = "sse2")]
1723 #[cfg_attr(test, assert_instr(unpcklps))]
1724 #[stable(feature = "simd_x86", since = "1.27.0")]
1725 pub unsafe fn _mm_unpacklo_epi32(a: __m128i, b: __m128i) -> __m128i {
1726 transmute::<i32x4, _>(simd_shuffle4(a.as_i32x4(), b.as_i32x4(), [0, 4, 1, 5]))
1727 }
1728
1729 /// Unpacks and interleaves 64-bit integers from the low half of `a` and `b`.
1730 ///
1731 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_unpacklo_epi64)
1732 #[inline]
1733 #[target_feature(enable = "sse2")]
1734 #[cfg_attr(all(test, not(target_os = "windows")), assert_instr(movlhps))]
1735 #[stable(feature = "simd_x86", since = "1.27.0")]
1736 pub unsafe fn _mm_unpacklo_epi64(a: __m128i, b: __m128i) -> __m128i {
1737 transmute::<i64x2, _>(simd_shuffle2(a.as_i64x2(), b.as_i64x2(), [0, 2]))
1738 }
1739
1740 /// Returns a new vector with the low element of `a` replaced by the sum of the
1741 /// low elements of `a` and `b`.
1742 ///
1743 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_add_sd)
1744 #[inline]
1745 #[target_feature(enable = "sse2")]
1746 #[cfg_attr(test, assert_instr(addsd))]
1747 #[stable(feature = "simd_x86", since = "1.27.0")]
1748 pub unsafe fn _mm_add_sd(a: __m128d, b: __m128d) -> __m128d {
1749 simd_insert(a, 0, _mm_cvtsd_f64(a) + _mm_cvtsd_f64(b))
1750 }
1751
1752 /// Adds packed double-precision (64-bit) floating-point elements in `a` and
1753 /// `b`.
1754 ///
1755 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_add_pd)
1756 #[inline]
1757 #[target_feature(enable = "sse2")]
1758 #[cfg_attr(test, assert_instr(addpd))]
1759 #[stable(feature = "simd_x86", since = "1.27.0")]
1760 pub unsafe fn _mm_add_pd(a: __m128d, b: __m128d) -> __m128d {
1761 simd_add(a, b)
1762 }
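
// Scalar-versus-packed sketch (`example_add_sd_vs_pd` is a hypothetical
// name): the `sd` form operates on lane 0 only and passes lane 1 of `a`
// through unchanged, while the `pd` form operates on both lanes.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "sse2")]
unsafe fn example_add_sd_vs_pd() {
    let a = _mm_setr_pd(1.0, 10.0);
    let b = _mm_setr_pd(2.0, 20.0);
    let s = _mm_add_sd(a, b); // [3.0, 10.0]
    let p = _mm_add_pd(a, b); // [3.0, 30.0]
    assert_eq!(_mm_cvtsd_f64(_mm_unpackhi_pd(s, s)), 10.0);
    assert_eq!(_mm_cvtsd_f64(_mm_unpackhi_pd(p, p)), 30.0);
}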
1763
1764 /// Returns a new vector with the low element of `a` replaced by the result of
1765 /// dividing the lower element of `a` by the lower element of `b`.
1766 ///
1767 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_div_sd)
1768 #[inline]
1769 #[target_feature(enable = "sse2")]
1770 #[cfg_attr(test, assert_instr(divsd))]
1771 #[stable(feature = "simd_x86", since = "1.27.0")]
1772 pub unsafe fn _mm_div_sd(a: __m128d, b: __m128d) -> __m128d {
1773 simd_insert(a, 0, _mm_cvtsd_f64(a) / _mm_cvtsd_f64(b))
1774 }
1775
1776 /// Divides packed double-precision (64-bit) floating-point elements in `a` by
1777 /// packed elements in `b`.
1778 ///
1779 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_div_pd)
1780 #[inline]
1781 #[target_feature(enable = "sse2")]
1782 #[cfg_attr(test, assert_instr(divpd))]
1783 #[stable(feature = "simd_x86", since = "1.27.0")]
1784 pub unsafe fn _mm_div_pd(a: __m128d, b: __m128d) -> __m128d {
1785 simd_div(a, b)
1786 }
1787
1788 /// Returns a new vector with the low element of `a` replaced by the maximum
1789 /// of the lower elements of `a` and `b`.
1790 ///
1791 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_max_sd)
1792 #[inline]
1793 #[target_feature(enable = "sse2")]
1794 #[cfg_attr(test, assert_instr(maxsd))]
1795 #[stable(feature = "simd_x86", since = "1.27.0")]
1796 pub unsafe fn _mm_max_sd(a: __m128d, b: __m128d) -> __m128d {
1797 maxsd(a, b)
1798 }
1799
1800 /// Returns a new vector with the maximum values from corresponding elements in
1801 /// `a` and `b`.
1802 ///
1803 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_max_pd)
1804 #[inline]
1805 #[target_feature(enable = "sse2")]
1806 #[cfg_attr(test, assert_instr(maxpd))]
1807 #[stable(feature = "simd_x86", since = "1.27.0")]
1808 pub unsafe fn _mm_max_pd(a: __m128d, b: __m128d) -> __m128d {
1809 maxpd(a, b)
1810 }
1811
1812 /// Returns a new vector with the low element of `a` replaced by the minimum
1813 /// of the lower elements of `a` and `b`.
1814 ///
1815 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_min_sd)
1816 #[inline]
1817 #[target_feature(enable = "sse2")]
1818 #[cfg_attr(test, assert_instr(minsd))]
1819 #[stable(feature = "simd_x86", since = "1.27.0")]
1820 pub unsafe fn _mm_min_sd(a: __m128d, b: __m128d) -> __m128d {
1821 minsd(a, b)
1822 }
1823
1824 /// Returns a new vector with the minimum values from corresponding elements in
1825 /// `a` and `b`.
1826 ///
1827 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_min_pd)
1828 #[inline]
1829 #[target_feature(enable = "sse2")]
1830 #[cfg_attr(test, assert_instr(minpd))]
1831 #[stable(feature = "simd_x86", since = "1.27.0")]
1832 pub unsafe fn _mm_min_pd(a: __m128d, b: __m128d) -> __m128d {
1833 minpd(a, b)
1834 }
1835
1836 /// Returns a new vector with the low element of `a` replaced by multiplying the
1837 /// low elements of `a` and `b`.
1838 ///
1839 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mul_sd)
1840 #[inline]
1841 #[target_feature(enable = "sse2")]
1842 #[cfg_attr(test, assert_instr(mulsd))]
1843 #[stable(feature = "simd_x86", since = "1.27.0")]
1844 pub unsafe fn _mm_mul_sd(a: __m128d, b: __m128d) -> __m128d {
1845 simd_insert(a, 0, _mm_cvtsd_f64(a) * _mm_cvtsd_f64(b))
1846 }
1847
1848 /// Multiplies packed double-precision (64-bit) floating-point elements in `a`
1849 /// and `b`.
1850 ///
1851 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mul_pd)
1852 #[inline]
1853 #[target_feature(enable = "sse2")]
1854 #[cfg_attr(test, assert_instr(mulpd))]
1855 #[stable(feature = "simd_x86", since = "1.27.0")]
1856 pub unsafe fn _mm_mul_pd(a: __m128d, b: __m128d) -> __m128d {
1857 simd_mul(a, b)
1858 }
1859
1860 /// Returns a new vector with the low element of `a` replaced by the square
1861 /// root of the lower element of `b`.
1862 ///
1863 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_sqrt_sd)
1864 #[inline]
1865 #[target_feature(enable = "sse2")]
1866 #[cfg_attr(test, assert_instr(sqrtsd))]
1867 #[stable(feature = "simd_x86", since = "1.27.0")]
1868 pub unsafe fn _mm_sqrt_sd(a: __m128d, b: __m128d) -> __m128d {
1869 simd_insert(a, 0, _mm_cvtsd_f64(sqrtsd(b)))
1870 }
1871
1872 /// Returns a new vector with the square root of each of the values in `a`.
1873 ///
1874 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_sqrt_pd)
1875 #[inline]
1876 #[target_feature(enable = "sse2")]
1877 #[cfg_attr(test, assert_instr(sqrtpd))]
1878 #[stable(feature = "simd_x86", since = "1.27.0")]
1879 pub unsafe fn _mm_sqrt_pd(a: __m128d) -> __m128d {
1880 simd_fsqrt(a)
1881 }
1882
1883 /// Returns a new vector with the low element of `a` replaced by subtracting the
1884 /// low element of `b` from the low element of `a`.
1885 ///
1886 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_sub_sd)
1887 #[inline]
1888 #[target_feature(enable = "sse2")]
1889 #[cfg_attr(test, assert_instr(subsd))]
1890 #[stable(feature = "simd_x86", since = "1.27.0")]
1891 pub unsafe fn _mm_sub_sd(a: __m128d, b: __m128d) -> __m128d {
1892 simd_insert(a, 0, _mm_cvtsd_f64(a) - _mm_cvtsd_f64(b))
1893 }
1894
1895 /// Subtracts packed double-precision (64-bit) floating-point elements in `b`
1896 /// from `a`.
1897 ///
1898 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_sub_pd)
1899 #[inline]
1900 #[target_feature(enable = "sse2")]
1901 #[cfg_attr(test, assert_instr(subpd))]
1902 #[stable(feature = "simd_x86", since = "1.27.0")]
1903 pub unsafe fn _mm_sub_pd(a: __m128d, b: __m128d) -> __m128d {
1904 simd_sub(a, b)
1905 }
1906
1907 /// Computes the bitwise AND of packed double-precision (64-bit) floating-point
1908 /// elements in `a` and `b`.
1909 ///
1910 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_and_pd)
1911 #[inline]
1912 #[target_feature(enable = "sse2")]
1913 #[cfg_attr(test, assert_instr(andps))]
1914 #[stable(feature = "simd_x86", since = "1.27.0")]
1915 pub unsafe fn _mm_and_pd(a: __m128d, b: __m128d) -> __m128d {
1916 let a: __m128i = transmute(a);
1917 let b: __m128i = transmute(b);
1918 transmute(_mm_and_si128(a, b))
1919 }
1920
1921 /// Computes the bitwise NOT of `a` and then AND with `b`.
1922 ///
1923 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_andnot_pd)
1924 #[inline]
1925 #[target_feature(enable = "sse2")]
1926 #[cfg_attr(test, assert_instr(andnps))]
1927 #[stable(feature = "simd_x86", since = "1.27.0")]
1928 pub unsafe fn _mm_andnot_pd(a: __m128d, b: __m128d) -> __m128d {
1929 let a: __m128i = transmute(a);
1930 let b: __m128i = transmute(b);
1931 transmute(_mm_andnot_si128(a, b))
1932 }
1933
1934 /// Computes the bitwise OR of `a` and `b`.
1935 ///
1936 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_or_pd)
1937 #[inline]
1938 #[target_feature(enable = "sse2")]
1939 #[cfg_attr(test, assert_instr(orps))]
1940 #[stable(feature = "simd_x86", since = "1.27.0")]
1941 pub unsafe fn _mm_or_pd(a: __m128d, b: __m128d) -> __m128d {
1942 let a: __m128i = transmute(a);
1943 let b: __m128i = transmute(b);
1944 transmute(_mm_or_si128(a, b))
1945 }
1946
1947 /// Computes the bitwise XOR of `a` and `b`.
1948 ///
1949 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_xor_pd)
1950 #[inline]
1951 #[target_feature(enable = "sse2")]
1952 #[cfg_attr(test, assert_instr(xorps))]
1953 #[stable(feature = "simd_x86", since = "1.27.0")]
1954 pub unsafe fn _mm_xor_pd(a: __m128d, b: __m128d) -> __m128d {
1955 let a: __m128i = transmute(a);
1956 let b: __m128i = transmute(b);
1957 transmute(_mm_xor_si128(a, b))
1958 }
1959
1960 /// Returns a new vector with the low element of `a` replaced by the equality
1961 /// comparison of the lower elements of `a` and `b`.
1962 ///
1963 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cmpeq_sd)
1964 #[inline]
1965 #[target_feature(enable = "sse2")]
1966 #[cfg_attr(test, assert_instr(cmpeqsd))]
1967 #[stable(feature = "simd_x86", since = "1.27.0")]
1968 pub unsafe fn _mm_cmpeq_sd(a: __m128d, b: __m128d) -> __m128d {
1969 cmpsd(a, b, 0)
1970 }
1971
1972 /// Returns a new vector with the low element of `a` replaced by the less-than
1973 /// comparison of the lower elements of `a` and `b`.
1974 ///
1975 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cmplt_sd)
1976 #[inline]
1977 #[target_feature(enable = "sse2")]
1978 #[cfg_attr(test, assert_instr(cmpltsd))]
1979 #[stable(feature = "simd_x86", since = "1.27.0")]
1980 pub unsafe fn _mm_cmplt_sd(a: __m128d, b: __m128d) -> __m128d {
1981 cmpsd(a, b, 1)
1982 }
1983
1984 /// Returns a new vector with the low element of `a` replaced by the
1985 /// less-than-or-equal comparison of the lower elements of `a` and `b`.
1986 ///
1987 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cmple_sd)
1988 #[inline]
1989 #[target_feature(enable = "sse2")]
1990 #[cfg_attr(test, assert_instr(cmplesd))]
1991 #[stable(feature = "simd_x86", since = "1.27.0")]
1992 pub unsafe fn _mm_cmple_sd(a: __m128d, b: __m128d) -> __m128d {
1993 cmpsd(a, b, 2)
1994 }
1995
1996 /// Returns a new vector with the low element of `a` replaced by the
1997 /// greater-than comparison of the lower elements of `a` and `b`.
1998 ///
1999 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cmpgt_sd)
2000 #[inline]
2001 #[target_feature(enable = "sse2")]
2002 #[cfg_attr(test, assert_instr(cmpltsd))]
2003 #[stable(feature = "simd_x86", since = "1.27.0")]
2004 pub unsafe fn _mm_cmpgt_sd(a: __m128d, b: __m128d) -> __m128d {
2005 simd_insert(_mm_cmplt_sd(b, a), 1, simd_extract::<_, f64>(a, 1))
2006 }
2007
2008 /// Returns a new vector with the low element of `a` replaced by the
2009 /// greater-than-or-equal comparison of the lower elements of `a` and `b`.
2010 ///
2011 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cmpge_sd)
2012 #[inline]
2013 #[target_feature(enable = "sse2")]
2014 #[cfg_attr(test, assert_instr(cmplesd))]
2015 #[stable(feature = "simd_x86", since = "1.27.0")]
2016 pub unsafe fn _mm_cmpge_sd(a: __m128d, b: __m128d) -> __m128d {
2017 simd_insert(_mm_cmple_sd(b, a), 1, simd_extract::<_, f64>(a, 1))
2018 }
2019
2020 /// Returns a new vector with the low element of `a` replaced by the result
2021 /// of comparing both of the lower elements of `a` and `b` to `NaN`. If
2022 /// neither is equal to `NaN` then `0xFFFFFFFFFFFFFFFF` is used and `0`
2023 /// otherwise.
2024 ///
2025 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cmpord_sd)
2026 #[inline]
2027 #[target_feature(enable = "sse2")]
2028 #[cfg_attr(test, assert_instr(cmpordsd))]
2029 #[stable(feature = "simd_x86", since = "1.27.0")]
2030 pub unsafe fn _mm_cmpord_sd(a: __m128d, b: __m128d) -> __m128d {
2031 cmpsd(a, b, 7)
2032 }
2033
2034 /// Returns a new vector with the low element of `a` replaced by the result of
2035 /// comparing both of the lower elements of `a` and `b` to `NaN`. If either is
2036 /// equal to `NaN` then `0xFFFFFFFFFFFFFFFF` is used and `0` otherwise.
2037 ///
2038 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cmpunord_sd)
2039 #[inline]
2040 #[target_feature(enable = "sse2")]
2041 #[cfg_attr(test, assert_instr(cmpunordsd))]
2042 #[stable(feature = "simd_x86", since = "1.27.0")]
2043 pub unsafe fn _mm_cmpunord_sd(a: __m128d, b: __m128d) -> __m128d {
2044 cmpsd(a, b, 3)
2045 }
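
// NaN-handling sketch (`example_ord_unord` is a hypothetical name):
// `cmpord` asks "is neither operand NaN?", `cmpunord` asks "is at least
// one operand NaN?"; the answer lands in the low lane as an all-ones or
// all-zeros mask.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "sse2")]
unsafe fn example_ord_unord() {
    let a = _mm_set_sd(f64::NAN);
    let b = _mm_set_sd(1.0);
    // The sign bit of the low lane (movemask bit 0) reflects the mask.
    assert_eq!(_mm_movemask_pd(_mm_cmpunord_sd(a, b)) & 1, 1);
    assert_eq!(_mm_movemask_pd(_mm_cmpord_sd(a, b)) & 1, 0);
}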
2046
2047 /// Returns a new vector with the low element of `a` replaced by the not-equal
2048 /// comparison of the lower elements of `a` and `b`.
2049 ///
2050 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cmpneq_sd)
2051 #[inline]
2052 #[target_feature(enable = "sse2")]
2053 #[cfg_attr(test, assert_instr(cmpneqsd))]
2054 #[stable(feature = "simd_x86", since = "1.27.0")]
2055 pub unsafe fn _mm_cmpneq_sd(a: __m128d, b: __m128d) -> __m128d {
2056 cmpsd(a, b, 4)
2057 }
2058
2059 /// Returns a new vector with the low element of `a` replaced by the
2060 /// not-less-than comparison of the lower elements of `a` and `b`.
2061 ///
2062 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cmpnlt_sd)
2063 #[inline]
2064 #[target_feature(enable = "sse2")]
2065 #[cfg_attr(test, assert_instr(cmpnltsd))]
2066 #[stable(feature = "simd_x86", since = "1.27.0")]
2067 pub unsafe fn _mm_cmpnlt_sd(a: __m128d, b: __m128d) -> __m128d {
2068 cmpsd(a, b, 5)
2069 }
2070
2071 /// Returns a new vector with the low element of `a` replaced by the
2072 /// not-less-than-or-equal comparison of the lower elements of `a` and `b`.
2073 ///
2074 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cmpnle_sd)
2075 #[inline]
2076 #[target_feature(enable = "sse2")]
2077 #[cfg_attr(test, assert_instr(cmpnlesd))]
2078 #[stable(feature = "simd_x86", since = "1.27.0")]
2079 pub unsafe fn _mm_cmpnle_sd(a: __m128d, b: __m128d) -> __m128d {
2080 cmpsd(a, b, 6)
2081 }
2082
2083 /// Returns a new vector with the low element of `a` replaced by the
2084 /// not-greater-than comparison of the lower elements of `a` and `b`.
2085 ///
2086 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cmpngt_sd)
2087 #[inline]
2088 #[target_feature(enable = "sse2")]
2089 #[cfg_attr(test, assert_instr(cmpnltsd))]
2090 #[stable(feature = "simd_x86", since = "1.27.0")]
2091 pub unsafe fn _mm_cmpngt_sd(a: __m128d, b: __m128d) -> __m128d {
2092 simd_insert(_mm_cmpnlt_sd(b, a), 1, simd_extract::<_, f64>(a, 1))
2093 }
2094
2095 /// Returns a new vector with the low element of `a` replaced by the
2096 /// not-greater-than-or-equal comparison of the lower elements of `a` and `b`.
2097 ///
2098 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cmpnge_sd)
2099 #[inline]
2100 #[target_feature(enable = "sse2")]
2101 #[cfg_attr(test, assert_instr(cmpnlesd))]
2102 #[stable(feature = "simd_x86", since = "1.27.0")]
2103 pub unsafe fn _mm_cmpnge_sd(a: __m128d, b: __m128d) -> __m128d {
2104 simd_insert(_mm_cmpnle_sd(b, a), 1, simd_extract::<_, f64>(a, 1))
2105 }
2106
2107 /// Compares corresponding elements in `a` and `b` for equality.
2108 ///
2109 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cmpeq_pd)
2110 #[inline]
2111 #[target_feature(enable = "sse2")]
2112 #[cfg_attr(test, assert_instr(cmpeqpd))]
2113 #[stable(feature = "simd_x86", since = "1.27.0")]
2114 pub unsafe fn _mm_cmpeq_pd(a: __m128d, b: __m128d) -> __m128d {
2115 cmppd(a, b, 0)
2116 }
2117
2118 /// Compares corresponding elements in `a` and `b` for less-than.
2119 ///
2120 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cmplt_pd)
2121 #[inline]
2122 #[target_feature(enable = "sse2")]
2123 #[cfg_attr(test, assert_instr(cmpltpd))]
2124 #[stable(feature = "simd_x86", since = "1.27.0")]
2125 pub unsafe fn _mm_cmplt_pd(a: __m128d, b: __m128d) -> __m128d {
2126 cmppd(a, b, 1)
2127 }
2128
2129 /// Compares corresponding elements in `a` and `b` for less-than-or-equal.
2130 ///
2131 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cmple_pd)
2132 #[inline]
2133 #[target_feature(enable = "sse2")]
2134 #[cfg_attr(test, assert_instr(cmplepd))]
2135 #[stable(feature = "simd_x86", since = "1.27.0")]
2136 pub unsafe fn _mm_cmple_pd(a: __m128d, b: __m128d) -> __m128d {
2137 cmppd(a, b, 2)
2138 }
2139
2140 /// Compares corresponding elements in `a` and `b` for greater-than.
2141 ///
2142 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cmpgt_pd)
2143 #[inline]
2144 #[target_feature(enable = "sse2")]
2145 #[cfg_attr(test, assert_instr(cmpltpd))]
2146 #[stable(feature = "simd_x86", since = "1.27.0")]
2147 pub unsafe fn _mm_cmpgt_pd(a: __m128d, b: __m128d) -> __m128d {
2148 _mm_cmplt_pd(b, a)
2149 }
2150
2151 /// Compares corresponding elements in `a` and `b` for greater-than-or-equal.
2152 ///
2153 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cmpge_pd)
2154 #[inline]
2155 #[target_feature(enable = "sse2")]
2156 #[cfg_attr(test, assert_instr(cmplepd))]
2157 #[stable(feature = "simd_x86", since = "1.27.0")]
2158 pub unsafe fn _mm_cmpge_pd(a: __m128d, b: __m128d) -> __m128d {
2159 _mm_cmple_pd(b, a)
2160 }
2161
2162 /// Compares corresponding elements in `a` and `b` to see if neither is `NaN`.
2163 ///
2164 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cmpord_pd)
2165 #[inline]
2166 #[target_feature(enable = "sse2")]
2167 #[cfg_attr(test, assert_instr(cmpordpd))]
2168 #[stable(feature = "simd_x86", since = "1.27.0")]
2169 pub unsafe fn _mm_cmpord_pd(a: __m128d, b: __m128d) -> __m128d {
2170 cmppd(a, b, 7)
2171 }
2172
2173 /// Compares corresponding elements in `a` and `b` to see if either is `NaN`.
2174 ///
2175 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cmpunord_pd)
2176 #[inline]
2177 #[target_feature(enable = "sse2")]
2178 #[cfg_attr(test, assert_instr(cmpunordpd))]
2179 #[stable(feature = "simd_x86", since = "1.27.0")]
2180 pub unsafe fn _mm_cmpunord_pd(a: __m128d, b: __m128d) -> __m128d {
2181 cmppd(a, b, 3)
2182 }
2183
2184 /// Compares corresponding elements in `a` and `b` for not-equal.
2185 ///
2186 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cmpneq_pd)
2187 #[inline]
2188 #[target_feature(enable = "sse2")]
2189 #[cfg_attr(test, assert_instr(cmpneqpd))]
2190 #[stable(feature = "simd_x86", since = "1.27.0")]
2191 pub unsafe fn _mm_cmpneq_pd(a: __m128d, b: __m128d) -> __m128d {
2192 cmppd(a, b, 4)
2193 }
2194
2195 /// Compares corresponding elements in `a` and `b` for not-less-than.
2196 ///
2197 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cmpnlt_pd)
2198 #[inline]
2199 #[target_feature(enable = "sse2")]
2200 #[cfg_attr(test, assert_instr(cmpnltpd))]
2201 #[stable(feature = "simd_x86", since = "1.27.0")]
2202 pub unsafe fn _mm_cmpnlt_pd(a: __m128d, b: __m128d) -> __m128d {
2203 cmppd(a, b, 5)
2204 }
2205
2206 /// Compares corresponding elements in `a` and `b` for not-less-than-or-equal.
2207 ///
2208 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cmpnle_pd)
2209 #[inline]
2210 #[target_feature(enable = "sse2")]
2211 #[cfg_attr(test, assert_instr(cmpnlepd))]
2212 #[stable(feature = "simd_x86", since = "1.27.0")]
2213 pub unsafe fn _mm_cmpnle_pd(a: __m128d, b: __m128d) -> __m128d {
2214 cmppd(a, b, 6)
2215 }
2216
2217 /// Compares corresponding elements in `a` and `b` for not-greater-than.
2218 ///
2219 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cmpngt_pd)
2220 #[inline]
2221 #[target_feature(enable = "sse2")]
2222 #[cfg_attr(test, assert_instr(cmpnltpd))]
2223 #[stable(feature = "simd_x86", since = "1.27.0")]
2224 pub unsafe fn _mm_cmpngt_pd(a: __m128d, b: __m128d) -> __m128d {
2225 _mm_cmpnlt_pd(b, a)
2226 }
2227
2228 /// Compares corresponding elements in `a` and `b` for
2229 /// not-greater-than-or-equal.
2230 ///
2231 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cmpnge_pd)
2232 #[inline]
2233 #[target_feature(enable = "sse2")]
2234 #[cfg_attr(test, assert_instr(cmpnlepd))]
2235 #[stable(feature = "simd_x86", since = "1.27.0")]
2236 pub unsafe fn _mm_cmpnge_pd(a: __m128d, b: __m128d) -> __m128d {
2237 _mm_cmpnle_pd(b, a)
2238 }
2239
2240 /// Compares the lower element of `a` and `b` for equality.
2241 ///
2242 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_comieq_sd)
2243 #[inline]
2244 #[target_feature(enable = "sse2")]
2245 #[cfg_attr(test, assert_instr(comisd))]
2246 #[stable(feature = "simd_x86", since = "1.27.0")]
2247 pub unsafe fn _mm_comieq_sd(a: __m128d, b: __m128d) -> i32 {
2248 comieqsd(a, b)
2249 }
2250
2251 /// Compares the lower element of `a` and `b` for less-than.
2252 ///
2253 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_comilt_sd)
2254 #[inline]
2255 #[target_feature(enable = "sse2")]
2256 #[cfg_attr(test, assert_instr(comisd))]
2257 #[stable(feature = "simd_x86", since = "1.27.0")]
2258 pub unsafe fn _mm_comilt_sd(a: __m128d, b: __m128d) -> i32 {
2259 comiltsd(a, b)
2260 }
2261
2262 /// Compares the lower element of `a` and `b` for less-than-or-equal.
2263 ///
2264 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_comile_sd)
2265 #[inline]
2266 #[target_feature(enable = "sse2")]
2267 #[cfg_attr(test, assert_instr(comisd))]
2268 #[stable(feature = "simd_x86", since = "1.27.0")]
2269 pub unsafe fn _mm_comile_sd(a: __m128d, b: __m128d) -> i32 {
2270 comilesd(a, b)
2271 }
2272
2273 /// Compares the lower element of `a` and `b` for greater-than.
2274 ///
2275 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_comigt_sd)
2276 #[inline]
2277 #[target_feature(enable = "sse2")]
2278 #[cfg_attr(test, assert_instr(comisd))]
2279 #[stable(feature = "simd_x86", since = "1.27.0")]
2280 pub unsafe fn _mm_comigt_sd(a: __m128d, b: __m128d) -> i32 {
2281 comigtsd(a, b)
2282 }
2283
2284 /// Compares the lower element of `a` and `b` for greater-than-or-equal.
2285 ///
2286 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_comige_sd)
2287 #[inline]
2288 #[target_feature(enable = "sse2")]
2289 #[cfg_attr(test, assert_instr(comisd))]
2290 #[stable(feature = "simd_x86", since = "1.27.0")]
2291 pub unsafe fn _mm_comige_sd(a: __m128d, b: __m128d) -> i32 {
2292 comigesd(a, b)
2293 }
2294
2295 /// Compares the lower element of `a` and `b` for not-equal.
2296 ///
2297 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_comineq_sd)
2298 #[inline]
2299 #[target_feature(enable = "sse2")]
2300 #[cfg_attr(test, assert_instr(comisd))]
2301 #[stable(feature = "simd_x86", since = "1.27.0")]
2302 pub unsafe fn _mm_comineq_sd(a: __m128d, b: __m128d) -> i32 {
2303 comineqsd(a, b)
2304 }
2305
2306 /// Compares the lower element of `a` and `b` for equality.
2307 ///
2308 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_ucomieq_sd)
2309 #[inline]
2310 #[target_feature(enable = "sse2")]
2311 #[cfg_attr(test, assert_instr(ucomisd))]
2312 #[stable(feature = "simd_x86", since = "1.27.0")]
2313 pub unsafe fn _mm_ucomieq_sd(a: __m128d, b: __m128d) -> i32 {
2314 ucomieqsd(a, b)
2315 }
2316
2317 /// Compares the lower element of `a` and `b` for less-than.
2318 ///
2319 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_ucomilt_sd)
2320 #[inline]
2321 #[target_feature(enable = "sse2")]
2322 #[cfg_attr(test, assert_instr(ucomisd))]
2323 #[stable(feature = "simd_x86", since = "1.27.0")]
2324 pub unsafe fn _mm_ucomilt_sd(a: __m128d, b: __m128d) -> i32 {
2325 ucomiltsd(a, b)
2326 }
2327
2328 /// Compares the lower element of `a` and `b` for less-than-or-equal.
2329 ///
2330 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_ucomile_sd)
2331 #[inline]
2332 #[target_feature(enable = "sse2")]
2333 #[cfg_attr(test, assert_instr(ucomisd))]
2334 #[stable(feature = "simd_x86", since = "1.27.0")]
2335 pub unsafe fn _mm_ucomile_sd(a: __m128d, b: __m128d) -> i32 {
2336 ucomilesd(a, b)
2337 }
2338
2339 /// Compares the lower element of `a` and `b` for greater-than.
2340 ///
2341 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_ucomigt_sd)
2342 #[inline]
2343 #[target_feature(enable = "sse2")]
2344 #[cfg_attr(test, assert_instr(ucomisd))]
2345 #[stable(feature = "simd_x86", since = "1.27.0")]
2346 pub unsafe fn _mm_ucomigt_sd(a: __m128d, b: __m128d) -> i32 {
2347 ucomigtsd(a, b)
2348 }
2349
2350 /// Compares the lower element of `a` and `b` for greater-than-or-equal.
2351 ///
2352 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_ucomige_sd)
2353 #[inline]
2354 #[target_feature(enable = "sse2")]
2355 #[cfg_attr(test, assert_instr(ucomisd))]
2356 #[stable(feature = "simd_x86", since = "1.27.0")]
2357 pub unsafe fn _mm_ucomige_sd(a: __m128d, b: __m128d) -> i32 {
2358 ucomigesd(a, b)
2359 }
2360
2361 /// Compares the lower element of `a` and `b` for not-equal.
2362 ///
2363 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_ucomineq_sd)
2364 #[inline]
2365 #[target_feature(enable = "sse2")]
2366 #[cfg_attr(test, assert_instr(ucomisd))]
2367 #[stable(feature = "simd_x86", since = "1.27.0")]
2368 pub unsafe fn _mm_ucomineq_sd(a: __m128d, b: __m128d) -> i32 {
2369 ucomineqsd(a, b)
2370 }
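
// Sketch of the scalar compare-to-integer family (`example_comi` is a
// hypothetical name): unlike the `cmp*_sd` intrinsics these return `1` or
// `0` directly. The `ucomi*` variants differ from `comi*` only in that
// they do not signal an invalid-operation exception on quiet NaNs.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "sse2")]
unsafe fn example_comi() {
    let a = _mm_set_sd(1.0);
    let b = _mm_set_sd(2.0);
    assert_eq!(_mm_comilt_sd(a, b), 1);
    assert_eq!(_mm_ucomige_sd(a, b), 0);
}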
2371
2372 /// Converts packed double-precision (64-bit) floating-point elements in `a` to
2373 /// packed single-precision (32-bit) floating-point elements.
2374 ///
2375 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvtpd_ps)
2376 #[inline]
2377 #[target_feature(enable = "sse2")]
2378 #[cfg_attr(test, assert_instr(cvtpd2ps))]
2379 #[stable(feature = "simd_x86", since = "1.27.0")]
2380 pub unsafe fn _mm_cvtpd_ps(a: __m128d) -> __m128 {
2381 cvtpd2ps(a)
2382 }
2383
2384 /// Converts packed single-precision (32-bit) floating-point elements in `a` to
2385 /// packed double-precision (64-bit) floating-point elements.
2387 ///
2388 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvtps_pd)
2389 #[inline]
2390 #[target_feature(enable = "sse2")]
2391 #[cfg_attr(test, assert_instr(cvtps2pd))]
2392 #[stable(feature = "simd_x86", since = "1.27.0")]
2393 pub unsafe fn _mm_cvtps_pd(a: __m128) -> __m128d {
2394 cvtps2pd(a)
2395 }
2396
2397 /// Converts packed double-precision (64-bit) floating-point elements in `a` to
2398 /// packed 32-bit integers.
2399 ///
2400 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvtpd_epi32)
2401 #[inline]
2402 #[target_feature(enable = "sse2")]
2403 #[cfg_attr(test, assert_instr(cvtpd2dq))]
2404 #[stable(feature = "simd_x86", since = "1.27.0")]
2405 pub unsafe fn _mm_cvtpd_epi32(a: __m128d) -> __m128i {
2406 transmute(cvtpd2dq(a))
2407 }
2408
2409 /// Converts the lower double-precision (64-bit) floating-point element in `a` to
2410 /// a 32-bit integer.
2411 ///
2412 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvtsd_si32)
2413 #[inline]
2414 #[target_feature(enable = "sse2")]
2415 #[cfg_attr(test, assert_instr(cvtsd2si))]
2416 #[stable(feature = "simd_x86", since = "1.27.0")]
2417 pub unsafe fn _mm_cvtsd_si32(a: __m128d) -> i32 {
2418 cvtsd2si(a)
2419 }
2420
2421 /// Converts the lower double-precision (64-bit) floating-point element in `b`
2422 /// to a single-precision (32-bit) floating-point element, stores the result in
2423 /// the lower element of the return value, and copies the upper elements from `a`
2424 /// to the upper elements of the return value.
2425 ///
2426 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvtsd_ss)
2427 #[inline]
2428 #[target_feature(enable = "sse2")]
2429 #[cfg_attr(test, assert_instr(cvtsd2ss))]
2430 #[stable(feature = "simd_x86", since = "1.27.0")]
2431 pub unsafe fn _mm_cvtsd_ss(a: __m128, b: __m128d) -> __m128 {
2432 cvtsd2ss(a, b)
2433 }
2434
2435 /// Returns the lower double-precision (64-bit) floating-point element of `a`.
2436 ///
2437 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvtsd_f64)
2438 #[inline]
2439 #[target_feature(enable = "sse2")]
2440 #[stable(feature = "simd_x86", since = "1.27.0")]
2441 pub unsafe fn _mm_cvtsd_f64(a: __m128d) -> f64 {
2442 simd_extract(a, 0)
2443 }
2444
2445 /// Converts the lower single-precision (32-bit) floating-point element in `b`
2446 /// to a double-precision (64-bit) floating-point element, stores the result in
2447 /// the lower element of the return value, and copies the upper element from `a`
2448 /// to the upper element of the return value.
2449 ///
2450 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvtss_sd)
2451 #[inline]
2452 #[target_feature(enable = "sse2")]
2453 #[cfg_attr(test, assert_instr(cvtss2sd))]
2454 #[stable(feature = "simd_x86", since = "1.27.0")]
2455 pub unsafe fn _mm_cvtss_sd(a: __m128d, b: __m128) -> __m128d {
2456 cvtss2sd(a, b)
2457 }
2458
2459 /// Converts packed double-precision (64-bit) floating-point elements in `a` to
2460 /// packed 32-bit integers with truncation.
2461 ///
2462 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvttpd_epi32)
2463 #[inline]
2464 #[target_feature(enable = "sse2")]
2465 #[cfg_attr(test, assert_instr(cvttpd2dq))]
2466 #[stable(feature = "simd_x86", since = "1.27.0")]
2467 pub unsafe fn _mm_cvttpd_epi32(a: __m128d) -> __m128i {
2468 transmute(cvttpd2dq(a))
2469 }
2470
2471 /// Converts the lower double-precision (64-bit) floating-point element in `a`
2472 /// to a 32-bit integer with truncation.
2473 ///
2474 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvttsd_si32)
2475 #[inline]
2476 #[target_feature(enable = "sse2")]
2477 #[cfg_attr(test, assert_instr(cvttsd2si))]
2478 #[stable(feature = "simd_x86", since = "1.27.0")]
2479 pub unsafe fn _mm_cvttsd_si32(a: __m128d) -> i32 {
2480 cvttsd2si(a)
2481 }
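
// Rounding sketch (`example_cvt_rounding` is a hypothetical name,
// assuming the default MXCSR round-to-nearest-even mode): `cvtsd2si`
// rounds, while the `t` (truncating) form always rounds toward zero.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "sse2")]
unsafe fn example_cvt_rounding() {
    assert_eq!(_mm_cvtsd_si32(_mm_set_sd(2.5)), 2); // nearest-even
    assert_eq!(_mm_cvttsd_si32(_mm_set_sd(2.5)), 2); // toward zero
    assert_eq!(_mm_cvtsd_si32(_mm_set_sd(-1.7)), -2); // nearest
    assert_eq!(_mm_cvttsd_si32(_mm_set_sd(-1.7)), -1); // toward zero
}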
2482
2483 /// Converts packed single-precision (32-bit) floating-point elements in `a` to
2484 /// packed 32-bit integers with truncation.
2485 ///
2486 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvttps_epi32)
2487 #[inline]
2488 #[target_feature(enable = "sse2")]
2489 #[cfg_attr(test, assert_instr(cvttps2dq))]
2490 #[stable(feature = "simd_x86", since = "1.27.0")]
2491 pub unsafe fn _mm_cvttps_epi32(a: __m128) -> __m128i {
2492 transmute(cvttps2dq(a))
2493 }
2494
2495 /// Copies double-precision (64-bit) floating-point element `a` to the lower
2496 /// element of the return value, and sets the upper element to zero.
2497 ///
2498 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_set_sd)
2499 #[inline]
2500 #[target_feature(enable = "sse2")]
2501 #[stable(feature = "simd_x86", since = "1.27.0")]
2502 pub unsafe fn _mm_set_sd(a: f64) -> __m128d {
2503 _mm_set_pd(0.0, a)
2504 }
2505
2506 /// Broadcasts double-precision (64-bit) floating-point value `a` to all elements
2507 /// of the return value.
2508 ///
2509 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_set1_pd)
2510 #[inline]
2511 #[target_feature(enable = "sse2")]
2512 #[stable(feature = "simd_x86", since = "1.27.0")]
2513 pub unsafe fn _mm_set1_pd(a: f64) -> __m128d {
2514 _mm_set_pd(a, a)
2515 }
2516
2517 /// Broadcasts double-precision (64-bit) floating-point value `a` to all elements
2518 /// of the return value.
2519 ///
2520 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_set_pd1)
2521 #[inline]
2522 #[target_feature(enable = "sse2")]
2523 #[stable(feature = "simd_x86", since = "1.27.0")]
2524 pub unsafe fn _mm_set_pd1(a: f64) -> __m128d {
2525 _mm_set_pd(a, a)
2526 }
2527
2528 /// Sets packed double-precision (64-bit) floating-point elements in the return
2529 /// value with the supplied values.
2530 ///
2531 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_set_pd)
2532 #[inline]
2533 #[target_feature(enable = "sse2")]
2534 #[stable(feature = "simd_x86", since = "1.27.0")]
2535 pub unsafe fn _mm_set_pd(a: f64, b: f64) -> __m128d {
2536 __m128d(b, a)
2537 }
2538
2539 /// Sets packed double-precision (64-bit) floating-point elements in the return
2540 /// value with the supplied values in reverse order.
2541 ///
2542 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_setr_pd)
2543 #[inline]
2544 #[target_feature(enable = "sse2")]
2545 #[stable(feature = "simd_x86", since = "1.27.0")]
2546 pub unsafe fn _mm_setr_pd(a: f64, b: f64) -> __m128d {
2547 _mm_set_pd(b, a)
2548 }
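
// Argument-order sketch (`example_set_order` is a hypothetical name):
// `_mm_set_pd` takes its arguments high-lane-first, `_mm_setr_pd`
// low-lane-first, so the two calls below build the same vector.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "sse2")]
unsafe fn example_set_order() {
    let a = _mm_set_pd(2.0, 1.0); // lanes (low..high): [1.0, 2.0]
    let b = _mm_setr_pd(1.0, 2.0); // same layout
    assert_eq!(_mm_movemask_pd(_mm_cmpeq_pd(a, b)), 0b11);
}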
2549
2550 /// Returns packed double-precision (64-bit) floating-point elements with all
2551 /// zeros.
2552 ///
2553 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_setzero_pd)
2554 #[inline]
2555 #[target_feature(enable = "sse2")]
2556 #[cfg_attr(test, assert_instr(xorps))] // FIXME xorpd expected
2557 #[stable(feature = "simd_x86", since = "1.27.0")]
2558 pub unsafe fn _mm_setzero_pd() -> __m128d {
2559 _mm_set_pd(0.0, 0.0)
2560 }
2561
2562 /// Returns a mask of the most significant bit of each element in `a`.
2563 ///
2564 /// The mask is stored in the 2 least significant bits of the return value.
2565 /// All other bits are set to `0`.
2566 ///
2567 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_movemask_pd)
2568 #[inline]
2569 #[target_feature(enable = "sse2")]
2570 #[cfg_attr(test, assert_instr(movmskpd))]
2571 #[stable(feature = "simd_x86", since = "1.27.0")]
2572 pub unsafe fn _mm_movemask_pd(a: __m128d) -> i32 {
2573 movmskpd(a)
2574 }
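
// Branch-on-lanes sketch (`example_compare_movemask` is a hypothetical
// name): packed compares yield per-lane all-ones/all-zeros masks, and
// `movemask` condenses their sign bits so scalar code can branch on them.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "sse2")]
unsafe fn example_compare_movemask() {
    let a = _mm_setr_pd(1.0, 4.0);
    let b = _mm_setr_pd(2.0, 3.0);
    let lt = _mm_cmplt_pd(a, b); // lane 0: all ones; lane 1: all zeros
    assert_eq!(_mm_movemask_pd(lt), 0b01);
}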
2575
2576 /// Loads 128-bits (composed of 2 packed double-precision (64-bit)
2577 /// floating-point elements) from memory into the returned vector.
2578 /// `mem_addr` must be aligned on a 16-byte boundary or a general-protection
2579 /// exception may be generated.
2580 ///
2581 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_load_pd)
2582 #[inline]
2583 #[target_feature(enable = "sse2")]
2584 #[cfg_attr(test, assert_instr(movaps))]
2585 #[stable(feature = "simd_x86", since = "1.27.0")]
2586 #[allow(clippy::cast_ptr_alignment)]
2587 pub unsafe fn _mm_load_pd(mem_addr: *const f64) -> __m128d {
2588 *(mem_addr as *const __m128d)
2589 }
2590
2591 /// Loads a 64-bit double-precision value to the low element of a
2592 /// 128-bit vector of `[2 x double]` and clears the upper element.
2593 ///
2594 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_load_sd)
2595 #[inline]
2596 #[target_feature(enable = "sse2")]
2597 #[cfg_attr(test, assert_instr(movsd))]
2598 #[stable(feature = "simd_x86", since = "1.27.0")]
2599 pub unsafe fn _mm_load_sd(mem_addr: *const f64) -> __m128d {
2600 _mm_setr_pd(*mem_addr, 0.)
2601 }
2602
2603 /// Loads a double-precision value into the high-order bits of a 128-bit
2604 /// vector of `[2 x double]`. The low-order bits are copied from the low-order
2605 /// bits of the first operand.
2606 ///
2607 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_loadh_pd)
2608 #[inline]
2609 #[target_feature(enable = "sse2")]
2610 #[cfg_attr(test, assert_instr(movhps))]
2611 #[stable(feature = "simd_x86", since = "1.27.0")]
2612 pub unsafe fn _mm_loadh_pd(a: __m128d, mem_addr: *const f64) -> __m128d {
2613 _mm_setr_pd(simd_extract(a, 0), *mem_addr)
2614 }
2615
2616 /// Loads a double-precision value into the low-order bits of a 128-bit
2617 /// vector of `[2 x double]`. The high-order bits are copied from the
2618 /// high-order bits of the first operand.
2619 ///
2620 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_loadl_pd)
2621 #[inline]
2622 #[target_feature(enable = "sse2")]
2623 #[cfg_attr(test, assert_instr(movlps))]
2624 #[stable(feature = "simd_x86", since = "1.27.0")]
2625 pub unsafe fn _mm_loadl_pd(a: __m128d, mem_addr: *const f64) -> __m128d {
2626 _mm_setr_pd(*mem_addr, simd_extract(a, 1))
2627 }
2628
2629 /// Stores a 128-bit floating point vector of `[2 x double]` to a 128-bit
2630 /// aligned memory location.
2631 /// To minimize caching, the data is flagged as non-temporal (unlikely to be
2632 /// used again soon).
2633 ///
2634 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_stream_pd)
2635 #[inline]
2636 #[target_feature(enable = "sse2")]
2637 #[cfg_attr(test, assert_instr(movntps))] // FIXME movntpd
2638 #[stable(feature = "simd_x86", since = "1.27.0")]
2639 #[allow(clippy::cast_ptr_alignment)]
2640 pub unsafe fn _mm_stream_pd(mem_addr: *mut f64, a: __m128d) {
2641 intrinsics::nontemporal_store(mem_addr as *mut __m128d, a);
2642 }
2643
2644 /// Stores the lower 64 bits of a 128-bit vector of `[2 x double]` to a
2645 /// memory location.
2646 ///
2647 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_store_sd)
2648 #[inline]
2649 #[target_feature(enable = "sse2")]
2650 #[cfg_attr(all(test, not(target_os = "windows")), assert_instr(movlps))]
2651 #[stable(feature = "simd_x86", since = "1.27.0")]
2652 pub unsafe fn _mm_store_sd(mem_addr: *mut f64, a: __m128d) {
2653 *mem_addr = simd_extract(a, 0)
2654 }
2655
2656 /// Stores 128-bits (composed of 2 packed double-precision (64-bit)
2657 /// floating-point elements) from `a` into memory. `mem_addr` must be aligned
2658 /// on a 16-byte boundary or a general-protection exception may be generated.
2659 ///
2660 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_store_pd)
2661 #[inline]
2662 #[target_feature(enable = "sse2")]
2663 #[cfg_attr(test, assert_instr(movaps))]
2664 #[stable(feature = "simd_x86", since = "1.27.0")]
2665 #[allow(clippy::cast_ptr_alignment)]
2666 pub unsafe fn _mm_store_pd(mem_addr: *mut f64, a: __m128d) {
2667 *(mem_addr as *mut __m128d) = a;
2668 }
2669
2670 /// Stores 128-bits (composed of 2 packed double-precision (64-bit)
2671 /// floating-point elements) from `a` into memory.
2672 /// `mem_addr` does not need to be aligned on any particular boundary.
2673 ///
2674 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_storeu_pd)
2675 #[inline]
2676 #[target_feature(enable = "sse2")]
2677 #[cfg_attr(test, assert_instr(movups))] // FIXME movupd expected
2678 #[stable(feature = "simd_x86", since = "1.27.0")]
2679 pub unsafe fn _mm_storeu_pd(mem_addr: *mut f64, a: __m128d) {
2680 storeupd(mem_addr as *mut i8, a);
2681 }
2682
2683 /// Stores the lower double-precision (64-bit) floating-point element from `a`
2684 /// into 2 contiguous elements in memory. `mem_addr` must be aligned on a
2685 /// 16-byte boundary or a general-protection exception may be generated.
2686 ///
2687 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_store1_pd)
2688 #[inline]
2689 #[target_feature(enable = "sse2")]
2690 #[stable(feature = "simd_x86", since = "1.27.0")]
2691 #[allow(clippy::cast_ptr_alignment)]
2692 pub unsafe fn _mm_store1_pd(mem_addr: *mut f64, a: __m128d) {
2693 let b: __m128d = simd_shuffle2(a, a, [0, 0]);
2694 *(mem_addr as *mut __m128d) = b;
2695 }
2696
2697 /// Stores the lower double-precision (64-bit) floating-point element from `a`
2698 /// into 2 contiguous elements in memory. `mem_addr` must be aligned on a
2699 /// 16-byte boundary or a general-protection exception may be generated.
2700 ///
2701 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_store_pd1)
2702 #[inline]
2703 #[target_feature(enable = "sse2")]
2704 #[stable(feature = "simd_x86", since = "1.27.0")]
2705 #[allow(clippy::cast_ptr_alignment)]
2706 pub unsafe fn _mm_store_pd1(mem_addr: *mut f64, a: __m128d) {
2707 let b: __m128d = simd_shuffle2(a, a, [0, 0]);
2708 *(mem_addr as *mut __m128d) = b;
2709 }
2710
2711 /// Stores 2 double-precision (64-bit) floating-point elements from `a` into
2712 /// memory in reverse order.
2713 /// `mem_addr` must be aligned on a 16-byte boundary or a general-protection
2714 /// exception may be generated.
2715 ///
2716 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_storer_pd)
2717 #[inline]
2718 #[target_feature(enable = "sse2")]
2719 #[stable(feature = "simd_x86", since = "1.27.0")]
2720 #[allow(clippy::cast_ptr_alignment)]
2721 pub unsafe fn _mm_storer_pd(mem_addr: *mut f64, a: __m128d) {
2722 let b: __m128d = simd_shuffle2(a, a, [1, 0]);
2723 *(mem_addr as *mut __m128d) = b;
2724 }
2725
2726 /// Stores the upper 64 bits of a 128-bit vector of `[2 x double]` to a
2727 /// memory location.
2728 ///
2729 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_storeh_pd)
2730 #[inline]
2731 #[target_feature(enable = "sse2")]
2732 #[cfg_attr(all(test, not(target_os = "windows")), assert_instr(movhps))]
2733 #[stable(feature = "simd_x86", since = "1.27.0")]
2734 pub unsafe fn _mm_storeh_pd(mem_addr: *mut f64, a: __m128d) {
2735 *mem_addr = simd_extract(a, 1);
2736 }
2737
2738 /// Stores the lower 64 bits of a 128-bit vector of `[2 x double]` to a
2739 /// memory location.
2740 ///
2741 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_storel_pd)
2742 #[inline]
2743 #[target_feature(enable = "sse2")]
2744 #[cfg_attr(all(test, not(target_os = "windows")), assert_instr(movlps))]
2745 #[stable(feature = "simd_x86", since = "1.27.0")]
2746 pub unsafe fn _mm_storel_pd(mem_addr: *mut f64, a: __m128d) {
2747 *mem_addr = simd_extract(a, 0);
2748 }
2749
2750 /// Loads a double-precision (64-bit) floating-point element from memory
2751 /// into both elements of returned vector.
2752 ///
2753 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_load1_pd)
2754 #[inline]
2755 #[target_feature(enable = "sse2")]
2756 // #[cfg_attr(test, assert_instr(movapd))] // FIXME LLVM uses different codegen
2757 #[stable(feature = "simd_x86", since = "1.27.0")]
2758 pub unsafe fn _mm_load1_pd(mem_addr: *const f64) -> __m128d {
2759 let d = *mem_addr;
2760 _mm_setr_pd(d, d)
2761 }
2762
2763 /// Loads a double-precision (64-bit) floating-point element from memory
2764 /// into both elements of returned vector.
2765 ///
2766 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_load_pd1)
2767 #[inline]
2768 #[target_feature(enable = "sse2")]
2769 // #[cfg_attr(test, assert_instr(movapd))] // FIXME same as _mm_load1_pd
2770 #[stable(feature = "simd_x86", since = "1.27.0")]
2771 pub unsafe fn _mm_load_pd1(mem_addr: *const f64) -> __m128d {
2772 _mm_load1_pd(mem_addr)
2773 }
2774
2775 /// Loads 2 double-precision (64-bit) floating-point elements from memory into
2776 /// the returned vector in reverse order. `mem_addr` must be aligned on a
2777 /// 16-byte boundary or a general-protection exception may be generated.
2778 ///
2779 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_loadr_pd)
2780 #[inline]
2781 #[target_feature(enable = "sse2")]
2782 #[cfg_attr(test, assert_instr(movaps))]
2783 #[stable(feature = "simd_x86", since = "1.27.0")]
2784 pub unsafe fn _mm_loadr_pd(mem_addr: *const f64) -> __m128d {
2785 let a = _mm_load_pd(mem_addr);
2786 simd_shuffle2(a, a, [1, 0])
2787 }
2788
2789 /// Loads 128-bits (composed of 2 packed double-precision (64-bit)
2790 /// floating-point elements) from memory into the returned vector.
2791 /// `mem_addr` does not need to be aligned on any particular boundary.
2792 ///
2793 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_loadu_pd)
2794 #[inline]
2795 #[target_feature(enable = "sse2")]
2796 #[cfg_attr(test, assert_instr(movups))]
2797 #[stable(feature = "simd_x86", since = "1.27.0")]
2798 pub unsafe fn _mm_loadu_pd(mem_addr: *const f64) -> __m128d {
2799 let mut dst = _mm_undefined_pd();
2800 ptr::copy_nonoverlapping(
2801 mem_addr as *const u8,
2802 &mut dst as *mut __m128d as *mut u8,
2803 mem::size_of::<__m128d>(),
2804 );
2805 dst
2806 }
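
// Alignment sketch (`example_loadu` is a hypothetical name): `loadu`
// accepts any address at a potential speed cost, whereas `_mm_load_pd`
// could raise a general-protection exception on the same pointer.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "sse2")]
unsafe fn example_loadu() {
    let buf = [0.0f64, 1.0, 2.0, 3.0];
    // `add(1)` offsets by 8 bytes, so 16-byte alignment is not guaranteed.
    let v = _mm_loadu_pd(buf.as_ptr().add(1));
    assert_eq!(_mm_cvtsd_f64(v), 1.0);
}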
2807
2808 /// Constructs a 128-bit floating-point vector of `[2 x double]` from two
2809 /// 128-bit vector parameters of `[2 x double]`, using the immediate-value
2810 /// parameter as a specifier.
2811 ///
2812 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_shuffle_pd)
2813 #[inline]
2814 #[target_feature(enable = "sse2")]
2815 #[cfg_attr(all(test, not(target_os = "windows")), assert_instr(shufps, imm8 = 1))]
2816 #[cfg_attr(all(test, target_os = "windows"), assert_instr(shufpd, imm8 = 1))]
2817 #[rustc_args_required_const(2)]
2818 #[stable(feature = "simd_x86", since = "1.27.0")]
2819 pub unsafe fn _mm_shuffle_pd(a: __m128d, b: __m128d, imm8: i32) -> __m128d {
2820 match imm8 & 0b11 {
2821 0b00 => simd_shuffle2(a, b, [0, 2]),
2822 0b01 => simd_shuffle2(a, b, [1, 2]),
2823 0b10 => simd_shuffle2(a, b, [0, 3]),
2824 _ => simd_shuffle2(a, b, [1, 3]),
2825 }
2826 }
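
// A minimal usage sketch (editor's addition): bit 0 of `imm8` selects which
// lane of `a` becomes the low half of the result, bit 1 selects which lane
// of `b` becomes the high half:
//
//     let a = _mm_setr_pd(1.0, 2.0);
//     let b = _mm_setr_pd(10.0, 20.0);
//     let r = _mm_shuffle_pd(a, b, 0b01);
//     // r = [2.0, 10.0]: a[1] in the low lane, b[0] in the high lane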
2827
2828 /// Constructs a 128-bit floating-point vector of `[2 x double]`. The lower
2829 /// 64 bits are set to the lower 64 bits of the second parameter. The upper
2830 /// 64 bits are set to the upper 64 bits of the first parameter.
2831 ///
2832 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_move_sd)
2833 #[inline]
2834 #[target_feature(enable = "sse2")]
2835 #[cfg_attr(all(test, not(target_os = "windows")), assert_instr(movsd))]
2836 #[cfg_attr(all(test, target_os = "windows"), assert_instr(movlps))]
2837 #[stable(feature = "simd_x86", since = "1.27.0")]
2838 pub unsafe fn _mm_move_sd(a: __m128d, b: __m128d) -> __m128d {
2839 _mm_setr_pd(simd_extract(b, 0), simd_extract(a, 1))
2840 }
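
// A minimal usage sketch (editor's addition):
//
//     let a = _mm_setr_pd(1.0, 2.0);
//     let b = _mm_setr_pd(10.0, 20.0);
//     let r = _mm_move_sd(a, b);
//     // r = [10.0, 2.0]: low lane from `b`, high lane from `a`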
2841
2842 /// Casts a 128-bit floating-point vector of `[2 x double]` into a 128-bit
2843 /// floating-point vector of `[4 x float]`.
2844 ///
2845 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_castpd_ps)
2846 #[inline]
2847 #[target_feature(enable = "sse2")]
2848 #[stable(feature = "simd_x86", since = "1.27.0")]
2849 pub unsafe fn _mm_castpd_ps(a: __m128d) -> __m128 {
2850 transmute(a)
2851 }
2852
2853 /// Casts a 128-bit floating-point vector of `[2 x double]` into a 128-bit
2854 /// integer vector.
2855 ///
2856 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_castpd_si128)
2857 #[inline]
2858 #[target_feature(enable = "sse2")]
2859 #[stable(feature = "simd_x86", since = "1.27.0")]
2860 pub unsafe fn _mm_castpd_si128(a: __m128d) -> __m128i {
2861 transmute(a)
2862 }
2863
2864 /// Casts a 128-bit floating-point vector of `[4 x float]` into a 128-bit
2865 /// floating-point vector of `[2 x double]`.
2866 ///
2867 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_castps_pd)
2868 #[inline]
2869 #[target_feature(enable = "sse2")]
2870 #[stable(feature = "simd_x86", since = "1.27.0")]
2871 pub unsafe fn _mm_castps_pd(a: __m128) -> __m128d {
2872 transmute(a)
2873 }
2874
2875 /// Casts a 128-bit floating-point vector of `[4 x float]` into a 128-bit
2876 /// integer vector.
2877 ///
2878 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_castps_si128)
2879 #[inline]
2880 #[target_feature(enable = "sse2")]
2881 #[stable(feature = "simd_x86", since = "1.27.0")]
2882 pub unsafe fn _mm_castps_si128(a: __m128) -> __m128i {
2883 transmute(a)
2884 }
2885
2886 /// Casts a 128-bit integer vector into a 128-bit floating-point vector
2887 /// of `[2 x double]`.
2888 ///
2889 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_castsi128_pd)
2890 #[inline]
2891 #[target_feature(enable = "sse2")]
2892 #[stable(feature = "simd_x86", since = "1.27.0")]
2893 pub unsafe fn _mm_castsi128_pd(a: __m128i) -> __m128d {
2894 transmute(a)
2895 }
2896
2897 /// Casts a 128-bit integer vector into a 128-bit floating-point vector
2898 /// of `[4 x float]`.
2899 ///
2900 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_castsi128_ps)
2901 #[inline]
2902 #[target_feature(enable = "sse2")]
2903 #[stable(feature = "simd_x86", since = "1.27.0")]
2904 pub unsafe fn _mm_castsi128_ps(a: __m128i) -> __m128 {
2905 transmute(a)
2906 }
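
// A minimal usage sketch (editor's addition): the `_mm_cast*` intrinsics
// reinterpret the 128 bits without any value conversion, so a round-trip
// yields the original bit pattern:
//
//     let v = _mm_set1_epi32(-1);
//     let f = _mm_castsi128_ps(v);    // same bits, viewed as 4 x f32
//     let back = _mm_castps_si128(f); // bitwise identical to `v`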
2907
2908 /// Returns a vector of type `__m128d` with undefined elements.
2909 ///
2910 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_undefined_pd)
2911 #[inline]
2912 #[target_feature(enable = "sse2")]
2913 #[stable(feature = "simd_x86", since = "1.27.0")]
2914 pub unsafe fn _mm_undefined_pd() -> __m128d {
2915 // FIXME: this function should return MaybeUninit<__m128d>
2916 mem::MaybeUninit::<__m128d>::uninit().assume_init()
2917 }
2918
2919 /// Returns a vector of type `__m128i` with undefined elements.
2920 ///
2921 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_undefined_si128)
2922 #[inline]
2923 #[target_feature(enable = "sse2")]
2924 #[stable(feature = "simd_x86", since = "1.27.0")]
2925 pub unsafe fn _mm_undefined_si128() -> __m128i {
2926 // FIXME: this function should return MaybeUninit<__m128i>
2927 mem::MaybeUninit::<__m128i>::uninit().assume_init()
2928 }
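
// A minimal usage sketch (editor's addition): a vector returned by
// `_mm_undefined_*` has unspecified contents and must be fully overwritten
// before any lane is read, e.g. when used as a store destination:
//
//     let mut r = _mm_undefined_si128();
//     _mm_storeu_si128(&mut r, _mm_set1_epi8(9)); // now fully initialized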
2929
2930 /// The resulting `__m128d` element is composed of the high-order values of
2931 /// the two `__m128d` interleaved input elements, i.e.:
2932 ///
2933 /// * The `[127:64]` bits are copied from the `[127:64]` bits of the second
2934 ///   input
2935 /// * The `[63:0]` bits are copied from the `[127:64]` bits of the first input
2936 ///
2937 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_unpackhi_pd)
2938 #[inline]
2939 #[target_feature(enable = "sse2")]
2940 #[cfg_attr(test, assert_instr(unpckhpd))]
2941 #[stable(feature = "simd_x86", since = "1.27.0")]
2942 pub unsafe fn _mm_unpackhi_pd(a: __m128d, b: __m128d) -> __m128d {
2943 simd_shuffle2(a, b, [1, 3])
2944 }
2945
2946 /// The resulting `__m128d` element is composed of the low-order values of
2947 /// the two `__m128d` interleaved input elements, i.e.:
2948 ///
2949 /// * The `[127:64]` bits are copied from the `[63:0]` bits of the second input
2950 /// * The `[63:0]` bits are copied from the `[63:0]` bits of the first input
2951 ///
2952 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_unpacklo_pd)
2953 #[inline]
2954 #[target_feature(enable = "sse2")]
2955 #[cfg_attr(all(test, not(target_os = "windows")), assert_instr(movlhps))]
2956 #[stable(feature = "simd_x86", since = "1.27.0")]
2957 pub unsafe fn _mm_unpacklo_pd(a: __m128d, b: __m128d) -> __m128d {
2958 simd_shuffle2(a, b, [0, 2])
2959 }
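
// A minimal usage sketch (editor's addition) contrasting the two unpacks:
//
//     let a = _mm_setr_pd(1.0, 2.0);
//     let b = _mm_setr_pd(10.0, 20.0);
//     let hi = _mm_unpackhi_pd(a, b); // [2.0, 20.0]: high lanes interleaved
//     let lo = _mm_unpacklo_pd(a, b); // [1.0, 10.0]: low lanes interleaved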
2960
2961 #[allow(improper_ctypes)]
2962 extern "C" {
2963 #[link_name = "llvm.x86.sse2.pause"]
2964 fn pause();
2965 #[link_name = "llvm.x86.sse2.clflush"]
2966 fn clflush(p: *const u8);
2967 #[link_name = "llvm.x86.sse2.lfence"]
2968 fn lfence();
2969 #[link_name = "llvm.x86.sse2.mfence"]
2970 fn mfence();
2971 #[link_name = "llvm.x86.sse2.pavg.b"]
2972 fn pavgb(a: u8x16, b: u8x16) -> u8x16;
2973 #[link_name = "llvm.x86.sse2.pavg.w"]
2974 fn pavgw(a: u16x8, b: u16x8) -> u16x8;
2975 #[link_name = "llvm.x86.sse2.pmadd.wd"]
2976 fn pmaddwd(a: i16x8, b: i16x8) -> i32x4;
2977 #[link_name = "llvm.x86.sse2.pmaxs.w"]
2978 fn pmaxsw(a: i16x8, b: i16x8) -> i16x8;
2979 #[link_name = "llvm.x86.sse2.pmaxu.b"]
2980 fn pmaxub(a: u8x16, b: u8x16) -> u8x16;
2981 #[link_name = "llvm.x86.sse2.pmins.w"]
2982 fn pminsw(a: i16x8, b: i16x8) -> i16x8;
2983 #[link_name = "llvm.x86.sse2.pminu.b"]
2984 fn pminub(a: u8x16, b: u8x16) -> u8x16;
2985 #[link_name = "llvm.x86.sse2.pmulh.w"]
2986 fn pmulhw(a: i16x8, b: i16x8) -> i16x8;
2987 #[link_name = "llvm.x86.sse2.pmulhu.w"]
2988 fn pmulhuw(a: u16x8, b: u16x8) -> u16x8;
2989 #[link_name = "llvm.x86.sse2.pmulu.dq"]
2990 fn pmuludq(a: u32x4, b: u32x4) -> u64x2;
2991 #[link_name = "llvm.x86.sse2.psad.bw"]
2992 fn psadbw(a: u8x16, b: u8x16) -> u64x2;
2993 #[link_name = "llvm.x86.sse2.pslli.w"]
2994 fn pslliw(a: i16x8, imm8: i32) -> i16x8;
2995 #[link_name = "llvm.x86.sse2.psll.w"]
2996 fn psllw(a: i16x8, count: i16x8) -> i16x8;
2997 #[link_name = "llvm.x86.sse2.pslli.d"]
2998 fn psllid(a: i32x4, imm8: i32) -> i32x4;
2999 #[link_name = "llvm.x86.sse2.psll.d"]
3000 fn pslld(a: i32x4, count: i32x4) -> i32x4;
3001 #[link_name = "llvm.x86.sse2.pslli.q"]
3002 fn pslliq(a: i64x2, imm8: i32) -> i64x2;
3003 #[link_name = "llvm.x86.sse2.psll.q"]
3004 fn psllq(a: i64x2, count: i64x2) -> i64x2;
3005 #[link_name = "llvm.x86.sse2.psrai.w"]
3006 fn psraiw(a: i16x8, imm8: i32) -> i16x8;
3007 #[link_name = "llvm.x86.sse2.psra.w"]
3008 fn psraw(a: i16x8, count: i16x8) -> i16x8;
3009 #[link_name = "llvm.x86.sse2.psrai.d"]
3010 fn psraid(a: i32x4, imm8: i32) -> i32x4;
3011 #[link_name = "llvm.x86.sse2.psra.d"]
3012 fn psrad(a: i32x4, count: i32x4) -> i32x4;
3013 #[link_name = "llvm.x86.sse2.psrli.w"]
3014 fn psrliw(a: i16x8, imm8: i32) -> i16x8;
3015 #[link_name = "llvm.x86.sse2.psrl.w"]
3016 fn psrlw(a: i16x8, count: i16x8) -> i16x8;
3017 #[link_name = "llvm.x86.sse2.psrli.d"]
3018 fn psrlid(a: i32x4, imm8: i32) -> i32x4;
3019 #[link_name = "llvm.x86.sse2.psrl.d"]
3020 fn psrld(a: i32x4, count: i32x4) -> i32x4;
3021 #[link_name = "llvm.x86.sse2.psrli.q"]
3022 fn psrliq(a: i64x2, imm8: i32) -> i64x2;
3023 #[link_name = "llvm.x86.sse2.psrl.q"]
3024 fn psrlq(a: i64x2, count: i64x2) -> i64x2;
3025 #[link_name = "llvm.x86.sse2.cvtdq2ps"]
3026 fn cvtdq2ps(a: i32x4) -> __m128;
3027 #[link_name = "llvm.x86.sse2.cvtps2dq"]
3028 fn cvtps2dq(a: __m128) -> i32x4;
3029 #[link_name = "llvm.x86.sse2.maskmov.dqu"]
3030 fn maskmovdqu(a: i8x16, mask: i8x16, mem_addr: *mut i8);
3031 #[link_name = "llvm.x86.sse2.packsswb.128"]
3032 fn packsswb(a: i16x8, b: i16x8) -> i8x16;
3033 #[link_name = "llvm.x86.sse2.packssdw.128"]
3034 fn packssdw(a: i32x4, b: i32x4) -> i16x8;
3035 #[link_name = "llvm.x86.sse2.packuswb.128"]
3036 fn packuswb(a: i16x8, b: i16x8) -> u8x16;
3037 #[link_name = "llvm.x86.sse2.pmovmskb.128"]
3038 fn pmovmskb(a: i8x16) -> i32;
3039 #[link_name = "llvm.x86.sse2.max.sd"]
3040 fn maxsd(a: __m128d, b: __m128d) -> __m128d;
3041 #[link_name = "llvm.x86.sse2.max.pd"]
3042 fn maxpd(a: __m128d, b: __m128d) -> __m128d;
3043 #[link_name = "llvm.x86.sse2.min.sd"]
3044 fn minsd(a: __m128d, b: __m128d) -> __m128d;
3045 #[link_name = "llvm.x86.sse2.min.pd"]
3046 fn minpd(a: __m128d, b: __m128d) -> __m128d;
3047 #[link_name = "llvm.x86.sse2.sqrt.sd"]
3048 fn sqrtsd(a: __m128d) -> __m128d;
3049 #[link_name = "llvm.x86.sse2.sqrt.pd"]
3050 fn sqrtpd(a: __m128d) -> __m128d;
3051 #[link_name = "llvm.x86.sse2.cmp.sd"]
3052 fn cmpsd(a: __m128d, b: __m128d, imm8: i8) -> __m128d;
3053 #[link_name = "llvm.x86.sse2.cmp.pd"]
3054 fn cmppd(a: __m128d, b: __m128d, imm8: i8) -> __m128d;
3055 #[link_name = "llvm.x86.sse2.comieq.sd"]
3056 fn comieqsd(a: __m128d, b: __m128d) -> i32;
3057 #[link_name = "llvm.x86.sse2.comilt.sd"]
3058 fn comiltsd(a: __m128d, b: __m128d) -> i32;
3059 #[link_name = "llvm.x86.sse2.comile.sd"]
3060 fn comilesd(a: __m128d, b: __m128d) -> i32;
3061 #[link_name = "llvm.x86.sse2.comigt.sd"]
3062 fn comigtsd(a: __m128d, b: __m128d) -> i32;
3063 #[link_name = "llvm.x86.sse2.comige.sd"]
3064 fn comigesd(a: __m128d, b: __m128d) -> i32;
3065 #[link_name = "llvm.x86.sse2.comineq.sd"]
3066 fn comineqsd(a: __m128d, b: __m128d) -> i32;
3067 #[link_name = "llvm.x86.sse2.ucomieq.sd"]
3068 fn ucomieqsd(a: __m128d, b: __m128d) -> i32;
3069 #[link_name = "llvm.x86.sse2.ucomilt.sd"]
3070 fn ucomiltsd(a: __m128d, b: __m128d) -> i32;
3071 #[link_name = "llvm.x86.sse2.ucomile.sd"]
3072 fn ucomilesd(a: __m128d, b: __m128d) -> i32;
3073 #[link_name = "llvm.x86.sse2.ucomigt.sd"]
3074 fn ucomigtsd(a: __m128d, b: __m128d) -> i32;
3075 #[link_name = "llvm.x86.sse2.ucomige.sd"]
3076 fn ucomigesd(a: __m128d, b: __m128d) -> i32;
3077 #[link_name = "llvm.x86.sse2.ucomineq.sd"]
3078 fn ucomineqsd(a: __m128d, b: __m128d) -> i32;
3079 #[link_name = "llvm.x86.sse2.movmsk.pd"]
3080 fn movmskpd(a: __m128d) -> i32;
3081 #[link_name = "llvm.x86.sse2.cvtpd2ps"]
3082 fn cvtpd2ps(a: __m128d) -> __m128;
3083 #[link_name = "llvm.x86.sse2.cvtps2pd"]
3084 fn cvtps2pd(a: __m128) -> __m128d;
3085 #[link_name = "llvm.x86.sse2.cvtpd2dq"]
3086 fn cvtpd2dq(a: __m128d) -> i32x4;
3087 #[link_name = "llvm.x86.sse2.cvtsd2si"]
3088 fn cvtsd2si(a: __m128d) -> i32;
3089 #[link_name = "llvm.x86.sse2.cvtsd2ss"]
3090 fn cvtsd2ss(a: __m128, b: __m128d) -> __m128;
3091 #[link_name = "llvm.x86.sse2.cvtss2sd"]
3092 fn cvtss2sd(a: __m128d, b: __m128) -> __m128d;
3093 #[link_name = "llvm.x86.sse2.cvttpd2dq"]
3094 fn cvttpd2dq(a: __m128d) -> i32x4;
3095 #[link_name = "llvm.x86.sse2.cvttsd2si"]
3096 fn cvttsd2si(a: __m128d) -> i32;
3097 #[link_name = "llvm.x86.sse2.cvttps2dq"]
3098 fn cvttps2dq(a: __m128) -> i32x4;
3099 #[link_name = "llvm.x86.sse2.storeu.dq"]
3100 fn storeudq(mem_addr: *mut i8, a: __m128i);
3101 #[link_name = "llvm.x86.sse2.storeu.pd"]
3102 fn storeupd(mem_addr: *mut i8, a: __m128d);
3103 }
3104
3105 #[cfg(test)]
3106 mod tests {
3107 use crate::{
3108 core_arch::{simd::*, x86::*},
3109 hint::black_box,
3110 };
3111 use std::{
3112 boxed, f32,
3113 f64::{self, NAN},
3114 i32,
3115 mem::{self, transmute},
3116 };
3117 use stdarch_test::simd_test;
3118
3119 #[test]
3120 fn test_mm_pause() {
3121 unsafe { _mm_pause() }
3122 }
3123
3124 #[simd_test(enable = "sse2")]
3125 unsafe fn test_mm_clflush() {
3126 let x = 0_u8;
3127 _mm_clflush(&x as *const _);
3128 }
3129
3130 #[simd_test(enable = "sse2")]
3131 unsafe fn test_mm_lfence() {
3132 _mm_lfence();
3133 }
3134
3135 #[simd_test(enable = "sse2")]
3136 unsafe fn test_mm_mfence() {
3137 _mm_mfence();
3138 }
3139
3140 #[simd_test(enable = "sse2")]
3141 unsafe fn test_mm_add_epi8() {
3142 let a = _mm_setr_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
3143 #[rustfmt::skip]
3144 let b = _mm_setr_epi8(
3145 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31,
3146 );
3147 let r = _mm_add_epi8(a, b);
3148 #[rustfmt::skip]
3149 let e = _mm_setr_epi8(
3150 16, 18, 20, 22, 24, 26, 28, 30, 32, 34, 36, 38, 40, 42, 44, 46,
3151 );
3152 assert_eq_m128i(r, e);
3153 }
3154
3155 #[simd_test(enable = "sse2")]
3156 unsafe fn test_mm_add_epi8_overflow() {
3157 let a = _mm_set1_epi8(0x7F);
3158 let b = _mm_set1_epi8(1);
3159 let r = _mm_add_epi8(a, b);
3160 assert_eq_m128i(r, _mm_set1_epi8(-128));
3161 }
3162
3163 #[simd_test(enable = "sse2")]
3164 unsafe fn test_mm_add_epi16() {
3165 let a = _mm_setr_epi16(0, 1, 2, 3, 4, 5, 6, 7);
3166 let b = _mm_setr_epi16(8, 9, 10, 11, 12, 13, 14, 15);
3167 let r = _mm_add_epi16(a, b);
3168 let e = _mm_setr_epi16(8, 10, 12, 14, 16, 18, 20, 22);
3169 assert_eq_m128i(r, e);
3170 }
3171
3172 #[simd_test(enable = "sse2")]
3173 unsafe fn test_mm_add_epi32() {
3174 let a = _mm_setr_epi32(0, 1, 2, 3);
3175 let b = _mm_setr_epi32(4, 5, 6, 7);
3176 let r = _mm_add_epi32(a, b);
3177 let e = _mm_setr_epi32(4, 6, 8, 10);
3178 assert_eq_m128i(r, e);
3179 }
3180
3181 #[simd_test(enable = "sse2")]
3182 unsafe fn test_mm_add_epi64() {
3183 let a = _mm_setr_epi64x(0, 1);
3184 let b = _mm_setr_epi64x(2, 3);
3185 let r = _mm_add_epi64(a, b);
3186 let e = _mm_setr_epi64x(2, 4);
3187 assert_eq_m128i(r, e);
3188 }
3189
3190 #[simd_test(enable = "sse2")]
3191 unsafe fn test_mm_adds_epi8() {
3192 let a = _mm_setr_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
3193 #[rustfmt::skip]
3194 let b = _mm_setr_epi8(
3195 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31,
3196 );
3197 let r = _mm_adds_epi8(a, b);
3198 #[rustfmt::skip]
3199 let e = _mm_setr_epi8(
3200 16, 18, 20, 22, 24, 26, 28, 30, 32, 34, 36, 38, 40, 42, 44, 46,
3201 );
3202 assert_eq_m128i(r, e);
3203 }
3204
3205 #[simd_test(enable = "sse2")]
3206 unsafe fn test_mm_adds_epi8_saturate_positive() {
3207 let a = _mm_set1_epi8(0x7F);
3208 let b = _mm_set1_epi8(1);
3209 let r = _mm_adds_epi8(a, b);
3210 assert_eq_m128i(r, a);
3211 }
3212
3213 #[simd_test(enable = "sse2")]
3214 unsafe fn test_mm_adds_epi8_saturate_negative() {
3215 let a = _mm_set1_epi8(-0x80);
3216 let b = _mm_set1_epi8(-1);
3217 let r = _mm_adds_epi8(a, b);
3218 assert_eq_m128i(r, a);
3219 }
3220
3221 #[simd_test(enable = "sse2")]
3222 unsafe fn test_mm_adds_epi16() {
3223 let a = _mm_setr_epi16(0, 1, 2, 3, 4, 5, 6, 7);
3224 let b = _mm_setr_epi16(8, 9, 10, 11, 12, 13, 14, 15);
3225 let r = _mm_adds_epi16(a, b);
3226 let e = _mm_setr_epi16(8, 10, 12, 14, 16, 18, 20, 22);
3227 assert_eq_m128i(r, e);
3228 }
3229
3230 #[simd_test(enable = "sse2")]
3231 unsafe fn test_mm_adds_epi16_saturate_positive() {
3232 let a = _mm_set1_epi16(0x7FFF);
3233 let b = _mm_set1_epi16(1);
3234 let r = _mm_adds_epi16(a, b);
3235 assert_eq_m128i(r, a);
3236 }
3237
3238 #[simd_test(enable = "sse2")]
3239 unsafe fn test_mm_adds_epi16_saturate_negative() {
3240 let a = _mm_set1_epi16(-0x8000);
3241 let b = _mm_set1_epi16(-1);
3242 let r = _mm_adds_epi16(a, b);
3243 assert_eq_m128i(r, a);
3244 }
3245
3246 #[simd_test(enable = "sse2")]
3247 unsafe fn test_mm_adds_epu8() {
3248 let a = _mm_setr_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
3249 #[rustfmt::skip]
3250 let b = _mm_setr_epi8(
3251 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31,
3252 );
3253 let r = _mm_adds_epu8(a, b);
3254 #[rustfmt::skip]
3255 let e = _mm_setr_epi8(
3256 16, 18, 20, 22, 24, 26, 28, 30, 32, 34, 36, 38, 40, 42, 44, 46,
3257 );
3258 assert_eq_m128i(r, e);
3259 }
3260
3261 #[simd_test(enable = "sse2")]
3262 unsafe fn test_mm_adds_epu8_saturate() {
3263 let a = _mm_set1_epi8(!0);
3264 let b = _mm_set1_epi8(1);
3265 let r = _mm_adds_epu8(a, b);
3266 assert_eq_m128i(r, a);
3267 }
3268
3269 #[simd_test(enable = "sse2")]
3270 unsafe fn test_mm_adds_epu16() {
3271 let a = _mm_setr_epi16(0, 1, 2, 3, 4, 5, 6, 7);
3272 let b = _mm_setr_epi16(8, 9, 10, 11, 12, 13, 14, 15);
3273 let r = _mm_adds_epu16(a, b);
3274 let e = _mm_setr_epi16(8, 10, 12, 14, 16, 18, 20, 22);
3275 assert_eq_m128i(r, e);
3276 }
3277
3278 #[simd_test(enable = "sse2")]
3279 unsafe fn test_mm_adds_epu16_saturate() {
3280 let a = _mm_set1_epi16(!0);
3281 let b = _mm_set1_epi16(1);
3282 let r = _mm_adds_epu16(a, b);
3283 assert_eq_m128i(r, a);
3284 }
3285
3286 #[simd_test(enable = "sse2")]
3287 unsafe fn test_mm_avg_epu8() {
3288 let (a, b) = (_mm_set1_epi8(3), _mm_set1_epi8(9));
3289 let r = _mm_avg_epu8(a, b);
3290 assert_eq_m128i(r, _mm_set1_epi8(6));
3291 }
3292
3293 #[simd_test(enable = "sse2")]
3294 unsafe fn test_mm_avg_epu16() {
3295 let (a, b) = (_mm_set1_epi16(3), _mm_set1_epi16(9));
3296 let r = _mm_avg_epu16(a, b);
3297 assert_eq_m128i(r, _mm_set1_epi16(6));
3298 }
3299
3300 #[simd_test(enable = "sse2")]
3301 unsafe fn test_mm_madd_epi16() {
3302 let a = _mm_setr_epi16(1, 2, 3, 4, 5, 6, 7, 8);
3303 let b = _mm_setr_epi16(9, 10, 11, 12, 13, 14, 15, 16);
3304 let r = _mm_madd_epi16(a, b);
3305 let e = _mm_setr_epi32(29, 81, 149, 233); // e.g. 1 * 9 + 2 * 10 = 29
3306 assert_eq_m128i(r, e);
3307 }
3308
3309 #[simd_test(enable = "sse2")]
3310 unsafe fn test_mm_max_epi16() {
3311 let a = _mm_set1_epi16(1);
3312 let b = _mm_set1_epi16(-1);
3313 let r = _mm_max_epi16(a, b);
3314 assert_eq_m128i(r, a);
3315 }
3316
3317 #[simd_test(enable = "sse2")]
3318 unsafe fn test_mm_max_epu8() {
3319 let a = _mm_set1_epi8(1);
3320 let b = _mm_set1_epi8(!0);
3321 let r = _mm_max_epu8(a, b);
3322 assert_eq_m128i(r, b);
3323 }
3324
3325 #[simd_test(enable = "sse2")]
3326 unsafe fn test_mm_min_epi16() {
3327 let a = _mm_set1_epi16(1);
3328 let b = _mm_set1_epi16(-1);
3329 let r = _mm_min_epi16(a, b);
3330 assert_eq_m128i(r, b);
3331 }
3332
3333 #[simd_test(enable = "sse2")]
3334 unsafe fn test_mm_min_epu8() {
3335 let a = _mm_set1_epi8(1);
3336 let b = _mm_set1_epi8(!0);
3337 let r = _mm_min_epu8(a, b);
3338 assert_eq_m128i(r, a);
3339 }
3340
3341 #[simd_test(enable = "sse2")]
3342 unsafe fn test_mm_mulhi_epi16() {
3343 let (a, b) = (_mm_set1_epi16(1000), _mm_set1_epi16(-1001));
3344 let r = _mm_mulhi_epi16(a, b);
3345 assert_eq_m128i(r, _mm_set1_epi16(-16)); // high 16 bits of 1000 * -1001 = -1001000
3346 }
3347
3348 #[simd_test(enable = "sse2")]
3349 unsafe fn test_mm_mulhi_epu16() {
3350 let (a, b) = (_mm_set1_epi16(1000), _mm_set1_epi16(1001));
3351 let r = _mm_mulhi_epu16(a, b);
3352 assert_eq_m128i(r, _mm_set1_epi16(15));
3353 }
3354
3355 #[simd_test(enable = "sse2")]
3356 unsafe fn test_mm_mullo_epi16() {
3357 let (a, b) = (_mm_set1_epi16(1000), _mm_set1_epi16(-1001));
3358 let r = _mm_mullo_epi16(a, b);
3359 assert_eq_m128i(r, _mm_set1_epi16(-17960));
3360 }
3361
3362 #[simd_test(enable = "sse2")]
3363 unsafe fn test_mm_mul_epu32() {
3364 let a = _mm_setr_epi64x(1_000_000_000, 1 << 34);
3365 let b = _mm_setr_epi64x(1_000_000_000, 1 << 35);
3366 let r = _mm_mul_epu32(a, b);
3367 let e = _mm_setr_epi64x(1_000_000_000 * 1_000_000_000, 0);
3368 assert_eq_m128i(r, e);
3369 }
3370
3371 #[simd_test(enable = "sse2")]
3372 unsafe fn test_mm_sad_epu8() {
3373 #[rustfmt::skip]
3374 let a = _mm_setr_epi8(
3375 255u8 as i8, 254u8 as i8, 253u8 as i8, 252u8 as i8,
3376 1, 2, 3, 4,
3377 155u8 as i8, 154u8 as i8, 153u8 as i8, 152u8 as i8,
3378 1, 2, 3, 4,
3379 );
3380 let b = _mm_setr_epi8(0, 0, 0, 0, 2, 1, 2, 1, 1, 1, 1, 1, 1, 2, 1, 2);
3381 let r = _mm_sad_epu8(a, b);
3382 let e = _mm_setr_epi64x(1020, 614); // sum of |a[i] - b[i]| over each 8-byte half
3383 assert_eq_m128i(r, e);
3384 }
3385
3386 #[simd_test(enable = "sse2")]
3387 unsafe fn test_mm_sub_epi8() {
3388 let (a, b) = (_mm_set1_epi8(5), _mm_set1_epi8(6));
3389 let r = _mm_sub_epi8(a, b);
3390 assert_eq_m128i(r, _mm_set1_epi8(-1));
3391 }
3392
3393 #[simd_test(enable = "sse2")]
3394 unsafe fn test_mm_sub_epi16() {
3395 let (a, b) = (_mm_set1_epi16(5), _mm_set1_epi16(6));
3396 let r = _mm_sub_epi16(a, b);
3397 assert_eq_m128i(r, _mm_set1_epi16(-1));
3398 }
3399
3400 #[simd_test(enable = "sse2")]
3401 unsafe fn test_mm_sub_epi32() {
3402 let (a, b) = (_mm_set1_epi32(5), _mm_set1_epi32(6));
3403 let r = _mm_sub_epi32(a, b);
3404 assert_eq_m128i(r, _mm_set1_epi32(-1));
3405 }
3406
3407 #[simd_test(enable = "sse2")]
3408 unsafe fn test_mm_sub_epi64() {
3409 let (a, b) = (_mm_set1_epi64x(5), _mm_set1_epi64x(6));
3410 let r = _mm_sub_epi64(a, b);
3411 assert_eq_m128i(r, _mm_set1_epi64x(-1));
3412 }
3413
3414 #[simd_test(enable = "sse2")]
3415 unsafe fn test_mm_subs_epi8() {
3416 let (a, b) = (_mm_set1_epi8(5), _mm_set1_epi8(2));
3417 let r = _mm_subs_epi8(a, b);
3418 assert_eq_m128i(r, _mm_set1_epi8(3));
3419 }
3420
3421 #[simd_test(enable = "sse2")]
3422 unsafe fn test_mm_subs_epi8_saturate_positive() {
3423 let a = _mm_set1_epi8(0x7F);
3424 let b = _mm_set1_epi8(-1);
3425 let r = _mm_subs_epi8(a, b);
3426 assert_eq_m128i(r, a);
3427 }
3428
3429 #[simd_test(enable = "sse2")]
3430 unsafe fn test_mm_subs_epi8_saturate_negative() {
3431 let a = _mm_set1_epi8(-0x80);
3432 let b = _mm_set1_epi8(1);
3433 let r = _mm_subs_epi8(a, b);
3434 assert_eq_m128i(r, a);
3435 }
3436
3437 #[simd_test(enable = "sse2")]
3438 unsafe fn test_mm_subs_epi16() {
3439 let (a, b) = (_mm_set1_epi16(5), _mm_set1_epi16(2));
3440 let r = _mm_subs_epi16(a, b);
3441 assert_eq_m128i(r, _mm_set1_epi16(3));
3442 }
3443
3444 #[simd_test(enable = "sse2")]
3445 unsafe fn test_mm_subs_epi16_saturate_positive() {
3446 let a = _mm_set1_epi16(0x7FFF);
3447 let b = _mm_set1_epi16(-1);
3448 let r = _mm_subs_epi16(a, b);
3449 assert_eq_m128i(r, a);
3450 }
3451
3452 #[simd_test(enable = "sse2")]
3453 unsafe fn test_mm_subs_epi16_saturate_negative() {
3454 let a = _mm_set1_epi16(-0x8000);
3455 let b = _mm_set1_epi16(1);
3456 let r = _mm_subs_epi16(a, b);
3457 assert_eq_m128i(r, a);
3458 }
3459
3460 #[simd_test(enable = "sse2")]
3461 unsafe fn test_mm_subs_epu8() {
3462 let (a, b) = (_mm_set1_epi8(5), _mm_set1_epi8(2));
3463 let r = _mm_subs_epu8(a, b);
3464 assert_eq_m128i(r, _mm_set1_epi8(3));
3465 }
3466
3467 #[simd_test(enable = "sse2")]
3468 unsafe fn test_mm_subs_epu8_saturate() {
3469 let a = _mm_set1_epi8(0);
3470 let b = _mm_set1_epi8(1);
3471 let r = _mm_subs_epu8(a, b);
3472 assert_eq_m128i(r, a);
3473 }
3474
3475 #[simd_test(enable = "sse2")]
3476 unsafe fn test_mm_subs_epu16() {
3477 let (a, b) = (_mm_set1_epi16(5), _mm_set1_epi16(2));
3478 let r = _mm_subs_epu16(a, b);
3479 assert_eq_m128i(r, _mm_set1_epi16(3));
3480 }
3481
3482 #[simd_test(enable = "sse2")]
3483 unsafe fn test_mm_subs_epu16_saturate() {
3484 let a = _mm_set1_epi16(0);
3485 let b = _mm_set1_epi16(1);
3486 let r = _mm_subs_epu16(a, b);
3487 assert_eq_m128i(r, a);
3488 }
3489
3490 #[simd_test(enable = "sse2")]
3491 unsafe fn test_mm_slli_si128() {
3492 #[rustfmt::skip]
3493 let a = _mm_setr_epi8(
3494 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16,
3495 );
3496 let r = _mm_slli_si128(a, 1);
3497 let e = _mm_setr_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
3498 assert_eq_m128i(r, e);
3499
3500 #[rustfmt::skip]
3501 let a = _mm_setr_epi8(
3502 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16,
3503 );
3504 let r = _mm_slli_si128(a, 15);
3505 let e = _mm_setr_epi8(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1);
3506 assert_eq_m128i(r, e);
3507
3508 #[rustfmt::skip]
3509 let a = _mm_setr_epi8(
3510 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16,
3511 );
3512 let r = _mm_slli_si128(a, 16);
3513 assert_eq_m128i(r, _mm_set1_epi8(0));
3514
3515 #[rustfmt::skip]
3516 let a = _mm_setr_epi8(
3517 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16,
3518 );
3519 let r = _mm_slli_si128(a, -1);
3520 assert_eq_m128i(r, _mm_set1_epi8(0)); // shift counts outside 0..=15 zero the vector
3521
3522 #[rustfmt::skip]
3523 let a = _mm_setr_epi8(
3524 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16,
3525 );
3526 let r = _mm_slli_si128(a, -0x80000000);
3527 assert_eq_m128i(r, _mm_set1_epi8(0));
3528 }
3529
3530 #[simd_test(enable = "sse2")]
3531 unsafe fn test_mm_slli_epi16() {
3532 #[rustfmt::skip]
3533 let a = _mm_setr_epi16(
3534 0xFFFF as u16 as i16, 0x0FFF, 0x00FF, 0x000F, 0, 0, 0, 0,
3535 );
3536 let r = _mm_slli_epi16(a, 4);
3537
3538 #[rustfmt::skip]
3539 let e = _mm_setr_epi16(
3540 0xFFF0 as u16 as i16, 0xFFF0 as u16 as i16, 0x0FF0, 0x00F0,
3541 0, 0, 0, 0,
3542 );
3543 assert_eq_m128i(r, e);
3544 }
3545
3546 #[simd_test(enable = "sse2")]
3547 unsafe fn test_mm_sll_epi16() {
3548 let a = _mm_setr_epi16(0xFF, 0, 0, 0, 0, 0, 0, 0);
3549 let r = _mm_sll_epi16(a, _mm_setr_epi16(4, 0, 0, 0, 0, 0, 0, 0));
3550 assert_eq_m128i(r, _mm_setr_epi16(0xFF0, 0, 0, 0, 0, 0, 0, 0));
3551 let r = _mm_sll_epi16(a, _mm_setr_epi16(0, 0, 0, 0, 4, 0, 0, 0));
3552 assert_eq_m128i(r, _mm_setr_epi16(0xFF, 0, 0, 0, 0, 0, 0, 0));
3553 }
3554
3555 #[simd_test(enable = "sse2")]
3556 unsafe fn test_mm_slli_epi32() {
3557 let r = _mm_slli_epi32(_mm_set1_epi32(0xFFFF), 4);
3558 assert_eq_m128i(r, _mm_set1_epi32(0xFFFF0));
3559 }
3560
3561 #[simd_test(enable = "sse2")]
3562 unsafe fn test_mm_sll_epi32() {
3563 let a = _mm_set1_epi32(0xFFFF);
3564 let b = _mm_setr_epi32(4, 0, 0, 0);
3565 let r = _mm_sll_epi32(a, b);
3566 assert_eq_m128i(r, _mm_set1_epi32(0xFFFF0));
3567 }
3568
3569 #[simd_test(enable = "sse2")]
3570 unsafe fn test_mm_slli_epi64() {
3571 let r = _mm_slli_epi64(_mm_set1_epi64x(0xFFFFFFFF), 4);
3572 assert_eq_m128i(r, _mm_set1_epi64x(0xFFFFFFFF0));
3573 }
3574
3575 #[simd_test(enable = "sse2")]
3576 unsafe fn test_mm_sll_epi64() {
3577 let a = _mm_set1_epi64x(0xFFFFFFFF);
3578 let b = _mm_setr_epi64x(4, 0);
3579 let r = _mm_sll_epi64(a, b);
3580 assert_eq_m128i(r, _mm_set1_epi64x(0xFFFFFFFF0));
3581 }
3582
3583 #[simd_test(enable = "sse2")]
3584 unsafe fn test_mm_srai_epi16() {
3585 let r = _mm_srai_epi16(_mm_set1_epi16(-1), 1);
3586 assert_eq_m128i(r, _mm_set1_epi16(-1));
3587 }
3588
3589 #[simd_test(enable = "sse2")]
3590 unsafe fn test_mm_sra_epi16() {
3591 let a = _mm_set1_epi16(-1);
3592 let b = _mm_setr_epi16(1, 0, 0, 0, 0, 0, 0, 0);
3593 let r = _mm_sra_epi16(a, b);
3594 assert_eq_m128i(r, _mm_set1_epi16(-1));
3595 }
3596
3597 #[simd_test(enable = "sse2")]
3598 unsafe fn test_mm_srai_epi32() {
3599 let r = _mm_srai_epi32(_mm_set1_epi32(-1), 1);
3600 assert_eq_m128i(r, _mm_set1_epi32(-1));
3601 }
3602
3603 #[simd_test(enable = "sse2")]
3604 unsafe fn test_mm_sra_epi32() {
3605 let a = _mm_set1_epi32(-1);
3606 let b = _mm_setr_epi32(1, 0, 0, 0);
3607 let r = _mm_sra_epi32(a, b);
3608 assert_eq_m128i(r, _mm_set1_epi32(-1));
3609 }
3610
3611 #[simd_test(enable = "sse2")]
3612 unsafe fn test_mm_srli_si128() {
3613 #[rustfmt::skip]
3614 let a = _mm_setr_epi8(
3615 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16,
3616 );
3617 let r = _mm_srli_si128(a, 1);
3618 #[rustfmt::skip]
3619 let e = _mm_setr_epi8(
3620 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 0,
3621 );
3622 assert_eq_m128i(r, e);
3623
3624 #[rustfmt::skip]
3625 let a = _mm_setr_epi8(
3626 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16,
3627 );
3628 let r = _mm_srli_si128(a, 15);
3629 let e = _mm_setr_epi8(16, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0);
3630 assert_eq_m128i(r, e);
3631
3632 #[rustfmt::skip]
3633 let a = _mm_setr_epi8(
3634 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16,
3635 );
3636 let r = _mm_srli_si128(a, 16);
3637 assert_eq_m128i(r, _mm_set1_epi8(0));
3638
3639 #[rustfmt::skip]
3640 let a = _mm_setr_epi8(
3641 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16,
3642 );
3643 let r = _mm_srli_si128(a, -1);
3644 assert_eq_m128i(r, _mm_set1_epi8(0));
3645
3646 #[rustfmt::skip]
3647 let a = _mm_setr_epi8(
3648 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16,
3649 );
3650 let r = _mm_srli_si128(a, -0x80000000);
3651 assert_eq_m128i(r, _mm_set1_epi8(0));
3652 }
3653
3654 #[simd_test(enable = "sse2")]
3655 unsafe fn test_mm_srli_epi16() {
3656 #[rustfmt::skip]
3657 let a = _mm_setr_epi16(
3658 0xFFFF as u16 as i16, 0x0FFF, 0x00FF, 0x000F, 0, 0, 0, 0,
3659 );
3660 let r = _mm_srli_epi16(a, 4);
3661 #[rustfmt::skip]
3662 let e = _mm_setr_epi16(
3663 0xFFF as u16 as i16, 0xFF as u16 as i16, 0xF, 0, 0, 0, 0, 0,
3664 );
3665 assert_eq_m128i(r, e);
3666 }
3667
3668 #[simd_test(enable = "sse2")]
3669 unsafe fn test_mm_srl_epi16() {
3670 let a = _mm_setr_epi16(0xFF, 0, 0, 0, 0, 0, 0, 0);
3671 let r = _mm_srl_epi16(a, _mm_setr_epi16(4, 0, 0, 0, 0, 0, 0, 0));
3672 assert_eq_m128i(r, _mm_setr_epi16(0xF, 0, 0, 0, 0, 0, 0, 0));
3673 let r = _mm_srl_epi16(a, _mm_setr_epi16(0, 0, 0, 0, 4, 0, 0, 0));
3674 assert_eq_m128i(r, _mm_setr_epi16(0xFF, 0, 0, 0, 0, 0, 0, 0));
3675 }
3676
3677 #[simd_test(enable = "sse2")]
3678 unsafe fn test_mm_srli_epi32() {
3679 let r = _mm_srli_epi32(_mm_set1_epi32(0xFFFF), 4);
3680 assert_eq_m128i(r, _mm_set1_epi32(0xFFF));
3681 }
3682
3683 #[simd_test(enable = "sse2")]
3684 unsafe fn test_mm_srl_epi32() {
3685 let a = _mm_set1_epi32(0xFFFF);
3686 let b = _mm_setr_epi32(4, 0, 0, 0);
3687 let r = _mm_srl_epi32(a, b);
3688 assert_eq_m128i(r, _mm_set1_epi32(0xFFF));
3689 }
3690
3691 #[simd_test(enable = "sse2")]
3692 unsafe fn test_mm_srli_epi64() {
3693 let r = _mm_srli_epi64(_mm_set1_epi64x(0xFFFFFFFF), 4);
3694 assert_eq_m128i(r, _mm_set1_epi64x(0xFFFFFFF));
3695 }
3696
3697 #[simd_test(enable = "sse2")]
3698 unsafe fn test_mm_srl_epi64() {
3699 let a = _mm_set1_epi64x(0xFFFFFFFF);
3700 let b = _mm_setr_epi64x(4, 0);
3701 let r = _mm_srl_epi64(a, b);
3702 assert_eq_m128i(r, _mm_set1_epi64x(0xFFFFFFF));
3703 }
3704
3705 #[simd_test(enable = "sse2")]
3706 unsafe fn test_mm_and_si128() {
3707 let a = _mm_set1_epi8(5);
3708 let b = _mm_set1_epi8(3);
3709 let r = _mm_and_si128(a, b);
3710 assert_eq_m128i(r, _mm_set1_epi8(1));
3711 }
3712
3713 #[simd_test(enable = "sse2")]
3714 unsafe fn test_mm_andnot_si128() {
3715 let a = _mm_set1_epi8(5);
3716 let b = _mm_set1_epi8(3);
3717 let r = _mm_andnot_si128(a, b);
3718 assert_eq_m128i(r, _mm_set1_epi8(2));
3719 }
3720
3721 #[simd_test(enable = "sse2")]
3722 unsafe fn test_mm_or_si128() {
3723 let a = _mm_set1_epi8(5);
3724 let b = _mm_set1_epi8(3);
3725 let r = _mm_or_si128(a, b);
3726 assert_eq_m128i(r, _mm_set1_epi8(7));
3727 }
3728
3729 #[simd_test(enable = "sse2")]
3730 unsafe fn test_mm_xor_si128() {
3731 let a = _mm_set1_epi8(5);
3732 let b = _mm_set1_epi8(3);
3733 let r = _mm_xor_si128(a, b);
3734 assert_eq_m128i(r, _mm_set1_epi8(6));
3735 }
3736
3737 #[simd_test(enable = "sse2")]
3738 unsafe fn test_mm_cmpeq_epi8() {
3739 let a = _mm_setr_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
3740 let b = _mm_setr_epi8(15, 14, 2, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
3741 let r = _mm_cmpeq_epi8(a, b);
3742 #[rustfmt::skip]
3743 assert_eq_m128i(
3744 r,
3745 _mm_setr_epi8(
3746 0, 0, 0xFFu8 as i8, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
3747 )
3748 );
3749 }
3750
3751 #[simd_test(enable = "sse2")]
3752 unsafe fn test_mm_cmpeq_epi16() {
3753 let a = _mm_setr_epi16(0, 1, 2, 3, 4, 5, 6, 7);
3754 let b = _mm_setr_epi16(7, 6, 2, 4, 3, 2, 1, 0);
3755 let r = _mm_cmpeq_epi16(a, b);
3756 assert_eq_m128i(r, _mm_setr_epi16(0, 0, !0, 0, 0, 0, 0, 0));
3757 }
3758
3759 #[simd_test(enable = "sse2")]
3760 unsafe fn test_mm_cmpeq_epi32() {
3761 let a = _mm_setr_epi32(0, 1, 2, 3);
3762 let b = _mm_setr_epi32(3, 2, 2, 0);
3763 let r = _mm_cmpeq_epi32(a, b);
3764 assert_eq_m128i(r, _mm_setr_epi32(0, 0, !0, 0));
3765 }
3766
3767 #[simd_test(enable = "sse2")]
3768 unsafe fn test_mm_cmpgt_epi8() {
3769 let a = _mm_set_epi8(5, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0);
3770 let b = _mm_set1_epi8(0);
3771 let r = _mm_cmpgt_epi8(a, b);
3772 let e = _mm_set_epi8(!0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0);
3773 assert_eq_m128i(r, e);
3774 }
3775
3776 #[simd_test(enable = "sse2")]
3777 unsafe fn test_mm_cmpgt_epi16() {
3778 let a = _mm_set_epi16(5, 0, 0, 0, 0, 0, 0, 0);
3779 let b = _mm_set1_epi16(0);
3780 let r = _mm_cmpgt_epi16(a, b);
3781 let e = _mm_set_epi16(!0, 0, 0, 0, 0, 0, 0, 0);
3782 assert_eq_m128i(r, e);
3783 }
3784
3785 #[simd_test(enable = "sse2")]
3786 unsafe fn test_mm_cmpgt_epi32() {
3787 let a = _mm_set_epi32(5, 0, 0, 0);
3788 let b = _mm_set1_epi32(0);
3789 let r = _mm_cmpgt_epi32(a, b);
3790 assert_eq_m128i(r, _mm_set_epi32(!0, 0, 0, 0));
3791 }
3792
3793 #[simd_test(enable = "sse2")]
3794 unsafe fn test_mm_cmplt_epi8() {
3795 let a = _mm_set1_epi8(0);
3796 let b = _mm_set_epi8(5, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0);
3797 let r = _mm_cmplt_epi8(a, b);
3798 let e = _mm_set_epi8(!0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0);
3799 assert_eq_m128i(r, e);
3800 }
3801
3802 #[simd_test(enable = "sse2")]
3803 unsafe fn test_mm_cmplt_epi16() {
3804 let a = _mm_set1_epi16(0);
3805 let b = _mm_set_epi16(5, 0, 0, 0, 0, 0, 0, 0);
3806 let r = _mm_cmplt_epi16(a, b);
3807 let e = _mm_set_epi16(!0, 0, 0, 0, 0, 0, 0, 0);
3808 assert_eq_m128i(r, e);
3809 }
3810
3811 #[simd_test(enable = "sse2")]
3812 unsafe fn test_mm_cmplt_epi32() {
3813 let a = _mm_set1_epi32(0);
3814 let b = _mm_set_epi32(5, 0, 0, 0);
3815 let r = _mm_cmplt_epi32(a, b);
3816 assert_eq_m128i(r, _mm_set_epi32(!0, 0, 0, 0));
3817 }
3818
3819 #[simd_test(enable = "sse2")]
3820 unsafe fn test_mm_cvtepi32_pd() {
3821 let a = _mm_set_epi32(35, 25, 15, 5);
3822 let r = _mm_cvtepi32_pd(a);
3823 assert_eq_m128d(r, _mm_setr_pd(5.0, 15.0));
3824 }
3825
3826 #[simd_test(enable = "sse2")]
3827 unsafe fn test_mm_cvtsi32_sd() {
3828 let a = _mm_set1_pd(3.5);
3829 let r = _mm_cvtsi32_sd(a, 5);
3830 assert_eq_m128d(r, _mm_setr_pd(5.0, 3.5));
3831 }
3832
3833 #[simd_test(enable = "sse2")]
3834 unsafe fn test_mm_cvtepi32_ps() {
3835 let a = _mm_setr_epi32(1, 2, 3, 4);
3836 let r = _mm_cvtepi32_ps(a);
3837 assert_eq_m128(r, _mm_setr_ps(1.0, 2.0, 3.0, 4.0));
3838 }
3839
3840 #[simd_test(enable = "sse2")]
3841 unsafe fn test_mm_cvtps_epi32() {
3842 let a = _mm_setr_ps(1.0, 2.0, 3.0, 4.0);
3843 let r = _mm_cvtps_epi32(a);
3844 assert_eq_m128i(r, _mm_setr_epi32(1, 2, 3, 4));
3845 }
3846
3847 #[simd_test(enable = "sse2")]
3848 unsafe fn test_mm_cvtsi32_si128() {
3849 let r = _mm_cvtsi32_si128(5);
3850 assert_eq_m128i(r, _mm_setr_epi32(5, 0, 0, 0));
3851 }
3852
3853 #[simd_test(enable = "sse2")]
3854 unsafe fn test_mm_cvtsi128_si32() {
3855 let r = _mm_cvtsi128_si32(_mm_setr_epi32(5, 0, 0, 0));
3856 assert_eq!(r, 5);
3857 }
3858
3859 #[simd_test(enable = "sse2")]
3860 unsafe fn test_mm_set_epi64x() {
3861 let r = _mm_set_epi64x(0, 1);
3862 assert_eq_m128i(r, _mm_setr_epi64x(1, 0));
3863 }
3864
3865 #[simd_test(enable = "sse2")]
3866 unsafe fn test_mm_set_epi32() {
3867 let r = _mm_set_epi32(0, 1, 2, 3);
3868 assert_eq_m128i(r, _mm_setr_epi32(3, 2, 1, 0));
3869 }
3870
3871 #[simd_test(enable = "sse2")]
3872 unsafe fn test_mm_set_epi16() {
3873 let r = _mm_set_epi16(0, 1, 2, 3, 4, 5, 6, 7);
3874 assert_eq_m128i(r, _mm_setr_epi16(7, 6, 5, 4, 3, 2, 1, 0));
3875 }
3876
3877 #[simd_test(enable = "sse2")]
3878 unsafe fn test_mm_set_epi8() {
3879 #[rustfmt::skip]
3880 let r = _mm_set_epi8(
3881 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
3882 );
3883 #[rustfmt::skip]
3884 let e = _mm_setr_epi8(
3885 15, 14, 13, 12, 11, 10, 9, 8,
3886 7, 6, 5, 4, 3, 2, 1, 0,
3887 );
3888 assert_eq_m128i(r, e);
3889 }
3890
3891 #[simd_test(enable = "sse2")]
3892 unsafe fn test_mm_set1_epi64x() {
3893 let r = _mm_set1_epi64x(1);
3894 assert_eq_m128i(r, _mm_set1_epi64x(1));
3895 }
3896
3897 #[simd_test(enable = "sse2")]
3898 unsafe fn test_mm_set1_epi32() {
3899 let r = _mm_set1_epi32(1);
3900 assert_eq_m128i(r, _mm_set1_epi32(1));
3901 }
3902
3903 #[simd_test(enable = "sse2")]
3904 unsafe fn test_mm_set1_epi16() {
3905 let r = _mm_set1_epi16(1);
3906 assert_eq_m128i(r, _mm_set1_epi16(1));
3907 }
3908
3909 #[simd_test(enable = "sse2")]
3910 unsafe fn test_mm_set1_epi8() {
3911 let r = _mm_set1_epi8(1);
3912 assert_eq_m128i(r, _mm_set1_epi8(1));
3913 }
3914
3915 #[simd_test(enable = "sse2")]
3916 unsafe fn test_mm_setr_epi32() {
3917 let r = _mm_setr_epi32(0, 1, 2, 3);
3918 assert_eq_m128i(r, _mm_setr_epi32(0, 1, 2, 3));
3919 }
3920
3921 #[simd_test(enable = "sse2")]
3922 unsafe fn test_mm_setr_epi16() {
3923 let r = _mm_setr_epi16(0, 1, 2, 3, 4, 5, 6, 7);
3924 assert_eq_m128i(r, _mm_setr_epi16(0, 1, 2, 3, 4, 5, 6, 7));
3925 }
3926
3927 #[simd_test(enable = "sse2")]
3928 unsafe fn test_mm_setr_epi8() {
3929 #[rustfmt::skip]
3930 let r = _mm_setr_epi8(
3931 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
3932 );
3933 #[rustfmt::skip]
3934 let e = _mm_setr_epi8(
3935 0, 1, 2, 3, 4, 5, 6, 7,
3936 8, 9, 10, 11, 12, 13, 14, 15,
3937 );
3938 assert_eq_m128i(r, e);
3939 }
3940
3941 #[simd_test(enable = "sse2")]
3942 unsafe fn test_mm_setzero_si128() {
3943 let r = _mm_setzero_si128();
3944 assert_eq_m128i(r, _mm_set1_epi64x(0));
3945 }
3946
3947 #[simd_test(enable = "sse2")]
3948 unsafe fn test_mm_loadl_epi64() {
3949 let a = _mm_setr_epi64x(6, 5);
3950 let r = _mm_loadl_epi64(&a as *const _);
3951 assert_eq_m128i(r, _mm_setr_epi64x(6, 0));
3952 }
3953
3954 #[simd_test(enable = "sse2")]
3955 unsafe fn test_mm_load_si128() {
3956 let a = _mm_set_epi64x(5, 6);
3957 let r = _mm_load_si128(&a as *const _ as *const _);
3958 assert_eq_m128i(a, r);
3959 }
3960
3961 #[simd_test(enable = "sse2")]
3962 unsafe fn test_mm_loadu_si128() {
3963 let a = _mm_set_epi64x(5, 6);
3964 let r = _mm_loadu_si128(&a as *const _ as *const _);
3965 assert_eq_m128i(a, r);
3966 }
3967
3968 #[simd_test(enable = "sse2")]
3969 unsafe fn test_mm_maskmoveu_si128() {
3970 let a = _mm_set1_epi8(9);
3971 #[rustfmt::skip]
3972 let mask = _mm_set_epi8(
3973 0, 0, 0x80u8 as i8, 0, 0, 0, 0, 0,
3974 0, 0, 0, 0, 0, 0, 0, 0,
3975 );
3976 let mut r = _mm_set1_epi8(0);
3977 _mm_maskmoveu_si128(a, mask, &mut r as *mut _ as *mut i8);
3978 let e = _mm_set_epi8(0, 0, 9, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0);
3979 assert_eq_m128i(r, e);
3980 }
3981
3982 #[simd_test(enable = "sse2")]
3983 unsafe fn test_mm_store_si128() {
3984 let a = _mm_set1_epi8(9);
3985 let mut r = _mm_set1_epi8(0);
3986 _mm_store_si128(&mut r as *mut _ as *mut __m128i, a);
3987 assert_eq_m128i(r, a);
3988 }
3989
3990 #[simd_test(enable = "sse2")]
3991 unsafe fn test_mm_storeu_si128() {
3992 let a = _mm_set1_epi8(9);
3993 let mut r = _mm_set1_epi8(0);
3994 _mm_storeu_si128(&mut r as *mut _ as *mut __m128i, a);
3995 assert_eq_m128i(r, a);
3996 }
3997
3998 #[simd_test(enable = "sse2")]
3999 unsafe fn test_mm_storel_epi64() {
4000 let a = _mm_setr_epi64x(2, 9);
4001 let mut r = _mm_set1_epi8(0);
4002 _mm_storel_epi64(&mut r as *mut _ as *mut __m128i, a);
4003 assert_eq_m128i(r, _mm_setr_epi64x(2, 0));
4004 }
4005
4006 #[simd_test(enable = "sse2")]
4007 unsafe fn test_mm_stream_si128() {
4008 let a = _mm_setr_epi32(1, 2, 3, 4);
4009 let mut r = _mm_undefined_si128();
4010 _mm_stream_si128(&mut r as *mut _, a);
4011 assert_eq_m128i(r, a);
4012 }
4013
4014 #[simd_test(enable = "sse2")]
4015 unsafe fn test_mm_stream_si32() {
4016 let a: i32 = 7;
4017 let mut mem = boxed::Box::<i32>::new(-1);
4018 _mm_stream_si32(&mut *mem as *mut i32, a);
4019 assert_eq!(a, *mem);
4020 }
4021
4022 #[simd_test(enable = "sse2")]
4023 unsafe fn test_mm_move_epi64() {
4024 let a = _mm_setr_epi64x(5, 6);
4025 let r = _mm_move_epi64(a);
4026 assert_eq_m128i(r, _mm_setr_epi64x(5, 0));
4027 }
4028
4029 #[simd_test(enable = "sse2")]
4030 unsafe fn test_mm_packs_epi16() {
4031 let a = _mm_setr_epi16(0x80, -0x81, 0, 0, 0, 0, 0, 0);
4032 let b = _mm_setr_epi16(0, 0, 0, 0, 0, 0, -0x81, 0x80);
4033 let r = _mm_packs_epi16(a, b);
4034 #[rustfmt::skip]
4035 assert_eq_m128i(
4036 r,
4037 _mm_setr_epi8(
4038 0x7F, -0x80, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, -0x80, 0x7F
4039 )
4040 );
4041 }
4042
4043 #[simd_test(enable = "sse2")]
4044 unsafe fn test_mm_packs_epi32() {
4045 let a = _mm_setr_epi32(0x8000, -0x8001, 0, 0);
4046 let b = _mm_setr_epi32(0, 0, -0x8001, 0x8000);
4047 let r = _mm_packs_epi32(a, b);
4048 assert_eq_m128i(
4049 r,
4050 _mm_setr_epi16(0x7FFF, -0x8000, 0, 0, 0, 0, -0x8000, 0x7FFF),
4051 );
4052 }
4053
4054 #[simd_test(enable = "sse2")]
4055 unsafe fn test_mm_packus_epi16() {
4056 let a = _mm_setr_epi16(0x100, -1, 0, 0, 0, 0, 0, 0);
4057 let b = _mm_setr_epi16(0, 0, 0, 0, 0, 0, -1, 0x100);
4058 let r = _mm_packus_epi16(a, b);
4059 assert_eq_m128i(
4060 r,
4061 _mm_setr_epi8(!0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, !0),
4062 );
4063 }
4064
4065 #[simd_test(enable = "sse2")]
4066 unsafe fn test_mm_extract_epi16() {
4067 let a = _mm_setr_epi16(-1, 1, 2, 3, 4, 5, 6, 7);
4068 let r1 = _mm_extract_epi16(a, 0);
4069 let r2 = _mm_extract_epi16(a, 11);
4070 assert_eq!(r1, 0xFFFF);
4071 assert_eq!(r2, 3); // the index is masked to 3 bits: 11 & 7 == 3
4072 }
4073
4074 #[simd_test(enable = "sse2")]
4075 unsafe fn test_mm_insert_epi16() {
4076 let a = _mm_setr_epi16(0, 1, 2, 3, 4, 5, 6, 7);
4077 let r = _mm_insert_epi16(a, 9, 0);
4078 let e = _mm_setr_epi16(9, 1, 2, 3, 4, 5, 6, 7);
4079 assert_eq_m128i(r, e);
4080 }
4081
4082 #[simd_test(enable = "sse2")]
4083 unsafe fn test_mm_movemask_epi8() {
4084 #[rustfmt::skip]
4085 let a = _mm_setr_epi8(
4086 0b1000_0000u8 as i8, 0b0, 0b1000_0000u8 as i8, 0b01,
4087 0b0101, 0b1111_0000u8 as i8, 0, 0,
4088 0, 0b1011_0101u8 as i8, 0b1111_0000u8 as i8, 0b0101,
4089 0b01, 0b1000_0000u8 as i8, 0b0, 0b1000_0000u8 as i8,
4090 );
4091 let r = _mm_movemask_epi8(a);
4092 assert_eq!(r, 0b10100110_00100101); // bit i = sign bit of byte i
4093 }
4094
4095 #[simd_test(enable = "sse2")]
4096 unsafe fn test_mm_shuffle_epi32() {
4097 let a = _mm_setr_epi32(5, 10, 15, 20);
4098 let r = _mm_shuffle_epi32(a, 0b00_01_01_11);
4099 let e = _mm_setr_epi32(20, 10, 10, 5);
4100 assert_eq_m128i(r, e);
4101 }
4102
4103 #[simd_test(enable = "sse2")]
4104 unsafe fn test_mm_shufflehi_epi16() {
4105 let a = _mm_setr_epi16(1, 2, 3, 4, 5, 10, 15, 20);
4106 let r = _mm_shufflehi_epi16(a, 0b00_01_01_11);
4107 let e = _mm_setr_epi16(1, 2, 3, 4, 20, 10, 10, 5);
4108 assert_eq_m128i(r, e);
4109 }
4110
4111 #[simd_test(enable = "sse2")]
4112 unsafe fn test_mm_shufflelo_epi16() {
4113 let a = _mm_setr_epi16(5, 10, 15, 20, 1, 2, 3, 4);
4114 let r = _mm_shufflelo_epi16(a, 0b00_01_01_11);
4115 let e = _mm_setr_epi16(20, 10, 10, 5, 1, 2, 3, 4);
4116 assert_eq_m128i(r, e);
4117 }
4118
4119 #[simd_test(enable = "sse2")]
4120 unsafe fn test_mm_unpackhi_epi8() {
4121 #[rustfmt::skip]
4122 let a = _mm_setr_epi8(
4123 0, 1, 2, 3, 4, 5, 6, 7,
4124 8, 9, 10, 11, 12, 13, 14, 15,
4125 );
4126 #[rustfmt::skip]
4127 let b = _mm_setr_epi8(
4128 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31,
4129 );
4130 let r = _mm_unpackhi_epi8(a, b);
4131 #[rustfmt::skip]
4132 let e = _mm_setr_epi8(
4133 8, 24, 9, 25, 10, 26, 11, 27, 12, 28, 13, 29, 14, 30, 15, 31,
4134 );
4135 assert_eq_m128i(r, e);
4136 }
4137
4138 #[simd_test(enable = "sse2")]
4139 unsafe fn test_mm_unpackhi_epi16() {
4140 let a = _mm_setr_epi16(0, 1, 2, 3, 4, 5, 6, 7);
4141 let b = _mm_setr_epi16(8, 9, 10, 11, 12, 13, 14, 15);
4142 let r = _mm_unpackhi_epi16(a, b);
4143 let e = _mm_setr_epi16(4, 12, 5, 13, 6, 14, 7, 15);
4144 assert_eq_m128i(r, e);
4145 }
4146
4147 #[simd_test(enable = "sse2")]
4148 unsafe fn test_mm_unpackhi_epi32() {
4149 let a = _mm_setr_epi32(0, 1, 2, 3);
4150 let b = _mm_setr_epi32(4, 5, 6, 7);
4151 let r = _mm_unpackhi_epi32(a, b);
4152 let e = _mm_setr_epi32(2, 6, 3, 7);
4153 assert_eq_m128i(r, e);
4154 }
4155
4156 #[simd_test(enable = "sse2")]
4157 unsafe fn test_mm_unpackhi_epi64() {
4158 let a = _mm_setr_epi64x(0, 1);
4159 let b = _mm_setr_epi64x(2, 3);
4160 let r = _mm_unpackhi_epi64(a, b);
4161 let e = _mm_setr_epi64x(1, 3);
4162 assert_eq_m128i(r, e);
4163 }
4164
4165 #[simd_test(enable = "sse2")]
4166 unsafe fn test_mm_unpacklo_epi8() {
4167 #[rustfmt::skip]
4168 let a = _mm_setr_epi8(
4169 0, 1, 2, 3, 4, 5, 6, 7,
4170 8, 9, 10, 11, 12, 13, 14, 15,
4171 );
4172 #[rustfmt::skip]
4173 let b = _mm_setr_epi8(
4174 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31,
4175 );
4176 let r = _mm_unpacklo_epi8(a, b);
4177 #[rustfmt::skip]
4178 let e = _mm_setr_epi8(
4179 0, 16, 1, 17, 2, 18, 3, 19,
4180 4, 20, 5, 21, 6, 22, 7, 23,
4181 );
4182 assert_eq_m128i(r, e);
4183 }
4184
4185 #[simd_test(enable = "sse2")]
4186 unsafe fn test_mm_unpacklo_epi16() {
4187 let a = _mm_setr_epi16(0, 1, 2, 3, 4, 5, 6, 7);
4188 let b = _mm_setr_epi16(8, 9, 10, 11, 12, 13, 14, 15);
4189 let r = _mm_unpacklo_epi16(a, b);
4190 let e = _mm_setr_epi16(0, 8, 1, 9, 2, 10, 3, 11);
4191 assert_eq_m128i(r, e);
4192 }
4193
4194 #[simd_test(enable = "sse2")]
4195 unsafe fn test_mm_unpacklo_epi32() {
4196 let a = _mm_setr_epi32(0, 1, 2, 3);
4197 let b = _mm_setr_epi32(4, 5, 6, 7);
4198 let r = _mm_unpacklo_epi32(a, b);
4199 let e = _mm_setr_epi32(0, 4, 1, 5);
4200 assert_eq_m128i(r, e);
4201 }
4202
4203 #[simd_test(enable = "sse2")]
4204 unsafe fn test_mm_unpacklo_epi64() {
4205 let a = _mm_setr_epi64x(0, 1);
4206 let b = _mm_setr_epi64x(2, 3);
4207 let r = _mm_unpacklo_epi64(a, b);
4208 let e = _mm_setr_epi64x(0, 2);
4209 assert_eq_m128i(r, e);
4210 }
4211
4212 #[simd_test(enable = "sse2")]
4213 unsafe fn test_mm_add_sd() {
4214 let a = _mm_setr_pd(1.0, 2.0);
4215 let b = _mm_setr_pd(5.0, 10.0);
4216 let r = _mm_add_sd(a, b);
4217 assert_eq_m128d(r, _mm_setr_pd(6.0, 2.0));
4218 }
4219
4220 #[simd_test(enable = "sse2")]
4221 unsafe fn test_mm_add_pd() {
4222 let a = _mm_setr_pd(1.0, 2.0);
4223 let b = _mm_setr_pd(5.0, 10.0);
4224 let r = _mm_add_pd(a, b);
4225 assert_eq_m128d(r, _mm_setr_pd(6.0, 12.0));
4226 }
4227
4228 #[simd_test(enable = "sse2")]
4229 unsafe fn test_mm_div_sd() {
4230 let a = _mm_setr_pd(1.0, 2.0);
4231 let b = _mm_setr_pd(5.0, 10.0);
4232 let r = _mm_div_sd(a, b);
4233 assert_eq_m128d(r, _mm_setr_pd(0.2, 2.0));
4234 }
4235
4236 #[simd_test(enable = "sse2")]
4237 unsafe fn test_mm_div_pd() {
4238 let a = _mm_setr_pd(1.0, 2.0);
4239 let b = _mm_setr_pd(5.0, 10.0);
4240 let r = _mm_div_pd(a, b);
4241 assert_eq_m128d(r, _mm_setr_pd(0.2, 0.2));
4242 }
4243
4244 #[simd_test(enable = "sse2")]
4245 unsafe fn test_mm_max_sd() {
4246 let a = _mm_setr_pd(1.0, 2.0);
4247 let b = _mm_setr_pd(5.0, 10.0);
4248 let r = _mm_max_sd(a, b);
4249 assert_eq_m128d(r, _mm_setr_pd(5.0, 2.0));
4250 }
4251
4252 #[simd_test(enable = "sse2")]
4253 unsafe fn test_mm_max_pd() {
4254 let a = _mm_setr_pd(1.0, 2.0);
4255 let b = _mm_setr_pd(5.0, 10.0);
4256 let r = _mm_max_pd(a, b);
4257 assert_eq_m128d(r, _mm_setr_pd(5.0, 10.0));
4258 }
4259
4260 #[simd_test(enable = "sse2")]
4261 unsafe fn test_mm_min_sd() {
4262 let a = _mm_setr_pd(1.0, 2.0);
4263 let b = _mm_setr_pd(5.0, 10.0);
4264 let r = _mm_min_sd(a, b);
4265 assert_eq_m128d(r, _mm_setr_pd(1.0, 2.0));
4266 }
4267
4268 #[simd_test(enable = "sse2")]
4269 unsafe fn test_mm_min_pd() {
4270 let a = _mm_setr_pd(1.0, 2.0);
4271 let b = _mm_setr_pd(5.0, 10.0);
4272 let r = _mm_min_pd(a, b);
4273 assert_eq_m128d(r, _mm_setr_pd(1.0, 2.0));
4274 }
4275
4276 #[simd_test(enable = "sse2")]
4277 unsafe fn test_mm_mul_sd() {
4278 let a = _mm_setr_pd(1.0, 2.0);
4279 let b = _mm_setr_pd(5.0, 10.0);
4280 let r = _mm_mul_sd(a, b);
4281 assert_eq_m128d(r, _mm_setr_pd(5.0, 2.0));
4282 }
4283
4284 #[simd_test(enable = "sse2")]
4285 unsafe fn test_mm_mul_pd() {
4286 let a = _mm_setr_pd(1.0, 2.0);
4287 let b = _mm_setr_pd(5.0, 10.0);
4288 let r = _mm_mul_pd(a, b);
4289 assert_eq_m128d(r, _mm_setr_pd(5.0, 20.0));
4290 }
4291
4292 #[simd_test(enable = "sse2")]
4293 unsafe fn test_mm_sqrt_sd() {
4294 let a = _mm_setr_pd(1.0, 2.0);
4295 let b = _mm_setr_pd(5.0, 10.0);
4296 let r = _mm_sqrt_sd(a, b);
4297 assert_eq_m128d(r, _mm_setr_pd(5.0f64.sqrt(), 2.0));
4298 }
4299
4300 #[simd_test(enable = "sse2")]
4301 unsafe fn test_mm_sqrt_pd() {
4302 let r = _mm_sqrt_pd(_mm_setr_pd(1.0, 2.0));
4303 assert_eq_m128d(r, _mm_setr_pd(1.0f64.sqrt(), 2.0f64.sqrt()));
4304 }
4305
4306 #[simd_test(enable = "sse2")]
4307 unsafe fn test_mm_sub_sd() {
4308 let a = _mm_setr_pd(1.0, 2.0);
4309 let b = _mm_setr_pd(5.0, 10.0);
4310 let r = _mm_sub_sd(a, b);
4311 assert_eq_m128d(r, _mm_setr_pd(-4.0, 2.0));
4312 }
4313
4314 #[simd_test(enable = "sse2")]
4315 unsafe fn test_mm_sub_pd() {
4316 let a = _mm_setr_pd(1.0, 2.0);
4317 let b = _mm_setr_pd(5.0, 10.0);
4318 let r = _mm_sub_pd(a, b);
4319 assert_eq_m128d(r, _mm_setr_pd(-4.0, -8.0));
4320 }
4321
4322 #[simd_test(enable = "sse2")]
4323 unsafe fn test_mm_and_pd() {
4324 let a = transmute(u64x2::splat(5));
4325 let b = transmute(u64x2::splat(3));
4326 let r = _mm_and_pd(a, b);
4327 let e = transmute(u64x2::splat(1));
4328 assert_eq_m128d(r, e);
4329 }
4330
4331 #[simd_test(enable = "sse2")]
4332 unsafe fn test_mm_andnot_pd() {
4333 let a = transmute(u64x2::splat(5));
4334 let b = transmute(u64x2::splat(3));
4335 let r = _mm_andnot_pd(a, b);
4336 let e = transmute(u64x2::splat(2));
4337 assert_eq_m128d(r, e);
4338 }
4339
4340 #[simd_test(enable = "sse2")]
4341 unsafe fn test_mm_or_pd() {
4342 let a = transmute(u64x2::splat(5));
4343 let b = transmute(u64x2::splat(3));
4344 let r = _mm_or_pd(a, b);
4345 let e = transmute(u64x2::splat(7));
4346 assert_eq_m128d(r, e);
4347 }
4348
4349 #[simd_test(enable = "sse2")]
4350 unsafe fn test_mm_xor_pd() {
4351 let a = transmute(u64x2::splat(5));
4352 let b = transmute(u64x2::splat(3));
4353 let r = _mm_xor_pd(a, b);
4354 let e = transmute(u64x2::splat(6));
4355 assert_eq_m128d(r, e);
4356 }
4357
4358 #[simd_test(enable = "sse2")]
4359 unsafe fn test_mm_cmpeq_sd() {
4360 let (a, b) = (_mm_setr_pd(1.0, 2.0), _mm_setr_pd(1.0, 3.0));
4361 let e = _mm_setr_epi64x(!0, transmute(2.0f64));
4362 let r = transmute::<_, __m128i>(_mm_cmpeq_sd(a, b));
4363 assert_eq_m128i(r, e);
4364 }
4365
4366 #[simd_test(enable = "sse2")]
4367 unsafe fn test_mm_cmplt_sd() {
4368 let (a, b) = (_mm_setr_pd(1.0, 2.0), _mm_setr_pd(5.0, 3.0));
4369 let e = _mm_setr_epi64x(!0, transmute(2.0f64));
4370 let r = transmute::<_, __m128i>(_mm_cmplt_sd(a, b));
4371 assert_eq_m128i(r, e);
4372 }
4373
4374 #[simd_test(enable = "sse2")]
4375 unsafe fn test_mm_cmple_sd() {
4376 let (a, b) = (_mm_setr_pd(1.0, 2.0), _mm_setr_pd(1.0, 3.0));
4377 let e = _mm_setr_epi64x(!0, transmute(2.0f64));
4378 let r = transmute::<_, __m128i>(_mm_cmple_sd(a, b));
4379 assert_eq_m128i(r, e);
4380 }
4381
4382 #[simd_test(enable = "sse2")]
4383 unsafe fn test_mm_cmpgt_sd() {
4384 let (a, b) = (_mm_setr_pd(5.0, 2.0), _mm_setr_pd(1.0, 3.0));
4385 let e = _mm_setr_epi64x(!0, transmute(2.0f64));
4386 let r = transmute::<_, __m128i>(_mm_cmpgt_sd(a, b));
4387 assert_eq_m128i(r, e);
4388 }
4389
4390 #[simd_test(enable = "sse2")]
4391 unsafe fn test_mm_cmpge_sd() {
4392 let (a, b) = (_mm_setr_pd(1.0, 2.0), _mm_setr_pd(1.0, 3.0));
4393 let e = _mm_setr_epi64x(!0, transmute(2.0f64));
4394 let r = transmute::<_, __m128i>(_mm_cmpge_sd(a, b));
4395 assert_eq_m128i(r, e);
4396 }
4397
4398 #[simd_test(enable = "sse2")]
4399 unsafe fn test_mm_cmpord_sd() {
4400 let (a, b) = (_mm_setr_pd(NAN, 2.0), _mm_setr_pd(5.0, 3.0));
4401 let e = _mm_setr_epi64x(0, transmute(2.0f64));
4402 let r = transmute::<_, __m128i>(_mm_cmpord_sd(a, b));
4403 assert_eq_m128i(r, e);
4404 }
4405
4406 #[simd_test(enable = "sse2")]
4407 unsafe fn test_mm_cmpunord_sd() {
4408 let (a, b) = (_mm_setr_pd(NAN, 2.0), _mm_setr_pd(5.0, 3.0));
4409 let e = _mm_setr_epi64x(!0, transmute(2.0f64));
4410 let r = transmute::<_, __m128i>(_mm_cmpunord_sd(a, b));
4411 assert_eq_m128i(r, e);
4412 }
4413
4414 #[simd_test(enable = "sse2")]
4415 unsafe fn test_mm_cmpneq_sd() {
4416 let (a, b) = (_mm_setr_pd(1.0, 2.0), _mm_setr_pd(5.0, 3.0));
4417 let e = _mm_setr_epi64x(!0, transmute(2.0f64));
4418 let r = transmute::<_, __m128i>(_mm_cmpneq_sd(a, b));
4419 assert_eq_m128i(r, e);
4420 }
4421
4422 #[simd_test(enable = "sse2")]
4423 unsafe fn test_mm_cmpnlt_sd() {
4424 let (a, b) = (_mm_setr_pd(1.0, 2.0), _mm_setr_pd(5.0, 3.0));
4425 let e = _mm_setr_epi64x(0, transmute(2.0f64));
4426 let r = transmute::<_, __m128i>(_mm_cmpnlt_sd(a, b));
4427 assert_eq_m128i(r, e);
4428 }
4429
4430 #[simd_test(enable = "sse2")]
4431 unsafe fn test_mm_cmpnle_sd() {
4432 let (a, b) = (_mm_setr_pd(1.0, 2.0), _mm_setr_pd(1.0, 3.0));
4433 let e = _mm_setr_epi64x(0, transmute(2.0f64));
4434 let r = transmute::<_, __m128i>(_mm_cmpnle_sd(a, b));
4435 assert_eq_m128i(r, e);
4436 }
4437
4438 #[simd_test(enable = "sse2")]
4439 unsafe fn test_mm_cmpngt_sd() {
4440 let (a, b) = (_mm_setr_pd(5.0, 2.0), _mm_setr_pd(1.0, 3.0));
4441 let e = _mm_setr_epi64x(0, transmute(2.0f64));
4442 let r = transmute::<_, __m128i>(_mm_cmpngt_sd(a, b));
4443 assert_eq_m128i(r, e);
4444 }
4445
4446 #[simd_test(enable = "sse2")]
4447 unsafe fn test_mm_cmpnge_sd() {
4448 let (a, b) = (_mm_setr_pd(1.0, 2.0), _mm_setr_pd(1.0, 3.0));
4449 let e = _mm_setr_epi64x(0, transmute(2.0f64));
4450 let r = transmute::<_, __m128i>(_mm_cmpnge_sd(a, b));
4451 assert_eq_m128i(r, e);
4452 }
4453
4454 #[simd_test(enable = "sse2")]
4455 unsafe fn test_mm_cmpeq_pd() {
4456 let (a, b) = (_mm_setr_pd(1.0, 2.0), _mm_setr_pd(1.0, 3.0));
4457 let e = _mm_setr_epi64x(!0, 0);
4458 let r = transmute::<_, __m128i>(_mm_cmpeq_pd(a, b));
4459 assert_eq_m128i(r, e);
4460 }
4461
4462 #[simd_test(enable = "sse2")]
4463 unsafe fn test_mm_cmplt_pd() {
4464 let (a, b) = (_mm_setr_pd(1.0, 2.0), _mm_setr_pd(1.0, 3.0));
4465 let e = _mm_setr_epi64x(0, !0);
4466 let r = transmute::<_, __m128i>(_mm_cmplt_pd(a, b));
4467 assert_eq_m128i(r, e);
4468 }
4469
4470 #[simd_test(enable = "sse2")]
4471 unsafe fn test_mm_cmple_pd() {
4472 let (a, b) = (_mm_setr_pd(1.0, 2.0), _mm_setr_pd(1.0, 3.0));
4473 let e = _mm_setr_epi64x(!0, !0);
4474 let r = transmute::<_, __m128i>(_mm_cmple_pd(a, b));
4475 assert_eq_m128i(r, e);
4476 }
4477
4478 #[simd_test(enable = "sse2")]
4479 unsafe fn test_mm_cmpgt_pd() {
4480 let (a, b) = (_mm_setr_pd(1.0, 2.0), _mm_setr_pd(1.0, 3.0));
4481 let e = _mm_setr_epi64x(0, 0);
4482 let r = transmute::<_, __m128i>(_mm_cmpgt_pd(a, b));
4483 assert_eq_m128i(r, e);
4484 }
4485
4486 #[simd_test(enable = "sse2")]
4487 unsafe fn test_mm_cmpge_pd() {
4488 let (a, b) = (_mm_setr_pd(1.0, 2.0), _mm_setr_pd(1.0, 3.0));
4489 let e = _mm_setr_epi64x(!0, 0);
4490 let r = transmute::<_, __m128i>(_mm_cmpge_pd(a, b));
4491 assert_eq_m128i(r, e);
4492 }
4493
    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_cmpord_pd() {
        let (a, b) = (_mm_setr_pd(NAN, 2.0), _mm_setr_pd(5.0, 3.0));
        let e = _mm_setr_epi64x(0, !0);
        let r = transmute::<_, __m128i>(_mm_cmpord_pd(a, b));
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_cmpunord_pd() {
        let (a, b) = (_mm_setr_pd(NAN, 2.0), _mm_setr_pd(5.0, 3.0));
        let e = _mm_setr_epi64x(!0, 0);
        let r = transmute::<_, __m128i>(_mm_cmpunord_pd(a, b));
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_cmpneq_pd() {
        let (a, b) = (_mm_setr_pd(1.0, 2.0), _mm_setr_pd(5.0, 3.0));
        let e = _mm_setr_epi64x(!0, !0);
        let r = transmute::<_, __m128i>(_mm_cmpneq_pd(a, b));
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_cmpnlt_pd() {
        let (a, b) = (_mm_setr_pd(1.0, 2.0), _mm_setr_pd(5.0, 3.0));
        let e = _mm_setr_epi64x(0, 0);
        let r = transmute::<_, __m128i>(_mm_cmpnlt_pd(a, b));
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_cmpnle_pd() {
        let (a, b) = (_mm_setr_pd(1.0, 2.0), _mm_setr_pd(1.0, 3.0));
        let e = _mm_setr_epi64x(0, 0);
        let r = transmute::<_, __m128i>(_mm_cmpnle_pd(a, b));
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_cmpngt_pd() {
        let (a, b) = (_mm_setr_pd(5.0, 2.0), _mm_setr_pd(1.0, 3.0));
        let e = _mm_setr_epi64x(0, !0);
        let r = transmute::<_, __m128i>(_mm_cmpngt_pd(a, b));
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_cmpnge_pd() {
        let (a, b) = (_mm_setr_pd(1.0, 2.0), _mm_setr_pd(1.0, 3.0));
        let e = _mm_setr_epi64x(0, !0);
        let r = transmute::<_, __m128i>(_mm_cmpnge_pd(a, b));
        assert_eq_m128i(r, e);
    }

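    // The `comi*` family compares only the low lanes and returns a scalar
    // 0 or 1 instead of a mask; the NaN case below checks that an unordered
    // input does not compare equal.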
    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_comieq_sd() {
        let (a, b) = (_mm_setr_pd(1.0, 2.0), _mm_setr_pd(1.0, 3.0));
        assert!(_mm_comieq_sd(a, b) != 0);

        let (a, b) = (_mm_setr_pd(NAN, 2.0), _mm_setr_pd(1.0, 3.0));
        assert!(_mm_comieq_sd(a, b) == 0);
    }

    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_comilt_sd() {
        let (a, b) = (_mm_setr_pd(1.0, 2.0), _mm_setr_pd(1.0, 3.0));
        assert!(_mm_comilt_sd(a, b) == 0);
    }

    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_comile_sd() {
        let (a, b) = (_mm_setr_pd(1.0, 2.0), _mm_setr_pd(1.0, 3.0));
        assert!(_mm_comile_sd(a, b) != 0);
    }

    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_comigt_sd() {
        let (a, b) = (_mm_setr_pd(1.0, 2.0), _mm_setr_pd(1.0, 3.0));
        assert!(_mm_comigt_sd(a, b) == 0);
    }

    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_comige_sd() {
        let (a, b) = (_mm_setr_pd(1.0, 2.0), _mm_setr_pd(1.0, 3.0));
        assert!(_mm_comige_sd(a, b) != 0);
    }

    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_comineq_sd() {
        let (a, b) = (_mm_setr_pd(1.0, 2.0), _mm_setr_pd(1.0, 3.0));
        assert!(_mm_comineq_sd(a, b) == 0);
    }

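    // `ucomi*` matches `comi*` except that it does not raise an
    // invalid-operation exception for quiet NaN operands.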
    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_ucomieq_sd() {
        let (a, b) = (_mm_setr_pd(1.0, 2.0), _mm_setr_pd(1.0, 3.0));
        assert!(_mm_ucomieq_sd(a, b) != 0);

        let (a, b) = (_mm_setr_pd(NAN, 2.0), _mm_setr_pd(NAN, 3.0));
        assert!(_mm_ucomieq_sd(a, b) == 0);
    }

    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_ucomilt_sd() {
        let (a, b) = (_mm_setr_pd(1.0, 2.0), _mm_setr_pd(1.0, 3.0));
        assert!(_mm_ucomilt_sd(a, b) == 0);
    }

    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_ucomile_sd() {
        let (a, b) = (_mm_setr_pd(1.0, 2.0), _mm_setr_pd(1.0, 3.0));
        assert!(_mm_ucomile_sd(a, b) != 0);
    }

    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_ucomigt_sd() {
        let (a, b) = (_mm_setr_pd(1.0, 2.0), _mm_setr_pd(1.0, 3.0));
        assert!(_mm_ucomigt_sd(a, b) == 0);
    }

    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_ucomige_sd() {
        let (a, b) = (_mm_setr_pd(1.0, 2.0), _mm_setr_pd(1.0, 3.0));
        assert!(_mm_ucomige_sd(a, b) != 0);
    }

    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_ucomineq_sd() {
        let (a, b) = (_mm_setr_pd(1.0, 2.0), _mm_setr_pd(1.0, 3.0));
        assert!(_mm_ucomineq_sd(a, b) == 0);
    }

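    // `movmskpd` packs the sign bit of each lane into the result: bit 0
    // comes from the low lane and bit 1 from the high lane.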
    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_movemask_pd() {
        let r = _mm_movemask_pd(_mm_setr_pd(-1.0, 5.0));
        assert_eq!(r, 0b01);

        let r = _mm_movemask_pd(_mm_setr_pd(-1.0, -5.0));
        assert_eq!(r, 0b11);
    }

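    // 16-byte-aligned backing storage shared by the load/store tests below.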
    #[repr(align(16))]
    struct Memory {
        data: [f64; 4],
    }

    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_load_pd() {
        let mem = Memory {
            data: [1.0f64, 2.0, 3.0, 4.0],
        };
        let vals = &mem.data;
        let d = vals.as_ptr();

        let r = _mm_load_pd(d);
        assert_eq_m128d(r, _mm_setr_pd(1.0, 2.0));
    }

    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_load_sd() {
        let a = 1.;
        let expected = _mm_setr_pd(a, 0.);
        let r = _mm_load_sd(&a);
        assert_eq_m128d(r, expected);
    }

    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_loadh_pd() {
        let a = _mm_setr_pd(1., 2.);
        let b = 3.;
        let expected = _mm_setr_pd(_mm_cvtsd_f64(a), 3.);
        let r = _mm_loadh_pd(a, &b);
        assert_eq_m128d(r, expected);
    }

    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_loadl_pd() {
        let a = _mm_setr_pd(1., 2.);
        let b = 3.;
        let expected = _mm_setr_pd(3., get_m128d(a, 1));
        let r = _mm_loadl_pd(a, &b);
        assert_eq_m128d(r, expected);
    }

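    // `_mm_stream_pd` is a non-temporal (cache-bypassing) store and requires
    // a 16-byte-aligned destination; the buffer here is over-aligned to 128.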
    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_stream_pd() {
        #[repr(align(128))]
        struct Memory {
            pub data: [f64; 2],
        }
        let a = _mm_set1_pd(7.0);
        let mut mem = Memory { data: [-1.0; 2] };

        _mm_stream_pd(&mut mem.data[0] as *mut f64, a);
        for i in 0..2 {
            assert_eq!(mem.data[i], get_m128d(a, i));
        }
    }

    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_store_sd() {
        let mut dest = 0.;
        let a = _mm_setr_pd(1., 2.);
        _mm_store_sd(&mut dest, a);
        assert_eq!(dest, _mm_cvtsd_f64(a));
    }

    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_store_pd() {
        let mut mem = Memory { data: [0.0f64; 4] };
        let vals = &mut mem.data;
        let a = _mm_setr_pd(1.0, 2.0);
        let d = vals.as_mut_ptr();

        _mm_store_pd(d, *black_box(&a));
        assert_eq!(vals[0], 1.0);
        assert_eq!(vals[1], 2.0);
    }

    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_storeu_pd() {
        let mut mem = Memory { data: [0.0f64; 4] };
        let vals = &mut mem.data;
        let a = _mm_setr_pd(1.0, 2.0);

        let mut ofs = 0;
        let mut p = vals.as_mut_ptr();

        // Make sure p is **not** aligned to 16-byte boundary
        if (p as usize) & 0xf == 0 {
            ofs = 1;
            p = p.offset(1);
        }

        _mm_storeu_pd(p, *black_box(&a));

        if ofs > 0 {
            assert_eq!(vals[ofs - 1], 0.0);
        }
        assert_eq!(vals[ofs + 0], 1.0);
        assert_eq!(vals[ofs + 1], 2.0);
    }

    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_store1_pd() {
        let mut mem = Memory { data: [0.0f64; 4] };
        let vals = &mut mem.data;
        let a = _mm_setr_pd(1.0, 2.0);
        let d = vals.as_mut_ptr();

        _mm_store1_pd(d, *black_box(&a));
        assert_eq!(vals[0], 1.0);
        assert_eq!(vals[1], 1.0);
    }

    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_store_pd1() {
        let mut mem = Memory { data: [0.0f64; 4] };
        let vals = &mut mem.data;
        let a = _mm_setr_pd(1.0, 2.0);
        let d = vals.as_mut_ptr();

        _mm_store_pd1(d, *black_box(&a));
        assert_eq!(vals[0], 1.0);
        assert_eq!(vals[1], 1.0);
    }

    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_storer_pd() {
        let mut mem = Memory { data: [0.0f64; 4] };
        let vals = &mut mem.data;
        let a = _mm_setr_pd(1.0, 2.0);
        let d = vals.as_mut_ptr();

        _mm_storer_pd(d, *black_box(&a));
        assert_eq!(vals[0], 2.0);
        assert_eq!(vals[1], 1.0);
    }

    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_storeh_pd() {
        let mut dest = 0.;
        let a = _mm_setr_pd(1., 2.);
        _mm_storeh_pd(&mut dest, a);
        assert_eq!(dest, get_m128d(a, 1));
    }

    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_storel_pd() {
        let mut dest = 0.;
        let a = _mm_setr_pd(1., 2.);
        _mm_storel_pd(&mut dest, a);
        assert_eq!(dest, _mm_cvtsd_f64(a));
    }

    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_loadr_pd() {
        let mut mem = Memory {
            data: [1.0f64, 2.0, 3.0, 4.0],
        };
        let vals = &mut mem.data;
        let d = vals.as_ptr();

        let r = _mm_loadr_pd(d);
        assert_eq_m128d(r, _mm_setr_pd(2.0, 1.0));
    }

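    // If the buffer happens to be 16-byte aligned, read one element further
    // in so the unaligned path is actually exercised; the expected values
    // shift by the same offset.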
    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_loadu_pd() {
        let mut mem = Memory {
            data: [1.0f64, 2.0, 3.0, 4.0],
        };
        let vals = &mut mem.data;
        let mut d = vals.as_ptr();

        // make sure d is not aligned to 16-byte boundary
        let mut offset = 0;
        if (d as usize) & 0xf == 0 {
            offset = 1;
            d = d.offset(offset as isize);
        }

        let r = _mm_loadu_pd(d);
        let e = _mm_add_pd(_mm_setr_pd(1.0, 2.0), _mm_set1_pd(offset as f64));
        assert_eq_m128d(r, e);
    }

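    // Conversions to the narrower f32 lanes round the inputs; f64 values
    // outside the f32 range overflow to the correspondingly signed infinity.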
    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_cvtpd_ps() {
        let r = _mm_cvtpd_ps(_mm_setr_pd(-1.0, 5.0));
        assert_eq_m128(r, _mm_setr_ps(-1.0, 5.0, 0.0, 0.0));

        let r = _mm_cvtpd_ps(_mm_setr_pd(-1.0, -5.0));
        assert_eq_m128(r, _mm_setr_ps(-1.0, -5.0, 0.0, 0.0));

        let r = _mm_cvtpd_ps(_mm_setr_pd(f64::MAX, f64::MIN));
        assert_eq_m128(r, _mm_setr_ps(f32::INFINITY, f32::NEG_INFINITY, 0.0, 0.0));

        let r = _mm_cvtpd_ps(_mm_setr_pd(f32::MAX as f64, f32::MIN as f64));
        assert_eq_m128(r, _mm_setr_ps(f32::MAX, f32::MIN, 0.0, 0.0));
    }

    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_cvtps_pd() {
        let r = _mm_cvtps_pd(_mm_setr_ps(-1.0, 2.0, -3.0, 5.0));
        assert_eq_m128d(r, _mm_setr_pd(-1.0, 2.0));

        let r = _mm_cvtps_pd(_mm_setr_ps(
            f32::MAX,
            f32::INFINITY,
            f32::NEG_INFINITY,
            f32::MIN,
        ));
        assert_eq_m128d(r, _mm_setr_pd(f32::MAX as f64, f64::INFINITY));
    }

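    // Out-of-range, infinite, and NaN inputs all convert to Intel's
    // "integer indefinite" value, i32::MIN (0x8000_0000).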
    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_cvtpd_epi32() {
        let r = _mm_cvtpd_epi32(_mm_setr_pd(-1.0, 5.0));
        assert_eq_m128i(r, _mm_setr_epi32(-1, 5, 0, 0));

        let r = _mm_cvtpd_epi32(_mm_setr_pd(-1.0, -5.0));
        assert_eq_m128i(r, _mm_setr_epi32(-1, -5, 0, 0));

        let r = _mm_cvtpd_epi32(_mm_setr_pd(f64::MAX, f64::MIN));
        assert_eq_m128i(r, _mm_setr_epi32(i32::MIN, i32::MIN, 0, 0));

        let r = _mm_cvtpd_epi32(_mm_setr_pd(f64::INFINITY, f64::NEG_INFINITY));
        assert_eq_m128i(r, _mm_setr_epi32(i32::MIN, i32::MIN, 0, 0));

        let r = _mm_cvtpd_epi32(_mm_setr_pd(f64::NAN, f64::NAN));
        assert_eq_m128i(r, _mm_setr_epi32(i32::MIN, i32::MIN, 0, 0));
    }

    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_cvtsd_si32() {
        let r = _mm_cvtsd_si32(_mm_setr_pd(-2.0, 5.0));
        assert_eq!(r, -2);

        let r = _mm_cvtsd_si32(_mm_setr_pd(f64::MAX, f64::MIN));
        assert_eq!(r, i32::MIN);

        let r = _mm_cvtsd_si32(_mm_setr_pd(f64::NAN, f64::NAN));
        assert_eq!(r, i32::MIN);
    }

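    // `cvtsd2ss` replaces only the low f32 lane of `a`; the three upper
    // lanes pass through unchanged.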
    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_cvtsd_ss() {
        let a = _mm_setr_ps(-1.1, -2.2, 3.3, 4.4);
        let b = _mm_setr_pd(2.0, -5.0);

        let r = _mm_cvtsd_ss(a, b);

        assert_eq_m128(r, _mm_setr_ps(2.0, -2.2, 3.3, 4.4));

        let a = _mm_setr_ps(-1.1, f32::NEG_INFINITY, f32::MAX, f32::NEG_INFINITY);
        let b = _mm_setr_pd(f64::INFINITY, -5.0);

        let r = _mm_cvtsd_ss(a, b);

        assert_eq_m128(
            r,
            _mm_setr_ps(
                f32::INFINITY,
                f32::NEG_INFINITY,
                f32::MAX,
                f32::NEG_INFINITY,
            ),
        );
    }

    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_cvtsd_f64() {
        let r = _mm_cvtsd_f64(_mm_setr_pd(-1.1, 2.2));
        assert_eq!(r, -1.1);
    }

    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_cvtss_sd() {
        let a = _mm_setr_pd(-1.1, 2.2);
        let b = _mm_setr_ps(1.0, 2.0, 3.0, 4.0);

        let r = _mm_cvtss_sd(a, b);
        assert_eq_m128d(r, _mm_setr_pd(1.0, 2.2));

        let a = _mm_setr_pd(-1.1, f64::INFINITY);
        let b = _mm_setr_ps(f32::NEG_INFINITY, 2.0, 3.0, 4.0);

        let r = _mm_cvtss_sd(a, b);
        assert_eq_m128d(r, _mm_setr_pd(f64::NEG_INFINITY, f64::INFINITY));
    }

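    // The `cvtt*` variants truncate toward zero instead of rounding with
    // the current rounding mode.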
    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_cvttpd_epi32() {
        let a = _mm_setr_pd(-1.1, 2.2);
        let r = _mm_cvttpd_epi32(a);
        assert_eq_m128i(r, _mm_setr_epi32(-1, 2, 0, 0));

        let a = _mm_setr_pd(f64::NEG_INFINITY, f64::NAN);
        let r = _mm_cvttpd_epi32(a);
        assert_eq_m128i(r, _mm_setr_epi32(i32::MIN, i32::MIN, 0, 0));
    }

    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_cvttsd_si32() {
        let a = _mm_setr_pd(-1.1, 2.2);
        let r = _mm_cvttsd_si32(a);
        assert_eq!(r, -1);

        let a = _mm_setr_pd(f64::NEG_INFINITY, f64::NAN);
        let r = _mm_cvttsd_si32(a);
        assert_eq!(r, i32::MIN);
    }

    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_cvttps_epi32() {
        let a = _mm_setr_ps(-1.1, 2.2, -3.3, 6.6);
        let r = _mm_cvttps_epi32(a);
        assert_eq_m128i(r, _mm_setr_epi32(-1, 2, -3, 6));

        let a = _mm_setr_ps(f32::NEG_INFINITY, f32::INFINITY, f32::MIN, f32::MAX);
        let r = _mm_cvttps_epi32(a);
        assert_eq_m128i(r, _mm_setr_epi32(i32::MIN, i32::MIN, i32::MIN, i32::MIN));
    }

    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_set_sd() {
        let r = _mm_set_sd(-1.0_f64);
        assert_eq_m128d(r, _mm_setr_pd(-1.0_f64, 0_f64));
    }

    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_set1_pd() {
        let r = _mm_set1_pd(-1.0_f64);
        assert_eq_m128d(r, _mm_setr_pd(-1.0_f64, -1.0_f64));
    }

    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_set_pd1() {
        let r = _mm_set_pd1(-2.0_f64);
        assert_eq_m128d(r, _mm_setr_pd(-2.0_f64, -2.0_f64));
    }

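    // `_mm_set_pd` takes its arguments highest lane first, while
    // `_mm_setr_pd` (the reference constructor used throughout these tests)
    // takes them in memory order.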
    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_set_pd() {
        let r = _mm_set_pd(1.0_f64, 5.0_f64);
        assert_eq_m128d(r, _mm_setr_pd(5.0_f64, 1.0_f64));
    }

    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_setr_pd() {
        let r = _mm_setr_pd(1.0_f64, -5.0_f64);
        assert_eq_m128d(r, _mm_setr_pd(1.0_f64, -5.0_f64));
    }

    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_setzero_pd() {
        let r = _mm_setzero_pd();
        assert_eq_m128d(r, _mm_setr_pd(0_f64, 0_f64));
    }

    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_load1_pd() {
        let d = -5.0;
        let r = _mm_load1_pd(&d);
        assert_eq_m128d(r, _mm_setr_pd(d, d));
    }

    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_load_pd1() {
        let d = -5.0;
        let r = _mm_load_pd1(&d);
        assert_eq_m128d(r, _mm_setr_pd(d, d));
    }

    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_unpackhi_pd() {
        let a = _mm_setr_pd(1.0, 2.0);
        let b = _mm_setr_pd(3.0, 4.0);
        let r = _mm_unpackhi_pd(a, b);
        assert_eq_m128d(r, _mm_setr_pd(2.0, 4.0));
    }

    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_unpacklo_pd() {
        let a = _mm_setr_pd(1.0, 2.0);
        let b = _mm_setr_pd(3.0, 4.0);
        let r = _mm_unpacklo_pd(a, b);
        assert_eq_m128d(r, _mm_setr_pd(1.0, 3.0));
    }

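    // With an immediate of 0, `_mm_shuffle_pd` selects the low lane of `a`
    // for the low result lane and the low lane of `b` for the high one.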
    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_shuffle_pd() {
        let a = _mm_setr_pd(1., 2.);
        let b = _mm_setr_pd(3., 4.);
        let expected = _mm_setr_pd(1., 3.);
        let r = _mm_shuffle_pd(a, b, 0);
        assert_eq_m128d(r, expected);
    }

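    // `_mm_move_sd` takes the low lane from `b` and the high lane from `a`.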
    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_move_sd() {
        let a = _mm_setr_pd(1., 2.);
        let b = _mm_setr_pd(3., 4.);
        let expected = _mm_setr_pd(3., 2.);
        let r = _mm_move_sd(a, b);
        assert_eq_m128d(r, expected);
    }

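    // The `cast*` intrinsics only reinterpret bits and emit no conversion
    // instruction, so an all-zero pattern must survive every cast unchanged.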
    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_castpd_ps() {
        let a = _mm_set1_pd(0.);
        let expected = _mm_set1_ps(0.);
        let r = _mm_castpd_ps(a);
        assert_eq_m128(r, expected);
    }

    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_castpd_si128() {
        let a = _mm_set1_pd(0.);
        let expected = _mm_set1_epi64x(0);
        let r = _mm_castpd_si128(a);
        assert_eq_m128i(r, expected);
    }

    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_castps_pd() {
        let a = _mm_set1_ps(0.);
        let expected = _mm_set1_pd(0.);
        let r = _mm_castps_pd(a);
        assert_eq_m128d(r, expected);
    }

    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_castps_si128() {
        let a = _mm_set1_ps(0.);
        let expected = _mm_set1_epi32(0);
        let r = _mm_castps_si128(a);
        assert_eq_m128i(r, expected);
    }

    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_castsi128_pd() {
        let a = _mm_set1_epi64x(0);
        let expected = _mm_set1_pd(0.);
        let r = _mm_castsi128_pd(a);
        assert_eq_m128d(r, expected);
    }

    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_castsi128_ps() {
        let a = _mm_set1_epi32(0);
        let expected = _mm_set1_ps(0.);
        let r = _mm_castsi128_ps(a);
        assert_eq_m128(r, expected);
    }
}