]>
Commit | Line | Data |
---|---|---|
0531ce1d XL |
1 | //! Streaming SIMD Extensions 2 (SSE2) |
2 | ||
3 | #[cfg(test)] | |
416331ca | 4 | use stdarch_test::assert_instr; |
0531ce1d | 5 | |
532ac7d7 XL |
6 | use crate::{ |
7 | core_arch::{simd::*, simd_llvm::*, x86::*}, | |
8 | intrinsics, | |
9 | mem::{self, transmute}, | |
10 | ptr, | |
11 | }; | |
0531ce1d | 12 | |
/// Provides a hint to the processor that the code sequence is a spin-wait loop.
///
/// This can help improve the performance and power consumption of spin-wait
/// loops.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_pause)
#[inline]
#[cfg_attr(all(test, target_feature = "sse2"), assert_instr(pause))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_pause() {
    // note: `pause` is guaranteed to be interpreted as a `nop` by CPUs without
    // the SSE2 target-feature - therefore it does not require any target features
    pause()
}
27 | ||
/// Invalidates and flushes the cache line that contains `p` from all levels of
/// the cache hierarchy.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_clflush)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(clflush))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_clflush(p: *const u8) {
    clflush(p)
}
39 | ||
/// Performs a serializing operation on all load-from-memory instructions
/// that were issued prior to this instruction.
///
/// Guarantees that every load instruction that precedes, in program order, is
/// globally visible before any load instruction which follows the fence in
/// program order.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_lfence)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(lfence))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_lfence() {
    lfence()
}
55 | ||
/// Performs a serializing operation on all load-from-memory and store-to-memory
/// instructions that were issued prior to this instruction.
///
/// Guarantees that every memory access that precedes, in program order, the
/// memory fence instruction is globally visible before any memory instruction
/// which follows the fence in program order.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mfence)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(mfence))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_mfence() {
    mfence()
}
71 | ||
/// Adds packed 8-bit integers in `a` and `b`.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_add_epi8)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(paddb))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_add_epi8(a: __m128i, b: __m128i) -> __m128i {
    // Lane-wise wrapping add over the 16 i8 lanes.
    transmute(simd_add(a.as_i8x16(), b.as_i8x16()))
}
82 | ||
/// Adds packed 16-bit integers in `a` and `b`.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_add_epi16)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(paddw))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_add_epi16(a: __m128i, b: __m128i) -> __m128i {
    // Lane-wise wrapping add over the 8 i16 lanes.
    transmute(simd_add(a.as_i16x8(), b.as_i16x8()))
}
93 | ||
/// Adds packed 32-bit integers in `a` and `b`.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_add_epi32)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(paddd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_add_epi32(a: __m128i, b: __m128i) -> __m128i {
    // Lane-wise wrapping add over the 4 i32 lanes.
    transmute(simd_add(a.as_i32x4(), b.as_i32x4()))
}
104 | ||
/// Adds packed 64-bit integers in `a` and `b`.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_add_epi64)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(paddq))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_add_epi64(a: __m128i, b: __m128i) -> __m128i {
    // Lane-wise wrapping add over the 2 i64 lanes.
    transmute(simd_add(a.as_i64x2(), b.as_i64x2()))
}
115 | ||
/// Adds packed 8-bit integers in `a` and `b` using saturation.
///
/// Results that would overflow are clamped to `i8::MIN`/`i8::MAX` instead of
/// wrapping.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_adds_epi8)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(paddsb))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_adds_epi8(a: __m128i, b: __m128i) -> __m128i {
    transmute(simd_saturating_add(a.as_i8x16(), b.as_i8x16()))
}
126 | ||
/// Adds packed 16-bit integers in `a` and `b` using saturation.
///
/// Results that would overflow are clamped to `i16::MIN`/`i16::MAX` instead of
/// wrapping.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_adds_epi16)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(paddsw))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_adds_epi16(a: __m128i, b: __m128i) -> __m128i {
    transmute(simd_saturating_add(a.as_i16x8(), b.as_i16x8()))
}
137 | ||
/// Adds packed unsigned 8-bit integers in `a` and `b` using saturation.
///
/// Results that would overflow are clamped to `u8::MAX` instead of wrapping.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_adds_epu8)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(paddusb))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_adds_epu8(a: __m128i, b: __m128i) -> __m128i {
    transmute(simd_saturating_add(a.as_u8x16(), b.as_u8x16()))
}
148 | ||
/// Adds packed unsigned 16-bit integers in `a` and `b` using saturation.
///
/// Results that would overflow are clamped to `u16::MAX` instead of wrapping.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_adds_epu16)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(paddusw))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_adds_epu16(a: __m128i, b: __m128i) -> __m128i {
    transmute(simd_saturating_add(a.as_u16x8(), b.as_u16x8()))
}
159 | ||
/// Averages packed unsigned 8-bit integers in `a` and `b`.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_avg_epu8)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(pavgb))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_avg_epu8(a: __m128i, b: __m128i) -> __m128i {
    transmute(pavgb(a.as_u8x16(), b.as_u8x16()))
}
170 | ||
/// Averages packed unsigned 16-bit integers in `a` and `b`.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_avg_epu16)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(pavgw))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_avg_epu16(a: __m128i, b: __m128i) -> __m128i {
    transmute(pavgw(a.as_u16x8(), b.as_u16x8()))
}
181 | ||
/// Multiplies and then horizontally add signed 16 bit integers in `a` and `b`.
///
/// Multiplies packed signed 16-bit integers in `a` and `b`, producing
/// intermediate signed 32-bit integers. Horizontally add adjacent pairs of
/// intermediate 32-bit integers.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_madd_epi16)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(pmaddwd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_madd_epi16(a: __m128i, b: __m128i) -> __m128i {
    transmute(pmaddwd(a.as_i16x8(), b.as_i16x8()))
}
196 | ||
/// Compares packed 16-bit integers in `a` and `b`, and returns the packed
/// maximum values.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_max_epi16)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(pmaxsw))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_max_epi16(a: __m128i, b: __m128i) -> __m128i {
    transmute(pmaxsw(a.as_i16x8(), b.as_i16x8()))
}
208 | ||
/// Compares packed unsigned 8-bit integers in `a` and `b`, and returns the
/// packed maximum values.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_max_epu8)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(pmaxub))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_max_epu8(a: __m128i, b: __m128i) -> __m128i {
    transmute(pmaxub(a.as_u8x16(), b.as_u8x16()))
}
220 | ||
/// Compares packed 16-bit integers in `a` and `b`, and returns the packed
/// minimum values.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_min_epi16)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(pminsw))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_min_epi16(a: __m128i, b: __m128i) -> __m128i {
    transmute(pminsw(a.as_i16x8(), b.as_i16x8()))
}
232 | ||
/// Compares packed unsigned 8-bit integers in `a` and `b`, and returns the
/// packed minimum values.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_min_epu8)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(pminub))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_min_epu8(a: __m128i, b: __m128i) -> __m128i {
    transmute(pminub(a.as_u8x16(), b.as_u8x16()))
}
244 | ||
/// Multiplies the packed 16-bit integers in `a` and `b`.
///
/// The multiplication produces intermediate 32-bit integers, and returns the
/// high 16 bits of the intermediate integers.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mulhi_epi16)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(pmulhw))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_mulhi_epi16(a: __m128i, b: __m128i) -> __m128i {
    transmute(pmulhw(a.as_i16x8(), b.as_i16x8()))
}
258 | ||
/// Multiplies the packed unsigned 16-bit integers in `a` and `b`.
///
/// The multiplication produces intermediate 32-bit integers, and returns the
/// high 16 bits of the intermediate integers.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mulhi_epu16)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(pmulhuw))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_mulhi_epu16(a: __m128i, b: __m128i) -> __m128i {
    transmute(pmulhuw(a.as_u16x8(), b.as_u16x8()))
}
272 | ||
/// Multiplies the packed 16-bit integers in `a` and `b`.
///
/// The multiplication produces intermediate 32-bit integers, and returns the
/// low 16 bits of the intermediate integers.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mullo_epi16)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(pmullw))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_mullo_epi16(a: __m128i, b: __m128i) -> __m128i {
    // The low half of a widening multiply is the same as a wrapping multiply,
    // so a plain lane-wise `simd_mul` suffices here.
    transmute(simd_mul(a.as_i16x8(), b.as_i16x8()))
}
286 | ||
/// Multiplies the low unsigned 32-bit integers from each packed 64-bit element
/// in `a` and `b`.
///
/// Returns the unsigned 64-bit results.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mul_epu32)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(pmuludq))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_mul_epu32(a: __m128i, b: __m128i) -> __m128i {
    transmute(pmuludq(a.as_u32x4(), b.as_u32x4()))
}
300 | ||
/// Sum the absolute differences of packed unsigned 8-bit integers.
///
/// Computes the absolute differences of packed unsigned 8-bit integers in `a`
/// and `b`, then horizontally sum each consecutive 8 differences to produce
/// two unsigned 16-bit integers, and pack these unsigned 16-bit integers in
/// the low 16 bits of 64-bit elements returned.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_sad_epu8)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(psadbw))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_sad_epu8(a: __m128i, b: __m128i) -> __m128i {
    transmute(psadbw(a.as_u8x16(), b.as_u8x16()))
}
316 | ||
/// Subtracts packed 8-bit integers in `b` from packed 8-bit integers in `a`.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_sub_epi8)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(psubb))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_sub_epi8(a: __m128i, b: __m128i) -> __m128i {
    // Lane-wise wrapping subtract over the 16 i8 lanes.
    transmute(simd_sub(a.as_i8x16(), b.as_i8x16()))
}
327 | ||
/// Subtracts packed 16-bit integers in `b` from packed 16-bit integers in `a`.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_sub_epi16)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(psubw))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_sub_epi16(a: __m128i, b: __m128i) -> __m128i {
    // Lane-wise wrapping subtract over the 8 i16 lanes.
    transmute(simd_sub(a.as_i16x8(), b.as_i16x8()))
}
338 | ||
/// Subtract packed 32-bit integers in `b` from packed 32-bit integers in `a`.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_sub_epi32)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(psubd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_sub_epi32(a: __m128i, b: __m128i) -> __m128i {
    // Lane-wise wrapping subtract over the 4 i32 lanes.
    transmute(simd_sub(a.as_i32x4(), b.as_i32x4()))
}
349 | ||
/// Subtract packed 64-bit integers in `b` from packed 64-bit integers in `a`.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_sub_epi64)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(psubq))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_sub_epi64(a: __m128i, b: __m128i) -> __m128i {
    // Lane-wise wrapping subtract over the 2 i64 lanes.
    transmute(simd_sub(a.as_i64x2(), b.as_i64x2()))
}
360 | ||
/// Subtract packed 8-bit integers in `b` from packed 8-bit integers in `a`
/// using saturation.
///
/// Results that would overflow are clamped to `i8::MIN`/`i8::MAX` instead of
/// wrapping.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_subs_epi8)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(psubsb))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_subs_epi8(a: __m128i, b: __m128i) -> __m128i {
    transmute(simd_saturating_sub(a.as_i8x16(), b.as_i8x16()))
}
372 | ||
/// Subtract packed 16-bit integers in `b` from packed 16-bit integers in `a`
/// using saturation.
///
/// Results that would overflow are clamped to `i16::MIN`/`i16::MAX` instead of
/// wrapping.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_subs_epi16)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(psubsw))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_subs_epi16(a: __m128i, b: __m128i) -> __m128i {
    transmute(simd_saturating_sub(a.as_i16x8(), b.as_i16x8()))
}
384 | ||
/// Subtract packed unsigned 8-bit integers in `b` from packed unsigned 8-bit
/// integers in `a` using saturation.
///
/// Results that would underflow are clamped to `0` instead of wrapping.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_subs_epu8)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(psubusb))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_subs_epu8(a: __m128i, b: __m128i) -> __m128i {
    transmute(simd_saturating_sub(a.as_u8x16(), b.as_u8x16()))
}
396 | ||
/// Subtract packed unsigned 16-bit integers in `b` from packed unsigned 16-bit
/// integers in `a` using saturation.
///
/// Results that would underflow are clamped to `0` instead of wrapping.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_subs_epu16)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(psubusw))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_subs_epu16(a: __m128i, b: __m128i) -> __m128i {
    transmute(simd_saturating_sub(a.as_u16x8(), b.as_u16x8()))
}
408 | ||
/// Shifts `a` left by `IMM8` bytes while shifting in zeros.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_slli_si128)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(pslldq, IMM8 = 1))]
#[rustc_legacy_const_generics(1)]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_slli_si128<const IMM8: i32>(a: __m128i) -> __m128i {
    // Only the low 8 bits of the immediate are meaningful; reject anything
    // outside 0..=255 at compile time.
    static_assert_imm8!(IMM8);
    _mm_slli_si128_impl::<IMM8>(a)
}
421 | ||
/// Implementation detail: converts the immediate argument of the
/// `_mm_slli_si128` intrinsic into a compile-time constant.
#[inline]
#[target_feature(enable = "sse2")]
unsafe fn _mm_slli_si128_impl<const IMM8: i32>(a: __m128i) -> __m128i {
    // Computes the shuffle index for output byte `i` of a left-shift by
    // `shift` bytes. The shuffle sources are [zero (0..16), a (16..32)]:
    // shifts >= 16 select only zero lanes; otherwise `16 - shift + i`
    // picks bytes of `a` offset so the low `shift` bytes come from zero.
    const fn mask(shift: i32, i: u32) -> u32 {
        let shift = shift as u32 & 0xff;
        if shift > 15 {
            i
        } else {
            16 - shift + i
        }
    }
    let zero = _mm_set1_epi8(0).as_i8x16();
    transmute::<i8x16, _>(simd_shuffle16!(
        zero,
        a.as_i8x16(),
        <const IMM8: i32> [
            mask(IMM8, 0),
            mask(IMM8, 1),
            mask(IMM8, 2),
            mask(IMM8, 3),
            mask(IMM8, 4),
            mask(IMM8, 5),
            mask(IMM8, 6),
            mask(IMM8, 7),
            mask(IMM8, 8),
            mask(IMM8, 9),
            mask(IMM8, 10),
            mask(IMM8, 11),
            mask(IMM8, 12),
            mask(IMM8, 13),
            mask(IMM8, 14),
            mask(IMM8, 15),
        ],
    ))
}
459 | ||
/// Shifts `a` left by `IMM8` bytes while shifting in zeros.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_bslli_si128)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(pslldq, IMM8 = 1))]
#[rustc_legacy_const_generics(1)]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_bslli_si128<const IMM8: i32>(a: __m128i) -> __m128i {
    // Alias of `_mm_slli_si128`; both map to the same `pslldq` instruction.
    static_assert_imm8!(IMM8);
    _mm_slli_si128_impl::<IMM8>(a)
}
472 | ||
/// Shifts `a` right by `IMM8` bytes while shifting in zeros.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_bsrli_si128)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(psrldq, IMM8 = 1))]
#[rustc_legacy_const_generics(1)]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_bsrli_si128<const IMM8: i32>(a: __m128i) -> __m128i {
    // Alias of `_mm_srli_si128`; both map to the same `psrldq` instruction.
    static_assert_imm8!(IMM8);
    _mm_srli_si128_impl::<IMM8>(a)
}
485 | ||
/// Shifts packed 16-bit integers in `a` left by `IMM8` while shifting in zeros.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_slli_epi16)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(psllw, IMM8 = 7))]
#[rustc_legacy_const_generics(1)]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_slli_epi16<const IMM8: i32>(a: __m128i) -> __m128i {
    static_assert_imm8!(IMM8);
    transmute(pslliw(a.as_i16x8(), IMM8))
}
498 | ||
/// Shifts packed 16-bit integers in `a` left by `count` while shifting in
/// zeros.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_sll_epi16)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(psllw))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_sll_epi16(a: __m128i, count: __m128i) -> __m128i {
    transmute(psllw(a.as_i16x8(), count.as_i16x8()))
}
510 | ||
/// Shifts packed 32-bit integers in `a` left by `IMM8` while shifting in zeros.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_slli_epi32)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(pslld, IMM8 = 7))]
#[rustc_legacy_const_generics(1)]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_slli_epi32<const IMM8: i32>(a: __m128i) -> __m128i {
    static_assert_imm8!(IMM8);
    transmute(psllid(a.as_i32x4(), IMM8))
}
523 | ||
/// Shifts packed 32-bit integers in `a` left by `count` while shifting in
/// zeros.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_sll_epi32)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(pslld))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_sll_epi32(a: __m128i, count: __m128i) -> __m128i {
    transmute(pslld(a.as_i32x4(), count.as_i32x4()))
}
535 | ||
/// Shifts packed 64-bit integers in `a` left by `IMM8` while shifting in zeros.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_slli_epi64)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(psllq, IMM8 = 7))]
#[rustc_legacy_const_generics(1)]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_slli_epi64<const IMM8: i32>(a: __m128i) -> __m128i {
    static_assert_imm8!(IMM8);
    transmute(pslliq(a.as_i64x2(), IMM8))
}
548 | ||
/// Shifts packed 64-bit integers in `a` left by `count` while shifting in
/// zeros.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_sll_epi64)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(psllq))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_sll_epi64(a: __m128i, count: __m128i) -> __m128i {
    transmute(psllq(a.as_i64x2(), count.as_i64x2()))
}
560 | ||
/// Shifts packed 16-bit integers in `a` right by `IMM8` while shifting in sign
/// bits.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_srai_epi16)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(psraw, IMM8 = 1))]
#[rustc_legacy_const_generics(1)]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_srai_epi16<const IMM8: i32>(a: __m128i) -> __m128i {
    static_assert_imm8!(IMM8);
    transmute(psraiw(a.as_i16x8(), IMM8))
}
574 | ||
/// Shifts packed 16-bit integers in `a` right by `count` while shifting in sign
/// bits.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_sra_epi16)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(psraw))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_sra_epi16(a: __m128i, count: __m128i) -> __m128i {
    transmute(psraw(a.as_i16x8(), count.as_i16x8()))
}
586 | ||
/// Shifts packed 32-bit integers in `a` right by `IMM8` while shifting in sign
/// bits.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_srai_epi32)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(psrad, IMM8 = 1))]
#[rustc_legacy_const_generics(1)]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_srai_epi32<const IMM8: i32>(a: __m128i) -> __m128i {
    static_assert_imm8!(IMM8);
    transmute(psraid(a.as_i32x4(), IMM8))
}
600 | ||
/// Shifts packed 32-bit integers in `a` right by `count` while shifting in sign
/// bits.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_sra_epi32)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(psrad))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_sra_epi32(a: __m128i, count: __m128i) -> __m128i {
    transmute(psrad(a.as_i32x4(), count.as_i32x4()))
}
612 | ||
/// Shifts `a` right by `IMM8` bytes while shifting in zeros.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_srli_si128)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(psrldq, IMM8 = 1))]
#[rustc_legacy_const_generics(1)]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_srli_si128<const IMM8: i32>(a: __m128i) -> __m128i {
    // Byte-wise (whole-register) shift; the actual shuffle lives in the
    // `_mm_srli_si128_impl` helper so the shuffle indices are compile-time
    // constants derived from `IMM8`.
    static_assert_imm8!(IMM8);
    _mm_srli_si128_impl::<IMM8>(a)
}
625 | ||
/// Implementation detail: converts the immediate argument of the
/// `_mm_srli_si128` intrinsic into a compile-time constant.
#[inline]
#[target_feature(enable = "sse2")]
unsafe fn _mm_srli_si128_impl<const IMM8: i32>(a: __m128i) -> __m128i {
    // Shuffle-index generator: lane `i` of the result selects byte
    // `i + shift` of `a` (indices 0..=15), or a byte of the zero vector
    // (indices 16..=31) once the shift pushes past the end of `a`.
    // A shift > 15 makes every lane pick from `zero`, yielding all-zeros;
    // a negative `shift` wraps to a huge `u32` and takes the same path.
    const fn mask(shift: i32, i: u32) -> u32 {
        if (shift as u32) > 15 {
            i + 16
        } else {
            i + (shift as u32)
        }
    }
    let zero = _mm_set1_epi8(0).as_i8x16();
    let x: i8x16 = simd_shuffle16!(
        a.as_i8x16(),
        zero,
        <const IMM8: i32> [
            mask(IMM8, 0),
            mask(IMM8, 1),
            mask(IMM8, 2),
            mask(IMM8, 3),
            mask(IMM8, 4),
            mask(IMM8, 5),
            mask(IMM8, 6),
            mask(IMM8, 7),
            mask(IMM8, 8),
            mask(IMM8, 9),
            mask(IMM8, 10),
            mask(IMM8, 11),
            mask(IMM8, 12),
            mask(IMM8, 13),
            mask(IMM8, 14),
            mask(IMM8, 15),
        ],
    );
    transmute(x)
}
663 | ||
/// Shifts packed 16-bit integers in `a` right by `IMM8` while shifting in
/// zeros.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_srli_epi16)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(psrlw, IMM8 = 1))]
#[rustc_legacy_const_generics(1)]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_srli_epi16<const IMM8: i32>(a: __m128i) -> __m128i {
    // Logical (zero-filling) right shift per 16-bit lane with an immediate count.
    static_assert_imm8!(IMM8);
    transmute(psrliw(a.as_i16x8(), IMM8))
}
677 | ||
/// Shifts packed 16-bit integers in `a` right by `count` while shifting in
/// zeros.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_srl_epi16)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(psrlw))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_srl_epi16(a: __m128i, count: __m128i) -> __m128i {
    // Logical right shift per 16-bit lane via the `psrlw` intrinsic.
    transmute(psrlw(a.as_i16x8(), count.as_i16x8()))
}
689 | ||
/// Shifts packed 32-bit integers in `a` right by `IMM8` while shifting in
/// zeros.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_srli_epi32)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(psrld, IMM8 = 8))]
#[rustc_legacy_const_generics(1)]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_srli_epi32<const IMM8: i32>(a: __m128i) -> __m128i {
    // Logical (zero-filling) right shift per 32-bit lane with an immediate count.
    static_assert_imm8!(IMM8);
    transmute(psrlid(a.as_i32x4(), IMM8))
}
703 | ||
/// Shifts packed 32-bit integers in `a` right by `count` while shifting in
/// zeros.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_srl_epi32)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(psrld))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_srl_epi32(a: __m128i, count: __m128i) -> __m128i {
    // Logical right shift per 32-bit lane via the `psrld` intrinsic.
    transmute(psrld(a.as_i32x4(), count.as_i32x4()))
}
715 | ||
/// Shifts packed 64-bit integers in `a` right by `IMM8` while shifting in
/// zeros.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_srli_epi64)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(psrlq, IMM8 = 1))]
#[rustc_legacy_const_generics(1)]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_srli_epi64<const IMM8: i32>(a: __m128i) -> __m128i {
    // Logical (zero-filling) right shift per 64-bit lane with an immediate count.
    static_assert_imm8!(IMM8);
    transmute(psrliq(a.as_i64x2(), IMM8))
}
729 | ||
/// Shifts packed 64-bit integers in `a` right by `count` while shifting in
/// zeros.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_srl_epi64)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(psrlq))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_srl_epi64(a: __m128i, count: __m128i) -> __m128i {
    // Logical right shift per 64-bit lane via the `psrlq` intrinsic.
    transmute(psrlq(a.as_i64x2(), count.as_i64x2()))
}
741 | ||
/// Computes the bitwise AND of 128 bits (representing integer data) in `a` and
/// `b`.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_and_si128)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(andps))] // LLVM is free to pick the FP form; same bit result
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_and_si128(a: __m128i, b: __m128i) -> __m128i {
    simd_and(a, b)
}
753 | ||
/// Computes the bitwise NOT of 128 bits (representing integer data) in `a` and
/// then AND with `b`.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_andnot_si128)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(andnps))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_andnot_si128(a: __m128i, b: __m128i) -> __m128i {
    // NOT is expressed as XOR with all-ones (`_mm_set1_epi8(-1)`), i.e. (!a) & b.
    simd_and(simd_xor(_mm_set1_epi8(-1), a), b)
}
765 | ||
/// Computes the bitwise OR of 128 bits (representing integer data) in `a` and
/// `b`.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_or_si128)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(orps))] // LLVM may emit the FP form; bitwise result is identical
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_or_si128(a: __m128i, b: __m128i) -> __m128i {
    simd_or(a, b)
}
777 | ||
/// Computes the bitwise XOR of 128 bits (representing integer data) in `a` and
/// `b`.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_xor_si128)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(xorps))] // LLVM may emit the FP form; bitwise result is identical
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_xor_si128(a: __m128i, b: __m128i) -> __m128i {
    simd_xor(a, b)
}
789 | ||
/// Compares packed 8-bit integers in `a` and `b` for equality.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cmpeq_epi8)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(pcmpeqb))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_cmpeq_epi8(a: __m128i, b: __m128i) -> __m128i {
    // Lane-wise equality; the resulting i8x16 mask is reinterpreted as __m128i.
    transmute::<i8x16, _>(simd_eq(a.as_i8x16(), b.as_i8x16()))
}
800 | ||
/// Compares packed 16-bit integers in `a` and `b` for equality.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cmpeq_epi16)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(pcmpeqw))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_cmpeq_epi16(a: __m128i, b: __m128i) -> __m128i {
    // Lane-wise equality; the resulting i16x8 mask is reinterpreted as __m128i.
    transmute::<i16x8, _>(simd_eq(a.as_i16x8(), b.as_i16x8()))
}
811 | ||
/// Compares packed 32-bit integers in `a` and `b` for equality.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cmpeq_epi32)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(pcmpeqd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_cmpeq_epi32(a: __m128i, b: __m128i) -> __m128i {
    // Lane-wise equality; the resulting i32x4 mask is reinterpreted as __m128i.
    transmute::<i32x4, _>(simd_eq(a.as_i32x4(), b.as_i32x4()))
}
822 | ||
/// Compares packed 8-bit integers in `a` and `b` for greater-than.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cmpgt_epi8)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(pcmpgtb))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_cmpgt_epi8(a: __m128i, b: __m128i) -> __m128i {
    // Signed per-lane `a > b`; comparison is on i8 lanes.
    transmute::<i8x16, _>(simd_gt(a.as_i8x16(), b.as_i8x16()))
}
833 | ||
/// Compares packed 16-bit integers in `a` and `b` for greater-than.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cmpgt_epi16)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(pcmpgtw))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_cmpgt_epi16(a: __m128i, b: __m128i) -> __m128i {
    // Signed per-lane `a > b`; comparison is on i16 lanes.
    transmute::<i16x8, _>(simd_gt(a.as_i16x8(), b.as_i16x8()))
}
844 | ||
/// Compares packed 32-bit integers in `a` and `b` for greater-than.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cmpgt_epi32)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(pcmpgtd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_cmpgt_epi32(a: __m128i, b: __m128i) -> __m128i {
    // Signed per-lane `a > b`; comparison is on i32 lanes.
    transmute::<i32x4, _>(simd_gt(a.as_i32x4(), b.as_i32x4()))
}
855 | ||
/// Compares packed 8-bit integers in `a` and `b` for less-than.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cmplt_epi8)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(pcmpgtb))] // hardware has no pcmpltb; `a < b` compiles to pcmpgtb with operands swapped
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_cmplt_epi8(a: __m128i, b: __m128i) -> __m128i {
    transmute::<i8x16, _>(simd_lt(a.as_i8x16(), b.as_i8x16()))
}
866 | ||
/// Compares packed 16-bit integers in `a` and `b` for less-than.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cmplt_epi16)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(pcmpgtw))] // hardware has no pcmpltw; `a < b` compiles to pcmpgtw with operands swapped
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_cmplt_epi16(a: __m128i, b: __m128i) -> __m128i {
    transmute::<i16x8, _>(simd_lt(a.as_i16x8(), b.as_i16x8()))
}
877 | ||
/// Compares packed 32-bit integers in `a` and `b` for less-than.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cmplt_epi32)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(pcmpgtd))] // hardware has no pcmpltd; `a < b` compiles to pcmpgtd with operands swapped
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_cmplt_epi32(a: __m128i, b: __m128i) -> __m128i {
    transmute::<i32x4, _>(simd_lt(a.as_i32x4(), b.as_i32x4()))
}
888 | ||
/// Converts the lower two packed 32-bit integers in `a` to packed
/// double-precision (64-bit) floating-point elements.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvtepi32_pd)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(cvtdq2pd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_cvtepi32_pd(a: __m128i) -> __m128d {
    let a = a.as_i32x4();
    // Select lanes 0 and 1, then widen i32 -> f64 (exact; f64 represents all i32).
    simd_cast::<i32x2, __m128d>(simd_shuffle2!(a, a, [0, 1]))
}
901 | ||
/// Returns `a` with its lower element replaced by `b` after converting it to
/// an `f64`.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvtsi32_sd)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(cvtsi2sd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_cvtsi32_sd(a: __m128d, b: i32) -> __m128d {
    // Only lane 0 is replaced; the upper lane of `a` passes through unchanged.
    simd_insert(a, 0, b as f64)
}
913 | ||
/// Converts packed 32-bit integers in `a` to packed single-precision (32-bit)
/// floating-point elements.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvtepi32_ps)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(cvtdq2ps))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_cvtepi32_ps(a: __m128i) -> __m128 {
    // Delegates to the cvtdq2ps intrinsic rather than `simd_cast` so the
    // instruction's rounding behavior is preserved exactly.
    cvtdq2ps(a.as_i32x4())
}
925 | ||
/// Converts packed single-precision (32-bit) floating-point elements in `a`
/// to packed 32-bit integers.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvtps_epi32)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(cvtps2dq))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_cvtps_epi32(a: __m128) -> __m128i {
    // Uses the cvtps2dq intrinsic (current-rounding-mode conversion), not a
    // plain cast, to match the hardware semantics for out-of-range/NaN inputs.
    transmute(cvtps2dq(a))
}
937 | ||
/// Returns a vector whose lowest element is `a` and all higher elements are
/// `0`.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvtsi32_si128)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(all(test, target_arch = "x86_64"), assert_instr(movd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_cvtsi32_si128(a: i32) -> __m128i {
    transmute(i32x4::new(a, 0, 0, 0))
}
949 | ||
/// Returns the lowest element of `a`.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvtsi128_si32)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(all(test, not(target_os = "windows")), assert_instr(movd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_cvtsi128_si32(a: __m128i) -> i32 {
    // Extract lane 0 of the vector viewed as four i32s.
    simd_extract(a.as_i32x4(), 0)
}
960 | ||
/// Sets packed 64-bit integers with the supplied values, from highest to
/// lowest.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_set_epi64x)
#[inline]
#[target_feature(enable = "sse2")]
// no particular instruction to test
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_set_epi64x(e1: i64, e0: i64) -> __m128i {
    // `i64x2::new` takes lanes low-to-high, so the arguments are reversed here.
    transmute(i64x2::new(e0, e1))
}
972 | ||
/// Sets packed 32-bit integers with the supplied values.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_set_epi32)
#[inline]
#[target_feature(enable = "sse2")]
// no particular instruction to test
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_set_epi32(e3: i32, e2: i32, e1: i32, e0: i32) -> __m128i {
    // Parameters arrive highest-lane-first (Intel convention); `i32x4::new`
    // takes lanes low-to-high, hence the reversed order.
    transmute(i32x4::new(e0, e1, e2, e3))
}
983 | ||
/// Sets packed 16-bit integers with the supplied values.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_set_epi16)
#[inline]
#[target_feature(enable = "sse2")]
// no particular instruction to test
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_set_epi16(
    e7: i16,
    e6: i16,
    e5: i16,
    e4: i16,
    e3: i16,
    e2: i16,
    e1: i16,
    e0: i16,
) -> __m128i {
    // Parameters arrive highest-lane-first (Intel convention); `i16x8::new`
    // takes lanes low-to-high, hence the reversed order.
    transmute(i16x8::new(e0, e1, e2, e3, e4, e5, e6, e7))
}
1003 | ||
/// Sets packed 8-bit integers with the supplied values.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_set_epi8)
#[inline]
#[target_feature(enable = "sse2")]
// no particular instruction to test
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_set_epi8(
    e15: i8,
    e14: i8,
    e13: i8,
    e12: i8,
    e11: i8,
    e10: i8,
    e9: i8,
    e8: i8,
    e7: i8,
    e6: i8,
    e5: i8,
    e4: i8,
    e3: i8,
    e2: i8,
    e1: i8,
    e0: i8,
) -> __m128i {
    // Parameters arrive highest-lane-first (Intel convention); `i8x16::new`
    // takes lanes low-to-high, hence the reversed order.
    #[rustfmt::skip]
    transmute(i8x16::new(
        e0, e1, e2, e3, e4, e5, e6, e7, e8, e9, e10, e11, e12, e13, e14, e15,
    ))
}
1034 | ||
/// Broadcasts 64-bit integer `a` to all elements.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_set1_epi64x)
#[inline]
#[target_feature(enable = "sse2")]
// no particular instruction to test
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_set1_epi64x(a: i64) -> __m128i {
    _mm_set_epi64x(a, a)
}
1045 | ||
/// Broadcasts 32-bit integer `a` to all elements.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_set1_epi32)
#[inline]
#[target_feature(enable = "sse2")]
// no particular instruction to test
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_set1_epi32(a: i32) -> __m128i {
    _mm_set_epi32(a, a, a, a)
}
1056 | ||
/// Broadcasts 16-bit integer `a` to all elements.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_set1_epi16)
#[inline]
#[target_feature(enable = "sse2")]
// no particular instruction to test
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_set1_epi16(a: i16) -> __m128i {
    _mm_set_epi16(a, a, a, a, a, a, a, a)
}
1067 | ||
/// Broadcasts 8-bit integer `a` to all elements.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_set1_epi8)
#[inline]
#[target_feature(enable = "sse2")]
// no particular instruction to test
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_set1_epi8(a: i8) -> __m128i {
    _mm_set_epi8(a, a, a, a, a, a, a, a, a, a, a, a, a, a, a, a)
}
1078 | ||
/// Sets packed 32-bit integers with the supplied values in reverse order.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_setr_epi32)
#[inline]
#[target_feature(enable = "sse2")]
// no particular instruction to test
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_setr_epi32(e3: i32, e2: i32, e1: i32, e0: i32) -> __m128i {
    // "Reverse" set: forwards the arguments to `_mm_set_epi32` in flipped order.
    _mm_set_epi32(e0, e1, e2, e3)
}
1089 | ||
/// Sets packed 16-bit integers with the supplied values in reverse order.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_setr_epi16)
#[inline]
#[target_feature(enable = "sse2")]
// no particular instruction to test
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_setr_epi16(
    e7: i16,
    e6: i16,
    e5: i16,
    e4: i16,
    e3: i16,
    e2: i16,
    e1: i16,
    e0: i16,
) -> __m128i {
    // "Reverse" set: forwards the arguments to `_mm_set_epi16` in flipped order.
    _mm_set_epi16(e0, e1, e2, e3, e4, e5, e6, e7)
}
1109 | ||
/// Sets packed 8-bit integers with the supplied values in reverse order.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_setr_epi8)
#[inline]
#[target_feature(enable = "sse2")]
// no particular instruction to test
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_setr_epi8(
    e15: i8,
    e14: i8,
    e13: i8,
    e12: i8,
    e11: i8,
    e10: i8,
    e9: i8,
    e8: i8,
    e7: i8,
    e6: i8,
    e5: i8,
    e4: i8,
    e3: i8,
    e2: i8,
    e1: i8,
    e0: i8,
) -> __m128i {
    // "Reverse" set: forwards the arguments to `_mm_set_epi8` in flipped order.
    #[rustfmt::skip]
    _mm_set_epi8(
        e0, e1, e2, e3, e4, e5, e6, e7, e8, e9, e10, e11, e12, e13, e14, e15,
    )
}
1140 | ||
/// Returns a vector with all elements set to zero.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_setzero_si128)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(xorps))] // zeroing is emitted as a register self-XOR
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_setzero_si128() -> __m128i {
    _mm_set1_epi64x(0)
}
1151 | ||
/// Loads 64-bit integer from memory into first element of returned vector.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_loadl_epi64)
#[inline]
#[target_feature(enable = "sse2")]
// FIXME movsd on windows
#[cfg_attr(
    all(
        test,
        not(windows),
        not(all(target_os = "linux", target_arch = "x86_64")),
        target_arch = "x86_64"
    ),
    assert_instr(movq)
)]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_loadl_epi64(mem_addr: *const __m128i) -> __m128i {
    // Unaligned 64-bit read of the low half; the upper lane is set to zero.
    _mm_set_epi64x(0, ptr::read_unaligned(mem_addr as *const i64))
}
1171 | ||
/// Loads 128-bits of integer data from memory into a new vector.
///
/// `mem_addr` must be aligned on a 16-byte boundary.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_load_si128)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(movaps))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_load_si128(mem_addr: *const __m128i) -> __m128i {
    // Plain (aligned) dereference; misaligned `mem_addr` is undefined behavior.
    *mem_addr
}
1184 | ||
/// Loads 128-bits of integer data from memory into a new vector.
///
/// `mem_addr` does not need to be aligned on any particular boundary.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_loadu_si128)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(movups))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_loadu_si128(mem_addr: *const __m128i) -> __m128i {
    // Byte-wise copy into a local vector, which places no alignment
    // requirement on `mem_addr`.
    let mut dst: __m128i = _mm_undefined_si128();
    ptr::copy_nonoverlapping(
        mem_addr as *const u8,
        &mut dst as *mut __m128i as *mut u8,
        mem::size_of::<__m128i>(),
    );
    dst
}
1203 | ||
/// Conditionally store 8-bit integer elements from `a` into memory using
/// `mask`.
///
/// Elements are not stored when the highest bit is not set in the
/// corresponding element.
///
/// `mem_addr` should correspond to a 128-bit memory location and does not need
/// to be aligned on any particular boundary.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskmoveu_si128)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(maskmovdqu))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_maskmoveu_si128(a: __m128i, mask: __m128i, mem_addr: *mut i8) {
    maskmovdqu(a.as_i8x16(), mask.as_i8x16(), mem_addr)
}
1221 | ||
/// Stores 128-bits of integer data from `a` into memory.
///
/// `mem_addr` must be aligned on a 16-byte boundary.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_store_si128)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(movaps))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_store_si128(mem_addr: *mut __m128i, a: __m128i) {
    // Plain (aligned) store; misaligned `mem_addr` is undefined behavior.
    *mem_addr = a;
}
1234 | ||
/// Stores 128-bits of integer data from `a` into memory.
///
/// `mem_addr` does not need to be aligned on any particular boundary.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_storeu_si128)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(movups))] // FIXME movdqu expected
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_storeu_si128(mem_addr: *mut __m128i, a: __m128i) {
    // Delegates to the unaligned-store intrinsic.
    storeudq(mem_addr as *mut i8, a);
}
1247 | ||
/// Stores the lower 64-bit integer `a` to a memory location.
///
/// `mem_addr` does not need to be aligned on any particular boundary.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_storel_epi64)
#[inline]
#[target_feature(enable = "sse2")]
// FIXME mov on windows, movlps on i686
#[cfg_attr(
    all(
        test,
        not(windows),
        not(all(target_os = "linux", target_arch = "x86_64")),
        target_arch = "x86_64"
    ),
    assert_instr(movq)
)]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_storel_epi64(mem_addr: *mut __m128i, a: __m128i) {
    // Byte-wise copy of the low 8 bytes, so no alignment requirement applies.
    ptr::copy_nonoverlapping(&a as *const _ as *const u8, mem_addr as *mut u8, 8);
}
1269 | ||
/// Stores a 128-bit integer vector to a 128-bit aligned memory location.
/// To minimize caching, the data is flagged as non-temporal (unlikely to be
/// used again soon).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_stream_si128)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(movntps))] // FIXME movntdq
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_stream_si128(mem_addr: *mut __m128i, a: __m128i) {
    // Emits a non-temporal (cache-bypassing) store.
    intrinsics::nontemporal_store(mem_addr, a);
}
1282 | ||
/// Stores a 32-bit integer value in the specified memory location.
/// To minimize caching, the data is flagged as non-temporal (unlikely to be
/// used again soon).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_stream_si32)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(movnti))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_stream_si32(mem_addr: *mut i32, a: i32) {
    // Emits a non-temporal (cache-bypassing) store.
    intrinsics::nontemporal_store(mem_addr, a);
}
1295 | ||
/// Returns a vector where the low element is extracted from `a` and its upper
/// element is zero.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_move_epi64)
#[inline]
#[target_feature(enable = "sse2")]
// FIXME movd on windows, movd on i686
#[cfg_attr(all(test, not(windows), target_arch = "x86_64"), assert_instr(movq))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_move_epi64(a: __m128i) -> __m128i {
    let zero = _mm_setzero_si128();
    // Lane 0 (index 0) comes from `a`; lane 1 (index 2) selects the zero vector.
    let r: i64x2 = simd_shuffle2!(a.as_i64x2(), zero.as_i64x2(), [0, 2]);
    transmute(r)
}
1310 | ||
/// Converts packed 16-bit integers from `a` and `b` to packed 8-bit integers
/// using signed saturation.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_packs_epi16)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(packsswb))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_packs_epi16(a: __m128i, b: __m128i) -> __m128i {
    transmute(packsswb(a.as_i16x8(), b.as_i16x8()))
}
1322 | ||
/// Converts packed 32-bit integers from `a` and `b` to packed 16-bit integers
/// using signed saturation.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_packs_epi32)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(packssdw))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_packs_epi32(a: __m128i, b: __m128i) -> __m128i {
    transmute(packssdw(a.as_i32x4(), b.as_i32x4()))
}
1334 | ||
/// Converts packed 16-bit integers from `a` and `b` to packed 8-bit integers
/// using unsigned saturation.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_packus_epi16)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(packuswb))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_packus_epi16(a: __m128i, b: __m128i) -> __m128i {
    transmute(packuswb(a.as_i16x8(), b.as_i16x8()))
}
1346 | ||
/// Returns the `imm8` element of `a`.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_extract_epi16)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(pextrw, IMM8 = 7))]
#[rustc_legacy_const_generics(1)]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_extract_epi16<const IMM8: i32>(a: __m128i) -> i32 {
    // The lane index must fit in 3 bits (0..=7).
    static_assert_imm3!(IMM8);
    // The extracted 16-bit lane is zero-extended into the i32 result.
    simd_extract::<_, u16>(a.as_u16x8(), IMM8 as u32) as i32
}
1359 | ||
/// Returns a new vector where the `imm8` element of `a` is replaced with `i`.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_insert_epi16)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(pinsrw, IMM8 = 7))]
#[rustc_legacy_const_generics(2)]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_insert_epi16<const IMM8: i32>(a: __m128i, i: i32) -> __m128i {
    // The lane index must fit in 3 bits (0..=7).
    static_assert_imm3!(IMM8);
    // Only the low 16 bits of `i` are inserted.
    transmute(simd_insert(a.as_i16x8(), IMM8 as u32, i as i16))
}
1372 | ||
/// Returns a mask of the most significant bit of each element in `a`.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_movemask_epi8)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(pmovmskb))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_movemask_epi8(a: __m128i) -> i32 {
    pmovmskb(a.as_i8x16())
}
1383 | ||
/// Shuffles 32-bit integers in `a` using the control in `IMM8`.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_shuffle_epi32)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(pshufd, IMM8 = 9))]
#[rustc_legacy_const_generics(1)]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_shuffle_epi32<const IMM8: i32>(a: __m128i) -> __m128i {
    static_assert_imm8!(IMM8);
    let a = a.as_i32x4();
    // Each 2-bit field of IMM8 selects the source lane for one output lane,
    // lowest bits first.
    let x: i32x4 = simd_shuffle4!(
        a,
        a,
        <const IMM8: i32> [
            IMM8 as u32 & 0b11,
            (IMM8 as u32 >> 2) & 0b11,
            (IMM8 as u32 >> 4) & 0b11,
            (IMM8 as u32 >> 6) & 0b11,
        ],
    );
    transmute(x)
}
1407 | ||
/// Shuffles 16-bit integers in the high 64 bits of `a` using the control in
/// `IMM8`.
///
/// Put the results in the high 64 bits of the returned vector, with the low 64
/// bits being copied from `a`.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_shufflehi_epi16)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(pshufhw, IMM8 = 9))]
#[rustc_legacy_const_generics(1)]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_shufflehi_epi16<const IMM8: i32>(a: __m128i) -> __m128i {
    static_assert_imm8!(IMM8);
    let a = a.as_i16x8();
    // Lanes 0..=3 pass through unchanged; each 2-bit field of IMM8 picks a
    // source lane (offset by 4) for the four high output lanes.
    let x: i16x8 = simd_shuffle8!(
        a,
        a,
        <const IMM8: i32> [
            0,
            1,
            2,
            3,
            (IMM8 as u32 & 0b11) + 4,
            ((IMM8 as u32 >> 2) & 0b11) + 4,
            ((IMM8 as u32 >> 4) & 0b11) + 4,
            ((IMM8 as u32 >> 6) & 0b11) + 4,
        ],
    );
    transmute(x)
}
1439 | ||
/// Shuffles 16-bit integers in the low 64 bits of `a` using the control in
/// `IMM8`.
///
/// Put the results in the low 64 bits of the returned vector, with the high 64
/// bits being copied from `a`.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_shufflelo_epi16)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(pshuflw, IMM8 = 9))]
#[rustc_legacy_const_generics(1)]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_shufflelo_epi16<const IMM8: i32>(a: __m128i) -> __m128i {
    static_assert_imm8!(IMM8);
    let a = a.as_i16x8();
    // Each 2-bit field of IMM8 picks a source lane for the four low output
    // lanes; lanes 4..=7 pass through unchanged.
    let x: i16x8 = simd_shuffle8!(
        a,
        a,
        <const IMM8: i32> [
            IMM8 as u32 & 0b11,
            (IMM8 as u32 >> 2) & 0b11,
            (IMM8 as u32 >> 4) & 0b11,
            (IMM8 as u32 >> 6) & 0b11,
            4,
            5,
            6,
            7,
        ],
    );
    transmute(x)
}
1471 | ||
/// Unpacks and interleave 8-bit integers from the high half of `a` and `b`.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_unpackhi_epi8)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(punpckhbw))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_unpackhi_epi8(a: __m128i, b: __m128i) -> __m128i {
    // Indices 0..=15 address `a`, 16..=31 address `b`; the pattern interleaves
    // the high 8 bytes of each input.
    transmute::<i8x16, _>(simd_shuffle16!(
        a.as_i8x16(),
        b.as_i8x16(),
        [8, 24, 9, 25, 10, 26, 11, 27, 12, 28, 13, 29, 14, 30, 15, 31],
    ))
}
1486 | ||
/// Unpacks and interleave 16-bit integers from the high half of `a` and `b`.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_unpackhi_epi16)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(punpckhwd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_unpackhi_epi16(a: __m128i, b: __m128i) -> __m128i {
    // Indices 0..=7 address `a`, 8..=15 address `b`.
    let x = simd_shuffle8!(a.as_i16x8(), b.as_i16x8(), [4, 12, 5, 13, 6, 14, 7, 15]);
    transmute::<i16x8, _>(x)
}
1498 | ||
/// Unpacks and interleave 32-bit integers from the high half of `a` and `b`.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_unpackhi_epi32)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(unpckhps))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_unpackhi_epi32(a: __m128i, b: __m128i) -> __m128i {
    transmute::<i32x4, _>(simd_shuffle4!(a.as_i32x4(), b.as_i32x4(), [2, 6, 3, 7]))
}
1509 | ||
/// Unpacks and interleave 64-bit integers from the high half of `a` and `b`.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_unpackhi_epi64)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(unpckhpd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_unpackhi_epi64(a: __m128i, b: __m128i) -> __m128i {
    transmute::<i64x2, _>(simd_shuffle2!(a.as_i64x2(), b.as_i64x2(), [1, 3]))
}
1520 | ||
/// Unpacks and interleave 8-bit integers from the low half of `a` and `b`.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_unpacklo_epi8)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(punpcklbw))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_unpacklo_epi8(a: __m128i, b: __m128i) -> __m128i {
    // Indices 0..=15 address `a`, 16..=31 address `b`; the pattern interleaves
    // the low 8 bytes of each input.
    transmute::<i8x16, _>(simd_shuffle16!(
        a.as_i8x16(),
        b.as_i8x16(),
        [0, 16, 1, 17, 2, 18, 3, 19, 4, 20, 5, 21, 6, 22, 7, 23],
    ))
}
1535 | ||
/// Unpacks and interleave 16-bit integers from the low half of `a` and `b`.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_unpacklo_epi16)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(punpcklwd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_unpacklo_epi16(a: __m128i, b: __m128i) -> __m128i {
    // Indices 0..=7 address `a`, 8..=15 address `b`.
    let x = simd_shuffle8!(a.as_i16x8(), b.as_i16x8(), [0, 8, 1, 9, 2, 10, 3, 11]);
    transmute::<i16x8, _>(x)
}
1547 | ||
/// Unpacks and interleave 32-bit integers from the low half of `a` and `b`.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_unpacklo_epi32)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(unpcklps))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_unpacklo_epi32(a: __m128i, b: __m128i) -> __m128i {
    transmute::<i32x4, _>(simd_shuffle4!(a.as_i32x4(), b.as_i32x4(), [0, 4, 1, 5]))
}
1558 | ||
/// Unpacks and interleave 64-bit integers from the low half of `a` and `b`.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_unpacklo_epi64)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(all(test, not(target_os = "windows")), assert_instr(movlhps))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_unpacklo_epi64(a: __m128i, b: __m128i) -> __m128i {
    transmute::<i64x2, _>(simd_shuffle2!(a.as_i64x2(), b.as_i64x2(), [0, 2]))
}
1569 | ||
/// Returns a new vector with the low element of `a` replaced by the sum of the
/// low elements of `a` and `b`.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_add_sd)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(addsd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_add_sd(a: __m128d, b: __m128d) -> __m128d {
    // Scalar add on lane 0; the upper lane of `a` is preserved.
    simd_insert(a, 0, _mm_cvtsd_f64(a) + _mm_cvtsd_f64(b))
}
1581 | ||
/// Adds packed double-precision (64-bit) floating-point elements in `a` and
/// `b`.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_add_pd)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(addpd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_add_pd(a: __m128d, b: __m128d) -> __m128d {
    simd_add(a, b)
}
1593 | ||
/// Returns a new vector with the low element of `a` replaced by the result of
/// dividing the lower element of `a` by the lower element of `b`.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_div_sd)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(divsd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_div_sd(a: __m128d, b: __m128d) -> __m128d {
    // Scalar divide on lane 0; the upper lane of `a` is preserved.
    simd_insert(a, 0, _mm_cvtsd_f64(a) / _mm_cvtsd_f64(b))
}
1605 | ||
/// Divide packed double-precision (64-bit) floating-point elements in `a` by
/// packed elements in `b`.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_div_pd)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(divpd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_div_pd(a: __m128d, b: __m128d) -> __m128d {
    simd_div(a, b)
}
1617 | ||
/// Returns a new vector with the low element of `a` replaced by the maximum
/// of the lower elements of `a` and `b`.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_max_sd)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(maxsd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_max_sd(a: __m128d, b: __m128d) -> __m128d {
    maxsd(a, b)
}
1629 | ||
/// Returns a new vector with the maximum values from corresponding elements in
/// `a` and `b`.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_max_pd)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(maxpd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_max_pd(a: __m128d, b: __m128d) -> __m128d {
    maxpd(a, b)
}
1641 | ||
/// Returns a new vector with the low element of `a` replaced by the minimum
/// of the lower elements of `a` and `b`.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_min_sd)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(minsd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_min_sd(a: __m128d, b: __m128d) -> __m128d {
    minsd(a, b)
}
1653 | ||
/// Returns a new vector with the minimum values from corresponding elements in
/// `a` and `b`.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_min_pd)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(minpd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_min_pd(a: __m128d, b: __m128d) -> __m128d {
    minpd(a, b)
}
1665 | ||
/// Returns a new vector with the low element of `a` replaced by multiplying the
/// low elements of `a` and `b`.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mul_sd)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(mulsd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_mul_sd(a: __m128d, b: __m128d) -> __m128d {
    // Scalar multiply on lane 0; the upper lane of `a` is preserved.
    simd_insert(a, 0, _mm_cvtsd_f64(a) * _mm_cvtsd_f64(b))
}
1677 | ||
/// Multiplies packed double-precision (64-bit) floating-point elements in `a`
/// and `b`.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mul_pd)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(mulpd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_mul_pd(a: __m128d, b: __m128d) -> __m128d {
    simd_mul(a, b)
}
1689 | ||
/// Returns a new vector with the low element of `a` replaced by the square
/// root of the lower element of `b`.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_sqrt_sd)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(sqrtsd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_sqrt_sd(a: __m128d, b: __m128d) -> __m128d {
    // Note the square root is taken from `b`'s low lane, not `a`'s.
    simd_insert(a, 0, _mm_cvtsd_f64(sqrtsd(b)))
}
1701 | ||
/// Returns a new vector with the square root of each of the values in `a`.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_sqrt_pd)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(sqrtpd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_sqrt_pd(a: __m128d) -> __m128d {
    simd_fsqrt(a)
}
1712 | ||
/// Returns a new vector with the low element of `a` replaced by subtracting the
/// low element of `b` from the low element of `a`.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_sub_sd)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(subsd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_sub_sd(a: __m128d, b: __m128d) -> __m128d {
    // Scalar subtract on lane 0; the upper lane of `a` is preserved.
    simd_insert(a, 0, _mm_cvtsd_f64(a) - _mm_cvtsd_f64(b))
}
1724 | ||
/// Subtract packed double-precision (64-bit) floating-point elements in `b`
/// from `a`.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_sub_pd)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(subpd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_sub_pd(a: __m128d, b: __m128d) -> __m128d {
    simd_sub(a, b)
}
1736 | ||
/// Computes the bitwise AND of packed double-precision (64-bit) floating-point
/// elements in `a` and `b`.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_and_pd)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(andps))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_and_pd(a: __m128d, b: __m128d) -> __m128d {
    // Bitwise ops are defined on the integer view; reinterpret, AND, and
    // reinterpret back (no value conversion happens).
    let a: __m128i = transmute(a);
    let b: __m128i = transmute(b);
    transmute(_mm_and_si128(a, b))
}
1750 | ||
/// Computes the bitwise NOT of `a` and then AND with `b`.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_andnot_pd)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(andnps))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_andnot_pd(a: __m128d, b: __m128d) -> __m128d {
    // Reinterpret as integers, compute (!a) & b, reinterpret back.
    let a: __m128i = transmute(a);
    let b: __m128i = transmute(b);
    transmute(_mm_andnot_si128(a, b))
}
1763 | ||
/// Computes the bitwise OR of `a` and `b`.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_or_pd)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(orps))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_or_pd(a: __m128d, b: __m128d) -> __m128d {
    // Reinterpret as integers, OR, reinterpret back.
    let a: __m128i = transmute(a);
    let b: __m128i = transmute(b);
    transmute(_mm_or_si128(a, b))
}
1776 | ||
/// Computes the bitwise XOR of `a` and `b`.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_xor_pd)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(xorps))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_xor_pd(a: __m128d, b: __m128d) -> __m128d {
    // Reinterpret as integers, XOR, reinterpret back.
    let a: __m128i = transmute(a);
    let b: __m128i = transmute(b);
    transmute(_mm_xor_si128(a, b))
}
1789 | ||
/// Returns a new vector with the low element of `a` replaced by the equality
/// comparison of the lower elements of `a` and `b`.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cmpeq_sd)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(cmpeqsd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_cmpeq_sd(a: __m128d, b: __m128d) -> __m128d {
    // Predicate 0 = equal (ordered).
    cmpsd(a, b, 0)
}
1801 | ||
/// Returns a new vector with the low element of `a` replaced by the less-than
/// comparison of the lower elements of `a` and `b`.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cmplt_sd)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(cmpltsd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_cmplt_sd(a: __m128d, b: __m128d) -> __m128d {
    // Predicate 1 = less-than (ordered).
    cmpsd(a, b, 1)
}
1813 | ||
/// Returns a new vector with the low element of `a` replaced by the
/// less-than-or-equal comparison of the lower elements of `a` and `b`.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cmple_sd)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(cmplesd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_cmple_sd(a: __m128d, b: __m128d) -> __m128d {
    // Predicate 2 = less-than-or-equal (ordered).
    cmpsd(a, b, 2)
}
1825 | ||
/// Returns a new vector with the low element of `a` replaced by the
/// greater-than comparison of the lower elements of `a` and `b`.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cmpgt_sd)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(cmpltsd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_cmpgt_sd(a: __m128d, b: __m128d) -> __m128d {
    // a > b is implemented as b < a (hence the cmpltsd codegen); the upper
    // lane is then restored from `a`, as the intrinsic's contract requires.
    simd_insert(_mm_cmplt_sd(b, a), 1, simd_extract::<_, f64>(a, 1))
}
1837 | ||
/// Returns a new vector with the low element of `a` replaced by the
/// greater-than-or-equal comparison of the lower elements of `a` and `b`.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cmpge_sd)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(cmplesd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_cmpge_sd(a: __m128d, b: __m128d) -> __m128d {
    // a >= b is implemented as b <= a (hence the cmplesd codegen); the upper
    // lane is then restored from `a`.
    simd_insert(_mm_cmple_sd(b, a), 1, simd_extract::<_, f64>(a, 1))
}
1849 | ||
/// Returns a new vector with the low element of `a` replaced by the result
/// of comparing both of the lower elements of `a` and `b` to `NaN`. If
/// neither are equal to `NaN` then `0xFFFFFFFFFFFFFFFF` is used and `0`
/// otherwise.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cmpord_sd)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(cmpordsd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_cmpord_sd(a: __m128d, b: __m128d) -> __m128d {
    // Predicate 7 = ordered (neither operand is NaN).
    cmpsd(a, b, 7)
}
1863 | ||
532ac7d7 | 1864 | /// Returns a new vector with the low element of `a` replaced by the result of |
0531ce1d XL |
1865 | /// comparing both of the lower elements of `a` and `b` to `NaN`. If either is |
1866 | /// equal to `NaN` then `0xFFFFFFFFFFFFFFFF` is used and `0` otherwise. | |
83c7162d XL |
1867 | /// |
1868 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cmpunord_sd) | |
0531ce1d XL |
1869 | #[inline] |
1870 | #[target_feature(enable = "sse2")] | |
1871 | #[cfg_attr(test, assert_instr(cmpunordsd))] | |
83c7162d | 1872 | #[stable(feature = "simd_x86", since = "1.27.0")] |
0531ce1d XL |
1873 | pub unsafe fn _mm_cmpunord_sd(a: __m128d, b: __m128d) -> __m128d { |
1874 | cmpsd(a, b, 3) | |
1875 | } | |
1876 | ||
532ac7d7 | 1877 | /// Returns a new vector with the low element of `a` replaced by the not-equal |
0531ce1d | 1878 | /// comparison of the lower elements of `a` and `b`. |
83c7162d XL |
1879 | /// |
1880 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cmpneq_sd) | |
0531ce1d XL |
1881 | #[inline] |
1882 | #[target_feature(enable = "sse2")] | |
1883 | #[cfg_attr(test, assert_instr(cmpneqsd))] | |
83c7162d | 1884 | #[stable(feature = "simd_x86", since = "1.27.0")] |
0531ce1d XL |
1885 | pub unsafe fn _mm_cmpneq_sd(a: __m128d, b: __m128d) -> __m128d { |
1886 | cmpsd(a, b, 4) | |
1887 | } | |
1888 | ||
532ac7d7 | 1889 | /// Returns a new vector with the low element of `a` replaced by the |
0531ce1d | 1890 | /// not-less-than comparison of the lower elements of `a` and `b`. |
83c7162d XL |
1891 | /// |
1892 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cmpnlt_sd) | |
0531ce1d XL |
1893 | #[inline] |
1894 | #[target_feature(enable = "sse2")] | |
1895 | #[cfg_attr(test, assert_instr(cmpnltsd))] | |
83c7162d | 1896 | #[stable(feature = "simd_x86", since = "1.27.0")] |
0531ce1d XL |
1897 | pub unsafe fn _mm_cmpnlt_sd(a: __m128d, b: __m128d) -> __m128d { |
1898 | cmpsd(a, b, 5) | |
1899 | } | |
1900 | ||
532ac7d7 | 1901 | /// Returns a new vector with the low element of `a` replaced by the |
0531ce1d | 1902 | /// not-less-than-or-equal comparison of the lower elements of `a` and `b`. |
83c7162d XL |
1903 | /// |
1904 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cmpnle_sd) | |
0531ce1d XL |
1905 | #[inline] |
1906 | #[target_feature(enable = "sse2")] | |
1907 | #[cfg_attr(test, assert_instr(cmpnlesd))] | |
83c7162d | 1908 | #[stable(feature = "simd_x86", since = "1.27.0")] |
0531ce1d XL |
1909 | pub unsafe fn _mm_cmpnle_sd(a: __m128d, b: __m128d) -> __m128d { |
1910 | cmpsd(a, b, 6) | |
1911 | } | |
1912 | ||
532ac7d7 | 1913 | /// Returns a new vector with the low element of `a` replaced by the |
0531ce1d | 1914 | /// not-greater-than comparison of the lower elements of `a` and `b`. |
83c7162d XL |
1915 | /// |
1916 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cmpngt_sd) | |
0531ce1d XL |
1917 | #[inline] |
1918 | #[target_feature(enable = "sse2")] | |
1919 | #[cfg_attr(test, assert_instr(cmpnltsd))] | |
83c7162d | 1920 | #[stable(feature = "simd_x86", since = "1.27.0")] |
0531ce1d | 1921 | pub unsafe fn _mm_cmpngt_sd(a: __m128d, b: __m128d) -> __m128d { |
8faf50e0 | 1922 | simd_insert(_mm_cmpnlt_sd(b, a), 1, simd_extract::<_, f64>(a, 1)) |
0531ce1d XL |
1923 | } |
1924 | ||
532ac7d7 | 1925 | /// Returns a new vector with the low element of `a` replaced by the |
0531ce1d | 1926 | /// not-greater-than-or-equal comparison of the lower elements of `a` and `b`. |
83c7162d XL |
1927 | /// |
1928 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cmpnge_sd) | |
0531ce1d XL |
1929 | #[inline] |
1930 | #[target_feature(enable = "sse2")] | |
1931 | #[cfg_attr(test, assert_instr(cmpnlesd))] | |
83c7162d | 1932 | #[stable(feature = "simd_x86", since = "1.27.0")] |
0531ce1d | 1933 | pub unsafe fn _mm_cmpnge_sd(a: __m128d, b: __m128d) -> __m128d { |
8faf50e0 | 1934 | simd_insert(_mm_cmpnle_sd(b, a), 1, simd_extract::<_, f64>(a, 1)) |
0531ce1d XL |
1935 | } |
1936 | ||
532ac7d7 | 1937 | /// Compares corresponding elements in `a` and `b` for equality. |
83c7162d XL |
1938 | /// |
1939 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cmpeq_pd) | |
0531ce1d XL |
1940 | #[inline] |
1941 | #[target_feature(enable = "sse2")] | |
1942 | #[cfg_attr(test, assert_instr(cmpeqpd))] | |
83c7162d | 1943 | #[stable(feature = "simd_x86", since = "1.27.0")] |
0531ce1d XL |
1944 | pub unsafe fn _mm_cmpeq_pd(a: __m128d, b: __m128d) -> __m128d { |
1945 | cmppd(a, b, 0) | |
1946 | } | |
1947 | ||
532ac7d7 | 1948 | /// Compares corresponding elements in `a` and `b` for less-than. |
83c7162d XL |
1949 | /// |
1950 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cmplt_pd) | |
0531ce1d XL |
1951 | #[inline] |
1952 | #[target_feature(enable = "sse2")] | |
1953 | #[cfg_attr(test, assert_instr(cmpltpd))] | |
83c7162d | 1954 | #[stable(feature = "simd_x86", since = "1.27.0")] |
0531ce1d XL |
1955 | pub unsafe fn _mm_cmplt_pd(a: __m128d, b: __m128d) -> __m128d { |
1956 | cmppd(a, b, 1) | |
1957 | } | |
1958 | ||
532ac7d7 | 1959 | /// Compares corresponding elements in `a` and `b` for less-than-or-equal |
83c7162d XL |
1960 | /// |
1961 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cmple_pd) | |
0531ce1d XL |
1962 | #[inline] |
1963 | #[target_feature(enable = "sse2")] | |
1964 | #[cfg_attr(test, assert_instr(cmplepd))] | |
83c7162d | 1965 | #[stable(feature = "simd_x86", since = "1.27.0")] |
0531ce1d XL |
1966 | pub unsafe fn _mm_cmple_pd(a: __m128d, b: __m128d) -> __m128d { |
1967 | cmppd(a, b, 2) | |
1968 | } | |
1969 | ||
532ac7d7 | 1970 | /// Compares corresponding elements in `a` and `b` for greater-than. |
83c7162d XL |
1971 | /// |
1972 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cmpgt_pd) | |
0531ce1d XL |
1973 | #[inline] |
1974 | #[target_feature(enable = "sse2")] | |
1975 | #[cfg_attr(test, assert_instr(cmpltpd))] | |
83c7162d | 1976 | #[stable(feature = "simd_x86", since = "1.27.0")] |
0531ce1d XL |
1977 | pub unsafe fn _mm_cmpgt_pd(a: __m128d, b: __m128d) -> __m128d { |
1978 | _mm_cmplt_pd(b, a) | |
1979 | } | |
1980 | ||
532ac7d7 | 1981 | /// Compares corresponding elements in `a` and `b` for greater-than-or-equal. |
83c7162d XL |
1982 | /// |
1983 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cmpge_pd) | |
0531ce1d XL |
1984 | #[inline] |
1985 | #[target_feature(enable = "sse2")] | |
1986 | #[cfg_attr(test, assert_instr(cmplepd))] | |
83c7162d | 1987 | #[stable(feature = "simd_x86", since = "1.27.0")] |
0531ce1d XL |
1988 | pub unsafe fn _mm_cmpge_pd(a: __m128d, b: __m128d) -> __m128d { |
1989 | _mm_cmple_pd(b, a) | |
1990 | } | |
1991 | ||
532ac7d7 | 1992 | /// Compares corresponding elements in `a` and `b` to see if neither is `NaN`. |
83c7162d XL |
1993 | /// |
1994 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cmpord_pd) | |
0531ce1d XL |
1995 | #[inline] |
1996 | #[target_feature(enable = "sse2")] | |
1997 | #[cfg_attr(test, assert_instr(cmpordpd))] | |
83c7162d | 1998 | #[stable(feature = "simd_x86", since = "1.27.0")] |
0531ce1d XL |
1999 | pub unsafe fn _mm_cmpord_pd(a: __m128d, b: __m128d) -> __m128d { |
2000 | cmppd(a, b, 7) | |
2001 | } | |
2002 | ||
532ac7d7 | 2003 | /// Compares corresponding elements in `a` and `b` to see if either is `NaN`. |
83c7162d XL |
2004 | /// |
2005 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cmpunord_pd) | |
0531ce1d XL |
2006 | #[inline] |
2007 | #[target_feature(enable = "sse2")] | |
2008 | #[cfg_attr(test, assert_instr(cmpunordpd))] | |
83c7162d | 2009 | #[stable(feature = "simd_x86", since = "1.27.0")] |
0531ce1d XL |
2010 | pub unsafe fn _mm_cmpunord_pd(a: __m128d, b: __m128d) -> __m128d { |
2011 | cmppd(a, b, 3) | |
2012 | } | |
2013 | ||
532ac7d7 | 2014 | /// Compares corresponding elements in `a` and `b` for not-equal. |
83c7162d XL |
2015 | /// |
2016 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cmpneq_pd) | |
0531ce1d XL |
2017 | #[inline] |
2018 | #[target_feature(enable = "sse2")] | |
2019 | #[cfg_attr(test, assert_instr(cmpneqpd))] | |
83c7162d | 2020 | #[stable(feature = "simd_x86", since = "1.27.0")] |
0531ce1d XL |
2021 | pub unsafe fn _mm_cmpneq_pd(a: __m128d, b: __m128d) -> __m128d { |
2022 | cmppd(a, b, 4) | |
2023 | } | |
2024 | ||
532ac7d7 | 2025 | /// Compares corresponding elements in `a` and `b` for not-less-than. |
83c7162d XL |
2026 | /// |
2027 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cmpnlt_pd) | |
0531ce1d XL |
2028 | #[inline] |
2029 | #[target_feature(enable = "sse2")] | |
2030 | #[cfg_attr(test, assert_instr(cmpnltpd))] | |
83c7162d | 2031 | #[stable(feature = "simd_x86", since = "1.27.0")] |
0531ce1d XL |
2032 | pub unsafe fn _mm_cmpnlt_pd(a: __m128d, b: __m128d) -> __m128d { |
2033 | cmppd(a, b, 5) | |
2034 | } | |
2035 | ||
532ac7d7 | 2036 | /// Compares corresponding elements in `a` and `b` for not-less-than-or-equal. |
83c7162d XL |
2037 | /// |
2038 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cmpnle_pd) | |
0531ce1d XL |
2039 | #[inline] |
2040 | #[target_feature(enable = "sse2")] | |
2041 | #[cfg_attr(test, assert_instr(cmpnlepd))] | |
83c7162d | 2042 | #[stable(feature = "simd_x86", since = "1.27.0")] |
0531ce1d XL |
2043 | pub unsafe fn _mm_cmpnle_pd(a: __m128d, b: __m128d) -> __m128d { |
2044 | cmppd(a, b, 6) | |
2045 | } | |
2046 | ||
532ac7d7 | 2047 | /// Compares corresponding elements in `a` and `b` for not-greater-than. |
83c7162d XL |
2048 | /// |
2049 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cmpngt_pd) | |
0531ce1d XL |
2050 | #[inline] |
2051 | #[target_feature(enable = "sse2")] | |
2052 | #[cfg_attr(test, assert_instr(cmpnltpd))] | |
83c7162d | 2053 | #[stable(feature = "simd_x86", since = "1.27.0")] |
0531ce1d XL |
2054 | pub unsafe fn _mm_cmpngt_pd(a: __m128d, b: __m128d) -> __m128d { |
2055 | _mm_cmpnlt_pd(b, a) | |
2056 | } | |
2057 | ||
532ac7d7 | 2058 | /// Compares corresponding elements in `a` and `b` for |
0531ce1d | 2059 | /// not-greater-than-or-equal. |
83c7162d XL |
2060 | /// |
2061 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cmpnge_pd) | |
0531ce1d XL |
2062 | #[inline] |
2063 | #[target_feature(enable = "sse2")] | |
2064 | #[cfg_attr(test, assert_instr(cmpnlepd))] | |
83c7162d | 2065 | #[stable(feature = "simd_x86", since = "1.27.0")] |
0531ce1d XL |
2066 | pub unsafe fn _mm_cmpnge_pd(a: __m128d, b: __m128d) -> __m128d { |
2067 | _mm_cmpnle_pd(b, a) | |
2068 | } | |
2069 | ||
532ac7d7 | 2070 | /// Compares the lower element of `a` and `b` for equality. |
83c7162d XL |
2071 | /// |
2072 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_comieq_sd) | |
0531ce1d XL |
2073 | #[inline] |
2074 | #[target_feature(enable = "sse2")] | |
2075 | #[cfg_attr(test, assert_instr(comisd))] | |
83c7162d | 2076 | #[stable(feature = "simd_x86", since = "1.27.0")] |
0531ce1d XL |
2077 | pub unsafe fn _mm_comieq_sd(a: __m128d, b: __m128d) -> i32 { |
2078 | comieqsd(a, b) | |
2079 | } | |
2080 | ||
532ac7d7 | 2081 | /// Compares the lower element of `a` and `b` for less-than. |
83c7162d XL |
2082 | /// |
2083 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_comilt_sd) | |
0531ce1d XL |
2084 | #[inline] |
2085 | #[target_feature(enable = "sse2")] | |
2086 | #[cfg_attr(test, assert_instr(comisd))] | |
83c7162d | 2087 | #[stable(feature = "simd_x86", since = "1.27.0")] |
0531ce1d XL |
2088 | pub unsafe fn _mm_comilt_sd(a: __m128d, b: __m128d) -> i32 { |
2089 | comiltsd(a, b) | |
2090 | } | |
2091 | ||
532ac7d7 | 2092 | /// Compares the lower element of `a` and `b` for less-than-or-equal. |
83c7162d XL |
2093 | /// |
2094 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_comile_sd) | |
0531ce1d XL |
2095 | #[inline] |
2096 | #[target_feature(enable = "sse2")] | |
2097 | #[cfg_attr(test, assert_instr(comisd))] | |
83c7162d | 2098 | #[stable(feature = "simd_x86", since = "1.27.0")] |
0531ce1d XL |
2099 | pub unsafe fn _mm_comile_sd(a: __m128d, b: __m128d) -> i32 { |
2100 | comilesd(a, b) | |
2101 | } | |
2102 | ||
532ac7d7 | 2103 | /// Compares the lower element of `a` and `b` for greater-than. |
83c7162d XL |
2104 | /// |
2105 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_comigt_sd) | |
0531ce1d XL |
2106 | #[inline] |
2107 | #[target_feature(enable = "sse2")] | |
2108 | #[cfg_attr(test, assert_instr(comisd))] | |
83c7162d | 2109 | #[stable(feature = "simd_x86", since = "1.27.0")] |
0531ce1d XL |
2110 | pub unsafe fn _mm_comigt_sd(a: __m128d, b: __m128d) -> i32 { |
2111 | comigtsd(a, b) | |
2112 | } | |
2113 | ||
532ac7d7 | 2114 | /// Compares the lower element of `a` and `b` for greater-than-or-equal. |
83c7162d XL |
2115 | /// |
2116 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_comige_sd) | |
0531ce1d XL |
2117 | #[inline] |
2118 | #[target_feature(enable = "sse2")] | |
2119 | #[cfg_attr(test, assert_instr(comisd))] | |
83c7162d | 2120 | #[stable(feature = "simd_x86", since = "1.27.0")] |
0531ce1d XL |
2121 | pub unsafe fn _mm_comige_sd(a: __m128d, b: __m128d) -> i32 { |
2122 | comigesd(a, b) | |
2123 | } | |
2124 | ||
532ac7d7 | 2125 | /// Compares the lower element of `a` and `b` for not-equal. |
83c7162d XL |
2126 | /// |
2127 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_comineq_sd) | |
0531ce1d XL |
2128 | #[inline] |
2129 | #[target_feature(enable = "sse2")] | |
2130 | #[cfg_attr(test, assert_instr(comisd))] | |
83c7162d | 2131 | #[stable(feature = "simd_x86", since = "1.27.0")] |
0531ce1d XL |
2132 | pub unsafe fn _mm_comineq_sd(a: __m128d, b: __m128d) -> i32 { |
2133 | comineqsd(a, b) | |
2134 | } | |
2135 | ||
532ac7d7 | 2136 | /// Compares the lower element of `a` and `b` for equality. |
83c7162d XL |
2137 | /// |
2138 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_ucomieq_sd) | |
0531ce1d XL |
2139 | #[inline] |
2140 | #[target_feature(enable = "sse2")] | |
2141 | #[cfg_attr(test, assert_instr(ucomisd))] | |
83c7162d | 2142 | #[stable(feature = "simd_x86", since = "1.27.0")] |
0531ce1d XL |
2143 | pub unsafe fn _mm_ucomieq_sd(a: __m128d, b: __m128d) -> i32 { |
2144 | ucomieqsd(a, b) | |
2145 | } | |
2146 | ||
532ac7d7 | 2147 | /// Compares the lower element of `a` and `b` for less-than. |
83c7162d XL |
2148 | /// |
2149 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_ucomilt_sd) | |
0531ce1d XL |
2150 | #[inline] |
2151 | #[target_feature(enable = "sse2")] | |
2152 | #[cfg_attr(test, assert_instr(ucomisd))] | |
83c7162d | 2153 | #[stable(feature = "simd_x86", since = "1.27.0")] |
0531ce1d XL |
2154 | pub unsafe fn _mm_ucomilt_sd(a: __m128d, b: __m128d) -> i32 { |
2155 | ucomiltsd(a, b) | |
2156 | } | |
2157 | ||
532ac7d7 | 2158 | /// Compares the lower element of `a` and `b` for less-than-or-equal. |
83c7162d XL |
2159 | /// |
2160 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_ucomile_sd) | |
0531ce1d XL |
2161 | #[inline] |
2162 | #[target_feature(enable = "sse2")] | |
2163 | #[cfg_attr(test, assert_instr(ucomisd))] | |
83c7162d | 2164 | #[stable(feature = "simd_x86", since = "1.27.0")] |
0531ce1d XL |
2165 | pub unsafe fn _mm_ucomile_sd(a: __m128d, b: __m128d) -> i32 { |
2166 | ucomilesd(a, b) | |
2167 | } | |
2168 | ||
532ac7d7 | 2169 | /// Compares the lower element of `a` and `b` for greater-than. |
83c7162d XL |
2170 | /// |
2171 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_ucomigt_sd) | |
0531ce1d XL |
2172 | #[inline] |
2173 | #[target_feature(enable = "sse2")] | |
2174 | #[cfg_attr(test, assert_instr(ucomisd))] | |
83c7162d | 2175 | #[stable(feature = "simd_x86", since = "1.27.0")] |
0531ce1d XL |
2176 | pub unsafe fn _mm_ucomigt_sd(a: __m128d, b: __m128d) -> i32 { |
2177 | ucomigtsd(a, b) | |
2178 | } | |
2179 | ||
532ac7d7 | 2180 | /// Compares the lower element of `a` and `b` for greater-than-or-equal. |
83c7162d XL |
2181 | /// |
2182 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_ucomige_sd) | |
0531ce1d XL |
2183 | #[inline] |
2184 | #[target_feature(enable = "sse2")] | |
2185 | #[cfg_attr(test, assert_instr(ucomisd))] | |
83c7162d | 2186 | #[stable(feature = "simd_x86", since = "1.27.0")] |
0531ce1d XL |
2187 | pub unsafe fn _mm_ucomige_sd(a: __m128d, b: __m128d) -> i32 { |
2188 | ucomigesd(a, b) | |
2189 | } | |
2190 | ||
532ac7d7 | 2191 | /// Compares the lower element of `a` and `b` for not-equal. |
83c7162d XL |
2192 | /// |
2193 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_ucomineq_sd) | |
0531ce1d XL |
2194 | #[inline] |
2195 | #[target_feature(enable = "sse2")] | |
2196 | #[cfg_attr(test, assert_instr(ucomisd))] | |
83c7162d | 2197 | #[stable(feature = "simd_x86", since = "1.27.0")] |
0531ce1d XL |
2198 | pub unsafe fn _mm_ucomineq_sd(a: __m128d, b: __m128d) -> i32 { |
2199 | ucomineqsd(a, b) | |
2200 | } | |
2201 | ||
e1599b0c | 2202 | /// Converts packed double-precision (64-bit) floating-point elements in `a` to |
0531ce1d | 2203 | /// packed single-precision (32-bit) floating-point elements |
83c7162d XL |
2204 | /// |
2205 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvtpd_ps) | |
0531ce1d XL |
2206 | #[inline] |
2207 | #[target_feature(enable = "sse2")] | |
2208 | #[cfg_attr(test, assert_instr(cvtpd2ps))] | |
83c7162d | 2209 | #[stable(feature = "simd_x86", since = "1.27.0")] |
0531ce1d XL |
2210 | pub unsafe fn _mm_cvtpd_ps(a: __m128d) -> __m128 { |
2211 | cvtpd2ps(a) | |
2212 | } | |
2213 | ||
532ac7d7 | 2214 | /// Converts packed single-precision (32-bit) floating-point elements in `a` to |
0531ce1d XL |
2215 | /// packed |
2216 | /// double-precision (64-bit) floating-point elements. | |
83c7162d XL |
2217 | /// |
2218 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvtps_pd) | |
0531ce1d XL |
2219 | #[inline] |
2220 | #[target_feature(enable = "sse2")] | |
2221 | #[cfg_attr(test, assert_instr(cvtps2pd))] | |
83c7162d | 2222 | #[stable(feature = "simd_x86", since = "1.27.0")] |
0531ce1d XL |
2223 | pub unsafe fn _mm_cvtps_pd(a: __m128) -> __m128d { |
2224 | cvtps2pd(a) | |
2225 | } | |
2226 | ||
532ac7d7 | 2227 | /// Converts packed double-precision (64-bit) floating-point elements in `a` to |
0531ce1d | 2228 | /// packed 32-bit integers. |
83c7162d XL |
2229 | /// |
2230 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvtpd_epi32) | |
0531ce1d XL |
2231 | #[inline] |
2232 | #[target_feature(enable = "sse2")] | |
2233 | #[cfg_attr(test, assert_instr(cvtpd2dq))] | |
83c7162d | 2234 | #[stable(feature = "simd_x86", since = "1.27.0")] |
0531ce1d | 2235 | pub unsafe fn _mm_cvtpd_epi32(a: __m128d) -> __m128i { |
532ac7d7 | 2236 | transmute(cvtpd2dq(a)) |
0531ce1d XL |
2237 | } |
2238 | ||
532ac7d7 | 2239 | /// Converts the lower double-precision (64-bit) floating-point element in a to |
0531ce1d | 2240 | /// a 32-bit integer. |
83c7162d XL |
2241 | /// |
2242 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvtsd_si32) | |
0531ce1d XL |
2243 | #[inline] |
2244 | #[target_feature(enable = "sse2")] | |
2245 | #[cfg_attr(test, assert_instr(cvtsd2si))] | |
83c7162d | 2246 | #[stable(feature = "simd_x86", since = "1.27.0")] |
0531ce1d XL |
2247 | pub unsafe fn _mm_cvtsd_si32(a: __m128d) -> i32 { |
2248 | cvtsd2si(a) | |
2249 | } | |
2250 | ||
532ac7d7 | 2251 | /// Converts the lower double-precision (64-bit) floating-point element in `b` |
0531ce1d | 2252 | /// to a single-precision (32-bit) floating-point element, store the result in |
532ac7d7 | 2253 | /// the lower element of the return value, and copies the upper element from `a` |
0531ce1d | 2254 | /// to the upper element the return value. |
83c7162d XL |
2255 | /// |
2256 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvtsd_ss) | |
0531ce1d XL |
2257 | #[inline] |
2258 | #[target_feature(enable = "sse2")] | |
2259 | #[cfg_attr(test, assert_instr(cvtsd2ss))] | |
83c7162d | 2260 | #[stable(feature = "simd_x86", since = "1.27.0")] |
0531ce1d XL |
2261 | pub unsafe fn _mm_cvtsd_ss(a: __m128, b: __m128d) -> __m128 { |
2262 | cvtsd2ss(a, b) | |
2263 | } | |
2264 | ||
e1599b0c | 2265 | /// Returns the lower double-precision (64-bit) floating-point element of `a`. |
83c7162d XL |
2266 | /// |
2267 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvtsd_f64) | |
0531ce1d XL |
2268 | #[inline] |
2269 | #[target_feature(enable = "sse2")] | |
83c7162d | 2270 | #[stable(feature = "simd_x86", since = "1.27.0")] |
0531ce1d XL |
2271 | pub unsafe fn _mm_cvtsd_f64(a: __m128d) -> f64 { |
2272 | simd_extract(a, 0) | |
2273 | } | |
2274 | ||
532ac7d7 | 2275 | /// Converts the lower single-precision (32-bit) floating-point element in `b` |
0531ce1d | 2276 | /// to a double-precision (64-bit) floating-point element, store the result in |
532ac7d7 | 2277 | /// the lower element of the return value, and copies the upper element from `a` |
0531ce1d | 2278 | /// to the upper element the return value. |
83c7162d XL |
2279 | /// |
2280 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvtss_sd) | |
0531ce1d XL |
2281 | #[inline] |
2282 | #[target_feature(enable = "sse2")] | |
2283 | #[cfg_attr(test, assert_instr(cvtss2sd))] | |
83c7162d | 2284 | #[stable(feature = "simd_x86", since = "1.27.0")] |
0531ce1d XL |
2285 | pub unsafe fn _mm_cvtss_sd(a: __m128d, b: __m128) -> __m128d { |
2286 | cvtss2sd(a, b) | |
2287 | } | |
2288 | ||
532ac7d7 | 2289 | /// Converts packed double-precision (64-bit) floating-point elements in `a` to |
0531ce1d | 2290 | /// packed 32-bit integers with truncation. |
83c7162d XL |
2291 | /// |
2292 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvttpd_epi32) | |
0531ce1d XL |
2293 | #[inline] |
2294 | #[target_feature(enable = "sse2")] | |
2295 | #[cfg_attr(test, assert_instr(cvttpd2dq))] | |
83c7162d | 2296 | #[stable(feature = "simd_x86", since = "1.27.0")] |
0531ce1d | 2297 | pub unsafe fn _mm_cvttpd_epi32(a: __m128d) -> __m128i { |
532ac7d7 | 2298 | transmute(cvttpd2dq(a)) |
0531ce1d XL |
2299 | } |
2300 | ||
532ac7d7 | 2301 | /// Converts the lower double-precision (64-bit) floating-point element in `a` |
0531ce1d | 2302 | /// to a 32-bit integer with truncation. |
83c7162d XL |
2303 | /// |
2304 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvttsd_si32) | |
0531ce1d XL |
2305 | #[inline] |
2306 | #[target_feature(enable = "sse2")] | |
2307 | #[cfg_attr(test, assert_instr(cvttsd2si))] | |
83c7162d | 2308 | #[stable(feature = "simd_x86", since = "1.27.0")] |
0531ce1d XL |
2309 | pub unsafe fn _mm_cvttsd_si32(a: __m128d) -> i32 { |
2310 | cvttsd2si(a) | |
2311 | } | |
2312 | ||
532ac7d7 | 2313 | /// Converts packed single-precision (32-bit) floating-point elements in `a` to |
0531ce1d | 2314 | /// packed 32-bit integers with truncation. |
83c7162d XL |
2315 | /// |
2316 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvttps_epi32) | |
0531ce1d XL |
2317 | #[inline] |
2318 | #[target_feature(enable = "sse2")] | |
2319 | #[cfg_attr(test, assert_instr(cvttps2dq))] | |
83c7162d | 2320 | #[stable(feature = "simd_x86", since = "1.27.0")] |
0531ce1d | 2321 | pub unsafe fn _mm_cvttps_epi32(a: __m128) -> __m128i { |
532ac7d7 | 2322 | transmute(cvttps2dq(a)) |
0531ce1d XL |
2323 | } |
2324 | ||
532ac7d7 | 2325 | /// Copies double-precision (64-bit) floating-point element `a` to the lower |
0531ce1d | 2326 | /// element of the packed 64-bit return value. |
83c7162d XL |
2327 | /// |
2328 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_set_sd) | |
0531ce1d XL |
2329 | #[inline] |
2330 | #[target_feature(enable = "sse2")] | |
83c7162d | 2331 | #[stable(feature = "simd_x86", since = "1.27.0")] |
0531ce1d XL |
2332 | pub unsafe fn _mm_set_sd(a: f64) -> __m128d { |
2333 | _mm_set_pd(0.0, a) | |
2334 | } | |
2335 | ||
532ac7d7 | 2336 | /// Broadcasts double-precision (64-bit) floating-point value a to all elements |
0531ce1d | 2337 | /// of the return value. |
83c7162d XL |
2338 | /// |
2339 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_set1_pd) | |
0531ce1d XL |
2340 | #[inline] |
2341 | #[target_feature(enable = "sse2")] | |
83c7162d | 2342 | #[stable(feature = "simd_x86", since = "1.27.0")] |
0531ce1d XL |
2343 | pub unsafe fn _mm_set1_pd(a: f64) -> __m128d { |
2344 | _mm_set_pd(a, a) | |
2345 | } | |
2346 | ||
532ac7d7 | 2347 | /// Broadcasts double-precision (64-bit) floating-point value a to all elements |
0531ce1d | 2348 | /// of the return value. |
83c7162d XL |
2349 | /// |
2350 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_set_pd1) | |
0531ce1d XL |
2351 | #[inline] |
2352 | #[target_feature(enable = "sse2")] | |
83c7162d | 2353 | #[stable(feature = "simd_x86", since = "1.27.0")] |
0531ce1d XL |
2354 | pub unsafe fn _mm_set_pd1(a: f64) -> __m128d { |
2355 | _mm_set_pd(a, a) | |
2356 | } | |
2357 | ||
532ac7d7 | 2358 | /// Sets packed double-precision (64-bit) floating-point elements in the return |
0531ce1d | 2359 | /// value with the supplied values. |
83c7162d XL |
2360 | /// |
2361 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_set_pd) | |
0531ce1d XL |
2362 | #[inline] |
2363 | #[target_feature(enable = "sse2")] | |
83c7162d | 2364 | #[stable(feature = "simd_x86", since = "1.27.0")] |
0531ce1d XL |
2365 | pub unsafe fn _mm_set_pd(a: f64, b: f64) -> __m128d { |
2366 | __m128d(b, a) | |
2367 | } | |
2368 | ||
532ac7d7 | 2369 | /// Sets packed double-precision (64-bit) floating-point elements in the return |
0531ce1d | 2370 | /// value with the supplied values in reverse order. |
83c7162d XL |
2371 | /// |
2372 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_setr_pd) | |
0531ce1d XL |
2373 | #[inline] |
2374 | #[target_feature(enable = "sse2")] | |
83c7162d | 2375 | #[stable(feature = "simd_x86", since = "1.27.0")] |
0531ce1d XL |
2376 | pub unsafe fn _mm_setr_pd(a: f64, b: f64) -> __m128d { |
2377 | _mm_set_pd(b, a) | |
2378 | } | |
2379 | ||
2380 | /// Returns packed double-precision (64-bit) floating-point elements with all | |
2381 | /// zeros. | |
83c7162d XL |
2382 | /// |
2383 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_setzero_pd) | |
0531ce1d XL |
2384 | #[inline] |
2385 | #[target_feature(enable = "sse2")] | |
2386 | #[cfg_attr(test, assert_instr(xorps))] // FIXME xorpd expected | |
83c7162d | 2387 | #[stable(feature = "simd_x86", since = "1.27.0")] |
0531ce1d XL |
2388 | pub unsafe fn _mm_setzero_pd() -> __m128d { |
2389 | _mm_set_pd(0.0, 0.0) | |
2390 | } | |
2391 | ||
532ac7d7 | 2392 | /// Returns a mask of the most significant bit of each element in `a`. |
0531ce1d XL |
2393 | /// |
2394 | /// The mask is stored in the 2 least significant bits of the return value. | |
2395 | /// All other bits are set to `0`. | |
83c7162d XL |
2396 | /// |
2397 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_movemask_pd) | |
0531ce1d XL |
2398 | #[inline] |
2399 | #[target_feature(enable = "sse2")] | |
2400 | #[cfg_attr(test, assert_instr(movmskpd))] | |
83c7162d | 2401 | #[stable(feature = "simd_x86", since = "1.27.0")] |
0531ce1d XL |
2402 | pub unsafe fn _mm_movemask_pd(a: __m128d) -> i32 { |
2403 | movmskpd(a) | |
2404 | } | |
2405 | ||
532ac7d7 | 2406 | /// Loads 128-bits (composed of 2 packed double-precision (64-bit) |
0531ce1d XL |
2407 | /// floating-point elements) from memory into the returned vector. |
2408 | /// `mem_addr` must be aligned on a 16-byte boundary or a general-protection | |
2409 | /// exception may be generated. | |
83c7162d XL |
2410 | /// |
2411 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_load_pd) | |
0531ce1d XL |
2412 | #[inline] |
2413 | #[target_feature(enable = "sse2")] | |
2414 | #[cfg_attr(test, assert_instr(movaps))] | |
83c7162d | 2415 | #[stable(feature = "simd_x86", since = "1.27.0")] |
48663c56 | 2416 | #[allow(clippy::cast_ptr_alignment)] |
0531ce1d XL |
2417 | pub unsafe fn _mm_load_pd(mem_addr: *const f64) -> __m128d { |
2418 | *(mem_addr as *const __m128d) | |
2419 | } | |
2420 | ||
2421 | /// Loads a 64-bit double-precision value to the low element of a | |
2422 | /// 128-bit integer vector and clears the upper element. | |
83c7162d XL |
2423 | /// |
2424 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_load_sd) | |
0531ce1d XL |
2425 | #[inline] |
2426 | #[target_feature(enable = "sse2")] | |
2427 | #[cfg_attr(test, assert_instr(movsd))] | |
83c7162d | 2428 | #[stable(feature = "simd_x86", since = "1.27.0")] |
0531ce1d XL |
2429 | pub unsafe fn _mm_load_sd(mem_addr: *const f64) -> __m128d { |
2430 | _mm_setr_pd(*mem_addr, 0.) | |
2431 | } | |
2432 | ||
2433 | /// Loads a double-precision value into the high-order bits of a 128-bit | |
83c7162d | 2434 | /// vector of `[2 x double]`. The low-order bits are copied from the low-order |
0531ce1d | 2435 | /// bits of the first operand. |
83c7162d XL |
2436 | /// |
2437 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_loadh_pd) | |
0531ce1d XL |
2438 | #[inline] |
2439 | #[target_feature(enable = "sse2")] | |
e1599b0c | 2440 | #[cfg_attr(test, assert_instr(movhps))] |
83c7162d | 2441 | #[stable(feature = "simd_x86", since = "1.27.0")] |
0531ce1d XL |
2442 | pub unsafe fn _mm_loadh_pd(a: __m128d, mem_addr: *const f64) -> __m128d { |
2443 | _mm_setr_pd(simd_extract(a, 0), *mem_addr) | |
2444 | } | |
2445 | ||
2446 | /// Loads a double-precision value into the low-order bits of a 128-bit | |
83c7162d | 2447 | /// vector of `[2 x double]`. The high-order bits are copied from the |
0531ce1d | 2448 | /// high-order bits of the first operand. |
83c7162d XL |
2449 | /// |
2450 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_loadl_pd) | |
0531ce1d XL |
2451 | #[inline] |
2452 | #[target_feature(enable = "sse2")] | |
e1599b0c | 2453 | #[cfg_attr(test, assert_instr(movlps))] |
83c7162d | 2454 | #[stable(feature = "simd_x86", since = "1.27.0")] |
0531ce1d XL |
2455 | pub unsafe fn _mm_loadl_pd(a: __m128d, mem_addr: *const f64) -> __m128d { |
2456 | _mm_setr_pd(*mem_addr, simd_extract(a, 1)) | |
2457 | } | |
2458 | ||
83c7162d | 2459 | /// Stores a 128-bit floating point vector of `[2 x double]` to a 128-bit |
0531ce1d XL |
2460 | /// aligned memory location. |
2461 | /// To minimize caching, the data is flagged as non-temporal (unlikely to be | |
2462 | /// used again soon). | |
83c7162d XL |
2463 | /// |
2464 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_stream_pd) | |
0531ce1d XL |
2465 | #[inline] |
2466 | #[target_feature(enable = "sse2")] | |
2467 | #[cfg_attr(test, assert_instr(movntps))] // FIXME movntpd | |
83c7162d | 2468 | #[stable(feature = "simd_x86", since = "1.27.0")] |
48663c56 | 2469 | #[allow(clippy::cast_ptr_alignment)] |
0531ce1d | 2470 | pub unsafe fn _mm_stream_pd(mem_addr: *mut f64, a: __m128d) { |
0731742a | 2471 | intrinsics::nontemporal_store(mem_addr as *mut __m128d, a); |
0531ce1d XL |
2472 | } |
2473 | ||
83c7162d | 2474 | /// Stores the lower 64 bits of a 128-bit vector of `[2 x double]` to a |
0531ce1d | 2475 | /// memory location. |
83c7162d XL |
2476 | /// |
2477 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_store_sd) | |
0531ce1d XL |
2478 | #[inline] |
2479 | #[target_feature(enable = "sse2")] | |
0731742a | 2480 | #[cfg_attr(all(test, not(target_os = "windows")), assert_instr(movlps))] |
83c7162d | 2481 | #[stable(feature = "simd_x86", since = "1.27.0")] |
0531ce1d XL |
2482 | pub unsafe fn _mm_store_sd(mem_addr: *mut f64, a: __m128d) { |
2483 | *mem_addr = simd_extract(a, 0) | |
2484 | } | |
2485 | ||
532ac7d7 | 2486 | /// Stores 128-bits (composed of 2 packed double-precision (64-bit) |
0531ce1d XL |
2487 | /// floating-point elements) from `a` into memory. `mem_addr` must be aligned |
2488 | /// on a 16-byte boundary or a general-protection exception may be generated. | |
83c7162d XL |
2489 | /// |
2490 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_store_pd) | |
0531ce1d XL |
2491 | #[inline] |
2492 | #[target_feature(enable = "sse2")] | |
2493 | #[cfg_attr(test, assert_instr(movaps))] | |
83c7162d | 2494 | #[stable(feature = "simd_x86", since = "1.27.0")] |
48663c56 | 2495 | #[allow(clippy::cast_ptr_alignment)] |
0531ce1d XL |
2496 | pub unsafe fn _mm_store_pd(mem_addr: *mut f64, a: __m128d) { |
2497 | *(mem_addr as *mut __m128d) = a; | |
2498 | } | |
2499 | ||
532ac7d7 | 2500 | /// Stores 128-bits (composed of 2 packed double-precision (64-bit) |
0531ce1d XL |
2501 | /// floating-point elements) from `a` into memory. |
2502 | /// `mem_addr` does not need to be aligned on any particular boundary. | |
83c7162d XL |
2503 | /// |
2504 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_storeu_pd) | |
0531ce1d XL |
2505 | #[inline] |
2506 | #[target_feature(enable = "sse2")] | |
2507 | #[cfg_attr(test, assert_instr(movups))] // FIXME movupd expected | |
83c7162d | 2508 | #[stable(feature = "simd_x86", since = "1.27.0")] |
0531ce1d XL |
2509 | pub unsafe fn _mm_storeu_pd(mem_addr: *mut f64, a: __m128d) { |
2510 | storeupd(mem_addr as *mut i8, a); | |
2511 | } | |
2512 | ||
532ac7d7 | 2513 | /// Stores the lower double-precision (64-bit) floating-point element from `a` |
0531ce1d XL |
2514 | /// into 2 contiguous elements in memory. `mem_addr` must be aligned on a |
2515 | /// 16-byte boundary or a general-protection exception may be generated. | |
83c7162d XL |
2516 | /// |
2517 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_store1_pd) | |
0531ce1d XL |
2518 | #[inline] |
2519 | #[target_feature(enable = "sse2")] | |
83c7162d | 2520 | #[stable(feature = "simd_x86", since = "1.27.0")] |
48663c56 | 2521 | #[allow(clippy::cast_ptr_alignment)] |
0531ce1d | 2522 | pub unsafe fn _mm_store1_pd(mem_addr: *mut f64, a: __m128d) { |
17df50a5 | 2523 | let b: __m128d = simd_shuffle2!(a, a, [0, 0]); |
0531ce1d XL |
2524 | *(mem_addr as *mut __m128d) = b; |
2525 | } | |
2526 | ||
532ac7d7 | 2527 | /// Stores the lower double-precision (64-bit) floating-point element from `a` |
0531ce1d XL |
2528 | /// into 2 contiguous elements in memory. `mem_addr` must be aligned on a |
2529 | /// 16-byte boundary or a general-protection exception may be generated. | |
83c7162d XL |
2530 | /// |
2531 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_store_pd1) | |
0531ce1d XL |
2532 | #[inline] |
2533 | #[target_feature(enable = "sse2")] | |
83c7162d | 2534 | #[stable(feature = "simd_x86", since = "1.27.0")] |
48663c56 | 2535 | #[allow(clippy::cast_ptr_alignment)] |
0531ce1d | 2536 | pub unsafe fn _mm_store_pd1(mem_addr: *mut f64, a: __m128d) { |
17df50a5 | 2537 | let b: __m128d = simd_shuffle2!(a, a, [0, 0]); |
0531ce1d XL |
2538 | *(mem_addr as *mut __m128d) = b; |
2539 | } | |
2540 | ||
532ac7d7 | 2541 | /// Stores 2 double-precision (64-bit) floating-point elements from `a` into |
0531ce1d XL |
2542 | /// memory in reverse order. |
2543 | /// `mem_addr` must be aligned on a 16-byte boundary or a general-protection | |
2544 | /// exception may be generated. | |
83c7162d XL |
2545 | /// |
2546 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_storer_pd) | |
0531ce1d XL |
2547 | #[inline] |
2548 | #[target_feature(enable = "sse2")] | |
83c7162d | 2549 | #[stable(feature = "simd_x86", since = "1.27.0")] |
48663c56 | 2550 | #[allow(clippy::cast_ptr_alignment)] |
0531ce1d | 2551 | pub unsafe fn _mm_storer_pd(mem_addr: *mut f64, a: __m128d) { |
17df50a5 | 2552 | let b: __m128d = simd_shuffle2!(a, a, [1, 0]); |
0531ce1d XL |
2553 | *(mem_addr as *mut __m128d) = b; |
2554 | } | |
2555 | ||
83c7162d | 2556 | /// Stores the upper 64 bits of a 128-bit vector of `[2 x double]` to a |
0531ce1d | 2557 | /// memory location. |
83c7162d XL |
2558 | /// |
2559 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_storeh_pd) | |
0531ce1d XL |
2560 | #[inline] |
2561 | #[target_feature(enable = "sse2")] | |
e1599b0c | 2562 | #[cfg_attr(all(test, not(target_os = "windows")), assert_instr(movhps))] |
83c7162d | 2563 | #[stable(feature = "simd_x86", since = "1.27.0")] |
0531ce1d XL |
2564 | pub unsafe fn _mm_storeh_pd(mem_addr: *mut f64, a: __m128d) { |
2565 | *mem_addr = simd_extract(a, 1); | |
2566 | } | |
2567 | ||
83c7162d | 2568 | /// Stores the lower 64 bits of a 128-bit vector of `[2 x double]` to a |
0531ce1d | 2569 | /// memory location. |
83c7162d XL |
2570 | /// |
2571 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_storel_pd) | |
0531ce1d XL |
2572 | #[inline] |
2573 | #[target_feature(enable = "sse2")] | |
0731742a | 2574 | #[cfg_attr(all(test, not(target_os = "windows")), assert_instr(movlps))] |
83c7162d | 2575 | #[stable(feature = "simd_x86", since = "1.27.0")] |
0531ce1d XL |
2576 | pub unsafe fn _mm_storel_pd(mem_addr: *mut f64, a: __m128d) { |
2577 | *mem_addr = simd_extract(a, 0); | |
2578 | } | |
2579 | ||
532ac7d7 | 2580 | /// Loads a double-precision (64-bit) floating-point element from memory |
0531ce1d | 2581 | /// into both elements of returned vector. |
83c7162d XL |
2582 | /// |
2583 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_load1_pd) | |
0531ce1d XL |
2584 | #[inline] |
2585 | #[target_feature(enable = "sse2")] | |
2586 | // #[cfg_attr(test, assert_instr(movapd))] // FIXME LLVM uses different codegen | |
83c7162d | 2587 | #[stable(feature = "simd_x86", since = "1.27.0")] |
0531ce1d XL |
2588 | pub unsafe fn _mm_load1_pd(mem_addr: *const f64) -> __m128d { |
2589 | let d = *mem_addr; | |
2590 | _mm_setr_pd(d, d) | |
2591 | } | |
2592 | ||
532ac7d7 | 2593 | /// Loads a double-precision (64-bit) floating-point element from memory |
0531ce1d | 2594 | /// into both elements of returned vector. |
83c7162d XL |
2595 | /// |
2596 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_load_pd1) | |
0531ce1d XL |
2597 | #[inline] |
2598 | #[target_feature(enable = "sse2")] | |
2599 | // #[cfg_attr(test, assert_instr(movapd))] // FIXME same as _mm_load1_pd | |
83c7162d | 2600 | #[stable(feature = "simd_x86", since = "1.27.0")] |
0531ce1d XL |
2601 | pub unsafe fn _mm_load_pd1(mem_addr: *const f64) -> __m128d { |
2602 | _mm_load1_pd(mem_addr) | |
2603 | } | |
2604 | ||
532ac7d7 | 2605 | /// Loads 2 double-precision (64-bit) floating-point elements from memory into |
0531ce1d XL |
2606 | /// the returned vector in reverse order. `mem_addr` must be aligned on a |
2607 | /// 16-byte boundary or a general-protection exception may be generated. | |
83c7162d XL |
2608 | /// |
2609 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_loadr_pd) | |
0531ce1d XL |
2610 | #[inline] |
2611 | #[target_feature(enable = "sse2")] | |
e1599b0c | 2612 | #[cfg_attr(test, assert_instr(movaps))] |
83c7162d | 2613 | #[stable(feature = "simd_x86", since = "1.27.0")] |
0531ce1d XL |
2614 | pub unsafe fn _mm_loadr_pd(mem_addr: *const f64) -> __m128d { |
2615 | let a = _mm_load_pd(mem_addr); | |
17df50a5 | 2616 | simd_shuffle2!(a, a, [1, 0]) |
0531ce1d XL |
2617 | } |
2618 | ||
532ac7d7 | 2619 | /// Loads 128-bits (composed of 2 packed double-precision (64-bit) |
0531ce1d XL |
2620 | /// floating-point elements) from memory into the returned vector. |
2621 | /// `mem_addr` does not need to be aligned on any particular boundary. | |
83c7162d XL |
2622 | /// |
2623 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_loadu_pd) | |
0531ce1d XL |
2624 | #[inline] |
2625 | #[target_feature(enable = "sse2")] | |
2626 | #[cfg_attr(test, assert_instr(movups))] | |
83c7162d | 2627 | #[stable(feature = "simd_x86", since = "1.27.0")] |
0531ce1d XL |
2628 | pub unsafe fn _mm_loadu_pd(mem_addr: *const f64) -> __m128d { |
2629 | let mut dst = _mm_undefined_pd(); | |
2630 | ptr::copy_nonoverlapping( | |
2631 | mem_addr as *const u8, | |
2632 | &mut dst as *mut __m128d as *mut u8, | |
2633 | mem::size_of::<__m128d>(), | |
2634 | ); | |
2635 | dst | |
2636 | } | |
2637 | ||
83c7162d XL |
2638 | /// Constructs a 128-bit floating-point vector of `[2 x double]` from two |
2639 | /// 128-bit vector parameters of `[2 x double]`, using the immediate-value | |
0531ce1d | 2640 | /// parameter as a specifier. |
83c7162d XL |
2641 | /// |
2642 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_shuffle_pd) | |
0531ce1d XL |
2643 | #[inline] |
2644 | #[target_feature(enable = "sse2")] | |
fc512014 XL |
2645 | #[cfg_attr( |
2646 | all(test, any(not(target_os = "windows"), target_arch = "x86")), | |
17df50a5 | 2647 | cfg_attr(test, assert_instr(shufps, MASK = 2)) // FIXME shufpd expected |
fc512014 XL |
2648 | )] |
2649 | #[cfg_attr( | |
2650 | all(test, all(target_os = "windows", target_arch = "x86_64")), | |
17df50a5 | 2651 | cfg_attr(test, assert_instr(shufpd, MASK = 1)) |
fc512014 | 2652 | )] |
17df50a5 | 2653 | #[rustc_legacy_const_generics(2)] |
83c7162d | 2654 | #[stable(feature = "simd_x86", since = "1.27.0")] |
17df50a5 XL |
2655 | pub unsafe fn _mm_shuffle_pd<const MASK: i32>(a: __m128d, b: __m128d) -> __m128d { |
2656 | static_assert_imm8!(MASK); | |
2657 | simd_shuffle2!(a, b, <const MASK: i32> [MASK as u32 & 0b1, ((MASK as u32 >> 1) & 0b1) + 2]) | |
0531ce1d XL |
2658 | } |
2659 | ||
83c7162d | 2660 | /// Constructs a 128-bit floating-point vector of `[2 x double]`. The lower |
0531ce1d XL |
2661 | /// 64 bits are set to the lower 64 bits of the second parameter. The upper |
2662 | /// 64 bits are set to the upper 64 bits of the first parameter. | |
83c7162d XL |
2663 | /// |
2664 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_move_sd) | |
0531ce1d XL |
2665 | #[inline] |
2666 | #[target_feature(enable = "sse2")] | |
fc512014 XL |
2667 | #[cfg_attr( |
2668 | all(test, any(not(target_os = "windows"), target_arch = "x86")), | |
2669 | assert_instr(movsd) | |
2670 | )] | |
2671 | #[cfg_attr( | |
2672 | all(test, all(target_os = "windows", target_arch = "x86_64")), | |
2673 | assert_instr(movlps) | |
2674 | )] | |
83c7162d | 2675 | #[stable(feature = "simd_x86", since = "1.27.0")] |
0531ce1d XL |
2676 | pub unsafe fn _mm_move_sd(a: __m128d, b: __m128d) -> __m128d { |
2677 | _mm_setr_pd(simd_extract(b, 0), simd_extract(a, 1)) | |
2678 | } | |
2679 | ||
83c7162d XL |
2680 | /// Casts a 128-bit floating-point vector of `[2 x double]` into a 128-bit |
2681 | /// floating-point vector of `[4 x float]`. | |
2682 | /// | |
2683 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_castpd_ps) | |
0531ce1d XL |
2684 | #[inline] |
2685 | #[target_feature(enable = "sse2")] | |
83c7162d | 2686 | #[stable(feature = "simd_x86", since = "1.27.0")] |
0531ce1d | 2687 | pub unsafe fn _mm_castpd_ps(a: __m128d) -> __m128 { |
532ac7d7 | 2688 | transmute(a) |
0531ce1d XL |
2689 | } |
2690 | ||
83c7162d | 2691 | /// Casts a 128-bit floating-point vector of `[2 x double]` into a 128-bit |
0531ce1d | 2692 | /// integer vector. |
83c7162d XL |
2693 | /// |
2694 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_castpd_si128) | |
0531ce1d XL |
2695 | #[inline] |
2696 | #[target_feature(enable = "sse2")] | |
83c7162d | 2697 | #[stable(feature = "simd_x86", since = "1.27.0")] |
0531ce1d | 2698 | pub unsafe fn _mm_castpd_si128(a: __m128d) -> __m128i { |
532ac7d7 | 2699 | transmute(a) |
0531ce1d XL |
2700 | } |
2701 | ||
83c7162d XL |
2702 | /// Casts a 128-bit floating-point vector of `[4 x float]` into a 128-bit |
2703 | /// floating-point vector of `[2 x double]`. | |
2704 | /// | |
2705 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_castps_pd) | |
0531ce1d XL |
2706 | #[inline] |
2707 | #[target_feature(enable = "sse2")] | |
83c7162d | 2708 | #[stable(feature = "simd_x86", since = "1.27.0")] |
0531ce1d | 2709 | pub unsafe fn _mm_castps_pd(a: __m128) -> __m128d { |
532ac7d7 | 2710 | transmute(a) |
0531ce1d XL |
2711 | } |
2712 | ||
83c7162d | 2713 | /// Casts a 128-bit floating-point vector of `[4 x float]` into a 128-bit |
0531ce1d | 2714 | /// integer vector. |
83c7162d XL |
2715 | /// |
2716 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_castps_si128) | |
0531ce1d XL |
2717 | #[inline] |
2718 | #[target_feature(enable = "sse2")] | |
83c7162d | 2719 | #[stable(feature = "simd_x86", since = "1.27.0")] |
0531ce1d | 2720 | pub unsafe fn _mm_castps_si128(a: __m128) -> __m128i { |
532ac7d7 | 2721 | transmute(a) |
0531ce1d XL |
2722 | } |
2723 | ||
2724 | /// Casts a 128-bit integer vector into a 128-bit floating-point vector | |
83c7162d XL |
2725 | /// of `[2 x double]`. |
2726 | /// | |
2727 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_castsi128_pd) | |
0531ce1d XL |
2728 | #[inline] |
2729 | #[target_feature(enable = "sse2")] | |
83c7162d | 2730 | #[stable(feature = "simd_x86", since = "1.27.0")] |
0531ce1d | 2731 | pub unsafe fn _mm_castsi128_pd(a: __m128i) -> __m128d { |
532ac7d7 | 2732 | transmute(a) |
0531ce1d XL |
2733 | } |
2734 | ||
2735 | /// Casts a 128-bit integer vector into a 128-bit floating-point vector | |
83c7162d XL |
2736 | /// of `[4 x float]`. |
2737 | /// | |
2738 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_castsi128_ps) | |
0531ce1d XL |
2739 | #[inline] |
2740 | #[target_feature(enable = "sse2")] | |
83c7162d | 2741 | #[stable(feature = "simd_x86", since = "1.27.0")] |
0531ce1d | 2742 | pub unsafe fn _mm_castsi128_ps(a: __m128i) -> __m128 { |
532ac7d7 | 2743 | transmute(a) |
0531ce1d XL |
2744 | } |
2745 | ||
532ac7d7 | 2746 | /// Returns vector of type __m128d with undefined elements. |
83c7162d XL |
2747 | /// |
2748 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_undefined_pd) | |
0531ce1d XL |
2749 | #[inline] |
2750 | #[target_feature(enable = "sse2")] | |
83c7162d | 2751 | #[stable(feature = "simd_x86", since = "1.27.0")] |
0531ce1d | 2752 | pub unsafe fn _mm_undefined_pd() -> __m128d { |
0731742a | 2753 | // FIXME: this function should return MaybeUninit<__m128d> |
532ac7d7 | 2754 | mem::MaybeUninit::<__m128d>::uninit().assume_init() |
0531ce1d XL |
2755 | } |
2756 | ||
532ac7d7 | 2757 | /// Returns vector of type __m128i with undefined elements. |
83c7162d XL |
2758 | /// |
2759 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_undefined_si128) | |
0531ce1d XL |
2760 | #[inline] |
2761 | #[target_feature(enable = "sse2")] | |
83c7162d | 2762 | #[stable(feature = "simd_x86", since = "1.27.0")] |
0531ce1d | 2763 | pub unsafe fn _mm_undefined_si128() -> __m128i { |
0731742a | 2764 | // FIXME: this function should return MaybeUninit<__m128i> |
532ac7d7 | 2765 | mem::MaybeUninit::<__m128i>::uninit().assume_init() |
0531ce1d XL |
2766 | } |
2767 | ||
2768 | /// The resulting `__m128d` element is composed by the low-order values of | |
2769 | /// the two `__m128d` interleaved input elements, i.e.: | |
2770 | /// | |
8faf50e0 XL |
2771 | /// * The `[127:64]` bits are copied from the `[127:64]` bits of the second |
2772 | /// input * The `[63:0]` bits are copied from the `[127:64]` bits of the first | |
2773 | /// input | |
83c7162d XL |
2774 | /// |
2775 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_unpackhi_pd) | |
0531ce1d XL |
2776 | #[inline] |
2777 | #[target_feature(enable = "sse2")] | |
2778 | #[cfg_attr(test, assert_instr(unpckhpd))] | |
83c7162d | 2779 | #[stable(feature = "simd_x86", since = "1.27.0")] |
0531ce1d | 2780 | pub unsafe fn _mm_unpackhi_pd(a: __m128d, b: __m128d) -> __m128d { |
17df50a5 | 2781 | simd_shuffle2!(a, b, [1, 3]) |
0531ce1d XL |
2782 | } |
2783 | ||
2784 | /// The resulting `__m128d` element is composed by the high-order values of | |
2785 | /// the two `__m128d` interleaved input elements, i.e.: | |
2786 | /// | |
83c7162d XL |
2787 | /// * The `[127:64]` bits are copied from the `[63:0]` bits of the second input |
2788 | /// * The `[63:0]` bits are copied from the `[63:0]` bits of the first input | |
2789 | /// | |
2790 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_unpacklo_pd) | |
0531ce1d XL |
2791 | #[inline] |
2792 | #[target_feature(enable = "sse2")] | |
0731742a | 2793 | #[cfg_attr(all(test, not(target_os = "windows")), assert_instr(movlhps))] |
83c7162d | 2794 | #[stable(feature = "simd_x86", since = "1.27.0")] |
0531ce1d | 2795 | pub unsafe fn _mm_unpacklo_pd(a: __m128d, b: __m128d) -> __m128d { |
17df50a5 | 2796 | simd_shuffle2!(a, b, [0, 2]) |
0531ce1d XL |
2797 | } |
2798 | ||
0531ce1d XL |
2799 | #[allow(improper_ctypes)] |
2800 | extern "C" { | |
2801 | #[link_name = "llvm.x86.sse2.pause"] | |
2802 | fn pause(); | |
2803 | #[link_name = "llvm.x86.sse2.clflush"] | |
416331ca | 2804 | fn clflush(p: *const u8); |
0531ce1d XL |
2805 | #[link_name = "llvm.x86.sse2.lfence"] |
2806 | fn lfence(); | |
2807 | #[link_name = "llvm.x86.sse2.mfence"] | |
2808 | fn mfence(); | |
0531ce1d XL |
2809 | #[link_name = "llvm.x86.sse2.pavg.b"] |
2810 | fn pavgb(a: u8x16, b: u8x16) -> u8x16; | |
2811 | #[link_name = "llvm.x86.sse2.pavg.w"] | |
2812 | fn pavgw(a: u16x8, b: u16x8) -> u16x8; | |
2813 | #[link_name = "llvm.x86.sse2.pmadd.wd"] | |
2814 | fn pmaddwd(a: i16x8, b: i16x8) -> i32x4; | |
2815 | #[link_name = "llvm.x86.sse2.pmaxs.w"] | |
2816 | fn pmaxsw(a: i16x8, b: i16x8) -> i16x8; | |
2817 | #[link_name = "llvm.x86.sse2.pmaxu.b"] | |
2818 | fn pmaxub(a: u8x16, b: u8x16) -> u8x16; | |
2819 | #[link_name = "llvm.x86.sse2.pmins.w"] | |
2820 | fn pminsw(a: i16x8, b: i16x8) -> i16x8; | |
2821 | #[link_name = "llvm.x86.sse2.pminu.b"] | |
2822 | fn pminub(a: u8x16, b: u8x16) -> u8x16; | |
2823 | #[link_name = "llvm.x86.sse2.pmulh.w"] | |
2824 | fn pmulhw(a: i16x8, b: i16x8) -> i16x8; | |
2825 | #[link_name = "llvm.x86.sse2.pmulhu.w"] | |
2826 | fn pmulhuw(a: u16x8, b: u16x8) -> u16x8; | |
2827 | #[link_name = "llvm.x86.sse2.pmulu.dq"] | |
2828 | fn pmuludq(a: u32x4, b: u32x4) -> u64x2; | |
2829 | #[link_name = "llvm.x86.sse2.psad.bw"] | |
2830 | fn psadbw(a: u8x16, b: u8x16) -> u64x2; | |
0531ce1d XL |
2831 | #[link_name = "llvm.x86.sse2.pslli.w"] |
2832 | fn pslliw(a: i16x8, imm8: i32) -> i16x8; | |
2833 | #[link_name = "llvm.x86.sse2.psll.w"] | |
2834 | fn psllw(a: i16x8, count: i16x8) -> i16x8; | |
2835 | #[link_name = "llvm.x86.sse2.pslli.d"] | |
2836 | fn psllid(a: i32x4, imm8: i32) -> i32x4; | |
2837 | #[link_name = "llvm.x86.sse2.psll.d"] | |
2838 | fn pslld(a: i32x4, count: i32x4) -> i32x4; | |
2839 | #[link_name = "llvm.x86.sse2.pslli.q"] | |
2840 | fn pslliq(a: i64x2, imm8: i32) -> i64x2; | |
2841 | #[link_name = "llvm.x86.sse2.psll.q"] | |
2842 | fn psllq(a: i64x2, count: i64x2) -> i64x2; | |
2843 | #[link_name = "llvm.x86.sse2.psrai.w"] | |
2844 | fn psraiw(a: i16x8, imm8: i32) -> i16x8; | |
2845 | #[link_name = "llvm.x86.sse2.psra.w"] | |
2846 | fn psraw(a: i16x8, count: i16x8) -> i16x8; | |
2847 | #[link_name = "llvm.x86.sse2.psrai.d"] | |
2848 | fn psraid(a: i32x4, imm8: i32) -> i32x4; | |
2849 | #[link_name = "llvm.x86.sse2.psra.d"] | |
2850 | fn psrad(a: i32x4, count: i32x4) -> i32x4; | |
2851 | #[link_name = "llvm.x86.sse2.psrli.w"] | |
2852 | fn psrliw(a: i16x8, imm8: i32) -> i16x8; | |
2853 | #[link_name = "llvm.x86.sse2.psrl.w"] | |
2854 | fn psrlw(a: i16x8, count: i16x8) -> i16x8; | |
2855 | #[link_name = "llvm.x86.sse2.psrli.d"] | |
2856 | fn psrlid(a: i32x4, imm8: i32) -> i32x4; | |
2857 | #[link_name = "llvm.x86.sse2.psrl.d"] | |
2858 | fn psrld(a: i32x4, count: i32x4) -> i32x4; | |
2859 | #[link_name = "llvm.x86.sse2.psrli.q"] | |
2860 | fn psrliq(a: i64x2, imm8: i32) -> i64x2; | |
2861 | #[link_name = "llvm.x86.sse2.psrl.q"] | |
2862 | fn psrlq(a: i64x2, count: i64x2) -> i64x2; | |
2863 | #[link_name = "llvm.x86.sse2.cvtdq2ps"] | |
2864 | fn cvtdq2ps(a: i32x4) -> __m128; | |
2865 | #[link_name = "llvm.x86.sse2.cvtps2dq"] | |
2866 | fn cvtps2dq(a: __m128) -> i32x4; | |
2867 | #[link_name = "llvm.x86.sse2.maskmov.dqu"] | |
2868 | fn maskmovdqu(a: i8x16, mask: i8x16, mem_addr: *mut i8); | |
2869 | #[link_name = "llvm.x86.sse2.packsswb.128"] | |
2870 | fn packsswb(a: i16x8, b: i16x8) -> i8x16; | |
2871 | #[link_name = "llvm.x86.sse2.packssdw.128"] | |
2872 | fn packssdw(a: i32x4, b: i32x4) -> i16x8; | |
2873 | #[link_name = "llvm.x86.sse2.packuswb.128"] | |
2874 | fn packuswb(a: i16x8, b: i16x8) -> u8x16; | |
2875 | #[link_name = "llvm.x86.sse2.pmovmskb.128"] | |
2876 | fn pmovmskb(a: i8x16) -> i32; | |
2877 | #[link_name = "llvm.x86.sse2.max.sd"] | |
2878 | fn maxsd(a: __m128d, b: __m128d) -> __m128d; | |
2879 | #[link_name = "llvm.x86.sse2.max.pd"] | |
2880 | fn maxpd(a: __m128d, b: __m128d) -> __m128d; | |
2881 | #[link_name = "llvm.x86.sse2.min.sd"] | |
2882 | fn minsd(a: __m128d, b: __m128d) -> __m128d; | |
2883 | #[link_name = "llvm.x86.sse2.min.pd"] | |
2884 | fn minpd(a: __m128d, b: __m128d) -> __m128d; | |
2885 | #[link_name = "llvm.x86.sse2.sqrt.sd"] | |
2886 | fn sqrtsd(a: __m128d) -> __m128d; | |
2887 | #[link_name = "llvm.x86.sse2.sqrt.pd"] | |
2888 | fn sqrtpd(a: __m128d) -> __m128d; | |
2889 | #[link_name = "llvm.x86.sse2.cmp.sd"] | |
2890 | fn cmpsd(a: __m128d, b: __m128d, imm8: i8) -> __m128d; | |
2891 | #[link_name = "llvm.x86.sse2.cmp.pd"] | |
2892 | fn cmppd(a: __m128d, b: __m128d, imm8: i8) -> __m128d; | |
2893 | #[link_name = "llvm.x86.sse2.comieq.sd"] | |
2894 | fn comieqsd(a: __m128d, b: __m128d) -> i32; | |
2895 | #[link_name = "llvm.x86.sse2.comilt.sd"] | |
2896 | fn comiltsd(a: __m128d, b: __m128d) -> i32; | |
2897 | #[link_name = "llvm.x86.sse2.comile.sd"] | |
2898 | fn comilesd(a: __m128d, b: __m128d) -> i32; | |
2899 | #[link_name = "llvm.x86.sse2.comigt.sd"] | |
2900 | fn comigtsd(a: __m128d, b: __m128d) -> i32; | |
2901 | #[link_name = "llvm.x86.sse2.comige.sd"] | |
2902 | fn comigesd(a: __m128d, b: __m128d) -> i32; | |
2903 | #[link_name = "llvm.x86.sse2.comineq.sd"] | |
2904 | fn comineqsd(a: __m128d, b: __m128d) -> i32; | |
2905 | #[link_name = "llvm.x86.sse2.ucomieq.sd"] | |
2906 | fn ucomieqsd(a: __m128d, b: __m128d) -> i32; | |
2907 | #[link_name = "llvm.x86.sse2.ucomilt.sd"] | |
2908 | fn ucomiltsd(a: __m128d, b: __m128d) -> i32; | |
2909 | #[link_name = "llvm.x86.sse2.ucomile.sd"] | |
2910 | fn ucomilesd(a: __m128d, b: __m128d) -> i32; | |
2911 | #[link_name = "llvm.x86.sse2.ucomigt.sd"] | |
2912 | fn ucomigtsd(a: __m128d, b: __m128d) -> i32; | |
2913 | #[link_name = "llvm.x86.sse2.ucomige.sd"] | |
2914 | fn ucomigesd(a: __m128d, b: __m128d) -> i32; | |
2915 | #[link_name = "llvm.x86.sse2.ucomineq.sd"] | |
2916 | fn ucomineqsd(a: __m128d, b: __m128d) -> i32; | |
2917 | #[link_name = "llvm.x86.sse2.movmsk.pd"] | |
2918 | fn movmskpd(a: __m128d) -> i32; | |
2919 | #[link_name = "llvm.x86.sse2.cvtpd2ps"] | |
2920 | fn cvtpd2ps(a: __m128d) -> __m128; | |
2921 | #[link_name = "llvm.x86.sse2.cvtps2pd"] | |
2922 | fn cvtps2pd(a: __m128) -> __m128d; | |
2923 | #[link_name = "llvm.x86.sse2.cvtpd2dq"] | |
2924 | fn cvtpd2dq(a: __m128d) -> i32x4; | |
2925 | #[link_name = "llvm.x86.sse2.cvtsd2si"] | |
2926 | fn cvtsd2si(a: __m128d) -> i32; | |
2927 | #[link_name = "llvm.x86.sse2.cvtsd2ss"] | |
2928 | fn cvtsd2ss(a: __m128, b: __m128d) -> __m128; | |
2929 | #[link_name = "llvm.x86.sse2.cvtss2sd"] | |
2930 | fn cvtss2sd(a: __m128d, b: __m128) -> __m128d; | |
2931 | #[link_name = "llvm.x86.sse2.cvttpd2dq"] | |
2932 | fn cvttpd2dq(a: __m128d) -> i32x4; | |
2933 | #[link_name = "llvm.x86.sse2.cvttsd2si"] | |
2934 | fn cvttsd2si(a: __m128d) -> i32; | |
2935 | #[link_name = "llvm.x86.sse2.cvttps2dq"] | |
2936 | fn cvttps2dq(a: __m128) -> i32x4; | |
2937 | #[link_name = "llvm.x86.sse2.storeu.dq"] | |
2938 | fn storeudq(mem_addr: *mut i8, a: __m128i); | |
2939 | #[link_name = "llvm.x86.sse2.storeu.pd"] | |
2940 | fn storeupd(mem_addr: *mut i8, a: __m128d); | |
0531ce1d XL |
2941 | } |
2942 | ||
2943 | #[cfg(test)] | |
2944 | mod tests { | |
48663c56 XL |
2945 | use crate::{ |
2946 | core_arch::{simd::*, x86::*}, | |
2947 | hint::black_box, | |
2948 | }; | |
2949 | use std::{ | |
2950 | boxed, f32, | |
2951 | f64::{self, NAN}, | |
2952 | i32, | |
2953 | mem::{self, transmute}, | |
2954 | }; | |
416331ca | 2955 | use stdarch_test::simd_test; |
0531ce1d | 2956 | |
532ac7d7 XL |
2957 | #[test] |
2958 | fn test_mm_pause() { | |
2959 | unsafe { _mm_pause() } | |
0531ce1d XL |
2960 | } |
2961 | ||
83c7162d | 2962 | #[simd_test(enable = "sse2")] |
0531ce1d | 2963 | unsafe fn test_mm_clflush() { |
416331ca XL |
2964 | let x = 0_u8; |
2965 | _mm_clflush(&x as *const _); | |
0531ce1d XL |
2966 | } |
2967 | ||
83c7162d | 2968 | #[simd_test(enable = "sse2")] |
0531ce1d XL |
2969 | unsafe fn test_mm_lfence() { |
2970 | _mm_lfence(); | |
2971 | } | |
2972 | ||
83c7162d | 2973 | #[simd_test(enable = "sse2")] |
0531ce1d XL |
2974 | unsafe fn test_mm_mfence() { |
2975 | _mm_mfence(); | |
2976 | } | |
2977 | ||
83c7162d | 2978 | #[simd_test(enable = "sse2")] |
0531ce1d | 2979 | unsafe fn test_mm_add_epi8() { |
0731742a XL |
2980 | let a = _mm_setr_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15); |
2981 | #[rustfmt::skip] | |
0531ce1d XL |
2982 | let b = _mm_setr_epi8( |
2983 | 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, | |
2984 | ); | |
2985 | let r = _mm_add_epi8(a, b); | |
0731742a | 2986 | #[rustfmt::skip] |
0531ce1d XL |
2987 | let e = _mm_setr_epi8( |
2988 | 16, 18, 20, 22, 24, 26, 28, 30, 32, 34, 36, 38, 40, 42, 44, 46, | |
2989 | ); | |
2990 | assert_eq_m128i(r, e); | |
2991 | } | |
2992 | ||
83c7162d | 2993 | #[simd_test(enable = "sse2")] |
0531ce1d XL |
2994 | unsafe fn test_mm_add_epi8_overflow() { |
2995 | let a = _mm_set1_epi8(0x7F); | |
2996 | let b = _mm_set1_epi8(1); | |
2997 | let r = _mm_add_epi8(a, b); | |
2998 | assert_eq_m128i(r, _mm_set1_epi8(-128)); | |
2999 | } | |
3000 | ||
83c7162d | 3001 | #[simd_test(enable = "sse2")] |
0531ce1d XL |
3002 | unsafe fn test_mm_add_epi16() { |
3003 | let a = _mm_setr_epi16(0, 1, 2, 3, 4, 5, 6, 7); | |
3004 | let b = _mm_setr_epi16(8, 9, 10, 11, 12, 13, 14, 15); | |
3005 | let r = _mm_add_epi16(a, b); | |
3006 | let e = _mm_setr_epi16(8, 10, 12, 14, 16, 18, 20, 22); | |
3007 | assert_eq_m128i(r, e); | |
3008 | } | |
3009 | ||
83c7162d | 3010 | #[simd_test(enable = "sse2")] |
0531ce1d XL |
3011 | unsafe fn test_mm_add_epi32() { |
3012 | let a = _mm_setr_epi32(0, 1, 2, 3); | |
3013 | let b = _mm_setr_epi32(4, 5, 6, 7); | |
3014 | let r = _mm_add_epi32(a, b); | |
3015 | let e = _mm_setr_epi32(4, 6, 8, 10); | |
3016 | assert_eq_m128i(r, e); | |
3017 | } | |
3018 | ||
83c7162d | 3019 | #[simd_test(enable = "sse2")] |
0531ce1d XL |
3020 | unsafe fn test_mm_add_epi64() { |
3021 | let a = _mm_setr_epi64x(0, 1); | |
3022 | let b = _mm_setr_epi64x(2, 3); | |
3023 | let r = _mm_add_epi64(a, b); | |
3024 | let e = _mm_setr_epi64x(2, 4); | |
3025 | assert_eq_m128i(r, e); | |
3026 | } | |
3027 | ||
83c7162d | 3028 | #[simd_test(enable = "sse2")] |
0531ce1d | 3029 | unsafe fn test_mm_adds_epi8() { |
0731742a XL |
3030 | let a = _mm_setr_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15); |
3031 | #[rustfmt::skip] | |
0531ce1d XL |
3032 | let b = _mm_setr_epi8( |
3033 | 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, | |
3034 | ); | |
3035 | let r = _mm_adds_epi8(a, b); | |
0731742a | 3036 | #[rustfmt::skip] |
0531ce1d XL |
3037 | let e = _mm_setr_epi8( |
3038 | 16, 18, 20, 22, 24, 26, 28, 30, 32, 34, 36, 38, 40, 42, 44, 46, | |
3039 | ); | |
3040 | assert_eq_m128i(r, e); | |
3041 | } | |
3042 | ||
83c7162d | 3043 | #[simd_test(enable = "sse2")] |
0531ce1d XL |
3044 | unsafe fn test_mm_adds_epi8_saturate_positive() { |
3045 | let a = _mm_set1_epi8(0x7F); | |
3046 | let b = _mm_set1_epi8(1); | |
3047 | let r = _mm_adds_epi8(a, b); | |
3048 | assert_eq_m128i(r, a); | |
3049 | } | |
3050 | ||
83c7162d | 3051 | #[simd_test(enable = "sse2")] |
0531ce1d XL |
3052 | unsafe fn test_mm_adds_epi8_saturate_negative() { |
3053 | let a = _mm_set1_epi8(-0x80); | |
3054 | let b = _mm_set1_epi8(-1); | |
3055 | let r = _mm_adds_epi8(a, b); | |
3056 | assert_eq_m128i(r, a); | |
3057 | } | |
3058 | ||
83c7162d | 3059 | #[simd_test(enable = "sse2")] |
0531ce1d XL |
3060 | unsafe fn test_mm_adds_epi16() { |
3061 | let a = _mm_setr_epi16(0, 1, 2, 3, 4, 5, 6, 7); | |
3062 | let b = _mm_setr_epi16(8, 9, 10, 11, 12, 13, 14, 15); | |
3063 | let r = _mm_adds_epi16(a, b); | |
3064 | let e = _mm_setr_epi16(8, 10, 12, 14, 16, 18, 20, 22); | |
3065 | assert_eq_m128i(r, e); | |
3066 | } | |
3067 | ||
83c7162d | 3068 | #[simd_test(enable = "sse2")] |
0531ce1d XL |
3069 | unsafe fn test_mm_adds_epi16_saturate_positive() { |
3070 | let a = _mm_set1_epi16(0x7FFF); | |
3071 | let b = _mm_set1_epi16(1); | |
3072 | let r = _mm_adds_epi16(a, b); | |
3073 | assert_eq_m128i(r, a); | |
3074 | } | |
3075 | ||
83c7162d | 3076 | #[simd_test(enable = "sse2")] |
0531ce1d XL |
3077 | unsafe fn test_mm_adds_epi16_saturate_negative() { |
3078 | let a = _mm_set1_epi16(-0x8000); | |
3079 | let b = _mm_set1_epi16(-1); | |
3080 | let r = _mm_adds_epi16(a, b); | |
3081 | assert_eq_m128i(r, a); | |
3082 | } | |
3083 | ||
83c7162d | 3084 | #[simd_test(enable = "sse2")] |
0531ce1d | 3085 | unsafe fn test_mm_adds_epu8() { |
0731742a XL |
3086 | let a = _mm_setr_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15); |
3087 | #[rustfmt::skip] | |
0531ce1d XL |
3088 | let b = _mm_setr_epi8( |
3089 | 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, | |
3090 | ); | |
3091 | let r = _mm_adds_epu8(a, b); | |
0731742a | 3092 | #[rustfmt::skip] |
0531ce1d XL |
3093 | let e = _mm_setr_epi8( |
3094 | 16, 18, 20, 22, 24, 26, 28, 30, 32, 34, 36, 38, 40, 42, 44, 46, | |
3095 | ); | |
3096 | assert_eq_m128i(r, e); | |
3097 | } | |
3098 | ||
83c7162d | 3099 | #[simd_test(enable = "sse2")] |
0531ce1d XL |
3100 | unsafe fn test_mm_adds_epu8_saturate() { |
3101 | let a = _mm_set1_epi8(!0); | |
3102 | let b = _mm_set1_epi8(1); | |
3103 | let r = _mm_adds_epu8(a, b); | |
3104 | assert_eq_m128i(r, a); | |
3105 | } | |
3106 | ||
83c7162d | 3107 | #[simd_test(enable = "sse2")] |
0531ce1d XL |
3108 | unsafe fn test_mm_adds_epu16() { |
3109 | let a = _mm_setr_epi16(0, 1, 2, 3, 4, 5, 6, 7); | |
3110 | let b = _mm_setr_epi16(8, 9, 10, 11, 12, 13, 14, 15); | |
3111 | let r = _mm_adds_epu16(a, b); | |
3112 | let e = _mm_setr_epi16(8, 10, 12, 14, 16, 18, 20, 22); | |
3113 | assert_eq_m128i(r, e); | |
3114 | } | |
3115 | ||
83c7162d | 3116 | #[simd_test(enable = "sse2")] |
0531ce1d XL |
3117 | unsafe fn test_mm_adds_epu16_saturate() { |
3118 | let a = _mm_set1_epi16(!0); | |
3119 | let b = _mm_set1_epi16(1); | |
3120 | let r = _mm_adds_epu16(a, b); | |
3121 | assert_eq_m128i(r, a); | |
3122 | } | |
3123 | ||
83c7162d | 3124 | #[simd_test(enable = "sse2")] |
0531ce1d XL |
3125 | unsafe fn test_mm_avg_epu8() { |
3126 | let (a, b) = (_mm_set1_epi8(3), _mm_set1_epi8(9)); | |
3127 | let r = _mm_avg_epu8(a, b); | |
3128 | assert_eq_m128i(r, _mm_set1_epi8(6)); | |
3129 | } | |
3130 | ||
83c7162d | 3131 | #[simd_test(enable = "sse2")] |
0531ce1d XL |
3132 | unsafe fn test_mm_avg_epu16() { |
3133 | let (a, b) = (_mm_set1_epi16(3), _mm_set1_epi16(9)); | |
3134 | let r = _mm_avg_epu16(a, b); | |
3135 | assert_eq_m128i(r, _mm_set1_epi16(6)); | |
3136 | } | |
3137 | ||
83c7162d | 3138 | #[simd_test(enable = "sse2")] |
0531ce1d XL |
3139 | unsafe fn test_mm_madd_epi16() { |
3140 | let a = _mm_setr_epi16(1, 2, 3, 4, 5, 6, 7, 8); | |
3141 | let b = _mm_setr_epi16(9, 10, 11, 12, 13, 14, 15, 16); | |
3142 | let r = _mm_madd_epi16(a, b); | |
3143 | let e = _mm_setr_epi32(29, 81, 149, 233); | |
3144 | assert_eq_m128i(r, e); | |
3145 | } | |
3146 | ||
83c7162d | 3147 | #[simd_test(enable = "sse2")] |
0531ce1d XL |
3148 | unsafe fn test_mm_max_epi16() { |
3149 | let a = _mm_set1_epi16(1); | |
3150 | let b = _mm_set1_epi16(-1); | |
3151 | let r = _mm_max_epi16(a, b); | |
3152 | assert_eq_m128i(r, a); | |
3153 | } | |
3154 | ||
83c7162d | 3155 | #[simd_test(enable = "sse2")] |
0531ce1d XL |
3156 | unsafe fn test_mm_max_epu8() { |
3157 | let a = _mm_set1_epi8(1); | |
3158 | let b = _mm_set1_epi8(!0); | |
3159 | let r = _mm_max_epu8(a, b); | |
3160 | assert_eq_m128i(r, b); | |
3161 | } | |
3162 | ||
83c7162d | 3163 | #[simd_test(enable = "sse2")] |
0531ce1d XL |
3164 | unsafe fn test_mm_min_epi16() { |
3165 | let a = _mm_set1_epi16(1); | |
3166 | let b = _mm_set1_epi16(-1); | |
3167 | let r = _mm_min_epi16(a, b); | |
3168 | assert_eq_m128i(r, b); | |
3169 | } | |
3170 | ||
83c7162d | 3171 | #[simd_test(enable = "sse2")] |
0531ce1d XL |
3172 | unsafe fn test_mm_min_epu8() { |
3173 | let a = _mm_set1_epi8(1); | |
3174 | let b = _mm_set1_epi8(!0); | |
3175 | let r = _mm_min_epu8(a, b); | |
3176 | assert_eq_m128i(r, a); | |
3177 | } | |
3178 | ||
83c7162d | 3179 | #[simd_test(enable = "sse2")] |
0531ce1d XL |
3180 | unsafe fn test_mm_mulhi_epi16() { |
3181 | let (a, b) = (_mm_set1_epi16(1000), _mm_set1_epi16(-1001)); | |
3182 | let r = _mm_mulhi_epi16(a, b); | |
3183 | assert_eq_m128i(r, _mm_set1_epi16(-16)); | |
3184 | } | |
3185 | ||
83c7162d | 3186 | #[simd_test(enable = "sse2")] |
0531ce1d XL |
3187 | unsafe fn test_mm_mulhi_epu16() { |
3188 | let (a, b) = (_mm_set1_epi16(1000), _mm_set1_epi16(1001)); | |
3189 | let r = _mm_mulhi_epu16(a, b); | |
3190 | assert_eq_m128i(r, _mm_set1_epi16(15)); | |
3191 | } | |
3192 | ||
83c7162d | 3193 | #[simd_test(enable = "sse2")] |
0531ce1d XL |
3194 | unsafe fn test_mm_mullo_epi16() { |
3195 | let (a, b) = (_mm_set1_epi16(1000), _mm_set1_epi16(-1001)); | |
3196 | let r = _mm_mullo_epi16(a, b); | |
3197 | assert_eq_m128i(r, _mm_set1_epi16(-17960)); | |
3198 | } | |
3199 | ||
83c7162d | 3200 | #[simd_test(enable = "sse2")] |
0531ce1d XL |
3201 | unsafe fn test_mm_mul_epu32() { |
3202 | let a = _mm_setr_epi64x(1_000_000_000, 1 << 34); | |
3203 | let b = _mm_setr_epi64x(1_000_000_000, 1 << 35); | |
3204 | let r = _mm_mul_epu32(a, b); | |
3205 | let e = _mm_setr_epi64x(1_000_000_000 * 1_000_000_000, 0); | |
3206 | assert_eq_m128i(r, e); | |
3207 | } | |
3208 | ||
83c7162d | 3209 | #[simd_test(enable = "sse2")] |
0531ce1d | 3210 | unsafe fn test_mm_sad_epu8() { |
0731742a | 3211 | #[rustfmt::skip] |
0531ce1d XL |
3212 | let a = _mm_setr_epi8( |
3213 | 255u8 as i8, 254u8 as i8, 253u8 as i8, 252u8 as i8, | |
3214 | 1, 2, 3, 4, | |
3215 | 155u8 as i8, 154u8 as i8, 153u8 as i8, 152u8 as i8, | |
3216 | 1, 2, 3, 4, | |
3217 | ); | |
3218 | let b = _mm_setr_epi8(0, 0, 0, 0, 2, 1, 2, 1, 1, 1, 1, 1, 1, 2, 1, 2); | |
3219 | let r = _mm_sad_epu8(a, b); | |
3220 | let e = _mm_setr_epi64x(1020, 614); | |
3221 | assert_eq_m128i(r, e); | |
3222 | } | |
3223 | ||
83c7162d | 3224 | #[simd_test(enable = "sse2")] |
0531ce1d XL |
3225 | unsafe fn test_mm_sub_epi8() { |
3226 | let (a, b) = (_mm_set1_epi8(5), _mm_set1_epi8(6)); | |
3227 | let r = _mm_sub_epi8(a, b); | |
3228 | assert_eq_m128i(r, _mm_set1_epi8(-1)); | |
3229 | } | |
3230 | ||
83c7162d | 3231 | #[simd_test(enable = "sse2")] |
0531ce1d XL |
3232 | unsafe fn test_mm_sub_epi16() { |
3233 | let (a, b) = (_mm_set1_epi16(5), _mm_set1_epi16(6)); | |
3234 | let r = _mm_sub_epi16(a, b); | |
3235 | assert_eq_m128i(r, _mm_set1_epi16(-1)); | |
3236 | } | |
3237 | ||
83c7162d | 3238 | #[simd_test(enable = "sse2")] |
0531ce1d XL |
3239 | unsafe fn test_mm_sub_epi32() { |
3240 | let (a, b) = (_mm_set1_epi32(5), _mm_set1_epi32(6)); | |
3241 | let r = _mm_sub_epi32(a, b); | |
3242 | assert_eq_m128i(r, _mm_set1_epi32(-1)); | |
3243 | } | |
3244 | ||
83c7162d | 3245 | #[simd_test(enable = "sse2")] |
0531ce1d XL |
3246 | unsafe fn test_mm_sub_epi64() { |
3247 | let (a, b) = (_mm_set1_epi64x(5), _mm_set1_epi64x(6)); | |
3248 | let r = _mm_sub_epi64(a, b); | |
3249 | assert_eq_m128i(r, _mm_set1_epi64x(-1)); | |
3250 | } | |
3251 | ||
83c7162d | 3252 | #[simd_test(enable = "sse2")] |
0531ce1d XL |
3253 | unsafe fn test_mm_subs_epi8() { |
3254 | let (a, b) = (_mm_set1_epi8(5), _mm_set1_epi8(2)); | |
3255 | let r = _mm_subs_epi8(a, b); | |
3256 | assert_eq_m128i(r, _mm_set1_epi8(3)); | |
3257 | } | |
3258 | ||
83c7162d | 3259 | #[simd_test(enable = "sse2")] |
0531ce1d XL |
3260 | unsafe fn test_mm_subs_epi8_saturate_positive() { |
3261 | let a = _mm_set1_epi8(0x7F); | |
3262 | let b = _mm_set1_epi8(-1); | |
3263 | let r = _mm_subs_epi8(a, b); | |
3264 | assert_eq_m128i(r, a); | |
3265 | } | |
3266 | ||
83c7162d | 3267 | #[simd_test(enable = "sse2")] |
0531ce1d XL |
3268 | unsafe fn test_mm_subs_epi8_saturate_negative() { |
3269 | let a = _mm_set1_epi8(-0x80); | |
3270 | let b = _mm_set1_epi8(1); | |
3271 | let r = _mm_subs_epi8(a, b); | |
3272 | assert_eq_m128i(r, a); | |
3273 | } | |
3274 | ||
83c7162d | 3275 | #[simd_test(enable = "sse2")] |
0531ce1d XL |
3276 | unsafe fn test_mm_subs_epi16() { |
3277 | let (a, b) = (_mm_set1_epi16(5), _mm_set1_epi16(2)); | |
3278 | let r = _mm_subs_epi16(a, b); | |
3279 | assert_eq_m128i(r, _mm_set1_epi16(3)); | |
3280 | } | |
3281 | ||
83c7162d | 3282 | #[simd_test(enable = "sse2")] |
0531ce1d XL |
3283 | unsafe fn test_mm_subs_epi16_saturate_positive() { |
3284 | let a = _mm_set1_epi16(0x7FFF); | |
3285 | let b = _mm_set1_epi16(-1); | |
3286 | let r = _mm_subs_epi16(a, b); | |
3287 | assert_eq_m128i(r, a); | |
3288 | } | |
3289 | ||
83c7162d | 3290 | #[simd_test(enable = "sse2")] |
0531ce1d XL |
3291 | unsafe fn test_mm_subs_epi16_saturate_negative() { |
3292 | let a = _mm_set1_epi16(-0x8000); | |
3293 | let b = _mm_set1_epi16(1); | |
3294 | let r = _mm_subs_epi16(a, b); | |
3295 | assert_eq_m128i(r, a); | |
3296 | } | |
3297 | ||
83c7162d | 3298 | #[simd_test(enable = "sse2")] |
0531ce1d XL |
3299 | unsafe fn test_mm_subs_epu8() { |
3300 | let (a, b) = (_mm_set1_epi8(5), _mm_set1_epi8(2)); | |
3301 | let r = _mm_subs_epu8(a, b); | |
3302 | assert_eq_m128i(r, _mm_set1_epi8(3)); | |
3303 | } | |
3304 | ||
83c7162d | 3305 | #[simd_test(enable = "sse2")] |
0531ce1d XL |
3306 | unsafe fn test_mm_subs_epu8_saturate() { |
3307 | let a = _mm_set1_epi8(0); | |
3308 | let b = _mm_set1_epi8(1); | |
3309 | let r = _mm_subs_epu8(a, b); | |
3310 | assert_eq_m128i(r, a); | |
3311 | } | |
3312 | ||
83c7162d | 3313 | #[simd_test(enable = "sse2")] |
0531ce1d XL |
3314 | unsafe fn test_mm_subs_epu16() { |
3315 | let (a, b) = (_mm_set1_epi16(5), _mm_set1_epi16(2)); | |
3316 | let r = _mm_subs_epu16(a, b); | |
3317 | assert_eq_m128i(r, _mm_set1_epi16(3)); | |
3318 | } | |
3319 | ||
83c7162d | 3320 | #[simd_test(enable = "sse2")] |
0531ce1d XL |
3321 | unsafe fn test_mm_subs_epu16_saturate() { |
3322 | let a = _mm_set1_epi16(0); | |
3323 | let b = _mm_set1_epi16(1); | |
3324 | let r = _mm_subs_epu16(a, b); | |
3325 | assert_eq_m128i(r, a); | |
3326 | } | |
3327 | ||
83c7162d | 3328 | #[simd_test(enable = "sse2")] |
0531ce1d | 3329 | unsafe fn test_mm_slli_si128() { |
0731742a | 3330 | #[rustfmt::skip] |
0531ce1d XL |
3331 | let a = _mm_setr_epi8( |
3332 | 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, | |
3333 | ); | |
17df50a5 | 3334 | let r = _mm_slli_si128::<1>(a); |
0731742a | 3335 | let e = _mm_setr_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15); |
0531ce1d XL |
3336 | assert_eq_m128i(r, e); |
3337 | ||
0731742a | 3338 | #[rustfmt::skip] |
0531ce1d XL |
3339 | let a = _mm_setr_epi8( |
3340 | 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, | |
3341 | ); | |
17df50a5 | 3342 | let r = _mm_slli_si128::<15>(a); |
0531ce1d XL |
3343 | let e = _mm_setr_epi8(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1); |
3344 | assert_eq_m128i(r, e); | |
3345 | ||
0731742a | 3346 | #[rustfmt::skip] |
0531ce1d XL |
3347 | let a = _mm_setr_epi8( |
3348 | 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, | |
3349 | ); | |
17df50a5 | 3350 | let r = _mm_slli_si128::<16>(a); |
0531ce1d XL |
3351 | assert_eq_m128i(r, _mm_set1_epi8(0)); |
3352 | } | |
3353 | ||
83c7162d | 3354 | #[simd_test(enable = "sse2")] |
0531ce1d | 3355 | unsafe fn test_mm_slli_epi16() { |
0731742a | 3356 | #[rustfmt::skip] |
0531ce1d XL |
3357 | let a = _mm_setr_epi16( |
3358 | 0xFFFF as u16 as i16, 0x0FFF, 0x00FF, 0x000F, 0, 0, 0, 0, | |
3359 | ); | |
17df50a5 | 3360 | let r = _mm_slli_epi16::<4>(a); |
0531ce1d | 3361 | |
0731742a | 3362 | #[rustfmt::skip] |
0531ce1d XL |
3363 | let e = _mm_setr_epi16( |
3364 | 0xFFF0 as u16 as i16, 0xFFF0 as u16 as i16, 0x0FF0, 0x00F0, | |
3365 | 0, 0, 0, 0, | |
3366 | ); | |
3367 | assert_eq_m128i(r, e); | |
3368 | } | |
3369 | ||
83c7162d | 3370 | #[simd_test(enable = "sse2")] |
0531ce1d XL |
3371 | unsafe fn test_mm_sll_epi16() { |
3372 | let a = _mm_setr_epi16(0xFF, 0, 0, 0, 0, 0, 0, 0); | |
3373 | let r = _mm_sll_epi16(a, _mm_setr_epi16(4, 0, 0, 0, 0, 0, 0, 0)); | |
3374 | assert_eq_m128i(r, _mm_setr_epi16(0xFF0, 0, 0, 0, 0, 0, 0, 0)); | |
3375 | let r = _mm_sll_epi16(a, _mm_setr_epi16(0, 0, 0, 0, 4, 0, 0, 0)); | |
3376 | assert_eq_m128i(r, _mm_setr_epi16(0xFF, 0, 0, 0, 0, 0, 0, 0)); | |
3377 | } | |
3378 | ||
83c7162d | 3379 | #[simd_test(enable = "sse2")] |
0531ce1d | 3380 | unsafe fn test_mm_slli_epi32() { |
17df50a5 | 3381 | let r = _mm_slli_epi32::<4>(_mm_set1_epi32(0xFFFF)); |
0531ce1d XL |
3382 | assert_eq_m128i(r, _mm_set1_epi32(0xFFFF0)); |
3383 | } | |
3384 | ||
83c7162d | 3385 | #[simd_test(enable = "sse2")] |
0531ce1d XL |
3386 | unsafe fn test_mm_sll_epi32() { |
3387 | let a = _mm_set1_epi32(0xFFFF); | |
3388 | let b = _mm_setr_epi32(4, 0, 0, 0); | |
3389 | let r = _mm_sll_epi32(a, b); | |
3390 | assert_eq_m128i(r, _mm_set1_epi32(0xFFFF0)); | |
3391 | } | |
3392 | ||
83c7162d | 3393 | #[simd_test(enable = "sse2")] |
0531ce1d | 3394 | unsafe fn test_mm_slli_epi64() { |
17df50a5 | 3395 | let r = _mm_slli_epi64::<4>(_mm_set1_epi64x(0xFFFFFFFF)); |
0531ce1d XL |
3396 | assert_eq_m128i(r, _mm_set1_epi64x(0xFFFFFFFF0)); |
3397 | } | |
3398 | ||
83c7162d | 3399 | #[simd_test(enable = "sse2")] |
0531ce1d XL |
3400 | unsafe fn test_mm_sll_epi64() { |
3401 | let a = _mm_set1_epi64x(0xFFFFFFFF); | |
3402 | let b = _mm_setr_epi64x(4, 0); | |
3403 | let r = _mm_sll_epi64(a, b); | |
3404 | assert_eq_m128i(r, _mm_set1_epi64x(0xFFFFFFFF0)); | |
3405 | } | |
3406 | ||
83c7162d | 3407 | #[simd_test(enable = "sse2")] |
0531ce1d | 3408 | unsafe fn test_mm_srai_epi16() { |
17df50a5 | 3409 | let r = _mm_srai_epi16::<1>(_mm_set1_epi16(-1)); |
0531ce1d XL |
3410 | assert_eq_m128i(r, _mm_set1_epi16(-1)); |
3411 | } | |
3412 | ||
83c7162d | 3413 | #[simd_test(enable = "sse2")] |
0531ce1d XL |
3414 | unsafe fn test_mm_sra_epi16() { |
3415 | let a = _mm_set1_epi16(-1); | |
3416 | let b = _mm_setr_epi16(1, 0, 0, 0, 0, 0, 0, 0); | |
3417 | let r = _mm_sra_epi16(a, b); | |
3418 | assert_eq_m128i(r, _mm_set1_epi16(-1)); | |
3419 | } | |
3420 | ||
83c7162d | 3421 | #[simd_test(enable = "sse2")] |
0531ce1d | 3422 | unsafe fn test_mm_srai_epi32() { |
17df50a5 | 3423 | let r = _mm_srai_epi32::<1>(_mm_set1_epi32(-1)); |
0531ce1d XL |
3424 | assert_eq_m128i(r, _mm_set1_epi32(-1)); |
3425 | } | |
3426 | ||
83c7162d | 3427 | #[simd_test(enable = "sse2")] |
0531ce1d XL |
3428 | unsafe fn test_mm_sra_epi32() { |
3429 | let a = _mm_set1_epi32(-1); | |
3430 | let b = _mm_setr_epi32(1, 0, 0, 0); | |
3431 | let r = _mm_sra_epi32(a, b); | |
3432 | assert_eq_m128i(r, _mm_set1_epi32(-1)); | |
3433 | } | |
3434 | ||
83c7162d | 3435 | #[simd_test(enable = "sse2")] |
0531ce1d | 3436 | unsafe fn test_mm_srli_si128() { |
0731742a | 3437 | #[rustfmt::skip] |
0531ce1d XL |
3438 | let a = _mm_setr_epi8( |
3439 | 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, | |
3440 | ); | |
17df50a5 | 3441 | let r = _mm_srli_si128::<1>(a); |
0731742a | 3442 | #[rustfmt::skip] |
0531ce1d XL |
3443 | let e = _mm_setr_epi8( |
3444 | 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 0, | |
3445 | ); | |
3446 | assert_eq_m128i(r, e); | |
3447 | ||
0731742a | 3448 | #[rustfmt::skip] |
0531ce1d XL |
3449 | let a = _mm_setr_epi8( |
3450 | 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, | |
3451 | ); | |
17df50a5 | 3452 | let r = _mm_srli_si128::<15>(a); |
0531ce1d XL |
3453 | let e = _mm_setr_epi8(16, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0); |
3454 | assert_eq_m128i(r, e); | |
3455 | ||
0731742a | 3456 | #[rustfmt::skip] |
0531ce1d XL |
3457 | let a = _mm_setr_epi8( |
3458 | 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, | |
3459 | ); | |
17df50a5 | 3460 | let r = _mm_srli_si128::<16>(a); |
0531ce1d XL |
3461 | assert_eq_m128i(r, _mm_set1_epi8(0)); |
3462 | } | |
3463 | ||
83c7162d | 3464 | #[simd_test(enable = "sse2")] |
0531ce1d | 3465 | unsafe fn test_mm_srli_epi16() { |
0731742a | 3466 | #[rustfmt::skip] |
0531ce1d XL |
3467 | let a = _mm_setr_epi16( |
3468 | 0xFFFF as u16 as i16, 0x0FFF, 0x00FF, 0x000F, 0, 0, 0, 0, | |
3469 | ); | |
17df50a5 | 3470 | let r = _mm_srli_epi16::<4>(a); |
0731742a | 3471 | #[rustfmt::skip] |
0531ce1d XL |
3472 | let e = _mm_setr_epi16( |
3473 | 0xFFF as u16 as i16, 0xFF as u16 as i16, 0xF, 0, 0, 0, 0, 0, | |
3474 | ); | |
3475 | assert_eq_m128i(r, e); | |
3476 | } | |
3477 | ||
83c7162d | 3478 | #[simd_test(enable = "sse2")] |
0531ce1d XL |
3479 | unsafe fn test_mm_srl_epi16() { |
3480 | let a = _mm_setr_epi16(0xFF, 0, 0, 0, 0, 0, 0, 0); | |
3481 | let r = _mm_srl_epi16(a, _mm_setr_epi16(4, 0, 0, 0, 0, 0, 0, 0)); | |
3482 | assert_eq_m128i(r, _mm_setr_epi16(0xF, 0, 0, 0, 0, 0, 0, 0)); | |
3483 | let r = _mm_srl_epi16(a, _mm_setr_epi16(0, 0, 0, 0, 4, 0, 0, 0)); | |
3484 | assert_eq_m128i(r, _mm_setr_epi16(0xFF, 0, 0, 0, 0, 0, 0, 0)); | |
3485 | } | |
3486 | ||
83c7162d | 3487 | #[simd_test(enable = "sse2")] |
0531ce1d | 3488 | unsafe fn test_mm_srli_epi32() { |
17df50a5 | 3489 | let r = _mm_srli_epi32::<4>(_mm_set1_epi32(0xFFFF)); |
0531ce1d XL |
3490 | assert_eq_m128i(r, _mm_set1_epi32(0xFFF)); |
3491 | } | |
3492 | ||
83c7162d | 3493 | #[simd_test(enable = "sse2")] |
0531ce1d XL |
3494 | unsafe fn test_mm_srl_epi32() { |
3495 | let a = _mm_set1_epi32(0xFFFF); | |
3496 | let b = _mm_setr_epi32(4, 0, 0, 0); | |
3497 | let r = _mm_srl_epi32(a, b); | |
3498 | assert_eq_m128i(r, _mm_set1_epi32(0xFFF)); | |
3499 | } | |
3500 | ||
83c7162d | 3501 | #[simd_test(enable = "sse2")] |
0531ce1d | 3502 | unsafe fn test_mm_srli_epi64() { |
17df50a5 | 3503 | let r = _mm_srli_epi64::<4>(_mm_set1_epi64x(0xFFFFFFFF)); |
0531ce1d XL |
3504 | assert_eq_m128i(r, _mm_set1_epi64x(0xFFFFFFF)); |
3505 | } | |
3506 | ||
83c7162d | 3507 | #[simd_test(enable = "sse2")] |
0531ce1d XL |
3508 | unsafe fn test_mm_srl_epi64() { |
3509 | let a = _mm_set1_epi64x(0xFFFFFFFF); | |
3510 | let b = _mm_setr_epi64x(4, 0); | |
3511 | let r = _mm_srl_epi64(a, b); | |
3512 | assert_eq_m128i(r, _mm_set1_epi64x(0xFFFFFFF)); | |
3513 | } | |
3514 | ||
83c7162d | 3515 | #[simd_test(enable = "sse2")] |
0531ce1d XL |
3516 | unsafe fn test_mm_and_si128() { |
3517 | let a = _mm_set1_epi8(5); | |
3518 | let b = _mm_set1_epi8(3); | |
3519 | let r = _mm_and_si128(a, b); | |
3520 | assert_eq_m128i(r, _mm_set1_epi8(1)); | |
3521 | } | |
3522 | ||
83c7162d | 3523 | #[simd_test(enable = "sse2")] |
0531ce1d XL |
3524 | unsafe fn test_mm_andnot_si128() { |
3525 | let a = _mm_set1_epi8(5); | |
3526 | let b = _mm_set1_epi8(3); | |
3527 | let r = _mm_andnot_si128(a, b); | |
3528 | assert_eq_m128i(r, _mm_set1_epi8(2)); | |
3529 | } | |
3530 | ||
83c7162d | 3531 | #[simd_test(enable = "sse2")] |
0531ce1d XL |
3532 | unsafe fn test_mm_or_si128() { |
3533 | let a = _mm_set1_epi8(5); | |
3534 | let b = _mm_set1_epi8(3); | |
3535 | let r = _mm_or_si128(a, b); | |
3536 | assert_eq_m128i(r, _mm_set1_epi8(7)); | |
3537 | } | |
3538 | ||
83c7162d | 3539 | #[simd_test(enable = "sse2")] |
0531ce1d XL |
3540 | unsafe fn test_mm_xor_si128() { |
3541 | let a = _mm_set1_epi8(5); | |
3542 | let b = _mm_set1_epi8(3); | |
3543 | let r = _mm_xor_si128(a, b); | |
3544 | assert_eq_m128i(r, _mm_set1_epi8(6)); | |
3545 | } | |
3546 | ||
83c7162d | 3547 | #[simd_test(enable = "sse2")] |
0531ce1d | 3548 | unsafe fn test_mm_cmpeq_epi8() { |
0731742a XL |
3549 | let a = _mm_setr_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15); |
3550 | let b = _mm_setr_epi8(15, 14, 2, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); | |
0531ce1d | 3551 | let r = _mm_cmpeq_epi8(a, b); |
0731742a | 3552 | #[rustfmt::skip] |
0531ce1d XL |
3553 | assert_eq_m128i( |
3554 | r, | |
3555 | _mm_setr_epi8( | |
3556 | 0, 0, 0xFFu8 as i8, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 | |
3557 | ) | |
3558 | ); | |
3559 | } | |
3560 | ||
83c7162d | 3561 | #[simd_test(enable = "sse2")] |
0531ce1d XL |
3562 | unsafe fn test_mm_cmpeq_epi16() { |
3563 | let a = _mm_setr_epi16(0, 1, 2, 3, 4, 5, 6, 7); | |
3564 | let b = _mm_setr_epi16(7, 6, 2, 4, 3, 2, 1, 0); | |
3565 | let r = _mm_cmpeq_epi16(a, b); | |
3566 | assert_eq_m128i(r, _mm_setr_epi16(0, 0, !0, 0, 0, 0, 0, 0)); | |
3567 | } | |
3568 | ||
83c7162d | 3569 | #[simd_test(enable = "sse2")] |
0531ce1d XL |
3570 | unsafe fn test_mm_cmpeq_epi32() { |
3571 | let a = _mm_setr_epi32(0, 1, 2, 3); | |
3572 | let b = _mm_setr_epi32(3, 2, 2, 0); | |
3573 | let r = _mm_cmpeq_epi32(a, b); | |
3574 | assert_eq_m128i(r, _mm_setr_epi32(0, 0, !0, 0)); | |
3575 | } | |
3576 | ||
83c7162d | 3577 | #[simd_test(enable = "sse2")] |
0531ce1d XL |
3578 | unsafe fn test_mm_cmpgt_epi8() { |
3579 | let a = _mm_set_epi8(5, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0); | |
3580 | let b = _mm_set1_epi8(0); | |
3581 | let r = _mm_cmpgt_epi8(a, b); | |
3582 | let e = _mm_set_epi8(!0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0); | |
3583 | assert_eq_m128i(r, e); | |
3584 | } | |
3585 | ||
83c7162d | 3586 | #[simd_test(enable = "sse2")] |
0531ce1d XL |
3587 | unsafe fn test_mm_cmpgt_epi16() { |
3588 | let a = _mm_set_epi16(5, 0, 0, 0, 0, 0, 0, 0); | |
3589 | let b = _mm_set1_epi16(0); | |
3590 | let r = _mm_cmpgt_epi16(a, b); | |
3591 | let e = _mm_set_epi16(!0, 0, 0, 0, 0, 0, 0, 0); | |
3592 | assert_eq_m128i(r, e); | |
3593 | } | |
3594 | ||
83c7162d | 3595 | #[simd_test(enable = "sse2")] |
0531ce1d XL |
3596 | unsafe fn test_mm_cmpgt_epi32() { |
3597 | let a = _mm_set_epi32(5, 0, 0, 0); | |
3598 | let b = _mm_set1_epi32(0); | |
3599 | let r = _mm_cmpgt_epi32(a, b); | |
3600 | assert_eq_m128i(r, _mm_set_epi32(!0, 0, 0, 0)); | |
3601 | } | |
3602 | ||
83c7162d | 3603 | #[simd_test(enable = "sse2")] |
0531ce1d XL |
3604 | unsafe fn test_mm_cmplt_epi8() { |
3605 | let a = _mm_set1_epi8(0); | |
3606 | let b = _mm_set_epi8(5, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0); | |
3607 | let r = _mm_cmplt_epi8(a, b); | |
3608 | let e = _mm_set_epi8(!0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0); | |
3609 | assert_eq_m128i(r, e); | |
3610 | } | |
3611 | ||
83c7162d | 3612 | #[simd_test(enable = "sse2")] |
0531ce1d XL |
3613 | unsafe fn test_mm_cmplt_epi16() { |
3614 | let a = _mm_set1_epi16(0); | |
3615 | let b = _mm_set_epi16(5, 0, 0, 0, 0, 0, 0, 0); | |
3616 | let r = _mm_cmplt_epi16(a, b); | |
3617 | let e = _mm_set_epi16(!0, 0, 0, 0, 0, 0, 0, 0); | |
3618 | assert_eq_m128i(r, e); | |
3619 | } | |
3620 | ||
83c7162d | 3621 | #[simd_test(enable = "sse2")] |
0531ce1d XL |
3622 | unsafe fn test_mm_cmplt_epi32() { |
3623 | let a = _mm_set1_epi32(0); | |
3624 | let b = _mm_set_epi32(5, 0, 0, 0); | |
3625 | let r = _mm_cmplt_epi32(a, b); | |
3626 | assert_eq_m128i(r, _mm_set_epi32(!0, 0, 0, 0)); | |
3627 | } | |
3628 | ||
83c7162d | 3629 | #[simd_test(enable = "sse2")] |
0531ce1d XL |
3630 | unsafe fn test_mm_cvtepi32_pd() { |
3631 | let a = _mm_set_epi32(35, 25, 15, 5); | |
3632 | let r = _mm_cvtepi32_pd(a); | |
3633 | assert_eq_m128d(r, _mm_setr_pd(5.0, 15.0)); | |
3634 | } | |
3635 | ||
83c7162d | 3636 | #[simd_test(enable = "sse2")] |
0531ce1d XL |
3637 | unsafe fn test_mm_cvtsi32_sd() { |
3638 | let a = _mm_set1_pd(3.5); | |
3639 | let r = _mm_cvtsi32_sd(a, 5); | |
3640 | assert_eq_m128d(r, _mm_setr_pd(5.0, 3.5)); | |
3641 | } | |
3642 | ||
83c7162d | 3643 | #[simd_test(enable = "sse2")] |
0531ce1d XL |
3644 | unsafe fn test_mm_cvtepi32_ps() { |
3645 | let a = _mm_setr_epi32(1, 2, 3, 4); | |
3646 | let r = _mm_cvtepi32_ps(a); | |
3647 | assert_eq_m128(r, _mm_setr_ps(1.0, 2.0, 3.0, 4.0)); | |
3648 | } | |
3649 | ||
83c7162d | 3650 | #[simd_test(enable = "sse2")] |
0531ce1d XL |
3651 | unsafe fn test_mm_cvtps_epi32() { |
3652 | let a = _mm_setr_ps(1.0, 2.0, 3.0, 4.0); | |
3653 | let r = _mm_cvtps_epi32(a); | |
3654 | assert_eq_m128i(r, _mm_setr_epi32(1, 2, 3, 4)); | |
3655 | } | |
3656 | ||
83c7162d | 3657 | #[simd_test(enable = "sse2")] |
0531ce1d XL |
3658 | unsafe fn test_mm_cvtsi32_si128() { |
3659 | let r = _mm_cvtsi32_si128(5); | |
3660 | assert_eq_m128i(r, _mm_setr_epi32(5, 0, 0, 0)); | |
3661 | } | |
3662 | ||
83c7162d | 3663 | #[simd_test(enable = "sse2")] |
0531ce1d XL |
3664 | unsafe fn test_mm_cvtsi128_si32() { |
3665 | let r = _mm_cvtsi128_si32(_mm_setr_epi32(5, 0, 0, 0)); | |
3666 | assert_eq!(r, 5); | |
3667 | } | |
3668 | ||
83c7162d | 3669 | #[simd_test(enable = "sse2")] |
0531ce1d XL |
3670 | unsafe fn test_mm_set_epi64x() { |
3671 | let r = _mm_set_epi64x(0, 1); | |
3672 | assert_eq_m128i(r, _mm_setr_epi64x(1, 0)); | |
3673 | } | |
3674 | ||
83c7162d | 3675 | #[simd_test(enable = "sse2")] |
0531ce1d XL |
3676 | unsafe fn test_mm_set_epi32() { |
3677 | let r = _mm_set_epi32(0, 1, 2, 3); | |
3678 | assert_eq_m128i(r, _mm_setr_epi32(3, 2, 1, 0)); | |
3679 | } | |
3680 | ||
83c7162d | 3681 | #[simd_test(enable = "sse2")] |
0531ce1d XL |
3682 | unsafe fn test_mm_set_epi16() { |
3683 | let r = _mm_set_epi16(0, 1, 2, 3, 4, 5, 6, 7); | |
3684 | assert_eq_m128i(r, _mm_setr_epi16(7, 6, 5, 4, 3, 2, 1, 0)); | |
3685 | } | |
3686 | ||
83c7162d | 3687 | #[simd_test(enable = "sse2")] |
0531ce1d | 3688 | unsafe fn test_mm_set_epi8() { |
0731742a | 3689 | #[rustfmt::skip] |
0531ce1d XL |
3690 | let r = _mm_set_epi8( |
3691 | 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, | |
3692 | ); | |
0731742a | 3693 | #[rustfmt::skip] |
0531ce1d XL |
3694 | let e = _mm_setr_epi8( |
3695 | 15, 14, 13, 12, 11, 10, 9, 8, | |
3696 | 7, 6, 5, 4, 3, 2, 1, 0, | |
3697 | ); | |
3698 | assert_eq_m128i(r, e); | |
3699 | } | |
3700 | ||
83c7162d | 3701 | #[simd_test(enable = "sse2")] |
0531ce1d XL |
3702 | unsafe fn test_mm_set1_epi64x() { |
3703 | let r = _mm_set1_epi64x(1); | |
3704 | assert_eq_m128i(r, _mm_set1_epi64x(1)); | |
3705 | } | |
3706 | ||
83c7162d | 3707 | #[simd_test(enable = "sse2")] |
0531ce1d XL |
3708 | unsafe fn test_mm_set1_epi32() { |
3709 | let r = _mm_set1_epi32(1); | |
3710 | assert_eq_m128i(r, _mm_set1_epi32(1)); | |
3711 | } | |
3712 | ||
83c7162d | 3713 | #[simd_test(enable = "sse2")] |
0531ce1d XL |
3714 | unsafe fn test_mm_set1_epi16() { |
3715 | let r = _mm_set1_epi16(1); | |
3716 | assert_eq_m128i(r, _mm_set1_epi16(1)); | |
3717 | } | |
3718 | ||
83c7162d | 3719 | #[simd_test(enable = "sse2")] |
0531ce1d XL |
3720 | unsafe fn test_mm_set1_epi8() { |
3721 | let r = _mm_set1_epi8(1); | |
3722 | assert_eq_m128i(r, _mm_set1_epi8(1)); | |
3723 | } | |
3724 | ||
83c7162d | 3725 | #[simd_test(enable = "sse2")] |
0531ce1d XL |
3726 | unsafe fn test_mm_setr_epi32() { |
3727 | let r = _mm_setr_epi32(0, 1, 2, 3); | |
3728 | assert_eq_m128i(r, _mm_setr_epi32(0, 1, 2, 3)); | |
3729 | } | |
3730 | ||
83c7162d | 3731 | #[simd_test(enable = "sse2")] |
0531ce1d XL |
3732 | unsafe fn test_mm_setr_epi16() { |
3733 | let r = _mm_setr_epi16(0, 1, 2, 3, 4, 5, 6, 7); | |
3734 | assert_eq_m128i(r, _mm_setr_epi16(0, 1, 2, 3, 4, 5, 6, 7)); | |
3735 | } | |
3736 | ||
83c7162d | 3737 | #[simd_test(enable = "sse2")] |
0531ce1d | 3738 | unsafe fn test_mm_setr_epi8() { |
0731742a | 3739 | #[rustfmt::skip] |
0531ce1d XL |
3740 | let r = _mm_setr_epi8( |
3741 | 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, | |
3742 | ); | |
0731742a | 3743 | #[rustfmt::skip] |
0531ce1d XL |
3744 | let e = _mm_setr_epi8( |
3745 | 0, 1, 2, 3, 4, 5, 6, 7, | |
3746 | 8, 9, 10, 11, 12, 13, 14, 15, | |
3747 | ); | |
3748 | assert_eq_m128i(r, e); | |
3749 | } | |
3750 | ||
83c7162d | 3751 | #[simd_test(enable = "sse2")] |
0531ce1d XL |
3752 | unsafe fn test_mm_setzero_si128() { |
3753 | let r = _mm_setzero_si128(); | |
3754 | assert_eq_m128i(r, _mm_set1_epi64x(0)); | |
3755 | } | |
3756 | ||
83c7162d | 3757 | #[simd_test(enable = "sse2")] |
0531ce1d XL |
3758 | unsafe fn test_mm_loadl_epi64() { |
3759 | let a = _mm_setr_epi64x(6, 5); | |
3760 | let r = _mm_loadl_epi64(&a as *const _); | |
3761 | assert_eq_m128i(r, _mm_setr_epi64x(6, 0)); | |
3762 | } | |
3763 | ||
83c7162d | 3764 | #[simd_test(enable = "sse2")] |
0531ce1d XL |
3765 | unsafe fn test_mm_load_si128() { |
3766 | let a = _mm_set_epi64x(5, 6); | |
3767 | let r = _mm_load_si128(&a as *const _ as *const _); | |
3768 | assert_eq_m128i(a, r); | |
3769 | } | |
3770 | ||
83c7162d | 3771 | #[simd_test(enable = "sse2")] |
0531ce1d XL |
3772 | unsafe fn test_mm_loadu_si128() { |
3773 | let a = _mm_set_epi64x(5, 6); | |
3774 | let r = _mm_loadu_si128(&a as *const _ as *const _); | |
3775 | assert_eq_m128i(a, r); | |
3776 | } | |
3777 | ||
83c7162d | 3778 | #[simd_test(enable = "sse2")] |
0531ce1d XL |
3779 | unsafe fn test_mm_maskmoveu_si128() { |
3780 | let a = _mm_set1_epi8(9); | |
0731742a | 3781 | #[rustfmt::skip] |
0531ce1d XL |
3782 | let mask = _mm_set_epi8( |
3783 | 0, 0, 0x80u8 as i8, 0, 0, 0, 0, 0, | |
3784 | 0, 0, 0, 0, 0, 0, 0, 0, | |
3785 | ); | |
3786 | let mut r = _mm_set1_epi8(0); | |
3787 | _mm_maskmoveu_si128(a, mask, &mut r as *mut _ as *mut i8); | |
3788 | let e = _mm_set_epi8(0, 0, 9, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0); | |
3789 | assert_eq_m128i(r, e); | |
3790 | } | |
3791 | ||
83c7162d | 3792 | #[simd_test(enable = "sse2")] |
0531ce1d XL |
3793 | unsafe fn test_mm_store_si128() { |
3794 | let a = _mm_set1_epi8(9); | |
3795 | let mut r = _mm_set1_epi8(0); | |
3796 | _mm_store_si128(&mut r as *mut _ as *mut __m128i, a); | |
3797 | assert_eq_m128i(r, a); | |
3798 | } | |
3799 | ||
83c7162d | 3800 | #[simd_test(enable = "sse2")] |
0531ce1d XL |
3801 | unsafe fn test_mm_storeu_si128() { |
3802 | let a = _mm_set1_epi8(9); | |
3803 | let mut r = _mm_set1_epi8(0); | |
3804 | _mm_storeu_si128(&mut r as *mut _ as *mut __m128i, a); | |
3805 | assert_eq_m128i(r, a); | |
3806 | } | |
3807 | ||
83c7162d | 3808 | #[simd_test(enable = "sse2")] |
0531ce1d XL |
3809 | unsafe fn test_mm_storel_epi64() { |
3810 | let a = _mm_setr_epi64x(2, 9); | |
3811 | let mut r = _mm_set1_epi8(0); | |
3812 | _mm_storel_epi64(&mut r as *mut _ as *mut __m128i, a); | |
3813 | assert_eq_m128i(r, _mm_setr_epi64x(2, 0)); | |
3814 | } | |
3815 | ||
83c7162d | 3816 | #[simd_test(enable = "sse2")] |
0531ce1d XL |
3817 | unsafe fn test_mm_stream_si128() { |
3818 | let a = _mm_setr_epi32(1, 2, 3, 4); | |
3819 | let mut r = _mm_undefined_si128(); | |
3820 | _mm_stream_si128(&mut r as *mut _, a); | |
3821 | assert_eq_m128i(r, a); | |
3822 | } | |
3823 | ||
83c7162d | 3824 | #[simd_test(enable = "sse2")] |
0531ce1d XL |
3825 | unsafe fn test_mm_stream_si32() { |
3826 | let a: i32 = 7; | |
48663c56 | 3827 | let mut mem = boxed::Box::<i32>::new(-1); |
0531ce1d XL |
3828 | _mm_stream_si32(&mut *mem as *mut i32, a); |
3829 | assert_eq!(a, *mem); | |
3830 | } | |
3831 | ||
83c7162d | 3832 | #[simd_test(enable = "sse2")] |
0531ce1d XL |
3833 | unsafe fn test_mm_move_epi64() { |
3834 | let a = _mm_setr_epi64x(5, 6); | |
3835 | let r = _mm_move_epi64(a); | |
3836 | assert_eq_m128i(r, _mm_setr_epi64x(5, 0)); | |
3837 | } | |
3838 | ||
83c7162d | 3839 | #[simd_test(enable = "sse2")] |
0531ce1d XL |
3840 | unsafe fn test_mm_packs_epi16() { |
3841 | let a = _mm_setr_epi16(0x80, -0x81, 0, 0, 0, 0, 0, 0); | |
3842 | let b = _mm_setr_epi16(0, 0, 0, 0, 0, 0, -0x81, 0x80); | |
3843 | let r = _mm_packs_epi16(a, b); | |
0731742a | 3844 | #[rustfmt::skip] |
0531ce1d XL |
3845 | assert_eq_m128i( |
3846 | r, | |
3847 | _mm_setr_epi8( | |
3848 | 0x7F, -0x80, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, -0x80, 0x7F | |
3849 | ) | |
3850 | ); | |
3851 | } | |
3852 | ||
83c7162d | 3853 | #[simd_test(enable = "sse2")] |
0531ce1d XL |
3854 | unsafe fn test_mm_packs_epi32() { |
3855 | let a = _mm_setr_epi32(0x8000, -0x8001, 0, 0); | |
3856 | let b = _mm_setr_epi32(0, 0, -0x8001, 0x8000); | |
3857 | let r = _mm_packs_epi32(a, b); | |
3858 | assert_eq_m128i( | |
3859 | r, | |
3860 | _mm_setr_epi16(0x7FFF, -0x8000, 0, 0, 0, 0, -0x8000, 0x7FFF), | |
3861 | ); | |
3862 | } | |
3863 | ||
83c7162d | 3864 | #[simd_test(enable = "sse2")] |
0531ce1d XL |
3865 | unsafe fn test_mm_packus_epi16() { |
3866 | let a = _mm_setr_epi16(0x100, -1, 0, 0, 0, 0, 0, 0); | |
3867 | let b = _mm_setr_epi16(0, 0, 0, 0, 0, 0, -1, 0x100); | |
3868 | let r = _mm_packus_epi16(a, b); | |
3869 | assert_eq_m128i( | |
3870 | r, | |
3871 | _mm_setr_epi8(!0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, !0), | |
3872 | ); | |
3873 | } | |
3874 | ||
83c7162d | 3875 | #[simd_test(enable = "sse2")] |
0531ce1d XL |
3876 | unsafe fn test_mm_extract_epi16() { |
3877 | let a = _mm_setr_epi16(-1, 1, 2, 3, 4, 5, 6, 7); | |
17df50a5 XL |
3878 | let r1 = _mm_extract_epi16::<0>(a); |
3879 | let r2 = _mm_extract_epi16::<3>(a); | |
3dfed10e | 3880 | assert_eq!(r1, 0xFFFF); |
0531ce1d XL |
3881 | assert_eq!(r2, 3); |
3882 | } | |
3883 | ||
83c7162d | 3884 | #[simd_test(enable = "sse2")] |
0531ce1d XL |
3885 | unsafe fn test_mm_insert_epi16() { |
3886 | let a = _mm_setr_epi16(0, 1, 2, 3, 4, 5, 6, 7); | |
17df50a5 | 3887 | let r = _mm_insert_epi16::<0>(a, 9); |
0531ce1d XL |
3888 | let e = _mm_setr_epi16(9, 1, 2, 3, 4, 5, 6, 7); |
3889 | assert_eq_m128i(r, e); | |
3890 | } | |
3891 | ||
83c7162d | 3892 | #[simd_test(enable = "sse2")] |
0531ce1d | 3893 | unsafe fn test_mm_movemask_epi8() { |
0731742a | 3894 | #[rustfmt::skip] |
0531ce1d XL |
3895 | let a = _mm_setr_epi8( |
3896 | 0b1000_0000u8 as i8, 0b0, 0b1000_0000u8 as i8, 0b01, | |
3897 | 0b0101, 0b1111_0000u8 as i8, 0, 0, | |
e1599b0c | 3898 | 0, 0b1011_0101u8 as i8, 0b1111_0000u8 as i8, 0b0101, |
0531ce1d XL |
3899 | 0b01, 0b1000_0000u8 as i8, 0b0, 0b1000_0000u8 as i8, |
3900 | ); | |
3901 | let r = _mm_movemask_epi8(a); | |
e1599b0c | 3902 | assert_eq!(r, 0b10100110_00100101); |
0531ce1d XL |
3903 | } |
3904 | ||
83c7162d | 3905 | #[simd_test(enable = "sse2")] |
0531ce1d XL |
3906 | unsafe fn test_mm_shuffle_epi32() { |
3907 | let a = _mm_setr_epi32(5, 10, 15, 20); | |
17df50a5 | 3908 | let r = _mm_shuffle_epi32::<0b00_01_01_11>(a); |
0531ce1d XL |
3909 | let e = _mm_setr_epi32(20, 10, 10, 5); |
3910 | assert_eq_m128i(r, e); | |
3911 | } | |
3912 | ||
83c7162d | 3913 | #[simd_test(enable = "sse2")] |
0531ce1d XL |
3914 | unsafe fn test_mm_shufflehi_epi16() { |
3915 | let a = _mm_setr_epi16(1, 2, 3, 4, 5, 10, 15, 20); | |
17df50a5 | 3916 | let r = _mm_shufflehi_epi16::<0b00_01_01_11>(a); |
0531ce1d XL |
3917 | let e = _mm_setr_epi16(1, 2, 3, 4, 20, 10, 10, 5); |
3918 | assert_eq_m128i(r, e); | |
3919 | } | |
3920 | ||
83c7162d | 3921 | #[simd_test(enable = "sse2")] |
0531ce1d XL |
3922 | unsafe fn test_mm_shufflelo_epi16() { |
3923 | let a = _mm_setr_epi16(5, 10, 15, 20, 1, 2, 3, 4); | |
17df50a5 | 3924 | let r = _mm_shufflelo_epi16::<0b00_01_01_11>(a); |
0531ce1d XL |
3925 | let e = _mm_setr_epi16(20, 10, 10, 5, 1, 2, 3, 4); |
3926 | assert_eq_m128i(r, e); | |
3927 | } | |
3928 | ||
83c7162d | 3929 | #[simd_test(enable = "sse2")] |
0531ce1d | 3930 | unsafe fn test_mm_unpackhi_epi8() { |
0731742a | 3931 | #[rustfmt::skip] |
0531ce1d XL |
3932 | let a = _mm_setr_epi8( |
3933 | 0, 1, 2, 3, 4, 5, 6, 7, | |
3934 | 8, 9, 10, 11, 12, 13, 14, 15, | |
3935 | ); | |
0731742a | 3936 | #[rustfmt::skip] |
0531ce1d XL |
3937 | let b = _mm_setr_epi8( |
3938 | 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, | |
3939 | ); | |
3940 | let r = _mm_unpackhi_epi8(a, b); | |
0731742a | 3941 | #[rustfmt::skip] |
0531ce1d XL |
3942 | let e = _mm_setr_epi8( |
3943 | 8, 24, 9, 25, 10, 26, 11, 27, 12, 28, 13, 29, 14, 30, 15, 31, | |
3944 | ); | |
3945 | assert_eq_m128i(r, e); | |
3946 | } | |
3947 | ||
83c7162d | 3948 | #[simd_test(enable = "sse2")] |
0531ce1d XL |
3949 | unsafe fn test_mm_unpackhi_epi16() { |
3950 | let a = _mm_setr_epi16(0, 1, 2, 3, 4, 5, 6, 7); | |
3951 | let b = _mm_setr_epi16(8, 9, 10, 11, 12, 13, 14, 15); | |
3952 | let r = _mm_unpackhi_epi16(a, b); | |
3953 | let e = _mm_setr_epi16(4, 12, 5, 13, 6, 14, 7, 15); | |
3954 | assert_eq_m128i(r, e); | |
3955 | } | |
3956 | ||
83c7162d | 3957 | #[simd_test(enable = "sse2")] |
0531ce1d XL |
3958 | unsafe fn test_mm_unpackhi_epi32() { |
3959 | let a = _mm_setr_epi32(0, 1, 2, 3); | |
3960 | let b = _mm_setr_epi32(4, 5, 6, 7); | |
3961 | let r = _mm_unpackhi_epi32(a, b); | |
3962 | let e = _mm_setr_epi32(2, 6, 3, 7); | |
3963 | assert_eq_m128i(r, e); | |
3964 | } | |
3965 | ||
83c7162d | 3966 | #[simd_test(enable = "sse2")] |
0531ce1d XL |
3967 | unsafe fn test_mm_unpackhi_epi64() { |
3968 | let a = _mm_setr_epi64x(0, 1); | |
3969 | let b = _mm_setr_epi64x(2, 3); | |
3970 | let r = _mm_unpackhi_epi64(a, b); | |
3971 | let e = _mm_setr_epi64x(1, 3); | |
3972 | assert_eq_m128i(r, e); | |
3973 | } | |
3974 | ||
83c7162d | 3975 | #[simd_test(enable = "sse2")] |
0531ce1d | 3976 | unsafe fn test_mm_unpacklo_epi8() { |
0731742a | 3977 | #[rustfmt::skip] |
0531ce1d XL |
3978 | let a = _mm_setr_epi8( |
3979 | 0, 1, 2, 3, 4, 5, 6, 7, | |
3980 | 8, 9, 10, 11, 12, 13, 14, 15, | |
3981 | ); | |
0731742a | 3982 | #[rustfmt::skip] |
0531ce1d XL |
3983 | let b = _mm_setr_epi8( |
3984 | 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, | |
3985 | ); | |
3986 | let r = _mm_unpacklo_epi8(a, b); | |
0731742a | 3987 | #[rustfmt::skip] |
0531ce1d XL |
3988 | let e = _mm_setr_epi8( |
3989 | 0, 16, 1, 17, 2, 18, 3, 19, | |
3990 | 4, 20, 5, 21, 6, 22, 7, 23, | |
3991 | ); | |
3992 | assert_eq_m128i(r, e); | |
3993 | } | |
3994 | ||
83c7162d | 3995 | #[simd_test(enable = "sse2")] |
0531ce1d XL |
3996 | unsafe fn test_mm_unpacklo_epi16() { |
3997 | let a = _mm_setr_epi16(0, 1, 2, 3, 4, 5, 6, 7); | |
3998 | let b = _mm_setr_epi16(8, 9, 10, 11, 12, 13, 14, 15); | |
3999 | let r = _mm_unpacklo_epi16(a, b); | |
4000 | let e = _mm_setr_epi16(0, 8, 1, 9, 2, 10, 3, 11); | |
4001 | assert_eq_m128i(r, e); | |
4002 | } | |
4003 | ||
83c7162d | 4004 | #[simd_test(enable = "sse2")] |
0531ce1d XL |
4005 | unsafe fn test_mm_unpacklo_epi32() { |
4006 | let a = _mm_setr_epi32(0, 1, 2, 3); | |
4007 | let b = _mm_setr_epi32(4, 5, 6, 7); | |
4008 | let r = _mm_unpacklo_epi32(a, b); | |
4009 | let e = _mm_setr_epi32(0, 4, 1, 5); | |
4010 | assert_eq_m128i(r, e); | |
4011 | } | |
4012 | ||
83c7162d | 4013 | #[simd_test(enable = "sse2")] |
0531ce1d XL |
4014 | unsafe fn test_mm_unpacklo_epi64() { |
4015 | let a = _mm_setr_epi64x(0, 1); | |
4016 | let b = _mm_setr_epi64x(2, 3); | |
4017 | let r = _mm_unpacklo_epi64(a, b); | |
4018 | let e = _mm_setr_epi64x(0, 2); | |
4019 | assert_eq_m128i(r, e); | |
4020 | } | |
4021 | ||
83c7162d | 4022 | #[simd_test(enable = "sse2")] |
0531ce1d XL |
4023 | unsafe fn test_mm_add_sd() { |
4024 | let a = _mm_setr_pd(1.0, 2.0); | |
4025 | let b = _mm_setr_pd(5.0, 10.0); | |
4026 | let r = _mm_add_sd(a, b); | |
4027 | assert_eq_m128d(r, _mm_setr_pd(6.0, 2.0)); | |
4028 | } | |
4029 | ||
83c7162d | 4030 | #[simd_test(enable = "sse2")] |
0531ce1d XL |
4031 | unsafe fn test_mm_add_pd() { |
4032 | let a = _mm_setr_pd(1.0, 2.0); | |
4033 | let b = _mm_setr_pd(5.0, 10.0); | |
4034 | let r = _mm_add_pd(a, b); | |
4035 | assert_eq_m128d(r, _mm_setr_pd(6.0, 12.0)); | |
4036 | } | |
4037 | ||
83c7162d | 4038 | #[simd_test(enable = "sse2")] |
0531ce1d XL |
4039 | unsafe fn test_mm_div_sd() { |
4040 | let a = _mm_setr_pd(1.0, 2.0); | |
4041 | let b = _mm_setr_pd(5.0, 10.0); | |
4042 | let r = _mm_div_sd(a, b); | |
4043 | assert_eq_m128d(r, _mm_setr_pd(0.2, 2.0)); | |
4044 | } | |
4045 | ||
83c7162d | 4046 | #[simd_test(enable = "sse2")] |
0531ce1d XL |
4047 | unsafe fn test_mm_div_pd() { |
4048 | let a = _mm_setr_pd(1.0, 2.0); | |
4049 | let b = _mm_setr_pd(5.0, 10.0); | |
4050 | let r = _mm_div_pd(a, b); | |
4051 | assert_eq_m128d(r, _mm_setr_pd(0.2, 0.2)); | |
4052 | } | |
4053 | ||
83c7162d | 4054 | #[simd_test(enable = "sse2")] |
0531ce1d XL |
4055 | unsafe fn test_mm_max_sd() { |
4056 | let a = _mm_setr_pd(1.0, 2.0); | |
4057 | let b = _mm_setr_pd(5.0, 10.0); | |
4058 | let r = _mm_max_sd(a, b); | |
4059 | assert_eq_m128d(r, _mm_setr_pd(5.0, 2.0)); | |
4060 | } | |
4061 | ||
83c7162d | 4062 | #[simd_test(enable = "sse2")] |
0531ce1d XL |
4063 | unsafe fn test_mm_max_pd() { |
4064 | let a = _mm_setr_pd(1.0, 2.0); | |
4065 | let b = _mm_setr_pd(5.0, 10.0); | |
4066 | let r = _mm_max_pd(a, b); | |
4067 | assert_eq_m128d(r, _mm_setr_pd(5.0, 10.0)); | |
4068 | } | |
4069 | ||
83c7162d | 4070 | #[simd_test(enable = "sse2")] |
0531ce1d XL |
4071 | unsafe fn test_mm_min_sd() { |
4072 | let a = _mm_setr_pd(1.0, 2.0); | |
4073 | let b = _mm_setr_pd(5.0, 10.0); | |
4074 | let r = _mm_min_sd(a, b); | |
4075 | assert_eq_m128d(r, _mm_setr_pd(1.0, 2.0)); | |
4076 | } | |
4077 | ||
83c7162d | 4078 | #[simd_test(enable = "sse2")] |
0531ce1d XL |
4079 | unsafe fn test_mm_min_pd() { |
4080 | let a = _mm_setr_pd(1.0, 2.0); | |
4081 | let b = _mm_setr_pd(5.0, 10.0); | |
4082 | let r = _mm_min_pd(a, b); | |
4083 | assert_eq_m128d(r, _mm_setr_pd(1.0, 2.0)); | |
4084 | } | |
4085 | ||
83c7162d | 4086 | #[simd_test(enable = "sse2")] |
0531ce1d XL |
4087 | unsafe fn test_mm_mul_sd() { |
4088 | let a = _mm_setr_pd(1.0, 2.0); | |
4089 | let b = _mm_setr_pd(5.0, 10.0); | |
4090 | let r = _mm_mul_sd(a, b); | |
4091 | assert_eq_m128d(r, _mm_setr_pd(5.0, 2.0)); | |
4092 | } | |
4093 | ||
83c7162d | 4094 | #[simd_test(enable = "sse2")] |
0531ce1d XL |
4095 | unsafe fn test_mm_mul_pd() { |
4096 | let a = _mm_setr_pd(1.0, 2.0); | |
4097 | let b = _mm_setr_pd(5.0, 10.0); | |
4098 | let r = _mm_mul_pd(a, b); | |
4099 | assert_eq_m128d(r, _mm_setr_pd(5.0, 20.0)); | |
4100 | } | |
4101 | ||
83c7162d | 4102 | #[simd_test(enable = "sse2")] |
0531ce1d XL |
4103 | unsafe fn test_mm_sqrt_sd() { |
4104 | let a = _mm_setr_pd(1.0, 2.0); | |
4105 | let b = _mm_setr_pd(5.0, 10.0); | |
4106 | let r = _mm_sqrt_sd(a, b); | |
4107 | assert_eq_m128d(r, _mm_setr_pd(5.0f64.sqrt(), 2.0)); | |
4108 | } | |
4109 | ||
83c7162d | 4110 | #[simd_test(enable = "sse2")] |
0531ce1d XL |
4111 | unsafe fn test_mm_sqrt_pd() { |
4112 | let r = _mm_sqrt_pd(_mm_setr_pd(1.0, 2.0)); | |
4113 | assert_eq_m128d(r, _mm_setr_pd(1.0f64.sqrt(), 2.0f64.sqrt())); | |
4114 | } | |
4115 | ||
83c7162d | 4116 | #[simd_test(enable = "sse2")] |
0531ce1d XL |
4117 | unsafe fn test_mm_sub_sd() { |
4118 | let a = _mm_setr_pd(1.0, 2.0); | |
4119 | let b = _mm_setr_pd(5.0, 10.0); | |
4120 | let r = _mm_sub_sd(a, b); | |
4121 | assert_eq_m128d(r, _mm_setr_pd(-4.0, 2.0)); | |
4122 | } | |
4123 | ||
83c7162d | 4124 | #[simd_test(enable = "sse2")] |
0531ce1d XL |
4125 | unsafe fn test_mm_sub_pd() { |
4126 | let a = _mm_setr_pd(1.0, 2.0); | |
4127 | let b = _mm_setr_pd(5.0, 10.0); | |
4128 | let r = _mm_sub_pd(a, b); | |
4129 | assert_eq_m128d(r, _mm_setr_pd(-4.0, -8.0)); | |
4130 | } | |
4131 | ||
83c7162d | 4132 | #[simd_test(enable = "sse2")] |
0531ce1d XL |
4133 | unsafe fn test_mm_and_pd() { |
4134 | let a = transmute(u64x2::splat(5)); | |
4135 | let b = transmute(u64x2::splat(3)); | |
4136 | let r = _mm_and_pd(a, b); | |
4137 | let e = transmute(u64x2::splat(1)); | |
4138 | assert_eq_m128d(r, e); | |
4139 | } | |
4140 | ||
83c7162d | 4141 | #[simd_test(enable = "sse2")] |
0531ce1d XL |
4142 | unsafe fn test_mm_andnot_pd() { |
4143 | let a = transmute(u64x2::splat(5)); | |
4144 | let b = transmute(u64x2::splat(3)); | |
4145 | let r = _mm_andnot_pd(a, b); | |
4146 | let e = transmute(u64x2::splat(2)); | |
4147 | assert_eq_m128d(r, e); | |
4148 | } | |
4149 | ||
83c7162d | 4150 | #[simd_test(enable = "sse2")] |
0531ce1d XL |
4151 | unsafe fn test_mm_or_pd() { |
4152 | let a = transmute(u64x2::splat(5)); | |
4153 | let b = transmute(u64x2::splat(3)); | |
4154 | let r = _mm_or_pd(a, b); | |
4155 | let e = transmute(u64x2::splat(7)); | |
4156 | assert_eq_m128d(r, e); | |
4157 | } | |
4158 | ||
83c7162d | 4159 | #[simd_test(enable = "sse2")] |
0531ce1d XL |
4160 | unsafe fn test_mm_xor_pd() { |
4161 | let a = transmute(u64x2::splat(5)); | |
4162 | let b = transmute(u64x2::splat(3)); | |
4163 | let r = _mm_xor_pd(a, b); | |
4164 | let e = transmute(u64x2::splat(6)); | |
4165 | assert_eq_m128d(r, e); | |
4166 | } | |
4167 | ||
83c7162d | 4168 | #[simd_test(enable = "sse2")] |
0531ce1d XL |
4169 | unsafe fn test_mm_cmpeq_sd() { |
4170 | let (a, b) = (_mm_setr_pd(1.0, 2.0), _mm_setr_pd(1.0, 3.0)); | |
4171 | let e = _mm_setr_epi64x(!0, transmute(2.0f64)); | |
4172 | let r = transmute::<_, __m128i>(_mm_cmpeq_sd(a, b)); | |
4173 | assert_eq_m128i(r, e); | |
4174 | } | |
4175 | ||
83c7162d | 4176 | #[simd_test(enable = "sse2")] |
0531ce1d XL |
4177 | unsafe fn test_mm_cmplt_sd() { |
4178 | let (a, b) = (_mm_setr_pd(1.0, 2.0), _mm_setr_pd(5.0, 3.0)); | |
4179 | let e = _mm_setr_epi64x(!0, transmute(2.0f64)); | |
4180 | let r = transmute::<_, __m128i>(_mm_cmplt_sd(a, b)); | |
4181 | assert_eq_m128i(r, e); | |
4182 | } | |
4183 | ||
83c7162d | 4184 | #[simd_test(enable = "sse2")] |
0531ce1d XL |
4185 | unsafe fn test_mm_cmple_sd() { |
4186 | let (a, b) = (_mm_setr_pd(1.0, 2.0), _mm_setr_pd(1.0, 3.0)); | |
4187 | let e = _mm_setr_epi64x(!0, transmute(2.0f64)); | |
4188 | let r = transmute::<_, __m128i>(_mm_cmple_sd(a, b)); | |
4189 | assert_eq_m128i(r, e); | |
4190 | } | |
4191 | ||
83c7162d | 4192 | #[simd_test(enable = "sse2")] |
0531ce1d XL |
4193 | unsafe fn test_mm_cmpgt_sd() { |
4194 | let (a, b) = (_mm_setr_pd(5.0, 2.0), _mm_setr_pd(1.0, 3.0)); | |
4195 | let e = _mm_setr_epi64x(!0, transmute(2.0f64)); | |
4196 | let r = transmute::<_, __m128i>(_mm_cmpgt_sd(a, b)); | |
4197 | assert_eq_m128i(r, e); | |
4198 | } | |
4199 | ||
83c7162d | 4200 | #[simd_test(enable = "sse2")] |
0531ce1d XL |
4201 | unsafe fn test_mm_cmpge_sd() { |
4202 | let (a, b) = (_mm_setr_pd(1.0, 2.0), _mm_setr_pd(1.0, 3.0)); | |
4203 | let e = _mm_setr_epi64x(!0, transmute(2.0f64)); | |
4204 | let r = transmute::<_, __m128i>(_mm_cmpge_sd(a, b)); | |
4205 | assert_eq_m128i(r, e); | |
4206 | } | |
4207 | ||
83c7162d | 4208 | #[simd_test(enable = "sse2")] |
0531ce1d XL |
4209 | unsafe fn test_mm_cmpord_sd() { |
4210 | let (a, b) = (_mm_setr_pd(NAN, 2.0), _mm_setr_pd(5.0, 3.0)); | |
4211 | let e = _mm_setr_epi64x(0, transmute(2.0f64)); | |
4212 | let r = transmute::<_, __m128i>(_mm_cmpord_sd(a, b)); | |
4213 | assert_eq_m128i(r, e); | |
4214 | } | |
4215 | ||
83c7162d | 4216 | #[simd_test(enable = "sse2")] |
0531ce1d XL |
4217 | unsafe fn test_mm_cmpunord_sd() { |
4218 | let (a, b) = (_mm_setr_pd(NAN, 2.0), _mm_setr_pd(5.0, 3.0)); | |
4219 | let e = _mm_setr_epi64x(!0, transmute(2.0f64)); | |
4220 | let r = transmute::<_, __m128i>(_mm_cmpunord_sd(a, b)); | |
4221 | assert_eq_m128i(r, e); | |
4222 | } | |
4223 | ||
83c7162d | 4224 | #[simd_test(enable = "sse2")] |
0531ce1d XL |
4225 | unsafe fn test_mm_cmpneq_sd() { |
4226 | let (a, b) = (_mm_setr_pd(1.0, 2.0), _mm_setr_pd(5.0, 3.0)); | |
4227 | let e = _mm_setr_epi64x(!0, transmute(2.0f64)); | |
4228 | let r = transmute::<_, __m128i>(_mm_cmpneq_sd(a, b)); | |
4229 | assert_eq_m128i(r, e); | |
4230 | } | |
4231 | ||
83c7162d | 4232 | #[simd_test(enable = "sse2")] |
0531ce1d XL |
4233 | unsafe fn test_mm_cmpnlt_sd() { |
4234 | let (a, b) = (_mm_setr_pd(1.0, 2.0), _mm_setr_pd(5.0, 3.0)); | |
4235 | let e = _mm_setr_epi64x(0, transmute(2.0f64)); | |
4236 | let r = transmute::<_, __m128i>(_mm_cmpnlt_sd(a, b)); | |
4237 | assert_eq_m128i(r, e); | |
4238 | } | |
4239 | ||
83c7162d | 4240 | #[simd_test(enable = "sse2")] |
0531ce1d XL |
4241 | unsafe fn test_mm_cmpnle_sd() { |
4242 | let (a, b) = (_mm_setr_pd(1.0, 2.0), _mm_setr_pd(1.0, 3.0)); | |
4243 | let e = _mm_setr_epi64x(0, transmute(2.0f64)); | |
4244 | let r = transmute::<_, __m128i>(_mm_cmpnle_sd(a, b)); | |
4245 | assert_eq_m128i(r, e); | |
4246 | } | |
4247 | ||
83c7162d | 4248 | #[simd_test(enable = "sse2")] |
0531ce1d XL |
4249 | unsafe fn test_mm_cmpngt_sd() { |
4250 | let (a, b) = (_mm_setr_pd(5.0, 2.0), _mm_setr_pd(1.0, 3.0)); | |
4251 | let e = _mm_setr_epi64x(0, transmute(2.0f64)); | |
4252 | let r = transmute::<_, __m128i>(_mm_cmpngt_sd(a, b)); | |
4253 | assert_eq_m128i(r, e); | |
4254 | } | |
4255 | ||
83c7162d | 4256 | #[simd_test(enable = "sse2")] |
0531ce1d XL |
4257 | unsafe fn test_mm_cmpnge_sd() { |
4258 | let (a, b) = (_mm_setr_pd(1.0, 2.0), _mm_setr_pd(1.0, 3.0)); | |
4259 | let e = _mm_setr_epi64x(0, transmute(2.0f64)); | |
4260 | let r = transmute::<_, __m128i>(_mm_cmpnge_sd(a, b)); | |
4261 | assert_eq_m128i(r, e); | |
4262 | } | |
4263 | ||
83c7162d | 4264 | #[simd_test(enable = "sse2")] |
0531ce1d XL |
4265 | unsafe fn test_mm_cmpeq_pd() { |
4266 | let (a, b) = (_mm_setr_pd(1.0, 2.0), _mm_setr_pd(1.0, 3.0)); | |
4267 | let e = _mm_setr_epi64x(!0, 0); | |
4268 | let r = transmute::<_, __m128i>(_mm_cmpeq_pd(a, b)); | |
4269 | assert_eq_m128i(r, e); | |
4270 | } | |
4271 | ||
83c7162d | 4272 | #[simd_test(enable = "sse2")] |
0531ce1d XL |
4273 | unsafe fn test_mm_cmplt_pd() { |
4274 | let (a, b) = (_mm_setr_pd(1.0, 2.0), _mm_setr_pd(1.0, 3.0)); | |
4275 | let e = _mm_setr_epi64x(0, !0); | |
4276 | let r = transmute::<_, __m128i>(_mm_cmplt_pd(a, b)); | |
4277 | assert_eq_m128i(r, e); | |
4278 | } | |
4279 | ||
83c7162d | 4280 | #[simd_test(enable = "sse2")] |
0531ce1d XL |
4281 | unsafe fn test_mm_cmple_pd() { |
4282 | let (a, b) = (_mm_setr_pd(1.0, 2.0), _mm_setr_pd(1.0, 3.0)); | |
4283 | let e = _mm_setr_epi64x(!0, !0); | |
4284 | let r = transmute::<_, __m128i>(_mm_cmple_pd(a, b)); | |
4285 | assert_eq_m128i(r, e); | |
4286 | } | |
4287 | ||
83c7162d | 4288 | #[simd_test(enable = "sse2")] |
0531ce1d XL |
4289 | unsafe fn test_mm_cmpgt_pd() { |
4290 | let (a, b) = (_mm_setr_pd(1.0, 2.0), _mm_setr_pd(1.0, 3.0)); | |
4291 | let e = _mm_setr_epi64x(0, 0); | |
4292 | let r = transmute::<_, __m128i>(_mm_cmpgt_pd(a, b)); | |
4293 | assert_eq_m128i(r, e); | |
4294 | } | |
4295 | ||
83c7162d | 4296 | #[simd_test(enable = "sse2")] |
0531ce1d XL |
4297 | unsafe fn test_mm_cmpge_pd() { |
4298 | let (a, b) = (_mm_setr_pd(1.0, 2.0), _mm_setr_pd(1.0, 3.0)); | |
4299 | let e = _mm_setr_epi64x(!0, 0); | |
4300 | let r = transmute::<_, __m128i>(_mm_cmpge_pd(a, b)); | |
4301 | assert_eq_m128i(r, e); | |
4302 | } | |
4303 | ||
83c7162d | 4304 | #[simd_test(enable = "sse2")] |
0531ce1d XL |
4305 | unsafe fn test_mm_cmpord_pd() { |
4306 | let (a, b) = (_mm_setr_pd(NAN, 2.0), _mm_setr_pd(5.0, 3.0)); | |
4307 | let e = _mm_setr_epi64x(0, !0); | |
4308 | let r = transmute::<_, __m128i>(_mm_cmpord_pd(a, b)); | |
4309 | assert_eq_m128i(r, e); | |
4310 | } | |
4311 | ||
83c7162d | 4312 | #[simd_test(enable = "sse2")] |
0531ce1d XL |
4313 | unsafe fn test_mm_cmpunord_pd() { |
4314 | let (a, b) = (_mm_setr_pd(NAN, 2.0), _mm_setr_pd(5.0, 3.0)); | |
4315 | let e = _mm_setr_epi64x(!0, 0); | |
4316 | let r = transmute::<_, __m128i>(_mm_cmpunord_pd(a, b)); | |
4317 | assert_eq_m128i(r, e); | |
4318 | } | |
4319 | ||
83c7162d | 4320 | #[simd_test(enable = "sse2")] |
0531ce1d XL |
4321 | unsafe fn test_mm_cmpneq_pd() { |
4322 | let (a, b) = (_mm_setr_pd(1.0, 2.0), _mm_setr_pd(5.0, 3.0)); | |
4323 | let e = _mm_setr_epi64x(!0, !0); | |
4324 | let r = transmute::<_, __m128i>(_mm_cmpneq_pd(a, b)); | |
4325 | assert_eq_m128i(r, e); | |
4326 | } | |
4327 | ||
83c7162d | 4328 | #[simd_test(enable = "sse2")] |
0531ce1d XL |
4329 | unsafe fn test_mm_cmpnlt_pd() { |
4330 | let (a, b) = (_mm_setr_pd(1.0, 2.0), _mm_setr_pd(5.0, 3.0)); | |
4331 | let e = _mm_setr_epi64x(0, 0); | |
4332 | let r = transmute::<_, __m128i>(_mm_cmpnlt_pd(a, b)); | |
4333 | assert_eq_m128i(r, e); | |
4334 | } | |
4335 | ||
83c7162d | 4336 | #[simd_test(enable = "sse2")] |
0531ce1d XL |
4337 | unsafe fn test_mm_cmpnle_pd() { |
4338 | let (a, b) = (_mm_setr_pd(1.0, 2.0), _mm_setr_pd(1.0, 3.0)); | |
4339 | let e = _mm_setr_epi64x(0, 0); | |
4340 | let r = transmute::<_, __m128i>(_mm_cmpnle_pd(a, b)); | |
4341 | assert_eq_m128i(r, e); | |
4342 | } | |
4343 | ||
83c7162d | 4344 | #[simd_test(enable = "sse2")] |
0531ce1d XL |
4345 | unsafe fn test_mm_cmpngt_pd() { |
4346 | let (a, b) = (_mm_setr_pd(5.0, 2.0), _mm_setr_pd(1.0, 3.0)); | |
4347 | let e = _mm_setr_epi64x(0, !0); | |
4348 | let r = transmute::<_, __m128i>(_mm_cmpngt_pd(a, b)); | |
4349 | assert_eq_m128i(r, e); | |
4350 | } | |
4351 | ||
83c7162d | 4352 | #[simd_test(enable = "sse2")] |
0531ce1d XL |
4353 | unsafe fn test_mm_cmpnge_pd() { |
4354 | let (a, b) = (_mm_setr_pd(1.0, 2.0), _mm_setr_pd(1.0, 3.0)); | |
4355 | let e = _mm_setr_epi64x(0, !0); | |
4356 | let r = transmute::<_, __m128i>(_mm_cmpnge_pd(a, b)); | |
4357 | assert_eq_m128i(r, e); | |
4358 | } | |
4359 | ||
83c7162d | 4360 | #[simd_test(enable = "sse2")] |
0531ce1d XL |
4361 | unsafe fn test_mm_comieq_sd() { |
4362 | let (a, b) = (_mm_setr_pd(1.0, 2.0), _mm_setr_pd(1.0, 3.0)); | |
4363 | assert!(_mm_comieq_sd(a, b) != 0); | |
4364 | ||
4365 | let (a, b) = (_mm_setr_pd(NAN, 2.0), _mm_setr_pd(1.0, 3.0)); | |
4366 | assert!(_mm_comieq_sd(a, b) == 0); | |
4367 | } | |
4368 | ||
83c7162d | 4369 | #[simd_test(enable = "sse2")] |
0531ce1d XL |
4370 | unsafe fn test_mm_comilt_sd() { |
4371 | let (a, b) = (_mm_setr_pd(1.0, 2.0), _mm_setr_pd(1.0, 3.0)); | |
4372 | assert!(_mm_comilt_sd(a, b) == 0); | |
4373 | } | |
4374 | ||
83c7162d | 4375 | #[simd_test(enable = "sse2")] |
0531ce1d XL |
4376 | unsafe fn test_mm_comile_sd() { |
4377 | let (a, b) = (_mm_setr_pd(1.0, 2.0), _mm_setr_pd(1.0, 3.0)); | |
4378 | assert!(_mm_comile_sd(a, b) != 0); | |
4379 | } | |
4380 | ||
83c7162d | 4381 | #[simd_test(enable = "sse2")] |
0531ce1d XL |
4382 | unsafe fn test_mm_comigt_sd() { |
4383 | let (a, b) = (_mm_setr_pd(1.0, 2.0), _mm_setr_pd(1.0, 3.0)); | |
4384 | assert!(_mm_comigt_sd(a, b) == 0); | |
4385 | } | |
4386 | ||
83c7162d | 4387 | #[simd_test(enable = "sse2")] |
0531ce1d XL |
4388 | unsafe fn test_mm_comige_sd() { |
4389 | let (a, b) = (_mm_setr_pd(1.0, 2.0), _mm_setr_pd(1.0, 3.0)); | |
4390 | assert!(_mm_comige_sd(a, b) != 0); | |
4391 | } | |
4392 | ||
83c7162d | 4393 | #[simd_test(enable = "sse2")] |
0531ce1d XL |
4394 | unsafe fn test_mm_comineq_sd() { |
4395 | let (a, b) = (_mm_setr_pd(1.0, 2.0), _mm_setr_pd(1.0, 3.0)); | |
4396 | assert!(_mm_comineq_sd(a, b) == 0); | |
4397 | } | |
4398 | ||
83c7162d | 4399 | #[simd_test(enable = "sse2")] |
0531ce1d XL |
4400 | unsafe fn test_mm_ucomieq_sd() { |
4401 | let (a, b) = (_mm_setr_pd(1.0, 2.0), _mm_setr_pd(1.0, 3.0)); | |
4402 | assert!(_mm_ucomieq_sd(a, b) != 0); | |
4403 | ||
4404 | let (a, b) = (_mm_setr_pd(NAN, 2.0), _mm_setr_pd(NAN, 3.0)); | |
4405 | assert!(_mm_ucomieq_sd(a, b) == 0); | |
4406 | } | |
4407 | ||
83c7162d | 4408 | #[simd_test(enable = "sse2")] |
0531ce1d XL |
4409 | unsafe fn test_mm_ucomilt_sd() { |
4410 | let (a, b) = (_mm_setr_pd(1.0, 2.0), _mm_setr_pd(1.0, 3.0)); | |
4411 | assert!(_mm_ucomilt_sd(a, b) == 0); | |
4412 | } | |
4413 | ||
83c7162d | 4414 | #[simd_test(enable = "sse2")] |
0531ce1d XL |
4415 | unsafe fn test_mm_ucomile_sd() { |
4416 | let (a, b) = (_mm_setr_pd(1.0, 2.0), _mm_setr_pd(1.0, 3.0)); | |
4417 | assert!(_mm_ucomile_sd(a, b) != 0); | |
4418 | } | |
4419 | ||
83c7162d | 4420 | #[simd_test(enable = "sse2")] |
0531ce1d XL |
4421 | unsafe fn test_mm_ucomigt_sd() { |
4422 | let (a, b) = (_mm_setr_pd(1.0, 2.0), _mm_setr_pd(1.0, 3.0)); | |
4423 | assert!(_mm_ucomigt_sd(a, b) == 0); | |
4424 | } | |
4425 | ||
83c7162d | 4426 | #[simd_test(enable = "sse2")] |
0531ce1d XL |
4427 | unsafe fn test_mm_ucomige_sd() { |
4428 | let (a, b) = (_mm_setr_pd(1.0, 2.0), _mm_setr_pd(1.0, 3.0)); | |
4429 | assert!(_mm_ucomige_sd(a, b) != 0); | |
4430 | } | |
4431 | ||
83c7162d | 4432 | #[simd_test(enable = "sse2")] |
0531ce1d XL |
4433 | unsafe fn test_mm_ucomineq_sd() { |
4434 | let (a, b) = (_mm_setr_pd(1.0, 2.0), _mm_setr_pd(1.0, 3.0)); | |
4435 | assert!(_mm_ucomineq_sd(a, b) == 0); | |
4436 | } | |
4437 | ||
83c7162d | 4438 | #[simd_test(enable = "sse2")] |
0531ce1d XL |
4439 | unsafe fn test_mm_movemask_pd() { |
4440 | let r = _mm_movemask_pd(_mm_setr_pd(-1.0, 5.0)); | |
4441 | assert_eq!(r, 0b01); | |
4442 | ||
4443 | let r = _mm_movemask_pd(_mm_setr_pd(-1.0, -5.0)); | |
4444 | assert_eq!(r, 0b11); | |
4445 | } | |
4446 | ||
4447 | #[repr(align(16))] | |
4448 | struct Memory { | |
4449 | data: [f64; 4], | |
4450 | } | |
4451 | ||
83c7162d | 4452 | #[simd_test(enable = "sse2")] |
0531ce1d XL |
4453 | unsafe fn test_mm_load_pd() { |
4454 | let mem = Memory { | |
4455 | data: [1.0f64, 2.0, 3.0, 4.0], | |
4456 | }; | |
4457 | let vals = &mem.data; | |
4458 | let d = vals.as_ptr(); | |
4459 | ||
4460 | let r = _mm_load_pd(d); | |
4461 | assert_eq_m128d(r, _mm_setr_pd(1.0, 2.0)); | |
4462 | } | |
4463 | ||
83c7162d | 4464 | #[simd_test(enable = "sse2")] |
0531ce1d XL |
4465 | unsafe fn test_mm_load_sd() { |
4466 | let a = 1.; | |
4467 | let expected = _mm_setr_pd(a, 0.); | |
4468 | let r = _mm_load_sd(&a); | |
4469 | assert_eq_m128d(r, expected); | |
4470 | } | |
4471 | ||
83c7162d | 4472 | #[simd_test(enable = "sse2")] |
0531ce1d XL |
4473 | unsafe fn test_mm_loadh_pd() { |
4474 | let a = _mm_setr_pd(1., 2.); | |
4475 | let b = 3.; | |
4476 | let expected = _mm_setr_pd(_mm_cvtsd_f64(a), 3.); | |
4477 | let r = _mm_loadh_pd(a, &b); | |
4478 | assert_eq_m128d(r, expected); | |
4479 | } | |
4480 | ||
83c7162d | 4481 | #[simd_test(enable = "sse2")] |
0531ce1d XL |
4482 | unsafe fn test_mm_loadl_pd() { |
4483 | let a = _mm_setr_pd(1., 2.); | |
4484 | let b = 3.; | |
4485 | let expected = _mm_setr_pd(3., get_m128d(a, 1)); | |
4486 | let r = _mm_loadl_pd(a, &b); | |
4487 | assert_eq_m128d(r, expected); | |
4488 | } | |
4489 | ||
83c7162d | 4490 | #[simd_test(enable = "sse2")] |
0531ce1d XL |
4491 | unsafe fn test_mm_stream_pd() { |
4492 | #[repr(align(128))] | |
4493 | struct Memory { | |
4494 | pub data: [f64; 2], | |
4495 | } | |
4496 | let a = _mm_set1_pd(7.0); | |
8faf50e0 | 4497 | let mut mem = Memory { data: [-1.0; 2] }; |
0531ce1d XL |
4498 | |
4499 | _mm_stream_pd(&mut mem.data[0] as *mut f64, a); | |
4500 | for i in 0..2 { | |
4501 | assert_eq!(mem.data[i], get_m128d(a, i)); | |
4502 | } | |
4503 | } | |
4504 | ||
83c7162d | 4505 | #[simd_test(enable = "sse2")] |
0531ce1d XL |
4506 | unsafe fn test_mm_store_sd() { |
4507 | let mut dest = 0.; | |
4508 | let a = _mm_setr_pd(1., 2.); | |
4509 | _mm_store_sd(&mut dest, a); | |
4510 | assert_eq!(dest, _mm_cvtsd_f64(a)); | |
4511 | } | |
4512 | ||
83c7162d | 4513 | #[simd_test(enable = "sse2")] |
0531ce1d | 4514 | unsafe fn test_mm_store_pd() { |
8faf50e0 | 4515 | let mut mem = Memory { data: [0.0f64; 4] }; |
0531ce1d XL |
4516 | let vals = &mut mem.data; |
4517 | let a = _mm_setr_pd(1.0, 2.0); | |
4518 | let d = vals.as_mut_ptr(); | |
4519 | ||
4520 | _mm_store_pd(d, *black_box(&a)); | |
4521 | assert_eq!(vals[0], 1.0); | |
4522 | assert_eq!(vals[1], 2.0); | |
4523 | } | |
4524 | ||
cdc7bbd5 | 4525 | #[simd_test(enable = "sse2")] |
0531ce1d | 4526 | unsafe fn test_mm_storeu_pd() { |
8faf50e0 | 4527 | let mut mem = Memory { data: [0.0f64; 4] }; |
0531ce1d XL |
4528 | let vals = &mut mem.data; |
4529 | let a = _mm_setr_pd(1.0, 2.0); | |
4530 | ||
4531 | let mut ofs = 0; | |
4532 | let mut p = vals.as_mut_ptr(); | |
4533 | ||
532ac7d7 | 4534 | // Make sure p is **not** aligned to 16-byte boundary |
0531ce1d XL |
4535 | if (p as usize) & 0xf == 0 { |
4536 | ofs = 1; | |
4537 | p = p.offset(1); | |
4538 | } | |
4539 | ||
4540 | _mm_storeu_pd(p, *black_box(&a)); | |
4541 | ||
4542 | if ofs > 0 { | |
4543 | assert_eq!(vals[ofs - 1], 0.0); | |
4544 | } | |
4545 | assert_eq!(vals[ofs + 0], 1.0); | |
4546 | assert_eq!(vals[ofs + 1], 2.0); | |
4547 | } | |
4548 | ||
83c7162d | 4549 | #[simd_test(enable = "sse2")] |
0531ce1d | 4550 | unsafe fn test_mm_store1_pd() { |
8faf50e0 | 4551 | let mut mem = Memory { data: [0.0f64; 4] }; |
0531ce1d XL |
4552 | let vals = &mut mem.data; |
4553 | let a = _mm_setr_pd(1.0, 2.0); | |
4554 | let d = vals.as_mut_ptr(); | |
4555 | ||
4556 | _mm_store1_pd(d, *black_box(&a)); | |
4557 | assert_eq!(vals[0], 1.0); | |
4558 | assert_eq!(vals[1], 1.0); | |
4559 | } | |
4560 | ||
83c7162d | 4561 | #[simd_test(enable = "sse2")] |
0531ce1d | 4562 | unsafe fn test_mm_store_pd1() { |
8faf50e0 | 4563 | let mut mem = Memory { data: [0.0f64; 4] }; |
0531ce1d XL |
4564 | let vals = &mut mem.data; |
4565 | let a = _mm_setr_pd(1.0, 2.0); | |
4566 | let d = vals.as_mut_ptr(); | |
4567 | ||
4568 | _mm_store_pd1(d, *black_box(&a)); | |
4569 | assert_eq!(vals[0], 1.0); | |
4570 | assert_eq!(vals[1], 1.0); | |
4571 | } | |
4572 | ||
83c7162d | 4573 | #[simd_test(enable = "sse2")] |
0531ce1d | 4574 | unsafe fn test_mm_storer_pd() { |
8faf50e0 | 4575 | let mut mem = Memory { data: [0.0f64; 4] }; |
0531ce1d XL |
4576 | let vals = &mut mem.data; |
4577 | let a = _mm_setr_pd(1.0, 2.0); | |
4578 | let d = vals.as_mut_ptr(); | |
4579 | ||
4580 | _mm_storer_pd(d, *black_box(&a)); | |
4581 | assert_eq!(vals[0], 2.0); | |
4582 | assert_eq!(vals[1], 1.0); | |
4583 | } | |
4584 | ||
83c7162d | 4585 | #[simd_test(enable = "sse2")] |
0531ce1d XL |
4586 | unsafe fn test_mm_storeh_pd() { |
4587 | let mut dest = 0.; | |
4588 | let a = _mm_setr_pd(1., 2.); | |
4589 | _mm_storeh_pd(&mut dest, a); | |
4590 | assert_eq!(dest, get_m128d(a, 1)); | |
4591 | } | |
4592 | ||
83c7162d | 4593 | #[simd_test(enable = "sse2")] |
0531ce1d XL |
4594 | unsafe fn test_mm_storel_pd() { |
4595 | let mut dest = 0.; | |
4596 | let a = _mm_setr_pd(1., 2.); | |
4597 | _mm_storel_pd(&mut dest, a); | |
4598 | assert_eq!(dest, _mm_cvtsd_f64(a)); | |
4599 | } | |
4600 | ||
83c7162d | 4601 | #[simd_test(enable = "sse2")] |
0531ce1d XL |
4602 | unsafe fn test_mm_loadr_pd() { |
4603 | let mut mem = Memory { | |
4604 | data: [1.0f64, 2.0, 3.0, 4.0], | |
4605 | }; | |
4606 | let vals = &mut mem.data; | |
4607 | let d = vals.as_ptr(); | |
4608 | ||
4609 | let r = _mm_loadr_pd(d); | |
4610 | assert_eq_m128d(r, _mm_setr_pd(2.0, 1.0)); | |
4611 | } | |
4612 | ||
83c7162d | 4613 | #[simd_test(enable = "sse2")] |
0531ce1d XL |
4614 | unsafe fn test_mm_loadu_pd() { |
4615 | let mut mem = Memory { | |
4616 | data: [1.0f64, 2.0, 3.0, 4.0], | |
4617 | }; | |
4618 | let vals = &mut mem.data; | |
4619 | let mut d = vals.as_ptr(); | |
4620 | ||
4621 | // make sure d is not aligned to 16-byte boundary | |
4622 | let mut offset = 0; | |
4623 | if (d as usize) & 0xf == 0 { | |
4624 | offset = 1; | |
4625 | d = d.offset(offset as isize); | |
4626 | } | |
4627 | ||
4628 | let r = _mm_loadu_pd(d); | |
8faf50e0 | 4629 | let e = _mm_add_pd(_mm_setr_pd(1.0, 2.0), _mm_set1_pd(offset as f64)); |
0531ce1d XL |
4630 | assert_eq_m128d(r, e); |
4631 | } | |
4632 | ||
83c7162d | 4633 | #[simd_test(enable = "sse2")] |
0531ce1d XL |
4634 | unsafe fn test_mm_cvtpd_ps() { |
4635 | let r = _mm_cvtpd_ps(_mm_setr_pd(-1.0, 5.0)); | |
4636 | assert_eq_m128(r, _mm_setr_ps(-1.0, 5.0, 0.0, 0.0)); | |
4637 | ||
4638 | let r = _mm_cvtpd_ps(_mm_setr_pd(-1.0, -5.0)); | |
4639 | assert_eq_m128(r, _mm_setr_ps(-1.0, -5.0, 0.0, 0.0)); | |
4640 | ||
4641 | let r = _mm_cvtpd_ps(_mm_setr_pd(f64::MAX, f64::MIN)); | |
0731742a | 4642 | assert_eq_m128(r, _mm_setr_ps(f32::INFINITY, f32::NEG_INFINITY, 0.0, 0.0)); |
0531ce1d XL |
4643 | |
4644 | let r = _mm_cvtpd_ps(_mm_setr_pd(f32::MAX as f64, f32::MIN as f64)); | |
4645 | assert_eq_m128(r, _mm_setr_ps(f32::MAX, f32::MIN, 0.0, 0.0)); | |
4646 | } | |
4647 | ||
83c7162d | 4648 | #[simd_test(enable = "sse2")] |
0531ce1d XL |
4649 | unsafe fn test_mm_cvtps_pd() { |
4650 | let r = _mm_cvtps_pd(_mm_setr_ps(-1.0, 2.0, -3.0, 5.0)); | |
4651 | assert_eq_m128d(r, _mm_setr_pd(-1.0, 2.0)); | |
4652 | ||
4653 | let r = _mm_cvtps_pd(_mm_setr_ps( | |
4654 | f32::MAX, | |
4655 | f32::INFINITY, | |
4656 | f32::NEG_INFINITY, | |
4657 | f32::MIN, | |
4658 | )); | |
4659 | assert_eq_m128d(r, _mm_setr_pd(f32::MAX as f64, f64::INFINITY)); | |
4660 | } | |
4661 | ||
83c7162d | 4662 | #[simd_test(enable = "sse2")] |
0531ce1d XL |
4663 | unsafe fn test_mm_cvtpd_epi32() { |
4664 | let r = _mm_cvtpd_epi32(_mm_setr_pd(-1.0, 5.0)); | |
4665 | assert_eq_m128i(r, _mm_setr_epi32(-1, 5, 0, 0)); | |
4666 | ||
4667 | let r = _mm_cvtpd_epi32(_mm_setr_pd(-1.0, -5.0)); | |
4668 | assert_eq_m128i(r, _mm_setr_epi32(-1, -5, 0, 0)); | |
4669 | ||
4670 | let r = _mm_cvtpd_epi32(_mm_setr_pd(f64::MAX, f64::MIN)); | |
4671 | assert_eq_m128i(r, _mm_setr_epi32(i32::MIN, i32::MIN, 0, 0)); | |
4672 | ||
4673 | let r = _mm_cvtpd_epi32(_mm_setr_pd(f64::INFINITY, f64::NEG_INFINITY)); | |
4674 | assert_eq_m128i(r, _mm_setr_epi32(i32::MIN, i32::MIN, 0, 0)); | |
4675 | ||
4676 | let r = _mm_cvtpd_epi32(_mm_setr_pd(f64::NAN, f64::NAN)); | |
4677 | assert_eq_m128i(r, _mm_setr_epi32(i32::MIN, i32::MIN, 0, 0)); | |
4678 | } | |
4679 | ||
83c7162d | 4680 | #[simd_test(enable = "sse2")] |
0531ce1d XL |
4681 | unsafe fn test_mm_cvtsd_si32() { |
4682 | let r = _mm_cvtsd_si32(_mm_setr_pd(-2.0, 5.0)); | |
4683 | assert_eq!(r, -2); | |
4684 | ||
4685 | let r = _mm_cvtsd_si32(_mm_setr_pd(f64::MAX, f64::MIN)); | |
4686 | assert_eq!(r, i32::MIN); | |
4687 | ||
4688 | let r = _mm_cvtsd_si32(_mm_setr_pd(f64::NAN, f64::NAN)); | |
4689 | assert_eq!(r, i32::MIN); | |
4690 | } | |
4691 | ||
83c7162d | 4692 | #[simd_test(enable = "sse2")] |
0531ce1d XL |
4693 | unsafe fn test_mm_cvtsd_ss() { |
4694 | let a = _mm_setr_ps(-1.1, -2.2, 3.3, 4.4); | |
4695 | let b = _mm_setr_pd(2.0, -5.0); | |
4696 | ||
4697 | let r = _mm_cvtsd_ss(a, b); | |
4698 | ||
4699 | assert_eq_m128(r, _mm_setr_ps(2.0, -2.2, 3.3, 4.4)); | |
4700 | ||
0731742a | 4701 | let a = _mm_setr_ps(-1.1, f32::NEG_INFINITY, f32::MAX, f32::NEG_INFINITY); |
0531ce1d XL |
4702 | let b = _mm_setr_pd(f64::INFINITY, -5.0); |
4703 | ||
4704 | let r = _mm_cvtsd_ss(a, b); | |
4705 | ||
4706 | assert_eq_m128( | |
4707 | r, | |
4708 | _mm_setr_ps( | |
4709 | f32::INFINITY, | |
4710 | f32::NEG_INFINITY, | |
4711 | f32::MAX, | |
4712 | f32::NEG_INFINITY, | |
4713 | ), | |
4714 | ); | |
4715 | } | |
4716 | ||
83c7162d | 4717 | #[simd_test(enable = "sse2")] |
0531ce1d XL |
4718 | unsafe fn test_mm_cvtsd_f64() { |
4719 | let r = _mm_cvtsd_f64(_mm_setr_pd(-1.1, 2.2)); | |
4720 | assert_eq!(r, -1.1); | |
4721 | } | |
4722 | ||
83c7162d | 4723 | #[simd_test(enable = "sse2")] |
0531ce1d XL |
4724 | unsafe fn test_mm_cvtss_sd() { |
4725 | let a = _mm_setr_pd(-1.1, 2.2); | |
4726 | let b = _mm_setr_ps(1.0, 2.0, 3.0, 4.0); | |
4727 | ||
4728 | let r = _mm_cvtss_sd(a, b); | |
4729 | assert_eq_m128d(r, _mm_setr_pd(1.0, 2.2)); | |
4730 | ||
4731 | let a = _mm_setr_pd(-1.1, f64::INFINITY); | |
4732 | let b = _mm_setr_ps(f32::NEG_INFINITY, 2.0, 3.0, 4.0); | |
4733 | ||
4734 | let r = _mm_cvtss_sd(a, b); | |
4735 | assert_eq_m128d(r, _mm_setr_pd(f64::NEG_INFINITY, f64::INFINITY)); | |
4736 | } | |
4737 | ||
83c7162d | 4738 | #[simd_test(enable = "sse2")] |
0531ce1d XL |
4739 | unsafe fn test_mm_cvttpd_epi32() { |
4740 | let a = _mm_setr_pd(-1.1, 2.2); | |
4741 | let r = _mm_cvttpd_epi32(a); | |
4742 | assert_eq_m128i(r, _mm_setr_epi32(-1, 2, 0, 0)); | |
4743 | ||
4744 | let a = _mm_setr_pd(f64::NEG_INFINITY, f64::NAN); | |
4745 | let r = _mm_cvttpd_epi32(a); | |
4746 | assert_eq_m128i(r, _mm_setr_epi32(i32::MIN, i32::MIN, 0, 0)); | |
4747 | } | |
4748 | ||
83c7162d | 4749 | #[simd_test(enable = "sse2")] |
0531ce1d XL |
4750 | unsafe fn test_mm_cvttsd_si32() { |
4751 | let a = _mm_setr_pd(-1.1, 2.2); | |
4752 | let r = _mm_cvttsd_si32(a); | |
4753 | assert_eq!(r, -1); | |
4754 | ||
4755 | let a = _mm_setr_pd(f64::NEG_INFINITY, f64::NAN); | |
4756 | let r = _mm_cvttsd_si32(a); | |
4757 | assert_eq!(r, i32::MIN); | |
4758 | } | |
4759 | ||
83c7162d | 4760 | #[simd_test(enable = "sse2")] |
0531ce1d XL |
4761 | unsafe fn test_mm_cvttps_epi32() { |
4762 | let a = _mm_setr_ps(-1.1, 2.2, -3.3, 6.6); | |
4763 | let r = _mm_cvttps_epi32(a); | |
4764 | assert_eq_m128i(r, _mm_setr_epi32(-1, 2, -3, 6)); | |
4765 | ||
0731742a | 4766 | let a = _mm_setr_ps(f32::NEG_INFINITY, f32::INFINITY, f32::MIN, f32::MAX); |
0531ce1d | 4767 | let r = _mm_cvttps_epi32(a); |
0731742a | 4768 | assert_eq_m128i(r, _mm_setr_epi32(i32::MIN, i32::MIN, i32::MIN, i32::MIN)); |
0531ce1d XL |
4769 | } |
4770 | ||
83c7162d | 4771 | #[simd_test(enable = "sse2")] |
0531ce1d XL |
4772 | unsafe fn test_mm_set_sd() { |
4773 | let r = _mm_set_sd(-1.0_f64); | |
4774 | assert_eq_m128d(r, _mm_setr_pd(-1.0_f64, 0_f64)); | |
4775 | } | |
4776 | ||
83c7162d | 4777 | #[simd_test(enable = "sse2")] |
0531ce1d XL |
4778 | unsafe fn test_mm_set1_pd() { |
4779 | let r = _mm_set1_pd(-1.0_f64); | |
4780 | assert_eq_m128d(r, _mm_setr_pd(-1.0_f64, -1.0_f64)); | |
4781 | } | |
4782 | ||
83c7162d | 4783 | #[simd_test(enable = "sse2")] |
0531ce1d XL |
4784 | unsafe fn test_mm_set_pd1() { |
4785 | let r = _mm_set_pd1(-2.0_f64); | |
4786 | assert_eq_m128d(r, _mm_setr_pd(-2.0_f64, -2.0_f64)); | |
4787 | } | |
4788 | ||
83c7162d | 4789 | #[simd_test(enable = "sse2")] |
0531ce1d XL |
4790 | unsafe fn test_mm_set_pd() { |
4791 | let r = _mm_set_pd(1.0_f64, 5.0_f64); | |
4792 | assert_eq_m128d(r, _mm_setr_pd(5.0_f64, 1.0_f64)); | |
4793 | } | |
4794 | ||
83c7162d | 4795 | #[simd_test(enable = "sse2")] |
0531ce1d XL |
4796 | unsafe fn test_mm_setr_pd() { |
4797 | let r = _mm_setr_pd(1.0_f64, -5.0_f64); | |
4798 | assert_eq_m128d(r, _mm_setr_pd(1.0_f64, -5.0_f64)); | |
4799 | } | |
4800 | ||
83c7162d | 4801 | #[simd_test(enable = "sse2")] |
0531ce1d XL |
4802 | unsafe fn test_mm_setzero_pd() { |
4803 | let r = _mm_setzero_pd(); | |
4804 | assert_eq_m128d(r, _mm_setr_pd(0_f64, 0_f64)); | |
4805 | } | |
4806 | ||
83c7162d | 4807 | #[simd_test(enable = "sse2")] |
0531ce1d XL |
4808 | unsafe fn test_mm_load1_pd() { |
4809 | let d = -5.0; | |
4810 | let r = _mm_load1_pd(&d); | |
4811 | assert_eq_m128d(r, _mm_setr_pd(d, d)); | |
4812 | } | |
4813 | ||
83c7162d | 4814 | #[simd_test(enable = "sse2")] |
0531ce1d XL |
4815 | unsafe fn test_mm_load_pd1() { |
4816 | let d = -5.0; | |
4817 | let r = _mm_load_pd1(&d); | |
4818 | assert_eq_m128d(r, _mm_setr_pd(d, d)); | |
4819 | } | |
4820 | ||
83c7162d | 4821 | #[simd_test(enable = "sse2")] |
0531ce1d XL |
4822 | unsafe fn test_mm_unpackhi_pd() { |
4823 | let a = _mm_setr_pd(1.0, 2.0); | |
4824 | let b = _mm_setr_pd(3.0, 4.0); | |
4825 | let r = _mm_unpackhi_pd(a, b); | |
4826 | assert_eq_m128d(r, _mm_setr_pd(2.0, 4.0)); | |
4827 | } | |
4828 | ||
83c7162d | 4829 | #[simd_test(enable = "sse2")] |
0531ce1d XL |
4830 | unsafe fn test_mm_unpacklo_pd() { |
4831 | let a = _mm_setr_pd(1.0, 2.0); | |
4832 | let b = _mm_setr_pd(3.0, 4.0); | |
4833 | let r = _mm_unpacklo_pd(a, b); | |
4834 | assert_eq_m128d(r, _mm_setr_pd(1.0, 3.0)); | |
4835 | } | |
4836 | ||
83c7162d | 4837 | #[simd_test(enable = "sse2")] |
0531ce1d XL |
4838 | unsafe fn test_mm_shuffle_pd() { |
4839 | let a = _mm_setr_pd(1., 2.); | |
4840 | let b = _mm_setr_pd(3., 4.); | |
4841 | let expected = _mm_setr_pd(1., 3.); | |
17df50a5 | 4842 | let r = _mm_shuffle_pd::<0b00_00_00_00>(a, b); |
0531ce1d XL |
4843 | assert_eq_m128d(r, expected); |
4844 | } | |
4845 | ||
83c7162d | 4846 | #[simd_test(enable = "sse2")] |
0531ce1d XL |
4847 | unsafe fn test_mm_move_sd() { |
4848 | let a = _mm_setr_pd(1., 2.); | |
4849 | let b = _mm_setr_pd(3., 4.); | |
4850 | let expected = _mm_setr_pd(3., 2.); | |
4851 | let r = _mm_move_sd(a, b); | |
4852 | assert_eq_m128d(r, expected); | |
4853 | } | |
4854 | ||
83c7162d | 4855 | #[simd_test(enable = "sse2")] |
0531ce1d XL |
4856 | unsafe fn test_mm_castpd_ps() { |
4857 | let a = _mm_set1_pd(0.); | |
4858 | let expected = _mm_set1_ps(0.); | |
4859 | let r = _mm_castpd_ps(a); | |
4860 | assert_eq_m128(r, expected); | |
4861 | } | |
4862 | ||
83c7162d | 4863 | #[simd_test(enable = "sse2")] |
0531ce1d XL |
4864 | unsafe fn test_mm_castpd_si128() { |
4865 | let a = _mm_set1_pd(0.); | |
4866 | let expected = _mm_set1_epi64x(0); | |
4867 | let r = _mm_castpd_si128(a); | |
4868 | assert_eq_m128i(r, expected); | |
4869 | } | |
4870 | ||
83c7162d | 4871 | #[simd_test(enable = "sse2")] |
0531ce1d XL |
4872 | unsafe fn test_mm_castps_pd() { |
4873 | let a = _mm_set1_ps(0.); | |
4874 | let expected = _mm_set1_pd(0.); | |
4875 | let r = _mm_castps_pd(a); | |
4876 | assert_eq_m128d(r, expected); | |
4877 | } | |
4878 | ||
83c7162d | 4879 | #[simd_test(enable = "sse2")] |
0531ce1d XL |
4880 | unsafe fn test_mm_castps_si128() { |
4881 | let a = _mm_set1_ps(0.); | |
4882 | let expected = _mm_set1_epi32(0); | |
4883 | let r = _mm_castps_si128(a); | |
4884 | assert_eq_m128i(r, expected); | |
4885 | } | |
4886 | ||
83c7162d | 4887 | #[simd_test(enable = "sse2")] |
0531ce1d XL |
4888 | unsafe fn test_mm_castsi128_pd() { |
4889 | let a = _mm_set1_epi64x(0); | |
4890 | let expected = _mm_set1_pd(0.); | |
4891 | let r = _mm_castsi128_pd(a); | |
4892 | assert_eq_m128d(r, expected); | |
4893 | } | |
4894 | ||
83c7162d | 4895 | #[simd_test(enable = "sse2")] |
0531ce1d XL |
4896 | unsafe fn test_mm_castsi128_ps() { |
4897 | let a = _mm_set1_epi32(0); | |
4898 | let expected = _mm_set1_ps(0.); | |
4899 | let r = _mm_castsi128_ps(a); | |
4900 | assert_eq_m128(r, expected); | |
4901 | } | |
0531ce1d | 4902 | } |