]>
Commit | Line | Data |
---|---|---|
0531ce1d XL |
1 | //! Streaming SIMD Extensions 2 (SSE2) |
2 | ||
3 | #[cfg(test)] | |
4 | use stdsimd_test::assert_instr; | |
5 | ||
0531ce1d | 6 | use coresimd::simd::*; |
83c7162d | 7 | use coresimd::simd_llvm::*; |
0531ce1d XL |
8 | use coresimd::x86::*; |
9 | use intrinsics; | |
10 | use mem; | |
11 | use ptr; | |
12 | ||
/// Provide a hint to the processor that the code sequence is a spin-wait loop.
///
/// This can help improve the performance and power consumption of spin-wait
/// loops.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_pause)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(pause))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_pause() {
    // Thin wrapper over the `pause` intrinsic declared elsewhere in this file;
    // emits the PAUSE instruction (asserted by `assert_instr` above).
    pause()
}
26 | ||
/// Invalidate and flush the cache line that contains `p` from all levels of
/// the cache hierarchy.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_clflush)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(clflush))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_clflush(p: *mut u8) {
    // Forwards the pointer unchanged to the `clflush` intrinsic declared
    // elsewhere in this file (CLFLUSH instruction).
    clflush(p)
}
38 | ||
/// Perform a serializing operation on all load-from-memory instructions
/// that were issued prior to this instruction.
///
/// Guarantees that every load instruction that precedes, in program order, is
/// globally visible before any load instruction which follows the fence in
/// program order.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_lfence)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(lfence))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_lfence() {
    // Delegates to the `lfence` intrinsic declared elsewhere in this file
    // (LFENCE instruction).
    lfence()
}
54 | ||
/// Perform a serializing operation on all load-from-memory and store-to-memory
/// instructions that were issued prior to this instruction.
///
/// Guarantees that every memory access that precedes, in program order, the
/// memory fence instruction is globally visible before any memory instruction
/// which follows the fence in program order.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mfence)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(mfence))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_mfence() {
    // Delegates to the `mfence` intrinsic declared elsewhere in this file
    // (MFENCE instruction).
    mfence()
}
70 | ||
71 | /// Add packed 8-bit integers in `a` and `b`. | |
83c7162d XL |
72 | /// |
73 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_add_epi8) | |
0531ce1d XL |
74 | #[inline] |
75 | #[target_feature(enable = "sse2")] | |
76 | #[cfg_attr(test, assert_instr(paddb))] | |
83c7162d | 77 | #[stable(feature = "simd_x86", since = "1.27.0")] |
0531ce1d XL |
78 | pub unsafe fn _mm_add_epi8(a: __m128i, b: __m128i) -> __m128i { |
79 | mem::transmute(simd_add(a.as_i8x16(), b.as_i8x16())) | |
80 | } | |
81 | ||
82 | /// Add packed 16-bit integers in `a` and `b`. | |
83c7162d XL |
83 | /// |
84 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_add_epi16) | |
0531ce1d XL |
85 | #[inline] |
86 | #[target_feature(enable = "sse2")] | |
87 | #[cfg_attr(test, assert_instr(paddw))] | |
83c7162d | 88 | #[stable(feature = "simd_x86", since = "1.27.0")] |
0531ce1d XL |
89 | pub unsafe fn _mm_add_epi16(a: __m128i, b: __m128i) -> __m128i { |
90 | mem::transmute(simd_add(a.as_i16x8(), b.as_i16x8())) | |
91 | } | |
92 | ||
93 | /// Add packed 32-bit integers in `a` and `b`. | |
83c7162d XL |
94 | /// |
95 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_add_epi32) | |
0531ce1d XL |
96 | #[inline] |
97 | #[target_feature(enable = "sse2")] | |
98 | #[cfg_attr(test, assert_instr(paddd))] | |
83c7162d | 99 | #[stable(feature = "simd_x86", since = "1.27.0")] |
0531ce1d XL |
100 | pub unsafe fn _mm_add_epi32(a: __m128i, b: __m128i) -> __m128i { |
101 | mem::transmute(simd_add(a.as_i32x4(), b.as_i32x4())) | |
102 | } | |
103 | ||
104 | /// Add packed 64-bit integers in `a` and "b`. | |
83c7162d XL |
105 | /// |
106 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_add_epi64) | |
0531ce1d XL |
107 | #[inline] |
108 | #[target_feature(enable = "sse2")] | |
109 | #[cfg_attr(test, assert_instr(paddq))] | |
83c7162d | 110 | #[stable(feature = "simd_x86", since = "1.27.0")] |
0531ce1d XL |
111 | pub unsafe fn _mm_add_epi64(a: __m128i, b: __m128i) -> __m128i { |
112 | mem::transmute(simd_add(a.as_i64x2(), b.as_i64x2())) | |
113 | } | |
114 | ||
115 | /// Add packed 8-bit integers in `a` and `b` using saturation. | |
83c7162d XL |
116 | /// |
117 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_adds_epi8) | |
0531ce1d XL |
118 | #[inline] |
119 | #[target_feature(enable = "sse2")] | |
120 | #[cfg_attr(test, assert_instr(paddsb))] | |
83c7162d | 121 | #[stable(feature = "simd_x86", since = "1.27.0")] |
0531ce1d XL |
122 | pub unsafe fn _mm_adds_epi8(a: __m128i, b: __m128i) -> __m128i { |
123 | mem::transmute(paddsb(a.as_i8x16(), b.as_i8x16())) | |
124 | } | |
125 | ||
126 | /// Add packed 16-bit integers in `a` and `b` using saturation. | |
83c7162d XL |
127 | /// |
128 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_adds_epi16) | |
0531ce1d XL |
129 | #[inline] |
130 | #[target_feature(enable = "sse2")] | |
131 | #[cfg_attr(test, assert_instr(paddsw))] | |
83c7162d | 132 | #[stable(feature = "simd_x86", since = "1.27.0")] |
0531ce1d XL |
133 | pub unsafe fn _mm_adds_epi16(a: __m128i, b: __m128i) -> __m128i { |
134 | mem::transmute(paddsw(a.as_i16x8(), b.as_i16x8())) | |
135 | } | |
136 | ||
137 | /// Add packed unsigned 8-bit integers in `a` and `b` using saturation. | |
83c7162d XL |
138 | /// |
139 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_adds_epu8) | |
0531ce1d XL |
140 | #[inline] |
141 | #[target_feature(enable = "sse2")] | |
142 | #[cfg_attr(test, assert_instr(paddusb))] | |
83c7162d | 143 | #[stable(feature = "simd_x86", since = "1.27.0")] |
0531ce1d XL |
144 | pub unsafe fn _mm_adds_epu8(a: __m128i, b: __m128i) -> __m128i { |
145 | mem::transmute(paddsub(a.as_u8x16(), b.as_u8x16())) | |
146 | } | |
147 | ||
148 | /// Add packed unsigned 16-bit integers in `a` and `b` using saturation. | |
83c7162d XL |
149 | /// |
150 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_adds_epu16) | |
0531ce1d XL |
151 | #[inline] |
152 | #[target_feature(enable = "sse2")] | |
153 | #[cfg_attr(test, assert_instr(paddusw))] | |
83c7162d | 154 | #[stable(feature = "simd_x86", since = "1.27.0")] |
0531ce1d XL |
155 | pub unsafe fn _mm_adds_epu16(a: __m128i, b: __m128i) -> __m128i { |
156 | mem::transmute(paddsuw(a.as_u16x8(), b.as_u16x8())) | |
157 | } | |
158 | ||
159 | /// Average packed unsigned 8-bit integers in `a` and `b`. | |
83c7162d XL |
160 | /// |
161 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_avg_epu8) | |
0531ce1d XL |
162 | #[inline] |
163 | #[target_feature(enable = "sse2")] | |
164 | #[cfg_attr(test, assert_instr(pavgb))] | |
83c7162d | 165 | #[stable(feature = "simd_x86", since = "1.27.0")] |
0531ce1d XL |
166 | pub unsafe fn _mm_avg_epu8(a: __m128i, b: __m128i) -> __m128i { |
167 | mem::transmute(pavgb(a.as_u8x16(), b.as_u8x16())) | |
168 | } | |
169 | ||
170 | /// Average packed unsigned 16-bit integers in `a` and `b`. | |
83c7162d XL |
171 | /// |
172 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_avg_epu16) | |
0531ce1d XL |
173 | #[inline] |
174 | #[target_feature(enable = "sse2")] | |
175 | #[cfg_attr(test, assert_instr(pavgw))] | |
83c7162d | 176 | #[stable(feature = "simd_x86", since = "1.27.0")] |
0531ce1d XL |
177 | pub unsafe fn _mm_avg_epu16(a: __m128i, b: __m128i) -> __m128i { |
178 | mem::transmute(pavgw(a.as_u16x8(), b.as_u16x8())) | |
179 | } | |
180 | ||
181 | /// Multiply and then horizontally add signed 16 bit integers in `a` and `b`. | |
182 | /// | |
183 | /// Multiply packed signed 16-bit integers in `a` and `b`, producing | |
184 | /// intermediate signed 32-bit integers. Horizontally add adjacent pairs of | |
185 | /// intermediate 32-bit integers. | |
83c7162d XL |
186 | /// |
187 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_madd_epi16) | |
0531ce1d XL |
188 | #[inline] |
189 | #[target_feature(enable = "sse2")] | |
190 | #[cfg_attr(test, assert_instr(pmaddwd))] | |
83c7162d | 191 | #[stable(feature = "simd_x86", since = "1.27.0")] |
0531ce1d XL |
192 | pub unsafe fn _mm_madd_epi16(a: __m128i, b: __m128i) -> __m128i { |
193 | mem::transmute(pmaddwd(a.as_i16x8(), b.as_i16x8())) | |
194 | } | |
195 | ||
196 | /// Compare packed 16-bit integers in `a` and `b`, and return the packed | |
197 | /// maximum values. | |
83c7162d XL |
198 | /// |
199 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_max_epi16) | |
0531ce1d XL |
200 | #[inline] |
201 | #[target_feature(enable = "sse2")] | |
202 | #[cfg_attr(test, assert_instr(pmaxsw))] | |
83c7162d | 203 | #[stable(feature = "simd_x86", since = "1.27.0")] |
0531ce1d XL |
204 | pub unsafe fn _mm_max_epi16(a: __m128i, b: __m128i) -> __m128i { |
205 | mem::transmute(pmaxsw(a.as_i16x8(), b.as_i16x8())) | |
206 | } | |
207 | ||
208 | /// Compare packed unsigned 8-bit integers in `a` and `b`, and return the | |
209 | /// packed maximum values. | |
83c7162d XL |
210 | /// |
211 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_max_epu8) | |
0531ce1d XL |
212 | #[inline] |
213 | #[target_feature(enable = "sse2")] | |
214 | #[cfg_attr(test, assert_instr(pmaxub))] | |
83c7162d | 215 | #[stable(feature = "simd_x86", since = "1.27.0")] |
0531ce1d XL |
216 | pub unsafe fn _mm_max_epu8(a: __m128i, b: __m128i) -> __m128i { |
217 | mem::transmute(pmaxub(a.as_u8x16(), b.as_u8x16())) | |
218 | } | |
219 | ||
220 | /// Compare packed 16-bit integers in `a` and `b`, and return the packed | |
221 | /// minimum values. | |
83c7162d XL |
222 | /// |
223 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_min_epi16) | |
0531ce1d XL |
224 | #[inline] |
225 | #[target_feature(enable = "sse2")] | |
226 | #[cfg_attr(test, assert_instr(pminsw))] | |
83c7162d | 227 | #[stable(feature = "simd_x86", since = "1.27.0")] |
0531ce1d XL |
228 | pub unsafe fn _mm_min_epi16(a: __m128i, b: __m128i) -> __m128i { |
229 | mem::transmute(pminsw(a.as_i16x8(), b.as_i16x8())) | |
230 | } | |
231 | ||
232 | /// Compare packed unsigned 8-bit integers in `a` and `b`, and return the | |
233 | /// packed minimum values. | |
83c7162d XL |
234 | /// |
235 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_min_epu8) | |
0531ce1d XL |
236 | #[inline] |
237 | #[target_feature(enable = "sse2")] | |
238 | #[cfg_attr(test, assert_instr(pminub))] | |
83c7162d | 239 | #[stable(feature = "simd_x86", since = "1.27.0")] |
0531ce1d XL |
240 | pub unsafe fn _mm_min_epu8(a: __m128i, b: __m128i) -> __m128i { |
241 | mem::transmute(pminub(a.as_u8x16(), b.as_u8x16())) | |
242 | } | |
243 | ||
244 | /// Multiply the packed 16-bit integers in `a` and `b`. | |
245 | /// | |
246 | /// The multiplication produces intermediate 32-bit integers, and returns the | |
247 | /// high 16 bits of the intermediate integers. | |
83c7162d XL |
248 | /// |
249 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mulhi_epi16) | |
0531ce1d XL |
250 | #[inline] |
251 | #[target_feature(enable = "sse2")] | |
252 | #[cfg_attr(test, assert_instr(pmulhw))] | |
83c7162d | 253 | #[stable(feature = "simd_x86", since = "1.27.0")] |
0531ce1d XL |
254 | pub unsafe fn _mm_mulhi_epi16(a: __m128i, b: __m128i) -> __m128i { |
255 | mem::transmute(pmulhw(a.as_i16x8(), b.as_i16x8())) | |
256 | } | |
257 | ||
258 | /// Multiply the packed unsigned 16-bit integers in `a` and `b`. | |
259 | /// | |
260 | /// The multiplication produces intermediate 32-bit integers, and returns the | |
261 | /// high 16 bits of the intermediate integers. | |
83c7162d XL |
262 | /// |
263 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mulhi_epu16) | |
0531ce1d XL |
264 | #[inline] |
265 | #[target_feature(enable = "sse2")] | |
266 | #[cfg_attr(test, assert_instr(pmulhuw))] | |
83c7162d | 267 | #[stable(feature = "simd_x86", since = "1.27.0")] |
0531ce1d XL |
268 | pub unsafe fn _mm_mulhi_epu16(a: __m128i, b: __m128i) -> __m128i { |
269 | mem::transmute(pmulhuw(a.as_u16x8(), b.as_u16x8())) | |
270 | } | |
271 | ||
272 | /// Multiply the packed 16-bit integers in `a` and `b`. | |
273 | /// | |
274 | /// The multiplication produces intermediate 32-bit integers, and returns the | |
275 | /// low 16 bits of the intermediate integers. | |
83c7162d XL |
276 | /// |
277 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mullo_epi16) | |
0531ce1d XL |
278 | #[inline] |
279 | #[target_feature(enable = "sse2")] | |
280 | #[cfg_attr(test, assert_instr(pmullw))] | |
83c7162d | 281 | #[stable(feature = "simd_x86", since = "1.27.0")] |
0531ce1d XL |
282 | pub unsafe fn _mm_mullo_epi16(a: __m128i, b: __m128i) -> __m128i { |
283 | mem::transmute(simd_mul(a.as_i16x8(), b.as_i16x8())) | |
284 | } | |
285 | ||
286 | /// Multiply the low unsigned 32-bit integers from each packed 64-bit element | |
287 | /// in `a` and `b`. | |
288 | /// | |
289 | /// Return the unsigned 64-bit results. | |
83c7162d XL |
290 | /// |
291 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mul_epu32) | |
0531ce1d XL |
292 | #[inline] |
293 | #[target_feature(enable = "sse2")] | |
294 | #[cfg_attr(test, assert_instr(pmuludq))] | |
83c7162d | 295 | #[stable(feature = "simd_x86", since = "1.27.0")] |
0531ce1d XL |
296 | pub unsafe fn _mm_mul_epu32(a: __m128i, b: __m128i) -> __m128i { |
297 | mem::transmute(pmuludq(a.as_u32x4(), b.as_u32x4())) | |
298 | } | |
299 | ||
300 | /// Sum the absolute differences of packed unsigned 8-bit integers. | |
301 | /// | |
302 | /// Compute the absolute differences of packed unsigned 8-bit integers in `a` | |
303 | /// and `b`, then horizontally sum each consecutive 8 differences to produce | |
304 | /// two unsigned 16-bit integers, and pack these unsigned 16-bit integers in | |
305 | /// the low 16 bits of 64-bit elements returned. | |
83c7162d XL |
306 | /// |
307 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_sad_epu8) | |
0531ce1d XL |
308 | #[inline] |
309 | #[target_feature(enable = "sse2")] | |
310 | #[cfg_attr(test, assert_instr(psadbw))] | |
83c7162d | 311 | #[stable(feature = "simd_x86", since = "1.27.0")] |
0531ce1d XL |
312 | pub unsafe fn _mm_sad_epu8(a: __m128i, b: __m128i) -> __m128i { |
313 | mem::transmute(psadbw(a.as_u8x16(), b.as_u8x16())) | |
314 | } | |
315 | ||
316 | /// Subtract packed 8-bit integers in `b` from packed 8-bit integers in `a`. | |
83c7162d XL |
317 | /// |
318 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_sub_epi8) | |
0531ce1d XL |
319 | #[inline] |
320 | #[target_feature(enable = "sse2")] | |
321 | #[cfg_attr(test, assert_instr(psubb))] | |
83c7162d | 322 | #[stable(feature = "simd_x86", since = "1.27.0")] |
0531ce1d XL |
323 | pub unsafe fn _mm_sub_epi8(a: __m128i, b: __m128i) -> __m128i { |
324 | mem::transmute(simd_sub(a.as_i8x16(), b.as_i8x16())) | |
325 | } | |
326 | ||
327 | /// Subtract packed 16-bit integers in `b` from packed 16-bit integers in `a`. | |
83c7162d XL |
328 | /// |
329 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_sub_epi16) | |
0531ce1d XL |
330 | #[inline] |
331 | #[target_feature(enable = "sse2")] | |
332 | #[cfg_attr(test, assert_instr(psubw))] | |
83c7162d | 333 | #[stable(feature = "simd_x86", since = "1.27.0")] |
0531ce1d XL |
334 | pub unsafe fn _mm_sub_epi16(a: __m128i, b: __m128i) -> __m128i { |
335 | mem::transmute(simd_sub(a.as_i16x8(), b.as_i16x8())) | |
336 | } | |
337 | ||
338 | /// Subtract packed 32-bit integers in `b` from packed 32-bit integers in `a`. | |
83c7162d XL |
339 | /// |
340 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_sub_epi32) | |
0531ce1d XL |
341 | #[inline] |
342 | #[target_feature(enable = "sse2")] | |
343 | #[cfg_attr(test, assert_instr(psubd))] | |
83c7162d | 344 | #[stable(feature = "simd_x86", since = "1.27.0")] |
0531ce1d XL |
345 | pub unsafe fn _mm_sub_epi32(a: __m128i, b: __m128i) -> __m128i { |
346 | mem::transmute(simd_sub(a.as_i32x4(), b.as_i32x4())) | |
347 | } | |
348 | ||
349 | /// Subtract packed 64-bit integers in `b` from packed 64-bit integers in `a`. | |
83c7162d XL |
350 | /// |
351 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_sub_epi64) | |
0531ce1d XL |
352 | #[inline] |
353 | #[target_feature(enable = "sse2")] | |
354 | #[cfg_attr(test, assert_instr(psubq))] | |
83c7162d | 355 | #[stable(feature = "simd_x86", since = "1.27.0")] |
0531ce1d XL |
356 | pub unsafe fn _mm_sub_epi64(a: __m128i, b: __m128i) -> __m128i { |
357 | mem::transmute(simd_sub(a.as_i64x2(), b.as_i64x2())) | |
358 | } | |
359 | ||
360 | /// Subtract packed 8-bit integers in `b` from packed 8-bit integers in `a` | |
361 | /// using saturation. | |
83c7162d XL |
362 | /// |
363 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_subs_epi8) | |
0531ce1d XL |
364 | #[inline] |
365 | #[target_feature(enable = "sse2")] | |
366 | #[cfg_attr(test, assert_instr(psubsb))] | |
83c7162d | 367 | #[stable(feature = "simd_x86", since = "1.27.0")] |
0531ce1d XL |
368 | pub unsafe fn _mm_subs_epi8(a: __m128i, b: __m128i) -> __m128i { |
369 | mem::transmute(psubsb(a.as_i8x16(), b.as_i8x16())) | |
370 | } | |
371 | ||
372 | /// Subtract packed 16-bit integers in `b` from packed 16-bit integers in `a` | |
373 | /// using saturation. | |
83c7162d XL |
374 | /// |
375 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_subs_epi16) | |
0531ce1d XL |
376 | #[inline] |
377 | #[target_feature(enable = "sse2")] | |
378 | #[cfg_attr(test, assert_instr(psubsw))] | |
83c7162d | 379 | #[stable(feature = "simd_x86", since = "1.27.0")] |
0531ce1d XL |
380 | pub unsafe fn _mm_subs_epi16(a: __m128i, b: __m128i) -> __m128i { |
381 | mem::transmute(psubsw(a.as_i16x8(), b.as_i16x8())) | |
382 | } | |
383 | ||
384 | /// Subtract packed unsigned 8-bit integers in `b` from packed unsigned 8-bit | |
385 | /// integers in `a` using saturation. | |
83c7162d XL |
386 | /// |
387 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_subs_epu8) | |
0531ce1d XL |
388 | #[inline] |
389 | #[target_feature(enable = "sse2")] | |
390 | #[cfg_attr(test, assert_instr(psubusb))] | |
83c7162d | 391 | #[stable(feature = "simd_x86", since = "1.27.0")] |
0531ce1d XL |
392 | pub unsafe fn _mm_subs_epu8(a: __m128i, b: __m128i) -> __m128i { |
393 | mem::transmute(psubusb(a.as_u8x16(), b.as_u8x16())) | |
394 | } | |
395 | ||
396 | /// Subtract packed unsigned 16-bit integers in `b` from packed unsigned 16-bit | |
397 | /// integers in `a` using saturation. | |
83c7162d XL |
398 | /// |
399 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_subs_epu16) | |
0531ce1d XL |
400 | #[inline] |
401 | #[target_feature(enable = "sse2")] | |
402 | #[cfg_attr(test, assert_instr(psubusw))] | |
83c7162d | 403 | #[stable(feature = "simd_x86", since = "1.27.0")] |
0531ce1d XL |
404 | pub unsafe fn _mm_subs_epu16(a: __m128i, b: __m128i) -> __m128i { |
405 | mem::transmute(psubusw(a.as_u16x8(), b.as_u16x8())) | |
406 | } | |
407 | ||
/// Shift `a` left by `imm8` bytes while shifting in zeros.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_slli_si128)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(pslldq, imm8 = 1))]
#[rustc_args_required_const(1)]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_slli_si128(a: __m128i, imm8: i32) -> __m128i {
    // `imm8` must be a compile-time constant (`rustc_args_required_const`);
    // the helper expands it into a constant byte shuffle. Shifts of 16 bytes
    // or more yield all zeros (see `_mm_slli_si128_impl`).
    _mm_slli_si128_impl(a, imm8)
}
419 | ||
420 | /// Implementation detail: converts the immediate argument of the | |
421 | /// `_mm_slli_si128` intrinsic into a compile-time constant. | |
422 | #[inline] | |
423 | #[target_feature(enable = "sse2")] | |
424 | unsafe fn _mm_slli_si128_impl(a: __m128i, imm8: i32) -> __m128i { | |
425 | let (zero, imm8) = (_mm_set1_epi8(0).as_i8x16(), imm8 as u32); | |
426 | let a = a.as_i8x16(); | |
427 | macro_rules! shuffle { | |
428 | ($shift:expr) => { | |
83c7162d XL |
429 | simd_shuffle16::<i8x16, i8x16>( |
430 | zero, | |
431 | a, | |
432 | [ | |
433 | 16 - $shift, | |
434 | 17 - $shift, | |
435 | 18 - $shift, | |
436 | 19 - $shift, | |
437 | 20 - $shift, | |
438 | 21 - $shift, | |
439 | 22 - $shift, | |
440 | 23 - $shift, | |
441 | 24 - $shift, | |
442 | 25 - $shift, | |
443 | 26 - $shift, | |
444 | 27 - $shift, | |
445 | 28 - $shift, | |
446 | 29 - $shift, | |
447 | 30 - $shift, | |
448 | 31 - $shift, | |
449 | ], | |
450 | ) | |
451 | }; | |
0531ce1d XL |
452 | } |
453 | let x = match imm8 { | |
454 | 0 => shuffle!(0), | |
455 | 1 => shuffle!(1), | |
456 | 2 => shuffle!(2), | |
457 | 3 => shuffle!(3), | |
458 | 4 => shuffle!(4), | |
459 | 5 => shuffle!(5), | |
460 | 6 => shuffle!(6), | |
461 | 7 => shuffle!(7), | |
462 | 8 => shuffle!(8), | |
463 | 9 => shuffle!(9), | |
464 | 10 => shuffle!(10), | |
465 | 11 => shuffle!(11), | |
466 | 12 => shuffle!(12), | |
467 | 13 => shuffle!(13), | |
468 | 14 => shuffle!(14), | |
469 | 15 => shuffle!(15), | |
470 | _ => shuffle!(16), | |
471 | }; | |
472 | mem::transmute(x) | |
473 | } | |
474 | ||
/// Shift `a` left by `imm8` bytes while shifting in zeros.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_bslli_si128)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(pslldq, imm8 = 1))]
#[rustc_args_required_const(1)]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_bslli_si128(a: __m128i, imm8: i32) -> __m128i {
    // Alias of `_mm_slli_si128`: both delegate to the same constant-shuffle
    // helper `_mm_slli_si128_impl`.
    _mm_slli_si128_impl(a, imm8)
}
486 | ||
/// Shift `a` right by `imm8` bytes while shifting in zeros.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_bsrli_si128)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(psrldq, imm8 = 1))]
#[rustc_args_required_const(1)]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_bsrli_si128(a: __m128i, imm8: i32) -> __m128i {
    // Alias of `_mm_srli_si128`: both delegate to the same constant-shuffle
    // helper `_mm_srli_si128_impl`.
    _mm_srli_si128_impl(a, imm8)
}
498 | ||
499 | /// Shift packed 16-bit integers in `a` left by `imm8` while shifting in zeros. | |
83c7162d XL |
500 | /// |
501 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_slli_epi16) | |
0531ce1d XL |
502 | #[inline] |
503 | #[target_feature(enable = "sse2")] | |
504 | #[cfg_attr(test, assert_instr(psllw, imm8 = 7))] | |
505 | #[rustc_args_required_const(1)] | |
83c7162d | 506 | #[stable(feature = "simd_x86", since = "1.27.0")] |
0531ce1d XL |
507 | pub unsafe fn _mm_slli_epi16(a: __m128i, imm8: i32) -> __m128i { |
508 | mem::transmute(pslliw(a.as_i16x8(), imm8)) | |
509 | } | |
510 | ||
511 | /// Shift packed 16-bit integers in `a` left by `count` while shifting in | |
512 | /// zeros. | |
83c7162d XL |
513 | /// |
514 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_sll_epi16) | |
0531ce1d XL |
515 | #[inline] |
516 | #[target_feature(enable = "sse2")] | |
517 | #[cfg_attr(test, assert_instr(psllw))] | |
83c7162d | 518 | #[stable(feature = "simd_x86", since = "1.27.0")] |
0531ce1d XL |
519 | pub unsafe fn _mm_sll_epi16(a: __m128i, count: __m128i) -> __m128i { |
520 | mem::transmute(psllw(a.as_i16x8(), count.as_i16x8())) | |
521 | } | |
522 | ||
523 | /// Shift packed 32-bit integers in `a` left by `imm8` while shifting in zeros. | |
83c7162d XL |
524 | /// |
525 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_slli_epi32) | |
0531ce1d XL |
526 | #[inline] |
527 | #[target_feature(enable = "sse2")] | |
528 | #[cfg_attr(test, assert_instr(pslld, imm8 = 7))] | |
529 | #[rustc_args_required_const(1)] | |
83c7162d | 530 | #[stable(feature = "simd_x86", since = "1.27.0")] |
0531ce1d XL |
531 | pub unsafe fn _mm_slli_epi32(a: __m128i, imm8: i32) -> __m128i { |
532 | mem::transmute(psllid(a.as_i32x4(), imm8)) | |
533 | } | |
534 | ||
535 | /// Shift packed 32-bit integers in `a` left by `count` while shifting in | |
536 | /// zeros. | |
83c7162d XL |
537 | /// |
538 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_sll_epi32) | |
0531ce1d XL |
539 | #[inline] |
540 | #[target_feature(enable = "sse2")] | |
541 | #[cfg_attr(test, assert_instr(pslld))] | |
83c7162d | 542 | #[stable(feature = "simd_x86", since = "1.27.0")] |
0531ce1d XL |
543 | pub unsafe fn _mm_sll_epi32(a: __m128i, count: __m128i) -> __m128i { |
544 | mem::transmute(pslld(a.as_i32x4(), count.as_i32x4())) | |
545 | } | |
546 | ||
547 | /// Shift packed 64-bit integers in `a` left by `imm8` while shifting in zeros. | |
83c7162d XL |
548 | /// |
549 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_slli_epi64) | |
0531ce1d XL |
550 | #[inline] |
551 | #[target_feature(enable = "sse2")] | |
552 | #[cfg_attr(test, assert_instr(psllq, imm8 = 7))] | |
553 | #[rustc_args_required_const(1)] | |
83c7162d | 554 | #[stable(feature = "simd_x86", since = "1.27.0")] |
0531ce1d XL |
555 | pub unsafe fn _mm_slli_epi64(a: __m128i, imm8: i32) -> __m128i { |
556 | mem::transmute(pslliq(a.as_i64x2(), imm8)) | |
557 | } | |
558 | ||
559 | /// Shift packed 64-bit integers in `a` left by `count` while shifting in | |
560 | /// zeros. | |
83c7162d XL |
561 | /// |
562 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_sll_epi64) | |
0531ce1d XL |
563 | #[inline] |
564 | #[target_feature(enable = "sse2")] | |
565 | #[cfg_attr(test, assert_instr(psllq))] | |
83c7162d | 566 | #[stable(feature = "simd_x86", since = "1.27.0")] |
0531ce1d XL |
567 | pub unsafe fn _mm_sll_epi64(a: __m128i, count: __m128i) -> __m128i { |
568 | mem::transmute(psllq(a.as_i64x2(), count.as_i64x2())) | |
569 | } | |
570 | ||
571 | /// Shift packed 16-bit integers in `a` right by `imm8` while shifting in sign | |
572 | /// bits. | |
83c7162d XL |
573 | /// |
574 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_srai_epi16) | |
0531ce1d XL |
575 | #[inline] |
576 | #[target_feature(enable = "sse2")] | |
577 | #[cfg_attr(test, assert_instr(psraw, imm8 = 1))] | |
578 | #[rustc_args_required_const(1)] | |
83c7162d | 579 | #[stable(feature = "simd_x86", since = "1.27.0")] |
0531ce1d XL |
580 | pub unsafe fn _mm_srai_epi16(a: __m128i, imm8: i32) -> __m128i { |
581 | mem::transmute(psraiw(a.as_i16x8(), imm8)) | |
582 | } | |
583 | ||
584 | /// Shift packed 16-bit integers in `a` right by `count` while shifting in sign | |
585 | /// bits. | |
83c7162d XL |
586 | /// |
587 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_sra_epi16) | |
0531ce1d XL |
588 | #[inline] |
589 | #[target_feature(enable = "sse2")] | |
590 | #[cfg_attr(test, assert_instr(psraw))] | |
83c7162d | 591 | #[stable(feature = "simd_x86", since = "1.27.0")] |
0531ce1d XL |
592 | pub unsafe fn _mm_sra_epi16(a: __m128i, count: __m128i) -> __m128i { |
593 | mem::transmute(psraw(a.as_i16x8(), count.as_i16x8())) | |
594 | } | |
595 | ||
596 | /// Shift packed 32-bit integers in `a` right by `imm8` while shifting in sign | |
597 | /// bits. | |
83c7162d XL |
598 | /// |
599 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_srai_epi32) | |
0531ce1d XL |
600 | #[inline] |
601 | #[target_feature(enable = "sse2")] | |
602 | #[cfg_attr(test, assert_instr(psrad, imm8 = 1))] | |
603 | #[rustc_args_required_const(1)] | |
83c7162d | 604 | #[stable(feature = "simd_x86", since = "1.27.0")] |
0531ce1d XL |
605 | pub unsafe fn _mm_srai_epi32(a: __m128i, imm8: i32) -> __m128i { |
606 | mem::transmute(psraid(a.as_i32x4(), imm8)) | |
607 | } | |
608 | ||
609 | /// Shift packed 32-bit integers in `a` right by `count` while shifting in sign | |
610 | /// bits. | |
83c7162d XL |
611 | /// |
612 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_sra_epi32) | |
0531ce1d XL |
613 | #[inline] |
614 | #[target_feature(enable = "sse2")] | |
615 | #[cfg_attr(test, assert_instr(psrad))] | |
83c7162d | 616 | #[stable(feature = "simd_x86", since = "1.27.0")] |
0531ce1d XL |
617 | pub unsafe fn _mm_sra_epi32(a: __m128i, count: __m128i) -> __m128i { |
618 | mem::transmute(psrad(a.as_i32x4(), count.as_i32x4())) | |
619 | } | |
620 | ||
/// Shift `a` right by `imm8` bytes while shifting in zeros.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_srli_si128)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(psrldq, imm8 = 1))]
#[rustc_args_required_const(1)]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_srli_si128(a: __m128i, imm8: i32) -> __m128i {
    // `imm8` must be a compile-time constant (`rustc_args_required_const`);
    // the helper expands it into a constant byte shuffle. Shifts of 16 bytes
    // or more yield all zeros (see `_mm_srli_si128_impl`).
    _mm_srli_si128_impl(a, imm8)
}
632 | ||
633 | /// Implementation detail: converts the immediate argument of the | |
634 | /// `_mm_srli_si128` intrinsic into a compile-time constant. | |
635 | #[inline] | |
636 | #[target_feature(enable = "sse2")] | |
637 | unsafe fn _mm_srli_si128_impl(a: __m128i, imm8: i32) -> __m128i { | |
638 | let (zero, imm8) = (_mm_set1_epi8(0).as_i8x16(), imm8 as u32); | |
639 | let a = a.as_i8x16(); | |
640 | macro_rules! shuffle { | |
641 | ($shift:expr) => { | |
83c7162d XL |
642 | simd_shuffle16( |
643 | a, | |
644 | zero, | |
645 | [ | |
646 | 0 + $shift, | |
647 | 1 + $shift, | |
648 | 2 + $shift, | |
649 | 3 + $shift, | |
650 | 4 + $shift, | |
651 | 5 + $shift, | |
652 | 6 + $shift, | |
653 | 7 + $shift, | |
654 | 8 + $shift, | |
655 | 9 + $shift, | |
656 | 10 + $shift, | |
657 | 11 + $shift, | |
658 | 12 + $shift, | |
659 | 13 + $shift, | |
660 | 14 + $shift, | |
661 | 15 + $shift, | |
662 | ], | |
663 | ) | |
664 | }; | |
0531ce1d XL |
665 | } |
666 | let x: i8x16 = match imm8 { | |
667 | 0 => shuffle!(0), | |
668 | 1 => shuffle!(1), | |
669 | 2 => shuffle!(2), | |
670 | 3 => shuffle!(3), | |
671 | 4 => shuffle!(4), | |
672 | 5 => shuffle!(5), | |
673 | 6 => shuffle!(6), | |
674 | 7 => shuffle!(7), | |
675 | 8 => shuffle!(8), | |
676 | 9 => shuffle!(9), | |
677 | 10 => shuffle!(10), | |
678 | 11 => shuffle!(11), | |
679 | 12 => shuffle!(12), | |
680 | 13 => shuffle!(13), | |
681 | 14 => shuffle!(14), | |
682 | 15 => shuffle!(15), | |
683 | _ => shuffle!(16), | |
684 | }; | |
685 | mem::transmute(x) | |
686 | } | |
687 | ||
688 | /// Shift packed 16-bit integers in `a` right by `imm8` while shifting in | |
689 | /// zeros. | |
83c7162d XL |
690 | /// |
691 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_srli_epi16) | |
0531ce1d XL |
692 | #[inline] |
693 | #[target_feature(enable = "sse2")] | |
694 | #[cfg_attr(test, assert_instr(psrlw, imm8 = 1))] | |
695 | #[rustc_args_required_const(1)] | |
83c7162d | 696 | #[stable(feature = "simd_x86", since = "1.27.0")] |
0531ce1d XL |
697 | pub unsafe fn _mm_srli_epi16(a: __m128i, imm8: i32) -> __m128i { |
698 | mem::transmute(psrliw(a.as_i16x8(), imm8)) | |
699 | } | |
700 | ||
701 | /// Shift packed 16-bit integers in `a` right by `count` while shifting in | |
702 | /// zeros. | |
83c7162d XL |
703 | /// |
704 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_srl_epi16) | |
0531ce1d XL |
705 | #[inline] |
706 | #[target_feature(enable = "sse2")] | |
707 | #[cfg_attr(test, assert_instr(psrlw))] | |
83c7162d | 708 | #[stable(feature = "simd_x86", since = "1.27.0")] |
0531ce1d XL |
709 | pub unsafe fn _mm_srl_epi16(a: __m128i, count: __m128i) -> __m128i { |
710 | mem::transmute(psrlw(a.as_i16x8(), count.as_i16x8())) | |
711 | } | |
712 | ||
713 | /// Shift packed 32-bit integers in `a` right by `imm8` while shifting in | |
714 | /// zeros. | |
83c7162d XL |
715 | /// |
716 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_srli_epi32) | |
0531ce1d XL |
717 | #[inline] |
718 | #[target_feature(enable = "sse2")] | |
719 | #[cfg_attr(test, assert_instr(psrld, imm8 = 8))] | |
720 | #[rustc_args_required_const(1)] | |
83c7162d | 721 | #[stable(feature = "simd_x86", since = "1.27.0")] |
0531ce1d XL |
722 | pub unsafe fn _mm_srli_epi32(a: __m128i, imm8: i32) -> __m128i { |
723 | mem::transmute(psrlid(a.as_i32x4(), imm8)) | |
724 | } | |
725 | ||
726 | /// Shift packed 32-bit integers in `a` right by `count` while shifting in | |
727 | /// zeros. | |
83c7162d XL |
728 | /// |
729 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_srl_epi32) | |
0531ce1d XL |
730 | #[inline] |
731 | #[target_feature(enable = "sse2")] | |
732 | #[cfg_attr(test, assert_instr(psrld))] | |
83c7162d | 733 | #[stable(feature = "simd_x86", since = "1.27.0")] |
0531ce1d XL |
734 | pub unsafe fn _mm_srl_epi32(a: __m128i, count: __m128i) -> __m128i { |
735 | mem::transmute(psrld(a.as_i32x4(), count.as_i32x4())) | |
736 | } | |
737 | ||
738 | /// Shift packed 64-bit integers in `a` right by `imm8` while shifting in | |
739 | /// zeros. | |
83c7162d XL |
740 | /// |
741 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_srli_epi64) | |
0531ce1d XL |
742 | #[inline] |
743 | #[target_feature(enable = "sse2")] | |
744 | #[cfg_attr(test, assert_instr(psrlq, imm8 = 1))] | |
745 | #[rustc_args_required_const(1)] | |
83c7162d | 746 | #[stable(feature = "simd_x86", since = "1.27.0")] |
0531ce1d XL |
747 | pub unsafe fn _mm_srli_epi64(a: __m128i, imm8: i32) -> __m128i { |
748 | mem::transmute(psrliq(a.as_i64x2(), imm8)) | |
749 | } | |
750 | ||
751 | /// Shift packed 64-bit integers in `a` right by `count` while shifting in | |
752 | /// zeros. | |
83c7162d XL |
753 | /// |
754 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_srl_epi64) | |
0531ce1d XL |
755 | #[inline] |
756 | #[target_feature(enable = "sse2")] | |
757 | #[cfg_attr(test, assert_instr(psrlq))] | |
83c7162d | 758 | #[stable(feature = "simd_x86", since = "1.27.0")] |
0531ce1d XL |
759 | pub unsafe fn _mm_srl_epi64(a: __m128i, count: __m128i) -> __m128i { |
760 | mem::transmute(psrlq(a.as_i64x2(), count.as_i64x2())) | |
761 | } | |
762 | ||
763 | /// Compute the bitwise AND of 128 bits (representing integer data) in `a` and | |
764 | /// `b`. | |
83c7162d XL |
765 | /// |
766 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_and_si128) | |
0531ce1d XL |
767 | #[inline] |
768 | #[target_feature(enable = "sse2")] | |
769 | #[cfg_attr(test, assert_instr(andps))] | |
83c7162d | 770 | #[stable(feature = "simd_x86", since = "1.27.0")] |
0531ce1d XL |
771 | pub unsafe fn _mm_and_si128(a: __m128i, b: __m128i) -> __m128i { |
772 | simd_and(a, b) | |
773 | } | |
774 | ||
775 | /// Compute the bitwise NOT of 128 bits (representing integer data) in `a` and | |
776 | /// then AND with `b`. | |
83c7162d XL |
777 | /// |
778 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_andnot_si128) | |
0531ce1d XL |
779 | #[inline] |
780 | #[target_feature(enable = "sse2")] | |
781 | #[cfg_attr(test, assert_instr(andnps))] | |
83c7162d | 782 | #[stable(feature = "simd_x86", since = "1.27.0")] |
0531ce1d XL |
783 | pub unsafe fn _mm_andnot_si128(a: __m128i, b: __m128i) -> __m128i { |
784 | simd_and(simd_xor(_mm_set1_epi8(-1), a), b) | |
785 | } | |
786 | ||
787 | /// Compute the bitwise OR of 128 bits (representing integer data) in `a` and | |
788 | /// `b`. | |
83c7162d XL |
789 | /// |
790 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_or_si128) | |
0531ce1d XL |
791 | #[inline] |
792 | #[target_feature(enable = "sse2")] | |
793 | #[cfg_attr(test, assert_instr(orps))] | |
83c7162d | 794 | #[stable(feature = "simd_x86", since = "1.27.0")] |
0531ce1d XL |
795 | pub unsafe fn _mm_or_si128(a: __m128i, b: __m128i) -> __m128i { |
796 | simd_or(a, b) | |
797 | } | |
798 | ||
799 | /// Compute the bitwise XOR of 128 bits (representing integer data) in `a` and | |
800 | /// `b`. | |
83c7162d XL |
801 | /// |
802 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_xor_si128) | |
0531ce1d XL |
803 | #[inline] |
804 | #[target_feature(enable = "sse2")] | |
805 | #[cfg_attr(test, assert_instr(xorps))] | |
83c7162d | 806 | #[stable(feature = "simd_x86", since = "1.27.0")] |
0531ce1d XL |
807 | pub unsafe fn _mm_xor_si128(a: __m128i, b: __m128i) -> __m128i { |
808 | simd_xor(a, b) | |
809 | } | |
810 | ||
811 | /// Compare packed 8-bit integers in `a` and `b` for equality. | |
83c7162d XL |
812 | /// |
813 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cmpeq_epi8) | |
0531ce1d XL |
814 | #[inline] |
815 | #[target_feature(enable = "sse2")] | |
816 | #[cfg_attr(test, assert_instr(pcmpeqb))] | |
83c7162d | 817 | #[stable(feature = "simd_x86", since = "1.27.0")] |
0531ce1d XL |
818 | pub unsafe fn _mm_cmpeq_epi8(a: __m128i, b: __m128i) -> __m128i { |
819 | mem::transmute::<i8x16, _>(simd_eq(a.as_i8x16(), b.as_i8x16())) | |
820 | } | |
821 | ||
822 | /// Compare packed 16-bit integers in `a` and `b` for equality. | |
83c7162d XL |
823 | /// |
824 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cmpeq_epi16) | |
0531ce1d XL |
825 | #[inline] |
826 | #[target_feature(enable = "sse2")] | |
827 | #[cfg_attr(test, assert_instr(pcmpeqw))] | |
83c7162d | 828 | #[stable(feature = "simd_x86", since = "1.27.0")] |
0531ce1d XL |
829 | pub unsafe fn _mm_cmpeq_epi16(a: __m128i, b: __m128i) -> __m128i { |
830 | mem::transmute::<i16x8, _>(simd_eq(a.as_i16x8(), b.as_i16x8())) | |
831 | } | |
832 | ||
833 | /// Compare packed 32-bit integers in `a` and `b` for equality. | |
83c7162d XL |
834 | /// |
835 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cmpeq_epi32) | |
0531ce1d XL |
836 | #[inline] |
837 | #[target_feature(enable = "sse2")] | |
838 | #[cfg_attr(test, assert_instr(pcmpeqd))] | |
83c7162d | 839 | #[stable(feature = "simd_x86", since = "1.27.0")] |
0531ce1d XL |
840 | pub unsafe fn _mm_cmpeq_epi32(a: __m128i, b: __m128i) -> __m128i { |
841 | mem::transmute::<i32x4, _>(simd_eq(a.as_i32x4(), b.as_i32x4())) | |
842 | } | |
843 | ||
844 | /// Compare packed 8-bit integers in `a` and `b` for greater-than. | |
83c7162d XL |
845 | /// |
846 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cmpgt_epi8) | |
0531ce1d XL |
847 | #[inline] |
848 | #[target_feature(enable = "sse2")] | |
849 | #[cfg_attr(test, assert_instr(pcmpgtb))] | |
83c7162d | 850 | #[stable(feature = "simd_x86", since = "1.27.0")] |
0531ce1d XL |
851 | pub unsafe fn _mm_cmpgt_epi8(a: __m128i, b: __m128i) -> __m128i { |
852 | mem::transmute::<i8x16, _>(simd_gt(a.as_i8x16(), b.as_i8x16())) | |
853 | } | |
854 | ||
855 | /// Compare packed 16-bit integers in `a` and `b` for greater-than. | |
83c7162d XL |
856 | /// |
857 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cmpgt_epi16) | |
0531ce1d XL |
858 | #[inline] |
859 | #[target_feature(enable = "sse2")] | |
860 | #[cfg_attr(test, assert_instr(pcmpgtw))] | |
83c7162d | 861 | #[stable(feature = "simd_x86", since = "1.27.0")] |
0531ce1d XL |
862 | pub unsafe fn _mm_cmpgt_epi16(a: __m128i, b: __m128i) -> __m128i { |
863 | mem::transmute::<i16x8, _>(simd_gt(a.as_i16x8(), b.as_i16x8())) | |
864 | } | |
865 | ||
866 | /// Compare packed 32-bit integers in `a` and `b` for greater-than. | |
83c7162d XL |
867 | /// |
868 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cmpgt_epi32) | |
0531ce1d XL |
869 | #[inline] |
870 | #[target_feature(enable = "sse2")] | |
871 | #[cfg_attr(test, assert_instr(pcmpgtd))] | |
83c7162d | 872 | #[stable(feature = "simd_x86", since = "1.27.0")] |
0531ce1d XL |
873 | pub unsafe fn _mm_cmpgt_epi32(a: __m128i, b: __m128i) -> __m128i { |
874 | mem::transmute::<i32x4, _>(simd_gt(a.as_i32x4(), b.as_i32x4())) | |
875 | } | |
876 | ||
877 | /// Compare packed 8-bit integers in `a` and `b` for less-than. | |
83c7162d XL |
878 | /// |
879 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cmplt_epi8) | |
0531ce1d XL |
880 | #[inline] |
881 | #[target_feature(enable = "sse2")] | |
882 | #[cfg_attr(test, assert_instr(pcmpgtb))] | |
83c7162d | 883 | #[stable(feature = "simd_x86", since = "1.27.0")] |
0531ce1d XL |
884 | pub unsafe fn _mm_cmplt_epi8(a: __m128i, b: __m128i) -> __m128i { |
885 | mem::transmute::<i8x16, _>(simd_lt(a.as_i8x16(), b.as_i8x16())) | |
886 | } | |
887 | ||
888 | /// Compare packed 16-bit integers in `a` and `b` for less-than. | |
83c7162d XL |
889 | /// |
890 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cmplt_epi16) | |
0531ce1d XL |
891 | #[inline] |
892 | #[target_feature(enable = "sse2")] | |
893 | #[cfg_attr(test, assert_instr(pcmpgtw))] | |
83c7162d | 894 | #[stable(feature = "simd_x86", since = "1.27.0")] |
0531ce1d XL |
895 | pub unsafe fn _mm_cmplt_epi16(a: __m128i, b: __m128i) -> __m128i { |
896 | mem::transmute::<i16x8, _>(simd_lt(a.as_i16x8(), b.as_i16x8())) | |
897 | } | |
898 | ||
899 | /// Compare packed 32-bit integers in `a` and `b` for less-than. | |
83c7162d XL |
900 | /// |
901 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cmplt_epi32) | |
0531ce1d XL |
902 | #[inline] |
903 | #[target_feature(enable = "sse2")] | |
904 | #[cfg_attr(test, assert_instr(pcmpgtd))] | |
83c7162d | 905 | #[stable(feature = "simd_x86", since = "1.27.0")] |
0531ce1d XL |
906 | pub unsafe fn _mm_cmplt_epi32(a: __m128i, b: __m128i) -> __m128i { |
907 | mem::transmute::<i32x4, _>(simd_lt(a.as_i32x4(), b.as_i32x4())) | |
908 | } | |
909 | ||
910 | /// Convert the lower two packed 32-bit integers in `a` to packed | |
911 | /// double-precision (64-bit) floating-point elements. | |
83c7162d XL |
912 | /// |
913 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvtepi32_pd) | |
0531ce1d XL |
914 | #[inline] |
915 | #[target_feature(enable = "sse2")] | |
916 | #[cfg_attr(test, assert_instr(cvtdq2pd))] | |
83c7162d | 917 | #[stable(feature = "simd_x86", since = "1.27.0")] |
0531ce1d XL |
918 | pub unsafe fn _mm_cvtepi32_pd(a: __m128i) -> __m128d { |
919 | let a = a.as_i32x4(); | |
920 | simd_cast::<i32x2, __m128d>(simd_shuffle2(a, a, [0, 1])) | |
921 | } | |
922 | ||
923 | /// Return `a` with its lower element replaced by `b` after converting it to | |
924 | /// an `f64`. | |
83c7162d XL |
925 | /// |
926 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvtsi32_sd) | |
0531ce1d XL |
927 | #[inline] |
928 | #[target_feature(enable = "sse2")] | |
929 | #[cfg_attr(test, assert_instr(cvtsi2sd))] | |
83c7162d | 930 | #[stable(feature = "simd_x86", since = "1.27.0")] |
0531ce1d XL |
931 | pub unsafe fn _mm_cvtsi32_sd(a: __m128d, b: i32) -> __m128d { |
932 | simd_insert(a, 0, b as f64) | |
933 | } | |
934 | ||
935 | /// Convert packed 32-bit integers in `a` to packed single-precision (32-bit) | |
936 | /// floating-point elements. | |
83c7162d XL |
937 | /// |
938 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvtepi32_ps) | |
0531ce1d XL |
939 | #[inline] |
940 | #[target_feature(enable = "sse2")] | |
941 | #[cfg_attr(test, assert_instr(cvtdq2ps))] | |
83c7162d | 942 | #[stable(feature = "simd_x86", since = "1.27.0")] |
0531ce1d XL |
943 | pub unsafe fn _mm_cvtepi32_ps(a: __m128i) -> __m128 { |
944 | cvtdq2ps(a.as_i32x4()) | |
945 | } | |
946 | ||
947 | /// Convert packed single-precision (32-bit) floating-point elements in `a` | |
948 | /// to packed 32-bit integers. | |
83c7162d XL |
949 | /// |
950 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvtps_epi32) | |
0531ce1d XL |
951 | #[inline] |
952 | #[target_feature(enable = "sse2")] | |
953 | #[cfg_attr(test, assert_instr(cvtps2dq))] | |
83c7162d | 954 | #[stable(feature = "simd_x86", since = "1.27.0")] |
0531ce1d XL |
955 | pub unsafe fn _mm_cvtps_epi32(a: __m128) -> __m128i { |
956 | mem::transmute(cvtps2dq(a)) | |
957 | } | |
958 | ||
959 | /// Return a vector whose lowest element is `a` and all higher elements are | |
960 | /// `0`. | |
83c7162d XL |
961 | /// |
962 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvtsi32_si128) | |
0531ce1d XL |
963 | #[inline] |
964 | #[target_feature(enable = "sse2")] | |
965 | #[cfg_attr(all(test, target_arch = "x86_64"), assert_instr(movd))] | |
83c7162d | 966 | #[stable(feature = "simd_x86", since = "1.27.0")] |
0531ce1d XL |
967 | pub unsafe fn _mm_cvtsi32_si128(a: i32) -> __m128i { |
968 | mem::transmute(i32x4::new(a, 0, 0, 0)) | |
969 | } | |
970 | ||
971 | /// Return the lowest element of `a`. | |
83c7162d XL |
972 | /// |
973 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvtsi128_si32) | |
0531ce1d XL |
974 | #[inline] |
975 | #[target_feature(enable = "sse2")] | |
0731742a | 976 | #[cfg_attr(all(test, not(target_os = "windows")), assert_instr(movd))] |
83c7162d | 977 | #[stable(feature = "simd_x86", since = "1.27.0")] |
0531ce1d XL |
978 | pub unsafe fn _mm_cvtsi128_si32(a: __m128i) -> i32 { |
979 | simd_extract(a.as_i32x4(), 0) | |
980 | } | |
981 | ||
982 | /// Set packed 64-bit integers with the supplied values, from highest to | |
983 | /// lowest. | |
83c7162d XL |
984 | /// |
985 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_set_epi64x) | |
0531ce1d XL |
986 | #[inline] |
987 | #[target_feature(enable = "sse2")] | |
988 | // no particular instruction to test | |
83c7162d | 989 | #[stable(feature = "simd_x86", since = "1.27.0")] |
0531ce1d XL |
990 | pub unsafe fn _mm_set_epi64x(e1: i64, e0: i64) -> __m128i { |
991 | mem::transmute(i64x2::new(e0, e1)) | |
992 | } | |
993 | ||
994 | /// Set packed 32-bit integers with the supplied values. | |
83c7162d XL |
995 | /// |
996 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_set_epi32) | |
0531ce1d XL |
997 | #[inline] |
998 | #[target_feature(enable = "sse2")] | |
999 | // no particular instruction to test | |
83c7162d | 1000 | #[stable(feature = "simd_x86", since = "1.27.0")] |
0531ce1d XL |
1001 | pub unsafe fn _mm_set_epi32(e3: i32, e2: i32, e1: i32, e0: i32) -> __m128i { |
1002 | mem::transmute(i32x4::new(e0, e1, e2, e3)) | |
1003 | } | |
1004 | ||
1005 | /// Set packed 16-bit integers with the supplied values. | |
83c7162d XL |
1006 | /// |
1007 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_set_epi16) | |
0531ce1d XL |
1008 | #[inline] |
1009 | #[target_feature(enable = "sse2")] | |
1010 | // no particular instruction to test | |
83c7162d | 1011 | #[stable(feature = "simd_x86", since = "1.27.0")] |
0531ce1d | 1012 | pub unsafe fn _mm_set_epi16( |
0731742a XL |
1013 | e7: i16, |
1014 | e6: i16, | |
1015 | e5: i16, | |
1016 | e4: i16, | |
1017 | e3: i16, | |
1018 | e2: i16, | |
1019 | e1: i16, | |
1020 | e0: i16, | |
0531ce1d XL |
1021 | ) -> __m128i { |
1022 | mem::transmute(i16x8::new(e0, e1, e2, e3, e4, e5, e6, e7)) | |
1023 | } | |
1024 | ||
1025 | /// Set packed 8-bit integers with the supplied values. | |
83c7162d XL |
1026 | /// |
1027 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_set_epi8) | |
0531ce1d XL |
1028 | #[inline] |
1029 | #[target_feature(enable = "sse2")] | |
1030 | // no particular instruction to test | |
83c7162d | 1031 | #[stable(feature = "simd_x86", since = "1.27.0")] |
0531ce1d | 1032 | pub unsafe fn _mm_set_epi8( |
0731742a XL |
1033 | e15: i8, |
1034 | e14: i8, | |
1035 | e13: i8, | |
1036 | e12: i8, | |
1037 | e11: i8, | |
1038 | e10: i8, | |
1039 | e9: i8, | |
1040 | e8: i8, | |
1041 | e7: i8, | |
1042 | e6: i8, | |
1043 | e5: i8, | |
1044 | e4: i8, | |
1045 | e3: i8, | |
1046 | e2: i8, | |
1047 | e1: i8, | |
1048 | e0: i8, | |
0531ce1d | 1049 | ) -> __m128i { |
0731742a | 1050 | #[rustfmt::skip] |
0531ce1d XL |
1051 | mem::transmute(i8x16::new( |
1052 | e0, e1, e2, e3, e4, e5, e6, e7, e8, e9, e10, e11, e12, e13, e14, e15, | |
1053 | )) | |
1054 | } | |
1055 | ||
1056 | /// Broadcast 64-bit integer `a` to all elements. | |
83c7162d XL |
1057 | /// |
1058 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_set1_epi64x) | |
0531ce1d XL |
1059 | #[inline] |
1060 | #[target_feature(enable = "sse2")] | |
1061 | // no particular instruction to test | |
83c7162d | 1062 | #[stable(feature = "simd_x86", since = "1.27.0")] |
0531ce1d XL |
1063 | pub unsafe fn _mm_set1_epi64x(a: i64) -> __m128i { |
1064 | _mm_set_epi64x(a, a) | |
1065 | } | |
1066 | ||
1067 | /// Broadcast 32-bit integer `a` to all elements. | |
83c7162d XL |
1068 | /// |
1069 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_set1_epi32) | |
0531ce1d XL |
1070 | #[inline] |
1071 | #[target_feature(enable = "sse2")] | |
1072 | // no particular instruction to test | |
83c7162d | 1073 | #[stable(feature = "simd_x86", since = "1.27.0")] |
0531ce1d XL |
1074 | pub unsafe fn _mm_set1_epi32(a: i32) -> __m128i { |
1075 | _mm_set_epi32(a, a, a, a) | |
1076 | } | |
1077 | ||
1078 | /// Broadcast 16-bit integer `a` to all elements. | |
83c7162d XL |
1079 | /// |
1080 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_set1_epi16) | |
0531ce1d XL |
1081 | #[inline] |
1082 | #[target_feature(enable = "sse2")] | |
1083 | // no particular instruction to test | |
83c7162d | 1084 | #[stable(feature = "simd_x86", since = "1.27.0")] |
0531ce1d XL |
1085 | pub unsafe fn _mm_set1_epi16(a: i16) -> __m128i { |
1086 | _mm_set_epi16(a, a, a, a, a, a, a, a) | |
1087 | } | |
1088 | ||
1089 | /// Broadcast 8-bit integer `a` to all elements. | |
83c7162d XL |
1090 | /// |
1091 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_set1_epi8) | |
0531ce1d XL |
1092 | #[inline] |
1093 | #[target_feature(enable = "sse2")] | |
1094 | // no particular instruction to test | |
83c7162d | 1095 | #[stable(feature = "simd_x86", since = "1.27.0")] |
0531ce1d XL |
1096 | pub unsafe fn _mm_set1_epi8(a: i8) -> __m128i { |
1097 | _mm_set_epi8(a, a, a, a, a, a, a, a, a, a, a, a, a, a, a, a) | |
1098 | } | |
1099 | ||
1100 | /// Set packed 32-bit integers with the supplied values in reverse order. | |
83c7162d XL |
1101 | /// |
1102 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_setr_epi32) | |
0531ce1d XL |
1103 | #[inline] |
1104 | #[target_feature(enable = "sse2")] | |
1105 | // no particular instruction to test | |
83c7162d | 1106 | #[stable(feature = "simd_x86", since = "1.27.0")] |
0531ce1d XL |
1107 | pub unsafe fn _mm_setr_epi32(e3: i32, e2: i32, e1: i32, e0: i32) -> __m128i { |
1108 | _mm_set_epi32(e0, e1, e2, e3) | |
1109 | } | |
1110 | ||
1111 | /// Set packed 16-bit integers with the supplied values in reverse order. | |
83c7162d XL |
1112 | /// |
1113 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_setr_epi16) | |
0531ce1d XL |
1114 | #[inline] |
1115 | #[target_feature(enable = "sse2")] | |
1116 | // no particular instruction to test | |
83c7162d | 1117 | #[stable(feature = "simd_x86", since = "1.27.0")] |
0531ce1d | 1118 | pub unsafe fn _mm_setr_epi16( |
0731742a XL |
1119 | e7: i16, |
1120 | e6: i16, | |
1121 | e5: i16, | |
1122 | e4: i16, | |
1123 | e3: i16, | |
1124 | e2: i16, | |
1125 | e1: i16, | |
1126 | e0: i16, | |
0531ce1d XL |
1127 | ) -> __m128i { |
1128 | _mm_set_epi16(e0, e1, e2, e3, e4, e5, e6, e7) | |
1129 | } | |
1130 | ||
1131 | /// Set packed 8-bit integers with the supplied values in reverse order. | |
83c7162d XL |
1132 | /// |
1133 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_setr_epi8) | |
0531ce1d XL |
1134 | #[inline] |
1135 | #[target_feature(enable = "sse2")] | |
1136 | // no particular instruction to test | |
83c7162d | 1137 | #[stable(feature = "simd_x86", since = "1.27.0")] |
0531ce1d | 1138 | pub unsafe fn _mm_setr_epi8( |
0731742a XL |
1139 | e15: i8, |
1140 | e14: i8, | |
1141 | e13: i8, | |
1142 | e12: i8, | |
1143 | e11: i8, | |
1144 | e10: i8, | |
1145 | e9: i8, | |
1146 | e8: i8, | |
1147 | e7: i8, | |
1148 | e6: i8, | |
1149 | e5: i8, | |
1150 | e4: i8, | |
1151 | e3: i8, | |
1152 | e2: i8, | |
1153 | e1: i8, | |
1154 | e0: i8, | |
0531ce1d | 1155 | ) -> __m128i { |
0731742a | 1156 | #[rustfmt::skip] |
0531ce1d XL |
1157 | _mm_set_epi8( |
1158 | e0, e1, e2, e3, e4, e5, e6, e7, e8, e9, e10, e11, e12, e13, e14, e15, | |
1159 | ) | |
1160 | } | |
1161 | ||
1162 | /// Returns a vector with all elements set to zero. | |
83c7162d XL |
1163 | /// |
1164 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_setzero_si128) | |
0531ce1d XL |
1165 | #[inline] |
1166 | #[target_feature(enable = "sse2")] | |
1167 | #[cfg_attr(test, assert_instr(xorps))] | |
83c7162d | 1168 | #[stable(feature = "simd_x86", since = "1.27.0")] |
0531ce1d XL |
1169 | pub unsafe fn _mm_setzero_si128() -> __m128i { |
1170 | _mm_set1_epi64x(0) | |
1171 | } | |
1172 | ||
1173 | /// Load 64-bit integer from memory into first element of returned vector. | |
83c7162d XL |
1174 | /// |
1175 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_loadl_epi64) | |
0531ce1d XL |
1176 | #[inline] |
1177 | #[target_feature(enable = "sse2")] | |
1178 | // FIXME movsd on windows | |
8faf50e0 XL |
1179 | #[cfg_attr( |
1180 | all( | |
1181 | test, | |
1182 | not(windows), | |
1183 | not(all(target_os = "linux", target_arch = "x86_64")), | |
1184 | target_arch = "x86_64" | |
1185 | ), | |
1186 | assert_instr(movq) | |
1187 | )] | |
83c7162d | 1188 | #[stable(feature = "simd_x86", since = "1.27.0")] |
0531ce1d | 1189 | pub unsafe fn _mm_loadl_epi64(mem_addr: *const __m128i) -> __m128i { |
a1dfa0c6 | 1190 | _mm_set_epi64x(0, ptr::read_unaligned(mem_addr as *const i64)) |
0531ce1d XL |
1191 | } |
1192 | ||
1193 | /// Load 128-bits of integer data from memory into a new vector. | |
1194 | /// | |
1195 | /// `mem_addr` must be aligned on a 16-byte boundary. | |
83c7162d XL |
1196 | /// |
1197 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_load_si128) | |
0531ce1d XL |
1198 | #[inline] |
1199 | #[target_feature(enable = "sse2")] | |
1200 | #[cfg_attr(test, assert_instr(movaps))] | |
83c7162d | 1201 | #[stable(feature = "simd_x86", since = "1.27.0")] |
0531ce1d XL |
1202 | pub unsafe fn _mm_load_si128(mem_addr: *const __m128i) -> __m128i { |
1203 | *mem_addr | |
1204 | } | |
1205 | ||
1206 | /// Load 128-bits of integer data from memory into a new vector. | |
1207 | /// | |
1208 | /// `mem_addr` does not need to be aligned on any particular boundary. | |
83c7162d XL |
1209 | /// |
1210 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_loadu_si128) | |
0531ce1d XL |
1211 | #[inline] |
1212 | #[target_feature(enable = "sse2")] | |
1213 | #[cfg_attr(test, assert_instr(movups))] | |
83c7162d | 1214 | #[stable(feature = "simd_x86", since = "1.27.0")] |
0531ce1d XL |
1215 | pub unsafe fn _mm_loadu_si128(mem_addr: *const __m128i) -> __m128i { |
1216 | let mut dst: __m128i = _mm_undefined_si128(); | |
1217 | ptr::copy_nonoverlapping( | |
1218 | mem_addr as *const u8, | |
1219 | &mut dst as *mut __m128i as *mut u8, | |
1220 | mem::size_of::<__m128i>(), | |
1221 | ); | |
1222 | dst | |
1223 | } | |
1224 | ||
1225 | /// Conditionally store 8-bit integer elements from `a` into memory using | |
1226 | /// `mask`. | |
1227 | /// | |
1228 | /// Elements are not stored when the highest bit is not set in the | |
1229 | /// corresponding element. | |
1230 | /// | |
1231 | /// `mem_addr` should correspond to a 128-bit memory location and does not need | |
1232 | /// to be aligned on any particular boundary. | |
83c7162d XL |
1233 | /// |
1234 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskmoveu_si128) | |
0531ce1d XL |
1235 | #[inline] |
1236 | #[target_feature(enable = "sse2")] | |
1237 | #[cfg_attr(test, assert_instr(maskmovdqu))] | |
83c7162d | 1238 | #[stable(feature = "simd_x86", since = "1.27.0")] |
0731742a | 1239 | pub unsafe fn _mm_maskmoveu_si128(a: __m128i, mask: __m128i, mem_addr: *mut i8) { |
0531ce1d XL |
1240 | maskmovdqu(a.as_i8x16(), mask.as_i8x16(), mem_addr) |
1241 | } | |
1242 | ||
1243 | /// Store 128-bits of integer data from `a` into memory. | |
1244 | /// | |
1245 | /// `mem_addr` must be aligned on a 16-byte boundary. | |
83c7162d XL |
1246 | /// |
1247 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_store_si128) | |
0531ce1d XL |
1248 | #[inline] |
1249 | #[target_feature(enable = "sse2")] | |
1250 | #[cfg_attr(test, assert_instr(movaps))] | |
83c7162d | 1251 | #[stable(feature = "simd_x86", since = "1.27.0")] |
0531ce1d XL |
1252 | pub unsafe fn _mm_store_si128(mem_addr: *mut __m128i, a: __m128i) { |
1253 | *mem_addr = a; | |
1254 | } | |
1255 | ||
1256 | /// Store 128-bits of integer data from `a` into memory. | |
1257 | /// | |
1258 | /// `mem_addr` does not need to be aligned on any particular boundary. | |
83c7162d XL |
1259 | /// |
1260 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_storeu_si128) | |
0531ce1d XL |
1261 | #[inline] |
1262 | #[target_feature(enable = "sse2")] | |
1263 | #[cfg_attr(test, assert_instr(movups))] // FIXME movdqu expected | |
83c7162d | 1264 | #[stable(feature = "simd_x86", since = "1.27.0")] |
0531ce1d XL |
1265 | pub unsafe fn _mm_storeu_si128(mem_addr: *mut __m128i, a: __m128i) { |
1266 | storeudq(mem_addr as *mut i8, a); | |
1267 | } | |
1268 | ||
1269 | /// Store the lower 64-bit integer `a` to a memory location. | |
1270 | /// | |
1271 | /// `mem_addr` does not need to be aligned on any particular boundary. | |
83c7162d XL |
1272 | /// |
1273 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_storel_epi64) | |
0531ce1d XL |
1274 | #[inline] |
1275 | #[target_feature(enable = "sse2")] | |
1276 | // FIXME mov on windows, movlps on i686 | |
8faf50e0 XL |
1277 | #[cfg_attr( |
1278 | all( | |
1279 | test, | |
1280 | not(windows), | |
1281 | not(all(target_os = "linux", target_arch = "x86_64")), | |
1282 | target_arch = "x86_64" | |
1283 | ), | |
1284 | assert_instr(movq) | |
1285 | )] | |
83c7162d | 1286 | #[stable(feature = "simd_x86", since = "1.27.0")] |
0531ce1d | 1287 | pub unsafe fn _mm_storel_epi64(mem_addr: *mut __m128i, a: __m128i) { |
0731742a | 1288 | ptr::copy_nonoverlapping(&a as *const _ as *const u8, mem_addr as *mut u8, 8); |
0531ce1d XL |
1289 | } |
1290 | ||
1291 | /// Stores a 128-bit integer vector to a 128-bit aligned memory location. | |
1292 | /// To minimize caching, the data is flagged as non-temporal (unlikely to be | |
1293 | /// used again soon). | |
83c7162d XL |
1294 | /// |
1295 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_stream_si128) | |
0531ce1d XL |
1296 | #[inline] |
1297 | #[target_feature(enable = "sse2")] | |
1298 | #[cfg_attr(test, assert_instr(movntps))] // FIXME movntdq | |
83c7162d | 1299 | #[stable(feature = "simd_x86", since = "1.27.0")] |
0531ce1d XL |
1300 | pub unsafe fn _mm_stream_si128(mem_addr: *mut __m128i, a: __m128i) { |
1301 | ::intrinsics::nontemporal_store(mem_addr, a); | |
1302 | } | |
1303 | ||
1304 | /// Stores a 32-bit integer value in the specified memory location. | |
1305 | /// To minimize caching, the data is flagged as non-temporal (unlikely to be | |
1306 | /// used again soon). | |
83c7162d XL |
1307 | /// |
1308 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_stream_si32) | |
0531ce1d XL |
1309 | #[inline] |
1310 | #[target_feature(enable = "sse2")] | |
1311 | #[cfg_attr(test, assert_instr(movnti))] | |
83c7162d | 1312 | #[stable(feature = "simd_x86", since = "1.27.0")] |
0531ce1d XL |
1313 | pub unsafe fn _mm_stream_si32(mem_addr: *mut i32, a: i32) { |
1314 | ::intrinsics::nontemporal_store(mem_addr, a); | |
1315 | } | |
1316 | ||
/// Return a vector where the low element is extracted from `a` and its upper
/// element is zero.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_move_epi64)
#[inline]
#[target_feature(enable = "sse2")]
// FIXME movd on windows, movd on i686
#[cfg_attr(all(test, not(windows), target_arch = "x86_64"), assert_instr(movq))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_move_epi64(a: __m128i) -> __m128i {
    let zero = _mm_setzero_si128();
    // Shuffle indices [0, 2]: lane 0 of `a`, then lane 0 of `zero`
    // (indices 2..3 address the second operand), zeroing the upper half.
    let r: i64x2 = simd_shuffle2(a.as_i64x2(), zero.as_i64x2(), [0, 2]);
    mem::transmute(r)
}
1331 | ||
/// Convert packed 16-bit integers from `a` and `b` to packed 8-bit integers
/// using signed saturation.
///
/// Values outside the `i8` range are clamped to `i8::MIN`/`i8::MAX`.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_packs_epi16)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(packsswb))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_packs_epi16(a: __m128i, b: __m128i) -> __m128i {
    mem::transmute(packsswb(a.as_i16x8(), b.as_i16x8()))
}
1343 | ||
/// Convert packed 32-bit integers from `a` and `b` to packed 16-bit integers
/// using signed saturation.
///
/// Values outside the `i16` range are clamped to `i16::MIN`/`i16::MAX`.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_packs_epi32)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(packssdw))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_packs_epi32(a: __m128i, b: __m128i) -> __m128i {
    mem::transmute(packssdw(a.as_i32x4(), b.as_i32x4()))
}
1355 | ||
/// Convert packed 16-bit integers from `a` and `b` to packed 8-bit integers
/// using unsigned saturation.
///
/// Values outside the `u8` range are clamped to `0`/`u8::MAX`.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_packus_epi16)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(packuswb))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_packus_epi16(a: __m128i, b: __m128i) -> __m128i {
    mem::transmute(packuswb(a.as_i16x8(), b.as_i16x8()))
}
1367 | ||
/// Return the `imm8` element of `a`.
///
/// Only the low 3 bits of `imm8` are used (there are 8 lanes of 16 bits);
/// the extracted `i16` is zero-extended to `i32` per the `pextrw` semantics.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_extract_epi16)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(pextrw, imm8 = 9))]
#[rustc_args_required_const(1)]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_extract_epi16(a: __m128i, imm8: i32) -> i32 {
    simd_extract::<_, i16>(a.as_i16x8(), (imm8 & 7) as u32) as i32
}
1379 | ||
/// Return a new vector where the `imm8` element of `a` is replaced with `i`.
///
/// Only the low 3 bits of `imm8` select the lane; only the low 16 bits of `i`
/// are stored.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_insert_epi16)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(pinsrw, imm8 = 9))]
#[rustc_args_required_const(2)]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_insert_epi16(a: __m128i, i: i32, imm8: i32) -> __m128i {
    mem::transmute(simd_insert(a.as_i16x8(), (imm8 & 7) as u32, i as i16))
}
1391 | ||
/// Return a mask of the most significant bit of each element in `a`.
///
/// Bit `i` of the result is the sign bit of byte lane `i`; the upper 16 bits
/// of the returned `i32` are zero.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_movemask_epi8)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(pmovmskb))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_movemask_epi8(a: __m128i) -> i32 {
    pmovmskb(a.as_i8x16())
}
1402 | ||
/// Shuffle 32-bit integers in `a` using the control in `imm8`.
///
/// Each 2-bit field of `imm8` selects the source lane for one output lane
/// (bits 0-1 -> lane 0, ..., bits 6-7 -> lane 3).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_shuffle_epi32)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(pshufd, imm8 = 9))]
#[rustc_args_required_const(1)]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_shuffle_epi32(a: __m128i, imm8: i32) -> __m128i {
    // simd_shuffleX requires that its selector parameter be made up of
    // constant values, but we can't enforce that here. In spirit, we need
    // to write a `match` on all possible values of a byte, and for each value,
    // hard-code the correct `simd_shuffleX` call using only constants. We
    // then hope for LLVM to do the rest.
    //
    // Of course, that's... awful. So we try to use macros to do it for us.
    let imm8 = (imm8 & 0xFF) as u8;
    let a = a.as_i32x4();

    // Innermost macro: all four lane selectors are now constants.
    macro_rules! shuffle_done {
        ($x01:expr, $x23:expr, $x45:expr, $x67:expr) => {
            simd_shuffle4(a, a, [$x01, $x23, $x45, $x67])
        };
    }
    // Each of the following macros decodes one 2-bit field of `imm8` into a
    // constant and recurses toward `shuffle_done!`.
    macro_rules! shuffle_x67 {
        ($x01:expr, $x23:expr, $x45:expr) => {
            match (imm8 >> 6) & 0b11 {
                0b00 => shuffle_done!($x01, $x23, $x45, 0),
                0b01 => shuffle_done!($x01, $x23, $x45, 1),
                0b10 => shuffle_done!($x01, $x23, $x45, 2),
                _ => shuffle_done!($x01, $x23, $x45, 3),
            }
        };
    }
    macro_rules! shuffle_x45 {
        ($x01:expr, $x23:expr) => {
            match (imm8 >> 4) & 0b11 {
                0b00 => shuffle_x67!($x01, $x23, 0),
                0b01 => shuffle_x67!($x01, $x23, 1),
                0b10 => shuffle_x67!($x01, $x23, 2),
                _ => shuffle_x67!($x01, $x23, 3),
            }
        };
    }
    macro_rules! shuffle_x23 {
        ($x01:expr) => {
            match (imm8 >> 2) & 0b11 {
                0b00 => shuffle_x45!($x01, 0),
                0b01 => shuffle_x45!($x01, 1),
                0b10 => shuffle_x45!($x01, 2),
                _ => shuffle_x45!($x01, 3),
            }
        };
    }
    let x: i32x4 = match imm8 & 0b11 {
        0b00 => shuffle_x23!(0),
        0b01 => shuffle_x23!(1),
        0b10 => shuffle_x23!(2),
        _ => shuffle_x23!(3),
    };
    mem::transmute(x)
}
1465 | ||
/// Shuffle 16-bit integers in the high 64 bits of `a` using the control in
/// `imm8`.
///
/// Put the results in the high 64 bits of the returned vector, with the low 64
/// bits being copied from `a`.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_shufflehi_epi16)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(pshufhw, imm8 = 9))]
#[rustc_args_required_const(1)]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_shufflehi_epi16(a: __m128i, imm8: i32) -> __m128i {
    // See _mm_shuffle_epi32 for an explanation of the macro cascade.
    let imm8 = (imm8 & 0xFF) as u8;
    let a = a.as_i16x8();
    macro_rules! shuffle_done {
        ($x01:expr, $x23:expr, $x45:expr, $x67:expr) => {
            // Lanes 0-3 pass through; lanes 4-7 are shuffled within the
            // high half (hence the `+ 4` offsets).
            simd_shuffle8(a, a, [0, 1, 2, 3, $x01 + 4, $x23 + 4, $x45 + 4, $x67 + 4])
        };
    }
    macro_rules! shuffle_x67 {
        ($x01:expr, $x23:expr, $x45:expr) => {
            match (imm8 >> 6) & 0b11 {
                0b00 => shuffle_done!($x01, $x23, $x45, 0),
                0b01 => shuffle_done!($x01, $x23, $x45, 1),
                0b10 => shuffle_done!($x01, $x23, $x45, 2),
                _ => shuffle_done!($x01, $x23, $x45, 3),
            }
        };
    }
    macro_rules! shuffle_x45 {
        ($x01:expr, $x23:expr) => {
            match (imm8 >> 4) & 0b11 {
                0b00 => shuffle_x67!($x01, $x23, 0),
                0b01 => shuffle_x67!($x01, $x23, 1),
                0b10 => shuffle_x67!($x01, $x23, 2),
                _ => shuffle_x67!($x01, $x23, 3),
            }
        };
    }
    macro_rules! shuffle_x23 {
        ($x01:expr) => {
            match (imm8 >> 2) & 0b11 {
                0b00 => shuffle_x45!($x01, 0),
                0b01 => shuffle_x45!($x01, 1),
                0b10 => shuffle_x45!($x01, 2),
                _ => shuffle_x45!($x01, 3),
            }
        };
    }
    let x: i16x8 = match imm8 & 0b11 {
        0b00 => shuffle_x23!(0),
        0b01 => shuffle_x23!(1),
        0b10 => shuffle_x23!(2),
        _ => shuffle_x23!(3),
    };
    mem::transmute(x)
}
1525 | ||
/// Shuffle 16-bit integers in the low 64 bits of `a` using the control in
/// `imm8`.
///
/// Put the results in the low 64 bits of the returned vector, with the high 64
/// bits being copied from `a`.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_shufflelo_epi16)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(pshuflw, imm8 = 9))]
#[rustc_args_required_const(1)]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_shufflelo_epi16(a: __m128i, imm8: i32) -> __m128i {
    // See _mm_shuffle_epi32 for an explanation of the macro cascade.
    let imm8 = (imm8 & 0xFF) as u8;
    let a = a.as_i16x8();

    macro_rules! shuffle_done {
        ($x01:expr, $x23:expr, $x45:expr, $x67:expr) => {
            // Lanes 0-3 are shuffled within the low half; lanes 4-7 pass
            // through unchanged.
            simd_shuffle8(a, a, [$x01, $x23, $x45, $x67, 4, 5, 6, 7])
        };
    }
    macro_rules! shuffle_x67 {
        ($x01:expr, $x23:expr, $x45:expr) => {
            match (imm8 >> 6) & 0b11 {
                0b00 => shuffle_done!($x01, $x23, $x45, 0),
                0b01 => shuffle_done!($x01, $x23, $x45, 1),
                0b10 => shuffle_done!($x01, $x23, $x45, 2),
                _ => shuffle_done!($x01, $x23, $x45, 3),
            }
        };
    }
    macro_rules! shuffle_x45 {
        ($x01:expr, $x23:expr) => {
            match (imm8 >> 4) & 0b11 {
                0b00 => shuffle_x67!($x01, $x23, 0),
                0b01 => shuffle_x67!($x01, $x23, 1),
                0b10 => shuffle_x67!($x01, $x23, 2),
                _ => shuffle_x67!($x01, $x23, 3),
            }
        };
    }
    macro_rules! shuffle_x23 {
        ($x01:expr) => {
            match (imm8 >> 2) & 0b11 {
                0b00 => shuffle_x45!($x01, 0),
                0b01 => shuffle_x45!($x01, 1),
                0b10 => shuffle_x45!($x01, 2),
                _ => shuffle_x45!($x01, 3),
            }
        };
    }
    let x: i16x8 = match imm8 & 0b11 {
        0b00 => shuffle_x23!(0),
        0b01 => shuffle_x23!(1),
        0b10 => shuffle_x23!(2),
        _ => shuffle_x23!(3),
    };
    mem::transmute(x)
}
1586 | ||
/// Unpack and interleave 8-bit integers from the high half of `a` and `b`.
///
/// Result lanes alternate `a[8], b[8], a[9], b[9], ...`.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_unpackhi_epi8)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(punpckhbw))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_unpackhi_epi8(a: __m128i, b: __m128i) -> __m128i {
    mem::transmute::<i8x16, _>(simd_shuffle16(
        a.as_i8x16(),
        b.as_i8x16(),
        // Indices >= 16 address lanes of `b`.
        [8, 24, 9, 25, 10, 26, 11, 27, 12, 28, 13, 29, 14, 30, 15, 31],
    ))
}
1601 | ||
/// Unpack and interleave 16-bit integers from the high half of `a` and `b`.
///
/// Result lanes alternate `a[4], b[4], a[5], b[5], ...`.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_unpackhi_epi16)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(punpckhwd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_unpackhi_epi16(a: __m128i, b: __m128i) -> __m128i {
    // Indices >= 8 address lanes of `b`.
    let x = simd_shuffle8(a.as_i16x8(), b.as_i16x8(), [4, 12, 5, 13, 6, 14, 7, 15]);
    mem::transmute::<i16x8, _>(x)
}
1613 | ||
/// Unpack and interleave 32-bit integers from the high half of `a` and `b`.
///
/// Result is `[a[2], b[2], a[3], b[3]]`.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_unpackhi_epi32)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(unpckhps))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_unpackhi_epi32(a: __m128i, b: __m128i) -> __m128i {
    mem::transmute::<i32x4, _>(simd_shuffle4(a.as_i32x4(), b.as_i32x4(), [2, 6, 3, 7]))
}
1624 | ||
/// Unpack and interleave 64-bit integers from the high half of `a` and `b`.
///
/// Result is `[a[1], b[1]]`.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_unpackhi_epi64)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(unpckhpd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_unpackhi_epi64(a: __m128i, b: __m128i) -> __m128i {
    mem::transmute::<i64x2, _>(simd_shuffle2(a.as_i64x2(), b.as_i64x2(), [1, 3]))
}
1635 | ||
/// Unpack and interleave 8-bit integers from the low half of `a` and `b`.
///
/// Result lanes alternate `a[0], b[0], a[1], b[1], ...`.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_unpacklo_epi8)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(punpcklbw))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_unpacklo_epi8(a: __m128i, b: __m128i) -> __m128i {
    mem::transmute::<i8x16, _>(simd_shuffle16(
        a.as_i8x16(),
        b.as_i8x16(),
        // Indices >= 16 address lanes of `b`.
        [0, 16, 1, 17, 2, 18, 3, 19, 4, 20, 5, 21, 6, 22, 7, 23],
    ))
}
1650 | ||
/// Unpack and interleave 16-bit integers from the low half of `a` and `b`.
///
/// Result lanes alternate `a[0], b[0], a[1], b[1], ...`.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_unpacklo_epi16)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(punpcklwd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_unpacklo_epi16(a: __m128i, b: __m128i) -> __m128i {
    // Indices >= 8 address lanes of `b`.
    let x = simd_shuffle8(a.as_i16x8(), b.as_i16x8(), [0, 8, 1, 9, 2, 10, 3, 11]);
    mem::transmute::<i16x8, _>(x)
}
1662 | ||
/// Unpack and interleave 32-bit integers from the low half of `a` and `b`.
///
/// Result is `[a[0], b[0], a[1], b[1]]`.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_unpacklo_epi32)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(unpcklps))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_unpacklo_epi32(a: __m128i, b: __m128i) -> __m128i {
    mem::transmute::<i32x4, _>(simd_shuffle4(a.as_i32x4(), b.as_i32x4(), [0, 4, 1, 5]))
}
1673 | ||
/// Unpack and interleave 64-bit integers from the low half of `a` and `b`.
///
/// Result is `[a[0], b[0]]`.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_unpacklo_epi64)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(all(test, not(target_os = "windows")), assert_instr(movlhps))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_unpacklo_epi64(a: __m128i, b: __m128i) -> __m128i {
    mem::transmute::<i64x2, _>(simd_shuffle2(a.as_i64x2(), b.as_i64x2(), [0, 2]))
}
1684 | ||
/// Return a new vector with the low element of `a` replaced by the sum of the
/// low elements of `a` and `b`.
///
/// The upper element of the result is copied from `a`.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_add_sd)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(addsd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_add_sd(a: __m128d, b: __m128d) -> __m128d {
    simd_insert(a, 0, _mm_cvtsd_f64(a) + _mm_cvtsd_f64(b))
}
1696 | ||
/// Add packed double-precision (64-bit) floating-point elements in `a` and
/// `b`.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_add_pd)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(addpd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_add_pd(a: __m128d, b: __m128d) -> __m128d {
    simd_add(a, b)
}
1708 | ||
/// Return a new vector with the low element of `a` replaced by the result of
/// dividing the lower element of `a` by the lower element of `b`.
///
/// The upper element of the result is copied from `a`.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_div_sd)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(divsd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_div_sd(a: __m128d, b: __m128d) -> __m128d {
    simd_insert(a, 0, _mm_cvtsd_f64(a) / _mm_cvtsd_f64(b))
}
1720 | ||
/// Divide packed double-precision (64-bit) floating-point elements in `a` by
/// packed elements in `b`.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_div_pd)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(divpd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_div_pd(a: __m128d, b: __m128d) -> __m128d {
    simd_div(a, b)
}
1732 | ||
/// Return a new vector with the low element of `a` replaced by the maximum
/// of the lower elements of `a` and `b`.
///
/// The upper element of the result is copied from `a`.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_max_sd)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(maxsd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_max_sd(a: __m128d, b: __m128d) -> __m128d {
    maxsd(a, b)
}
1744 | ||
/// Return a new vector with the maximum values from corresponding elements in
/// `a` and `b`.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_max_pd)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(maxpd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_max_pd(a: __m128d, b: __m128d) -> __m128d {
    maxpd(a, b)
}
1756 | ||
/// Return a new vector with the low element of `a` replaced by the minimum
/// of the lower elements of `a` and `b`.
///
/// The upper element of the result is copied from `a`.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_min_sd)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(minsd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_min_sd(a: __m128d, b: __m128d) -> __m128d {
    minsd(a, b)
}
1768 | ||
/// Return a new vector with the minimum values from corresponding elements in
/// `a` and `b`.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_min_pd)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(minpd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_min_pd(a: __m128d, b: __m128d) -> __m128d {
    minpd(a, b)
}
1780 | ||
/// Return a new vector with the low element of `a` replaced by multiplying the
/// low elements of `a` and `b`.
///
/// The upper element of the result is copied from `a`.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mul_sd)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(mulsd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_mul_sd(a: __m128d, b: __m128d) -> __m128d {
    simd_insert(a, 0, _mm_cvtsd_f64(a) * _mm_cvtsd_f64(b))
}
1792 | ||
/// Multiply packed double-precision (64-bit) floating-point elements in `a`
/// and `b`.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mul_pd)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(mulpd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_mul_pd(a: __m128d, b: __m128d) -> __m128d {
    simd_mul(a, b)
}
1804 | ||
/// Return a new vector with the low element of `a` replaced by the square
/// root of the lower element of `b`.
///
/// The upper element of the result is copied from `a`.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_sqrt_sd)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(sqrtsd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_sqrt_sd(a: __m128d, b: __m128d) -> __m128d {
    simd_insert(a, 0, _mm_cvtsd_f64(sqrtsd(b)))
}
1816 | ||
/// Return a new vector with the square root of each of the values in `a`.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_sqrt_pd)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(sqrtpd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_sqrt_pd(a: __m128d) -> __m128d {
    sqrtpd(a)
}
1827 | ||
/// Return a new vector with the low element of `a` replaced by the result of
/// subtracting the low element of `b` from the low element of `a`.
///
/// The upper element of the result is copied from `a`.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_sub_sd)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(subsd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_sub_sd(a: __m128d, b: __m128d) -> __m128d {
    simd_insert(a, 0, _mm_cvtsd_f64(a) - _mm_cvtsd_f64(b))
}
1839 | ||
/// Subtract packed double-precision (64-bit) floating-point elements in `b`
/// from `a`.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_sub_pd)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(subpd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_sub_pd(a: __m128d, b: __m128d) -> __m128d {
    simd_sub(a, b)
}
1851 | ||
1852 | /// Compute the bitwise AND of packed double-precision (64-bit) floating-point | |
1853 | /// elements in `a` and `b`. | |
83c7162d XL |
1854 | /// |
1855 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_and_pd) | |
0531ce1d XL |
1856 | #[inline] |
1857 | #[target_feature(enable = "sse2")] | |
1858 | #[cfg_attr(test, assert_instr(andps))] | |
83c7162d | 1859 | #[stable(feature = "simd_x86", since = "1.27.0")] |
0531ce1d XL |
1860 | pub unsafe fn _mm_and_pd(a: __m128d, b: __m128d) -> __m128d { |
1861 | let a: __m128i = mem::transmute(a); | |
1862 | let b: __m128i = mem::transmute(b); | |
1863 | mem::transmute(_mm_and_si128(a, b)) | |
1864 | } | |
1865 | ||
1866 | /// Compute the bitwise NOT of `a` and then AND with `b`. | |
83c7162d XL |
1867 | /// |
1868 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_andnot_pd) | |
0531ce1d XL |
1869 | #[inline] |
1870 | #[target_feature(enable = "sse2")] | |
1871 | #[cfg_attr(test, assert_instr(andnps))] | |
83c7162d | 1872 | #[stable(feature = "simd_x86", since = "1.27.0")] |
0531ce1d XL |
1873 | pub unsafe fn _mm_andnot_pd(a: __m128d, b: __m128d) -> __m128d { |
1874 | let a: __m128i = mem::transmute(a); | |
1875 | let b: __m128i = mem::transmute(b); | |
1876 | mem::transmute(_mm_andnot_si128(a, b)) | |
1877 | } | |
1878 | ||
1879 | /// Compute the bitwise OR of `a` and `b`. | |
83c7162d XL |
1880 | /// |
1881 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_or_pd) | |
0531ce1d XL |
1882 | #[inline] |
1883 | #[target_feature(enable = "sse2")] | |
1884 | #[cfg_attr(test, assert_instr(orps))] | |
83c7162d | 1885 | #[stable(feature = "simd_x86", since = "1.27.0")] |
0531ce1d XL |
1886 | pub unsafe fn _mm_or_pd(a: __m128d, b: __m128d) -> __m128d { |
1887 | let a: __m128i = mem::transmute(a); | |
1888 | let b: __m128i = mem::transmute(b); | |
1889 | mem::transmute(_mm_or_si128(a, b)) | |
1890 | } | |
1891 | ||
/// Compute the bitwise XOR of `a` and `b`.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_xor_pd)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(xorps))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_xor_pd(a: __m128d, b: __m128d) -> __m128d {
    // Bit-cast to integer vectors, XOR, and cast the bit pattern back.
    let a: __m128i = mem::transmute(a);
    let b: __m128i = mem::transmute(b);
    mem::transmute(_mm_xor_si128(a, b))
}
1904 | ||
/// Return a new vector with the low element of `a` replaced by the equality
/// comparison of the lower elements of `a` and `b`.
///
/// The low element is an all-ones (`0xFFFF_FFFF_FFFF_FFFF`) or all-zeros
/// mask; the upper element is copied from `a`.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cmpeq_sd)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(cmpeqsd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_cmpeq_sd(a: __m128d, b: __m128d) -> __m128d {
    // Predicate 0 = EQ (cmpeqsd).
    cmpsd(a, b, 0)
}
1916 | ||
/// Return a new vector with the low element of `a` replaced by the less-than
/// comparison of the lower elements of `a` and `b`.
///
/// The low element is an all-ones or all-zeros mask; the upper element is
/// copied from `a`.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cmplt_sd)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(cmpltsd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_cmplt_sd(a: __m128d, b: __m128d) -> __m128d {
    // Predicate 1 = LT (cmpltsd).
    cmpsd(a, b, 1)
}
1928 | ||
/// Return a new vector with the low element of `a` replaced by the
/// less-than-or-equal comparison of the lower elements of `a` and `b`.
///
/// The low element is an all-ones or all-zeros mask; the upper element is
/// copied from `a`.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cmple_sd)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(cmplesd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_cmple_sd(a: __m128d, b: __m128d) -> __m128d {
    // Predicate 2 = LE (cmplesd).
    cmpsd(a, b, 2)
}
1940 | ||
/// Return a new vector with the low element of `a` replaced by the
/// greater-than comparison of the lower elements of `a` and `b`.
///
/// The low element is an all-ones or all-zeros mask; the upper element is
/// copied from `a`.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cmpgt_sd)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(cmpltsd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_cmpgt_sd(a: __m128d, b: __m128d) -> __m128d {
    // `a > b` is computed as `b < a`; since the operand swap would make the
    // upper lane come from `b`, re-insert lane 1 of `a` to restore it.
    simd_insert(_mm_cmplt_sd(b, a), 1, simd_extract::<_, f64>(a, 1))
}
1952 | ||
/// Return a new vector with the low element of `a` replaced by the
/// greater-than-or-equal comparison of the lower elements of `a` and `b`.
///
/// The low element is an all-ones or all-zeros mask; the upper element is
/// copied from `a`.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cmpge_sd)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(cmplesd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_cmpge_sd(a: __m128d, b: __m128d) -> __m128d {
    // `a >= b` is computed as `b <= a`; since the operand swap would make the
    // upper lane come from `b`, re-insert lane 1 of `a` to restore it.
    simd_insert(_mm_cmple_sd(b, a), 1, simd_extract::<_, f64>(a, 1))
}
1964 | ||
1965 | /// Return a new vector with the low element of `a` replaced by the result | |
1966 | /// of comparing both of the lower elements of `a` and `b` to `NaN`. If | |
1967 | /// neither are equal to `NaN` then `0xFFFFFFFFFFFFFFFF` is used and `0` | |
1968 | /// otherwise. | |
83c7162d XL |
1969 | /// |
1970 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cmpord_sd) | |
0531ce1d XL |
1971 | #[inline] |
1972 | #[target_feature(enable = "sse2")] | |
1973 | #[cfg_attr(test, assert_instr(cmpordsd))] | |
83c7162d | 1974 | #[stable(feature = "simd_x86", since = "1.27.0")] |
0531ce1d XL |
1975 | pub unsafe fn _mm_cmpord_sd(a: __m128d, b: __m128d) -> __m128d { |
1976 | cmpsd(a, b, 7) | |
1977 | } | |
1978 | ||
1979 | /// Return a new vector with the low element of `a` replaced by the result of | |
1980 | /// comparing both of the lower elements of `a` and `b` to `NaN`. If either is | |
1981 | /// equal to `NaN` then `0xFFFFFFFFFFFFFFFF` is used and `0` otherwise. | |
83c7162d XL |
1982 | /// |
1983 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cmpunord_sd) | |
0531ce1d XL |
1984 | #[inline] |
1985 | #[target_feature(enable = "sse2")] | |
1986 | #[cfg_attr(test, assert_instr(cmpunordsd))] | |
83c7162d | 1987 | #[stable(feature = "simd_x86", since = "1.27.0")] |
0531ce1d XL |
1988 | pub unsafe fn _mm_cmpunord_sd(a: __m128d, b: __m128d) -> __m128d { |
1989 | cmpsd(a, b, 3) | |
1990 | } | |
1991 | ||
1992 | /// Return a new vector with the low element of `a` replaced by the not-equal | |
1993 | /// comparison of the lower elements of `a` and `b`. | |
83c7162d XL |
1994 | /// |
1995 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cmpneq_sd) | |
0531ce1d XL |
1996 | #[inline] |
1997 | #[target_feature(enable = "sse2")] | |
1998 | #[cfg_attr(test, assert_instr(cmpneqsd))] | |
83c7162d | 1999 | #[stable(feature = "simd_x86", since = "1.27.0")] |
0531ce1d XL |
2000 | pub unsafe fn _mm_cmpneq_sd(a: __m128d, b: __m128d) -> __m128d { |
2001 | cmpsd(a, b, 4) | |
2002 | } | |
2003 | ||
2004 | /// Return a new vector with the low element of `a` replaced by the | |
2005 | /// not-less-than comparison of the lower elements of `a` and `b`. | |
83c7162d XL |
2006 | /// |
2007 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cmpnlt_sd) | |
0531ce1d XL |
2008 | #[inline] |
2009 | #[target_feature(enable = "sse2")] | |
2010 | #[cfg_attr(test, assert_instr(cmpnltsd))] | |
83c7162d | 2011 | #[stable(feature = "simd_x86", since = "1.27.0")] |
0531ce1d XL |
2012 | pub unsafe fn _mm_cmpnlt_sd(a: __m128d, b: __m128d) -> __m128d { |
2013 | cmpsd(a, b, 5) | |
2014 | } | |
2015 | ||
2016 | /// Return a new vector with the low element of `a` replaced by the | |
2017 | /// not-less-than-or-equal comparison of the lower elements of `a` and `b`. | |
83c7162d XL |
2018 | /// |
2019 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cmpnle_sd) | |
0531ce1d XL |
2020 | #[inline] |
2021 | #[target_feature(enable = "sse2")] | |
2022 | #[cfg_attr(test, assert_instr(cmpnlesd))] | |
83c7162d | 2023 | #[stable(feature = "simd_x86", since = "1.27.0")] |
0531ce1d XL |
2024 | pub unsafe fn _mm_cmpnle_sd(a: __m128d, b: __m128d) -> __m128d { |
2025 | cmpsd(a, b, 6) | |
2026 | } | |
2027 | ||
2028 | /// Return a new vector with the low element of `a` replaced by the | |
2029 | /// not-greater-than comparison of the lower elements of `a` and `b`. | |
83c7162d XL |
2030 | /// |
2031 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cmpngt_sd) | |
0531ce1d XL |
2032 | #[inline] |
2033 | #[target_feature(enable = "sse2")] | |
2034 | #[cfg_attr(test, assert_instr(cmpnltsd))] | |
83c7162d | 2035 | #[stable(feature = "simd_x86", since = "1.27.0")] |
0531ce1d | 2036 | pub unsafe fn _mm_cmpngt_sd(a: __m128d, b: __m128d) -> __m128d { |
8faf50e0 | 2037 | simd_insert(_mm_cmpnlt_sd(b, a), 1, simd_extract::<_, f64>(a, 1)) |
0531ce1d XL |
2038 | } |
2039 | ||
2040 | /// Return a new vector with the low element of `a` replaced by the | |
2041 | /// not-greater-than-or-equal comparison of the lower elements of `a` and `b`. | |
83c7162d XL |
2042 | /// |
2043 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cmpnge_sd) | |
0531ce1d XL |
2044 | #[inline] |
2045 | #[target_feature(enable = "sse2")] | |
2046 | #[cfg_attr(test, assert_instr(cmpnlesd))] | |
83c7162d | 2047 | #[stable(feature = "simd_x86", since = "1.27.0")] |
0531ce1d | 2048 | pub unsafe fn _mm_cmpnge_sd(a: __m128d, b: __m128d) -> __m128d { |
8faf50e0 | 2049 | simd_insert(_mm_cmpnle_sd(b, a), 1, simd_extract::<_, f64>(a, 1)) |
0531ce1d XL |
2050 | } |
2051 | ||
/// Compare corresponding elements in `a` and `b` for equality.
///
/// Each result lane is all-ones (`0xFFFF_FFFF_FFFF_FFFF`) when the comparison
/// holds and all-zeros otherwise.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cmpeq_pd)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(cmpeqpd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_cmpeq_pd(a: __m128d, b: __m128d) -> __m128d {
    // imm8 predicate 0 = EQ in the `cmppd` instruction encoding.
    cmppd(a, b, 0)
}

/// Compare corresponding elements in `a` and `b` for less-than.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cmplt_pd)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(cmpltpd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_cmplt_pd(a: __m128d, b: __m128d) -> __m128d {
    // imm8 predicate 1 = LT.
    cmppd(a, b, 1)
}

/// Compare corresponding elements in `a` and `b` for less-than-or-equal
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cmple_pd)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(cmplepd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_cmple_pd(a: __m128d, b: __m128d) -> __m128d {
    // imm8 predicate 2 = LE.
    cmppd(a, b, 2)
}

/// Compare corresponding elements in `a` and `b` for greater-than.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cmpgt_pd)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(cmpltpd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_cmpgt_pd(a: __m128d, b: __m128d) -> __m128d {
    // No GT predicate exists; GT is LT with the operands swapped. Unlike the
    // scalar `_sd` variants, no lane restore is needed: all lanes compare.
    _mm_cmplt_pd(b, a)
}

/// Compare corresponding elements in `a` and `b` for greater-than-or-equal.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cmpge_pd)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(cmplepd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_cmpge_pd(a: __m128d, b: __m128d) -> __m128d {
    // GE = LE with swapped operands.
    _mm_cmple_pd(b, a)
}

/// Compare corresponding elements in `a` and `b` to see if neither is `NaN`.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cmpord_pd)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(cmpordpd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_cmpord_pd(a: __m128d, b: __m128d) -> __m128d {
    // imm8 predicate 7 = ORD (ordered: neither operand lane is NaN).
    cmppd(a, b, 7)
}

/// Compare corresponding elements in `a` and `b` to see if either is `NaN`.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cmpunord_pd)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(cmpunordpd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_cmpunord_pd(a: __m128d, b: __m128d) -> __m128d {
    // imm8 predicate 3 = UNORD (unordered: at least one NaN in the lane pair).
    cmppd(a, b, 3)
}

/// Compare corresponding elements in `a` and `b` for not-equal.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cmpneq_pd)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(cmpneqpd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_cmpneq_pd(a: __m128d, b: __m128d) -> __m128d {
    // imm8 predicate 4 = NEQ.
    cmppd(a, b, 4)
}

/// Compare corresponding elements in `a` and `b` for not-less-than.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cmpnlt_pd)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(cmpnltpd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_cmpnlt_pd(a: __m128d, b: __m128d) -> __m128d {
    // imm8 predicate 5 = NLT (true for unordered lanes, unlike GE).
    cmppd(a, b, 5)
}

/// Compare corresponding elements in `a` and `b` for not-less-than-or-equal.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cmpnle_pd)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(cmpnlepd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_cmpnle_pd(a: __m128d, b: __m128d) -> __m128d {
    // imm8 predicate 6 = NLE.
    cmppd(a, b, 6)
}

/// Compare corresponding elements in `a` and `b` for not-greater-than.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cmpngt_pd)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(cmpnltpd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_cmpngt_pd(a: __m128d, b: __m128d) -> __m128d {
    // NGT = NLT with swapped operands.
    _mm_cmpnlt_pd(b, a)
}

/// Compare corresponding elements in `a` and `b` for
/// not-greater-than-or-equal.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cmpnge_pd)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(cmpnlepd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_cmpnge_pd(a: __m128d, b: __m128d) -> __m128d {
    // NGE = NLE with swapped operands.
    _mm_cmpnle_pd(b, a)
}
2184 | ||
/// Compare the lower element of `a` and `b` for equality.
///
/// Returns `1` if the comparison holds and `0` otherwise. The `comi*` family
/// uses the COMISD instruction, which signals on QNaN operands; the `ucomi*`
/// family below uses UCOMISD, which does not.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_comieq_sd)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(comisd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_comieq_sd(a: __m128d, b: __m128d) -> i32 {
    comieqsd(a, b)
}

/// Compare the lower element of `a` and `b` for less-than.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_comilt_sd)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(comisd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_comilt_sd(a: __m128d, b: __m128d) -> i32 {
    comiltsd(a, b)
}

/// Compare the lower element of `a` and `b` for less-than-or-equal.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_comile_sd)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(comisd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_comile_sd(a: __m128d, b: __m128d) -> i32 {
    comilesd(a, b)
}

/// Compare the lower element of `a` and `b` for greater-than.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_comigt_sd)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(comisd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_comigt_sd(a: __m128d, b: __m128d) -> i32 {
    comigtsd(a, b)
}

/// Compare the lower element of `a` and `b` for greater-than-or-equal.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_comige_sd)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(comisd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_comige_sd(a: __m128d, b: __m128d) -> i32 {
    comigesd(a, b)
}

/// Compare the lower element of `a` and `b` for not-equal.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_comineq_sd)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(comisd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_comineq_sd(a: __m128d, b: __m128d) -> i32 {
    comineqsd(a, b)
}

/// Compare the lower element of `a` and `b` for equality.
///
/// Unordered ("u") variant: compiles to UCOMISD, which is quiet on QNaN
/// operands. Returns `1` if the comparison holds and `0` otherwise.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_ucomieq_sd)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(ucomisd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_ucomieq_sd(a: __m128d, b: __m128d) -> i32 {
    ucomieqsd(a, b)
}

/// Compare the lower element of `a` and `b` for less-than.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_ucomilt_sd)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(ucomisd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_ucomilt_sd(a: __m128d, b: __m128d) -> i32 {
    ucomiltsd(a, b)
}

/// Compare the lower element of `a` and `b` for less-than-or-equal.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_ucomile_sd)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(ucomisd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_ucomile_sd(a: __m128d, b: __m128d) -> i32 {
    ucomilesd(a, b)
}

/// Compare the lower element of `a` and `b` for greater-than.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_ucomigt_sd)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(ucomisd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_ucomigt_sd(a: __m128d, b: __m128d) -> i32 {
    ucomigtsd(a, b)
}

/// Compare the lower element of `a` and `b` for greater-than-or-equal.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_ucomige_sd)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(ucomisd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_ucomige_sd(a: __m128d, b: __m128d) -> i32 {
    ucomigesd(a, b)
}

/// Compare the lower element of `a` and `b` for not-equal.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_ucomineq_sd)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(ucomisd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_ucomineq_sd(a: __m128d, b: __m128d) -> i32 {
    ucomineqsd(a, b)
}
2316 | ||
/// Convert packed double-precision (64-bit) floating-point elements in "a" to
/// packed single-precision (32-bit) floating-point elements
///
/// The two converted values land in the low half of the result; the upper
/// half is zeroed by the CVTPD2PS instruction.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvtpd_ps)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(cvtpd2ps))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_cvtpd_ps(a: __m128d) -> __m128 {
    cvtpd2ps(a)
}

/// Convert packed single-precision (32-bit) floating-point elements in `a` to
/// packed
/// double-precision (64-bit) floating-point elements.
///
/// Only the two low `f32` lanes of `a` are converted.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvtps_pd)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(cvtps2pd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_cvtps_pd(a: __m128) -> __m128d {
    cvtps2pd(a)
}

/// Convert packed double-precision (64-bit) floating-point elements in `a` to
/// packed 32-bit integers.
///
/// Rounds according to the current MXCSR rounding mode; see
/// `_mm_cvttpd_epi32` for the truncating variant.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvtpd_epi32)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(cvtpd2dq))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_cvtpd_epi32(a: __m128d) -> __m128i {
    // The intrinsic yields an integer vector type; transmute re-labels it as
    // the opaque `__m128i` without changing any bits.
    mem::transmute(cvtpd2dq(a))
}

/// Convert the lower double-precision (64-bit) floating-point element in a to
/// a 32-bit integer.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvtsd_si32)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(cvtsd2si))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_cvtsd_si32(a: __m128d) -> i32 {
    cvtsd2si(a)
}

/// Convert the lower double-precision (64-bit) floating-point element in `b`
/// to a single-precision (32-bit) floating-point element, store the result in
/// the lower element of the return value, and copy the upper element from `a`
/// to the upper element the return value.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvtsd_ss)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(cvtsd2ss))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_cvtsd_ss(a: __m128, b: __m128d) -> __m128 {
    cvtsd2ss(a, b)
}

/// Return the lower double-precision (64-bit) floating-point element of "a".
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvtsd_f64)
#[inline]
#[target_feature(enable = "sse2")]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_cvtsd_f64(a: __m128d) -> f64 {
    // Pure lane extraction; no instruction assertion because this typically
    // compiles to a no-op or a register move.
    simd_extract(a, 0)
}

/// Convert the lower single-precision (32-bit) floating-point element in `b`
/// to a double-precision (64-bit) floating-point element, store the result in
/// the lower element of the return value, and copy the upper element from `a`
/// to the upper element the return value.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvtss_sd)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(cvtss2sd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_cvtss_sd(a: __m128d, b: __m128) -> __m128d {
    cvtss2sd(a, b)
}

/// Convert packed double-precision (64-bit) floating-point elements in `a` to
/// packed 32-bit integers with truncation.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvttpd_epi32)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(cvttpd2dq))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_cvttpd_epi32(a: __m128d) -> __m128i {
    // Bit-preserving re-label of the intrinsic's integer vector as `__m128i`.
    mem::transmute(cvttpd2dq(a))
}

/// Convert the lower double-precision (64-bit) floating-point element in `a`
/// to a 32-bit integer with truncation.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvttsd_si32)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(cvttsd2si))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_cvttsd_si32(a: __m128d) -> i32 {
    cvttsd2si(a)
}

/// Convert packed single-precision (32-bit) floating-point elements in `a` to
/// packed 32-bit integers with truncation.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvttps_epi32)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(cvttps2dq))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_cvttps_epi32(a: __m128) -> __m128i {
    // Bit-preserving re-label of the intrinsic's integer vector as `__m128i`.
    mem::transmute(cvttps2dq(a))
}
2439 | ||
/// Copy double-precision (64-bit) floating-point element `a` to the lower
/// element of the packed 64-bit return value.
///
/// The upper element is zeroed.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_set_sd)
#[inline]
#[target_feature(enable = "sse2")]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_set_sd(a: f64) -> __m128d {
    // `_mm_set_pd` takes (high, low), so `a` goes in the low lane.
    _mm_set_pd(0.0, a)
}

/// Broadcast double-precision (64-bit) floating-point value a to all elements
/// of the return value.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_set1_pd)
#[inline]
#[target_feature(enable = "sse2")]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_set1_pd(a: f64) -> __m128d {
    _mm_set_pd(a, a)
}

/// Broadcast double-precision (64-bit) floating-point value a to all elements
/// of the return value.
///
/// Alias of [`_mm_set1_pd`] kept for Intel-header parity.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_set_pd1)
#[inline]
#[target_feature(enable = "sse2")]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_set_pd1(a: f64) -> __m128d {
    _mm_set_pd(a, a)
}

/// Set packed double-precision (64-bit) floating-point elements in the return
/// value with the supplied values.
///
/// Following the Intel convention, `a` is the HIGH element and `b` the LOW
/// element of the result.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_set_pd)
#[inline]
#[target_feature(enable = "sse2")]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_set_pd(a: f64, b: f64) -> __m128d {
    // The tuple constructor is in memory order (low, high), hence (b, a).
    __m128d(b, a)
}

/// Set packed double-precision (64-bit) floating-point elements in the return
/// value with the supplied values in reverse order.
///
/// Here `a` is the LOW element and `b` the HIGH element.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_setr_pd)
#[inline]
#[target_feature(enable = "sse2")]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_setr_pd(a: f64, b: f64) -> __m128d {
    _mm_set_pd(b, a)
}

/// Returns packed double-precision (64-bit) floating-point elements with all
/// zeros.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_setzero_pd)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(xorps))] // FIXME xorpd expected
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_setzero_pd() -> __m128d {
    _mm_set_pd(0.0, 0.0)
}
2506 | ||
/// Return a mask of the most significant bit of each element in `a`.
///
/// The mask is stored in the 2 least significant bits of the return value.
/// All other bits are set to `0`.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_movemask_pd)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(movmskpd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_movemask_pd(a: __m128d) -> i32 {
    movmskpd(a)
}

/// Load 128-bits (composed of 2 packed double-precision (64-bit)
/// floating-point elements) from memory into the returned vector.
/// `mem_addr` must be aligned on a 16-byte boundary or a general-protection
/// exception may be generated.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_load_pd)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(movaps))]
#[stable(feature = "simd_x86", since = "1.27.0")]
// The cast from `*const f64` to `*const __m128d` is intentional: the API
// contract (above) already demands 16-byte alignment of `mem_addr`.
#[cfg_attr(feature = "cargo-clippy", allow(clippy::cast_ptr_alignment))]
pub unsafe fn _mm_load_pd(mem_addr: *const f64) -> __m128d {
    *(mem_addr as *const __m128d)
}

/// Loads a 64-bit double-precision value to the low element of a
/// 128-bit integer vector and clears the upper element.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_load_sd)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(movsd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_load_sd(mem_addr: *const f64) -> __m128d {
    // `_mm_setr_pd` takes (low, high): loaded value low, zero high.
    _mm_setr_pd(*mem_addr, 0.)
}

/// Loads a double-precision value into the high-order bits of a 128-bit
/// vector of `[2 x double]`. The low-order bits are copied from the low-order
/// bits of the first operand.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_loadh_pd)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(movhpd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_loadh_pd(a: __m128d, mem_addr: *const f64) -> __m128d {
    // Keep `a`'s low lane, replace the high lane with the loaded value.
    _mm_setr_pd(simd_extract(a, 0), *mem_addr)
}

/// Loads a double-precision value into the low-order bits of a 128-bit
/// vector of `[2 x double]`. The high-order bits are copied from the
/// high-order bits of the first operand.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_loadl_pd)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(movlpd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_loadl_pd(a: __m128d, mem_addr: *const f64) -> __m128d {
    // Replace `a`'s low lane with the loaded value, keep the high lane.
    _mm_setr_pd(*mem_addr, simd_extract(a, 1))
}
2573 | ||
83c7162d | 2574 | /// Stores a 128-bit floating point vector of `[2 x double]` to a 128-bit |
0531ce1d XL |
2575 | /// aligned memory location. |
2576 | /// To minimize caching, the data is flagged as non-temporal (unlikely to be | |
2577 | /// used again soon). | |
83c7162d XL |
2578 | /// |
2579 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_stream_pd) | |
0531ce1d XL |
2580 | #[inline] |
2581 | #[target_feature(enable = "sse2")] | |
2582 | #[cfg_attr(test, assert_instr(movntps))] // FIXME movntpd | |
83c7162d | 2583 | #[stable(feature = "simd_x86", since = "1.27.0")] |
0731742a | 2584 | #[cfg_attr(feature = "cargo-clippy", allow(clippy::cast_ptr_alignment))] |
0531ce1d | 2585 | pub unsafe fn _mm_stream_pd(mem_addr: *mut f64, a: __m128d) { |
0731742a | 2586 | intrinsics::nontemporal_store(mem_addr as *mut __m128d, a); |
0531ce1d XL |
2587 | } |
2588 | ||
83c7162d | 2589 | /// Stores the lower 64 bits of a 128-bit vector of `[2 x double]` to a |
0531ce1d | 2590 | /// memory location. |
83c7162d XL |
2591 | /// |
2592 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_store_sd) | |
0531ce1d XL |
2593 | #[inline] |
2594 | #[target_feature(enable = "sse2")] | |
0731742a | 2595 | #[cfg_attr(all(test, not(target_os = "windows")), assert_instr(movlps))] |
83c7162d | 2596 | #[stable(feature = "simd_x86", since = "1.27.0")] |
0531ce1d XL |
2597 | pub unsafe fn _mm_store_sd(mem_addr: *mut f64, a: __m128d) { |
2598 | *mem_addr = simd_extract(a, 0) | |
2599 | } | |
2600 | ||
/// Store 128-bits (composed of 2 packed double-precision (64-bit)
/// floating-point elements) from `a` into memory. `mem_addr` must be aligned
/// on a 16-byte boundary or a general-protection exception may be generated.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_store_pd)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(movaps))]
#[stable(feature = "simd_x86", since = "1.27.0")]
#[cfg_attr(feature = "cargo-clippy", allow(clippy::cast_ptr_alignment))]
pub unsafe fn _mm_store_pd(mem_addr: *mut f64, a: __m128d) {
    // Aligned store: the *mut __m128d cast assumes 16-byte alignment
    // (caller contract, matching the intrinsic's documented requirement).
    *(mem_addr as *mut __m128d) = a;
}
2614 | ||
/// Store 128-bits (composed of 2 packed double-precision (64-bit)
/// floating-point elements) from `a` into memory.
/// `mem_addr` does not need to be aligned on any particular boundary.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_storeu_pd)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(movups))] // FIXME movupd expected
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_storeu_pd(mem_addr: *mut f64, a: __m128d) {
    // Goes through the LLVM unaligned-store intrinsic, so no alignment needed.
    storeupd(mem_addr as *mut i8, a);
}
2627 | ||
/// Store the lower double-precision (64-bit) floating-point element from `a`
/// into 2 contiguous elements in memory. `mem_addr` must be aligned on a
/// 16-byte boundary or a general-protection exception may be generated.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_store1_pd)
#[inline]
#[target_feature(enable = "sse2")]
#[stable(feature = "simd_x86", since = "1.27.0")]
#[cfg_attr(feature = "cargo-clippy", allow(clippy::cast_ptr_alignment))]
pub unsafe fn _mm_store1_pd(mem_addr: *mut f64, a: __m128d) {
    // Duplicate lane 0 into both lanes, then do one aligned 128-bit store.
    let b: __m128d = simd_shuffle2(a, a, [0, 0]);
    *(mem_addr as *mut __m128d) = b;
}
2641 | ||
/// Store the lower double-precision (64-bit) floating-point element from `a`
/// into 2 contiguous elements in memory. `mem_addr` must be aligned on a
/// 16-byte boundary or a general-protection exception may be generated.
///
/// This is an alias of [`_mm_store1_pd`] kept for Intel-API compatibility.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_store_pd1)
#[inline]
#[target_feature(enable = "sse2")]
#[stable(feature = "simd_x86", since = "1.27.0")]
#[cfg_attr(feature = "cargo-clippy", allow(clippy::cast_ptr_alignment))]
pub unsafe fn _mm_store_pd1(mem_addr: *mut f64, a: __m128d) {
    // Duplicate lane 0 into both lanes, then do one aligned 128-bit store.
    let b: __m128d = simd_shuffle2(a, a, [0, 0]);
    *(mem_addr as *mut __m128d) = b;
}
2655 | ||
/// Store 2 double-precision (64-bit) floating-point elements from `a` into
/// memory in reverse order.
/// `mem_addr` must be aligned on a 16-byte boundary or a general-protection
/// exception may be generated.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_storer_pd)
#[inline]
#[target_feature(enable = "sse2")]
#[stable(feature = "simd_x86", since = "1.27.0")]
#[cfg_attr(feature = "cargo-clippy", allow(clippy::cast_ptr_alignment))]
pub unsafe fn _mm_storer_pd(mem_addr: *mut f64, a: __m128d) {
    // Swap the two lanes, then do one aligned 128-bit store.
    let b: __m128d = simd_shuffle2(a, a, [1, 0]);
    *(mem_addr as *mut __m128d) = b;
}
2670 | ||
83c7162d | 2671 | /// Stores the upper 64 bits of a 128-bit vector of `[2 x double]` to a |
0531ce1d | 2672 | /// memory location. |
83c7162d XL |
2673 | /// |
2674 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_storeh_pd) | |
0531ce1d XL |
2675 | #[inline] |
2676 | #[target_feature(enable = "sse2")] | |
0731742a | 2677 | #[cfg_attr(all(test, not(target_os = "windows")), assert_instr(movhpd))] |
83c7162d | 2678 | #[stable(feature = "simd_x86", since = "1.27.0")] |
0531ce1d XL |
2679 | pub unsafe fn _mm_storeh_pd(mem_addr: *mut f64, a: __m128d) { |
2680 | *mem_addr = simd_extract(a, 1); | |
2681 | } | |
2682 | ||
83c7162d | 2683 | /// Stores the lower 64 bits of a 128-bit vector of `[2 x double]` to a |
0531ce1d | 2684 | /// memory location. |
83c7162d XL |
2685 | /// |
2686 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_storel_pd) | |
0531ce1d XL |
2687 | #[inline] |
2688 | #[target_feature(enable = "sse2")] | |
0731742a | 2689 | #[cfg_attr(all(test, not(target_os = "windows")), assert_instr(movlps))] |
83c7162d | 2690 | #[stable(feature = "simd_x86", since = "1.27.0")] |
0531ce1d XL |
2691 | pub unsafe fn _mm_storel_pd(mem_addr: *mut f64, a: __m128d) { |
2692 | *mem_addr = simd_extract(a, 0); | |
2693 | } | |
2694 | ||
/// Load a double-precision (64-bit) floating-point element from memory
/// into both elements of returned vector.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_load1_pd)
#[inline]
#[target_feature(enable = "sse2")]
// #[cfg_attr(test, assert_instr(movapd))] // FIXME LLVM uses different codegen
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_load1_pd(mem_addr: *const f64) -> __m128d {
    // Single scalar load, broadcast to both lanes.
    let d = *mem_addr;
    _mm_setr_pd(d, d)
}
2707 | ||
/// Load a double-precision (64-bit) floating-point element from memory
/// into both elements of returned vector.
///
/// This is an alias of [`_mm_load1_pd`] kept for Intel-API compatibility.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_load_pd1)
#[inline]
#[target_feature(enable = "sse2")]
// #[cfg_attr(test, assert_instr(movapd))] // FIXME same as _mm_load1_pd
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_load_pd1(mem_addr: *const f64) -> __m128d {
    _mm_load1_pd(mem_addr)
}
2719 | ||
/// Load 2 double-precision (64-bit) floating-point elements from memory into
/// the returned vector in reverse order. `mem_addr` must be aligned on a
/// 16-byte boundary or a general-protection exception may be generated.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_loadr_pd)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(movapd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_loadr_pd(mem_addr: *const f64) -> __m128d {
    // Aligned 128-bit load, then swap the two lanes.
    let a = _mm_load_pd(mem_addr);
    simd_shuffle2(a, a, [1, 0])
}
2733 | ||
/// Load 128-bits (composed of 2 packed double-precision (64-bit)
/// floating-point elements) from memory into the returned vector.
/// `mem_addr` does not need to be aligned on any particular boundary.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_loadu_pd)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(movups))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_loadu_pd(mem_addr: *const f64) -> __m128d {
    // Byte-wise copy into a fresh vector: copy_nonoverlapping has no
    // alignment requirement beyond u8, which is what makes this the
    // unaligned load.
    let mut dst = _mm_undefined_pd();
    ptr::copy_nonoverlapping(
        mem_addr as *const u8,
        &mut dst as *mut __m128d as *mut u8,
        mem::size_of::<__m128d>(),
    );
    dst
}
2752 | ||
83c7162d XL |
2753 | /// Constructs a 128-bit floating-point vector of `[2 x double]` from two |
2754 | /// 128-bit vector parameters of `[2 x double]`, using the immediate-value | |
0531ce1d | 2755 | /// parameter as a specifier. |
83c7162d XL |
2756 | /// |
2757 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_shuffle_pd) | |
0531ce1d XL |
2758 | #[inline] |
2759 | #[target_feature(enable = "sse2")] | |
2760 | #[cfg_attr(test, assert_instr(shufpd, imm8 = 1))] | |
2761 | #[rustc_args_required_const(2)] | |
83c7162d | 2762 | #[stable(feature = "simd_x86", since = "1.27.0")] |
0531ce1d XL |
2763 | pub unsafe fn _mm_shuffle_pd(a: __m128d, b: __m128d, imm8: i32) -> __m128d { |
2764 | match imm8 & 0b11 { | |
2765 | 0b00 => simd_shuffle2(a, b, [0, 2]), | |
2766 | 0b01 => simd_shuffle2(a, b, [1, 2]), | |
2767 | 0b10 => simd_shuffle2(a, b, [0, 3]), | |
2768 | _ => simd_shuffle2(a, b, [1, 3]), | |
2769 | } | |
2770 | } | |
2771 | ||
83c7162d | 2772 | /// Constructs a 128-bit floating-point vector of `[2 x double]`. The lower |
0531ce1d XL |
2773 | /// 64 bits are set to the lower 64 bits of the second parameter. The upper |
2774 | /// 64 bits are set to the upper 64 bits of the first parameter. | |
83c7162d XL |
2775 | /// |
2776 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_move_sd) | |
0531ce1d XL |
2777 | #[inline] |
2778 | #[target_feature(enable = "sse2")] | |
2779 | #[cfg_attr(test, assert_instr(movsd))] | |
83c7162d | 2780 | #[stable(feature = "simd_x86", since = "1.27.0")] |
0531ce1d XL |
2781 | pub unsafe fn _mm_move_sd(a: __m128d, b: __m128d) -> __m128d { |
2782 | _mm_setr_pd(simd_extract(b, 0), simd_extract(a, 1)) | |
2783 | } | |
2784 | ||
83c7162d XL |
2785 | /// Casts a 128-bit floating-point vector of `[2 x double]` into a 128-bit |
2786 | /// floating-point vector of `[4 x float]`. | |
2787 | /// | |
2788 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_castpd_ps) | |
0531ce1d XL |
2789 | #[inline] |
2790 | #[target_feature(enable = "sse2")] | |
83c7162d | 2791 | #[stable(feature = "simd_x86", since = "1.27.0")] |
0531ce1d XL |
2792 | pub unsafe fn _mm_castpd_ps(a: __m128d) -> __m128 { |
2793 | mem::transmute(a) | |
2794 | } | |
2795 | ||
83c7162d | 2796 | /// Casts a 128-bit floating-point vector of `[2 x double]` into a 128-bit |
0531ce1d | 2797 | /// integer vector. |
83c7162d XL |
2798 | /// |
2799 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_castpd_si128) | |
0531ce1d XL |
2800 | #[inline] |
2801 | #[target_feature(enable = "sse2")] | |
83c7162d | 2802 | #[stable(feature = "simd_x86", since = "1.27.0")] |
0531ce1d | 2803 | pub unsafe fn _mm_castpd_si128(a: __m128d) -> __m128i { |
0bf4aa26 | 2804 | mem::transmute(a) |
0531ce1d XL |
2805 | } |
2806 | ||
83c7162d XL |
2807 | /// Casts a 128-bit floating-point vector of `[4 x float]` into a 128-bit |
2808 | /// floating-point vector of `[2 x double]`. | |
2809 | /// | |
2810 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_castps_pd) | |
0531ce1d XL |
2811 | #[inline] |
2812 | #[target_feature(enable = "sse2")] | |
83c7162d | 2813 | #[stable(feature = "simd_x86", since = "1.27.0")] |
0531ce1d XL |
2814 | pub unsafe fn _mm_castps_pd(a: __m128) -> __m128d { |
2815 | mem::transmute(a) | |
2816 | } | |
2817 | ||
83c7162d | 2818 | /// Casts a 128-bit floating-point vector of `[4 x float]` into a 128-bit |
0531ce1d | 2819 | /// integer vector. |
83c7162d XL |
2820 | /// |
2821 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_castps_si128) | |
0531ce1d XL |
2822 | #[inline] |
2823 | #[target_feature(enable = "sse2")] | |
83c7162d | 2824 | #[stable(feature = "simd_x86", since = "1.27.0")] |
0531ce1d XL |
2825 | pub unsafe fn _mm_castps_si128(a: __m128) -> __m128i { |
2826 | mem::transmute(a) | |
2827 | } | |
2828 | ||
/// Casts a 128-bit integer vector into a 128-bit floating-point vector
/// of `[2 x double]`. This is a bit-level reinterpret, not a value
/// conversion; no instruction is generated.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_castsi128_pd)
#[inline]
#[target_feature(enable = "sse2")]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_castsi128_pd(a: __m128i) -> __m128d {
    mem::transmute(a)
}
2839 | ||
/// Casts a 128-bit integer vector into a 128-bit floating-point vector
/// of `[4 x float]`. This is a bit-level reinterpret, not a value
/// conversion; no instruction is generated.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_castsi128_ps)
#[inline]
#[target_feature(enable = "sse2")]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_castsi128_ps(a: __m128i) -> __m128 {
    mem::transmute(a)
}
2850 | ||
/// Return vector of type __m128d with undefined elements.
///
/// Callers must not read the returned lanes before writing them.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_undefined_pd)
#[inline]
#[target_feature(enable = "sse2")]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_undefined_pd() -> __m128d {
    // FIXME: this function should return MaybeUninit<__m128d>
    mem::MaybeUninit::<__m128d>::uninitialized().into_inner()
}
2861 | ||
/// Return vector of type __m128i with undefined elements.
///
/// Callers must not read the returned lanes before writing them.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_undefined_si128)
#[inline]
#[target_feature(enable = "sse2")]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_undefined_si128() -> __m128i {
    // FIXME: this function should return MaybeUninit<__m128i>
    mem::MaybeUninit::<__m128i>::uninitialized().into_inner()
}
2872 | ||
/// The resulting `__m128d` element is composed by the high-order values of
/// the two `__m128d` interleaved input elements, i.e.:
///
/// * The `[127:64]` bits are copied from the `[127:64]` bits of the second
///   input
/// * The `[63:0]` bits are copied from the `[127:64]` bits of the first
///   input
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_unpackhi_pd)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(test, assert_instr(unpckhpd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_unpackhi_pd(a: __m128d, b: __m128d) -> __m128d {
    // Indices [1, 3]: result = [a.high, b.high].
    simd_shuffle2(a, b, [1, 3])
}
2888 | ||
/// The resulting `__m128d` element is composed by the low-order values of
/// the two `__m128d` interleaved input elements, i.e.:
///
/// * The `[127:64]` bits are copied from the `[63:0]` bits of the second input
/// * The `[63:0]` bits are copied from the `[63:0]` bits of the first input
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_unpacklo_pd)
#[inline]
#[target_feature(enable = "sse2")]
#[cfg_attr(all(test, not(target_os = "windows")), assert_instr(movlhps))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_unpacklo_pd(a: __m128d, b: __m128d) -> __m128d {
    // Indices [0, 2]: result = [a.low, b.low].
    simd_shuffle2(a, b, [0, 2])
}
2903 | ||
/// Adds two signed or unsigned 64-bit integer values, returning the
/// lower 64 bits of the sum (wrapping addition; overflow is discarded).
#[inline]
#[target_feature(enable = "sse2,mmx")]
#[cfg_attr(test, assert_instr(paddq))]
pub unsafe fn _mm_add_si64(a: __m64, b: __m64) -> __m64 {
    paddq(a, b)
}
2912 | ||
/// Multiplies 32-bit unsigned integer values contained in the lower bits
/// of the two 64-bit integer vectors and returns the 64-bit unsigned
/// product.
#[inline]
#[target_feature(enable = "sse2,mmx")]
#[cfg_attr(test, assert_instr(pmuludq))]
pub unsafe fn _mm_mul_su32(a: __m64, b: __m64) -> __m64 {
    // `pmuludq2` is the MMX variant of pmuludq (see the extern block).
    pmuludq2(a, b)
}
2922 | ||
/// Subtracts signed or unsigned 64-bit integer values and writes the
/// difference to the corresponding bits in the destination (wrapping
/// subtraction).
#[inline]
#[target_feature(enable = "sse2,mmx")]
#[cfg_attr(test, assert_instr(psubq))]
pub unsafe fn _mm_sub_si64(a: __m64, b: __m64) -> __m64 {
    psubq(a, b)
}
2931 | ||
/// Converts the two signed 32-bit integer elements of a 64-bit vector of
/// `[2 x i32]` into two double-precision floating-point values, returned in a
/// 128-bit vector of `[2 x double]`.
#[inline]
#[target_feature(enable = "sse2,mmx")]
#[cfg_attr(test, assert_instr(cvtpi2pd))]
pub unsafe fn _mm_cvtpi32_pd(a: __m64) -> __m128d {
    cvtpi2pd(a)
}
2941 | ||
83c7162d | 2942 | /// Initializes both 64-bit values in a 128-bit vector of `[2 x i64]` with |
0531ce1d XL |
2943 | /// the specified 64-bit integer values. |
2944 | #[inline] | |
2945 | #[target_feature(enable = "sse2,mmx")] | |
2946 | // no particular instruction to test | |
2947 | pub unsafe fn _mm_set_epi64(e1: __m64, e0: __m64) -> __m128i { | |
2948 | _mm_set_epi64x(mem::transmute(e1), mem::transmute(e0)) | |
2949 | } | |
2950 | ||
83c7162d | 2951 | /// Initializes both values in a 128-bit vector of `[2 x i64]` with the |
0531ce1d XL |
2952 | /// specified 64-bit value. |
2953 | #[inline] | |
2954 | #[target_feature(enable = "sse2,mmx")] | |
2955 | // no particular instruction to test | |
2956 | pub unsafe fn _mm_set1_epi64(a: __m64) -> __m128i { | |
2957 | _mm_set_epi64x(mem::transmute(a), mem::transmute(a)) | |
2958 | } | |
2959 | ||
/// Constructs a 128-bit integer vector, initialized in reverse order
/// with the specified 64-bit integral values: `e1` becomes the low
/// element, `e0` the high element.
#[inline]
#[target_feature(enable = "sse2,mmx")]
// no particular instruction to test
pub unsafe fn _mm_setr_epi64(e1: __m64, e0: __m64) -> __m128i {
    _mm_set_epi64x(mem::transmute(e0), mem::transmute(e1))
}
2968 | ||
/// Returns the lower 64 bits of a 128-bit integer vector as a 64-bit
/// integer.
#[inline]
#[target_feature(enable = "sse2,mmx")]
// #[cfg_attr(test, assert_instr(movdq2q))] // FIXME: llvm codegens wrong
// instr?
pub unsafe fn _mm_movepi64_pi64(a: __m128i) -> __m64 {
    // Extract lane 0 as i64, then reinterpret as __m64.
    mem::transmute(simd_extract::<_, i64>(a.as_i64x2(), 0))
}
2978 | ||
/// Moves the 64-bit operand to a 128-bit integer vector, zeroing the
/// upper bits.
#[inline]
#[target_feature(enable = "sse2,mmx")]
// #[cfg_attr(test, assert_instr(movq2dq))] // FIXME: llvm codegens wrong
// instr?
pub unsafe fn _mm_movpi64_epi64(a: __m64) -> __m128i {
    // High element = 0, low element = a.
    _mm_set_epi64x(0, mem::transmute(a))
}
2988 | ||
/// Converts the two double-precision floating-point elements of a
/// 128-bit vector of `[2 x double]` into two signed 32-bit integer values,
/// returned in a 64-bit vector of `[2 x i32]`. Rounding follows the
/// current MXCSR rounding mode (contrast with [`_mm_cvttpd_pi32`]).
#[inline]
#[target_feature(enable = "sse2,mmx")]
#[cfg_attr(test, assert_instr(cvtpd2pi))]
pub unsafe fn _mm_cvtpd_pi32(a: __m128d) -> __m64 {
    cvtpd2pi(a)
}
2998 | ||
/// Converts the two double-precision floating-point elements of a
/// 128-bit vector of `[2 x double]` into two signed 32-bit integer values,
/// returned in a 64-bit vector of `[2 x i32]`.
/// If the result of either conversion is inexact, the result is truncated
/// (rounded towards zero) regardless of the current MXCSR setting.
#[inline]
#[target_feature(enable = "sse2,mmx")]
#[cfg_attr(test, assert_instr(cvttpd2pi))]
pub unsafe fn _mm_cvttpd_pi32(a: __m128d) -> __m64 {
    cvttpd2pi(a)
}
3010 | ||
// Raw LLVM intrinsic declarations backing the SSE2/MMX wrappers above.
// Each `link_name` is the LLVM intrinsic the symbol resolves to; the
// safe-ish public `_mm_*` functions in this module are thin wrappers
// around these.
#[allow(improper_ctypes)]
extern "C" {
    #[link_name = "llvm.x86.sse2.pause"]
    fn pause();
    #[link_name = "llvm.x86.sse2.clflush"]
    fn clflush(p: *mut u8);
    #[link_name = "llvm.x86.sse2.lfence"]
    fn lfence();
    #[link_name = "llvm.x86.sse2.mfence"]
    fn mfence();
    #[link_name = "llvm.x86.sse2.padds.b"]
    fn paddsb(a: i8x16, b: i8x16) -> i8x16;
    #[link_name = "llvm.x86.sse2.padds.w"]
    fn paddsw(a: i16x8, b: i16x8) -> i16x8;
    #[link_name = "llvm.x86.sse2.paddus.b"]
    fn paddsub(a: u8x16, b: u8x16) -> u8x16;
    #[link_name = "llvm.x86.sse2.paddus.w"]
    fn paddsuw(a: u16x8, b: u16x8) -> u16x8;
    #[link_name = "llvm.x86.sse2.pavg.b"]
    fn pavgb(a: u8x16, b: u8x16) -> u8x16;
    #[link_name = "llvm.x86.sse2.pavg.w"]
    fn pavgw(a: u16x8, b: u16x8) -> u16x8;
    #[link_name = "llvm.x86.sse2.pmadd.wd"]
    fn pmaddwd(a: i16x8, b: i16x8) -> i32x4;
    #[link_name = "llvm.x86.sse2.pmaxs.w"]
    fn pmaxsw(a: i16x8, b: i16x8) -> i16x8;
    #[link_name = "llvm.x86.sse2.pmaxu.b"]
    fn pmaxub(a: u8x16, b: u8x16) -> u8x16;
    #[link_name = "llvm.x86.sse2.pmins.w"]
    fn pminsw(a: i16x8, b: i16x8) -> i16x8;
    #[link_name = "llvm.x86.sse2.pminu.b"]
    fn pminub(a: u8x16, b: u8x16) -> u8x16;
    #[link_name = "llvm.x86.sse2.pmulh.w"]
    fn pmulhw(a: i16x8, b: i16x8) -> i16x8;
    #[link_name = "llvm.x86.sse2.pmulhu.w"]
    fn pmulhuw(a: u16x8, b: u16x8) -> u16x8;
    #[link_name = "llvm.x86.sse2.pmulu.dq"]
    fn pmuludq(a: u32x4, b: u32x4) -> u64x2;
    #[link_name = "llvm.x86.sse2.psad.bw"]
    fn psadbw(a: u8x16, b: u8x16) -> u64x2;
    #[link_name = "llvm.x86.sse2.psubs.b"]
    fn psubsb(a: i8x16, b: i8x16) -> i8x16;
    #[link_name = "llvm.x86.sse2.psubs.w"]
    fn psubsw(a: i16x8, b: i16x8) -> i16x8;
    #[link_name = "llvm.x86.sse2.psubus.b"]
    fn psubusb(a: u8x16, b: u8x16) -> u8x16;
    #[link_name = "llvm.x86.sse2.psubus.w"]
    fn psubusw(a: u16x8, b: u16x8) -> u16x8;
    #[link_name = "llvm.x86.sse2.pslli.w"]
    fn pslliw(a: i16x8, imm8: i32) -> i16x8;
    #[link_name = "llvm.x86.sse2.psll.w"]
    fn psllw(a: i16x8, count: i16x8) -> i16x8;
    #[link_name = "llvm.x86.sse2.pslli.d"]
    fn psllid(a: i32x4, imm8: i32) -> i32x4;
    #[link_name = "llvm.x86.sse2.psll.d"]
    fn pslld(a: i32x4, count: i32x4) -> i32x4;
    #[link_name = "llvm.x86.sse2.pslli.q"]
    fn pslliq(a: i64x2, imm8: i32) -> i64x2;
    #[link_name = "llvm.x86.sse2.psll.q"]
    fn psllq(a: i64x2, count: i64x2) -> i64x2;
    #[link_name = "llvm.x86.sse2.psrai.w"]
    fn psraiw(a: i16x8, imm8: i32) -> i16x8;
    #[link_name = "llvm.x86.sse2.psra.w"]
    fn psraw(a: i16x8, count: i16x8) -> i16x8;
    #[link_name = "llvm.x86.sse2.psrai.d"]
    fn psraid(a: i32x4, imm8: i32) -> i32x4;
    #[link_name = "llvm.x86.sse2.psra.d"]
    fn psrad(a: i32x4, count: i32x4) -> i32x4;
    #[link_name = "llvm.x86.sse2.psrli.w"]
    fn psrliw(a: i16x8, imm8: i32) -> i16x8;
    #[link_name = "llvm.x86.sse2.psrl.w"]
    fn psrlw(a: i16x8, count: i16x8) -> i16x8;
    #[link_name = "llvm.x86.sse2.psrli.d"]
    fn psrlid(a: i32x4, imm8: i32) -> i32x4;
    #[link_name = "llvm.x86.sse2.psrl.d"]
    fn psrld(a: i32x4, count: i32x4) -> i32x4;
    #[link_name = "llvm.x86.sse2.psrli.q"]
    fn psrliq(a: i64x2, imm8: i32) -> i64x2;
    #[link_name = "llvm.x86.sse2.psrl.q"]
    fn psrlq(a: i64x2, count: i64x2) -> i64x2;
    #[link_name = "llvm.x86.sse2.cvtdq2ps"]
    fn cvtdq2ps(a: i32x4) -> __m128;
    #[link_name = "llvm.x86.sse2.cvtps2dq"]
    fn cvtps2dq(a: __m128) -> i32x4;
    #[link_name = "llvm.x86.sse2.maskmov.dqu"]
    fn maskmovdqu(a: i8x16, mask: i8x16, mem_addr: *mut i8);
    #[link_name = "llvm.x86.sse2.packsswb.128"]
    fn packsswb(a: i16x8, b: i16x8) -> i8x16;
    #[link_name = "llvm.x86.sse2.packssdw.128"]
    fn packssdw(a: i32x4, b: i32x4) -> i16x8;
    #[link_name = "llvm.x86.sse2.packuswb.128"]
    fn packuswb(a: i16x8, b: i16x8) -> u8x16;
    #[link_name = "llvm.x86.sse2.pmovmskb.128"]
    fn pmovmskb(a: i8x16) -> i32;
    #[link_name = "llvm.x86.sse2.max.sd"]
    fn maxsd(a: __m128d, b: __m128d) -> __m128d;
    #[link_name = "llvm.x86.sse2.max.pd"]
    fn maxpd(a: __m128d, b: __m128d) -> __m128d;
    #[link_name = "llvm.x86.sse2.min.sd"]
    fn minsd(a: __m128d, b: __m128d) -> __m128d;
    #[link_name = "llvm.x86.sse2.min.pd"]
    fn minpd(a: __m128d, b: __m128d) -> __m128d;
    #[link_name = "llvm.x86.sse2.sqrt.sd"]
    fn sqrtsd(a: __m128d) -> __m128d;
    #[link_name = "llvm.x86.sse2.sqrt.pd"]
    fn sqrtpd(a: __m128d) -> __m128d;
    #[link_name = "llvm.x86.sse2.cmp.sd"]
    fn cmpsd(a: __m128d, b: __m128d, imm8: i8) -> __m128d;
    #[link_name = "llvm.x86.sse2.cmp.pd"]
    fn cmppd(a: __m128d, b: __m128d, imm8: i8) -> __m128d;
    #[link_name = "llvm.x86.sse2.comieq.sd"]
    fn comieqsd(a: __m128d, b: __m128d) -> i32;
    #[link_name = "llvm.x86.sse2.comilt.sd"]
    fn comiltsd(a: __m128d, b: __m128d) -> i32;
    #[link_name = "llvm.x86.sse2.comile.sd"]
    fn comilesd(a: __m128d, b: __m128d) -> i32;
    #[link_name = "llvm.x86.sse2.comigt.sd"]
    fn comigtsd(a: __m128d, b: __m128d) -> i32;
    #[link_name = "llvm.x86.sse2.comige.sd"]
    fn comigesd(a: __m128d, b: __m128d) -> i32;
    #[link_name = "llvm.x86.sse2.comineq.sd"]
    fn comineqsd(a: __m128d, b: __m128d) -> i32;
    #[link_name = "llvm.x86.sse2.ucomieq.sd"]
    fn ucomieqsd(a: __m128d, b: __m128d) -> i32;
    #[link_name = "llvm.x86.sse2.ucomilt.sd"]
    fn ucomiltsd(a: __m128d, b: __m128d) -> i32;
    #[link_name = "llvm.x86.sse2.ucomile.sd"]
    fn ucomilesd(a: __m128d, b: __m128d) -> i32;
    #[link_name = "llvm.x86.sse2.ucomigt.sd"]
    fn ucomigtsd(a: __m128d, b: __m128d) -> i32;
    #[link_name = "llvm.x86.sse2.ucomige.sd"]
    fn ucomigesd(a: __m128d, b: __m128d) -> i32;
    #[link_name = "llvm.x86.sse2.ucomineq.sd"]
    fn ucomineqsd(a: __m128d, b: __m128d) -> i32;
    #[link_name = "llvm.x86.sse2.movmsk.pd"]
    fn movmskpd(a: __m128d) -> i32;
    #[link_name = "llvm.x86.sse2.cvtpd2ps"]
    fn cvtpd2ps(a: __m128d) -> __m128;
    #[link_name = "llvm.x86.sse2.cvtps2pd"]
    fn cvtps2pd(a: __m128) -> __m128d;
    #[link_name = "llvm.x86.sse2.cvtpd2dq"]
    fn cvtpd2dq(a: __m128d) -> i32x4;
    #[link_name = "llvm.x86.sse2.cvtsd2si"]
    fn cvtsd2si(a: __m128d) -> i32;
    #[link_name = "llvm.x86.sse2.cvtsd2ss"]
    fn cvtsd2ss(a: __m128, b: __m128d) -> __m128;
    #[link_name = "llvm.x86.sse2.cvtss2sd"]
    fn cvtss2sd(a: __m128d, b: __m128) -> __m128d;
    #[link_name = "llvm.x86.sse2.cvttpd2dq"]
    fn cvttpd2dq(a: __m128d) -> i32x4;
    #[link_name = "llvm.x86.sse2.cvttsd2si"]
    fn cvttsd2si(a: __m128d) -> i32;
    #[link_name = "llvm.x86.sse2.cvttps2dq"]
    fn cvttps2dq(a: __m128) -> i32x4;
    #[link_name = "llvm.x86.sse2.storeu.dq"]
    fn storeudq(mem_addr: *mut i8, a: __m128i);
    #[link_name = "llvm.x86.sse2.storeu.pd"]
    fn storeupd(mem_addr: *mut i8, a: __m128d);
    #[link_name = "llvm.x86.mmx.padd.q"]
    fn paddq(a: __m64, b: __m64) -> __m64;
    #[link_name = "llvm.x86.mmx.pmulu.dq"]
    fn pmuludq2(a: __m64, b: __m64) -> __m64;
    #[link_name = "llvm.x86.mmx.psub.q"]
    fn psubq(a: __m64, b: __m64) -> __m64;
    #[link_name = "llvm.x86.sse.cvtpi2pd"]
    fn cvtpi2pd(a: __m64) -> __m128d;
    #[link_name = "llvm.x86.sse.cvtpd2pi"]
    fn cvtpd2pi(a: __m128d) -> __m64;
    #[link_name = "llvm.x86.sse.cvttpd2pi"]
    fn cvttpd2pi(a: __m128d) -> __m64;
}
3182 | ||
3183 | #[cfg(test)] | |
3184 | mod tests { | |
0531ce1d | 3185 | use std::f32; |
83c7162d | 3186 | use std::f64::{self, NAN}; |
0531ce1d | 3187 | use std::i32; |
83c7162d | 3188 | use std::mem::{self, transmute}; |
0531ce1d | 3189 | |
83c7162d XL |
3190 | use coresimd::simd::*; |
3191 | use coresimd::x86::*; | |
0531ce1d XL |
3192 | use stdsimd_test::simd_test; |
3193 | use test::black_box; // Used to inhibit constant-folding. | |
0531ce1d | 3194 | |
83c7162d | 3195 | #[simd_test(enable = "sse2")] |
0531ce1d XL |
3196 | unsafe fn test_mm_pause() { |
3197 | _mm_pause(); | |
3198 | } | |
3199 | ||
83c7162d | 3200 | #[simd_test(enable = "sse2")] |
0531ce1d XL |
3201 | unsafe fn test_mm_clflush() { |
3202 | let x = 0; | |
3203 | _mm_clflush(&x as *const _ as *mut u8); | |
3204 | } | |
3205 | ||
83c7162d | 3206 | #[simd_test(enable = "sse2")] |
0531ce1d XL |
3207 | unsafe fn test_mm_lfence() { |
3208 | _mm_lfence(); | |
3209 | } | |
3210 | ||
83c7162d | 3211 | #[simd_test(enable = "sse2")] |
0531ce1d XL |
3212 | unsafe fn test_mm_mfence() { |
3213 | _mm_mfence(); | |
3214 | } | |
3215 | ||
83c7162d | 3216 | #[simd_test(enable = "sse2")] |
0531ce1d | 3217 | unsafe fn test_mm_add_epi8() { |
0731742a XL |
3218 | let a = _mm_setr_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15); |
3219 | #[rustfmt::skip] | |
0531ce1d XL |
3220 | let b = _mm_setr_epi8( |
3221 | 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, | |
3222 | ); | |
3223 | let r = _mm_add_epi8(a, b); | |
0731742a | 3224 | #[rustfmt::skip] |
0531ce1d XL |
3225 | let e = _mm_setr_epi8( |
3226 | 16, 18, 20, 22, 24, 26, 28, 30, 32, 34, 36, 38, 40, 42, 44, 46, | |
3227 | ); | |
3228 | assert_eq_m128i(r, e); | |
3229 | } | |
3230 | ||
83c7162d | 3231 | #[simd_test(enable = "sse2")] |
0531ce1d XL |
3232 | unsafe fn test_mm_add_epi8_overflow() { |
3233 | let a = _mm_set1_epi8(0x7F); | |
3234 | let b = _mm_set1_epi8(1); | |
3235 | let r = _mm_add_epi8(a, b); | |
3236 | assert_eq_m128i(r, _mm_set1_epi8(-128)); | |
3237 | } | |
3238 | ||
83c7162d | 3239 | #[simd_test(enable = "sse2")] |
0531ce1d XL |
3240 | unsafe fn test_mm_add_epi16() { |
3241 | let a = _mm_setr_epi16(0, 1, 2, 3, 4, 5, 6, 7); | |
3242 | let b = _mm_setr_epi16(8, 9, 10, 11, 12, 13, 14, 15); | |
3243 | let r = _mm_add_epi16(a, b); | |
3244 | let e = _mm_setr_epi16(8, 10, 12, 14, 16, 18, 20, 22); | |
3245 | assert_eq_m128i(r, e); | |
3246 | } | |
3247 | ||
83c7162d | 3248 | #[simd_test(enable = "sse2")] |
0531ce1d XL |
3249 | unsafe fn test_mm_add_epi32() { |
3250 | let a = _mm_setr_epi32(0, 1, 2, 3); | |
3251 | let b = _mm_setr_epi32(4, 5, 6, 7); | |
3252 | let r = _mm_add_epi32(a, b); | |
3253 | let e = _mm_setr_epi32(4, 6, 8, 10); | |
3254 | assert_eq_m128i(r, e); | |
3255 | } | |
3256 | ||
83c7162d | 3257 | #[simd_test(enable = "sse2")] |
0531ce1d XL |
3258 | unsafe fn test_mm_add_epi64() { |
3259 | let a = _mm_setr_epi64x(0, 1); | |
3260 | let b = _mm_setr_epi64x(2, 3); | |
3261 | let r = _mm_add_epi64(a, b); | |
3262 | let e = _mm_setr_epi64x(2, 4); | |
3263 | assert_eq_m128i(r, e); | |
3264 | } | |
3265 | ||
83c7162d | 3266 | #[simd_test(enable = "sse2")] |
0531ce1d | 3267 | unsafe fn test_mm_adds_epi8() { |
0731742a XL |
3268 | let a = _mm_setr_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15); |
3269 | #[rustfmt::skip] | |
0531ce1d XL |
3270 | let b = _mm_setr_epi8( |
3271 | 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, | |
3272 | ); | |
3273 | let r = _mm_adds_epi8(a, b); | |
0731742a | 3274 | #[rustfmt::skip] |
0531ce1d XL |
3275 | let e = _mm_setr_epi8( |
3276 | 16, 18, 20, 22, 24, 26, 28, 30, 32, 34, 36, 38, 40, 42, 44, 46, | |
3277 | ); | |
3278 | assert_eq_m128i(r, e); | |
3279 | } | |
3280 | ||
83c7162d | 3281 | #[simd_test(enable = "sse2")] |
0531ce1d XL |
3282 | unsafe fn test_mm_adds_epi8_saturate_positive() { |
3283 | let a = _mm_set1_epi8(0x7F); | |
3284 | let b = _mm_set1_epi8(1); | |
3285 | let r = _mm_adds_epi8(a, b); | |
3286 | assert_eq_m128i(r, a); | |
3287 | } | |
3288 | ||
83c7162d | 3289 | #[simd_test(enable = "sse2")] |
0531ce1d XL |
3290 | unsafe fn test_mm_adds_epi8_saturate_negative() { |
3291 | let a = _mm_set1_epi8(-0x80); | |
3292 | let b = _mm_set1_epi8(-1); | |
3293 | let r = _mm_adds_epi8(a, b); | |
3294 | assert_eq_m128i(r, a); | |
3295 | } | |
3296 | ||
83c7162d | 3297 | #[simd_test(enable = "sse2")] |
0531ce1d XL |
3298 | unsafe fn test_mm_adds_epi16() { |
3299 | let a = _mm_setr_epi16(0, 1, 2, 3, 4, 5, 6, 7); | |
3300 | let b = _mm_setr_epi16(8, 9, 10, 11, 12, 13, 14, 15); | |
3301 | let r = _mm_adds_epi16(a, b); | |
3302 | let e = _mm_setr_epi16(8, 10, 12, 14, 16, 18, 20, 22); | |
3303 | assert_eq_m128i(r, e); | |
3304 | } | |
3305 | ||
83c7162d | 3306 | #[simd_test(enable = "sse2")] |
0531ce1d XL |
3307 | unsafe fn test_mm_adds_epi16_saturate_positive() { |
3308 | let a = _mm_set1_epi16(0x7FFF); | |
3309 | let b = _mm_set1_epi16(1); | |
3310 | let r = _mm_adds_epi16(a, b); | |
3311 | assert_eq_m128i(r, a); | |
3312 | } | |
3313 | ||
83c7162d | 3314 | #[simd_test(enable = "sse2")] |
0531ce1d XL |
3315 | unsafe fn test_mm_adds_epi16_saturate_negative() { |
3316 | let a = _mm_set1_epi16(-0x8000); | |
3317 | let b = _mm_set1_epi16(-1); | |
3318 | let r = _mm_adds_epi16(a, b); | |
3319 | assert_eq_m128i(r, a); | |
3320 | } | |
3321 | ||
83c7162d | 3322 | #[simd_test(enable = "sse2")] |
0531ce1d | 3323 | unsafe fn test_mm_adds_epu8() { |
0731742a XL |
3324 | let a = _mm_setr_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15); |
3325 | #[rustfmt::skip] | |
0531ce1d XL |
3326 | let b = _mm_setr_epi8( |
3327 | 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, | |
3328 | ); | |
3329 | let r = _mm_adds_epu8(a, b); | |
0731742a | 3330 | #[rustfmt::skip] |
0531ce1d XL |
3331 | let e = _mm_setr_epi8( |
3332 | 16, 18, 20, 22, 24, 26, 28, 30, 32, 34, 36, 38, 40, 42, 44, 46, | |
3333 | ); | |
3334 | assert_eq_m128i(r, e); | |
3335 | } | |
3336 | ||
83c7162d | 3337 | #[simd_test(enable = "sse2")] |
0531ce1d XL |
3338 | unsafe fn test_mm_adds_epu8_saturate() { |
3339 | let a = _mm_set1_epi8(!0); | |
3340 | let b = _mm_set1_epi8(1); | |
3341 | let r = _mm_adds_epu8(a, b); | |
3342 | assert_eq_m128i(r, a); | |
3343 | } | |
3344 | ||
83c7162d | 3345 | #[simd_test(enable = "sse2")] |
0531ce1d XL |
3346 | unsafe fn test_mm_adds_epu16() { |
3347 | let a = _mm_setr_epi16(0, 1, 2, 3, 4, 5, 6, 7); | |
3348 | let b = _mm_setr_epi16(8, 9, 10, 11, 12, 13, 14, 15); | |
3349 | let r = _mm_adds_epu16(a, b); | |
3350 | let e = _mm_setr_epi16(8, 10, 12, 14, 16, 18, 20, 22); | |
3351 | assert_eq_m128i(r, e); | |
3352 | } | |
3353 | ||
83c7162d | 3354 | #[simd_test(enable = "sse2")] |
0531ce1d XL |
3355 | unsafe fn test_mm_adds_epu16_saturate() { |
3356 | let a = _mm_set1_epi16(!0); | |
3357 | let b = _mm_set1_epi16(1); | |
3358 | let r = _mm_adds_epu16(a, b); | |
3359 | assert_eq_m128i(r, a); | |
3360 | } | |
3361 | ||
83c7162d | 3362 | #[simd_test(enable = "sse2")] |
0531ce1d XL |
3363 | unsafe fn test_mm_avg_epu8() { |
3364 | let (a, b) = (_mm_set1_epi8(3), _mm_set1_epi8(9)); | |
3365 | let r = _mm_avg_epu8(a, b); | |
3366 | assert_eq_m128i(r, _mm_set1_epi8(6)); | |
3367 | } | |
3368 | ||
83c7162d | 3369 | #[simd_test(enable = "sse2")] |
0531ce1d XL |
3370 | unsafe fn test_mm_avg_epu16() { |
3371 | let (a, b) = (_mm_set1_epi16(3), _mm_set1_epi16(9)); | |
3372 | let r = _mm_avg_epu16(a, b); | |
3373 | assert_eq_m128i(r, _mm_set1_epi16(6)); | |
3374 | } | |
3375 | ||
83c7162d | 3376 | #[simd_test(enable = "sse2")] |
0531ce1d XL |
3377 | unsafe fn test_mm_madd_epi16() { |
3378 | let a = _mm_setr_epi16(1, 2, 3, 4, 5, 6, 7, 8); | |
3379 | let b = _mm_setr_epi16(9, 10, 11, 12, 13, 14, 15, 16); | |
3380 | let r = _mm_madd_epi16(a, b); | |
3381 | let e = _mm_setr_epi32(29, 81, 149, 233); | |
3382 | assert_eq_m128i(r, e); | |
3383 | } | |
3384 | ||
83c7162d | 3385 | #[simd_test(enable = "sse2")] |
0531ce1d XL |
3386 | unsafe fn test_mm_max_epi16() { |
3387 | let a = _mm_set1_epi16(1); | |
3388 | let b = _mm_set1_epi16(-1); | |
3389 | let r = _mm_max_epi16(a, b); | |
3390 | assert_eq_m128i(r, a); | |
3391 | } | |
3392 | ||
83c7162d | 3393 | #[simd_test(enable = "sse2")] |
0531ce1d XL |
3394 | unsafe fn test_mm_max_epu8() { |
3395 | let a = _mm_set1_epi8(1); | |
3396 | let b = _mm_set1_epi8(!0); | |
3397 | let r = _mm_max_epu8(a, b); | |
3398 | assert_eq_m128i(r, b); | |
3399 | } | |
3400 | ||
83c7162d | 3401 | #[simd_test(enable = "sse2")] |
0531ce1d XL |
3402 | unsafe fn test_mm_min_epi16() { |
3403 | let a = _mm_set1_epi16(1); | |
3404 | let b = _mm_set1_epi16(-1); | |
3405 | let r = _mm_min_epi16(a, b); | |
3406 | assert_eq_m128i(r, b); | |
3407 | } | |
3408 | ||
83c7162d | 3409 | #[simd_test(enable = "sse2")] |
0531ce1d XL |
3410 | unsafe fn test_mm_min_epu8() { |
3411 | let a = _mm_set1_epi8(1); | |
3412 | let b = _mm_set1_epi8(!0); | |
3413 | let r = _mm_min_epu8(a, b); | |
3414 | assert_eq_m128i(r, a); | |
3415 | } | |
3416 | ||
83c7162d | 3417 | #[simd_test(enable = "sse2")] |
0531ce1d XL |
3418 | unsafe fn test_mm_mulhi_epi16() { |
3419 | let (a, b) = (_mm_set1_epi16(1000), _mm_set1_epi16(-1001)); | |
3420 | let r = _mm_mulhi_epi16(a, b); | |
3421 | assert_eq_m128i(r, _mm_set1_epi16(-16)); | |
3422 | } | |
3423 | ||
83c7162d | 3424 | #[simd_test(enable = "sse2")] |
0531ce1d XL |
3425 | unsafe fn test_mm_mulhi_epu16() { |
3426 | let (a, b) = (_mm_set1_epi16(1000), _mm_set1_epi16(1001)); | |
3427 | let r = _mm_mulhi_epu16(a, b); | |
3428 | assert_eq_m128i(r, _mm_set1_epi16(15)); | |
3429 | } | |
3430 | ||
83c7162d | 3431 | #[simd_test(enable = "sse2")] |
0531ce1d XL |
3432 | unsafe fn test_mm_mullo_epi16() { |
3433 | let (a, b) = (_mm_set1_epi16(1000), _mm_set1_epi16(-1001)); | |
3434 | let r = _mm_mullo_epi16(a, b); | |
3435 | assert_eq_m128i(r, _mm_set1_epi16(-17960)); | |
3436 | } | |
3437 | ||
83c7162d | 3438 | #[simd_test(enable = "sse2")] |
0531ce1d XL |
3439 | unsafe fn test_mm_mul_epu32() { |
3440 | let a = _mm_setr_epi64x(1_000_000_000, 1 << 34); | |
3441 | let b = _mm_setr_epi64x(1_000_000_000, 1 << 35); | |
3442 | let r = _mm_mul_epu32(a, b); | |
3443 | let e = _mm_setr_epi64x(1_000_000_000 * 1_000_000_000, 0); | |
3444 | assert_eq_m128i(r, e); | |
3445 | } | |
3446 | ||
83c7162d | 3447 | #[simd_test(enable = "sse2")] |
0531ce1d | 3448 | unsafe fn test_mm_sad_epu8() { |
0731742a | 3449 | #[rustfmt::skip] |
0531ce1d XL |
3450 | let a = _mm_setr_epi8( |
3451 | 255u8 as i8, 254u8 as i8, 253u8 as i8, 252u8 as i8, | |
3452 | 1, 2, 3, 4, | |
3453 | 155u8 as i8, 154u8 as i8, 153u8 as i8, 152u8 as i8, | |
3454 | 1, 2, 3, 4, | |
3455 | ); | |
3456 | let b = _mm_setr_epi8(0, 0, 0, 0, 2, 1, 2, 1, 1, 1, 1, 1, 1, 2, 1, 2); | |
3457 | let r = _mm_sad_epu8(a, b); | |
3458 | let e = _mm_setr_epi64x(1020, 614); | |
3459 | assert_eq_m128i(r, e); | |
3460 | } | |
3461 | ||
83c7162d | 3462 | #[simd_test(enable = "sse2")] |
0531ce1d XL |
3463 | unsafe fn test_mm_sub_epi8() { |
3464 | let (a, b) = (_mm_set1_epi8(5), _mm_set1_epi8(6)); | |
3465 | let r = _mm_sub_epi8(a, b); | |
3466 | assert_eq_m128i(r, _mm_set1_epi8(-1)); | |
3467 | } | |
3468 | ||
83c7162d | 3469 | #[simd_test(enable = "sse2")] |
0531ce1d XL |
3470 | unsafe fn test_mm_sub_epi16() { |
3471 | let (a, b) = (_mm_set1_epi16(5), _mm_set1_epi16(6)); | |
3472 | let r = _mm_sub_epi16(a, b); | |
3473 | assert_eq_m128i(r, _mm_set1_epi16(-1)); | |
3474 | } | |
3475 | ||
83c7162d | 3476 | #[simd_test(enable = "sse2")] |
0531ce1d XL |
3477 | unsafe fn test_mm_sub_epi32() { |
3478 | let (a, b) = (_mm_set1_epi32(5), _mm_set1_epi32(6)); | |
3479 | let r = _mm_sub_epi32(a, b); | |
3480 | assert_eq_m128i(r, _mm_set1_epi32(-1)); | |
3481 | } | |
3482 | ||
83c7162d | 3483 | #[simd_test(enable = "sse2")] |
0531ce1d XL |
3484 | unsafe fn test_mm_sub_epi64() { |
3485 | let (a, b) = (_mm_set1_epi64x(5), _mm_set1_epi64x(6)); | |
3486 | let r = _mm_sub_epi64(a, b); | |
3487 | assert_eq_m128i(r, _mm_set1_epi64x(-1)); | |
3488 | } | |
3489 | ||
83c7162d | 3490 | #[simd_test(enable = "sse2")] |
0531ce1d XL |
3491 | unsafe fn test_mm_subs_epi8() { |
3492 | let (a, b) = (_mm_set1_epi8(5), _mm_set1_epi8(2)); | |
3493 | let r = _mm_subs_epi8(a, b); | |
3494 | assert_eq_m128i(r, _mm_set1_epi8(3)); | |
3495 | } | |
3496 | ||
83c7162d | 3497 | #[simd_test(enable = "sse2")] |
0531ce1d XL |
3498 | unsafe fn test_mm_subs_epi8_saturate_positive() { |
3499 | let a = _mm_set1_epi8(0x7F); | |
3500 | let b = _mm_set1_epi8(-1); | |
3501 | let r = _mm_subs_epi8(a, b); | |
3502 | assert_eq_m128i(r, a); | |
3503 | } | |
3504 | ||
83c7162d | 3505 | #[simd_test(enable = "sse2")] |
0531ce1d XL |
3506 | unsafe fn test_mm_subs_epi8_saturate_negative() { |
3507 | let a = _mm_set1_epi8(-0x80); | |
3508 | let b = _mm_set1_epi8(1); | |
3509 | let r = _mm_subs_epi8(a, b); | |
3510 | assert_eq_m128i(r, a); | |
3511 | } | |
3512 | ||
83c7162d | 3513 | #[simd_test(enable = "sse2")] |
0531ce1d XL |
3514 | unsafe fn test_mm_subs_epi16() { |
3515 | let (a, b) = (_mm_set1_epi16(5), _mm_set1_epi16(2)); | |
3516 | let r = _mm_subs_epi16(a, b); | |
3517 | assert_eq_m128i(r, _mm_set1_epi16(3)); | |
3518 | } | |
3519 | ||
83c7162d | 3520 | #[simd_test(enable = "sse2")] |
0531ce1d XL |
3521 | unsafe fn test_mm_subs_epi16_saturate_positive() { |
3522 | let a = _mm_set1_epi16(0x7FFF); | |
3523 | let b = _mm_set1_epi16(-1); | |
3524 | let r = _mm_subs_epi16(a, b); | |
3525 | assert_eq_m128i(r, a); | |
3526 | } | |
3527 | ||
83c7162d | 3528 | #[simd_test(enable = "sse2")] |
0531ce1d XL |
3529 | unsafe fn test_mm_subs_epi16_saturate_negative() { |
3530 | let a = _mm_set1_epi16(-0x8000); | |
3531 | let b = _mm_set1_epi16(1); | |
3532 | let r = _mm_subs_epi16(a, b); | |
3533 | assert_eq_m128i(r, a); | |
3534 | } | |
3535 | ||
83c7162d | 3536 | #[simd_test(enable = "sse2")] |
0531ce1d XL |
3537 | unsafe fn test_mm_subs_epu8() { |
3538 | let (a, b) = (_mm_set1_epi8(5), _mm_set1_epi8(2)); | |
3539 | let r = _mm_subs_epu8(a, b); | |
3540 | assert_eq_m128i(r, _mm_set1_epi8(3)); | |
3541 | } | |
3542 | ||
83c7162d | 3543 | #[simd_test(enable = "sse2")] |
0531ce1d XL |
3544 | unsafe fn test_mm_subs_epu8_saturate() { |
3545 | let a = _mm_set1_epi8(0); | |
3546 | let b = _mm_set1_epi8(1); | |
3547 | let r = _mm_subs_epu8(a, b); | |
3548 | assert_eq_m128i(r, a); | |
3549 | } | |
3550 | ||
83c7162d | 3551 | #[simd_test(enable = "sse2")] |
0531ce1d XL |
3552 | unsafe fn test_mm_subs_epu16() { |
3553 | let (a, b) = (_mm_set1_epi16(5), _mm_set1_epi16(2)); | |
3554 | let r = _mm_subs_epu16(a, b); | |
3555 | assert_eq_m128i(r, _mm_set1_epi16(3)); | |
3556 | } | |
3557 | ||
83c7162d | 3558 | #[simd_test(enable = "sse2")] |
0531ce1d XL |
3559 | unsafe fn test_mm_subs_epu16_saturate() { |
3560 | let a = _mm_set1_epi16(0); | |
3561 | let b = _mm_set1_epi16(1); | |
3562 | let r = _mm_subs_epu16(a, b); | |
3563 | assert_eq_m128i(r, a); | |
3564 | } | |
3565 | ||
83c7162d | 3566 | #[simd_test(enable = "sse2")] |
0531ce1d | 3567 | unsafe fn test_mm_slli_si128() { |
0731742a | 3568 | #[rustfmt::skip] |
0531ce1d XL |
3569 | let a = _mm_setr_epi8( |
3570 | 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, | |
3571 | ); | |
3572 | let r = _mm_slli_si128(a, 1); | |
0731742a | 3573 | let e = _mm_setr_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15); |
0531ce1d XL |
3574 | assert_eq_m128i(r, e); |
3575 | ||
0731742a | 3576 | #[rustfmt::skip] |
0531ce1d XL |
3577 | let a = _mm_setr_epi8( |
3578 | 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, | |
3579 | ); | |
3580 | let r = _mm_slli_si128(a, 15); | |
3581 | let e = _mm_setr_epi8(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1); | |
3582 | assert_eq_m128i(r, e); | |
3583 | ||
0731742a | 3584 | #[rustfmt::skip] |
0531ce1d XL |
3585 | let a = _mm_setr_epi8( |
3586 | 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, | |
3587 | ); | |
3588 | let r = _mm_slli_si128(a, 16); | |
3589 | assert_eq_m128i(r, _mm_set1_epi8(0)); | |
3590 | ||
0731742a | 3591 | #[rustfmt::skip] |
0531ce1d XL |
3592 | let a = _mm_setr_epi8( |
3593 | 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, | |
3594 | ); | |
3595 | let r = _mm_slli_si128(a, -1); | |
3596 | assert_eq_m128i(_mm_set1_epi8(0), r); | |
3597 | ||
0731742a | 3598 | #[rustfmt::skip] |
0531ce1d XL |
3599 | let a = _mm_setr_epi8( |
3600 | 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, | |
3601 | ); | |
3602 | let r = _mm_slli_si128(a, -0x80000000); | |
3603 | assert_eq_m128i(r, _mm_set1_epi8(0)); | |
3604 | } | |
3605 | ||
83c7162d | 3606 | #[simd_test(enable = "sse2")] |
0531ce1d | 3607 | unsafe fn test_mm_slli_epi16() { |
0731742a | 3608 | #[rustfmt::skip] |
0531ce1d XL |
3609 | let a = _mm_setr_epi16( |
3610 | 0xFFFF as u16 as i16, 0x0FFF, 0x00FF, 0x000F, 0, 0, 0, 0, | |
3611 | ); | |
3612 | let r = _mm_slli_epi16(a, 4); | |
3613 | ||
0731742a | 3614 | #[rustfmt::skip] |
0531ce1d XL |
3615 | let e = _mm_setr_epi16( |
3616 | 0xFFF0 as u16 as i16, 0xFFF0 as u16 as i16, 0x0FF0, 0x00F0, | |
3617 | 0, 0, 0, 0, | |
3618 | ); | |
3619 | assert_eq_m128i(r, e); | |
3620 | } | |
3621 | ||
83c7162d | 3622 | #[simd_test(enable = "sse2")] |
0531ce1d XL |
3623 | unsafe fn test_mm_sll_epi16() { |
3624 | let a = _mm_setr_epi16(0xFF, 0, 0, 0, 0, 0, 0, 0); | |
3625 | let r = _mm_sll_epi16(a, _mm_setr_epi16(4, 0, 0, 0, 0, 0, 0, 0)); | |
3626 | assert_eq_m128i(r, _mm_setr_epi16(0xFF0, 0, 0, 0, 0, 0, 0, 0)); | |
3627 | let r = _mm_sll_epi16(a, _mm_setr_epi16(0, 0, 0, 0, 4, 0, 0, 0)); | |
3628 | assert_eq_m128i(r, _mm_setr_epi16(0xFF, 0, 0, 0, 0, 0, 0, 0)); | |
3629 | } | |
3630 | ||
83c7162d | 3631 | #[simd_test(enable = "sse2")] |
0531ce1d XL |
3632 | unsafe fn test_mm_slli_epi32() { |
3633 | let r = _mm_slli_epi32(_mm_set1_epi32(0xFFFF), 4); | |
3634 | assert_eq_m128i(r, _mm_set1_epi32(0xFFFF0)); | |
3635 | } | |
3636 | ||
83c7162d | 3637 | #[simd_test(enable = "sse2")] |
0531ce1d XL |
3638 | unsafe fn test_mm_sll_epi32() { |
3639 | let a = _mm_set1_epi32(0xFFFF); | |
3640 | let b = _mm_setr_epi32(4, 0, 0, 0); | |
3641 | let r = _mm_sll_epi32(a, b); | |
3642 | assert_eq_m128i(r, _mm_set1_epi32(0xFFFF0)); | |
3643 | } | |
3644 | ||
83c7162d | 3645 | #[simd_test(enable = "sse2")] |
0531ce1d XL |
3646 | unsafe fn test_mm_slli_epi64() { |
3647 | let r = _mm_slli_epi64(_mm_set1_epi64x(0xFFFFFFFF), 4); | |
3648 | assert_eq_m128i(r, _mm_set1_epi64x(0xFFFFFFFF0)); | |
3649 | } | |
3650 | ||
83c7162d | 3651 | #[simd_test(enable = "sse2")] |
0531ce1d XL |
3652 | unsafe fn test_mm_sll_epi64() { |
3653 | let a = _mm_set1_epi64x(0xFFFFFFFF); | |
3654 | let b = _mm_setr_epi64x(4, 0); | |
3655 | let r = _mm_sll_epi64(a, b); | |
3656 | assert_eq_m128i(r, _mm_set1_epi64x(0xFFFFFFFF0)); | |
3657 | } | |
3658 | ||
83c7162d | 3659 | #[simd_test(enable = "sse2")] |
0531ce1d XL |
3660 | unsafe fn test_mm_srai_epi16() { |
3661 | let r = _mm_srai_epi16(_mm_set1_epi16(-1), 1); | |
3662 | assert_eq_m128i(r, _mm_set1_epi16(-1)); | |
3663 | } | |
3664 | ||
83c7162d | 3665 | #[simd_test(enable = "sse2")] |
0531ce1d XL |
3666 | unsafe fn test_mm_sra_epi16() { |
3667 | let a = _mm_set1_epi16(-1); | |
3668 | let b = _mm_setr_epi16(1, 0, 0, 0, 0, 0, 0, 0); | |
3669 | let r = _mm_sra_epi16(a, b); | |
3670 | assert_eq_m128i(r, _mm_set1_epi16(-1)); | |
3671 | } | |
3672 | ||
83c7162d | 3673 | #[simd_test(enable = "sse2")] |
0531ce1d XL |
3674 | unsafe fn test_mm_srai_epi32() { |
3675 | let r = _mm_srai_epi32(_mm_set1_epi32(-1), 1); | |
3676 | assert_eq_m128i(r, _mm_set1_epi32(-1)); | |
3677 | } | |
3678 | ||
83c7162d | 3679 | #[simd_test(enable = "sse2")] |
0531ce1d XL |
3680 | unsafe fn test_mm_sra_epi32() { |
3681 | let a = _mm_set1_epi32(-1); | |
3682 | let b = _mm_setr_epi32(1, 0, 0, 0); | |
3683 | let r = _mm_sra_epi32(a, b); | |
3684 | assert_eq_m128i(r, _mm_set1_epi32(-1)); | |
3685 | } | |
3686 | ||
83c7162d | 3687 | #[simd_test(enable = "sse2")] |
0531ce1d | 3688 | unsafe fn test_mm_srli_si128() { |
0731742a | 3689 | #[rustfmt::skip] |
0531ce1d XL |
3690 | let a = _mm_setr_epi8( |
3691 | 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, | |
3692 | ); | |
3693 | let r = _mm_srli_si128(a, 1); | |
0731742a | 3694 | #[rustfmt::skip] |
0531ce1d XL |
3695 | let e = _mm_setr_epi8( |
3696 | 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 0, | |
3697 | ); | |
3698 | assert_eq_m128i(r, e); | |
3699 | ||
0731742a | 3700 | #[rustfmt::skip] |
0531ce1d XL |
3701 | let a = _mm_setr_epi8( |
3702 | 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, | |
3703 | ); | |
3704 | let r = _mm_srli_si128(a, 15); | |
3705 | let e = _mm_setr_epi8(16, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0); | |
3706 | assert_eq_m128i(r, e); | |
3707 | ||
0731742a | 3708 | #[rustfmt::skip] |
0531ce1d XL |
3709 | let a = _mm_setr_epi8( |
3710 | 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, | |
3711 | ); | |
3712 | let r = _mm_srli_si128(a, 16); | |
3713 | assert_eq_m128i(r, _mm_set1_epi8(0)); | |
3714 | ||
0731742a | 3715 | #[rustfmt::skip] |
0531ce1d XL |
3716 | let a = _mm_setr_epi8( |
3717 | 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, | |
3718 | ); | |
3719 | let r = _mm_srli_si128(a, -1); | |
3720 | assert_eq_m128i(r, _mm_set1_epi8(0)); | |
3721 | ||
0731742a | 3722 | #[rustfmt::skip] |
0531ce1d XL |
3723 | let a = _mm_setr_epi8( |
3724 | 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, | |
3725 | ); | |
3726 | let r = _mm_srli_si128(a, -0x80000000); | |
3727 | assert_eq_m128i(r, _mm_set1_epi8(0)); | |
3728 | } | |
3729 | ||
83c7162d | 3730 | #[simd_test(enable = "sse2")] |
0531ce1d | 3731 | unsafe fn test_mm_srli_epi16() { |
0731742a | 3732 | #[rustfmt::skip] |
0531ce1d XL |
3733 | let a = _mm_setr_epi16( |
3734 | 0xFFFF as u16 as i16, 0x0FFF, 0x00FF, 0x000F, 0, 0, 0, 0, | |
3735 | ); | |
3736 | let r = _mm_srli_epi16(a, 4); | |
0731742a | 3737 | #[rustfmt::skip] |
0531ce1d XL |
3738 | let e = _mm_setr_epi16( |
3739 | 0xFFF as u16 as i16, 0xFF as u16 as i16, 0xF, 0, 0, 0, 0, 0, | |
3740 | ); | |
3741 | assert_eq_m128i(r, e); | |
3742 | } | |
3743 | ||
83c7162d | 3744 | #[simd_test(enable = "sse2")] |
0531ce1d XL |
3745 | unsafe fn test_mm_srl_epi16() { |
3746 | let a = _mm_setr_epi16(0xFF, 0, 0, 0, 0, 0, 0, 0); | |
3747 | let r = _mm_srl_epi16(a, _mm_setr_epi16(4, 0, 0, 0, 0, 0, 0, 0)); | |
3748 | assert_eq_m128i(r, _mm_setr_epi16(0xF, 0, 0, 0, 0, 0, 0, 0)); | |
3749 | let r = _mm_srl_epi16(a, _mm_setr_epi16(0, 0, 0, 0, 4, 0, 0, 0)); | |
3750 | assert_eq_m128i(r, _mm_setr_epi16(0xFF, 0, 0, 0, 0, 0, 0, 0)); | |
3751 | } | |
3752 | ||
83c7162d | 3753 | #[simd_test(enable = "sse2")] |
0531ce1d XL |
3754 | unsafe fn test_mm_srli_epi32() { |
3755 | let r = _mm_srli_epi32(_mm_set1_epi32(0xFFFF), 4); | |
3756 | assert_eq_m128i(r, _mm_set1_epi32(0xFFF)); | |
3757 | } | |
3758 | ||
83c7162d | 3759 | #[simd_test(enable = "sse2")] |
0531ce1d XL |
3760 | unsafe fn test_mm_srl_epi32() { |
3761 | let a = _mm_set1_epi32(0xFFFF); | |
3762 | let b = _mm_setr_epi32(4, 0, 0, 0); | |
3763 | let r = _mm_srl_epi32(a, b); | |
3764 | assert_eq_m128i(r, _mm_set1_epi32(0xFFF)); | |
3765 | } | |
3766 | ||
83c7162d | 3767 | #[simd_test(enable = "sse2")] |
0531ce1d XL |
3768 | unsafe fn test_mm_srli_epi64() { |
3769 | let r = _mm_srli_epi64(_mm_set1_epi64x(0xFFFFFFFF), 4); | |
3770 | assert_eq_m128i(r, _mm_set1_epi64x(0xFFFFFFF)); | |
3771 | } | |
3772 | ||
83c7162d | 3773 | #[simd_test(enable = "sse2")] |
0531ce1d XL |
3774 | unsafe fn test_mm_srl_epi64() { |
3775 | let a = _mm_set1_epi64x(0xFFFFFFFF); | |
3776 | let b = _mm_setr_epi64x(4, 0); | |
3777 | let r = _mm_srl_epi64(a, b); | |
3778 | assert_eq_m128i(r, _mm_set1_epi64x(0xFFFFFFF)); | |
3779 | } | |
3780 | ||
83c7162d | 3781 | #[simd_test(enable = "sse2")] |
0531ce1d XL |
3782 | unsafe fn test_mm_and_si128() { |
3783 | let a = _mm_set1_epi8(5); | |
3784 | let b = _mm_set1_epi8(3); | |
3785 | let r = _mm_and_si128(a, b); | |
3786 | assert_eq_m128i(r, _mm_set1_epi8(1)); | |
3787 | } | |
3788 | ||
83c7162d | 3789 | #[simd_test(enable = "sse2")] |
0531ce1d XL |
3790 | unsafe fn test_mm_andnot_si128() { |
3791 | let a = _mm_set1_epi8(5); | |
3792 | let b = _mm_set1_epi8(3); | |
3793 | let r = _mm_andnot_si128(a, b); | |
3794 | assert_eq_m128i(r, _mm_set1_epi8(2)); | |
3795 | } | |
3796 | ||
83c7162d | 3797 | #[simd_test(enable = "sse2")] |
0531ce1d XL |
3798 | unsafe fn test_mm_or_si128() { |
3799 | let a = _mm_set1_epi8(5); | |
3800 | let b = _mm_set1_epi8(3); | |
3801 | let r = _mm_or_si128(a, b); | |
3802 | assert_eq_m128i(r, _mm_set1_epi8(7)); | |
3803 | } | |
3804 | ||
83c7162d | 3805 | #[simd_test(enable = "sse2")] |
0531ce1d XL |
3806 | unsafe fn test_mm_xor_si128() { |
3807 | let a = _mm_set1_epi8(5); | |
3808 | let b = _mm_set1_epi8(3); | |
3809 | let r = _mm_xor_si128(a, b); | |
3810 | assert_eq_m128i(r, _mm_set1_epi8(6)); | |
3811 | } | |
3812 | ||
83c7162d | 3813 | #[simd_test(enable = "sse2")] |
0531ce1d | 3814 | unsafe fn test_mm_cmpeq_epi8() { |
0731742a XL |
3815 | let a = _mm_setr_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15); |
3816 | let b = _mm_setr_epi8(15, 14, 2, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); | |
0531ce1d | 3817 | let r = _mm_cmpeq_epi8(a, b); |
0731742a | 3818 | #[rustfmt::skip] |
0531ce1d XL |
3819 | assert_eq_m128i( |
3820 | r, | |
3821 | _mm_setr_epi8( | |
3822 | 0, 0, 0xFFu8 as i8, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 | |
3823 | ) | |
3824 | ); | |
3825 | } | |
3826 | ||
83c7162d | 3827 | #[simd_test(enable = "sse2")] |
0531ce1d XL |
3828 | unsafe fn test_mm_cmpeq_epi16() { |
3829 | let a = _mm_setr_epi16(0, 1, 2, 3, 4, 5, 6, 7); | |
3830 | let b = _mm_setr_epi16(7, 6, 2, 4, 3, 2, 1, 0); | |
3831 | let r = _mm_cmpeq_epi16(a, b); | |
3832 | assert_eq_m128i(r, _mm_setr_epi16(0, 0, !0, 0, 0, 0, 0, 0)); | |
3833 | } | |
3834 | ||
83c7162d | 3835 | #[simd_test(enable = "sse2")] |
0531ce1d XL |
3836 | unsafe fn test_mm_cmpeq_epi32() { |
3837 | let a = _mm_setr_epi32(0, 1, 2, 3); | |
3838 | let b = _mm_setr_epi32(3, 2, 2, 0); | |
3839 | let r = _mm_cmpeq_epi32(a, b); | |
3840 | assert_eq_m128i(r, _mm_setr_epi32(0, 0, !0, 0)); | |
3841 | } | |
3842 | ||
83c7162d | 3843 | #[simd_test(enable = "sse2")] |
0531ce1d XL |
3844 | unsafe fn test_mm_cmpgt_epi8() { |
3845 | let a = _mm_set_epi8(5, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0); | |
3846 | let b = _mm_set1_epi8(0); | |
3847 | let r = _mm_cmpgt_epi8(a, b); | |
3848 | let e = _mm_set_epi8(!0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0); | |
3849 | assert_eq_m128i(r, e); | |
3850 | } | |
3851 | ||
83c7162d | 3852 | #[simd_test(enable = "sse2")] |
0531ce1d XL |
3853 | unsafe fn test_mm_cmpgt_epi16() { |
3854 | let a = _mm_set_epi16(5, 0, 0, 0, 0, 0, 0, 0); | |
3855 | let b = _mm_set1_epi16(0); | |
3856 | let r = _mm_cmpgt_epi16(a, b); | |
3857 | let e = _mm_set_epi16(!0, 0, 0, 0, 0, 0, 0, 0); | |
3858 | assert_eq_m128i(r, e); | |
3859 | } | |
3860 | ||
83c7162d | 3861 | #[simd_test(enable = "sse2")] |
0531ce1d XL |
3862 | unsafe fn test_mm_cmpgt_epi32() { |
3863 | let a = _mm_set_epi32(5, 0, 0, 0); | |
3864 | let b = _mm_set1_epi32(0); | |
3865 | let r = _mm_cmpgt_epi32(a, b); | |
3866 | assert_eq_m128i(r, _mm_set_epi32(!0, 0, 0, 0)); | |
3867 | } | |
3868 | ||
83c7162d | 3869 | #[simd_test(enable = "sse2")] |
0531ce1d XL |
3870 | unsafe fn test_mm_cmplt_epi8() { |
3871 | let a = _mm_set1_epi8(0); | |
3872 | let b = _mm_set_epi8(5, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0); | |
3873 | let r = _mm_cmplt_epi8(a, b); | |
3874 | let e = _mm_set_epi8(!0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0); | |
3875 | assert_eq_m128i(r, e); | |
3876 | } | |
3877 | ||
83c7162d | 3878 | #[simd_test(enable = "sse2")] |
0531ce1d XL |
3879 | unsafe fn test_mm_cmplt_epi16() { |
3880 | let a = _mm_set1_epi16(0); | |
3881 | let b = _mm_set_epi16(5, 0, 0, 0, 0, 0, 0, 0); | |
3882 | let r = _mm_cmplt_epi16(a, b); | |
3883 | let e = _mm_set_epi16(!0, 0, 0, 0, 0, 0, 0, 0); | |
3884 | assert_eq_m128i(r, e); | |
3885 | } | |
3886 | ||
83c7162d | 3887 | #[simd_test(enable = "sse2")] |
0531ce1d XL |
3888 | unsafe fn test_mm_cmplt_epi32() { |
3889 | let a = _mm_set1_epi32(0); | |
3890 | let b = _mm_set_epi32(5, 0, 0, 0); | |
3891 | let r = _mm_cmplt_epi32(a, b); | |
3892 | assert_eq_m128i(r, _mm_set_epi32(!0, 0, 0, 0)); | |
3893 | } | |
3894 | ||
83c7162d | 3895 | #[simd_test(enable = "sse2")] |
0531ce1d XL |
3896 | unsafe fn test_mm_cvtepi32_pd() { |
3897 | let a = _mm_set_epi32(35, 25, 15, 5); | |
3898 | let r = _mm_cvtepi32_pd(a); | |
3899 | assert_eq_m128d(r, _mm_setr_pd(5.0, 15.0)); | |
3900 | } | |
3901 | ||
83c7162d | 3902 | #[simd_test(enable = "sse2")] |
0531ce1d XL |
3903 | unsafe fn test_mm_cvtsi32_sd() { |
3904 | let a = _mm_set1_pd(3.5); | |
3905 | let r = _mm_cvtsi32_sd(a, 5); | |
3906 | assert_eq_m128d(r, _mm_setr_pd(5.0, 3.5)); | |
3907 | } | |
3908 | ||
83c7162d | 3909 | #[simd_test(enable = "sse2")] |
0531ce1d XL |
3910 | unsafe fn test_mm_cvtepi32_ps() { |
3911 | let a = _mm_setr_epi32(1, 2, 3, 4); | |
3912 | let r = _mm_cvtepi32_ps(a); | |
3913 | assert_eq_m128(r, _mm_setr_ps(1.0, 2.0, 3.0, 4.0)); | |
3914 | } | |
3915 | ||
83c7162d | 3916 | #[simd_test(enable = "sse2")] |
0531ce1d XL |
3917 | unsafe fn test_mm_cvtps_epi32() { |
3918 | let a = _mm_setr_ps(1.0, 2.0, 3.0, 4.0); | |
3919 | let r = _mm_cvtps_epi32(a); | |
3920 | assert_eq_m128i(r, _mm_setr_epi32(1, 2, 3, 4)); | |
3921 | } | |
3922 | ||
83c7162d | 3923 | #[simd_test(enable = "sse2")] |
0531ce1d XL |
3924 | unsafe fn test_mm_cvtsi32_si128() { |
3925 | let r = _mm_cvtsi32_si128(5); | |
3926 | assert_eq_m128i(r, _mm_setr_epi32(5, 0, 0, 0)); | |
3927 | } | |
3928 | ||
83c7162d | 3929 | #[simd_test(enable = "sse2")] |
0531ce1d XL |
3930 | unsafe fn test_mm_cvtsi128_si32() { |
3931 | let r = _mm_cvtsi128_si32(_mm_setr_epi32(5, 0, 0, 0)); | |
3932 | assert_eq!(r, 5); | |
3933 | } | |
3934 | ||
83c7162d | 3935 | #[simd_test(enable = "sse2")] |
0531ce1d XL |
3936 | unsafe fn test_mm_set_epi64x() { |
3937 | let r = _mm_set_epi64x(0, 1); | |
3938 | assert_eq_m128i(r, _mm_setr_epi64x(1, 0)); | |
3939 | } | |
3940 | ||
83c7162d | 3941 | #[simd_test(enable = "sse2")] |
0531ce1d XL |
3942 | unsafe fn test_mm_set_epi32() { |
3943 | let r = _mm_set_epi32(0, 1, 2, 3); | |
3944 | assert_eq_m128i(r, _mm_setr_epi32(3, 2, 1, 0)); | |
3945 | } | |
3946 | ||
83c7162d | 3947 | #[simd_test(enable = "sse2")] |
0531ce1d XL |
3948 | unsafe fn test_mm_set_epi16() { |
3949 | let r = _mm_set_epi16(0, 1, 2, 3, 4, 5, 6, 7); | |
3950 | assert_eq_m128i(r, _mm_setr_epi16(7, 6, 5, 4, 3, 2, 1, 0)); | |
3951 | } | |
3952 | ||
83c7162d | 3953 | #[simd_test(enable = "sse2")] |
0531ce1d | 3954 | unsafe fn test_mm_set_epi8() { |
0731742a | 3955 | #[rustfmt::skip] |
0531ce1d XL |
3956 | let r = _mm_set_epi8( |
3957 | 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, | |
3958 | ); | |
0731742a | 3959 | #[rustfmt::skip] |
0531ce1d XL |
3960 | let e = _mm_setr_epi8( |
3961 | 15, 14, 13, 12, 11, 10, 9, 8, | |
3962 | 7, 6, 5, 4, 3, 2, 1, 0, | |
3963 | ); | |
3964 | assert_eq_m128i(r, e); | |
3965 | } | |
3966 | ||
83c7162d | 3967 | #[simd_test(enable = "sse2")] |
0531ce1d XL |
3968 | unsafe fn test_mm_set1_epi64x() { |
3969 | let r = _mm_set1_epi64x(1); | |
3970 | assert_eq_m128i(r, _mm_set1_epi64x(1)); | |
3971 | } | |
3972 | ||
83c7162d | 3973 | #[simd_test(enable = "sse2")] |
0531ce1d XL |
3974 | unsafe fn test_mm_set1_epi32() { |
3975 | let r = _mm_set1_epi32(1); | |
3976 | assert_eq_m128i(r, _mm_set1_epi32(1)); | |
3977 | } | |
3978 | ||
83c7162d | 3979 | #[simd_test(enable = "sse2")] |
0531ce1d XL |
3980 | unsafe fn test_mm_set1_epi16() { |
3981 | let r = _mm_set1_epi16(1); | |
3982 | assert_eq_m128i(r, _mm_set1_epi16(1)); | |
3983 | } | |
3984 | ||
83c7162d | 3985 | #[simd_test(enable = "sse2")] |
0531ce1d XL |
3986 | unsafe fn test_mm_set1_epi8() { |
3987 | let r = _mm_set1_epi8(1); | |
3988 | assert_eq_m128i(r, _mm_set1_epi8(1)); | |
3989 | } | |
3990 | ||
83c7162d | 3991 | #[simd_test(enable = "sse2")] |
0531ce1d XL |
3992 | unsafe fn test_mm_setr_epi32() { |
3993 | let r = _mm_setr_epi32(0, 1, 2, 3); | |
3994 | assert_eq_m128i(r, _mm_setr_epi32(0, 1, 2, 3)); | |
3995 | } | |
3996 | ||
83c7162d | 3997 | #[simd_test(enable = "sse2")] |
0531ce1d XL |
3998 | unsafe fn test_mm_setr_epi16() { |
3999 | let r = _mm_setr_epi16(0, 1, 2, 3, 4, 5, 6, 7); | |
4000 | assert_eq_m128i(r, _mm_setr_epi16(0, 1, 2, 3, 4, 5, 6, 7)); | |
4001 | } | |
4002 | ||
83c7162d | 4003 | #[simd_test(enable = "sse2")] |
0531ce1d | 4004 | unsafe fn test_mm_setr_epi8() { |
0731742a | 4005 | #[rustfmt::skip] |
0531ce1d XL |
4006 | let r = _mm_setr_epi8( |
4007 | 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, | |
4008 | ); | |
0731742a | 4009 | #[rustfmt::skip] |
0531ce1d XL |
4010 | let e = _mm_setr_epi8( |
4011 | 0, 1, 2, 3, 4, 5, 6, 7, | |
4012 | 8, 9, 10, 11, 12, 13, 14, 15, | |
4013 | ); | |
4014 | assert_eq_m128i(r, e); | |
4015 | } | |
4016 | ||
83c7162d | 4017 | #[simd_test(enable = "sse2")] |
0531ce1d XL |
4018 | unsafe fn test_mm_setzero_si128() { |
4019 | let r = _mm_setzero_si128(); | |
4020 | assert_eq_m128i(r, _mm_set1_epi64x(0)); | |
4021 | } | |
4022 | ||
83c7162d | 4023 | #[simd_test(enable = "sse2")] |
0531ce1d XL |
4024 | unsafe fn test_mm_loadl_epi64() { |
4025 | let a = _mm_setr_epi64x(6, 5); | |
4026 | let r = _mm_loadl_epi64(&a as *const _); | |
4027 | assert_eq_m128i(r, _mm_setr_epi64x(6, 0)); | |
4028 | } | |
4029 | ||
83c7162d | 4030 | #[simd_test(enable = "sse2")] |
0531ce1d XL |
4031 | unsafe fn test_mm_load_si128() { |
4032 | let a = _mm_set_epi64x(5, 6); | |
4033 | let r = _mm_load_si128(&a as *const _ as *const _); | |
4034 | assert_eq_m128i(a, r); | |
4035 | } | |
4036 | ||
83c7162d | 4037 | #[simd_test(enable = "sse2")] |
0531ce1d XL |
4038 | unsafe fn test_mm_loadu_si128() { |
4039 | let a = _mm_set_epi64x(5, 6); | |
4040 | let r = _mm_loadu_si128(&a as *const _ as *const _); | |
4041 | assert_eq_m128i(a, r); | |
4042 | } | |
4043 | ||
83c7162d | 4044 | #[simd_test(enable = "sse2")] |
0531ce1d XL |
4045 | unsafe fn test_mm_maskmoveu_si128() { |
4046 | let a = _mm_set1_epi8(9); | |
0731742a | 4047 | #[rustfmt::skip] |
0531ce1d XL |
4048 | let mask = _mm_set_epi8( |
4049 | 0, 0, 0x80u8 as i8, 0, 0, 0, 0, 0, | |
4050 | 0, 0, 0, 0, 0, 0, 0, 0, | |
4051 | ); | |
4052 | let mut r = _mm_set1_epi8(0); | |
4053 | _mm_maskmoveu_si128(a, mask, &mut r as *mut _ as *mut i8); | |
4054 | let e = _mm_set_epi8(0, 0, 9, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0); | |
4055 | assert_eq_m128i(r, e); | |
4056 | } | |
4057 | ||
83c7162d | 4058 | #[simd_test(enable = "sse2")] |
0531ce1d XL |
4059 | unsafe fn test_mm_store_si128() { |
4060 | let a = _mm_set1_epi8(9); | |
4061 | let mut r = _mm_set1_epi8(0); | |
4062 | _mm_store_si128(&mut r as *mut _ as *mut __m128i, a); | |
4063 | assert_eq_m128i(r, a); | |
4064 | } | |
4065 | ||
83c7162d | 4066 | #[simd_test(enable = "sse2")] |
0531ce1d XL |
4067 | unsafe fn test_mm_storeu_si128() { |
4068 | let a = _mm_set1_epi8(9); | |
4069 | let mut r = _mm_set1_epi8(0); | |
4070 | _mm_storeu_si128(&mut r as *mut _ as *mut __m128i, a); | |
4071 | assert_eq_m128i(r, a); | |
4072 | } | |
4073 | ||
83c7162d | 4074 | #[simd_test(enable = "sse2")] |
0531ce1d XL |
4075 | unsafe fn test_mm_storel_epi64() { |
4076 | let a = _mm_setr_epi64x(2, 9); | |
4077 | let mut r = _mm_set1_epi8(0); | |
4078 | _mm_storel_epi64(&mut r as *mut _ as *mut __m128i, a); | |
4079 | assert_eq_m128i(r, _mm_setr_epi64x(2, 0)); | |
4080 | } | |
4081 | ||
83c7162d | 4082 | #[simd_test(enable = "sse2")] |
0531ce1d XL |
4083 | unsafe fn test_mm_stream_si128() { |
4084 | let a = _mm_setr_epi32(1, 2, 3, 4); | |
4085 | let mut r = _mm_undefined_si128(); | |
4086 | _mm_stream_si128(&mut r as *mut _, a); | |
4087 | assert_eq_m128i(r, a); | |
4088 | } | |
4089 | ||
83c7162d | 4090 | #[simd_test(enable = "sse2")] |
0531ce1d XL |
4091 | unsafe fn test_mm_stream_si32() { |
4092 | let a: i32 = 7; | |
4093 | let mut mem = ::std::boxed::Box::<i32>::new(-1); | |
4094 | _mm_stream_si32(&mut *mem as *mut i32, a); | |
4095 | assert_eq!(a, *mem); | |
4096 | } | |
4097 | ||
83c7162d | 4098 | #[simd_test(enable = "sse2")] |
0531ce1d XL |
4099 | unsafe fn test_mm_move_epi64() { |
4100 | let a = _mm_setr_epi64x(5, 6); | |
4101 | let r = _mm_move_epi64(a); | |
4102 | assert_eq_m128i(r, _mm_setr_epi64x(5, 0)); | |
4103 | } | |
4104 | ||
83c7162d | 4105 | #[simd_test(enable = "sse2")] |
0531ce1d XL |
4106 | unsafe fn test_mm_packs_epi16() { |
4107 | let a = _mm_setr_epi16(0x80, -0x81, 0, 0, 0, 0, 0, 0); | |
4108 | let b = _mm_setr_epi16(0, 0, 0, 0, 0, 0, -0x81, 0x80); | |
4109 | let r = _mm_packs_epi16(a, b); | |
0731742a | 4110 | #[rustfmt::skip] |
0531ce1d XL |
4111 | assert_eq_m128i( |
4112 | r, | |
4113 | _mm_setr_epi8( | |
4114 | 0x7F, -0x80, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, -0x80, 0x7F | |
4115 | ) | |
4116 | ); | |
4117 | } | |
4118 | ||
83c7162d | 4119 | #[simd_test(enable = "sse2")] |
0531ce1d XL |
4120 | unsafe fn test_mm_packs_epi32() { |
4121 | let a = _mm_setr_epi32(0x8000, -0x8001, 0, 0); | |
4122 | let b = _mm_setr_epi32(0, 0, -0x8001, 0x8000); | |
4123 | let r = _mm_packs_epi32(a, b); | |
4124 | assert_eq_m128i( | |
4125 | r, | |
4126 | _mm_setr_epi16(0x7FFF, -0x8000, 0, 0, 0, 0, -0x8000, 0x7FFF), | |
4127 | ); | |
4128 | } | |
4129 | ||
83c7162d | 4130 | #[simd_test(enable = "sse2")] |
0531ce1d XL |
4131 | unsafe fn test_mm_packus_epi16() { |
4132 | let a = _mm_setr_epi16(0x100, -1, 0, 0, 0, 0, 0, 0); | |
4133 | let b = _mm_setr_epi16(0, 0, 0, 0, 0, 0, -1, 0x100); | |
4134 | let r = _mm_packus_epi16(a, b); | |
4135 | assert_eq_m128i( | |
4136 | r, | |
4137 | _mm_setr_epi8(!0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, !0), | |
4138 | ); | |
4139 | } | |
4140 | ||
83c7162d | 4141 | #[simd_test(enable = "sse2")] |
0531ce1d XL |
4142 | unsafe fn test_mm_extract_epi16() { |
4143 | let a = _mm_setr_epi16(-1, 1, 2, 3, 4, 5, 6, 7); | |
4144 | let r1 = _mm_extract_epi16(a, 0); | |
4145 | let r2 = _mm_extract_epi16(a, 11); | |
4146 | assert_eq!(r1, -1); | |
4147 | assert_eq!(r2, 3); | |
4148 | } | |
4149 | ||
83c7162d | 4150 | #[simd_test(enable = "sse2")] |
0531ce1d XL |
4151 | unsafe fn test_mm_insert_epi16() { |
4152 | let a = _mm_setr_epi16(0, 1, 2, 3, 4, 5, 6, 7); | |
4153 | let r = _mm_insert_epi16(a, 9, 0); | |
4154 | let e = _mm_setr_epi16(9, 1, 2, 3, 4, 5, 6, 7); | |
4155 | assert_eq_m128i(r, e); | |
4156 | } | |
4157 | ||
83c7162d | 4158 | #[simd_test(enable = "sse2")] |
0531ce1d | 4159 | unsafe fn test_mm_movemask_epi8() { |
0731742a | 4160 | #[rustfmt::skip] |
0531ce1d XL |
4161 | let a = _mm_setr_epi8( |
4162 | 0b1000_0000u8 as i8, 0b0, 0b1000_0000u8 as i8, 0b01, | |
4163 | 0b0101, 0b1111_0000u8 as i8, 0, 0, | |
4164 | 0, 0, 0b1111_0000u8 as i8, 0b0101, | |
4165 | 0b01, 0b1000_0000u8 as i8, 0b0, 0b1000_0000u8 as i8, | |
4166 | ); | |
4167 | let r = _mm_movemask_epi8(a); | |
4168 | assert_eq!(r, 0b10100100_00100101); | |
4169 | } | |
4170 | ||
83c7162d | 4171 | #[simd_test(enable = "sse2")] |
0531ce1d XL |
4172 | unsafe fn test_mm_shuffle_epi32() { |
4173 | let a = _mm_setr_epi32(5, 10, 15, 20); | |
4174 | let r = _mm_shuffle_epi32(a, 0b00_01_01_11); | |
4175 | let e = _mm_setr_epi32(20, 10, 10, 5); | |
4176 | assert_eq_m128i(r, e); | |
4177 | } | |
4178 | ||
83c7162d | 4179 | #[simd_test(enable = "sse2")] |
0531ce1d XL |
4180 | unsafe fn test_mm_shufflehi_epi16() { |
4181 | let a = _mm_setr_epi16(1, 2, 3, 4, 5, 10, 15, 20); | |
4182 | let r = _mm_shufflehi_epi16(a, 0b00_01_01_11); | |
4183 | let e = _mm_setr_epi16(1, 2, 3, 4, 20, 10, 10, 5); | |
4184 | assert_eq_m128i(r, e); | |
4185 | } | |
4186 | ||
83c7162d | 4187 | #[simd_test(enable = "sse2")] |
0531ce1d XL |
4188 | unsafe fn test_mm_shufflelo_epi16() { |
4189 | let a = _mm_setr_epi16(5, 10, 15, 20, 1, 2, 3, 4); | |
4190 | let r = _mm_shufflelo_epi16(a, 0b00_01_01_11); | |
4191 | let e = _mm_setr_epi16(20, 10, 10, 5, 1, 2, 3, 4); | |
4192 | assert_eq_m128i(r, e); | |
4193 | } | |
4194 | ||
83c7162d | 4195 | #[simd_test(enable = "sse2")] |
0531ce1d | 4196 | unsafe fn test_mm_unpackhi_epi8() { |
0731742a | 4197 | #[rustfmt::skip] |
0531ce1d XL |
4198 | let a = _mm_setr_epi8( |
4199 | 0, 1, 2, 3, 4, 5, 6, 7, | |
4200 | 8, 9, 10, 11, 12, 13, 14, 15, | |
4201 | ); | |
0731742a | 4202 | #[rustfmt::skip] |
0531ce1d XL |
4203 | let b = _mm_setr_epi8( |
4204 | 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, | |
4205 | ); | |
4206 | let r = _mm_unpackhi_epi8(a, b); | |
0731742a | 4207 | #[rustfmt::skip] |
0531ce1d XL |
4208 | let e = _mm_setr_epi8( |
4209 | 8, 24, 9, 25, 10, 26, 11, 27, 12, 28, 13, 29, 14, 30, 15, 31, | |
4210 | ); | |
4211 | assert_eq_m128i(r, e); | |
4212 | } | |
4213 | ||
83c7162d | 4214 | #[simd_test(enable = "sse2")] |
0531ce1d XL |
4215 | unsafe fn test_mm_unpackhi_epi16() { |
4216 | let a = _mm_setr_epi16(0, 1, 2, 3, 4, 5, 6, 7); | |
4217 | let b = _mm_setr_epi16(8, 9, 10, 11, 12, 13, 14, 15); | |
4218 | let r = _mm_unpackhi_epi16(a, b); | |
4219 | let e = _mm_setr_epi16(4, 12, 5, 13, 6, 14, 7, 15); | |
4220 | assert_eq_m128i(r, e); | |
4221 | } | |
4222 | ||
83c7162d | 4223 | #[simd_test(enable = "sse2")] |
0531ce1d XL |
4224 | unsafe fn test_mm_unpackhi_epi32() { |
4225 | let a = _mm_setr_epi32(0, 1, 2, 3); | |
4226 | let b = _mm_setr_epi32(4, 5, 6, 7); | |
4227 | let r = _mm_unpackhi_epi32(a, b); | |
4228 | let e = _mm_setr_epi32(2, 6, 3, 7); | |
4229 | assert_eq_m128i(r, e); | |
4230 | } | |
4231 | ||
83c7162d | 4232 | #[simd_test(enable = "sse2")] |
0531ce1d XL |
4233 | unsafe fn test_mm_unpackhi_epi64() { |
4234 | let a = _mm_setr_epi64x(0, 1); | |
4235 | let b = _mm_setr_epi64x(2, 3); | |
4236 | let r = _mm_unpackhi_epi64(a, b); | |
4237 | let e = _mm_setr_epi64x(1, 3); | |
4238 | assert_eq_m128i(r, e); | |
4239 | } | |
4240 | ||
83c7162d | 4241 | #[simd_test(enable = "sse2")] |
0531ce1d | 4242 | unsafe fn test_mm_unpacklo_epi8() { |
0731742a | 4243 | #[rustfmt::skip] |
0531ce1d XL |
4244 | let a = _mm_setr_epi8( |
4245 | 0, 1, 2, 3, 4, 5, 6, 7, | |
4246 | 8, 9, 10, 11, 12, 13, 14, 15, | |
4247 | ); | |
0731742a | 4248 | #[rustfmt::skip] |
0531ce1d XL |
4249 | let b = _mm_setr_epi8( |
4250 | 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, | |
4251 | ); | |
4252 | let r = _mm_unpacklo_epi8(a, b); | |
0731742a | 4253 | #[rustfmt::skip] |
0531ce1d XL |
4254 | let e = _mm_setr_epi8( |
4255 | 0, 16, 1, 17, 2, 18, 3, 19, | |
4256 | 4, 20, 5, 21, 6, 22, 7, 23, | |
4257 | ); | |
4258 | assert_eq_m128i(r, e); | |
4259 | } | |
4260 | ||
83c7162d | 4261 | #[simd_test(enable = "sse2")] |
0531ce1d XL |
4262 | unsafe fn test_mm_unpacklo_epi16() { |
4263 | let a = _mm_setr_epi16(0, 1, 2, 3, 4, 5, 6, 7); | |
4264 | let b = _mm_setr_epi16(8, 9, 10, 11, 12, 13, 14, 15); | |
4265 | let r = _mm_unpacklo_epi16(a, b); | |
4266 | let e = _mm_setr_epi16(0, 8, 1, 9, 2, 10, 3, 11); | |
4267 | assert_eq_m128i(r, e); | |
4268 | } | |
4269 | ||
83c7162d | 4270 | #[simd_test(enable = "sse2")] |
0531ce1d XL |
4271 | unsafe fn test_mm_unpacklo_epi32() { |
4272 | let a = _mm_setr_epi32(0, 1, 2, 3); | |
4273 | let b = _mm_setr_epi32(4, 5, 6, 7); | |
4274 | let r = _mm_unpacklo_epi32(a, b); | |
4275 | let e = _mm_setr_epi32(0, 4, 1, 5); | |
4276 | assert_eq_m128i(r, e); | |
4277 | } | |
4278 | ||
83c7162d | 4279 | #[simd_test(enable = "sse2")] |
0531ce1d XL |
4280 | unsafe fn test_mm_unpacklo_epi64() { |
4281 | let a = _mm_setr_epi64x(0, 1); | |
4282 | let b = _mm_setr_epi64x(2, 3); | |
4283 | let r = _mm_unpacklo_epi64(a, b); | |
4284 | let e = _mm_setr_epi64x(0, 2); | |
4285 | assert_eq_m128i(r, e); | |
4286 | } | |
4287 | ||
83c7162d | 4288 | #[simd_test(enable = "sse2")] |
0531ce1d XL |
4289 | unsafe fn test_mm_add_sd() { |
4290 | let a = _mm_setr_pd(1.0, 2.0); | |
4291 | let b = _mm_setr_pd(5.0, 10.0); | |
4292 | let r = _mm_add_sd(a, b); | |
4293 | assert_eq_m128d(r, _mm_setr_pd(6.0, 2.0)); | |
4294 | } | |
4295 | ||
83c7162d | 4296 | #[simd_test(enable = "sse2")] |
0531ce1d XL |
4297 | unsafe fn test_mm_add_pd() { |
4298 | let a = _mm_setr_pd(1.0, 2.0); | |
4299 | let b = _mm_setr_pd(5.0, 10.0); | |
4300 | let r = _mm_add_pd(a, b); | |
4301 | assert_eq_m128d(r, _mm_setr_pd(6.0, 12.0)); | |
4302 | } | |
4303 | ||
83c7162d | 4304 | #[simd_test(enable = "sse2")] |
0531ce1d XL |
4305 | unsafe fn test_mm_div_sd() { |
4306 | let a = _mm_setr_pd(1.0, 2.0); | |
4307 | let b = _mm_setr_pd(5.0, 10.0); | |
4308 | let r = _mm_div_sd(a, b); | |
4309 | assert_eq_m128d(r, _mm_setr_pd(0.2, 2.0)); | |
4310 | } | |
4311 | ||
83c7162d | 4312 | #[simd_test(enable = "sse2")] |
0531ce1d XL |
4313 | unsafe fn test_mm_div_pd() { |
4314 | let a = _mm_setr_pd(1.0, 2.0); | |
4315 | let b = _mm_setr_pd(5.0, 10.0); | |
4316 | let r = _mm_div_pd(a, b); | |
4317 | assert_eq_m128d(r, _mm_setr_pd(0.2, 0.2)); | |
4318 | } | |
4319 | ||
83c7162d | 4320 | #[simd_test(enable = "sse2")] |
0531ce1d XL |
4321 | unsafe fn test_mm_max_sd() { |
4322 | let a = _mm_setr_pd(1.0, 2.0); | |
4323 | let b = _mm_setr_pd(5.0, 10.0); | |
4324 | let r = _mm_max_sd(a, b); | |
4325 | assert_eq_m128d(r, _mm_setr_pd(5.0, 2.0)); | |
4326 | } | |
4327 | ||
83c7162d | 4328 | #[simd_test(enable = "sse2")] |
0531ce1d XL |
4329 | unsafe fn test_mm_max_pd() { |
4330 | let a = _mm_setr_pd(1.0, 2.0); | |
4331 | let b = _mm_setr_pd(5.0, 10.0); | |
4332 | let r = _mm_max_pd(a, b); | |
4333 | assert_eq_m128d(r, _mm_setr_pd(5.0, 10.0)); | |
4334 | } | |
4335 | ||
83c7162d | 4336 | #[simd_test(enable = "sse2")] |
0531ce1d XL |
4337 | unsafe fn test_mm_min_sd() { |
4338 | let a = _mm_setr_pd(1.0, 2.0); | |
4339 | let b = _mm_setr_pd(5.0, 10.0); | |
4340 | let r = _mm_min_sd(a, b); | |
4341 | assert_eq_m128d(r, _mm_setr_pd(1.0, 2.0)); | |
4342 | } | |
4343 | ||
83c7162d | 4344 | #[simd_test(enable = "sse2")] |
0531ce1d XL |
4345 | unsafe fn test_mm_min_pd() { |
4346 | let a = _mm_setr_pd(1.0, 2.0); | |
4347 | let b = _mm_setr_pd(5.0, 10.0); | |
4348 | let r = _mm_min_pd(a, b); | |
4349 | assert_eq_m128d(r, _mm_setr_pd(1.0, 2.0)); | |
4350 | } | |
4351 | ||
83c7162d | 4352 | #[simd_test(enable = "sse2")] |
0531ce1d XL |
4353 | unsafe fn test_mm_mul_sd() { |
4354 | let a = _mm_setr_pd(1.0, 2.0); | |
4355 | let b = _mm_setr_pd(5.0, 10.0); | |
4356 | let r = _mm_mul_sd(a, b); | |
4357 | assert_eq_m128d(r, _mm_setr_pd(5.0, 2.0)); | |
4358 | } | |
4359 | ||
83c7162d | 4360 | #[simd_test(enable = "sse2")] |
0531ce1d XL |
4361 | unsafe fn test_mm_mul_pd() { |
4362 | let a = _mm_setr_pd(1.0, 2.0); | |
4363 | let b = _mm_setr_pd(5.0, 10.0); | |
4364 | let r = _mm_mul_pd(a, b); | |
4365 | assert_eq_m128d(r, _mm_setr_pd(5.0, 20.0)); | |
4366 | } | |
4367 | ||
83c7162d | 4368 | #[simd_test(enable = "sse2")] |
0531ce1d XL |
4369 | unsafe fn test_mm_sqrt_sd() { |
4370 | let a = _mm_setr_pd(1.0, 2.0); | |
4371 | let b = _mm_setr_pd(5.0, 10.0); | |
4372 | let r = _mm_sqrt_sd(a, b); | |
4373 | assert_eq_m128d(r, _mm_setr_pd(5.0f64.sqrt(), 2.0)); | |
4374 | } | |
4375 | ||
83c7162d | 4376 | #[simd_test(enable = "sse2")] |
0531ce1d XL |
4377 | unsafe fn test_mm_sqrt_pd() { |
4378 | let r = _mm_sqrt_pd(_mm_setr_pd(1.0, 2.0)); | |
4379 | assert_eq_m128d(r, _mm_setr_pd(1.0f64.sqrt(), 2.0f64.sqrt())); | |
4380 | } | |
4381 | ||
83c7162d | 4382 | #[simd_test(enable = "sse2")] |
0531ce1d XL |
4383 | unsafe fn test_mm_sub_sd() { |
4384 | let a = _mm_setr_pd(1.0, 2.0); | |
4385 | let b = _mm_setr_pd(5.0, 10.0); | |
4386 | let r = _mm_sub_sd(a, b); | |
4387 | assert_eq_m128d(r, _mm_setr_pd(-4.0, 2.0)); | |
4388 | } | |
4389 | ||
83c7162d | 4390 | #[simd_test(enable = "sse2")] |
0531ce1d XL |
4391 | unsafe fn test_mm_sub_pd() { |
4392 | let a = _mm_setr_pd(1.0, 2.0); | |
4393 | let b = _mm_setr_pd(5.0, 10.0); | |
4394 | let r = _mm_sub_pd(a, b); | |
4395 | assert_eq_m128d(r, _mm_setr_pd(-4.0, -8.0)); | |
4396 | } | |
4397 | ||
83c7162d | 4398 | #[simd_test(enable = "sse2")] |
0531ce1d XL |
4399 | unsafe fn test_mm_and_pd() { |
4400 | let a = transmute(u64x2::splat(5)); | |
4401 | let b = transmute(u64x2::splat(3)); | |
4402 | let r = _mm_and_pd(a, b); | |
4403 | let e = transmute(u64x2::splat(1)); | |
4404 | assert_eq_m128d(r, e); | |
4405 | } | |
4406 | ||
83c7162d | 4407 | #[simd_test(enable = "sse2")] |
0531ce1d XL |
4408 | unsafe fn test_mm_andnot_pd() { |
4409 | let a = transmute(u64x2::splat(5)); | |
4410 | let b = transmute(u64x2::splat(3)); | |
4411 | let r = _mm_andnot_pd(a, b); | |
4412 | let e = transmute(u64x2::splat(2)); | |
4413 | assert_eq_m128d(r, e); | |
4414 | } | |
4415 | ||
83c7162d | 4416 | #[simd_test(enable = "sse2")] |
0531ce1d XL |
4417 | unsafe fn test_mm_or_pd() { |
4418 | let a = transmute(u64x2::splat(5)); | |
4419 | let b = transmute(u64x2::splat(3)); | |
4420 | let r = _mm_or_pd(a, b); | |
4421 | let e = transmute(u64x2::splat(7)); | |
4422 | assert_eq_m128d(r, e); | |
4423 | } | |
4424 | ||
83c7162d | 4425 | #[simd_test(enable = "sse2")] |
0531ce1d XL |
4426 | unsafe fn test_mm_xor_pd() { |
4427 | let a = transmute(u64x2::splat(5)); | |
4428 | let b = transmute(u64x2::splat(3)); | |
4429 | let r = _mm_xor_pd(a, b); | |
4430 | let e = transmute(u64x2::splat(6)); | |
4431 | assert_eq_m128d(r, e); | |
4432 | } | |
4433 | ||
83c7162d | 4434 | #[simd_test(enable = "sse2")] |
0531ce1d XL |
4435 | unsafe fn test_mm_cmpeq_sd() { |
4436 | let (a, b) = (_mm_setr_pd(1.0, 2.0), _mm_setr_pd(1.0, 3.0)); | |
4437 | let e = _mm_setr_epi64x(!0, transmute(2.0f64)); | |
4438 | let r = transmute::<_, __m128i>(_mm_cmpeq_sd(a, b)); | |
4439 | assert_eq_m128i(r, e); | |
4440 | } | |
4441 | ||
83c7162d | 4442 | #[simd_test(enable = "sse2")] |
0531ce1d XL |
4443 | unsafe fn test_mm_cmplt_sd() { |
4444 | let (a, b) = (_mm_setr_pd(1.0, 2.0), _mm_setr_pd(5.0, 3.0)); | |
4445 | let e = _mm_setr_epi64x(!0, transmute(2.0f64)); | |
4446 | let r = transmute::<_, __m128i>(_mm_cmplt_sd(a, b)); | |
4447 | assert_eq_m128i(r, e); | |
4448 | } | |
4449 | ||
83c7162d | 4450 | #[simd_test(enable = "sse2")] |
0531ce1d XL |
4451 | unsafe fn test_mm_cmple_sd() { |
4452 | let (a, b) = (_mm_setr_pd(1.0, 2.0), _mm_setr_pd(1.0, 3.0)); | |
4453 | let e = _mm_setr_epi64x(!0, transmute(2.0f64)); | |
4454 | let r = transmute::<_, __m128i>(_mm_cmple_sd(a, b)); | |
4455 | assert_eq_m128i(r, e); | |
4456 | } | |
4457 | ||
83c7162d | 4458 | #[simd_test(enable = "sse2")] |
0531ce1d XL |
4459 | unsafe fn test_mm_cmpgt_sd() { |
4460 | let (a, b) = (_mm_setr_pd(5.0, 2.0), _mm_setr_pd(1.0, 3.0)); | |
4461 | let e = _mm_setr_epi64x(!0, transmute(2.0f64)); | |
4462 | let r = transmute::<_, __m128i>(_mm_cmpgt_sd(a, b)); | |
4463 | assert_eq_m128i(r, e); | |
4464 | } | |
4465 | ||
83c7162d | 4466 | #[simd_test(enable = "sse2")] |
0531ce1d XL |
4467 | unsafe fn test_mm_cmpge_sd() { |
4468 | let (a, b) = (_mm_setr_pd(1.0, 2.0), _mm_setr_pd(1.0, 3.0)); | |
4469 | let e = _mm_setr_epi64x(!0, transmute(2.0f64)); | |
4470 | let r = transmute::<_, __m128i>(_mm_cmpge_sd(a, b)); | |
4471 | assert_eq_m128i(r, e); | |
4472 | } | |
4473 | ||
83c7162d | 4474 | #[simd_test(enable = "sse2")] |
0531ce1d XL |
4475 | unsafe fn test_mm_cmpord_sd() { |
4476 | let (a, b) = (_mm_setr_pd(NAN, 2.0), _mm_setr_pd(5.0, 3.0)); | |
4477 | let e = _mm_setr_epi64x(0, transmute(2.0f64)); | |
4478 | let r = transmute::<_, __m128i>(_mm_cmpord_sd(a, b)); | |
4479 | assert_eq_m128i(r, e); | |
4480 | } | |
4481 | ||
83c7162d | 4482 | #[simd_test(enable = "sse2")] |
0531ce1d XL |
4483 | unsafe fn test_mm_cmpunord_sd() { |
4484 | let (a, b) = (_mm_setr_pd(NAN, 2.0), _mm_setr_pd(5.0, 3.0)); | |
4485 | let e = _mm_setr_epi64x(!0, transmute(2.0f64)); | |
4486 | let r = transmute::<_, __m128i>(_mm_cmpunord_sd(a, b)); | |
4487 | assert_eq_m128i(r, e); | |
4488 | } | |
4489 | ||
83c7162d | 4490 | #[simd_test(enable = "sse2")] |
0531ce1d XL |
4491 | unsafe fn test_mm_cmpneq_sd() { |
4492 | let (a, b) = (_mm_setr_pd(1.0, 2.0), _mm_setr_pd(5.0, 3.0)); | |
4493 | let e = _mm_setr_epi64x(!0, transmute(2.0f64)); | |
4494 | let r = transmute::<_, __m128i>(_mm_cmpneq_sd(a, b)); | |
4495 | assert_eq_m128i(r, e); | |
4496 | } | |
4497 | ||
83c7162d | 4498 | #[simd_test(enable = "sse2")] |
0531ce1d XL |
4499 | unsafe fn test_mm_cmpnlt_sd() { |
4500 | let (a, b) = (_mm_setr_pd(1.0, 2.0), _mm_setr_pd(5.0, 3.0)); | |
4501 | let e = _mm_setr_epi64x(0, transmute(2.0f64)); | |
4502 | let r = transmute::<_, __m128i>(_mm_cmpnlt_sd(a, b)); | |
4503 | assert_eq_m128i(r, e); | |
4504 | } | |
4505 | ||
83c7162d | 4506 | #[simd_test(enable = "sse2")] |
0531ce1d XL |
4507 | unsafe fn test_mm_cmpnle_sd() { |
4508 | let (a, b) = (_mm_setr_pd(1.0, 2.0), _mm_setr_pd(1.0, 3.0)); | |
4509 | let e = _mm_setr_epi64x(0, transmute(2.0f64)); | |
4510 | let r = transmute::<_, __m128i>(_mm_cmpnle_sd(a, b)); | |
4511 | assert_eq_m128i(r, e); | |
4512 | } | |
4513 | ||
83c7162d | 4514 | #[simd_test(enable = "sse2")] |
0531ce1d XL |
4515 | unsafe fn test_mm_cmpngt_sd() { |
4516 | let (a, b) = (_mm_setr_pd(5.0, 2.0), _mm_setr_pd(1.0, 3.0)); | |
4517 | let e = _mm_setr_epi64x(0, transmute(2.0f64)); | |
4518 | let r = transmute::<_, __m128i>(_mm_cmpngt_sd(a, b)); | |
4519 | assert_eq_m128i(r, e); | |
4520 | } | |
4521 | ||
83c7162d | 4522 | #[simd_test(enable = "sse2")] |
0531ce1d XL |
4523 | unsafe fn test_mm_cmpnge_sd() { |
4524 | let (a, b) = (_mm_setr_pd(1.0, 2.0), _mm_setr_pd(1.0, 3.0)); | |
4525 | let e = _mm_setr_epi64x(0, transmute(2.0f64)); | |
4526 | let r = transmute::<_, __m128i>(_mm_cmpnge_sd(a, b)); | |
4527 | assert_eq_m128i(r, e); | |
4528 | } | |
4529 | ||
83c7162d | 4530 | #[simd_test(enable = "sse2")] |
0531ce1d XL |
4531 | unsafe fn test_mm_cmpeq_pd() { |
4532 | let (a, b) = (_mm_setr_pd(1.0, 2.0), _mm_setr_pd(1.0, 3.0)); | |
4533 | let e = _mm_setr_epi64x(!0, 0); | |
4534 | let r = transmute::<_, __m128i>(_mm_cmpeq_pd(a, b)); | |
4535 | assert_eq_m128i(r, e); | |
4536 | } | |
4537 | ||
83c7162d | 4538 | #[simd_test(enable = "sse2")] |
0531ce1d XL |
4539 | unsafe fn test_mm_cmplt_pd() { |
4540 | let (a, b) = (_mm_setr_pd(1.0, 2.0), _mm_setr_pd(1.0, 3.0)); | |
4541 | let e = _mm_setr_epi64x(0, !0); | |
4542 | let r = transmute::<_, __m128i>(_mm_cmplt_pd(a, b)); | |
4543 | assert_eq_m128i(r, e); | |
4544 | } | |
4545 | ||
83c7162d | 4546 | #[simd_test(enable = "sse2")] |
0531ce1d XL |
4547 | unsafe fn test_mm_cmple_pd() { |
4548 | let (a, b) = (_mm_setr_pd(1.0, 2.0), _mm_setr_pd(1.0, 3.0)); | |
4549 | let e = _mm_setr_epi64x(!0, !0); | |
4550 | let r = transmute::<_, __m128i>(_mm_cmple_pd(a, b)); | |
4551 | assert_eq_m128i(r, e); | |
4552 | } | |
4553 | ||
83c7162d | 4554 | #[simd_test(enable = "sse2")] |
0531ce1d XL |
4555 | unsafe fn test_mm_cmpgt_pd() { |
4556 | let (a, b) = (_mm_setr_pd(1.0, 2.0), _mm_setr_pd(1.0, 3.0)); | |
4557 | let e = _mm_setr_epi64x(0, 0); | |
4558 | let r = transmute::<_, __m128i>(_mm_cmpgt_pd(a, b)); | |
4559 | assert_eq_m128i(r, e); | |
4560 | } | |
4561 | ||
83c7162d | 4562 | #[simd_test(enable = "sse2")] |
0531ce1d XL |
4563 | unsafe fn test_mm_cmpge_pd() { |
4564 | let (a, b) = (_mm_setr_pd(1.0, 2.0), _mm_setr_pd(1.0, 3.0)); | |
4565 | let e = _mm_setr_epi64x(!0, 0); | |
4566 | let r = transmute::<_, __m128i>(_mm_cmpge_pd(a, b)); | |
4567 | assert_eq_m128i(r, e); | |
4568 | } | |
4569 | ||
83c7162d | 4570 | #[simd_test(enable = "sse2")] |
0531ce1d XL |
4571 | unsafe fn test_mm_cmpord_pd() { |
4572 | let (a, b) = (_mm_setr_pd(NAN, 2.0), _mm_setr_pd(5.0, 3.0)); | |
4573 | let e = _mm_setr_epi64x(0, !0); | |
4574 | let r = transmute::<_, __m128i>(_mm_cmpord_pd(a, b)); | |
4575 | assert_eq_m128i(r, e); | |
4576 | } | |
4577 | ||
83c7162d | 4578 | #[simd_test(enable = "sse2")] |
0531ce1d XL |
4579 | unsafe fn test_mm_cmpunord_pd() { |
4580 | let (a, b) = (_mm_setr_pd(NAN, 2.0), _mm_setr_pd(5.0, 3.0)); | |
4581 | let e = _mm_setr_epi64x(!0, 0); | |
4582 | let r = transmute::<_, __m128i>(_mm_cmpunord_pd(a, b)); | |
4583 | assert_eq_m128i(r, e); | |
4584 | } | |
4585 | ||
83c7162d | 4586 | #[simd_test(enable = "sse2")] |
0531ce1d XL |
4587 | unsafe fn test_mm_cmpneq_pd() { |
4588 | let (a, b) = (_mm_setr_pd(1.0, 2.0), _mm_setr_pd(5.0, 3.0)); | |
4589 | let e = _mm_setr_epi64x(!0, !0); | |
4590 | let r = transmute::<_, __m128i>(_mm_cmpneq_pd(a, b)); | |
4591 | assert_eq_m128i(r, e); | |
4592 | } | |
4593 | ||
83c7162d | 4594 | #[simd_test(enable = "sse2")] |
0531ce1d XL |
4595 | unsafe fn test_mm_cmpnlt_pd() { |
4596 | let (a, b) = (_mm_setr_pd(1.0, 2.0), _mm_setr_pd(5.0, 3.0)); | |
4597 | let e = _mm_setr_epi64x(0, 0); | |
4598 | let r = transmute::<_, __m128i>(_mm_cmpnlt_pd(a, b)); | |
4599 | assert_eq_m128i(r, e); | |
4600 | } | |
4601 | ||
83c7162d | 4602 | #[simd_test(enable = "sse2")] |
0531ce1d XL |
4603 | unsafe fn test_mm_cmpnle_pd() { |
4604 | let (a, b) = (_mm_setr_pd(1.0, 2.0), _mm_setr_pd(1.0, 3.0)); | |
4605 | let e = _mm_setr_epi64x(0, 0); | |
4606 | let r = transmute::<_, __m128i>(_mm_cmpnle_pd(a, b)); | |
4607 | assert_eq_m128i(r, e); | |
4608 | } | |
4609 | ||
83c7162d | 4610 | #[simd_test(enable = "sse2")] |
0531ce1d XL |
4611 | unsafe fn test_mm_cmpngt_pd() { |
4612 | let (a, b) = (_mm_setr_pd(5.0, 2.0), _mm_setr_pd(1.0, 3.0)); | |
4613 | let e = _mm_setr_epi64x(0, !0); | |
4614 | let r = transmute::<_, __m128i>(_mm_cmpngt_pd(a, b)); | |
4615 | assert_eq_m128i(r, e); | |
4616 | } | |
4617 | ||
83c7162d | 4618 | #[simd_test(enable = "sse2")] |
0531ce1d XL |
4619 | unsafe fn test_mm_cmpnge_pd() { |
4620 | let (a, b) = (_mm_setr_pd(1.0, 2.0), _mm_setr_pd(1.0, 3.0)); | |
4621 | let e = _mm_setr_epi64x(0, !0); | |
4622 | let r = transmute::<_, __m128i>(_mm_cmpnge_pd(a, b)); | |
4623 | assert_eq_m128i(r, e); | |
4624 | } | |
4625 | ||
83c7162d | 4626 | #[simd_test(enable = "sse2")] |
0531ce1d XL |
4627 | unsafe fn test_mm_comieq_sd() { |
4628 | let (a, b) = (_mm_setr_pd(1.0, 2.0), _mm_setr_pd(1.0, 3.0)); | |
4629 | assert!(_mm_comieq_sd(a, b) != 0); | |
4630 | ||
4631 | let (a, b) = (_mm_setr_pd(NAN, 2.0), _mm_setr_pd(1.0, 3.0)); | |
4632 | assert!(_mm_comieq_sd(a, b) == 0); | |
4633 | } | |
4634 | ||
83c7162d | 4635 | #[simd_test(enable = "sse2")] |
0531ce1d XL |
4636 | unsafe fn test_mm_comilt_sd() { |
4637 | let (a, b) = (_mm_setr_pd(1.0, 2.0), _mm_setr_pd(1.0, 3.0)); | |
4638 | assert!(_mm_comilt_sd(a, b) == 0); | |
4639 | } | |
4640 | ||
83c7162d | 4641 | #[simd_test(enable = "sse2")] |
0531ce1d XL |
4642 | unsafe fn test_mm_comile_sd() { |
4643 | let (a, b) = (_mm_setr_pd(1.0, 2.0), _mm_setr_pd(1.0, 3.0)); | |
4644 | assert!(_mm_comile_sd(a, b) != 0); | |
4645 | } | |
4646 | ||
83c7162d | 4647 | #[simd_test(enable = "sse2")] |
0531ce1d XL |
4648 | unsafe fn test_mm_comigt_sd() { |
4649 | let (a, b) = (_mm_setr_pd(1.0, 2.0), _mm_setr_pd(1.0, 3.0)); | |
4650 | assert!(_mm_comigt_sd(a, b) == 0); | |
4651 | } | |
4652 | ||
83c7162d | 4653 | #[simd_test(enable = "sse2")] |
0531ce1d XL |
4654 | unsafe fn test_mm_comige_sd() { |
4655 | let (a, b) = (_mm_setr_pd(1.0, 2.0), _mm_setr_pd(1.0, 3.0)); | |
4656 | assert!(_mm_comige_sd(a, b) != 0); | |
4657 | } | |
4658 | ||
83c7162d | 4659 | #[simd_test(enable = "sse2")] |
0531ce1d XL |
4660 | unsafe fn test_mm_comineq_sd() { |
4661 | let (a, b) = (_mm_setr_pd(1.0, 2.0), _mm_setr_pd(1.0, 3.0)); | |
4662 | assert!(_mm_comineq_sd(a, b) == 0); | |
4663 | } | |
4664 | ||
83c7162d | 4665 | #[simd_test(enable = "sse2")] |
0531ce1d XL |
4666 | unsafe fn test_mm_ucomieq_sd() { |
4667 | let (a, b) = (_mm_setr_pd(1.0, 2.0), _mm_setr_pd(1.0, 3.0)); | |
4668 | assert!(_mm_ucomieq_sd(a, b) != 0); | |
4669 | ||
4670 | let (a, b) = (_mm_setr_pd(NAN, 2.0), _mm_setr_pd(NAN, 3.0)); | |
4671 | assert!(_mm_ucomieq_sd(a, b) == 0); | |
4672 | } | |
4673 | ||
83c7162d | 4674 | #[simd_test(enable = "sse2")] |
0531ce1d XL |
4675 | unsafe fn test_mm_ucomilt_sd() { |
4676 | let (a, b) = (_mm_setr_pd(1.0, 2.0), _mm_setr_pd(1.0, 3.0)); | |
4677 | assert!(_mm_ucomilt_sd(a, b) == 0); | |
4678 | } | |
4679 | ||
83c7162d | 4680 | #[simd_test(enable = "sse2")] |
0531ce1d XL |
4681 | unsafe fn test_mm_ucomile_sd() { |
4682 | let (a, b) = (_mm_setr_pd(1.0, 2.0), _mm_setr_pd(1.0, 3.0)); | |
4683 | assert!(_mm_ucomile_sd(a, b) != 0); | |
4684 | } | |
4685 | ||
83c7162d | 4686 | #[simd_test(enable = "sse2")] |
0531ce1d XL |
4687 | unsafe fn test_mm_ucomigt_sd() { |
4688 | let (a, b) = (_mm_setr_pd(1.0, 2.0), _mm_setr_pd(1.0, 3.0)); | |
4689 | assert!(_mm_ucomigt_sd(a, b) == 0); | |
4690 | } | |
4691 | ||
83c7162d | 4692 | #[simd_test(enable = "sse2")] |
0531ce1d XL |
4693 | unsafe fn test_mm_ucomige_sd() { |
4694 | let (a, b) = (_mm_setr_pd(1.0, 2.0), _mm_setr_pd(1.0, 3.0)); | |
4695 | assert!(_mm_ucomige_sd(a, b) != 0); | |
4696 | } | |
4697 | ||
83c7162d | 4698 | #[simd_test(enable = "sse2")] |
0531ce1d XL |
4699 | unsafe fn test_mm_ucomineq_sd() { |
4700 | let (a, b) = (_mm_setr_pd(1.0, 2.0), _mm_setr_pd(1.0, 3.0)); | |
4701 | assert!(_mm_ucomineq_sd(a, b) == 0); | |
4702 | } | |
4703 | ||
83c7162d | 4704 | #[simd_test(enable = "sse2")] |
0531ce1d XL |
4705 | unsafe fn test_mm_movemask_pd() { |
4706 | let r = _mm_movemask_pd(_mm_setr_pd(-1.0, 5.0)); | |
4707 | assert_eq!(r, 0b01); | |
4708 | ||
4709 | let r = _mm_movemask_pd(_mm_setr_pd(-1.0, -5.0)); | |
4710 | assert_eq!(r, 0b11); | |
4711 | } | |
4712 | ||
4713 | #[repr(align(16))] | |
4714 | struct Memory { | |
4715 | data: [f64; 4], | |
4716 | } | |
4717 | ||
83c7162d | 4718 | #[simd_test(enable = "sse2")] |
0531ce1d XL |
4719 | unsafe fn test_mm_load_pd() { |
4720 | let mem = Memory { | |
4721 | data: [1.0f64, 2.0, 3.0, 4.0], | |
4722 | }; | |
4723 | let vals = &mem.data; | |
4724 | let d = vals.as_ptr(); | |
4725 | ||
4726 | let r = _mm_load_pd(d); | |
4727 | assert_eq_m128d(r, _mm_setr_pd(1.0, 2.0)); | |
4728 | } | |
4729 | ||
83c7162d | 4730 | #[simd_test(enable = "sse2")] |
0531ce1d XL |
4731 | unsafe fn test_mm_load_sd() { |
4732 | let a = 1.; | |
4733 | let expected = _mm_setr_pd(a, 0.); | |
4734 | let r = _mm_load_sd(&a); | |
4735 | assert_eq_m128d(r, expected); | |
4736 | } | |
4737 | ||
83c7162d | 4738 | #[simd_test(enable = "sse2")] |
0531ce1d XL |
4739 | unsafe fn test_mm_loadh_pd() { |
4740 | let a = _mm_setr_pd(1., 2.); | |
4741 | let b = 3.; | |
4742 | let expected = _mm_setr_pd(_mm_cvtsd_f64(a), 3.); | |
4743 | let r = _mm_loadh_pd(a, &b); | |
4744 | assert_eq_m128d(r, expected); | |
4745 | } | |
4746 | ||
83c7162d | 4747 | #[simd_test(enable = "sse2")] |
0531ce1d XL |
4748 | unsafe fn test_mm_loadl_pd() { |
4749 | let a = _mm_setr_pd(1., 2.); | |
4750 | let b = 3.; | |
4751 | let expected = _mm_setr_pd(3., get_m128d(a, 1)); | |
4752 | let r = _mm_loadl_pd(a, &b); | |
4753 | assert_eq_m128d(r, expected); | |
4754 | } | |
4755 | ||
83c7162d | 4756 | #[simd_test(enable = "sse2")] |
0531ce1d XL |
4757 | unsafe fn test_mm_stream_pd() { |
4758 | #[repr(align(128))] | |
4759 | struct Memory { | |
4760 | pub data: [f64; 2], | |
4761 | } | |
4762 | let a = _mm_set1_pd(7.0); | |
8faf50e0 | 4763 | let mut mem = Memory { data: [-1.0; 2] }; |
0531ce1d XL |
4764 | |
4765 | _mm_stream_pd(&mut mem.data[0] as *mut f64, a); | |
4766 | for i in 0..2 { | |
4767 | assert_eq!(mem.data[i], get_m128d(a, i)); | |
4768 | } | |
4769 | } | |
4770 | ||
83c7162d | 4771 | #[simd_test(enable = "sse2")] |
0531ce1d XL |
4772 | unsafe fn test_mm_store_sd() { |
4773 | let mut dest = 0.; | |
4774 | let a = _mm_setr_pd(1., 2.); | |
4775 | _mm_store_sd(&mut dest, a); | |
4776 | assert_eq!(dest, _mm_cvtsd_f64(a)); | |
4777 | } | |
4778 | ||
83c7162d | 4779 | #[simd_test(enable = "sse2")] |
0531ce1d | 4780 | unsafe fn test_mm_store_pd() { |
8faf50e0 | 4781 | let mut mem = Memory { data: [0.0f64; 4] }; |
0531ce1d XL |
4782 | let vals = &mut mem.data; |
4783 | let a = _mm_setr_pd(1.0, 2.0); | |
4784 | let d = vals.as_mut_ptr(); | |
4785 | ||
4786 | _mm_store_pd(d, *black_box(&a)); | |
4787 | assert_eq!(vals[0], 1.0); | |
4788 | assert_eq!(vals[1], 2.0); | |
4789 | } | |
4790 | ||
83c7162d | 4791 | #[simd_test(enable = "sse")] |
0531ce1d | 4792 | unsafe fn test_mm_storeu_pd() { |
8faf50e0 | 4793 | let mut mem = Memory { data: [0.0f64; 4] }; |
0531ce1d XL |
4794 | let vals = &mut mem.data; |
4795 | let a = _mm_setr_pd(1.0, 2.0); | |
4796 | ||
4797 | let mut ofs = 0; | |
4798 | let mut p = vals.as_mut_ptr(); | |
4799 | ||
4800 | // Make sure p is *not* aligned to 16-byte boundary | |
4801 | if (p as usize) & 0xf == 0 { | |
4802 | ofs = 1; | |
4803 | p = p.offset(1); | |
4804 | } | |
4805 | ||
4806 | _mm_storeu_pd(p, *black_box(&a)); | |
4807 | ||
4808 | if ofs > 0 { | |
4809 | assert_eq!(vals[ofs - 1], 0.0); | |
4810 | } | |
4811 | assert_eq!(vals[ofs + 0], 1.0); | |
4812 | assert_eq!(vals[ofs + 1], 2.0); | |
4813 | } | |
4814 | ||
    // `_mm_store1_pd` broadcasts the low lane of `a` to both destination slots.
    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_store1_pd() {
        let mut mem = Memory { data: [0.0f64; 4] };
        let vals = &mut mem.data;
        let a = _mm_setr_pd(1.0, 2.0);
        let d = vals.as_mut_ptr();

        _mm_store1_pd(d, *black_box(&a));
        // Both slots receive the low lane (1.0); the high lane (2.0) is ignored.
        assert_eq!(vals[0], 1.0);
        assert_eq!(vals[1], 1.0);
    }
4826 | ||
    // `_mm_store_pd1` is an alias of `_mm_store1_pd`: broadcast the low lane.
    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_store_pd1() {
        let mut mem = Memory { data: [0.0f64; 4] };
        let vals = &mut mem.data;
        let a = _mm_setr_pd(1.0, 2.0);
        let d = vals.as_mut_ptr();

        _mm_store_pd1(d, *black_box(&a));
        assert_eq!(vals[0], 1.0);
        assert_eq!(vals[1], 1.0);
    }
4838 | ||
    // `_mm_storer_pd` stores the two lanes in reversed order.
    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_storer_pd() {
        let mut mem = Memory { data: [0.0f64; 4] };
        let vals = &mut mem.data;
        let a = _mm_setr_pd(1.0, 2.0);
        let d = vals.as_mut_ptr();

        _mm_storer_pd(d, *black_box(&a));
        // High lane lands first, low lane second.
        assert_eq!(vals[0], 2.0);
        assert_eq!(vals[1], 1.0);
    }
4850 | ||
    // `_mm_storeh_pd` writes only the high lane of `a`.
    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_storeh_pd() {
        let mut dest = 0.;
        let a = _mm_setr_pd(1., 2.);
        _mm_storeh_pd(&mut dest, a);
        assert_eq!(dest, get_m128d(a, 1));
    }
4858 | ||
    // `_mm_storel_pd` writes only the low lane of `a`.
    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_storel_pd() {
        let mut dest = 0.;
        let a = _mm_setr_pd(1., 2.);
        _mm_storel_pd(&mut dest, a);
        assert_eq!(dest, _mm_cvtsd_f64(a));
    }
4866 | ||
    // `_mm_loadr_pd` loads two doubles from an aligned pointer in reversed order.
    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_loadr_pd() {
        let mut mem = Memory {
            data: [1.0f64, 2.0, 3.0, 4.0],
        };
        let vals = &mut mem.data;
        let d = vals.as_ptr();

        let r = _mm_loadr_pd(d);
        // Memory [1.0, 2.0] comes back as lanes (2.0, 1.0).
        assert_eq_m128d(r, _mm_setr_pd(2.0, 1.0));
    }
4878 | ||
    // `_mm_loadu_pd` loads two doubles from a possibly-unaligned pointer; the
    // test forces the pointer off any 16-byte boundary to exercise that path.
    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_loadu_pd() {
        let mut mem = Memory {
            data: [1.0f64, 2.0, 3.0, 4.0],
        };
        let vals = &mut mem.data;
        let mut d = vals.as_ptr();

        // make sure d is not aligned to 16-byte boundary
        let mut offset = 0;
        if (d as usize) & 0xf == 0 {
            offset = 1;
            d = d.offset(offset as isize);
        }

        let r = _mm_loadu_pd(d);
        // Shifting the pointer by one element shifts each loaded value by 1.0,
        // since the source data is the sequence 1.0, 2.0, 3.0, 4.0.
        let e = _mm_add_pd(_mm_setr_pd(1.0, 2.0), _mm_set1_pd(offset as f64));
        assert_eq_m128d(r, e);
    }
4898 | ||
    // `_mm_cvtpd_ps` converts two f64 lanes to f32 in the low half, zeroing the
    // upper two f32 lanes; out-of-range f64 values saturate to ±infinity.
    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_cvtpd_ps() {
        let r = _mm_cvtpd_ps(_mm_setr_pd(-1.0, 5.0));
        assert_eq_m128(r, _mm_setr_ps(-1.0, 5.0, 0.0, 0.0));

        let r = _mm_cvtpd_ps(_mm_setr_pd(-1.0, -5.0));
        assert_eq_m128(r, _mm_setr_ps(-1.0, -5.0, 0.0, 0.0));

        // f64 extremes overflow f32 and become infinities.
        let r = _mm_cvtpd_ps(_mm_setr_pd(f64::MAX, f64::MIN));
        assert_eq_m128(r, _mm_setr_ps(f32::INFINITY, f32::NEG_INFINITY, 0.0, 0.0));

        // f32 extremes round-trip exactly.
        let r = _mm_cvtpd_ps(_mm_setr_pd(f32::MAX as f64, f32::MIN as f64));
        assert_eq_m128(r, _mm_setr_ps(f32::MAX, f32::MIN, 0.0, 0.0));
    }
4913 | ||
    // `_mm_cvtps_pd` widens the two low f32 lanes to f64; upper lanes are ignored.
    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_cvtps_pd() {
        let r = _mm_cvtps_pd(_mm_setr_ps(-1.0, 2.0, -3.0, 5.0));
        assert_eq_m128d(r, _mm_setr_pd(-1.0, 2.0));

        // Only lanes 0 and 1 (f32::MAX, +inf) participate in the conversion.
        let r = _mm_cvtps_pd(_mm_setr_ps(
            f32::MAX,
            f32::INFINITY,
            f32::NEG_INFINITY,
            f32::MIN,
        ));
        assert_eq_m128d(r, _mm_setr_pd(f32::MAX as f64, f64::INFINITY));
    }
4927 | ||
    // `_mm_cvtpd_epi32` converts two f64 lanes to i32 (rounded), zeroing the
    // upper two i32 lanes; unrepresentable inputs yield i32::MIN (0x80000000).
    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_cvtpd_epi32() {
        let r = _mm_cvtpd_epi32(_mm_setr_pd(-1.0, 5.0));
        assert_eq_m128i(r, _mm_setr_epi32(-1, 5, 0, 0));

        let r = _mm_cvtpd_epi32(_mm_setr_pd(-1.0, -5.0));
        assert_eq_m128i(r, _mm_setr_epi32(-1, -5, 0, 0));

        // Overflow, infinities and NaN all produce the indefinite value i32::MIN.
        let r = _mm_cvtpd_epi32(_mm_setr_pd(f64::MAX, f64::MIN));
        assert_eq_m128i(r, _mm_setr_epi32(i32::MIN, i32::MIN, 0, 0));

        let r = _mm_cvtpd_epi32(_mm_setr_pd(f64::INFINITY, f64::NEG_INFINITY));
        assert_eq_m128i(r, _mm_setr_epi32(i32::MIN, i32::MIN, 0, 0));

        let r = _mm_cvtpd_epi32(_mm_setr_pd(f64::NAN, f64::NAN));
        assert_eq_m128i(r, _mm_setr_epi32(i32::MIN, i32::MIN, 0, 0));
    }
4945 | ||
    // `_mm_cvtsd_si32` converts the low f64 lane to i32; unrepresentable
    // inputs (overflow, NaN) yield i32::MIN.
    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_cvtsd_si32() {
        let r = _mm_cvtsd_si32(_mm_setr_pd(-2.0, 5.0));
        assert_eq!(r, -2);

        let r = _mm_cvtsd_si32(_mm_setr_pd(f64::MAX, f64::MIN));
        assert_eq!(r, i32::MIN);

        let r = _mm_cvtsd_si32(_mm_setr_pd(f64::NAN, f64::NAN));
        assert_eq!(r, i32::MIN);
    }
4957 | ||
    // `_mm_cvtsd_ss` converts `b`'s low f64 lane to f32 and places it in `a`'s
    // low lane, keeping `a`'s other three lanes unchanged.
    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_cvtsd_ss() {
        let a = _mm_setr_ps(-1.1, -2.2, 3.3, 4.4);
        let b = _mm_setr_pd(2.0, -5.0);

        let r = _mm_cvtsd_ss(a, b);

        assert_eq_m128(r, _mm_setr_ps(2.0, -2.2, 3.3, 4.4));

        // Infinity converts through; only lane 0 of the result changes.
        let a = _mm_setr_ps(-1.1, f32::NEG_INFINITY, f32::MAX, f32::NEG_INFINITY);
        let b = _mm_setr_pd(f64::INFINITY, -5.0);

        let r = _mm_cvtsd_ss(a, b);

        assert_eq_m128(
            r,
            _mm_setr_ps(
                f32::INFINITY,
                f32::NEG_INFINITY,
                f32::MAX,
                f32::NEG_INFINITY,
            ),
        );
    }
4982 | ||
    // `_mm_cvtsd_f64` extracts the low f64 lane as a scalar.
    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_cvtsd_f64() {
        let r = _mm_cvtsd_f64(_mm_setr_pd(-1.1, 2.2));
        assert_eq!(r, -1.1);
    }
4988 | ||
    // `_mm_cvtss_sd` widens `b`'s low f32 lane to f64 and places it in `a`'s
    // low lane, keeping `a`'s high lane unchanged.
    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_cvtss_sd() {
        let a = _mm_setr_pd(-1.1, 2.2);
        let b = _mm_setr_ps(1.0, 2.0, 3.0, 4.0);

        let r = _mm_cvtss_sd(a, b);
        assert_eq_m128d(r, _mm_setr_pd(1.0, 2.2));

        // Infinity widens to f64 infinity; high lane of `a` is preserved.
        let a = _mm_setr_pd(-1.1, f64::INFINITY);
        let b = _mm_setr_ps(f32::NEG_INFINITY, 2.0, 3.0, 4.0);

        let r = _mm_cvtss_sd(a, b);
        assert_eq_m128d(r, _mm_setr_pd(f64::NEG_INFINITY, f64::INFINITY));
    }
5003 | ||
    // `_mm_cvttpd_epi32` converts with truncation (toward zero); invalid
    // inputs (infinities, NaN) yield i32::MIN.
    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_cvttpd_epi32() {
        let a = _mm_setr_pd(-1.1, 2.2);
        let r = _mm_cvttpd_epi32(a);
        assert_eq_m128i(r, _mm_setr_epi32(-1, 2, 0, 0));

        let a = _mm_setr_pd(f64::NEG_INFINITY, f64::NAN);
        let r = _mm_cvttpd_epi32(a);
        assert_eq_m128i(r, _mm_setr_epi32(i32::MIN, i32::MIN, 0, 0));
    }
5014 | ||
    // `_mm_cvttsd_si32` truncates the low f64 lane to i32; invalid inputs
    // yield i32::MIN.
    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_cvttsd_si32() {
        let a = _mm_setr_pd(-1.1, 2.2);
        let r = _mm_cvttsd_si32(a);
        assert_eq!(r, -1);

        let a = _mm_setr_pd(f64::NEG_INFINITY, f64::NAN);
        let r = _mm_cvttsd_si32(a);
        assert_eq!(r, i32::MIN);
    }
5025 | ||
    // `_mm_cvttps_epi32` truncates four f32 lanes to i32; out-of-range values
    // and infinities all yield i32::MIN.
    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_cvttps_epi32() {
        let a = _mm_setr_ps(-1.1, 2.2, -3.3, 6.6);
        let r = _mm_cvttps_epi32(a);
        assert_eq_m128i(r, _mm_setr_epi32(-1, 2, -3, 6));

        let a = _mm_setr_ps(f32::NEG_INFINITY, f32::INFINITY, f32::MIN, f32::MAX);
        let r = _mm_cvttps_epi32(a);
        assert_eq_m128i(r, _mm_setr_epi32(i32::MIN, i32::MIN, i32::MIN, i32::MIN));
    }
5036 | ||
    // `_mm_set_sd` places the scalar in the low lane and zeroes the high lane.
    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_set_sd() {
        let r = _mm_set_sd(-1.0_f64);
        assert_eq_m128d(r, _mm_setr_pd(-1.0_f64, 0_f64));
    }
5042 | ||
    // `_mm_set1_pd` broadcasts the scalar to both lanes.
    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_set1_pd() {
        let r = _mm_set1_pd(-1.0_f64);
        assert_eq_m128d(r, _mm_setr_pd(-1.0_f64, -1.0_f64));
    }
5048 | ||
    // `_mm_set_pd1` is an alias of `_mm_set1_pd`: broadcast to both lanes.
    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_set_pd1() {
        let r = _mm_set_pd1(-2.0_f64);
        assert_eq_m128d(r, _mm_setr_pd(-2.0_f64, -2.0_f64));
    }
5054 | ||
    // `_mm_set_pd` takes arguments high-to-low, the reverse of `_mm_setr_pd`.
    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_set_pd() {
        let r = _mm_set_pd(1.0_f64, 5.0_f64);
        assert_eq_m128d(r, _mm_setr_pd(5.0_f64, 1.0_f64));
    }
5060 | ||
    // `_mm_setr_pd` takes arguments low-to-high (memory order).
    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_setr_pd() {
        let r = _mm_setr_pd(1.0_f64, -5.0_f64);
        assert_eq_m128d(r, _mm_setr_pd(1.0_f64, -5.0_f64));
    }
5066 | ||
    // `_mm_setzero_pd` returns a vector with both lanes zero.
    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_setzero_pd() {
        let r = _mm_setzero_pd();
        assert_eq_m128d(r, _mm_setr_pd(0_f64, 0_f64));
    }
5072 | ||
    // `_mm_load1_pd` loads one double and broadcasts it to both lanes.
    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_load1_pd() {
        let d = -5.0;
        let r = _mm_load1_pd(&d);
        assert_eq_m128d(r, _mm_setr_pd(d, d));
    }
5079 | ||
    // `_mm_load_pd1` is an alias of `_mm_load1_pd`: load-and-broadcast.
    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_load_pd1() {
        let d = -5.0;
        let r = _mm_load_pd1(&d);
        assert_eq_m128d(r, _mm_setr_pd(d, d));
    }
5086 | ||
    // `_mm_unpackhi_pd` interleaves the high lanes of `a` and `b`.
    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_unpackhi_pd() {
        let a = _mm_setr_pd(1.0, 2.0);
        let b = _mm_setr_pd(3.0, 4.0);
        let r = _mm_unpackhi_pd(a, b);
        assert_eq_m128d(r, _mm_setr_pd(2.0, 4.0));
    }
5094 | ||
    // `_mm_unpacklo_pd` interleaves the low lanes of `a` and `b`.
    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_unpacklo_pd() {
        let a = _mm_setr_pd(1.0, 2.0);
        let b = _mm_setr_pd(3.0, 4.0);
        let r = _mm_unpacklo_pd(a, b);
        assert_eq_m128d(r, _mm_setr_pd(1.0, 3.0));
    }
5102 | ||
    // `_mm_shuffle_pd` with mask 0 selects lane 0 of `a` and lane 0 of `b`.
    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_shuffle_pd() {
        let a = _mm_setr_pd(1., 2.);
        let b = _mm_setr_pd(3., 4.);
        let expected = _mm_setr_pd(1., 3.);
        let r = _mm_shuffle_pd(a, b, 0);
        assert_eq_m128d(r, expected);
    }
5111 | ||
    // `_mm_move_sd` takes the low lane from `b` and the high lane from `a`.
    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_move_sd() {
        let a = _mm_setr_pd(1., 2.);
        let b = _mm_setr_pd(3., 4.);
        let expected = _mm_setr_pd(3., 2.);
        let r = _mm_move_sd(a, b);
        assert_eq_m128d(r, expected);
    }
5120 | ||
    // `_mm_castpd_ps` reinterprets bits; tested with all-zero data, which is
    // identical across float widths.
    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_castpd_ps() {
        let a = _mm_set1_pd(0.);
        let expected = _mm_set1_ps(0.);
        let r = _mm_castpd_ps(a);
        assert_eq_m128(r, expected);
    }
5128 | ||
    // `_mm_castpd_si128` reinterprets __m128d bits as __m128i (no conversion).
    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_castpd_si128() {
        let a = _mm_set1_pd(0.);
        let expected = _mm_set1_epi64x(0);
        let r = _mm_castpd_si128(a);
        assert_eq_m128i(r, expected);
    }
5136 | ||
    // `_mm_castps_pd` reinterprets __m128 bits as __m128d (no conversion).
    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_castps_pd() {
        let a = _mm_set1_ps(0.);
        let expected = _mm_set1_pd(0.);
        let r = _mm_castps_pd(a);
        assert_eq_m128d(r, expected);
    }
5144 | ||
    // `_mm_castps_si128` reinterprets __m128 bits as __m128i (no conversion).
    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_castps_si128() {
        let a = _mm_set1_ps(0.);
        let expected = _mm_set1_epi32(0);
        let r = _mm_castps_si128(a);
        assert_eq_m128i(r, expected);
    }
5152 | ||
    // `_mm_castsi128_pd` reinterprets __m128i bits as __m128d (no conversion).
    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_castsi128_pd() {
        let a = _mm_set1_epi64x(0);
        let expected = _mm_set1_pd(0.);
        let r = _mm_castsi128_pd(a);
        assert_eq_m128d(r, expected);
    }
5160 | ||
    // `_mm_castsi128_ps` reinterprets __m128i bits as __m128 (no conversion).
    #[simd_test(enable = "sse2")]
    unsafe fn test_mm_castsi128_ps() {
        let a = _mm_set1_epi32(0);
        let expected = _mm_set1_ps(0.);
        let r = _mm_castsi128_ps(a);
        assert_eq_m128(r, expected);
    }
5168 | ||
    // `_mm_add_si64` adds two 64-bit MMX values; operands round-trip through
    // `transmute` since __m64 has no i64 constructor here.
    #[simd_test(enable = "sse2,mmx")]
    unsafe fn test_mm_add_si64() {
        let a = 1i64;
        let b = 2i64;
        let expected = 3i64;
        let r = _mm_add_si64(mem::transmute(a), mem::transmute(b));
        assert_eq!(mem::transmute::<__m64, i64>(r), expected);
    }
5177 | ||
    // `_mm_mul_su32` multiplies the low unsigned 32-bit elements of `a` and
    // `b` (1 * 3) into a 64-bit result.
    #[simd_test(enable = "sse2,mmx")]
    unsafe fn test_mm_mul_su32() {
        let a = _mm_setr_pi32(1, 2);
        let b = _mm_setr_pi32(3, 4);
        let expected = 3u64;
        let r = _mm_mul_su32(a, b);
        assert_eq_m64(r, mem::transmute(expected));
    }
5186 | ||
    // `_mm_sub_si64` subtracts two 64-bit MMX values (1 - 2 == -1).
    #[simd_test(enable = "sse2,mmx")]
    unsafe fn test_mm_sub_si64() {
        let a = 1i64;
        let b = 2i64;
        let expected = -1i64;
        let r = _mm_sub_si64(mem::transmute(a), mem::transmute(b));
        assert_eq!(mem::transmute::<__m64, i64>(r), expected);
    }
5195 | ||
    // `_mm_cvtpi32_pd` converts two packed i32 values to two f64 lanes.
    #[simd_test(enable = "sse2,mmx")]
    unsafe fn test_mm_cvtpi32_pd() {
        let a = _mm_setr_pi32(1, 2);
        let expected = _mm_setr_pd(1., 2.);
        let r = _mm_cvtpi32_pd(a);
        assert_eq_m128d(r, expected);
    }
5203 | ||
    // `_mm_set_epi64` takes its __m64 arguments high-to-low.
    #[simd_test(enable = "sse2,mmx")]
    unsafe fn test_mm_set_epi64() {
        let r = _mm_set_epi64(mem::transmute(1i64), mem::transmute(2i64));
        assert_eq_m128i(r, _mm_setr_epi64x(2, 1));
    }
5209 | ||
    // `_mm_set1_epi64` broadcasts a __m64 value to both 64-bit lanes.
    #[simd_test(enable = "sse2,mmx")]
    unsafe fn test_mm_set1_epi64() {
        let r = _mm_set1_epi64(mem::transmute(1i64));
        assert_eq_m128i(r, _mm_setr_epi64x(1, 1));
    }
5215 | ||
    // `_mm_setr_epi64` takes its __m64 arguments low-to-high (memory order).
    #[simd_test(enable = "sse2,mmx")]
    unsafe fn test_mm_setr_epi64() {
        let r = _mm_setr_epi64(mem::transmute(1i64), mem::transmute(2i64));
        assert_eq_m128i(r, _mm_setr_epi64x(1, 2));
    }
5221 | ||
    // `_mm_movepi64_pi64` extracts the low 64-bit lane of a __m128i as __m64.
    #[simd_test(enable = "sse2,mmx")]
    unsafe fn test_mm_movepi64_pi64() {
        let r = _mm_movepi64_pi64(_mm_setr_epi64x(5, 0));
        assert_eq_m64(r, _mm_setr_pi8(5, 0, 0, 0, 0, 0, 0, 0));
    }
5227 | ||
    // `_mm_movpi64_epi64` zero-extends a __m64 into the low lane of a __m128i.
    #[simd_test(enable = "sse2,mmx")]
    unsafe fn test_mm_movpi64_epi64() {
        let r = _mm_movpi64_epi64(_mm_setr_pi8(5, 0, 0, 0, 0, 0, 0, 0));
        assert_eq_m128i(r, _mm_setr_epi64x(5, 0));
    }
5233 | ||
    // `_mm_cvtpd_pi32` converts two f64 lanes to two packed i32 values (__m64).
    #[simd_test(enable = "sse2,mmx")]
    unsafe fn test_mm_cvtpd_pi32() {
        let a = _mm_setr_pd(5., 0.);
        let r = _mm_cvtpd_pi32(a);
        assert_eq_m64(r, _mm_setr_pi32(5, 0));
    }
5240 | ||
    // `_mm_cvttpd_pi32` converts two f64 lanes to packed i32 with truncation;
    // invalid inputs (infinities, NaN) yield i32::MIN.
    #[simd_test(enable = "sse2,mmx")]
    unsafe fn test_mm_cvttpd_pi32() {
        use std::{f64, i32};

        let a = _mm_setr_pd(5., 0.);
        let r = _mm_cvttpd_pi32(a);
        assert_eq_m64(r, _mm_setr_pi32(5, 0));

        let a = _mm_setr_pd(f64::NEG_INFINITY, f64::NAN);
        let r = _mm_cvttpd_pi32(a);
        assert_eq_m64(r, _mm_setr_pi32(i32::MIN, i32::MIN));
    }
}
5253 | } |