]> git.proxmox.com Git - rustc.git/blame - library/stdarch/crates/core_arch/src/x86/ssse3.rs
New upstream version 1.48.0~beta.8+dfsg1
[rustc.git] / library / stdarch / crates / core_arch / src / x86 / ssse3.rs
CommitLineData
0531ce1d
XL
1//! Supplemental Streaming SIMD Extensions 3 (SSSE3)
2
532ac7d7
XL
3use crate::{
4 core_arch::{simd::*, simd_llvm::*, x86::*},
5 mem::transmute,
6};
0531ce1d
XL
7
8#[cfg(test)]
416331ca 9use stdarch_test::assert_instr;
0531ce1d 10
532ac7d7 11/// Computes the absolute value of packed 8-bit signed integers in `a` and
0531ce1d 12/// return the unsigned results.
83c7162d
XL
13///
14/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_abs_epi8)
0531ce1d
XL
15#[inline]
16#[target_feature(enable = "ssse3")]
17#[cfg_attr(test, assert_instr(pabsb))]
83c7162d 18#[stable(feature = "simd_x86", since = "1.27.0")]
0531ce1d 19pub unsafe fn _mm_abs_epi8(a: __m128i) -> __m128i {
532ac7d7 20 transmute(pabsb128(a.as_i8x16()))
0531ce1d
XL
21}
22
532ac7d7 23/// Computes the absolute value of each of the packed 16-bit signed integers in
0531ce1d
XL
24/// `a` and
25/// return the 16-bit unsigned integer
83c7162d
XL
26///
27/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_abs_epi16)
0531ce1d
XL
28#[inline]
29#[target_feature(enable = "ssse3")]
30#[cfg_attr(test, assert_instr(pabsw))]
83c7162d 31#[stable(feature = "simd_x86", since = "1.27.0")]
0531ce1d 32pub unsafe fn _mm_abs_epi16(a: __m128i) -> __m128i {
532ac7d7 33 transmute(pabsw128(a.as_i16x8()))
0531ce1d
XL
34}
35
532ac7d7 36/// Computes the absolute value of each of the packed 32-bit signed integers in
0531ce1d
XL
37/// `a` and
38/// return the 32-bit unsigned integer
83c7162d
XL
39///
40/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_abs_epi32)
0531ce1d
XL
41#[inline]
42#[target_feature(enable = "ssse3")]
43#[cfg_attr(test, assert_instr(pabsd))]
83c7162d 44#[stable(feature = "simd_x86", since = "1.27.0")]
0531ce1d 45pub unsafe fn _mm_abs_epi32(a: __m128i) -> __m128i {
532ac7d7 46 transmute(pabsd128(a.as_i32x4()))
0531ce1d
XL
47}
48
532ac7d7 49/// Shuffles bytes from `a` according to the content of `b`.
0531ce1d
XL
50///
51/// The last 4 bits of each byte of `b` are used as addresses
52/// into the 16 bytes of `a`.
53///
54/// In addition, if the highest significant bit of a byte of `b`
55/// is set, the respective destination byte is set to 0.
56///
57/// Picturing `a` and `b` as `[u8; 16]`, `_mm_shuffle_epi8` is
58/// logically equivalent to:
59///
60/// ```
61/// fn mm_shuffle_epi8(a: [u8; 16], b: [u8; 16]) -> [u8; 16] {
62/// let mut r = [0u8; 16];
63/// for i in 0..16 {
64/// // if the most significant bit of b is set,
65/// // then the destination byte is set to 0.
66/// if b[i] & 0x80 == 0u8 {
67/// r[i] = a[(b[i] % 16) as usize];
68/// }
69/// }
70/// r
71/// }
72/// ```
83c7162d
XL
73///
74/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_shuffle_epi8)
0531ce1d
XL
75#[inline]
76#[target_feature(enable = "ssse3")]
77#[cfg_attr(test, assert_instr(pshufb))]
83c7162d 78#[stable(feature = "simd_x86", since = "1.27.0")]
0531ce1d 79pub unsafe fn _mm_shuffle_epi8(a: __m128i, b: __m128i) -> __m128i {
532ac7d7 80 transmute(pshufb128(a.as_u8x16(), b.as_u8x16()))
0531ce1d
XL
81}
82
/// Concatenate 16-byte blocks in `a` and `b` into a 32-byte temporary result,
/// shift the result right by `n` bytes, and returns the low 16 bytes.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_alignr_epi8)
#[inline]
#[target_feature(enable = "ssse3")]
#[cfg_attr(test, assert_instr(palignr, n = 15))]
#[rustc_args_required_const(2)]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_alignr_epi8(a: __m128i, b: __m128i, n: i32) -> __m128i {
    let n = n as u32;
    // If palignr is shifting the pair of vectors more than the size of two
    // lanes, emit zero.
    if n > 32 {
        return _mm_set1_epi8(0);
    }
    // If palignr is shifting the pair of input vectors more than one lane,
    // but less than two lanes, convert to shifting in zeroes.
    let (a, b, n) = if n > 16 {
        (_mm_set1_epi8(0), a, n - 16)
    } else {
        (a, b, n)
    };
    let a = a.as_i8x16();
    let b = b.as_i8x16();

    // `simd_shuffle16` requires compile-time-constant lane indices, so the
    // shift amount is baked in here and selected by the exhaustive `match`
    // below. The 32-byte concatenation is pictured as [b, a]: indices
    // 0..=15 pick from `b`, indices 16..=31 pick from `a`, so `i + $shift`
    // models a right shift of the pair by `$shift` bytes.
    macro_rules! shuffle {
        ($shift:expr) => {
            simd_shuffle16(
                b,
                a,
                [
                    0 + $shift,
                    1 + $shift,
                    2 + $shift,
                    3 + $shift,
                    4 + $shift,
                    5 + $shift,
                    6 + $shift,
                    7 + $shift,
                    8 + $shift,
                    9 + $shift,
                    10 + $shift,
                    11 + $shift,
                    12 + $shift,
                    13 + $shift,
                    14 + $shift,
                    15 + $shift,
                ],
            )
        };
    }
    // After the normalization above `n` is in 0..=16; the `_` arm (n == 16)
    // selects every lane from `a`.
    let r: i8x16 = match n {
        0 => shuffle!(0),
        1 => shuffle!(1),
        2 => shuffle!(2),
        3 => shuffle!(3),
        4 => shuffle!(4),
        5 => shuffle!(5),
        6 => shuffle!(6),
        7 => shuffle!(7),
        8 => shuffle!(8),
        9 => shuffle!(9),
        10 => shuffle!(10),
        11 => shuffle!(11),
        12 => shuffle!(12),
        13 => shuffle!(13),
        14 => shuffle!(14),
        15 => shuffle!(15),
        _ => shuffle!(16),
    };
    transmute(r)
}
156
532ac7d7 157/// Horizontally adds the adjacent pairs of values contained in 2 packed
83c7162d
XL
158/// 128-bit vectors of `[8 x i16]`.
159///
160/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_hadd_epi16)
0531ce1d
XL
161#[inline]
162#[target_feature(enable = "ssse3")]
163#[cfg_attr(test, assert_instr(phaddw))]
83c7162d 164#[stable(feature = "simd_x86", since = "1.27.0")]
0531ce1d 165pub unsafe fn _mm_hadd_epi16(a: __m128i, b: __m128i) -> __m128i {
532ac7d7 166 transmute(phaddw128(a.as_i16x8(), b.as_i16x8()))
0531ce1d
XL
167}
168
532ac7d7 169/// Horizontally adds the adjacent pairs of values contained in 2 packed
83c7162d 170/// 128-bit vectors of `[8 x i16]`. Positive sums greater than 7FFFh are
0531ce1d 171/// saturated to 7FFFh. Negative sums less than 8000h are saturated to 8000h.
83c7162d
XL
172///
173/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_hadds_epi16)
0531ce1d
XL
174#[inline]
175#[target_feature(enable = "ssse3")]
176#[cfg_attr(test, assert_instr(phaddsw))]
83c7162d 177#[stable(feature = "simd_x86", since = "1.27.0")]
0531ce1d 178pub unsafe fn _mm_hadds_epi16(a: __m128i, b: __m128i) -> __m128i {
532ac7d7 179 transmute(phaddsw128(a.as_i16x8(), b.as_i16x8()))
0531ce1d
XL
180}
181
532ac7d7 182/// Horizontally adds the adjacent pairs of values contained in 2 packed
83c7162d
XL
183/// 128-bit vectors of `[4 x i32]`.
184///
185/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_hadd_epi32)
0531ce1d
XL
186#[inline]
187#[target_feature(enable = "ssse3")]
188#[cfg_attr(test, assert_instr(phaddd))]
83c7162d 189#[stable(feature = "simd_x86", since = "1.27.0")]
0531ce1d 190pub unsafe fn _mm_hadd_epi32(a: __m128i, b: __m128i) -> __m128i {
532ac7d7 191 transmute(phaddd128(a.as_i32x4(), b.as_i32x4()))
0531ce1d
XL
192}
193
194/// Horizontally subtract the adjacent pairs of values contained in 2
83c7162d
XL
195/// packed 128-bit vectors of `[8 x i16]`.
196///
197/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_hsub_epi16)
0531ce1d
XL
198#[inline]
199#[target_feature(enable = "ssse3")]
200#[cfg_attr(test, assert_instr(phsubw))]
83c7162d 201#[stable(feature = "simd_x86", since = "1.27.0")]
0531ce1d 202pub unsafe fn _mm_hsub_epi16(a: __m128i, b: __m128i) -> __m128i {
532ac7d7 203 transmute(phsubw128(a.as_i16x8(), b.as_i16x8()))
0531ce1d
XL
204}
205
206/// Horizontally subtract the adjacent pairs of values contained in 2
83c7162d 207/// packed 128-bit vectors of `[8 x i16]`. Positive differences greater than
0531ce1d
XL
208/// 7FFFh are saturated to 7FFFh. Negative differences less than 8000h are
209/// saturated to 8000h.
83c7162d
XL
210///
211/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_hsubs_epi16)
0531ce1d
XL
212#[inline]
213#[target_feature(enable = "ssse3")]
214#[cfg_attr(test, assert_instr(phsubsw))]
83c7162d 215#[stable(feature = "simd_x86", since = "1.27.0")]
0531ce1d 216pub unsafe fn _mm_hsubs_epi16(a: __m128i, b: __m128i) -> __m128i {
532ac7d7 217 transmute(phsubsw128(a.as_i16x8(), b.as_i16x8()))
0531ce1d
XL
218}
219
220/// Horizontally subtract the adjacent pairs of values contained in 2
83c7162d
XL
221/// packed 128-bit vectors of `[4 x i32]`.
222///
223/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_hsub_epi32)
0531ce1d
XL
224#[inline]
225#[target_feature(enable = "ssse3")]
226#[cfg_attr(test, assert_instr(phsubd))]
83c7162d 227#[stable(feature = "simd_x86", since = "1.27.0")]
0531ce1d 228pub unsafe fn _mm_hsub_epi32(a: __m128i, b: __m128i) -> __m128i {
532ac7d7 229 transmute(phsubd128(a.as_i32x4(), b.as_i32x4()))
0531ce1d
XL
230}
231
532ac7d7 232/// Multiplies corresponding pairs of packed 8-bit unsigned integer
0531ce1d
XL
233/// values contained in the first source operand and packed 8-bit signed
234/// integer values contained in the second source operand, add pairs of
235/// contiguous products with signed saturation, and writes the 16-bit sums to
236/// the corresponding bits in the destination.
83c7162d
XL
237///
238/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maddubs_epi16)
0531ce1d
XL
239#[inline]
240#[target_feature(enable = "ssse3")]
241#[cfg_attr(test, assert_instr(pmaddubsw))]
83c7162d 242#[stable(feature = "simd_x86", since = "1.27.0")]
0531ce1d 243pub unsafe fn _mm_maddubs_epi16(a: __m128i, b: __m128i) -> __m128i {
532ac7d7 244 transmute(pmaddubsw128(a.as_u8x16(), b.as_i8x16()))
0531ce1d
XL
245}
246
532ac7d7 247/// Multiplies packed 16-bit signed integer values, truncate the 32-bit
0531ce1d 248/// product to the 18 most significant bits by right-shifting, round the
83c7162d
XL
249/// truncated value by adding 1, and write bits `[16:1]` to the destination.
250///
251/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mulhrs_epi16)
0531ce1d
XL
252#[inline]
253#[target_feature(enable = "ssse3")]
254#[cfg_attr(test, assert_instr(pmulhrsw))]
83c7162d 255#[stable(feature = "simd_x86", since = "1.27.0")]
0531ce1d 256pub unsafe fn _mm_mulhrs_epi16(a: __m128i, b: __m128i) -> __m128i {
532ac7d7 257 transmute(pmulhrsw128(a.as_i16x8(), b.as_i16x8()))
0531ce1d
XL
258}
259
532ac7d7
XL
260/// Negates packed 8-bit integers in `a` when the corresponding signed 8-bit
261/// integer in `b` is negative, and returns the result.
0531ce1d
XL
262/// Elements in result are zeroed out when the corresponding element in `b`
263/// is zero.
83c7162d
XL
264///
265/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_sign_epi8)
0531ce1d
XL
266#[inline]
267#[target_feature(enable = "ssse3")]
268#[cfg_attr(test, assert_instr(psignb))]
83c7162d 269#[stable(feature = "simd_x86", since = "1.27.0")]
0531ce1d 270pub unsafe fn _mm_sign_epi8(a: __m128i, b: __m128i) -> __m128i {
532ac7d7 271 transmute(psignb128(a.as_i8x16(), b.as_i8x16()))
0531ce1d
XL
272}
273
532ac7d7
XL
274/// Negates packed 16-bit integers in `a` when the corresponding signed 16-bit
275/// integer in `b` is negative, and returns the results.
0531ce1d
XL
276/// Elements in result are zeroed out when the corresponding element in `b`
277/// is zero.
83c7162d
XL
278///
279/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_sign_epi16)
0531ce1d
XL
280#[inline]
281#[target_feature(enable = "ssse3")]
282#[cfg_attr(test, assert_instr(psignw))]
83c7162d 283#[stable(feature = "simd_x86", since = "1.27.0")]
0531ce1d 284pub unsafe fn _mm_sign_epi16(a: __m128i, b: __m128i) -> __m128i {
532ac7d7 285 transmute(psignw128(a.as_i16x8(), b.as_i16x8()))
0531ce1d
XL
286}
287
532ac7d7
XL
288/// Negates packed 32-bit integers in `a` when the corresponding signed 32-bit
289/// integer in `b` is negative, and returns the results.
0531ce1d
XL
290/// Element in result are zeroed out when the corresponding element in `b`
291/// is zero.
83c7162d
XL
292///
293/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_sign_epi32)
0531ce1d
XL
294#[inline]
295#[target_feature(enable = "ssse3")]
296#[cfg_attr(test, assert_instr(psignd))]
83c7162d 297#[stable(feature = "simd_x86", since = "1.27.0")]
0531ce1d 298pub unsafe fn _mm_sign_epi32(a: __m128i, b: __m128i) -> __m128i {
532ac7d7 299 transmute(psignd128(a.as_i32x4(), b.as_i32x4()))
0531ce1d
XL
300}
301
// FFI declarations for the LLVM intrinsics that implement the SSSE3
// functions above. The `llvm.x86.ssse3.*` link names are resolved by the
// compiler backend, not by an external C library.
// `improper_ctypes` is allowed because the SIMD vector types are not
// ordinary C ABI types, but LLVM understands them for these intrinsics.
#[allow(improper_ctypes)]
extern "C" {
    // Packed absolute value (pabsb/pabsw/pabsd).
    #[link_name = "llvm.x86.ssse3.pabs.b.128"]
    fn pabsb128(a: i8x16) -> u8x16;

    #[link_name = "llvm.x86.ssse3.pabs.w.128"]
    fn pabsw128(a: i16x8) -> u16x8;

    #[link_name = "llvm.x86.ssse3.pabs.d.128"]
    fn pabsd128(a: i32x4) -> u32x4;

    // Byte shuffle (pshufb).
    #[link_name = "llvm.x86.ssse3.pshuf.b.128"]
    fn pshufb128(a: u8x16, b: u8x16) -> u8x16;

    // Horizontal add, plain and saturating (phaddw/phaddsw/phaddd).
    #[link_name = "llvm.x86.ssse3.phadd.w.128"]
    fn phaddw128(a: i16x8, b: i16x8) -> i16x8;

    #[link_name = "llvm.x86.ssse3.phadd.sw.128"]
    fn phaddsw128(a: i16x8, b: i16x8) -> i16x8;

    #[link_name = "llvm.x86.ssse3.phadd.d.128"]
    fn phaddd128(a: i32x4, b: i32x4) -> i32x4;

    // Horizontal subtract, plain and saturating (phsubw/phsubsw/phsubd).
    #[link_name = "llvm.x86.ssse3.phsub.w.128"]
    fn phsubw128(a: i16x8, b: i16x8) -> i16x8;

    #[link_name = "llvm.x86.ssse3.phsub.sw.128"]
    fn phsubsw128(a: i16x8, b: i16x8) -> i16x8;

    #[link_name = "llvm.x86.ssse3.phsub.d.128"]
    fn phsubd128(a: i32x4, b: i32x4) -> i32x4;

    // Multiply-add of unsigned × signed bytes (pmaddubsw).
    #[link_name = "llvm.x86.ssse3.pmadd.ub.sw.128"]
    fn pmaddubsw128(a: u8x16, b: i8x16) -> i16x8;

    // Rounded high-half multiply (pmulhrsw).
    #[link_name = "llvm.x86.ssse3.pmul.hr.sw.128"]
    fn pmulhrsw128(a: i16x8, b: i16x8) -> i16x8;

    // Conditional negate/zero by sign of second operand (psignb/psignw/psignd).
    #[link_name = "llvm.x86.ssse3.psign.b.128"]
    fn psignb128(a: i8x16, b: i8x16) -> i8x16;

    #[link_name = "llvm.x86.ssse3.psign.w.128"]
    fn psignw128(a: i16x8, b: i16x8) -> i16x8;

    #[link_name = "llvm.x86.ssse3.psign.d.128"]
    fn psignd128(a: i32x4, b: i32x4) -> i32x4;
}
349
#[cfg(test)]
mod tests {
    // Unit tests for the SSSE3 intrinsics. `#[simd_test]` only runs a test
    // when the host CPU actually supports the `ssse3` feature.
    use stdarch_test::simd_test;

    use crate::core_arch::x86::*;

    #[simd_test(enable = "ssse3")]
    unsafe fn test_mm_abs_epi8() {
        let r = _mm_abs_epi8(_mm_set1_epi8(-5));
        assert_eq_m128i(r, _mm_set1_epi8(5));
    }

    #[simd_test(enable = "ssse3")]
    unsafe fn test_mm_abs_epi16() {
        let r = _mm_abs_epi16(_mm_set1_epi16(-5));
        assert_eq_m128i(r, _mm_set1_epi16(5));
    }

    #[simd_test(enable = "ssse3")]
    unsafe fn test_mm_abs_epi32() {
        let r = _mm_abs_epi32(_mm_set1_epi32(-5));
        assert_eq_m128i(r, _mm_set1_epi32(5));
    }

    #[simd_test(enable = "ssse3")]
    unsafe fn test_mm_shuffle_epi8() {
        #[rustfmt::skip]
        let a = _mm_setr_epi8(
            1, 2, 3, 4, 5, 6, 7, 8,
            9, 10, 11, 12, 13, 14, 15, 16,
        );
        // `128_u8 as i8` has the MSB set, so the corresponding result byte
        // (index 1) must be zeroed; all other selectors index into `a` mod 16.
        #[rustfmt::skip]
        let b = _mm_setr_epi8(
            4, 128_u8 as i8, 4, 3,
            24, 12, 6, 19,
            12, 5, 5, 10,
            4, 1, 8, 0,
        );
        let expected = _mm_setr_epi8(5, 0, 5, 4, 9, 13, 7, 4, 13, 6, 6, 11, 5, 2, 9, 1);
        let r = _mm_shuffle_epi8(a, b);
        assert_eq_m128i(r, expected);
    }

    #[simd_test(enable = "ssse3")]
    unsafe fn test_mm_alignr_epi8() {
        #[rustfmt::skip]
        let a = _mm_setr_epi8(
            1, 2, 3, 4, 5, 6, 7, 8,
            9, 10, 11, 12, 13, 14, 15, 16,
        );
        #[rustfmt::skip]
        let b = _mm_setr_epi8(
            4, 63, 4, 3,
            24, 12, 6, 19,
            12, 5, 5, 10,
            4, 1, 8, 0,
        );
        // Shifting by more than 32 bytes produces all zeros.
        let r = _mm_alignr_epi8(a, b, 33);
        assert_eq_m128i(r, _mm_set1_epi8(0));

        // 16 < n <= 32: result comes from `a` with zeros shifted in.
        let r = _mm_alignr_epi8(a, b, 17);
        #[rustfmt::skip]
        let expected = _mm_setr_epi8(
            2, 3, 4, 5, 6, 7, 8, 9,
            10, 11, 12, 13, 14, 15, 16, 0,
        );
        assert_eq_m128i(r, expected);

        // n == 16 drops all of `b` and yields exactly `a`.
        let r = _mm_alignr_epi8(a, b, 16);
        assert_eq_m128i(r, a);

        // n < 16: low bytes from the tail of `b`, high bytes from `a`.
        let r = _mm_alignr_epi8(a, b, 15);
        #[rustfmt::skip]
        let expected = _mm_setr_epi8(
            0, 1, 2, 3, 4, 5, 6, 7,
            8, 9, 10, 11, 12, 13, 14, 15,
        );
        assert_eq_m128i(r, expected);

        // n == 0 yields `b` unchanged.
        let r = _mm_alignr_epi8(a, b, 0);
        assert_eq_m128i(r, b);
    }

    #[simd_test(enable = "ssse3")]
    unsafe fn test_mm_hadd_epi16() {
        let a = _mm_setr_epi16(1, 2, 3, 4, 5, 6, 7, 8);
        let b = _mm_setr_epi16(4, 128, 4, 3, 24, 12, 6, 19);
        let expected = _mm_setr_epi16(3, 7, 11, 15, 132, 7, 36, 25);
        let r = _mm_hadd_epi16(a, b);
        assert_eq_m128i(r, expected);
    }

    #[simd_test(enable = "ssse3")]
    unsafe fn test_mm_hadds_epi16() {
        let a = _mm_setr_epi16(1, 2, 3, 4, 5, 6, 7, 8);
        // Last two pairs of `b` overflow i16 and must saturate:
        // 32767 + 1 -> 32767, -32768 + -1 -> -32768.
        let b = _mm_setr_epi16(4, 128, 4, 3, 32767, 1, -32768, -1);
        let expected = _mm_setr_epi16(3, 7, 11, 15, 132, 7, 32767, -32768);
        let r = _mm_hadds_epi16(a, b);
        assert_eq_m128i(r, expected);
    }

    #[simd_test(enable = "ssse3")]
    unsafe fn test_mm_hadd_epi32() {
        let a = _mm_setr_epi32(1, 2, 3, 4);
        let b = _mm_setr_epi32(4, 128, 4, 3);
        let expected = _mm_setr_epi32(3, 7, 132, 7);
        let r = _mm_hadd_epi32(a, b);
        assert_eq_m128i(r, expected);
    }

    #[simd_test(enable = "ssse3")]
    unsafe fn test_mm_hsub_epi16() {
        let a = _mm_setr_epi16(1, 2, 3, 4, 5, 6, 7, 8);
        let b = _mm_setr_epi16(4, 128, 4, 3, 24, 12, 6, 19);
        let expected = _mm_setr_epi16(-1, -1, -1, -1, -124, 1, 12, -13);
        let r = _mm_hsub_epi16(a, b);
        assert_eq_m128i(r, expected);
    }

    #[simd_test(enable = "ssse3")]
    unsafe fn test_mm_hsubs_epi16() {
        let a = _mm_setr_epi16(1, 2, 3, 4, 5, 6, 7, 8);
        // Last two pairs of `b` overflow i16 and must saturate:
        // 32767 - (-1) -> 32767, -32768 - 1 -> -32768.
        let b = _mm_setr_epi16(4, 128, 4, 3, 32767, -1, -32768, 1);
        let expected = _mm_setr_epi16(-1, -1, -1, -1, -124, 1, 32767, -32768);
        let r = _mm_hsubs_epi16(a, b);
        assert_eq_m128i(r, expected);
    }

    #[simd_test(enable = "ssse3")]
    unsafe fn test_mm_hsub_epi32() {
        let a = _mm_setr_epi32(1, 2, 3, 4);
        let b = _mm_setr_epi32(4, 128, 4, 3);
        let expected = _mm_setr_epi32(-1, -1, -124, 1);
        let r = _mm_hsub_epi32(a, b);
        assert_eq_m128i(r, expected);
    }

    #[simd_test(enable = "ssse3")]
    unsafe fn test_mm_maddubs_epi16() {
        #[rustfmt::skip]
        let a = _mm_setr_epi8(
            1, 2, 3, 4, 5, 6, 7, 8,
            9, 10, 11, 12, 13, 14, 15, 16,
        );
        #[rustfmt::skip]
        let b = _mm_setr_epi8(
            4, 63, 4, 3,
            24, 12, 6, 19,
            12, 5, 5, 10,
            4, 1, 8, 0,
        );
        // Each i16 lane is u8*i8 + u8*i8, e.g. lane 0: 1*4 + 2*63 = 130.
        let expected = _mm_setr_epi16(130, 24, 192, 194, 158, 175, 66, 120);
        let r = _mm_maddubs_epi16(a, b);
        assert_eq_m128i(r, expected);
    }

    #[simd_test(enable = "ssse3")]
    unsafe fn test_mm_mulhrs_epi16() {
        let a = _mm_setr_epi16(1, 2, 3, 4, 5, 6, 7, 8);
        let b = _mm_setr_epi16(4, 128, 4, 3, 32767, -1, -32768, 1);
        // Rounded high part of the 32-bit products; small products round to 0.
        let expected = _mm_setr_epi16(0, 0, 0, 0, 5, 0, -7, 0);
        let r = _mm_mulhrs_epi16(a, b);
        assert_eq_m128i(r, expected);
    }

    #[simd_test(enable = "ssse3")]
    unsafe fn test_mm_sign_epi8() {
        #[rustfmt::skip]
        let a = _mm_setr_epi8(
            1, 2, 3, 4, 5, 6, 7, 8,
            9, 10, 11, 12, 13, -14, -15, 16,
        );
        // Negative lanes of `b` negate `a`; zero lanes of `b` zero the result.
        #[rustfmt::skip]
        let b = _mm_setr_epi8(
            4, 63, -4, 3, 24, 12, -6, -19,
            12, 5, -5, 10, 4, 1, -8, 0,
        );
        #[rustfmt::skip]
        let expected = _mm_setr_epi8(
            1, 2, -3, 4, 5, 6, -7, -8,
            9, 10, -11, 12, 13, -14, 15, 0,
        );
        let r = _mm_sign_epi8(a, b);
        assert_eq_m128i(r, expected);
    }

    #[simd_test(enable = "ssse3")]
    unsafe fn test_mm_sign_epi16() {
        let a = _mm_setr_epi16(1, 2, 3, 4, -5, -6, 7, 8);
        let b = _mm_setr_epi16(4, 128, 0, 3, 1, -1, -2, 1);
        let expected = _mm_setr_epi16(1, 2, 0, 4, -5, 6, -7, 8);
        let r = _mm_sign_epi16(a, b);
        assert_eq_m128i(r, expected);
    }

    #[simd_test(enable = "ssse3")]
    unsafe fn test_mm_sign_epi32() {
        let a = _mm_setr_epi32(-1, 2, 3, 4);
        let b = _mm_setr_epi32(1, -1, 1, 0);
        let expected = _mm_setr_epi32(-1, -2, 3, 0);
        let r = _mm_sign_epi32(a, b);
        assert_eq_m128i(r, expected);
    }
}