//! Supplemental Streaming SIMD Extensions 3 (SSSE3)

use coresimd::simd::*;
use coresimd::simd_llvm::simd_shuffle16;
use coresimd::x86::*;
use mem;

#[cfg(test)]
use stdsimd_test::assert_instr;

/// Compute the absolute value of packed 8-bit signed integers in `a` and
/// return the unsigned results.
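///
/// A per-lane scalar sketch of the operation (illustration only; the helper
/// name below is not part of any API):
///
/// ```
/// fn abs_i8_lane(x: i8) -> u8 {
///     // Widen before taking the absolute value so that |-128| = 128 is
///     // representable; the result is kept as an unsigned byte.
///     (x as i16).abs() as u8
/// }
/// ```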
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_abs_epi8)
#[inline]
#[target_feature(enable = "ssse3")]
#[cfg_attr(test, assert_instr(pabsb))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_abs_epi8(a: __m128i) -> __m128i {
    mem::transmute(pabsb128(a.as_i8x16()))
}

/// Compute the absolute value of each of the packed 16-bit signed integers in
/// `a` and return the 16-bit unsigned integers.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_abs_epi16)
#[inline]
#[target_feature(enable = "ssse3")]
#[cfg_attr(test, assert_instr(pabsw))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_abs_epi16(a: __m128i) -> __m128i {
    mem::transmute(pabsw128(a.as_i16x8()))
}

/// Compute the absolute value of each of the packed 32-bit signed integers in
/// `a` and return the 32-bit unsigned integers.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_abs_epi32)
#[inline]
#[target_feature(enable = "ssse3")]
#[cfg_attr(test, assert_instr(pabsd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_abs_epi32(a: __m128i) -> __m128i {
    mem::transmute(pabsd128(a.as_i32x4()))
}

/// Shuffle bytes from `a` according to the content of `b`.
///
/// The lowest 4 bits of each byte of `b` are used as indices
/// into the 16 bytes of `a`.
///
/// In addition, if the most significant bit of a byte of `b`
/// is set, the respective destination byte is set to 0.
///
/// Picturing `a` and `b` as `[u8; 16]`, `_mm_shuffle_epi8` is
/// logically equivalent to:
///
/// ```
/// fn mm_shuffle_epi8(a: [u8; 16], b: [u8; 16]) -> [u8; 16] {
///     let mut r = [0u8; 16];
///     for i in 0..16 {
///         // if the most significant bit of b is set,
///         // then the destination byte is set to 0.
///         if b[i] & 0x80 == 0u8 {
///             r[i] = a[(b[i] % 16) as usize];
///         }
///     }
///     r
/// }
/// ```
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_shuffle_epi8)
#[inline]
#[target_feature(enable = "ssse3")]
#[cfg_attr(test, assert_instr(pshufb))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_shuffle_epi8(a: __m128i, b: __m128i) -> __m128i {
    mem::transmute(pshufb128(a.as_u8x16(), b.as_u8x16()))
}

/// Concatenate 16-byte blocks in `a` and `b` into a 32-byte temporary result,
/// shift the result right by `n` bytes, and return the low 16 bytes.
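///
/// A scalar sketch of the concatenate-and-shift behavior (illustration only;
/// the helper below is not part of the API and assumes `0 <= n <= 32`):
///
/// ```
/// fn alignr(a: [u8; 16], b: [u8; 16], n: usize) -> [u8; 16] {
///     // Concatenate with `b` in the low 16 bytes and `a` in the high 16.
///     let mut concat = [0u8; 32];
///     concat[..16].copy_from_slice(&b);
///     concat[16..].copy_from_slice(&a);
///     // Shift right by `n` bytes and keep the low 16, shifting in zeros.
///     let mut r = [0u8; 16];
///     for i in 0..16 {
///         if i + n < 32 {
///             r[i] = concat[i + n];
///         }
///     }
///     r
/// }
/// ```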
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_alignr_epi8)
#[inline]
#[target_feature(enable = "ssse3")]
#[cfg_attr(test, assert_instr(palignr, n = 15))]
#[rustc_args_required_const(2)]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_alignr_epi8(a: __m128i, b: __m128i, n: i32) -> __m128i {
    let n = n as u32;
    // If palignr is shifting the pair of vectors more than the size of two
    // lanes, emit zero.
    if n > 32 {
        return _mm_set1_epi8(0);
    }
    // If palignr is shifting the pair of input vectors more than one lane,
    // but less than two lanes, convert to shifting in zeroes.
    let (a, b, n) = if n > 16 {
        (_mm_set1_epi8(0), a, n - 16)
    } else {
        (a, b, n)
    };
    let a = a.as_i8x16();
    let b = b.as_i8x16();

    macro_rules! shuffle {
        ($shift:expr) => {
            simd_shuffle16(
                b,
                a,
                [
                    0 + $shift,
                    1 + $shift,
                    2 + $shift,
                    3 + $shift,
                    4 + $shift,
                    5 + $shift,
                    6 + $shift,
                    7 + $shift,
                    8 + $shift,
                    9 + $shift,
                    10 + $shift,
                    11 + $shift,
                    12 + $shift,
                    13 + $shift,
                    14 + $shift,
                    15 + $shift,
                ],
            )
        };
    }
    let r: i8x16 = match n {
        0 => shuffle!(0),
        1 => shuffle!(1),
        2 => shuffle!(2),
        3 => shuffle!(3),
        4 => shuffle!(4),
        5 => shuffle!(5),
        6 => shuffle!(6),
        7 => shuffle!(7),
        8 => shuffle!(8),
        9 => shuffle!(9),
        10 => shuffle!(10),
        11 => shuffle!(11),
        12 => shuffle!(12),
        13 => shuffle!(13),
        14 => shuffle!(14),
        15 => shuffle!(15),
        _ => shuffle!(16),
    };
    mem::transmute(r)
}

/// Horizontally add the adjacent pairs of values contained in 2 packed
/// 128-bit vectors of `[8 x i16]`.
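///
/// A scalar sketch of the pairing (illustration only; the helper below is not
/// part of the API):
///
/// ```
/// fn hadd_epi16(a: [i16; 8], b: [i16; 8]) -> [i16; 8] {
///     let mut r = [0i16; 8];
///     for i in 0..4 {
///         // Pairs from `a` fill the low half, pairs from `b` the high half;
///         // additions wrap on overflow.
///         r[i] = a[2 * i].wrapping_add(a[2 * i + 1]);
///         r[i + 4] = b[2 * i].wrapping_add(b[2 * i + 1]);
///     }
///     r
/// }
/// ```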
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_hadd_epi16)
#[inline]
#[target_feature(enable = "ssse3")]
#[cfg_attr(test, assert_instr(phaddw))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_hadd_epi16(a: __m128i, b: __m128i) -> __m128i {
    mem::transmute(phaddw128(a.as_i16x8(), b.as_i16x8()))
}

/// Horizontally add the adjacent pairs of values contained in 2 packed
/// 128-bit vectors of `[8 x i16]`. Positive sums greater than 7FFFh are
/// saturated to 7FFFh. Negative sums less than 8000h are saturated to 8000h.
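///
/// A scalar sketch of the saturating variant (illustration only; the helper
/// below is not part of the API):
///
/// ```
/// fn hadds_epi16(a: [i16; 8], b: [i16; 8]) -> [i16; 8] {
///     let mut r = [0i16; 8];
///     for i in 0..4 {
///         // Same pairing as `_mm_hadd_epi16`, but the sums saturate to i16.
///         r[i] = a[2 * i].saturating_add(a[2 * i + 1]);
///         r[i + 4] = b[2 * i].saturating_add(b[2 * i + 1]);
///     }
///     r
/// }
/// ```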
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_hadds_epi16)
#[inline]
#[target_feature(enable = "ssse3")]
#[cfg_attr(test, assert_instr(phaddsw))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_hadds_epi16(a: __m128i, b: __m128i) -> __m128i {
    mem::transmute(phaddsw128(a.as_i16x8(), b.as_i16x8()))
}

/// Horizontally add the adjacent pairs of values contained in 2 packed
/// 128-bit vectors of `[4 x i32]`.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_hadd_epi32)
#[inline]
#[target_feature(enable = "ssse3")]
#[cfg_attr(test, assert_instr(phaddd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_hadd_epi32(a: __m128i, b: __m128i) -> __m128i {
    mem::transmute(phaddd128(a.as_i32x4(), b.as_i32x4()))
}

/// Horizontally subtract the adjacent pairs of values contained in 2
/// packed 128-bit vectors of `[8 x i16]`.
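///
/// A scalar sketch of the pairing (illustration only; the helper below is not
/// part of the API):
///
/// ```
/// fn hsub_epi16(a: [i16; 8], b: [i16; 8]) -> [i16; 8] {
///     let mut r = [0i16; 8];
///     for i in 0..4 {
///         // Within each pair the second element is subtracted from the
///         // first; subtractions wrap on overflow.
///         r[i] = a[2 * i].wrapping_sub(a[2 * i + 1]);
///         r[i + 4] = b[2 * i].wrapping_sub(b[2 * i + 1]);
///     }
///     r
/// }
/// ```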
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_hsub_epi16)
#[inline]
#[target_feature(enable = "ssse3")]
#[cfg_attr(test, assert_instr(phsubw))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_hsub_epi16(a: __m128i, b: __m128i) -> __m128i {
    mem::transmute(phsubw128(a.as_i16x8(), b.as_i16x8()))
}

/// Horizontally subtract the adjacent pairs of values contained in 2
/// packed 128-bit vectors of `[8 x i16]`. Positive differences greater than
/// 7FFFh are saturated to 7FFFh. Negative differences less than 8000h are
/// saturated to 8000h.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_hsubs_epi16)
#[inline]
#[target_feature(enable = "ssse3")]
#[cfg_attr(test, assert_instr(phsubsw))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_hsubs_epi16(a: __m128i, b: __m128i) -> __m128i {
    mem::transmute(phsubsw128(a.as_i16x8(), b.as_i16x8()))
}

/// Horizontally subtract the adjacent pairs of values contained in 2
/// packed 128-bit vectors of `[4 x i32]`.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_hsub_epi32)
#[inline]
#[target_feature(enable = "ssse3")]
#[cfg_attr(test, assert_instr(phsubd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_hsub_epi32(a: __m128i, b: __m128i) -> __m128i {
    mem::transmute(phsubd128(a.as_i32x4(), b.as_i32x4()))
}

/// Multiply corresponding pairs of packed 8-bit unsigned integer
/// values contained in the first source operand and packed 8-bit signed
/// integer values contained in the second source operand, add pairs of
/// contiguous products with signed saturation, and write the 16-bit sums to
/// the corresponding bits in the destination.
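///
/// A scalar sketch of the multiply-add (illustration only; the helper below
/// is not part of the API):
///
/// ```
/// fn maddubs_epi16(a: [u8; 16], b: [i8; 16]) -> [i16; 8] {
///     let mut r = [0i16; 8];
///     for i in 0..8 {
///         // Multiply unsigned bytes of `a` by signed bytes of `b`, then add
///         // each pair of products with signed saturation to 16 bits.
///         let sum = a[2 * i] as i32 * b[2 * i] as i32
///             + a[2 * i + 1] as i32 * b[2 * i + 1] as i32;
///         r[i] = if sum > 32767 {
///             32767
///         } else if sum < -32768 {
///             -32768
///         } else {
///             sum as i16
///         };
///     }
///     r
/// }
/// ```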
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maddubs_epi16)
#[inline]
#[target_feature(enable = "ssse3")]
#[cfg_attr(test, assert_instr(pmaddubsw))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_maddubs_epi16(a: __m128i, b: __m128i) -> __m128i {
    mem::transmute(pmaddubsw128(a.as_u8x16(), b.as_i8x16()))
}

/// Multiply packed 16-bit signed integer values, truncate the 32-bit
/// product to the 18 most significant bits by right-shifting, round the
/// truncated value by adding 1, and write bits `[16:1]` to the destination.
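///
/// A per-lane scalar sketch of the fixed-point multiply described above
/// (illustration only; the helper below is not part of the API):
///
/// ```
/// fn mulhrs_lane(a: i16, b: i16) -> i16 {
///     // Full 32-bit product, shifted right by 14, rounded by adding 1;
///     // bits [16:1] are then kept, i.e. one final shift right.
///     let tmp = ((a as i32 * b as i32) >> 14) + 1;
///     (tmp >> 1) as i16
/// }
/// ```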
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mulhrs_epi16)
#[inline]
#[target_feature(enable = "ssse3")]
#[cfg_attr(test, assert_instr(pmulhrsw))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_mulhrs_epi16(a: __m128i, b: __m128i) -> __m128i {
    mem::transmute(pmulhrsw128(a.as_i16x8(), b.as_i16x8()))
}

/// Negate packed 8-bit integers in `a` when the corresponding signed 8-bit
/// integer in `b` is negative, and return the results.
/// Elements in the result are zeroed out when the corresponding element in
/// `b` is zero.
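///
/// A per-lane scalar sketch (illustration only; the helper below is not part
/// of the API):
///
/// ```
/// fn sign_lane(a: i8, b: i8) -> i8 {
///     if b < 0 {
///         // Negation wraps, so the lane value -128 stays -128.
///         a.wrapping_neg()
///     } else if b == 0 {
///         0
///     } else {
///         a
///     }
/// }
/// ```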
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_sign_epi8)
#[inline]
#[target_feature(enable = "ssse3")]
#[cfg_attr(test, assert_instr(psignb))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_sign_epi8(a: __m128i, b: __m128i) -> __m128i {
    mem::transmute(psignb128(a.as_i8x16(), b.as_i8x16()))
}

/// Negate packed 16-bit integers in `a` when the corresponding signed 16-bit
/// integer in `b` is negative, and return the results.
/// Elements in the result are zeroed out when the corresponding element in
/// `b` is zero.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_sign_epi16)
#[inline]
#[target_feature(enable = "ssse3")]
#[cfg_attr(test, assert_instr(psignw))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_sign_epi16(a: __m128i, b: __m128i) -> __m128i {
    mem::transmute(psignw128(a.as_i16x8(), b.as_i16x8()))
}

/// Negate packed 32-bit integers in `a` when the corresponding signed 32-bit
/// integer in `b` is negative, and return the results.
/// Elements in the result are zeroed out when the corresponding element in
/// `b` is zero.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_sign_epi32)
#[inline]
#[target_feature(enable = "ssse3")]
#[cfg_attr(test, assert_instr(psignd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_sign_epi32(a: __m128i, b: __m128i) -> __m128i {
    mem::transmute(psignd128(a.as_i32x4(), b.as_i32x4()))
}

/// Compute the absolute value of packed 8-bit integers in `a` and
/// return the unsigned results.
#[inline]
#[target_feature(enable = "ssse3,mmx")]
#[cfg_attr(test, assert_instr(pabsb))]
pub unsafe fn _mm_abs_pi8(a: __m64) -> __m64 {
    pabsb(a)
}

/// Compute the absolute value of packed 16-bit integers in `a`, and return the
/// unsigned results.
#[inline]
#[target_feature(enable = "ssse3,mmx")]
#[cfg_attr(test, assert_instr(pabsw))]
pub unsafe fn _mm_abs_pi16(a: __m64) -> __m64 {
    pabsw(a)
}

/// Compute the absolute value of packed 32-bit integers in `a`, and return the
/// unsigned results.
#[inline]
#[target_feature(enable = "ssse3,mmx")]
#[cfg_attr(test, assert_instr(pabsd))]
pub unsafe fn _mm_abs_pi32(a: __m64) -> __m64 {
    pabsd(a)
}

/// Shuffle packed 8-bit integers in `a` according to the shuffle control mask
/// in the corresponding 8-bit element of `b`, and return the results.
#[inline]
#[target_feature(enable = "ssse3,mmx")]
#[cfg_attr(test, assert_instr(pshufb))]
pub unsafe fn _mm_shuffle_pi8(a: __m64, b: __m64) -> __m64 {
    pshufb(a, b)
}

/// Concatenates the two 64-bit integer vector operands, and right-shifts
/// the result by the number of bytes specified in the immediate operand.
#[inline]
#[target_feature(enable = "ssse3,mmx")]
#[cfg_attr(test, assert_instr(palignr, n = 15))]
#[rustc_args_required_const(2)]
pub unsafe fn _mm_alignr_pi8(a: __m64, b: __m64, n: i32) -> __m64 {
    macro_rules! call {
        ($imm8:expr) => {
            palignrb(a, b, $imm8)
        };
    }
    constify_imm8!(n, call)
}

/// Horizontally add the adjacent pairs of values contained in 2 packed
/// 64-bit vectors of `[4 x i16]`.
#[inline]
#[target_feature(enable = "ssse3,mmx")]
#[cfg_attr(test, assert_instr(phaddw))]
pub unsafe fn _mm_hadd_pi16(a: __m64, b: __m64) -> __m64 {
    phaddw(a, b)
}

/// Horizontally add the adjacent pairs of values contained in 2 packed
/// 64-bit vectors of `[2 x i32]`.
#[inline]
#[target_feature(enable = "ssse3,mmx")]
#[cfg_attr(test, assert_instr(phaddd))]
pub unsafe fn _mm_hadd_pi32(a: __m64, b: __m64) -> __m64 {
    phaddd(a, b)
}

/// Horizontally add the adjacent pairs of values contained in 2 packed
/// 64-bit vectors of `[4 x i16]`. Positive sums greater than 7FFFh are
/// saturated to 7FFFh. Negative sums less than 8000h are saturated to 8000h.
#[inline]
#[target_feature(enable = "ssse3,mmx")]
#[cfg_attr(test, assert_instr(phaddsw))]
pub unsafe fn _mm_hadds_pi16(a: __m64, b: __m64) -> __m64 {
    phaddsw(a, b)
}

/// Horizontally subtracts the adjacent pairs of values contained in 2
/// packed 64-bit vectors of `[4 x i16]`.
#[inline]
#[target_feature(enable = "ssse3,mmx")]
#[cfg_attr(test, assert_instr(phsubw))]
pub unsafe fn _mm_hsub_pi16(a: __m64, b: __m64) -> __m64 {
    phsubw(a, b)
}

/// Horizontally subtracts the adjacent pairs of values contained in 2
/// packed 64-bit vectors of `[2 x i32]`.
#[inline]
#[target_feature(enable = "ssse3,mmx")]
#[cfg_attr(test, assert_instr(phsubd))]
pub unsafe fn _mm_hsub_pi32(a: __m64, b: __m64) -> __m64 {
    phsubd(a, b)
}

/// Horizontally subtracts the adjacent pairs of values contained in 2
/// packed 64-bit vectors of `[4 x i16]`. Positive differences greater than
/// 7FFFh are saturated to 7FFFh. Negative differences less than 8000h are
/// saturated to 8000h.
#[inline]
#[target_feature(enable = "ssse3,mmx")]
#[cfg_attr(test, assert_instr(phsubsw))]
pub unsafe fn _mm_hsubs_pi16(a: __m64, b: __m64) -> __m64 {
    phsubsw(a, b)
}

/// Multiplies corresponding pairs of packed 8-bit unsigned integer
/// values contained in the first source operand and packed 8-bit signed
/// integer values contained in the second source operand, adds pairs of
/// contiguous products with signed saturation, and writes the 16-bit sums to
/// the corresponding bits in the destination.
#[inline]
#[target_feature(enable = "ssse3,mmx")]
#[cfg_attr(test, assert_instr(pmaddubsw))]
pub unsafe fn _mm_maddubs_pi16(a: __m64, b: __m64) -> __m64 {
    pmaddubsw(a, b)
}

/// Multiplies packed 16-bit signed integer values, truncates the 32-bit
/// products to the 18 most significant bits by right-shifting, rounds the
/// truncated value by adding 1, and writes bits `[16:1]` to the destination.
#[inline]
#[target_feature(enable = "ssse3,mmx")]
#[cfg_attr(test, assert_instr(pmulhrsw))]
pub unsafe fn _mm_mulhrs_pi16(a: __m64, b: __m64) -> __m64 {
    pmulhrsw(a, b)
}

/// Negate packed 8-bit integers in `a` when the corresponding signed 8-bit
/// integer in `b` is negative, and return the results.
/// Elements in the result are zeroed out when the corresponding element in
/// `b` is zero.
#[inline]
#[target_feature(enable = "ssse3,mmx")]
#[cfg_attr(test, assert_instr(psignb))]
pub unsafe fn _mm_sign_pi8(a: __m64, b: __m64) -> __m64 {
    psignb(a, b)
}

/// Negate packed 16-bit integers in `a` when the corresponding signed 16-bit
/// integer in `b` is negative, and return the results.
/// Elements in the result are zeroed out when the corresponding element in
/// `b` is zero.
#[inline]
#[target_feature(enable = "ssse3,mmx")]
#[cfg_attr(test, assert_instr(psignw))]
pub unsafe fn _mm_sign_pi16(a: __m64, b: __m64) -> __m64 {
    psignw(a, b)
}

/// Negate packed 32-bit integers in `a` when the corresponding signed 32-bit
/// integer in `b` is negative, and return the results.
/// Elements in the result are zeroed out when the corresponding element in
/// `b` is zero.
#[inline]
#[target_feature(enable = "ssse3,mmx")]
#[cfg_attr(test, assert_instr(psignd))]
pub unsafe fn _mm_sign_pi32(a: __m64, b: __m64) -> __m64 {
    psignd(a, b)
}

#[allow(improper_ctypes)]
extern "C" {
    #[link_name = "llvm.x86.ssse3.pabs.b.128"]
    fn pabsb128(a: i8x16) -> u8x16;

    #[link_name = "llvm.x86.ssse3.pabs.w.128"]
    fn pabsw128(a: i16x8) -> u16x8;

    #[link_name = "llvm.x86.ssse3.pabs.d.128"]
    fn pabsd128(a: i32x4) -> u32x4;

    #[link_name = "llvm.x86.ssse3.pshuf.b.128"]
    fn pshufb128(a: u8x16, b: u8x16) -> u8x16;

    #[link_name = "llvm.x86.ssse3.phadd.w.128"]
    fn phaddw128(a: i16x8, b: i16x8) -> i16x8;

    #[link_name = "llvm.x86.ssse3.phadd.sw.128"]
    fn phaddsw128(a: i16x8, b: i16x8) -> i16x8;

    #[link_name = "llvm.x86.ssse3.phadd.d.128"]
    fn phaddd128(a: i32x4, b: i32x4) -> i32x4;

    #[link_name = "llvm.x86.ssse3.phsub.w.128"]
    fn phsubw128(a: i16x8, b: i16x8) -> i16x8;

    #[link_name = "llvm.x86.ssse3.phsub.sw.128"]
    fn phsubsw128(a: i16x8, b: i16x8) -> i16x8;

    #[link_name = "llvm.x86.ssse3.phsub.d.128"]
    fn phsubd128(a: i32x4, b: i32x4) -> i32x4;

    #[link_name = "llvm.x86.ssse3.pmadd.ub.sw.128"]
    fn pmaddubsw128(a: u8x16, b: i8x16) -> i16x8;

    #[link_name = "llvm.x86.ssse3.pmul.hr.sw.128"]
    fn pmulhrsw128(a: i16x8, b: i16x8) -> i16x8;

    #[link_name = "llvm.x86.ssse3.psign.b.128"]
    fn psignb128(a: i8x16, b: i8x16) -> i8x16;

    #[link_name = "llvm.x86.ssse3.psign.w.128"]
    fn psignw128(a: i16x8, b: i16x8) -> i16x8;

    #[link_name = "llvm.x86.ssse3.psign.d.128"]
    fn psignd128(a: i32x4, b: i32x4) -> i32x4;

    #[link_name = "llvm.x86.ssse3.pabs.b"]
    fn pabsb(a: __m64) -> __m64;

    #[link_name = "llvm.x86.ssse3.pabs.w"]
    fn pabsw(a: __m64) -> __m64;

    #[link_name = "llvm.x86.ssse3.pabs.d"]
    fn pabsd(a: __m64) -> __m64;

    #[link_name = "llvm.x86.ssse3.pshuf.b"]
    fn pshufb(a: __m64, b: __m64) -> __m64;

    #[link_name = "llvm.x86.mmx.palignr.b"]
    fn palignrb(a: __m64, b: __m64, n: u8) -> __m64;

    #[link_name = "llvm.x86.ssse3.phadd.w"]
    fn phaddw(a: __m64, b: __m64) -> __m64;

    #[link_name = "llvm.x86.ssse3.phadd.d"]
    fn phaddd(a: __m64, b: __m64) -> __m64;

    #[link_name = "llvm.x86.ssse3.phadd.sw"]
    fn phaddsw(a: __m64, b: __m64) -> __m64;

    #[link_name = "llvm.x86.ssse3.phsub.w"]
    fn phsubw(a: __m64, b: __m64) -> __m64;

    #[link_name = "llvm.x86.ssse3.phsub.d"]
    fn phsubd(a: __m64, b: __m64) -> __m64;

    #[link_name = "llvm.x86.ssse3.phsub.sw"]
    fn phsubsw(a: __m64, b: __m64) -> __m64;

    #[link_name = "llvm.x86.ssse3.pmadd.ub.sw"]
    fn pmaddubsw(a: __m64, b: __m64) -> __m64;

    #[link_name = "llvm.x86.ssse3.pmul.hr.sw"]
    fn pmulhrsw(a: __m64, b: __m64) -> __m64;

    #[link_name = "llvm.x86.ssse3.psign.b"]
    fn psignb(a: __m64, b: __m64) -> __m64;

    #[link_name = "llvm.x86.ssse3.psign.w"]
    fn psignw(a: __m64, b: __m64) -> __m64;

    #[link_name = "llvm.x86.ssse3.psign.d"]
    fn psignd(a: __m64, b: __m64) -> __m64;
}

#[cfg(test)]
mod tests {
    use stdsimd_test::simd_test;

    use coresimd::x86::*;

    #[simd_test(enable = "ssse3")]
    unsafe fn test_mm_abs_epi8() {
        let r = _mm_abs_epi8(_mm_set1_epi8(-5));
        assert_eq_m128i(r, _mm_set1_epi8(5));
    }

    #[simd_test(enable = "ssse3")]
    unsafe fn test_mm_abs_epi16() {
        let r = _mm_abs_epi16(_mm_set1_epi16(-5));
        assert_eq_m128i(r, _mm_set1_epi16(5));
    }

    #[simd_test(enable = "ssse3")]
    unsafe fn test_mm_abs_epi32() {
        let r = _mm_abs_epi32(_mm_set1_epi32(-5));
        assert_eq_m128i(r, _mm_set1_epi32(5));
    }

    #[simd_test(enable = "ssse3")]
    unsafe fn test_mm_shuffle_epi8() {
        #[cfg_attr(rustfmt, rustfmt_skip)]
        let a = _mm_setr_epi8(
            1, 2, 3, 4, 5, 6, 7, 8,
            9, 10, 11, 12, 13, 14, 15, 16,
        );
        #[cfg_attr(rustfmt, rustfmt_skip)]
        let b = _mm_setr_epi8(
            4, 128_u8 as i8, 4, 3,
            24, 12, 6, 19,
            12, 5, 5, 10,
            4, 1, 8, 0,
        );
        let expected =
            _mm_setr_epi8(5, 0, 5, 4, 9, 13, 7, 4, 13, 6, 6, 11, 5, 2, 9, 1);
        let r = _mm_shuffle_epi8(a, b);
        assert_eq_m128i(r, expected);
    }

    #[simd_test(enable = "ssse3")]
    unsafe fn test_mm_alignr_epi8() {
        #[cfg_attr(rustfmt, rustfmt_skip)]
        let a = _mm_setr_epi8(
            1, 2, 3, 4, 5, 6, 7, 8,
            9, 10, 11, 12, 13, 14, 15, 16,
        );
        #[cfg_attr(rustfmt, rustfmt_skip)]
        let b = _mm_setr_epi8(
            4, 63, 4, 3,
            24, 12, 6, 19,
            12, 5, 5, 10,
            4, 1, 8, 0,
        );
        let r = _mm_alignr_epi8(a, b, 33);
        assert_eq_m128i(r, _mm_set1_epi8(0));

        let r = _mm_alignr_epi8(a, b, 17);
        #[cfg_attr(rustfmt, rustfmt_skip)]
        let expected = _mm_setr_epi8(
            2, 3, 4, 5, 6, 7, 8, 9,
            10, 11, 12, 13, 14, 15, 16, 0,
        );
        assert_eq_m128i(r, expected);

        let r = _mm_alignr_epi8(a, b, 16);
        assert_eq_m128i(r, a);

        let r = _mm_alignr_epi8(a, b, 15);
        #[cfg_attr(rustfmt, rustfmt_skip)]
        let expected = _mm_setr_epi8(
            0, 1, 2, 3, 4, 5, 6, 7,
            8, 9, 10, 11, 12, 13, 14, 15,
        );
        assert_eq_m128i(r, expected);

        let r = _mm_alignr_epi8(a, b, 0);
        assert_eq_m128i(r, b);
    }

    #[simd_test(enable = "ssse3")]
    unsafe fn test_mm_hadd_epi16() {
        let a = _mm_setr_epi16(1, 2, 3, 4, 5, 6, 7, 8);
        let b = _mm_setr_epi16(4, 128, 4, 3, 24, 12, 6, 19);
        let expected = _mm_setr_epi16(3, 7, 11, 15, 132, 7, 36, 25);
        let r = _mm_hadd_epi16(a, b);
        assert_eq_m128i(r, expected);
    }

    #[simd_test(enable = "ssse3")]
    unsafe fn test_mm_hadds_epi16() {
        let a = _mm_setr_epi16(1, 2, 3, 4, 5, 6, 7, 8);
        let b = _mm_setr_epi16(4, 128, 4, 3, 32767, 1, -32768, -1);
        let expected = _mm_setr_epi16(3, 7, 11, 15, 132, 7, 32767, -32768);
        let r = _mm_hadds_epi16(a, b);
        assert_eq_m128i(r, expected);
    }

    #[simd_test(enable = "ssse3")]
    unsafe fn test_mm_hadd_epi32() {
        let a = _mm_setr_epi32(1, 2, 3, 4);
        let b = _mm_setr_epi32(4, 128, 4, 3);
        let expected = _mm_setr_epi32(3, 7, 132, 7);
        let r = _mm_hadd_epi32(a, b);
        assert_eq_m128i(r, expected);
    }

    #[simd_test(enable = "ssse3")]
    unsafe fn test_mm_hsub_epi16() {
        let a = _mm_setr_epi16(1, 2, 3, 4, 5, 6, 7, 8);
        let b = _mm_setr_epi16(4, 128, 4, 3, 24, 12, 6, 19);
        let expected = _mm_setr_epi16(-1, -1, -1, -1, -124, 1, 12, -13);
        let r = _mm_hsub_epi16(a, b);
        assert_eq_m128i(r, expected);
    }

    #[simd_test(enable = "ssse3")]
    unsafe fn test_mm_hsubs_epi16() {
        let a = _mm_setr_epi16(1, 2, 3, 4, 5, 6, 7, 8);
        let b = _mm_setr_epi16(4, 128, 4, 3, 32767, -1, -32768, 1);
        let expected = _mm_setr_epi16(-1, -1, -1, -1, -124, 1, 32767, -32768);
        let r = _mm_hsubs_epi16(a, b);
        assert_eq_m128i(r, expected);
    }

    #[simd_test(enable = "ssse3")]
    unsafe fn test_mm_hsub_epi32() {
        let a = _mm_setr_epi32(1, 2, 3, 4);
        let b = _mm_setr_epi32(4, 128, 4, 3);
        let expected = _mm_setr_epi32(-1, -1, -124, 1);
        let r = _mm_hsub_epi32(a, b);
        assert_eq_m128i(r, expected);
    }

    #[simd_test(enable = "ssse3")]
    unsafe fn test_mm_maddubs_epi16() {
        #[cfg_attr(rustfmt, rustfmt_skip)]
        let a = _mm_setr_epi8(
            1, 2, 3, 4, 5, 6, 7, 8,
            9, 10, 11, 12, 13, 14, 15, 16,
        );
        #[cfg_attr(rustfmt, rustfmt_skip)]
        let b = _mm_setr_epi8(
            4, 63, 4, 3,
            24, 12, 6, 19,
            12, 5, 5, 10,
            4, 1, 8, 0,
        );
        let expected = _mm_setr_epi16(130, 24, 192, 194, 158, 175, 66, 120);
        let r = _mm_maddubs_epi16(a, b);
        assert_eq_m128i(r, expected);
    }

    #[simd_test(enable = "ssse3")]
    unsafe fn test_mm_mulhrs_epi16() {
        let a = _mm_setr_epi16(1, 2, 3, 4, 5, 6, 7, 8);
        let b = _mm_setr_epi16(4, 128, 4, 3, 32767, -1, -32768, 1);
        let expected = _mm_setr_epi16(0, 0, 0, 0, 5, 0, -7, 0);
        let r = _mm_mulhrs_epi16(a, b);
        assert_eq_m128i(r, expected);
    }

    #[simd_test(enable = "ssse3")]
    unsafe fn test_mm_sign_epi8() {
        #[cfg_attr(rustfmt, rustfmt_skip)]
        let a = _mm_setr_epi8(
            1, 2, 3, 4, 5, 6, 7, 8,
            9, 10, 11, 12, 13, -14, -15, 16,
        );
        #[cfg_attr(rustfmt, rustfmt_skip)]
        let b = _mm_setr_epi8(
            4, 63, -4, 3, 24, 12, -6, -19,
            12, 5, -5, 10, 4, 1, -8, 0,
        );
        #[cfg_attr(rustfmt, rustfmt_skip)]
        let expected = _mm_setr_epi8(
            1, 2, -3, 4, 5, 6, -7, -8,
            9, 10, -11, 12, 13, -14, 15, 0,
        );
        let r = _mm_sign_epi8(a, b);
        assert_eq_m128i(r, expected);
    }

    #[simd_test(enable = "ssse3")]
    unsafe fn test_mm_sign_epi16() {
        let a = _mm_setr_epi16(1, 2, 3, 4, -5, -6, 7, 8);
        let b = _mm_setr_epi16(4, 128, 0, 3, 1, -1, -2, 1);
        let expected = _mm_setr_epi16(1, 2, 0, 4, -5, 6, -7, 8);
        let r = _mm_sign_epi16(a, b);
        assert_eq_m128i(r, expected);
    }

    #[simd_test(enable = "ssse3")]
    unsafe fn test_mm_sign_epi32() {
        let a = _mm_setr_epi32(-1, 2, 3, 4);
        let b = _mm_setr_epi32(1, -1, 1, 0);
        let expected = _mm_setr_epi32(-1, -2, 3, 0);
        let r = _mm_sign_epi32(a, b);
        assert_eq_m128i(r, expected);
    }

    #[simd_test(enable = "ssse3,mmx")]
    unsafe fn test_mm_abs_pi8() {
        let r = _mm_abs_pi8(_mm_set1_pi8(-5));
        assert_eq_m64(r, _mm_set1_pi8(5));
    }

    #[simd_test(enable = "ssse3,mmx")]
    unsafe fn test_mm_abs_pi16() {
        let r = _mm_abs_pi16(_mm_set1_pi16(-5));
        assert_eq_m64(r, _mm_set1_pi16(5));
    }

    #[simd_test(enable = "ssse3,mmx")]
    unsafe fn test_mm_abs_pi32() {
        let r = _mm_abs_pi32(_mm_set1_pi32(-5));
        assert_eq_m64(r, _mm_set1_pi32(5));
    }

    #[simd_test(enable = "ssse3,mmx")]
    unsafe fn test_mm_shuffle_pi8() {
        let a = _mm_setr_pi8(1, 2, 3, 4, 5, 6, 7, 8);
        let b = _mm_setr_pi8(4, 128u8 as i8, 4, 3, 24, 12, 6, 19);
        let expected = _mm_setr_pi8(5, 0, 5, 4, 1, 5, 7, 4);
        let r = _mm_shuffle_pi8(a, b);
        assert_eq_m64(r, expected);
    }

    #[simd_test(enable = "ssse3,mmx")]
    unsafe fn test_mm_alignr_pi8() {
        let a = _mm_setr_pi32(0x89ABCDEF_u32 as i32, 0x01234567_u32 as i32);
        let b = _mm_setr_pi32(0xBBAA9988_u32 as i32, 0xFFDDEECC_u32 as i32);
        let r = _mm_alignr_pi8(a, b, 4);
        assert_eq_m64(r, ::std::mem::transmute(0x89abcdefffddeecc_u64));
    }

    #[simd_test(enable = "ssse3,mmx")]
    unsafe fn test_mm_hadd_pi16() {
        let a = _mm_setr_pi16(1, 2, 3, 4);
        let b = _mm_setr_pi16(4, 128, 4, 3);
        let expected = _mm_setr_pi16(3, 7, 132, 7);
        let r = _mm_hadd_pi16(a, b);
        assert_eq_m64(r, expected);
    }

    #[simd_test(enable = "ssse3,mmx")]
    unsafe fn test_mm_hadd_pi32() {
        let a = _mm_setr_pi32(1, 2);
        let b = _mm_setr_pi32(4, 128);
        let expected = _mm_setr_pi32(3, 132);
        let r = _mm_hadd_pi32(a, b);
        assert_eq_m64(r, expected);
    }

    #[simd_test(enable = "ssse3,mmx")]
    unsafe fn test_mm_hadds_pi16() {
        let a = _mm_setr_pi16(1, 2, 3, 4);
        let b = _mm_setr_pi16(32767, 1, -32768, -1);
        let expected = _mm_setr_pi16(3, 7, 32767, -32768);
        let r = _mm_hadds_pi16(a, b);
        assert_eq_m64(r, expected);
    }

    #[simd_test(enable = "ssse3,mmx")]
    unsafe fn test_mm_hsub_pi16() {
        let a = _mm_setr_pi16(1, 2, 3, 4);
        let b = _mm_setr_pi16(4, 128, 4, 3);
        let expected = _mm_setr_pi16(-1, -1, -124, 1);
        let r = _mm_hsub_pi16(a, b);
        assert_eq_m64(r, expected);
    }

    #[simd_test(enable = "ssse3,mmx")]
    unsafe fn test_mm_hsub_pi32() {
        let a = _mm_setr_pi32(1, 2);
        let b = _mm_setr_pi32(4, 128);
        let expected = _mm_setr_pi32(-1, -124);
        let r = _mm_hsub_pi32(a, b);
        assert_eq_m64(r, expected);
    }

    #[simd_test(enable = "ssse3,mmx")]
    unsafe fn test_mm_hsubs_pi16() {
        let a = _mm_setr_pi16(1, 2, 3, 4);
        let b = _mm_setr_pi16(4, 128, 4, 3);
        let expected = _mm_setr_pi16(-1, -1, -124, 1);
        let r = _mm_hsubs_pi16(a, b);
        assert_eq_m64(r, expected);
    }

    #[simd_test(enable = "ssse3,mmx")]
    unsafe fn test_mm_maddubs_pi16() {
        let a = _mm_setr_pi8(1, 2, 3, 4, 5, 6, 7, 8);
        let b = _mm_setr_pi8(4, 63, 4, 3, 24, 12, 6, 19);
        let expected = _mm_setr_pi16(130, 24, 192, 194);
        let r = _mm_maddubs_pi16(a, b);
        assert_eq_m64(r, expected);
    }

    #[simd_test(enable = "ssse3,mmx")]
    unsafe fn test_mm_mulhrs_pi16() {
        let a = _mm_setr_pi16(1, 2, 3, 4);
        let b = _mm_setr_pi16(4, 32767, -1, -32768);
        let expected = _mm_setr_pi16(0, 2, 0, -4);
        let r = _mm_mulhrs_pi16(a, b);
        assert_eq_m64(r, expected);
    }

    #[simd_test(enable = "ssse3,mmx")]
    unsafe fn test_mm_sign_pi8() {
        let a = _mm_setr_pi8(1, 2, 3, 4, -5, -6, 7, 8);
        let b = _mm_setr_pi8(4, 64, 0, 3, 1, -1, -2, 1);
        let expected = _mm_setr_pi8(1, 2, 0, 4, -5, 6, -7, 8);
        let r = _mm_sign_pi8(a, b);
        assert_eq_m64(r, expected);
    }

    #[simd_test(enable = "ssse3,mmx")]
    unsafe fn test_mm_sign_pi16() {
        let a = _mm_setr_pi16(-1, 2, 3, 4);
        let b = _mm_setr_pi16(1, -1, 1, 0);
        let expected = _mm_setr_pi16(-1, -2, 3, 0);
        let r = _mm_sign_pi16(a, b);
        assert_eq_m64(r, expected);
    }

    #[simd_test(enable = "ssse3,mmx")]
    unsafe fn test_mm_sign_pi32() {
        let a = _mm_setr_pi32(-1, 2);
        let b = _mm_setr_pi32(1, 0);
        let expected = _mm_setr_pi32(-1, 0);
        let r = _mm_sign_pi32(a, b);
        assert_eq_m64(r, expected);
    }
}