//! Supplemental Streaming SIMD Extensions 3 (SSSE3)

use coresimd::simd::*;
use coresimd::simd_llvm::simd_shuffle16;
use coresimd::x86::*;
use mem;

#[cfg(test)]
use stdsimd_test::assert_instr;

/// Compute the absolute value of packed 8-bit signed integers in `a` and
/// return the unsigned results.
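///
/// Picturing `a` as `[i8; 16]`, a scalar sketch of the semantics (an
/// illustrative model only, not the actual implementation):
///
/// ```
/// fn abs_epi8(a: [i8; 16]) -> [u8; 16] {
///     let mut r = [0u8; 16];
///     for i in 0..16 {
///         // Widen before `abs` so that i8::MIN maps to 128, as `pabsb` does.
///         r[i] = (a[i] as i16).abs() as u8;
///     }
///     r
/// }
/// assert_eq!(abs_epi8([-5; 16]), [5u8; 16]);
/// ```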
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_abs_epi8)
#[inline]
#[target_feature(enable = "ssse3")]
#[cfg_attr(test, assert_instr(pabsb))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_abs_epi8(a: __m128i) -> __m128i {
    mem::transmute(pabsb128(a.as_i8x16()))
}

/// Compute the absolute value of each of the packed 16-bit signed integers in
/// `a` and return the 16-bit unsigned results.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_abs_epi16)
#[inline]
#[target_feature(enable = "ssse3")]
#[cfg_attr(test, assert_instr(pabsw))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_abs_epi16(a: __m128i) -> __m128i {
    mem::transmute(pabsw128(a.as_i16x8()))
}

/// Compute the absolute value of each of the packed 32-bit signed integers in
/// `a` and return the 32-bit unsigned results.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_abs_epi32)
#[inline]
#[target_feature(enable = "ssse3")]
#[cfg_attr(test, assert_instr(pabsd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_abs_epi32(a: __m128i) -> __m128i {
    mem::transmute(pabsd128(a.as_i32x4()))
}

/// Shuffle bytes from `a` according to the content of `b`.
///
/// The low 4 bits of each byte of `b` are used as addresses
/// into the 16 bytes of `a`.
///
/// In addition, if the most significant bit of a byte of `b`
/// is set, the respective destination byte is set to 0.
///
/// Picturing `a` and `b` as `[u8; 16]`, `_mm_shuffle_epi8` is
/// logically equivalent to:
///
/// ```
/// fn mm_shuffle_epi8(a: [u8; 16], b: [u8; 16]) -> [u8; 16] {
///     let mut r = [0u8; 16];
///     for i in 0..16 {
///         // if the most significant bit of b is set,
///         // then the destination byte is set to 0.
///         if b[i] & 0x80 == 0u8 {
///             r[i] = a[(b[i] % 16) as usize];
///         }
///     }
///     r
/// }
/// ```
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_shuffle_epi8)
#[inline]
#[target_feature(enable = "ssse3")]
#[cfg_attr(test, assert_instr(pshufb))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_shuffle_epi8(a: __m128i, b: __m128i) -> __m128i {
    mem::transmute(pshufb128(a.as_u8x16(), b.as_u8x16()))
}

/// Concatenate 16-byte blocks in `a` and `b` into a 32-byte temporary result,
/// shift the result right by `n` bytes, and return the low 16 bytes.
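///
/// Picturing the temporary as `[u8; 32]` with `b` in the low 16 bytes and
/// `a` in the high 16 bytes, a scalar sketch of the semantics (illustrative
/// only; the helper below is hypothetical, not part of this module):
///
/// ```
/// fn alignr_epi8(a: [u8; 16], b: [u8; 16], n: usize) -> [u8; 16] {
///     let mut concat = [0u8; 32];
///     concat[..16].copy_from_slice(&b);
///     concat[16..].copy_from_slice(&a);
///     let mut r = [0u8; 16];
///     for i in 0..16 {
///         // Bytes shifted in from beyond the 32-byte temporary are zero.
///         r[i] = if i + n < 32 { concat[i + n] } else { 0 };
///     }
///     r
/// }
/// assert_eq!(alignr_epi8([1; 16], [2; 16], 16), [1; 16]);
/// ```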
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_alignr_epi8)
#[inline]
#[target_feature(enable = "ssse3")]
#[cfg_attr(test, assert_instr(palignr, n = 15))]
#[rustc_args_required_const(2)]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_alignr_epi8(a: __m128i, b: __m128i, n: i32) -> __m128i {
    let n = n as u32;
    // If palignr is shifting the pair of vectors more than the size of two
    // lanes, emit zero.
    if n > 32 {
        return _mm_set1_epi8(0);
    }
    // If palignr is shifting the pair of input vectors more than one lane,
    // but less than two lanes, convert to shifting in zeroes.
    let (a, b, n) = if n > 16 {
        (_mm_set1_epi8(0), a, n - 16)
    } else {
        (a, b, n)
    };
    let a = a.as_i8x16();
    let b = b.as_i8x16();

    macro_rules! shuffle {
        ($shift:expr) => {
            simd_shuffle16(
                b,
                a,
                [
                    0 + $shift,
                    1 + $shift,
                    2 + $shift,
                    3 + $shift,
                    4 + $shift,
                    5 + $shift,
                    6 + $shift,
                    7 + $shift,
                    8 + $shift,
                    9 + $shift,
                    10 + $shift,
                    11 + $shift,
                    12 + $shift,
                    13 + $shift,
                    14 + $shift,
                    15 + $shift,
                ],
            )
        };
    }
    let r: i8x16 = match n {
        0 => shuffle!(0),
        1 => shuffle!(1),
        2 => shuffle!(2),
        3 => shuffle!(3),
        4 => shuffle!(4),
        5 => shuffle!(5),
        6 => shuffle!(6),
        7 => shuffle!(7),
        8 => shuffle!(8),
        9 => shuffle!(9),
        10 => shuffle!(10),
        11 => shuffle!(11),
        12 => shuffle!(12),
        13 => shuffle!(13),
        14 => shuffle!(14),
        15 => shuffle!(15),
        _ => shuffle!(16),
    };
    mem::transmute(r)
}

/// Horizontally add the adjacent pairs of values contained in 2 packed
/// 128-bit vectors of `[8 x i16]`.
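///
/// A scalar sketch of the semantics (illustrative only; the helper below is
/// hypothetical, not part of this module):
///
/// ```
/// fn hadd_epi16(a: [i16; 8], b: [i16; 8]) -> [i16; 8] {
///     let mut r = [0i16; 8];
///     for i in 0..4 {
///         // Adjacent pairs of `a` fill the low half of the result,
///         // adjacent pairs of `b` fill the high half; sums wrap.
///         r[i] = a[2 * i].wrapping_add(a[2 * i + 1]);
///         r[i + 4] = b[2 * i].wrapping_add(b[2 * i + 1]);
///     }
///     r
/// }
/// assert_eq!(hadd_epi16([1; 8], [2; 8]), [2, 2, 2, 2, 4, 4, 4, 4]);
/// ```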
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_hadd_epi16)
#[inline]
#[target_feature(enable = "ssse3")]
#[cfg_attr(test, assert_instr(phaddw))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_hadd_epi16(a: __m128i, b: __m128i) -> __m128i {
    mem::transmute(phaddw128(a.as_i16x8(), b.as_i16x8()))
}

/// Horizontally add the adjacent pairs of values contained in 2 packed
/// 128-bit vectors of `[8 x i16]`. Positive sums greater than 7FFFh are
/// saturated to 7FFFh. Negative sums less than 8000h are saturated to 8000h.
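///
/// Per pair, the saturating add behaves like the following scalar sketch
/// (illustrative only):
///
/// ```
/// fn hadds_pair(x: i16, y: i16) -> i16 {
///     // `saturating_add` clamps to the i16 range, matching `phaddsw`.
///     x.saturating_add(y)
/// }
/// assert_eq!(hadds_pair(32767, 1), 32767);
/// assert_eq!(hadds_pair(-32768, -1), -32768);
/// ```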
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_hadds_epi16)
#[inline]
#[target_feature(enable = "ssse3")]
#[cfg_attr(test, assert_instr(phaddsw))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_hadds_epi16(a: __m128i, b: __m128i) -> __m128i {
    mem::transmute(phaddsw128(a.as_i16x8(), b.as_i16x8()))
}

/// Horizontally add the adjacent pairs of values contained in 2 packed
/// 128-bit vectors of `[4 x i32]`.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_hadd_epi32)
#[inline]
#[target_feature(enable = "ssse3")]
#[cfg_attr(test, assert_instr(phaddd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_hadd_epi32(a: __m128i, b: __m128i) -> __m128i {
    mem::transmute(phaddd128(a.as_i32x4(), b.as_i32x4()))
}

/// Horizontally subtract the adjacent pairs of values contained in 2
/// packed 128-bit vectors of `[8 x i16]`.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_hsub_epi16)
#[inline]
#[target_feature(enable = "ssse3")]
#[cfg_attr(test, assert_instr(phsubw))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_hsub_epi16(a: __m128i, b: __m128i) -> __m128i {
    mem::transmute(phsubw128(a.as_i16x8(), b.as_i16x8()))
}

/// Horizontally subtract the adjacent pairs of values contained in 2
/// packed 128-bit vectors of `[8 x i16]`. Positive differences greater than
/// 7FFFh are saturated to 7FFFh. Negative differences less than 8000h are
/// saturated to 8000h.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_hsubs_epi16)
#[inline]
#[target_feature(enable = "ssse3")]
#[cfg_attr(test, assert_instr(phsubsw))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_hsubs_epi16(a: __m128i, b: __m128i) -> __m128i {
    mem::transmute(phsubsw128(a.as_i16x8(), b.as_i16x8()))
}

/// Horizontally subtract the adjacent pairs of values contained in 2
/// packed 128-bit vectors of `[4 x i32]`.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_hsub_epi32)
#[inline]
#[target_feature(enable = "ssse3")]
#[cfg_attr(test, assert_instr(phsubd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_hsub_epi32(a: __m128i, b: __m128i) -> __m128i {
    mem::transmute(phsubd128(a.as_i32x4(), b.as_i32x4()))
}

/// Multiply corresponding pairs of packed 8-bit unsigned integer
/// values contained in the first source operand and packed 8-bit signed
/// integer values contained in the second source operand, add pairs of
/// contiguous products with signed saturation, and write the 16-bit sums to
/// the corresponding bits in the destination.
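///
/// A scalar sketch of the semantics (illustrative only; the helper below is
/// hypothetical, not part of this module):
///
/// ```
/// fn maddubs_epi16(a: [u8; 16], b: [i8; 16]) -> [i16; 8] {
///     let mut r = [0i16; 8];
///     for i in 0..8 {
///         // Each 16-bit lane is the saturated sum of two adjacent
///         // unsigned-by-signed byte products.
///         let lo = a[2 * i] as i32 * b[2 * i] as i32;
///         let hi = a[2 * i + 1] as i32 * b[2 * i + 1] as i32;
///         let sum = (lo + hi)
///             .max(i16::min_value() as i32)
///             .min(i16::max_value() as i32);
///         r[i] = sum as i16;
///     }
///     r
/// }
/// assert_eq!(maddubs_epi16([1; 16], [2; 16])[0], 4);
/// ```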
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maddubs_epi16)
#[inline]
#[target_feature(enable = "ssse3")]
#[cfg_attr(test, assert_instr(pmaddubsw))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_maddubs_epi16(a: __m128i, b: __m128i) -> __m128i {
    mem::transmute(pmaddubsw128(a.as_u8x16(), b.as_i8x16()))
}

/// Multiply packed 16-bit signed integer values, truncate the 32-bit
/// products to the 18 most significant bits by right-shifting, round the
/// truncated value by adding 1, and write bits `[16:1]` to the destination.
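///
/// Per lane, the computation reduces to the following scalar sketch
/// (illustrative only):
///
/// ```
/// fn mulhrs_epi16(a: i16, b: i16) -> i16 {
///     // Keep the top 18 bits of the 32-bit product, round by adding 1,
///     // then drop the rounding bit, leaving bits [16:1].
///     ((((a as i32 * b as i32) >> 14) + 1) >> 1) as i16
/// }
/// assert_eq!(mulhrs_epi16(7, -32768), -7);
/// ```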
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mulhrs_epi16)
#[inline]
#[target_feature(enable = "ssse3")]
#[cfg_attr(test, assert_instr(pmulhrsw))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_mulhrs_epi16(a: __m128i, b: __m128i) -> __m128i {
    mem::transmute(pmulhrsw128(a.as_i16x8(), b.as_i16x8()))
}

/// Negate packed 8-bit integers in `a` when the corresponding signed 8-bit
/// integer in `b` is negative, and return the results.
/// Elements in the result are zeroed out when the corresponding element in
/// `b` is zero.
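///
/// Per lane, the behaviour matches this scalar sketch (illustrative only):
///
/// ```
/// fn sign(a: i8, b: i8) -> i8 {
///     if b < 0 {
///         // i8::MIN stays i8::MIN under wrapping negation, as psignb does.
///         a.wrapping_neg()
///     } else if b == 0 {
///         0
///     } else {
///         a
///     }
/// }
/// assert_eq!((sign(5, -1), sign(5, 0), sign(5, 3)), (-5, 0, 5));
/// ```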
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_sign_epi8)
#[inline]
#[target_feature(enable = "ssse3")]
#[cfg_attr(test, assert_instr(psignb))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_sign_epi8(a: __m128i, b: __m128i) -> __m128i {
    mem::transmute(psignb128(a.as_i8x16(), b.as_i8x16()))
}

/// Negate packed 16-bit integers in `a` when the corresponding signed 16-bit
/// integer in `b` is negative, and return the results.
/// Elements in the result are zeroed out when the corresponding element in
/// `b` is zero.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_sign_epi16)
#[inline]
#[target_feature(enable = "ssse3")]
#[cfg_attr(test, assert_instr(psignw))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_sign_epi16(a: __m128i, b: __m128i) -> __m128i {
    mem::transmute(psignw128(a.as_i16x8(), b.as_i16x8()))
}

/// Negate packed 32-bit integers in `a` when the corresponding signed 32-bit
/// integer in `b` is negative, and return the results.
/// Elements in the result are zeroed out when the corresponding element in
/// `b` is zero.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_sign_epi32)
#[inline]
#[target_feature(enable = "ssse3")]
#[cfg_attr(test, assert_instr(psignd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_sign_epi32(a: __m128i, b: __m128i) -> __m128i {
    mem::transmute(psignd128(a.as_i32x4(), b.as_i32x4()))
}

/// Compute the absolute value of packed 8-bit integers in `a` and
/// return the unsigned results.
#[inline]
#[target_feature(enable = "ssse3,mmx")]
#[cfg_attr(test, assert_instr(pabsb))]
pub unsafe fn _mm_abs_pi8(a: __m64) -> __m64 {
    pabsb(a)
}

/// Compute the absolute value of packed 16-bit integers in `a`, and return
/// the unsigned results.
#[inline]
#[target_feature(enable = "ssse3,mmx")]
#[cfg_attr(test, assert_instr(pabsw))]
pub unsafe fn _mm_abs_pi16(a: __m64) -> __m64 {
    pabsw(a)
}

/// Compute the absolute value of packed 32-bit integers in `a`, and return
/// the unsigned results.
#[inline]
#[target_feature(enable = "ssse3,mmx")]
#[cfg_attr(test, assert_instr(pabsd))]
pub unsafe fn _mm_abs_pi32(a: __m64) -> __m64 {
    pabsd(a)
}

/// Shuffle packed 8-bit integers in `a` according to the shuffle control mask
/// in the corresponding 8-bit element of `b`, and return the results.
#[inline]
#[target_feature(enable = "ssse3,mmx")]
#[cfg_attr(test, assert_instr(pshufb))]
pub unsafe fn _mm_shuffle_pi8(a: __m64, b: __m64) -> __m64 {
    pshufb(a, b)
}

/// Concatenates the two 64-bit integer vector operands, and right-shifts
/// the result by the number of bytes specified in the immediate operand.
#[inline]
#[target_feature(enable = "ssse3,mmx")]
#[cfg_attr(test, assert_instr(palignr, n = 15))]
#[rustc_args_required_const(2)]
pub unsafe fn _mm_alignr_pi8(a: __m64, b: __m64, n: i32) -> __m64 {
    macro_rules! call {
        ($imm8:expr) => {
            palignrb(a, b, $imm8)
        };
    }
    constify_imm8!(n, call)
}

/// Horizontally add the adjacent pairs of values contained in 2 packed
/// 64-bit vectors of `[4 x i16]`.
#[inline]
#[target_feature(enable = "ssse3,mmx")]
#[cfg_attr(test, assert_instr(phaddw))]
pub unsafe fn _mm_hadd_pi16(a: __m64, b: __m64) -> __m64 {
    phaddw(a, b)
}

/// Horizontally add the adjacent pairs of values contained in 2 packed
/// 64-bit vectors of `[2 x i32]`.
#[inline]
#[target_feature(enable = "ssse3,mmx")]
#[cfg_attr(test, assert_instr(phaddd))]
pub unsafe fn _mm_hadd_pi32(a: __m64, b: __m64) -> __m64 {
    phaddd(a, b)
}

/// Horizontally add the adjacent pairs of values contained in 2 packed
/// 64-bit vectors of `[4 x i16]`. Positive sums greater than 7FFFh are
/// saturated to 7FFFh. Negative sums less than 8000h are saturated to 8000h.
#[inline]
#[target_feature(enable = "ssse3,mmx")]
#[cfg_attr(test, assert_instr(phaddsw))]
pub unsafe fn _mm_hadds_pi16(a: __m64, b: __m64) -> __m64 {
    phaddsw(a, b)
}

/// Horizontally subtracts the adjacent pairs of values contained in 2
/// packed 64-bit vectors of `[4 x i16]`.
#[inline]
#[target_feature(enable = "ssse3,mmx")]
#[cfg_attr(test, assert_instr(phsubw))]
pub unsafe fn _mm_hsub_pi16(a: __m64, b: __m64) -> __m64 {
    phsubw(a, b)
}

/// Horizontally subtracts the adjacent pairs of values contained in 2
/// packed 64-bit vectors of `[2 x i32]`.
#[inline]
#[target_feature(enable = "ssse3,mmx")]
#[cfg_attr(test, assert_instr(phsubd))]
pub unsafe fn _mm_hsub_pi32(a: __m64, b: __m64) -> __m64 {
    phsubd(a, b)
}

/// Horizontally subtracts the adjacent pairs of values contained in 2
/// packed 64-bit vectors of `[4 x i16]`. Positive differences greater than
/// 7FFFh are saturated to 7FFFh. Negative differences less than 8000h are
/// saturated to 8000h.
#[inline]
#[target_feature(enable = "ssse3,mmx")]
#[cfg_attr(test, assert_instr(phsubsw))]
pub unsafe fn _mm_hsubs_pi16(a: __m64, b: __m64) -> __m64 {
    phsubsw(a, b)
}

/// Multiplies corresponding pairs of packed 8-bit unsigned integer
/// values contained in the first source operand and packed 8-bit signed
/// integer values contained in the second source operand, adds pairs of
/// contiguous products with signed saturation, and writes the 16-bit sums to
/// the corresponding bits in the destination.
#[inline]
#[target_feature(enable = "ssse3,mmx")]
#[cfg_attr(test, assert_instr(pmaddubsw))]
pub unsafe fn _mm_maddubs_pi16(a: __m64, b: __m64) -> __m64 {
    pmaddubsw(a, b)
}

/// Multiplies packed 16-bit signed integer values, truncates the 32-bit
/// products to the 18 most significant bits by right-shifting, rounds the
/// truncated value by adding 1, and writes bits `[16:1]` to the destination.
#[inline]
#[target_feature(enable = "ssse3,mmx")]
#[cfg_attr(test, assert_instr(pmulhrsw))]
pub unsafe fn _mm_mulhrs_pi16(a: __m64, b: __m64) -> __m64 {
    pmulhrsw(a, b)
}

/// Negate packed 8-bit integers in `a` when the corresponding signed 8-bit
/// integer in `b` is negative, and return the results.
/// Elements in the result are zeroed out when the corresponding element in
/// `b` is zero.
#[inline]
#[target_feature(enable = "ssse3,mmx")]
#[cfg_attr(test, assert_instr(psignb))]
pub unsafe fn _mm_sign_pi8(a: __m64, b: __m64) -> __m64 {
    psignb(a, b)
}

/// Negate packed 16-bit integers in `a` when the corresponding signed 16-bit
/// integer in `b` is negative, and return the results.
/// Elements in the result are zeroed out when the corresponding element in
/// `b` is zero.
#[inline]
#[target_feature(enable = "ssse3,mmx")]
#[cfg_attr(test, assert_instr(psignw))]
pub unsafe fn _mm_sign_pi16(a: __m64, b: __m64) -> __m64 {
    psignw(a, b)
}

/// Negate packed 32-bit integers in `a` when the corresponding signed 32-bit
/// integer in `b` is negative, and return the results.
/// Elements in the result are zeroed out when the corresponding element in
/// `b` is zero.
#[inline]
#[target_feature(enable = "ssse3,mmx")]
#[cfg_attr(test, assert_instr(psignd))]
pub unsafe fn _mm_sign_pi32(a: __m64, b: __m64) -> __m64 {
    psignd(a, b)
}

#[allow(improper_ctypes)]
extern "C" {
    #[link_name = "llvm.x86.ssse3.pabs.b.128"]
    fn pabsb128(a: i8x16) -> u8x16;

    #[link_name = "llvm.x86.ssse3.pabs.w.128"]
    fn pabsw128(a: i16x8) -> u16x8;

    #[link_name = "llvm.x86.ssse3.pabs.d.128"]
    fn pabsd128(a: i32x4) -> u32x4;

    #[link_name = "llvm.x86.ssse3.pshuf.b.128"]
    fn pshufb128(a: u8x16, b: u8x16) -> u8x16;

    #[link_name = "llvm.x86.ssse3.phadd.w.128"]
    fn phaddw128(a: i16x8, b: i16x8) -> i16x8;

    #[link_name = "llvm.x86.ssse3.phadd.sw.128"]
    fn phaddsw128(a: i16x8, b: i16x8) -> i16x8;

    #[link_name = "llvm.x86.ssse3.phadd.d.128"]
    fn phaddd128(a: i32x4, b: i32x4) -> i32x4;

    #[link_name = "llvm.x86.ssse3.phsub.w.128"]
    fn phsubw128(a: i16x8, b: i16x8) -> i16x8;

    #[link_name = "llvm.x86.ssse3.phsub.sw.128"]
    fn phsubsw128(a: i16x8, b: i16x8) -> i16x8;

    #[link_name = "llvm.x86.ssse3.phsub.d.128"]
    fn phsubd128(a: i32x4, b: i32x4) -> i32x4;

    #[link_name = "llvm.x86.ssse3.pmadd.ub.sw.128"]
    fn pmaddubsw128(a: u8x16, b: i8x16) -> i16x8;

    #[link_name = "llvm.x86.ssse3.pmul.hr.sw.128"]
    fn pmulhrsw128(a: i16x8, b: i16x8) -> i16x8;

    #[link_name = "llvm.x86.ssse3.psign.b.128"]
    fn psignb128(a: i8x16, b: i8x16) -> i8x16;

    #[link_name = "llvm.x86.ssse3.psign.w.128"]
    fn psignw128(a: i16x8, b: i16x8) -> i16x8;

    #[link_name = "llvm.x86.ssse3.psign.d.128"]
    fn psignd128(a: i32x4, b: i32x4) -> i32x4;

    #[link_name = "llvm.x86.ssse3.pabs.b"]
    fn pabsb(a: __m64) -> __m64;

    #[link_name = "llvm.x86.ssse3.pabs.w"]
    fn pabsw(a: __m64) -> __m64;

    #[link_name = "llvm.x86.ssse3.pabs.d"]
    fn pabsd(a: __m64) -> __m64;

    #[link_name = "llvm.x86.ssse3.pshuf.b"]
    fn pshufb(a: __m64, b: __m64) -> __m64;

    #[link_name = "llvm.x86.mmx.palignr.b"]
    fn palignrb(a: __m64, b: __m64, n: u8) -> __m64;

    #[link_name = "llvm.x86.ssse3.phadd.w"]
    fn phaddw(a: __m64, b: __m64) -> __m64;

    #[link_name = "llvm.x86.ssse3.phadd.d"]
    fn phaddd(a: __m64, b: __m64) -> __m64;

    #[link_name = "llvm.x86.ssse3.phadd.sw"]
    fn phaddsw(a: __m64, b: __m64) -> __m64;

    #[link_name = "llvm.x86.ssse3.phsub.w"]
    fn phsubw(a: __m64, b: __m64) -> __m64;

    #[link_name = "llvm.x86.ssse3.phsub.d"]
    fn phsubd(a: __m64, b: __m64) -> __m64;

    #[link_name = "llvm.x86.ssse3.phsub.sw"]
    fn phsubsw(a: __m64, b: __m64) -> __m64;

    #[link_name = "llvm.x86.ssse3.pmadd.ub.sw"]
    fn pmaddubsw(a: __m64, b: __m64) -> __m64;

    #[link_name = "llvm.x86.ssse3.pmul.hr.sw"]
    fn pmulhrsw(a: __m64, b: __m64) -> __m64;

    #[link_name = "llvm.x86.ssse3.psign.b"]
    fn psignb(a: __m64, b: __m64) -> __m64;

    #[link_name = "llvm.x86.ssse3.psign.w"]
    fn psignw(a: __m64, b: __m64) -> __m64;

    #[link_name = "llvm.x86.ssse3.psign.d"]
    fn psignd(a: __m64, b: __m64) -> __m64;
}

#[cfg(test)]
mod tests {
    use stdsimd_test::simd_test;

    use coresimd::x86::*;

    #[simd_test(enable = "ssse3")]
    unsafe fn test_mm_abs_epi8() {
        let r = _mm_abs_epi8(_mm_set1_epi8(-5));
        assert_eq_m128i(r, _mm_set1_epi8(5));
    }

    #[simd_test(enable = "ssse3")]
    unsafe fn test_mm_abs_epi16() {
        let r = _mm_abs_epi16(_mm_set1_epi16(-5));
        assert_eq_m128i(r, _mm_set1_epi16(5));
    }

    #[simd_test(enable = "ssse3")]
    unsafe fn test_mm_abs_epi32() {
        let r = _mm_abs_epi32(_mm_set1_epi32(-5));
        assert_eq_m128i(r, _mm_set1_epi32(5));
    }

    #[simd_test(enable = "ssse3")]
    unsafe fn test_mm_shuffle_epi8() {
        #[cfg_attr(rustfmt, rustfmt_skip)]
        let a = _mm_setr_epi8(
            1, 2, 3, 4, 5, 6, 7, 8,
            9, 10, 11, 12, 13, 14, 15, 16,
        );
        #[cfg_attr(rustfmt, rustfmt_skip)]
        let b = _mm_setr_epi8(
            4, 128_u8 as i8, 4, 3,
            24, 12, 6, 19,
            12, 5, 5, 10,
            4, 1, 8, 0,
        );
        #[cfg_attr(rustfmt, rustfmt_skip)]
        let expected = _mm_setr_epi8(
            5, 0, 5, 4, 9, 13, 7, 4,
            13, 6, 6, 11, 5, 2, 9, 1,
        );
        let r = _mm_shuffle_epi8(a, b);
        assert_eq_m128i(r, expected);
    }
83c7162d 621 #[simd_test(enable = "ssse3")]
0531ce1d
XL
622 unsafe fn test_mm_alignr_epi8() {
623 #[cfg_attr(rustfmt, rustfmt_skip)]
624 let a = _mm_setr_epi8(
625 1, 2, 3, 4, 5, 6, 7, 8,
626 9, 10, 11, 12, 13, 14, 15, 16,
627 );
628 #[cfg_attr(rustfmt, rustfmt_skip)]
629 let b = _mm_setr_epi8(
630 4, 63, 4, 3,
631 24, 12, 6, 19,
632 12, 5, 5, 10,
633 4, 1, 8, 0,
634 );
635 let r = _mm_alignr_epi8(a, b, 33);
636 assert_eq_m128i(r, _mm_set1_epi8(0));
637
638 let r = _mm_alignr_epi8(a, b, 17);
639 #[cfg_attr(rustfmt, rustfmt_skip)]
640 let expected = _mm_setr_epi8(
641 2, 3, 4, 5, 6, 7, 8, 9,
642 10, 11, 12, 13, 14, 15, 16, 0,
643 );
644 assert_eq_m128i(r, expected);
645
646 let r = _mm_alignr_epi8(a, b, 16);
647 assert_eq_m128i(r, a);
648
649 let r = _mm_alignr_epi8(a, b, 15);
650 #[cfg_attr(rustfmt, rustfmt_skip)]
651 let expected = _mm_setr_epi8(
652 0, 1, 2, 3, 4, 5, 6, 7,
653 8, 9, 10, 11, 12, 13, 14, 15,
654 );
655 assert_eq_m128i(r, expected);
656
657 let r = _mm_alignr_epi8(a, b, 0);
658 assert_eq_m128i(r, b);
659 }
660
83c7162d 661 #[simd_test(enable = "ssse3")]
0531ce1d
XL
662 unsafe fn test_mm_hadd_epi16() {
663 let a = _mm_setr_epi16(1, 2, 3, 4, 5, 6, 7, 8);
664 let b = _mm_setr_epi16(4, 128, 4, 3, 24, 12, 6, 19);
665 let expected = _mm_setr_epi16(3, 7, 11, 15, 132, 7, 36, 25);
666 let r = _mm_hadd_epi16(a, b);
667 assert_eq_m128i(r, expected);
668 }
669
83c7162d 670 #[simd_test(enable = "ssse3")]
0531ce1d
XL
671 unsafe fn test_mm_hadds_epi16() {
672 let a = _mm_setr_epi16(1, 2, 3, 4, 5, 6, 7, 8);
673 let b = _mm_setr_epi16(4, 128, 4, 3, 32767, 1, -32768, -1);
674 let expected = _mm_setr_epi16(3, 7, 11, 15, 132, 7, 32767, -32768);
675 let r = _mm_hadds_epi16(a, b);
676 assert_eq_m128i(r, expected);
677 }
678
83c7162d 679 #[simd_test(enable = "ssse3")]
0531ce1d
XL
680 unsafe fn test_mm_hadd_epi32() {
681 let a = _mm_setr_epi32(1, 2, 3, 4);
682 let b = _mm_setr_epi32(4, 128, 4, 3);
683 let expected = _mm_setr_epi32(3, 7, 132, 7);
684 let r = _mm_hadd_epi32(a, b);
685 assert_eq_m128i(r, expected);
686 }
687
83c7162d 688 #[simd_test(enable = "ssse3")]
0531ce1d
XL
689 unsafe fn test_mm_hsub_epi16() {
690 let a = _mm_setr_epi16(1, 2, 3, 4, 5, 6, 7, 8);
691 let b = _mm_setr_epi16(4, 128, 4, 3, 24, 12, 6, 19);
692 let expected = _mm_setr_epi16(-1, -1, -1, -1, -124, 1, 12, -13);
693 let r = _mm_hsub_epi16(a, b);
694 assert_eq_m128i(r, expected);
695 }
696
83c7162d 697 #[simd_test(enable = "ssse3")]
0531ce1d
XL
698 unsafe fn test_mm_hsubs_epi16() {
699 let a = _mm_setr_epi16(1, 2, 3, 4, 5, 6, 7, 8);
700 let b = _mm_setr_epi16(4, 128, 4, 3, 32767, -1, -32768, 1);
701 let expected = _mm_setr_epi16(-1, -1, -1, -1, -124, 1, 32767, -32768);
702 let r = _mm_hsubs_epi16(a, b);
703 assert_eq_m128i(r, expected);
704 }
705
83c7162d 706 #[simd_test(enable = "ssse3")]
0531ce1d
XL
707 unsafe fn test_mm_hsub_epi32() {
708 let a = _mm_setr_epi32(1, 2, 3, 4);
709 let b = _mm_setr_epi32(4, 128, 4, 3);
710 let expected = _mm_setr_epi32(-1, -1, -124, 1);
711 let r = _mm_hsub_epi32(a, b);
712 assert_eq_m128i(r, expected);
713 }

    #[simd_test(enable = "ssse3")]
    unsafe fn test_mm_maddubs_epi16() {
        #[cfg_attr(rustfmt, rustfmt_skip)]
        let a = _mm_setr_epi8(
            1, 2, 3, 4, 5, 6, 7, 8,
            9, 10, 11, 12, 13, 14, 15, 16,
        );
        #[cfg_attr(rustfmt, rustfmt_skip)]
        let b = _mm_setr_epi8(
            4, 63, 4, 3,
            24, 12, 6, 19,
            12, 5, 5, 10,
            4, 1, 8, 0,
        );
        let expected = _mm_setr_epi16(130, 24, 192, 194, 158, 175, 66, 120);
        let r = _mm_maddubs_epi16(a, b);
        assert_eq_m128i(r, expected);
    }

    #[simd_test(enable = "ssse3")]
    unsafe fn test_mm_mulhrs_epi16() {
        let a = _mm_setr_epi16(1, 2, 3, 4, 5, 6, 7, 8);
        let b = _mm_setr_epi16(4, 128, 4, 3, 32767, -1, -32768, 1);
        let expected = _mm_setr_epi16(0, 0, 0, 0, 5, 0, -7, 0);
        let r = _mm_mulhrs_epi16(a, b);
        assert_eq_m128i(r, expected);
    }

    #[simd_test(enable = "ssse3")]
    unsafe fn test_mm_sign_epi8() {
        #[cfg_attr(rustfmt, rustfmt_skip)]
        let a = _mm_setr_epi8(
            1, 2, 3, 4, 5, 6, 7, 8,
            9, 10, 11, 12, 13, -14, -15, 16,
        );
        #[cfg_attr(rustfmt, rustfmt_skip)]
        let b = _mm_setr_epi8(
            4, 63, -4, 3, 24, 12, -6, -19,
            12, 5, -5, 10, 4, 1, -8, 0,
        );
        #[cfg_attr(rustfmt, rustfmt_skip)]
        let expected = _mm_setr_epi8(
            1, 2, -3, 4, 5, 6, -7, -8,
            9, 10, -11, 12, 13, -14, 15, 0,
        );
        let r = _mm_sign_epi8(a, b);
        assert_eq_m128i(r, expected);
    }

    #[simd_test(enable = "ssse3")]
    unsafe fn test_mm_sign_epi16() {
        let a = _mm_setr_epi16(1, 2, 3, 4, -5, -6, 7, 8);
        let b = _mm_setr_epi16(4, 128, 0, 3, 1, -1, -2, 1);
        let expected = _mm_setr_epi16(1, 2, 0, 4, -5, 6, -7, 8);
        let r = _mm_sign_epi16(a, b);
        assert_eq_m128i(r, expected);
    }

    #[simd_test(enable = "ssse3")]
    unsafe fn test_mm_sign_epi32() {
        let a = _mm_setr_epi32(-1, 2, 3, 4);
        let b = _mm_setr_epi32(1, -1, 1, 0);
        let expected = _mm_setr_epi32(-1, -2, 3, 0);
        let r = _mm_sign_epi32(a, b);
        assert_eq_m128i(r, expected);
    }

    #[simd_test(enable = "ssse3,mmx")]
    unsafe fn test_mm_abs_pi8() {
        let r = _mm_abs_pi8(_mm_set1_pi8(-5));
        assert_eq_m64(r, _mm_set1_pi8(5));
    }

    #[simd_test(enable = "ssse3,mmx")]
    unsafe fn test_mm_abs_pi16() {
        let r = _mm_abs_pi16(_mm_set1_pi16(-5));
        assert_eq_m64(r, _mm_set1_pi16(5));
    }

    #[simd_test(enable = "ssse3,mmx")]
    unsafe fn test_mm_abs_pi32() {
        let r = _mm_abs_pi32(_mm_set1_pi32(-5));
        assert_eq_m64(r, _mm_set1_pi32(5));
    }

    #[simd_test(enable = "ssse3,mmx")]
    unsafe fn test_mm_shuffle_pi8() {
        let a = _mm_setr_pi8(1, 2, 3, 4, 5, 6, 7, 8);
        let b = _mm_setr_pi8(4, 128u8 as i8, 4, 3, 24, 12, 6, 19);
        let expected = _mm_setr_pi8(5, 0, 5, 4, 1, 5, 7, 4);
        let r = _mm_shuffle_pi8(a, b);
        assert_eq_m64(r, expected);
    }

    #[simd_test(enable = "ssse3,mmx")]
    unsafe fn test_mm_alignr_pi8() {
        let a = _mm_setr_pi32(0x89ABCDEF_u32 as i32, 0x01234567_u32 as i32);
        let b = _mm_setr_pi32(0xBBAA9988_u32 as i32, 0xFFDDEECC_u32 as i32);
        let r = _mm_alignr_pi8(a, b, 4);
        assert_eq_m64(r, ::std::mem::transmute(0x89abcdefffddeecc_u64));
    }

    #[simd_test(enable = "ssse3,mmx")]
    unsafe fn test_mm_hadd_pi16() {
        let a = _mm_setr_pi16(1, 2, 3, 4);
        let b = _mm_setr_pi16(4, 128, 4, 3);
        let expected = _mm_setr_pi16(3, 7, 132, 7);
        let r = _mm_hadd_pi16(a, b);
        assert_eq_m64(r, expected);
    }

    #[simd_test(enable = "ssse3,mmx")]
    unsafe fn test_mm_hadd_pi32() {
        let a = _mm_setr_pi32(1, 2);
        let b = _mm_setr_pi32(4, 128);
        let expected = _mm_setr_pi32(3, 132);
        let r = _mm_hadd_pi32(a, b);
        assert_eq_m64(r, expected);
    }

    #[simd_test(enable = "ssse3,mmx")]
    unsafe fn test_mm_hadds_pi16() {
        let a = _mm_setr_pi16(1, 2, 3, 4);
        let b = _mm_setr_pi16(32767, 1, -32768, -1);
        let expected = _mm_setr_pi16(3, 7, 32767, -32768);
        let r = _mm_hadds_pi16(a, b);
        assert_eq_m64(r, expected);
    }

    #[simd_test(enable = "ssse3,mmx")]
    unsafe fn test_mm_hsub_pi16() {
        let a = _mm_setr_pi16(1, 2, 3, 4);
        let b = _mm_setr_pi16(4, 128, 4, 3);
        let expected = _mm_setr_pi16(-1, -1, -124, 1);
        let r = _mm_hsub_pi16(a, b);
        assert_eq_m64(r, expected);
    }

    #[simd_test(enable = "ssse3,mmx")]
    unsafe fn test_mm_hsub_pi32() {
        let a = _mm_setr_pi32(1, 2);
        let b = _mm_setr_pi32(4, 128);
        let expected = _mm_setr_pi32(-1, -124);
        let r = _mm_hsub_pi32(a, b);
        assert_eq_m64(r, expected);
    }

    #[simd_test(enable = "ssse3,mmx")]
    unsafe fn test_mm_hsubs_pi16() {
        let a = _mm_setr_pi16(1, 2, 3, 4);
        let b = _mm_setr_pi16(4, 128, 4, 3);
        let expected = _mm_setr_pi16(-1, -1, -124, 1);
        let r = _mm_hsubs_pi16(a, b);
        assert_eq_m64(r, expected);
    }

    #[simd_test(enable = "ssse3,mmx")]
    unsafe fn test_mm_maddubs_pi16() {
        let a = _mm_setr_pi8(1, 2, 3, 4, 5, 6, 7, 8);
        let b = _mm_setr_pi8(4, 63, 4, 3, 24, 12, 6, 19);
        let expected = _mm_setr_pi16(130, 24, 192, 194);
        let r = _mm_maddubs_pi16(a, b);
        assert_eq_m64(r, expected);
    }

    #[simd_test(enable = "ssse3,mmx")]
    unsafe fn test_mm_mulhrs_pi16() {
        let a = _mm_setr_pi16(1, 2, 3, 4);
        let b = _mm_setr_pi16(4, 32767, -1, -32768);
        let expected = _mm_setr_pi16(0, 2, 0, -4);
        let r = _mm_mulhrs_pi16(a, b);
        assert_eq_m64(r, expected);
    }

    #[simd_test(enable = "ssse3,mmx")]
    unsafe fn test_mm_sign_pi8() {
        let a = _mm_setr_pi8(1, 2, 3, 4, -5, -6, 7, 8);
        let b = _mm_setr_pi8(4, 64, 0, 3, 1, -1, -2, 1);
        let expected = _mm_setr_pi8(1, 2, 0, 4, -5, 6, -7, 8);
        let r = _mm_sign_pi8(a, b);
        assert_eq_m64(r, expected);
    }

    #[simd_test(enable = "ssse3,mmx")]
    unsafe fn test_mm_sign_pi16() {
        let a = _mm_setr_pi16(-1, 2, 3, 4);
        let b = _mm_setr_pi16(1, -1, 1, 0);
        let expected = _mm_setr_pi16(-1, -2, 3, 0);
        let r = _mm_sign_pi16(a, b);
        assert_eq_m64(r, expected);
    }

    #[simd_test(enable = "ssse3,mmx")]
    unsafe fn test_mm_sign_pi32() {
        let a = _mm_setr_pi32(-1, 2);
        let b = _mm_setr_pi32(1, 0);
        let expected = _mm_setr_pi32(-1, 0);
        let r = _mm_sign_pi32(a, b);
        assert_eq_m64(r, expected);
    }
}