//! Supplemental Streaming SIMD Extensions 3 (SSSE3)

use coresimd::simd::*;
use coresimd::simd_llvm::simd_shuffle16;
use coresimd::x86::*;
use mem;

#[cfg(test)]
use stdsimd_test::assert_instr;

/// Compute the absolute value of packed 8-bit signed integers in `a` and
/// return the unsigned results.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_abs_epi8)
#[inline]
#[target_feature(enable = "ssse3")]
#[cfg_attr(test, assert_instr(pabsb))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_abs_epi8(a: __m128i) -> __m128i {
    mem::transmute(pabsb128(a.as_i8x16()))
}

/// Compute the absolute value of each of the packed 16-bit signed integers
/// in `a` and return the 16-bit unsigned results.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_abs_epi16)
#[inline]
#[target_feature(enable = "ssse3")]
#[cfg_attr(test, assert_instr(pabsw))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_abs_epi16(a: __m128i) -> __m128i {
    mem::transmute(pabsw128(a.as_i16x8()))
}

/// Compute the absolute value of each of the packed 32-bit signed integers
/// in `a` and return the 32-bit unsigned results.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_abs_epi32)
#[inline]
#[target_feature(enable = "ssse3")]
#[cfg_attr(test, assert_instr(pabsd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_abs_epi32(a: __m128i) -> __m128i {
    mem::transmute(pabsd128(a.as_i32x4()))
}

/// Shuffle bytes from `a` according to the content of `b`.
///
/// The lowest 4 bits of each byte of `b` are used as indices
/// into the 16 bytes of `a`.
///
/// In addition, if the most significant bit of a byte of `b`
/// is set, the respective destination byte is set to 0.
///
/// Picturing `a` and `b` as `[u8; 16]`, `_mm_shuffle_epi8` is
/// logically equivalent to:
///
/// ```
/// fn mm_shuffle_epi8(a: [u8; 16], b: [u8; 16]) -> [u8; 16] {
///     let mut r = [0u8; 16];
///     for i in 0..16 {
///         // if the most significant bit of b is set,
///         // then the destination byte is set to 0.
///         if b[i] & 0x80 == 0u8 {
///             r[i] = a[(b[i] % 16) as usize];
///         }
///     }
///     r
/// }
/// ```
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_shuffle_epi8)
#[inline]
#[target_feature(enable = "ssse3")]
#[cfg_attr(test, assert_instr(pshufb))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_shuffle_epi8(a: __m128i, b: __m128i) -> __m128i {
    mem::transmute(pshufb128(a.as_u8x16(), b.as_u8x16()))
}

/// Concatenate 16-byte blocks in `a` and `b` into a 32-byte temporary result,
/// shift the result right by `n` bytes, and return the low 16 bytes.
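///
/// For illustration only, a plain-Rust sketch of the same operation (the
/// free function below is hypothetical, not part of the API):
///
/// ```
/// fn mm_alignr_epi8(a: [u8; 16], b: [u8; 16], n: usize) -> [u8; 16] {
///     // Concatenate `b` (low 16 bytes) and `a` (high 16 bytes), then take
///     // 16 bytes starting at offset `n`; bytes past the end are zero.
///     let mut concat = [0u8; 32];
///     concat[..16].copy_from_slice(&b);
///     concat[16..].copy_from_slice(&a);
///     let mut r = [0u8; 16];
///     for i in 0..16 {
///         r[i] = *concat.get(n + i).unwrap_or(&0);
///     }
///     r
/// }
/// ```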
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_alignr_epi8)
#[inline]
#[target_feature(enable = "ssse3")]
#[cfg_attr(test, assert_instr(palignr, n = 15))]
#[rustc_args_required_const(2)]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_alignr_epi8(a: __m128i, b: __m128i, n: i32) -> __m128i {
    let n = n as u32;
    // If palignr is shifting the pair of vectors more than the size of two
    // lanes, emit zero.
    if n > 32 {
        return _mm_set1_epi8(0);
    }
    // If palignr is shifting the pair of input vectors more than one lane,
    // but less than two lanes, convert to shifting in zeroes.
    let (a, b, n) = if n > 16 {
        (_mm_set1_epi8(0), a, n - 16)
    } else {
        (a, b, n)
    };
    let a = a.as_i8x16();
    let b = b.as_i8x16();

    macro_rules! shuffle {
        ($shift:expr) => {
            simd_shuffle16(
                b,
                a,
                [
                    0 + $shift,
                    1 + $shift,
                    2 + $shift,
                    3 + $shift,
                    4 + $shift,
                    5 + $shift,
                    6 + $shift,
                    7 + $shift,
                    8 + $shift,
                    9 + $shift,
                    10 + $shift,
                    11 + $shift,
                    12 + $shift,
                    13 + $shift,
                    14 + $shift,
                    15 + $shift,
                ],
            )
        };
    }
    let r: i8x16 = match n {
        0 => shuffle!(0),
        1 => shuffle!(1),
        2 => shuffle!(2),
        3 => shuffle!(3),
        4 => shuffle!(4),
        5 => shuffle!(5),
        6 => shuffle!(6),
        7 => shuffle!(7),
        8 => shuffle!(8),
        9 => shuffle!(9),
        10 => shuffle!(10),
        11 => shuffle!(11),
        12 => shuffle!(12),
        13 => shuffle!(13),
        14 => shuffle!(14),
        15 => shuffle!(15),
        _ => shuffle!(16),
    };
    mem::transmute(r)
}

/// Horizontally add the adjacent pairs of values contained in 2 packed
/// 128-bit vectors of `[8 x i16]`.
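///
/// The output lanes are, in order,
/// `[a0+a1, a2+a3, a4+a5, a6+a7, b0+b1, b2+b3, b4+b5, b6+b7]`, with each sum
/// wrapping on overflow.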
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_hadd_epi16)
#[inline]
#[target_feature(enable = "ssse3")]
#[cfg_attr(test, assert_instr(phaddw))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_hadd_epi16(a: __m128i, b: __m128i) -> __m128i {
    mem::transmute(phaddw128(a.as_i16x8(), b.as_i16x8()))
}

/// Horizontally add the adjacent pairs of values contained in 2 packed
/// 128-bit vectors of `[8 x i16]`. Positive sums greater than 7FFFh are
/// saturated to 7FFFh. Negative sums less than 8000h are saturated to 8000h.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_hadds_epi16)
#[inline]
#[target_feature(enable = "ssse3")]
#[cfg_attr(test, assert_instr(phaddsw))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_hadds_epi16(a: __m128i, b: __m128i) -> __m128i {
    mem::transmute(phaddsw128(a.as_i16x8(), b.as_i16x8()))
}

/// Horizontally add the adjacent pairs of values contained in 2 packed
/// 128-bit vectors of `[4 x i32]`.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_hadd_epi32)
#[inline]
#[target_feature(enable = "ssse3")]
#[cfg_attr(test, assert_instr(phaddd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_hadd_epi32(a: __m128i, b: __m128i) -> __m128i {
    mem::transmute(phaddd128(a.as_i32x4(), b.as_i32x4()))
}

/// Horizontally subtract the adjacent pairs of values contained in 2
/// packed 128-bit vectors of `[8 x i16]`.
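///
/// The output lanes are, in order,
/// `[a0-a1, a2-a3, a4-a5, a6-a7, b0-b1, b2-b3, b4-b5, b6-b7]`, with each
/// difference wrapping on overflow.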
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_hsub_epi16)
#[inline]
#[target_feature(enable = "ssse3")]
#[cfg_attr(test, assert_instr(phsubw))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_hsub_epi16(a: __m128i, b: __m128i) -> __m128i {
    mem::transmute(phsubw128(a.as_i16x8(), b.as_i16x8()))
}

/// Horizontally subtract the adjacent pairs of values contained in 2
/// packed 128-bit vectors of `[8 x i16]`. Positive differences greater than
/// 7FFFh are saturated to 7FFFh. Negative differences less than 8000h are
/// saturated to 8000h.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_hsubs_epi16)
#[inline]
#[target_feature(enable = "ssse3")]
#[cfg_attr(test, assert_instr(phsubsw))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_hsubs_epi16(a: __m128i, b: __m128i) -> __m128i {
    mem::transmute(phsubsw128(a.as_i16x8(), b.as_i16x8()))
}

/// Horizontally subtract the adjacent pairs of values contained in 2
/// packed 128-bit vectors of `[4 x i32]`.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_hsub_epi32)
#[inline]
#[target_feature(enable = "ssse3")]
#[cfg_attr(test, assert_instr(phsubd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_hsub_epi32(a: __m128i, b: __m128i) -> __m128i {
    mem::transmute(phsubd128(a.as_i32x4(), b.as_i32x4()))
}

/// Multiply corresponding pairs of packed 8-bit unsigned integer
/// values contained in the first source operand and packed 8-bit signed
/// integer values contained in the second source operand, add pairs of
/// contiguous products with signed saturation, and write the 16-bit sums to
/// the corresponding bits in the destination.
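///
/// Concretely, each 16-bit output lane `i` is the signed-saturating sum
/// `a[2*i] * b[2*i] + a[2*i + 1] * b[2*i + 1]`, where the bytes of `a` are
/// treated as unsigned and the bytes of `b` as signed.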
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maddubs_epi16)
#[inline]
#[target_feature(enable = "ssse3")]
#[cfg_attr(test, assert_instr(pmaddubsw))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_maddubs_epi16(a: __m128i, b: __m128i) -> __m128i {
    mem::transmute(pmaddubsw128(a.as_u8x16(), b.as_i8x16()))
}

/// Multiply packed 16-bit signed integer values, truncate the 32-bit
/// products to the 18 most significant bits by right-shifting, round the
/// truncated value by adding 1, and write bits `[16:1]` to the destination.
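///
/// In other words, each output lane is equivalent to
/// `((a[i] as i32 * b[i] as i32 + (1 << 14)) >> 15) as i16`.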
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mulhrs_epi16)
#[inline]
#[target_feature(enable = "ssse3")]
#[cfg_attr(test, assert_instr(pmulhrsw))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_mulhrs_epi16(a: __m128i, b: __m128i) -> __m128i {
    mem::transmute(pmulhrsw128(a.as_i16x8(), b.as_i16x8()))
}

/// Negate packed 8-bit integers in `a` when the corresponding signed 8-bit
/// integer in `b` is negative, and return the results.
/// Elements in the result are zeroed out when the corresponding element in
/// `b` is zero.
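///
/// That is, each output element is `a[i]` if `b[i]` is positive, `-a[i]` if
/// `b[i]` is negative, and `0` if `b[i]` is zero.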
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_sign_epi8)
#[inline]
#[target_feature(enable = "ssse3")]
#[cfg_attr(test, assert_instr(psignb))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_sign_epi8(a: __m128i, b: __m128i) -> __m128i {
    mem::transmute(psignb128(a.as_i8x16(), b.as_i8x16()))
}

/// Negate packed 16-bit integers in `a` when the corresponding signed 16-bit
/// integer in `b` is negative, and return the results.
/// Elements in the result are zeroed out when the corresponding element in
/// `b` is zero.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_sign_epi16)
#[inline]
#[target_feature(enable = "ssse3")]
#[cfg_attr(test, assert_instr(psignw))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_sign_epi16(a: __m128i, b: __m128i) -> __m128i {
    mem::transmute(psignw128(a.as_i16x8(), b.as_i16x8()))
}

/// Negate packed 32-bit integers in `a` when the corresponding signed 32-bit
/// integer in `b` is negative, and return the results.
/// Elements in the result are zeroed out when the corresponding element in
/// `b` is zero.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_sign_epi32)
#[inline]
#[target_feature(enable = "ssse3")]
#[cfg_attr(test, assert_instr(psignd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_sign_epi32(a: __m128i, b: __m128i) -> __m128i {
    mem::transmute(psignd128(a.as_i32x4(), b.as_i32x4()))
}

/// Compute the absolute value of packed 8-bit integers in `a` and
/// return the unsigned results.
#[inline]
#[target_feature(enable = "ssse3,mmx")]
#[cfg_attr(test, assert_instr(pabsb))]
pub unsafe fn _mm_abs_pi8(a: __m64) -> __m64 {
    pabsb(a)
}

/// Compute the absolute value of packed 16-bit integers in `a`, and return
/// the unsigned results.
#[inline]
#[target_feature(enable = "ssse3,mmx")]
#[cfg_attr(test, assert_instr(pabsw))]
pub unsafe fn _mm_abs_pi16(a: __m64) -> __m64 {
    pabsw(a)
}

/// Compute the absolute value of packed 32-bit integers in `a`, and return
/// the unsigned results.
#[inline]
#[target_feature(enable = "ssse3,mmx")]
#[cfg_attr(test, assert_instr(pabsd))]
pub unsafe fn _mm_abs_pi32(a: __m64) -> __m64 {
    pabsd(a)
}

/// Shuffle packed 8-bit integers in `a` according to the shuffle control
/// mask in the corresponding 8-bit element of `b`, and return the results.
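///
/// Only the low 3 bits of each byte of `b` are used as an index into the 8
/// bytes of `a`; if the most significant bit of a byte of `b` is set, the
/// corresponding destination byte is set to 0.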
#[inline]
#[target_feature(enable = "ssse3,mmx")]
#[cfg_attr(test, assert_instr(pshufb))]
pub unsafe fn _mm_shuffle_pi8(a: __m64, b: __m64) -> __m64 {
    pshufb(a, b)
}

/// Concatenate the two 64-bit integer vector operands, and right-shift
/// the result by the number of bytes specified in the immediate operand.
#[inline]
#[target_feature(enable = "ssse3,mmx")]
#[cfg_attr(test, assert_instr(palignr, n = 15))]
#[rustc_args_required_const(2)]
pub unsafe fn _mm_alignr_pi8(a: __m64, b: __m64, n: i32) -> __m64 {
    macro_rules! call {
        ($imm8:expr) => {
            palignrb(a, b, $imm8)
        };
    }
    constify_imm8!(n, call)
}

/// Horizontally add the adjacent pairs of values contained in 2 packed
/// 64-bit vectors of `[4 x i16]`.
#[inline]
#[target_feature(enable = "ssse3,mmx")]
#[cfg_attr(test, assert_instr(phaddw))]
pub unsafe fn _mm_hadd_pi16(a: __m64, b: __m64) -> __m64 {
    phaddw(a, b)
}

/// Horizontally add the adjacent pairs of values contained in 2 packed
/// 64-bit vectors of `[2 x i32]`.
#[inline]
#[target_feature(enable = "ssse3,mmx")]
#[cfg_attr(test, assert_instr(phaddd))]
pub unsafe fn _mm_hadd_pi32(a: __m64, b: __m64) -> __m64 {
    phaddd(a, b)
}

/// Horizontally add the adjacent pairs of values contained in 2 packed
/// 64-bit vectors of `[4 x i16]`. Positive sums greater than 7FFFh are
/// saturated to 7FFFh. Negative sums less than 8000h are saturated to 8000h.
#[inline]
#[target_feature(enable = "ssse3,mmx")]
#[cfg_attr(test, assert_instr(phaddsw))]
pub unsafe fn _mm_hadds_pi16(a: __m64, b: __m64) -> __m64 {
    phaddsw(a, b)
}

/// Horizontally subtract the adjacent pairs of values contained in 2
/// packed 64-bit vectors of `[4 x i16]`.
#[inline]
#[target_feature(enable = "ssse3,mmx")]
#[cfg_attr(test, assert_instr(phsubw))]
pub unsafe fn _mm_hsub_pi16(a: __m64, b: __m64) -> __m64 {
    phsubw(a, b)
}

/// Horizontally subtract the adjacent pairs of values contained in 2
/// packed 64-bit vectors of `[2 x i32]`.
#[inline]
#[target_feature(enable = "ssse3,mmx")]
#[cfg_attr(test, assert_instr(phsubd))]
pub unsafe fn _mm_hsub_pi32(a: __m64, b: __m64) -> __m64 {
    phsubd(a, b)
}

/// Horizontally subtract the adjacent pairs of values contained in 2
/// packed 64-bit vectors of `[4 x i16]`. Positive differences greater than
/// 7FFFh are saturated to 7FFFh. Negative differences less than 8000h are
/// saturated to 8000h.
#[inline]
#[target_feature(enable = "ssse3,mmx")]
#[cfg_attr(test, assert_instr(phsubsw))]
pub unsafe fn _mm_hsubs_pi16(a: __m64, b: __m64) -> __m64 {
    phsubsw(a, b)
}

/// Multiply corresponding pairs of packed 8-bit unsigned integer
/// values contained in the first source operand and packed 8-bit signed
/// integer values contained in the second source operand, add pairs of
/// contiguous products with signed saturation, and write the 16-bit sums to
/// the corresponding bits in the destination.
#[inline]
#[target_feature(enable = "ssse3,mmx")]
#[cfg_attr(test, assert_instr(pmaddubsw))]
pub unsafe fn _mm_maddubs_pi16(a: __m64, b: __m64) -> __m64 {
    pmaddubsw(a, b)
}

/// Multiply packed 16-bit signed integer values, truncate the 32-bit
/// products to the 18 most significant bits by right-shifting, round the
/// truncated value by adding 1, and write bits `[16:1]` to the destination.
#[inline]
#[target_feature(enable = "ssse3,mmx")]
#[cfg_attr(test, assert_instr(pmulhrsw))]
pub unsafe fn _mm_mulhrs_pi16(a: __m64, b: __m64) -> __m64 {
    pmulhrsw(a, b)
}

/// Negate packed 8-bit integers in `a` when the corresponding signed 8-bit
/// integer in `b` is negative, and return the results.
/// Elements in the result are zeroed out when the corresponding element in
/// `b` is zero.
#[inline]
#[target_feature(enable = "ssse3,mmx")]
#[cfg_attr(test, assert_instr(psignb))]
pub unsafe fn _mm_sign_pi8(a: __m64, b: __m64) -> __m64 {
    psignb(a, b)
}

/// Negate packed 16-bit integers in `a` when the corresponding signed 16-bit
/// integer in `b` is negative, and return the results.
/// Elements in the result are zeroed out when the corresponding element in
/// `b` is zero.
#[inline]
#[target_feature(enable = "ssse3,mmx")]
#[cfg_attr(test, assert_instr(psignw))]
pub unsafe fn _mm_sign_pi16(a: __m64, b: __m64) -> __m64 {
    psignw(a, b)
}

/// Negate packed 32-bit integers in `a` when the corresponding signed 32-bit
/// integer in `b` is negative, and return the results.
/// Elements in the result are zeroed out when the corresponding element in
/// `b` is zero.
#[inline]
#[target_feature(enable = "ssse3,mmx")]
#[cfg_attr(test, assert_instr(psignd))]
pub unsafe fn _mm_sign_pi32(a: __m64, b: __m64) -> __m64 {
    psignd(a, b)
}

#[allow(improper_ctypes)]
extern "C" {
    #[link_name = "llvm.x86.ssse3.pabs.b.128"]
    fn pabsb128(a: i8x16) -> u8x16;

    #[link_name = "llvm.x86.ssse3.pabs.w.128"]
    fn pabsw128(a: i16x8) -> u16x8;

    #[link_name = "llvm.x86.ssse3.pabs.d.128"]
    fn pabsd128(a: i32x4) -> u32x4;

    #[link_name = "llvm.x86.ssse3.pshuf.b.128"]
    fn pshufb128(a: u8x16, b: u8x16) -> u8x16;

    #[link_name = "llvm.x86.ssse3.phadd.w.128"]
    fn phaddw128(a: i16x8, b: i16x8) -> i16x8;

    #[link_name = "llvm.x86.ssse3.phadd.sw.128"]
    fn phaddsw128(a: i16x8, b: i16x8) -> i16x8;

    #[link_name = "llvm.x86.ssse3.phadd.d.128"]
    fn phaddd128(a: i32x4, b: i32x4) -> i32x4;

    #[link_name = "llvm.x86.ssse3.phsub.w.128"]
    fn phsubw128(a: i16x8, b: i16x8) -> i16x8;

    #[link_name = "llvm.x86.ssse3.phsub.sw.128"]
    fn phsubsw128(a: i16x8, b: i16x8) -> i16x8;

    #[link_name = "llvm.x86.ssse3.phsub.d.128"]
    fn phsubd128(a: i32x4, b: i32x4) -> i32x4;

    #[link_name = "llvm.x86.ssse3.pmadd.ub.sw.128"]
    fn pmaddubsw128(a: u8x16, b: i8x16) -> i16x8;

    #[link_name = "llvm.x86.ssse3.pmul.hr.sw.128"]
    fn pmulhrsw128(a: i16x8, b: i16x8) -> i16x8;

    #[link_name = "llvm.x86.ssse3.psign.b.128"]
    fn psignb128(a: i8x16, b: i8x16) -> i8x16;

    #[link_name = "llvm.x86.ssse3.psign.w.128"]
    fn psignw128(a: i16x8, b: i16x8) -> i16x8;

    #[link_name = "llvm.x86.ssse3.psign.d.128"]
    fn psignd128(a: i32x4, b: i32x4) -> i32x4;

    #[link_name = "llvm.x86.ssse3.pabs.b"]
    fn pabsb(a: __m64) -> __m64;

    #[link_name = "llvm.x86.ssse3.pabs.w"]
    fn pabsw(a: __m64) -> __m64;

    #[link_name = "llvm.x86.ssse3.pabs.d"]
    fn pabsd(a: __m64) -> __m64;

    #[link_name = "llvm.x86.ssse3.pshuf.b"]
    fn pshufb(a: __m64, b: __m64) -> __m64;

    #[link_name = "llvm.x86.mmx.palignr.b"]
    fn palignrb(a: __m64, b: __m64, n: u8) -> __m64;

    #[link_name = "llvm.x86.ssse3.phadd.w"]
    fn phaddw(a: __m64, b: __m64) -> __m64;

    #[link_name = "llvm.x86.ssse3.phadd.d"]
    fn phaddd(a: __m64, b: __m64) -> __m64;

    #[link_name = "llvm.x86.ssse3.phadd.sw"]
    fn phaddsw(a: __m64, b: __m64) -> __m64;

    #[link_name = "llvm.x86.ssse3.phsub.w"]
    fn phsubw(a: __m64, b: __m64) -> __m64;

    #[link_name = "llvm.x86.ssse3.phsub.d"]
    fn phsubd(a: __m64, b: __m64) -> __m64;

    #[link_name = "llvm.x86.ssse3.phsub.sw"]
    fn phsubsw(a: __m64, b: __m64) -> __m64;

    #[link_name = "llvm.x86.ssse3.pmadd.ub.sw"]
    fn pmaddubsw(a: __m64, b: __m64) -> __m64;

    #[link_name = "llvm.x86.ssse3.pmul.hr.sw"]
    fn pmulhrsw(a: __m64, b: __m64) -> __m64;

    #[link_name = "llvm.x86.ssse3.psign.b"]
    fn psignb(a: __m64, b: __m64) -> __m64;

    #[link_name = "llvm.x86.ssse3.psign.w"]
    fn psignw(a: __m64, b: __m64) -> __m64;

    #[link_name = "llvm.x86.ssse3.psign.d"]
    fn psignd(a: __m64, b: __m64) -> __m64;
}

#[cfg(test)]
mod tests {
    use stdsimd_test::simd_test;

    use coresimd::x86::*;

    #[simd_test(enable = "ssse3")]
    unsafe fn test_mm_abs_epi8() {
        let r = _mm_abs_epi8(_mm_set1_epi8(-5));
        assert_eq_m128i(r, _mm_set1_epi8(5));
    }

    #[simd_test(enable = "ssse3")]
    unsafe fn test_mm_abs_epi16() {
        let r = _mm_abs_epi16(_mm_set1_epi16(-5));
        assert_eq_m128i(r, _mm_set1_epi16(5));
    }

    #[simd_test(enable = "ssse3")]
    unsafe fn test_mm_abs_epi32() {
        let r = _mm_abs_epi32(_mm_set1_epi32(-5));
        assert_eq_m128i(r, _mm_set1_epi32(5));
    }

    #[simd_test(enable = "ssse3")]
    unsafe fn test_mm_shuffle_epi8() {
        #[cfg_attr(rustfmt, rustfmt_skip)]
        let a = _mm_setr_epi8(
            1, 2, 3, 4, 5, 6, 7, 8,
            9, 10, 11, 12, 13, 14, 15, 16,
        );
        #[cfg_attr(rustfmt, rustfmt_skip)]
        let b = _mm_setr_epi8(
            4, 128_u8 as i8, 4, 3,
            24, 12, 6, 19,
            12, 5, 5, 10,
            4, 1, 8, 0,
        );
        #[cfg_attr(rustfmt, rustfmt_skip)]
        let expected = _mm_setr_epi8(
            5, 0, 5, 4, 9, 13, 7, 4,
            13, 6, 6, 11, 5, 2, 9, 1,
        );
        let r = _mm_shuffle_epi8(a, b);
        assert_eq_m128i(r, expected);
    }

    #[simd_test(enable = "ssse3")]
    unsafe fn test_mm_alignr_epi8() {
        #[cfg_attr(rustfmt, rustfmt_skip)]
        let a = _mm_setr_epi8(
            1, 2, 3, 4, 5, 6, 7, 8,
            9, 10, 11, 12, 13, 14, 15, 16,
        );
        #[cfg_attr(rustfmt, rustfmt_skip)]
        let b = _mm_setr_epi8(
            4, 63, 4, 3,
            24, 12, 6, 19,
            12, 5, 5, 10,
            4, 1, 8, 0,
        );
        let r = _mm_alignr_epi8(a, b, 33);
        assert_eq_m128i(r, _mm_set1_epi8(0));

        let r = _mm_alignr_epi8(a, b, 17);
        #[cfg_attr(rustfmt, rustfmt_skip)]
        let expected = _mm_setr_epi8(
            2, 3, 4, 5, 6, 7, 8, 9,
            10, 11, 12, 13, 14, 15, 16, 0,
        );
        assert_eq_m128i(r, expected);

        let r = _mm_alignr_epi8(a, b, 16);
        assert_eq_m128i(r, a);

        let r = _mm_alignr_epi8(a, b, 15);
        #[cfg_attr(rustfmt, rustfmt_skip)]
        let expected = _mm_setr_epi8(
            0, 1, 2, 3, 4, 5, 6, 7,
            8, 9, 10, 11, 12, 13, 14, 15,
        );
        assert_eq_m128i(r, expected);

        let r = _mm_alignr_epi8(a, b, 0);
        assert_eq_m128i(r, b);
    }

    #[simd_test(enable = "ssse3")]
    unsafe fn test_mm_hadd_epi16() {
        let a = _mm_setr_epi16(1, 2, 3, 4, 5, 6, 7, 8);
        let b = _mm_setr_epi16(4, 128, 4, 3, 24, 12, 6, 19);
        let expected = _mm_setr_epi16(3, 7, 11, 15, 132, 7, 36, 25);
        let r = _mm_hadd_epi16(a, b);
        assert_eq_m128i(r, expected);
    }

    #[simd_test(enable = "ssse3")]
    unsafe fn test_mm_hadds_epi16() {
        let a = _mm_setr_epi16(1, 2, 3, 4, 5, 6, 7, 8);
        let b = _mm_setr_epi16(4, 128, 4, 3, 32767, 1, -32768, -1);
        let expected = _mm_setr_epi16(3, 7, 11, 15, 132, 7, 32767, -32768);
        let r = _mm_hadds_epi16(a, b);
        assert_eq_m128i(r, expected);
    }

    #[simd_test(enable = "ssse3")]
    unsafe fn test_mm_hadd_epi32() {
        let a = _mm_setr_epi32(1, 2, 3, 4);
        let b = _mm_setr_epi32(4, 128, 4, 3);
        let expected = _mm_setr_epi32(3, 7, 132, 7);
        let r = _mm_hadd_epi32(a, b);
        assert_eq_m128i(r, expected);
    }

    #[simd_test(enable = "ssse3")]
    unsafe fn test_mm_hsub_epi16() {
        let a = _mm_setr_epi16(1, 2, 3, 4, 5, 6, 7, 8);
        let b = _mm_setr_epi16(4, 128, 4, 3, 24, 12, 6, 19);
        let expected = _mm_setr_epi16(-1, -1, -1, -1, -124, 1, 12, -13);
        let r = _mm_hsub_epi16(a, b);
        assert_eq_m128i(r, expected);
    }

    #[simd_test(enable = "ssse3")]
    unsafe fn test_mm_hsubs_epi16() {
        let a = _mm_setr_epi16(1, 2, 3, 4, 5, 6, 7, 8);
        let b = _mm_setr_epi16(4, 128, 4, 3, 32767, -1, -32768, 1);
        let expected = _mm_setr_epi16(-1, -1, -1, -1, -124, 1, 32767, -32768);
        let r = _mm_hsubs_epi16(a, b);
        assert_eq_m128i(r, expected);
    }

    #[simd_test(enable = "ssse3")]
    unsafe fn test_mm_hsub_epi32() {
        let a = _mm_setr_epi32(1, 2, 3, 4);
        let b = _mm_setr_epi32(4, 128, 4, 3);
        let expected = _mm_setr_epi32(-1, -1, -124, 1);
        let r = _mm_hsub_epi32(a, b);
        assert_eq_m128i(r, expected);
    }

    #[simd_test(enable = "ssse3")]
    unsafe fn test_mm_maddubs_epi16() {
        #[cfg_attr(rustfmt, rustfmt_skip)]
        let a = _mm_setr_epi8(
            1, 2, 3, 4, 5, 6, 7, 8,
            9, 10, 11, 12, 13, 14, 15, 16,
        );
        #[cfg_attr(rustfmt, rustfmt_skip)]
        let b = _mm_setr_epi8(
            4, 63, 4, 3,
            24, 12, 6, 19,
            12, 5, 5, 10,
            4, 1, 8, 0,
        );
        let expected = _mm_setr_epi16(130, 24, 192, 194, 158, 175, 66, 120);
        let r = _mm_maddubs_epi16(a, b);
        assert_eq_m128i(r, expected);
    }

    #[simd_test(enable = "ssse3")]
    unsafe fn test_mm_mulhrs_epi16() {
        let a = _mm_setr_epi16(1, 2, 3, 4, 5, 6, 7, 8);
        let b = _mm_setr_epi16(4, 128, 4, 3, 32767, -1, -32768, 1);
        let expected = _mm_setr_epi16(0, 0, 0, 0, 5, 0, -7, 0);
        let r = _mm_mulhrs_epi16(a, b);
        assert_eq_m128i(r, expected);
    }

    #[simd_test(enable = "ssse3")]
    unsafe fn test_mm_sign_epi8() {
        #[cfg_attr(rustfmt, rustfmt_skip)]
        let a = _mm_setr_epi8(
            1, 2, 3, 4, 5, 6, 7, 8,
            9, 10, 11, 12, 13, -14, -15, 16,
        );
        #[cfg_attr(rustfmt, rustfmt_skip)]
        let b = _mm_setr_epi8(
            4, 63, -4, 3, 24, 12, -6, -19,
            12, 5, -5, 10, 4, 1, -8, 0,
        );
        #[cfg_attr(rustfmt, rustfmt_skip)]
        let expected = _mm_setr_epi8(
            1, 2, -3, 4, 5, 6, -7, -8,
            9, 10, -11, 12, 13, -14, 15, 0,
        );
        let r = _mm_sign_epi8(a, b);
        assert_eq_m128i(r, expected);
    }

    #[simd_test(enable = "ssse3")]
    unsafe fn test_mm_sign_epi16() {
        let a = _mm_setr_epi16(1, 2, 3, 4, -5, -6, 7, 8);
        let b = _mm_setr_epi16(4, 128, 0, 3, 1, -1, -2, 1);
        let expected = _mm_setr_epi16(1, 2, 0, 4, -5, 6, -7, 8);
        let r = _mm_sign_epi16(a, b);
        assert_eq_m128i(r, expected);
    }

    #[simd_test(enable = "ssse3")]
    unsafe fn test_mm_sign_epi32() {
        let a = _mm_setr_epi32(-1, 2, 3, 4);
        let b = _mm_setr_epi32(1, -1, 1, 0);
        let expected = _mm_setr_epi32(-1, -2, 3, 0);
        let r = _mm_sign_epi32(a, b);
        assert_eq_m128i(r, expected);
    }

    #[simd_test(enable = "ssse3,mmx")]
    unsafe fn test_mm_abs_pi8() {
        let r = _mm_abs_pi8(_mm_set1_pi8(-5));
        assert_eq_m64(r, _mm_set1_pi8(5));
    }

    #[simd_test(enable = "ssse3,mmx")]
    unsafe fn test_mm_abs_pi16() {
        let r = _mm_abs_pi16(_mm_set1_pi16(-5));
        assert_eq_m64(r, _mm_set1_pi16(5));
    }

    #[simd_test(enable = "ssse3,mmx")]
    unsafe fn test_mm_abs_pi32() {
        let r = _mm_abs_pi32(_mm_set1_pi32(-5));
        assert_eq_m64(r, _mm_set1_pi32(5));
    }

    #[simd_test(enable = "ssse3,mmx")]
    unsafe fn test_mm_shuffle_pi8() {
        let a = _mm_setr_pi8(1, 2, 3, 4, 5, 6, 7, 8);
        let b = _mm_setr_pi8(4, 128u8 as i8, 4, 3, 24, 12, 6, 19);
        let expected = _mm_setr_pi8(5, 0, 5, 4, 1, 5, 7, 4);
        let r = _mm_shuffle_pi8(a, b);
        assert_eq_m64(r, expected);
    }

    #[simd_test(enable = "ssse3,mmx")]
    unsafe fn test_mm_alignr_pi8() {
        let a = _mm_setr_pi32(0x89ABCDEF_u32 as i32, 0x01234567_u32 as i32);
        let b = _mm_setr_pi32(0xBBAA9988_u32 as i32, 0xFFDDEECC_u32 as i32);
        let r = _mm_alignr_pi8(a, b, 4);
        assert_eq_m64(r, ::std::mem::transmute(0x89abcdefffddeecc_u64));
    }

    #[simd_test(enable = "ssse3,mmx")]
    unsafe fn test_mm_hadd_pi16() {
        let a = _mm_setr_pi16(1, 2, 3, 4);
        let b = _mm_setr_pi16(4, 128, 4, 3);
        let expected = _mm_setr_pi16(3, 7, 132, 7);
        let r = _mm_hadd_pi16(a, b);
        assert_eq_m64(r, expected);
    }

    #[simd_test(enable = "ssse3,mmx")]
    unsafe fn test_mm_hadd_pi32() {
        let a = _mm_setr_pi32(1, 2);
        let b = _mm_setr_pi32(4, 128);
        let expected = _mm_setr_pi32(3, 132);
        let r = _mm_hadd_pi32(a, b);
        assert_eq_m64(r, expected);
    }

    #[simd_test(enable = "ssse3,mmx")]
    unsafe fn test_mm_hadds_pi16() {
        let a = _mm_setr_pi16(1, 2, 3, 4);
        let b = _mm_setr_pi16(32767, 1, -32768, -1);
        let expected = _mm_setr_pi16(3, 7, 32767, -32768);
        let r = _mm_hadds_pi16(a, b);
        assert_eq_m64(r, expected);
    }

    #[simd_test(enable = "ssse3,mmx")]
    unsafe fn test_mm_hsub_pi16() {
        let a = _mm_setr_pi16(1, 2, 3, 4);
        let b = _mm_setr_pi16(4, 128, 4, 3);
        let expected = _mm_setr_pi16(-1, -1, -124, 1);
        let r = _mm_hsub_pi16(a, b);
        assert_eq_m64(r, expected);
    }

    #[simd_test(enable = "ssse3,mmx")]
    unsafe fn test_mm_hsub_pi32() {
        let a = _mm_setr_pi32(1, 2);
        let b = _mm_setr_pi32(4, 128);
        let expected = _mm_setr_pi32(-1, -124);
        let r = _mm_hsub_pi32(a, b);
        assert_eq_m64(r, expected);
    }

    #[simd_test(enable = "ssse3,mmx")]
    unsafe fn test_mm_hsubs_pi16() {
        let a = _mm_setr_pi16(1, 2, 3, 4);
        let b = _mm_setr_pi16(4, 128, 4, 3);
        let expected = _mm_setr_pi16(-1, -1, -124, 1);
        let r = _mm_hsubs_pi16(a, b);
        assert_eq_m64(r, expected);
    }

    #[simd_test(enable = "ssse3,mmx")]
    unsafe fn test_mm_maddubs_pi16() {
        let a = _mm_setr_pi8(1, 2, 3, 4, 5, 6, 7, 8);
        let b = _mm_setr_pi8(4, 63, 4, 3, 24, 12, 6, 19);
        let expected = _mm_setr_pi16(130, 24, 192, 194);
        let r = _mm_maddubs_pi16(a, b);
        assert_eq_m64(r, expected);
    }

    #[simd_test(enable = "ssse3,mmx")]
    unsafe fn test_mm_mulhrs_pi16() {
        let a = _mm_setr_pi16(1, 2, 3, 4);
        let b = _mm_setr_pi16(4, 32767, -1, -32768);
        let expected = _mm_setr_pi16(0, 2, 0, -4);
        let r = _mm_mulhrs_pi16(a, b);
        assert_eq_m64(r, expected);
    }

    #[simd_test(enable = "ssse3,mmx")]
    unsafe fn test_mm_sign_pi8() {
        let a = _mm_setr_pi8(1, 2, 3, 4, -5, -6, 7, 8);
        let b = _mm_setr_pi8(4, 64, 0, 3, 1, -1, -2, 1);
        let expected = _mm_setr_pi8(1, 2, 0, 4, -5, 6, -7, 8);
        let r = _mm_sign_pi8(a, b);
        assert_eq_m64(r, expected);
    }

    #[simd_test(enable = "ssse3,mmx")]
    unsafe fn test_mm_sign_pi16() {
        let a = _mm_setr_pi16(-1, 2, 3, 4);
        let b = _mm_setr_pi16(1, -1, 1, 0);
        let expected = _mm_setr_pi16(-1, -2, 3, 0);
        let r = _mm_sign_pi16(a, b);
        assert_eq_m64(r, expected);
    }

    #[simd_test(enable = "ssse3,mmx")]
    unsafe fn test_mm_sign_pi32() {
        let a = _mm_setr_pi32(-1, 2);
        let b = _mm_setr_pi32(1, 0);
        let expected = _mm_setr_pi32(-1, 0);
        let r = _mm_sign_pi32(a, b);
        assert_eq_m64(r, expected);
    }
}