]> git.proxmox.com Git - rustc.git/blame - src/stdsimd/coresimd/x86/ssse3.rs
New upstream version 1.26.0+dfsg1
[rustc.git] / src / stdsimd / coresimd / x86 / ssse3.rs
CommitLineData
0531ce1d
XL
1//! Supplemental Streaming SIMD Extensions 3 (SSSE3)
2
3use coresimd::simd_llvm::simd_shuffle16;
4use coresimd::simd::*;
5use coresimd::x86::*;
6use mem;
7
8#[cfg(test)]
9use stdsimd_test::assert_instr;
10
11/// Compute the absolute value of packed 8-bit signed integers in `a` and
12/// return the unsigned results.
13#[inline]
14#[target_feature(enable = "ssse3")]
15#[cfg_attr(test, assert_instr(pabsb))]
16pub unsafe fn _mm_abs_epi8(a: __m128i) -> __m128i {
17 mem::transmute(pabsb128(a.as_i8x16()))
18}
19
20/// Compute the absolute value of each of the packed 16-bit signed integers in
21/// `a` and
22/// return the 16-bit unsigned integer
23#[inline]
24#[target_feature(enable = "ssse3")]
25#[cfg_attr(test, assert_instr(pabsw))]
26pub unsafe fn _mm_abs_epi16(a: __m128i) -> __m128i {
27 mem::transmute(pabsw128(a.as_i16x8()))
28}
29
30/// Compute the absolute value of each of the packed 32-bit signed integers in
31/// `a` and
32/// return the 32-bit unsigned integer
33#[inline]
34#[target_feature(enable = "ssse3")]
35#[cfg_attr(test, assert_instr(pabsd))]
36pub unsafe fn _mm_abs_epi32(a: __m128i) -> __m128i {
37 mem::transmute(pabsd128(a.as_i32x4()))
38}
39
40/// Shuffle bytes from `a` according to the content of `b`.
41///
42/// The last 4 bits of each byte of `b` are used as addresses
43/// into the 16 bytes of `a`.
44///
45/// In addition, if the highest significant bit of a byte of `b`
46/// is set, the respective destination byte is set to 0.
47///
48/// Picturing `a` and `b` as `[u8; 16]`, `_mm_shuffle_epi8` is
49/// logically equivalent to:
50///
51/// ```
52/// fn mm_shuffle_epi8(a: [u8; 16], b: [u8; 16]) -> [u8; 16] {
53/// let mut r = [0u8; 16];
54/// for i in 0..16 {
55/// // if the most significant bit of b is set,
56/// // then the destination byte is set to 0.
57/// if b[i] & 0x80 == 0u8 {
58/// r[i] = a[(b[i] % 16) as usize];
59/// }
60/// }
61/// r
62/// }
63/// ```
64#[inline]
65#[target_feature(enable = "ssse3")]
66#[cfg_attr(test, assert_instr(pshufb))]
67pub unsafe fn _mm_shuffle_epi8(a: __m128i, b: __m128i) -> __m128i {
68 mem::transmute(pshufb128(a.as_u8x16(), b.as_u8x16()))
69}
70
/// Concatenate 16-byte blocks in `a` and `b` into a 32-byte temporary result,
/// shift the result right by `n` bytes, and return the low 16 bytes.
///
/// `n` must be a constant (enforced by `rustc_args_required_const`); any
/// value of 32 or more yields an all-zero vector.
#[inline]
#[target_feature(enable = "ssse3")]
#[cfg_attr(test, assert_instr(palignr, n = 15))]
#[rustc_args_required_const(2)]
pub unsafe fn _mm_alignr_epi8(a: __m128i, b: __m128i, n: i32) -> __m128i {
    // Negative `n` wraps to a huge u32 and falls into the zero case below.
    let n = n as u32;
    // If palignr is shifting the pair of vectors more than the size of two
    // lanes, emit zero.
    if n > 32 {
        return _mm_set1_epi8(0);
    }
    // If palignr is shifting the pair of input vectors more than one lane,
    // but less than two lanes, convert to shifting in zeroes.
    let (a, b, n) = if n > 16 {
        (_mm_set1_epi8(0), a, n - 16)
    } else {
        (a, b, n)
    };
    let a = a.as_i8x16();
    let b = b.as_i8x16();

    // Express the byte shift as a compile-time shuffle: result lane `i` is
    // lane `i + $shift` of the 32-byte concatenation `[b, a]` (indices 16..31
    // of `simd_shuffle16` select from the second operand).
    macro_rules! shuffle {
        ($shift:expr) => {
            simd_shuffle16(b, a, [
                0 + $shift, 1 + $shift,
                2 + $shift, 3 + $shift,
                4 + $shift, 5 + $shift,
                6 + $shift, 7 + $shift,
                8 + $shift, 9 + $shift,
                10 + $shift, 11 + $shift,
                12 + $shift, 13 + $shift,
                14 + $shift, 15 + $shift,
            ])
        }
    }
    // `n` is a constant, so exactly one arm survives constant folding and
    // each arm hands `simd_shuffle16` the literal indices it requires.
    let r: i8x16 = match n {
        0 => shuffle!(0),
        1 => shuffle!(1),
        2 => shuffle!(2),
        3 => shuffle!(3),
        4 => shuffle!(4),
        5 => shuffle!(5),
        6 => shuffle!(6),
        7 => shuffle!(7),
        8 => shuffle!(8),
        9 => shuffle!(9),
        10 => shuffle!(10),
        11 => shuffle!(11),
        12 => shuffle!(12),
        13 => shuffle!(13),
        14 => shuffle!(14),
        15 => shuffle!(15),
        _ => shuffle!(16),
    };
    mem::transmute(r)
}
129
130/// Horizontally add the adjacent pairs of values contained in 2 packed
131/// 128-bit vectors of [8 x i16].
132#[inline]
133#[target_feature(enable = "ssse3")]
134#[cfg_attr(test, assert_instr(phaddw))]
135pub unsafe fn _mm_hadd_epi16(a: __m128i, b: __m128i) -> __m128i {
136 mem::transmute(phaddw128(a.as_i16x8(), b.as_i16x8()))
137}
138
139/// Horizontally add the adjacent pairs of values contained in 2 packed
140/// 128-bit vectors of [8 x i16]. Positive sums greater than 7FFFh are
141/// saturated to 7FFFh. Negative sums less than 8000h are saturated to 8000h.
142#[inline]
143#[target_feature(enable = "ssse3")]
144#[cfg_attr(test, assert_instr(phaddsw))]
145pub unsafe fn _mm_hadds_epi16(a: __m128i, b: __m128i) -> __m128i {
146 mem::transmute(phaddsw128(a.as_i16x8(), b.as_i16x8()))
147}
148
149/// Horizontally add the adjacent pairs of values contained in 2 packed
150/// 128-bit vectors of [4 x i32].
151#[inline]
152#[target_feature(enable = "ssse3")]
153#[cfg_attr(test, assert_instr(phaddd))]
154pub unsafe fn _mm_hadd_epi32(a: __m128i, b: __m128i) -> __m128i {
155 mem::transmute(phaddd128(a.as_i32x4(), b.as_i32x4()))
156}
157
158/// Horizontally subtract the adjacent pairs of values contained in 2
159/// packed 128-bit vectors of [8 x i16].
160#[inline]
161#[target_feature(enable = "ssse3")]
162#[cfg_attr(test, assert_instr(phsubw))]
163pub unsafe fn _mm_hsub_epi16(a: __m128i, b: __m128i) -> __m128i {
164 mem::transmute(phsubw128(a.as_i16x8(), b.as_i16x8()))
165}
166
167/// Horizontally subtract the adjacent pairs of values contained in 2
168/// packed 128-bit vectors of [8 x i16]. Positive differences greater than
169/// 7FFFh are saturated to 7FFFh. Negative differences less than 8000h are
170/// saturated to 8000h.
171#[inline]
172#[target_feature(enable = "ssse3")]
173#[cfg_attr(test, assert_instr(phsubsw))]
174pub unsafe fn _mm_hsubs_epi16(a: __m128i, b: __m128i) -> __m128i {
175 mem::transmute(phsubsw128(a.as_i16x8(), b.as_i16x8()))
176}
177
178/// Horizontally subtract the adjacent pairs of values contained in 2
179/// packed 128-bit vectors of [4 x i32].
180#[inline]
181#[target_feature(enable = "ssse3")]
182#[cfg_attr(test, assert_instr(phsubd))]
183pub unsafe fn _mm_hsub_epi32(a: __m128i, b: __m128i) -> __m128i {
184 mem::transmute(phsubd128(a.as_i32x4(), b.as_i32x4()))
185}
186
187/// Multiply corresponding pairs of packed 8-bit unsigned integer
188/// values contained in the first source operand and packed 8-bit signed
189/// integer values contained in the second source operand, add pairs of
190/// contiguous products with signed saturation, and writes the 16-bit sums to
191/// the corresponding bits in the destination.
192#[inline]
193#[target_feature(enable = "ssse3")]
194#[cfg_attr(test, assert_instr(pmaddubsw))]
195pub unsafe fn _mm_maddubs_epi16(a: __m128i, b: __m128i) -> __m128i {
196 mem::transmute(pmaddubsw128(a.as_u8x16(), b.as_i8x16()))
197}
198
199/// Multiply packed 16-bit signed integer values, truncate the 32-bit
200/// product to the 18 most significant bits by right-shifting, round the
201/// truncated value by adding 1, and write bits [16:1] to the destination.
202#[inline]
203#[target_feature(enable = "ssse3")]
204#[cfg_attr(test, assert_instr(pmulhrsw))]
205pub unsafe fn _mm_mulhrs_epi16(a: __m128i, b: __m128i) -> __m128i {
206 mem::transmute(pmulhrsw128(a.as_i16x8(), b.as_i16x8()))
207}
208
209/// Negate packed 8-bit integers in `a` when the corresponding signed 8-bit
210/// integer in `b` is negative, and return the result.
211/// Elements in result are zeroed out when the corresponding element in `b`
212/// is zero.
213#[inline]
214#[target_feature(enable = "ssse3")]
215#[cfg_attr(test, assert_instr(psignb))]
216pub unsafe fn _mm_sign_epi8(a: __m128i, b: __m128i) -> __m128i {
217 mem::transmute(psignb128(a.as_i8x16(), b.as_i8x16()))
218}
219
220/// Negate packed 16-bit integers in `a` when the corresponding signed 16-bit
221/// integer in `b` is negative, and return the results.
222/// Elements in result are zeroed out when the corresponding element in `b`
223/// is zero.
224#[inline]
225#[target_feature(enable = "ssse3")]
226#[cfg_attr(test, assert_instr(psignw))]
227pub unsafe fn _mm_sign_epi16(a: __m128i, b: __m128i) -> __m128i {
228 mem::transmute(psignw128(a.as_i16x8(), b.as_i16x8()))
229}
230
231/// Negate packed 32-bit integers in `a` when the corresponding signed 32-bit
232/// integer in `b` is negative, and return the results.
233/// Element in result are zeroed out when the corresponding element in `b`
234/// is zero.
235#[inline]
236#[target_feature(enable = "ssse3")]
237#[cfg_attr(test, assert_instr(psignd))]
238pub unsafe fn _mm_sign_epi32(a: __m128i, b: __m128i) -> __m128i {
239 mem::transmute(psignd128(a.as_i32x4(), b.as_i32x4()))
240}
241
/// Compute the absolute value of packed 8-bit integers in the 64-bit MMX
/// vector `a` and return the unsigned results.
#[inline]
#[target_feature(enable = "ssse3,mmx")]
#[cfg_attr(test, assert_instr(pabsb))]
pub unsafe fn _mm_abs_pi8(a: __m64) -> __m64 {
    pabsb(a)
}
250
/// Compute the absolute value of packed 16-bit integers in the 64-bit MMX
/// vector `a`, and return the unsigned results.
#[inline]
#[target_feature(enable = "ssse3,mmx")]
#[cfg_attr(test, assert_instr(pabsw))]
pub unsafe fn _mm_abs_pi16(a: __m64) -> __m64 {
    pabsw(a)
}
259
/// Compute the absolute value of packed 32-bit integers in the 64-bit MMX
/// vector `a`, and return the unsigned results.
#[inline]
#[target_feature(enable = "ssse3,mmx")]
#[cfg_attr(test, assert_instr(pabsd))]
pub unsafe fn _mm_abs_pi32(a: __m64) -> __m64 {
    pabsd(a)
}
268
/// Shuffle packed 8-bit integers in `a` according to the shuffle control
/// mask in the corresponding 8-bit element of `b`, and return the results.
///
/// 64-bit (MMX) counterpart of [`_mm_shuffle_epi8`].
#[inline]
#[target_feature(enable = "ssse3,mmx")]
#[cfg_attr(test, assert_instr(pshufb))]
pub unsafe fn _mm_shuffle_pi8(a: __m64, b: __m64) -> __m64 {
    pshufb(a, b)
}
277
/// Concatenates the two 64-bit integer vector operands, and right-shifts
/// the result by the number of bytes specified in the immediate operand.
///
/// `n` must be a constant (enforced by `rustc_args_required_const`).
#[inline]
#[target_feature(enable = "ssse3,mmx")]
#[cfg_attr(test, assert_instr(palignr, n = 15))]
#[rustc_args_required_const(2)]
pub unsafe fn _mm_alignr_pi8(a: __m64, b: __m64, n: i32) -> __m64 {
    // `palignrb` needs a literal immediate; `constify_imm8!` expands `call!`
    // once per possible 8-bit value of `n` so each arm passes a constant.
    macro_rules! call {
        ($imm8:expr) => {
            palignrb(a, b, $imm8)
        }
    }
    constify_imm8!(n, call)
}
292
/// Horizontally add the adjacent pairs of values contained in 2 packed
/// 64-bit vectors of [4 x i16].
#[inline]
#[target_feature(enable = "ssse3,mmx")]
#[cfg_attr(test, assert_instr(phaddw))]
pub unsafe fn _mm_hadd_pi16(a: __m64, b: __m64) -> __m64 {
    phaddw(a, b)
}
301
/// Horizontally add the adjacent pairs of values contained in 2 packed
/// 64-bit vectors of [2 x i32].
#[inline]
#[target_feature(enable = "ssse3,mmx")]
#[cfg_attr(test, assert_instr(phaddd))]
pub unsafe fn _mm_hadd_pi32(a: __m64, b: __m64) -> __m64 {
    phaddd(a, b)
}
310
/// Horizontally add the adjacent pairs of values contained in 2 packed
/// 64-bit vectors of [4 x i16]. Positive sums greater than 7FFFh are
/// saturated to 7FFFh. Negative sums less than 8000h are saturated to 8000h.
#[inline]
#[target_feature(enable = "ssse3,mmx")]
#[cfg_attr(test, assert_instr(phaddsw))]
pub unsafe fn _mm_hadds_pi16(a: __m64, b: __m64) -> __m64 {
    phaddsw(a, b)
}
320
/// Horizontally subtracts the adjacent pairs of values contained in 2
/// packed 64-bit vectors of [4 x i16].
#[inline]
#[target_feature(enable = "ssse3,mmx")]
#[cfg_attr(test, assert_instr(phsubw))]
pub unsafe fn _mm_hsub_pi16(a: __m64, b: __m64) -> __m64 {
    phsubw(a, b)
}
329
/// Horizontally subtracts the adjacent pairs of values contained in 2
/// packed 64-bit vectors of [2 x i32].
#[inline]
#[target_feature(enable = "ssse3,mmx")]
#[cfg_attr(test, assert_instr(phsubd))]
pub unsafe fn _mm_hsub_pi32(a: __m64, b: __m64) -> __m64 {
    phsubd(a, b)
}
338
/// Horizontally subtracts the adjacent pairs of values contained in 2
/// packed 64-bit vectors of [4 x i16]. Positive differences greater than
/// 7FFFh are saturated to 7FFFh. Negative differences less than 8000h are
/// saturated to 8000h.
#[inline]
#[target_feature(enable = "ssse3,mmx")]
#[cfg_attr(test, assert_instr(phsubsw))]
pub unsafe fn _mm_hsubs_pi16(a: __m64, b: __m64) -> __m64 {
    phsubsw(a, b)
}
349
/// Multiplies corresponding pairs of packed 8-bit unsigned integer
/// values contained in the first source operand and packed 8-bit signed
/// integer values contained in the second source operand, adds pairs of
/// contiguous products with signed saturation, and writes the 16-bit sums to
/// the corresponding bits in the destination.
#[inline]
#[target_feature(enable = "ssse3,mmx")]
#[cfg_attr(test, assert_instr(pmaddubsw))]
pub unsafe fn _mm_maddubs_pi16(a: __m64, b: __m64) -> __m64 {
    pmaddubsw(a, b)
}
361
/// Multiplies packed 16-bit signed integer values, truncates the 32-bit
/// products to the 18 most significant bits by right-shifting, rounds the
/// truncated value by adding 1, and writes bits [16:1] to the destination.
#[inline]
#[target_feature(enable = "ssse3,mmx")]
#[cfg_attr(test, assert_instr(pmulhrsw))]
pub unsafe fn _mm_mulhrs_pi16(a: __m64, b: __m64) -> __m64 {
    pmulhrsw(a, b)
}
371
/// Negate packed 8-bit integers in `a` when the corresponding signed 8-bit
/// integer in `b` is negative, and return the results.
/// Elements in the result are zeroed out when the corresponding element in
/// `b` is zero.
#[inline]
#[target_feature(enable = "ssse3,mmx")]
#[cfg_attr(test, assert_instr(psignb))]
pub unsafe fn _mm_sign_pi8(a: __m64, b: __m64) -> __m64 {
    psignb(a, b)
}
382
/// Negate packed 16-bit integers in `a` when the corresponding signed 16-bit
/// integer in `b` is negative, and return the results.
/// Elements in the result are zeroed out when the corresponding element in
/// `b` is zero.
#[inline]
#[target_feature(enable = "ssse3,mmx")]
#[cfg_attr(test, assert_instr(psignw))]
pub unsafe fn _mm_sign_pi16(a: __m64, b: __m64) -> __m64 {
    psignw(a, b)
}
393
/// Negate packed 32-bit integers in `a` when the corresponding signed 32-bit
/// integer in `b` is negative, and return the results.
/// Elements in the result are zeroed out when the corresponding element in
/// `b` is zero.
#[inline]
#[target_feature(enable = "ssse3,mmx")]
#[cfg_attr(test, assert_instr(psignd))]
pub unsafe fn _mm_sign_pi32(a: __m64, b: __m64) -> __m64 {
    psignd(a, b)
}
404
// Raw LLVM intrinsic declarations backing the wrappers above. The
// `link_name` strings are LLVM intrinsic identifiers and must not change.
#[allow(improper_ctypes)]
extern "C" {
    // --- 128-bit (SSE register) intrinsics ---

    #[link_name = "llvm.x86.ssse3.pabs.b.128"]
    fn pabsb128(a: i8x16) -> u8x16;

    #[link_name = "llvm.x86.ssse3.pabs.w.128"]
    fn pabsw128(a: i16x8) -> u16x8;

    #[link_name = "llvm.x86.ssse3.pabs.d.128"]
    fn pabsd128(a: i32x4) -> u32x4;

    #[link_name = "llvm.x86.ssse3.pshuf.b.128"]
    fn pshufb128(a: u8x16, b: u8x16) -> u8x16;

    #[link_name = "llvm.x86.ssse3.phadd.w.128"]
    fn phaddw128(a: i16x8, b: i16x8) -> i16x8;

    #[link_name = "llvm.x86.ssse3.phadd.sw.128"]
    fn phaddsw128(a: i16x8, b: i16x8) -> i16x8;

    #[link_name = "llvm.x86.ssse3.phadd.d.128"]
    fn phaddd128(a: i32x4, b: i32x4) -> i32x4;

    #[link_name = "llvm.x86.ssse3.phsub.w.128"]
    fn phsubw128(a: i16x8, b: i16x8) -> i16x8;

    #[link_name = "llvm.x86.ssse3.phsub.sw.128"]
    fn phsubsw128(a: i16x8, b: i16x8) -> i16x8;

    #[link_name = "llvm.x86.ssse3.phsub.d.128"]
    fn phsubd128(a: i32x4, b: i32x4) -> i32x4;

    #[link_name = "llvm.x86.ssse3.pmadd.ub.sw.128"]
    fn pmaddubsw128(a: u8x16, b: i8x16) -> i16x8;

    #[link_name = "llvm.x86.ssse3.pmul.hr.sw.128"]
    fn pmulhrsw128(a: i16x8, b: i16x8) -> i16x8;

    #[link_name = "llvm.x86.ssse3.psign.b.128"]
    fn psignb128(a: i8x16, b: i8x16) -> i8x16;

    #[link_name = "llvm.x86.ssse3.psign.w.128"]
    fn psignw128(a: i16x8, b: i16x8) -> i16x8;

    #[link_name = "llvm.x86.ssse3.psign.d.128"]
    fn psignd128(a: i32x4, b: i32x4) -> i32x4;

    // --- 64-bit (MMX register) intrinsics ---

    #[link_name = "llvm.x86.ssse3.pabs.b"]
    fn pabsb(a: __m64) -> __m64;

    #[link_name = "llvm.x86.ssse3.pabs.w"]
    fn pabsw(a: __m64) -> __m64;

    #[link_name = "llvm.x86.ssse3.pabs.d"]
    fn pabsd(a: __m64) -> __m64;

    #[link_name = "llvm.x86.ssse3.pshuf.b"]
    fn pshufb(a: __m64, b: __m64) -> __m64;

    #[link_name = "llvm.x86.mmx.palignr.b"]
    fn palignrb(a: __m64, b: __m64, n: u8) -> __m64;

    #[link_name = "llvm.x86.ssse3.phadd.w"]
    fn phaddw(a: __m64, b: __m64) -> __m64;

    #[link_name = "llvm.x86.ssse3.phadd.d"]
    fn phaddd(a: __m64, b: __m64) -> __m64;

    #[link_name = "llvm.x86.ssse3.phadd.sw"]
    fn phaddsw(a: __m64, b: __m64) -> __m64;

    #[link_name = "llvm.x86.ssse3.phsub.w"]
    fn phsubw(a: __m64, b: __m64) -> __m64;

    #[link_name = "llvm.x86.ssse3.phsub.d"]
    fn phsubd(a: __m64, b: __m64) -> __m64;

    #[link_name = "llvm.x86.ssse3.phsub.sw"]
    fn phsubsw(a: __m64, b: __m64) -> __m64;

    #[link_name = "llvm.x86.ssse3.pmadd.ub.sw"]
    fn pmaddubsw(a: __m64, b: __m64) -> __m64;

    #[link_name = "llvm.x86.ssse3.pmul.hr.sw"]
    fn pmulhrsw(a: __m64, b: __m64) -> __m64;

    #[link_name = "llvm.x86.ssse3.psign.b"]
    fn psignb(a: __m64, b: __m64) -> __m64;

    #[link_name = "llvm.x86.ssse3.psign.w"]
    fn psignw(a: __m64, b: __m64) -> __m64;

    #[link_name = "llvm.x86.ssse3.psign.d"]
    fn psignd(a: __m64, b: __m64) -> __m64;
}
500
#[cfg(test)]
mod tests {
    // Each test feeds an intrinsic a hand-built vector and compares against
    // a hand-computed expected vector; `simd_test` runs it only on CPUs
    // reporting the listed features.
    use stdsimd_test::simd_test;

    use coresimd::x86::*;

    #[simd_test = "ssse3"]
    unsafe fn test_mm_abs_epi8() {
        let r = _mm_abs_epi8(_mm_set1_epi8(-5));
        assert_eq_m128i(r, _mm_set1_epi8(5));
    }

    #[simd_test = "ssse3"]
    unsafe fn test_mm_abs_epi16() {
        let r = _mm_abs_epi16(_mm_set1_epi16(-5));
        assert_eq_m128i(r, _mm_set1_epi16(5));
    }

    #[simd_test = "ssse3"]
    unsafe fn test_mm_abs_epi32() {
        let r = _mm_abs_epi32(_mm_set1_epi32(-5));
        assert_eq_m128i(r, _mm_set1_epi32(5));
    }

    #[simd_test = "ssse3"]
    unsafe fn test_mm_shuffle_epi8() {
        #[cfg_attr(rustfmt, rustfmt_skip)]
        let a = _mm_setr_epi8(
            1, 2, 3, 4, 5, 6, 7, 8,
            9, 10, 11, 12, 13, 14, 15, 16,
        );
        // 128 has the MSB set, so that lane of the result must be zeroed.
        #[cfg_attr(rustfmt, rustfmt_skip)]
        let b = _mm_setr_epi8(
            4, 128_u8 as i8, 4, 3,
            24, 12, 6, 19,
            12, 5, 5, 10,
            4, 1, 8, 0,
        );
        let expected =
            _mm_setr_epi8(5, 0, 5, 4, 9, 13, 7, 4, 13, 6, 6, 11, 5, 2, 9, 1);
        let r = _mm_shuffle_epi8(a, b);
        assert_eq_m128i(r, expected);
    }

    #[simd_test = "ssse3"]
    unsafe fn test_mm_alignr_epi8() {
        #[cfg_attr(rustfmt, rustfmt_skip)]
        let a = _mm_setr_epi8(
            1, 2, 3, 4, 5, 6, 7, 8,
            9, 10, 11, 12, 13, 14, 15, 16,
        );
        #[cfg_attr(rustfmt, rustfmt_skip)]
        let b = _mm_setr_epi8(
            4, 63, 4, 3,
            24, 12, 6, 19,
            12, 5, 5, 10,
            4, 1, 8, 0,
        );
        // Shift count > 32: everything is shifted out, result is zero.
        let r = _mm_alignr_epi8(a, b, 33);
        assert_eq_m128i(r, _mm_set1_epi8(0));

        // Shift count in (16, 32): `b` is fully discarded, zeroes shift in.
        let r = _mm_alignr_epi8(a, b, 17);
        #[cfg_attr(rustfmt, rustfmt_skip)]
        let expected = _mm_setr_epi8(
            2, 3, 4, 5, 6, 7, 8, 9,
            10, 11, 12, 13, 14, 15, 16, 0,
        );
        assert_eq_m128i(r, expected);

        // Shift by exactly one lane returns `a` unchanged.
        let r = _mm_alignr_epi8(a, b, 16);
        assert_eq_m128i(r, a);

        let r = _mm_alignr_epi8(a, b, 15);
        #[cfg_attr(rustfmt, rustfmt_skip)]
        let expected = _mm_setr_epi8(
            0, 1, 2, 3, 4, 5, 6, 7,
            8, 9, 10, 11, 12, 13, 14, 15,
        );
        assert_eq_m128i(r, expected);

        // Zero shift returns `b` unchanged.
        let r = _mm_alignr_epi8(a, b, 0);
        assert_eq_m128i(r, b);
    }

    #[simd_test = "ssse3"]
    unsafe fn test_mm_hadd_epi16() {
        let a = _mm_setr_epi16(1, 2, 3, 4, 5, 6, 7, 8);
        let b = _mm_setr_epi16(4, 128, 4, 3, 24, 12, 6, 19);
        let expected = _mm_setr_epi16(3, 7, 11, 15, 132, 7, 36, 25);
        let r = _mm_hadd_epi16(a, b);
        assert_eq_m128i(r, expected);
    }

    #[simd_test = "ssse3"]
    unsafe fn test_mm_hadds_epi16() {
        let a = _mm_setr_epi16(1, 2, 3, 4, 5, 6, 7, 8);
        // 32767 + 1 and -32768 + -1 exercise both saturation bounds.
        let b = _mm_setr_epi16(4, 128, 4, 3, 32767, 1, -32768, -1);
        let expected = _mm_setr_epi16(3, 7, 11, 15, 132, 7, 32767, -32768);
        let r = _mm_hadds_epi16(a, b);
        assert_eq_m128i(r, expected);
    }

    #[simd_test = "ssse3"]
    unsafe fn test_mm_hadd_epi32() {
        let a = _mm_setr_epi32(1, 2, 3, 4);
        let b = _mm_setr_epi32(4, 128, 4, 3);
        let expected = _mm_setr_epi32(3, 7, 132, 7);
        let r = _mm_hadd_epi32(a, b);
        assert_eq_m128i(r, expected);
    }

    #[simd_test = "ssse3"]
    unsafe fn test_mm_hsub_epi16() {
        let a = _mm_setr_epi16(1, 2, 3, 4, 5, 6, 7, 8);
        let b = _mm_setr_epi16(4, 128, 4, 3, 24, 12, 6, 19);
        let expected = _mm_setr_epi16(-1, -1, -1, -1, -124, 1, 12, -13);
        let r = _mm_hsub_epi16(a, b);
        assert_eq_m128i(r, expected);
    }

    #[simd_test = "ssse3"]
    unsafe fn test_mm_hsubs_epi16() {
        let a = _mm_setr_epi16(1, 2, 3, 4, 5, 6, 7, 8);
        // 32767 - (-1) and -32768 - 1 exercise both saturation bounds.
        let b = _mm_setr_epi16(4, 128, 4, 3, 32767, -1, -32768, 1);
        let expected = _mm_setr_epi16(-1, -1, -1, -1, -124, 1, 32767, -32768);
        let r = _mm_hsubs_epi16(a, b);
        assert_eq_m128i(r, expected);
    }

    #[simd_test = "ssse3"]
    unsafe fn test_mm_hsub_epi32() {
        let a = _mm_setr_epi32(1, 2, 3, 4);
        let b = _mm_setr_epi32(4, 128, 4, 3);
        let expected = _mm_setr_epi32(-1, -1, -124, 1);
        let r = _mm_hsub_epi32(a, b);
        assert_eq_m128i(r, expected);
    }

    #[simd_test = "ssse3"]
    unsafe fn test_mm_maddubs_epi16() {
        #[cfg_attr(rustfmt, rustfmt_skip)]
        let a = _mm_setr_epi8(
            1, 2, 3, 4, 5, 6, 7, 8,
            9, 10, 11, 12, 13, 14, 15, 16,
        );
        #[cfg_attr(rustfmt, rustfmt_skip)]
        let b = _mm_setr_epi8(
            4, 63, 4, 3,
            24, 12, 6, 19,
            12, 5, 5, 10,
            4, 1, 8, 0,
        );
        let expected = _mm_setr_epi16(130, 24, 192, 194, 158, 175, 66, 120);
        let r = _mm_maddubs_epi16(a, b);
        assert_eq_m128i(r, expected);
    }

    #[simd_test = "ssse3"]
    unsafe fn test_mm_mulhrs_epi16() {
        let a = _mm_setr_epi16(1, 2, 3, 4, 5, 6, 7, 8);
        let b = _mm_setr_epi16(4, 128, 4, 3, 32767, -1, -32768, 1);
        let expected = _mm_setr_epi16(0, 0, 0, 0, 5, 0, -7, 0);
        let r = _mm_mulhrs_epi16(a, b);
        assert_eq_m128i(r, expected);
    }

    #[simd_test = "ssse3"]
    unsafe fn test_mm_sign_epi8() {
        #[cfg_attr(rustfmt, rustfmt_skip)]
        let a = _mm_setr_epi8(
            1, 2, 3, 4, 5, 6, 7, 8,
            9, 10, 11, 12, 13, -14, -15, 16,
        );
        #[cfg_attr(rustfmt, rustfmt_skip)]
        let b = _mm_setr_epi8(
            4, 63, -4, 3, 24, 12, -6, -19,
            12, 5, -5, 10, 4, 1, -8, 0,
        );
        #[cfg_attr(rustfmt, rustfmt_skip)]
        let expected = _mm_setr_epi8(
            1, 2, -3, 4, 5, 6, -7, -8,
            9, 10, -11, 12, 13, -14, 15, 0,
        );
        let r = _mm_sign_epi8(a, b);
        assert_eq_m128i(r, expected);
    }

    #[simd_test = "ssse3"]
    unsafe fn test_mm_sign_epi16() {
        let a = _mm_setr_epi16(1, 2, 3, 4, -5, -6, 7, 8);
        let b = _mm_setr_epi16(4, 128, 0, 3, 1, -1, -2, 1);
        let expected = _mm_setr_epi16(1, 2, 0, 4, -5, 6, -7, 8);
        let r = _mm_sign_epi16(a, b);
        assert_eq_m128i(r, expected);
    }

    #[simd_test = "ssse3"]
    unsafe fn test_mm_sign_epi32() {
        let a = _mm_setr_epi32(-1, 2, 3, 4);
        let b = _mm_setr_epi32(1, -1, 1, 0);
        let expected = _mm_setr_epi32(-1, -2, 3, 0);
        let r = _mm_sign_epi32(a, b);
        assert_eq_m128i(r, expected);
    }

    #[simd_test = "ssse3,mmx"]
    unsafe fn test_mm_abs_pi8() {
        let r = _mm_abs_pi8(_mm_set1_pi8(-5));
        assert_eq_m64(r, _mm_set1_pi8(5));
    }

    #[simd_test = "ssse3,mmx"]
    unsafe fn test_mm_abs_pi16() {
        let r = _mm_abs_pi16(_mm_set1_pi16(-5));
        assert_eq_m64(r, _mm_set1_pi16(5));
    }

    #[simd_test = "ssse3,mmx"]
    unsafe fn test_mm_abs_pi32() {
        let r = _mm_abs_pi32(_mm_set1_pi32(-5));
        assert_eq_m64(r, _mm_set1_pi32(5));
    }

    #[simd_test = "ssse3,mmx"]
    unsafe fn test_mm_shuffle_pi8() {
        let a = _mm_setr_pi8(1, 2, 3, 4, 5, 6, 7, 8);
        // 128 has the MSB set, so that lane of the result must be zeroed.
        let b = _mm_setr_pi8(4, 128u8 as i8, 4, 3, 24, 12, 6, 19);
        let expected = _mm_setr_pi8(5, 0, 5, 4, 1, 5, 7, 4);
        let r = _mm_shuffle_pi8(a, b);
        assert_eq_m64(r, expected);
    }

    #[simd_test = "ssse3,mmx"]
    unsafe fn test_mm_alignr_pi8() {
        let a = _mm_setr_pi32(0x89ABCDEF_u32 as i32, 0x01234567_u32 as i32);
        let b = _mm_setr_pi32(0xBBAA9988_u32 as i32, 0xFFDDEECC_u32 as i32);
        let r = _mm_alignr_pi8(a, b, 4);
        assert_eq_m64(r, ::std::mem::transmute(0x89abcdefffddeecc_u64));
    }

    #[simd_test = "ssse3,mmx"]
    unsafe fn test_mm_hadd_pi16() {
        let a = _mm_setr_pi16(1, 2, 3, 4);
        let b = _mm_setr_pi16(4, 128, 4, 3);
        let expected = _mm_setr_pi16(3, 7, 132, 7);
        let r = _mm_hadd_pi16(a, b);
        assert_eq_m64(r, expected);
    }

    #[simd_test = "ssse3,mmx"]
    unsafe fn test_mm_hadd_pi32() {
        let a = _mm_setr_pi32(1, 2);
        let b = _mm_setr_pi32(4, 128);
        let expected = _mm_setr_pi32(3, 132);
        let r = _mm_hadd_pi32(a, b);
        assert_eq_m64(r, expected);
    }

    #[simd_test = "ssse3,mmx"]
    unsafe fn test_mm_hadds_pi16() {
        let a = _mm_setr_pi16(1, 2, 3, 4);
        let b = _mm_setr_pi16(32767, 1, -32768, -1);
        let expected = _mm_setr_pi16(3, 7, 32767, -32768);
        let r = _mm_hadds_pi16(a, b);
        assert_eq_m64(r, expected);
    }

    #[simd_test = "ssse3,mmx"]
    unsafe fn test_mm_hsub_pi16() {
        let a = _mm_setr_pi16(1, 2, 3, 4);
        let b = _mm_setr_pi16(4, 128, 4, 3);
        let expected = _mm_setr_pi16(-1, -1, -124, 1);
        let r = _mm_hsub_pi16(a, b);
        assert_eq_m64(r, expected);
    }

    #[simd_test = "ssse3,mmx"]
    unsafe fn test_mm_hsub_pi32() {
        let a = _mm_setr_pi32(1, 2);
        let b = _mm_setr_pi32(4, 128);
        let expected = _mm_setr_pi32(-1, -124);
        let r = _mm_hsub_pi32(a, b);
        assert_eq_m64(r, expected);
    }

    #[simd_test = "ssse3,mmx"]
    unsafe fn test_mm_hsubs_pi16() {
        let a = _mm_setr_pi16(1, 2, 3, 4);
        let b = _mm_setr_pi16(4, 128, 4, 3);
        let expected = _mm_setr_pi16(-1, -1, -124, 1);
        let r = _mm_hsubs_pi16(a, b);
        assert_eq_m64(r, expected);
    }

    #[simd_test = "ssse3,mmx"]
    unsafe fn test_mm_maddubs_pi16() {
        let a = _mm_setr_pi8(1, 2, 3, 4, 5, 6, 7, 8);
        let b = _mm_setr_pi8(4, 63, 4, 3, 24, 12, 6, 19);
        let expected = _mm_setr_pi16(130, 24, 192, 194);
        let r = _mm_maddubs_pi16(a, b);
        assert_eq_m64(r, expected);
    }

    #[simd_test = "ssse3,mmx"]
    unsafe fn test_mm_mulhrs_pi16() {
        let a = _mm_setr_pi16(1, 2, 3, 4);
        let b = _mm_setr_pi16(4, 32767, -1, -32768);
        let expected = _mm_setr_pi16(0, 2, 0, -4);
        let r = _mm_mulhrs_pi16(a, b);
        assert_eq_m64(r, expected);
    }

    #[simd_test = "ssse3,mmx"]
    unsafe fn test_mm_sign_pi8() {
        let a = _mm_setr_pi8(1, 2, 3, 4, -5, -6, 7, 8);
        let b = _mm_setr_pi8(4, 64, 0, 3, 1, -1, -2, 1);
        let expected = _mm_setr_pi8(1, 2, 0, 4, -5, 6, -7, 8);
        let r = _mm_sign_pi8(a, b);
        assert_eq_m64(r, expected);
    }

    #[simd_test = "ssse3,mmx"]
    unsafe fn test_mm_sign_pi16() {
        let a = _mm_setr_pi16(-1, 2, 3, 4);
        let b = _mm_setr_pi16(1, -1, 1, 0);
        let expected = _mm_setr_pi16(-1, -2, 3, 0);
        let r = _mm_sign_pi16(a, b);
        assert_eq_m64(r, expected);
    }

    #[simd_test = "ssse3,mmx"]
    unsafe fn test_mm_sign_pi32() {
        let a = _mm_setr_pi32(-1, 2);
        let b = _mm_setr_pi32(1, 0);
        let expected = _mm_setr_pi32(-1, 0);
        let r = _mm_sign_pi32(a, b);
        assert_eq_m64(r, expected);
    }
}