]> git.proxmox.com Git - rustc.git/blame - library/stdarch/crates/core_arch/src/x86/sse41.rs
bump version to 1.80.1+dfsg1-1~bpo12+pve1
[rustc.git] / library / stdarch / crates / core_arch / src / x86 / sse41.rs
CommitLineData
0531ce1d
XL
1//! Streaming SIMD Extensions 4.1 (SSE4.1)
2
c620b35d
FG
3use crate::core_arch::{simd::*, x86::*};
4use crate::intrinsics::simd::*;
0531ce1d
XL
5
6#[cfg(test)]
416331ca 7use stdarch_test::assert_instr;
0531ce1d 8
// SSE4 rounding constants.
// Bits [1:0] select the rounding mode, bit 2 selects MXCSR.RC instead,
// and bit 3 (_MM_FROUND_NO_EXC) suppresses precision exceptions.
/// round to nearest
#[stable(feature = "simd_x86", since = "1.27.0")]
pub const _MM_FROUND_TO_NEAREST_INT: i32 = 0x00;
/// round down
#[stable(feature = "simd_x86", since = "1.27.0")]
pub const _MM_FROUND_TO_NEG_INF: i32 = 0x01;
/// round up
#[stable(feature = "simd_x86", since = "1.27.0")]
pub const _MM_FROUND_TO_POS_INF: i32 = 0x02;
/// truncate
#[stable(feature = "simd_x86", since = "1.27.0")]
pub const _MM_FROUND_TO_ZERO: i32 = 0x03;
/// use MXCSR.RC; see `vendor::_MM_SET_ROUNDING_MODE`
#[stable(feature = "simd_x86", since = "1.27.0")]
pub const _MM_FROUND_CUR_DIRECTION: i32 = 0x04;
/// do not suppress exceptions
#[stable(feature = "simd_x86", since = "1.27.0")]
pub const _MM_FROUND_RAISE_EXC: i32 = 0x00;
/// suppress exceptions
#[stable(feature = "simd_x86", since = "1.27.0")]
pub const _MM_FROUND_NO_EXC: i32 = 0x08;
/// round to nearest and do not suppress exceptions
#[stable(feature = "simd_x86", since = "1.27.0")]
pub const _MM_FROUND_NINT: i32 = 0x00;
/// round down and do not suppress exceptions
#[stable(feature = "simd_x86", since = "1.27.0")]
pub const _MM_FROUND_FLOOR: i32 = _MM_FROUND_RAISE_EXC | _MM_FROUND_TO_NEG_INF;
/// round up and do not suppress exceptions
#[stable(feature = "simd_x86", since = "1.27.0")]
pub const _MM_FROUND_CEIL: i32 = _MM_FROUND_RAISE_EXC | _MM_FROUND_TO_POS_INF;
/// truncate and do not suppress exceptions
#[stable(feature = "simd_x86", since = "1.27.0")]
pub const _MM_FROUND_TRUNC: i32 = _MM_FROUND_RAISE_EXC | _MM_FROUND_TO_ZERO;
/// use MXCSR.RC and do not suppress exceptions; see
/// `vendor::_MM_SET_ROUNDING_MODE`
#[stable(feature = "simd_x86", since = "1.27.0")]
pub const _MM_FROUND_RINT: i32 = _MM_FROUND_RAISE_EXC | _MM_FROUND_CUR_DIRECTION;
/// use MXCSR.RC and suppress exceptions; see `vendor::_MM_SET_ROUNDING_MODE`
#[stable(feature = "simd_x86", since = "1.27.0")]
pub const _MM_FROUND_NEARBYINT: i32 = _MM_FROUND_NO_EXC | _MM_FROUND_CUR_DIRECTION;
50
/// Blend packed 8-bit integers from `a` and `b` using `mask`
///
/// The high bit of each corresponding mask byte determines the selection.
/// If the high bit is set the element of `b` is selected. The element
/// of `a` is selected otherwise.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_blendv_epi8)
#[inline]
#[target_feature(enable = "sse4.1")]
#[cfg_attr(test, assert_instr(pblendvb))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_blendv_epi8(a: __m128i, b: __m128i, mask: __m128i) -> __m128i {
    // `simd_lt(x, 0)` yields all-ones lanes exactly where the sign (high) bit
    // of the mask byte is set; those lanes take `b`, the rest take `a`.
    let mask: i8x16 = simd_lt(mask.as_i8x16(), i8x16::splat(0));
    transmute(simd_select(mask, b.as_i8x16(), a.as_i8x16()))
}
66
/// Blend packed 16-bit integers from `a` and `b` using the mask `IMM8`.
///
/// The mask bits determine the selection. A clear bit selects the
/// corresponding element of `a`, and a set bit the corresponding
/// element of `b`.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_blend_epi16)
#[inline]
#[target_feature(enable = "sse4.1")]
#[cfg_attr(test, assert_instr(pblendw, IMM8 = 0xB1))]
#[rustc_legacy_const_generics(2)]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_blend_epi16<const IMM8: i32>(a: __m128i, b: __m128i) -> __m128i {
    static_assert_uimm_bits!(IMM8, 8);
    // Shuffle indices 0..=7 address lanes of `a`, 8..=15 the lanes of `b`;
    // bit i of IMM8 therefore switches result lane i from `a` to `b`.
    transmute::<i16x8, _>(simd_shuffle!(
        a.as_i16x8(),
        b.as_i16x8(),
        [
            [0, 8][IMM8 as usize & 1],
            [1, 9][(IMM8 >> 1) as usize & 1],
            [2, 10][(IMM8 >> 2) as usize & 1],
            [3, 11][(IMM8 >> 3) as usize & 1],
            [4, 12][(IMM8 >> 4) as usize & 1],
            [5, 13][(IMM8 >> 5) as usize & 1],
            [6, 14][(IMM8 >> 6) as usize & 1],
            [7, 15][(IMM8 >> 7) as usize & 1],
        ]
    ))
}
96
/// Blend packed double-precision (64-bit) floating-point elements from `a`
/// and `b` using `mask`
///
/// The sign bit of each 64-bit mask lane selects `b`; a clear sign bit
/// selects `a`.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_blendv_pd)
#[inline]
#[target_feature(enable = "sse4.1")]
#[cfg_attr(test, assert_instr(blendvpd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_blendv_pd(a: __m128d, b: __m128d, mask: __m128d) -> __m128d {
    // Reinterpret the mask as integers so the sign-bit test is a plain `< 0`.
    let mask: i64x2 = simd_lt(transmute::<_, i64x2>(mask), i64x2::splat(0));
    transmute(simd_select(mask, b.as_f64x2(), a.as_f64x2()))
}
109
/// Blend packed single-precision (32-bit) floating-point elements from `a`
/// and `b` using `mask`
///
/// The sign bit of each 32-bit mask lane selects `b`; a clear sign bit
/// selects `a`.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_blendv_ps)
#[inline]
#[target_feature(enable = "sse4.1")]
#[cfg_attr(test, assert_instr(blendvps))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_blendv_ps(a: __m128, b: __m128, mask: __m128) -> __m128 {
    // Reinterpret the mask as integers so the sign-bit test is a plain `< 0`.
    let mask: i32x4 = simd_lt(transmute::<_, i32x4>(mask), i32x4::splat(0));
    transmute(simd_select(mask, b.as_f32x4(), a.as_f32x4()))
}
122
/// Blend packed double-precision (64-bit) floating-point elements from `a`
/// and `b` using control mask `IMM2`
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_blend_pd)
#[inline]
#[target_feature(enable = "sse4.1")]
// Note: LLVM7 prefers the single-precision floating-point domain when possible
// see https://bugs.llvm.org/show_bug.cgi?id=38195
// #[cfg_attr(test, assert_instr(blendpd, IMM2 = 0b10))]
#[cfg_attr(test, assert_instr(blendps, IMM2 = 0b10))]
#[rustc_legacy_const_generics(2)]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_blend_pd<const IMM2: i32>(a: __m128d, b: __m128d) -> __m128d {
    static_assert_uimm_bits!(IMM2, 2);
    // Indices 0..=1 address `a`, 2..=3 address `b`; bit i of IMM2 picks
    // result lane i from `b` instead of `a`.
    transmute::<f64x2, _>(simd_shuffle!(
        a.as_f64x2(),
        b.as_f64x2(),
        [[0, 2][IMM2 as usize & 1], [1, 3][(IMM2 >> 1) as usize & 1]]
    ))
}
143
/// Blend packed single-precision (32-bit) floating-point elements from `a`
/// and `b` using mask `IMM4`
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_blend_ps)
#[inline]
#[target_feature(enable = "sse4.1")]
#[cfg_attr(test, assert_instr(blendps, IMM4 = 0b0101))]
#[rustc_legacy_const_generics(2)]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_blend_ps<const IMM4: i32>(a: __m128, b: __m128) -> __m128 {
    static_assert_uimm_bits!(IMM4, 4);
    // Indices 0..=3 address `a`, 4..=7 address `b`; bit i of IMM4 picks
    // result lane i from `b` instead of `a`.
    transmute::<f32x4, _>(simd_shuffle!(
        a.as_f32x4(),
        b.as_f32x4(),
        [
            [0, 4][IMM4 as usize & 1],
            [1, 5][(IMM4 >> 1) as usize & 1],
            [2, 6][(IMM4 >> 2) as usize & 1],
            [3, 7][(IMM4 >> 3) as usize & 1],
        ]
    ))
}
166
/// Extracts a single-precision (32-bit) floating-point element from `a`,
/// selected with `IMM8`. The returned `i32` stores the float's bit-pattern,
/// and may be converted back to a floating point number via casting.
///
/// # Example
/// ```rust
/// # #[cfg(target_arch = "x86")]
/// # use std::arch::x86::*;
/// # #[cfg(target_arch = "x86_64")]
/// # use std::arch::x86_64::*;
/// # fn main() {
/// # if is_x86_feature_detected!("sse4.1") {
/// # #[target_feature(enable = "sse4.1")]
/// # unsafe fn worker() {
/// let mut float_store = vec![1.0, 1.0, 2.0, 3.0];
/// let simd_floats = _mm_set_ps(2.5, 5.0, 7.5, 10.0);
/// let x: i32 = _mm_extract_ps::<2>(simd_floats);
/// float_store.push(f32::from_bits(x as u32));
/// assert_eq!(float_store, vec![1.0, 1.0, 2.0, 3.0, 5.0]);
/// # }
/// # unsafe { worker() }
/// # }
/// # }
/// ```
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_extract_ps)
#[inline]
#[target_feature(enable = "sse4.1")]
#[cfg_attr(
    all(test, not(target_os = "windows")),
    assert_instr(extractps, IMM8 = 0)
)]
#[rustc_legacy_const_generics(1)]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_extract_ps<const IMM8: i32>(a: __m128) -> i32 {
    // Only 2 bits of the immediate are meaningful: 4 lanes to choose from.
    static_assert_uimm_bits!(IMM8, 2);
    simd_extract!(a, IMM8 as u32, f32).to_bits() as i32
}
204
/// Extracts an 8-bit integer from `a`, selected with `IMM8`. Returns a 32-bit
/// integer containing the zero-extended integer data.
///
/// See [LLVM commit D20468](https://reviews.llvm.org/D20468).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_extract_epi8)
#[inline]
#[target_feature(enable = "sse4.1")]
#[cfg_attr(test, assert_instr(pextrb, IMM8 = 0))]
#[rustc_legacy_const_generics(1)]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_extract_epi8<const IMM8: i32>(a: __m128i) -> i32 {
    // 4 immediate bits: 16 byte lanes to choose from. Extracting as `u8`
    // then widening gives the zero-extension the intrinsic specifies.
    static_assert_uimm_bits!(IMM8, 4);
    simd_extract!(a.as_u8x16(), IMM8 as u32, u8) as i32
}
220
/// Extracts an 32-bit integer from `a` selected with `IMM8`
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_extract_epi32)
#[inline]
#[target_feature(enable = "sse4.1")]
#[cfg_attr(
    all(test, not(target_os = "windows")),
    assert_instr(extractps, IMM8 = 1)
)]
#[rustc_legacy_const_generics(1)]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_extract_epi32<const IMM8: i32>(a: __m128i) -> i32 {
    // Only 2 bits of the immediate are meaningful: 4 lanes to choose from.
    static_assert_uimm_bits!(IMM8, 2);
    simd_extract!(a.as_i32x4(), IMM8 as u32, i32)
}
236
/// Select a single value in `b` to store at some position in `a`,
/// Then zero elements according to `IMM8`.
///
/// `IMM8` specifies which bits from operand `b` will be copied, which bits in
/// the result they will be copied to, and which bits in the result will be
/// cleared. The following assignments are made:
///
/// * Bits `[7:6]` specify the bits to copy from operand `b`:
///     - `00`: Selects bits `[31:0]` from operand `b`.
///     - `01`: Selects bits `[63:32]` from operand `b`.
///     - `10`: Selects bits `[95:64]` from operand `b`.
///     - `11`: Selects bits `[127:96]` from operand `b`.
///
/// * Bits `[5:4]` specify the bits in the result to which the selected bits
/// from operand `b` are copied:
///     - `00`: Copies the selected bits from `b` to result bits `[31:0]`.
///     - `01`: Copies the selected bits from `b` to result bits `[63:32]`.
///     - `10`: Copies the selected bits from `b` to result bits `[95:64]`.
///     - `11`: Copies the selected bits from `b` to result bits `[127:96]`.
///
/// * Bits `[3:0]`: If any of these bits are set, the corresponding result
/// element is cleared.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_insert_ps)
#[inline]
#[target_feature(enable = "sse4.1")]
#[cfg_attr(test, assert_instr(insertps, IMM8 = 0b1010))]
#[rustc_legacy_const_generics(2)]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_insert_ps<const IMM8: i32>(a: __m128, b: __m128) -> __m128 {
    static_assert_uimm_bits!(IMM8, 8);
    // The immediate's semantics are too irregular for a shuffle; delegate to
    // the `insertps` LLVM intrinsic directly.
    insertps(a, b, IMM8 as u8)
}
270
/// Returns a copy of `a` with the 8-bit integer from `i` inserted at a
/// location specified by `IMM8`.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_insert_epi8)
#[inline]
#[target_feature(enable = "sse4.1")]
#[cfg_attr(test, assert_instr(pinsrb, IMM8 = 0))]
#[rustc_legacy_const_generics(2)]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_insert_epi8<const IMM8: i32>(a: __m128i, i: i32) -> __m128i {
    // 4 immediate bits: 16 byte lanes. The `i as i8` truncation matches the
    // instruction, which only reads the low byte of the source register.
    static_assert_uimm_bits!(IMM8, 4);
    transmute(simd_insert!(a.as_i8x16(), IMM8 as u32, i as i8))
}
284
/// Returns a copy of `a` with the 32-bit integer from `i` inserted at a
/// location specified by `IMM8`.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_insert_epi32)
#[inline]
#[target_feature(enable = "sse4.1")]
#[cfg_attr(test, assert_instr(pinsrd, IMM8 = 0))]
#[rustc_legacy_const_generics(2)]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_insert_epi32<const IMM8: i32>(a: __m128i, i: i32) -> __m128i {
    // Only 2 bits of the immediate are meaningful: 4 lanes.
    static_assert_uimm_bits!(IMM8, 2);
    transmute(simd_insert!(a.as_i32x4(), IMM8 as u32, i))
}
298
/// Compares packed 8-bit integers in `a` and `b` and returns packed maximum
/// values in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_max_epi8)
#[inline]
#[target_feature(enable = "sse4.1")]
#[cfg_attr(test, assert_instr(pmaxsb))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_max_epi8(a: __m128i, b: __m128i) -> __m128i {
    // Lane-wise `max` expressed as compare + select; LLVM lowers it to pmaxsb.
    let a = a.as_i8x16();
    let b = b.as_i8x16();
    transmute(simd_select::<i8x16, _>(simd_gt(a, b), a, b))
}
312
/// Compares packed unsigned 16-bit integers in `a` and `b`, and returns packed
/// maximum.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_max_epu16)
#[inline]
#[target_feature(enable = "sse4.1")]
#[cfg_attr(test, assert_instr(pmaxuw))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_max_epu16(a: __m128i, b: __m128i) -> __m128i {
    // Unsigned lane-wise `max` as compare + select; LLVM lowers it to pmaxuw.
    let a = a.as_u16x8();
    let b = b.as_u16x8();
    transmute(simd_select::<i16x8, _>(simd_gt(a, b), a, b))
}
326
/// Compares packed 32-bit integers in `a` and `b`, and returns packed maximum
/// values.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_max_epi32)
#[inline]
#[target_feature(enable = "sse4.1")]
#[cfg_attr(test, assert_instr(pmaxsd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_max_epi32(a: __m128i, b: __m128i) -> __m128i {
    // Lane-wise `max` as compare + select; LLVM lowers it to pmaxsd.
    let a = a.as_i32x4();
    let b = b.as_i32x4();
    transmute(simd_select::<i32x4, _>(simd_gt(a, b), a, b))
}
340
/// Compares packed unsigned 32-bit integers in `a` and `b`, and returns packed
/// maximum values.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_max_epu32)
#[inline]
#[target_feature(enable = "sse4.1")]
#[cfg_attr(test, assert_instr(pmaxud))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_max_epu32(a: __m128i, b: __m128i) -> __m128i {
    // Unsigned lane-wise `max` as compare + select; LLVM lowers it to pmaxud.
    let a = a.as_u32x4();
    let b = b.as_u32x4();
    transmute(simd_select::<i32x4, _>(simd_gt(a, b), a, b))
}
354
/// Compares packed 8-bit integers in `a` and `b` and returns packed minimum
/// values in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_min_epi8)
#[inline]
#[target_feature(enable = "sse4.1")]
#[cfg_attr(test, assert_instr(pminsb))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_min_epi8(a: __m128i, b: __m128i) -> __m128i {
    // Lane-wise `min` as compare + select; LLVM lowers it to pminsb.
    let a = a.as_i8x16();
    let b = b.as_i8x16();
    transmute(simd_select::<i8x16, _>(simd_lt(a, b), a, b))
}
368
/// Compares packed unsigned 16-bit integers in `a` and `b`, and returns packed
/// minimum.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_min_epu16)
#[inline]
#[target_feature(enable = "sse4.1")]
#[cfg_attr(test, assert_instr(pminuw))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_min_epu16(a: __m128i, b: __m128i) -> __m128i {
    // Unsigned lane-wise `min` as compare + select; LLVM lowers it to pminuw.
    let a = a.as_u16x8();
    let b = b.as_u16x8();
    transmute(simd_select::<i16x8, _>(simd_lt(a, b), a, b))
}
382
/// Compares packed 32-bit integers in `a` and `b`, and returns packed minimum
/// values.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_min_epi32)
#[inline]
#[target_feature(enable = "sse4.1")]
#[cfg_attr(test, assert_instr(pminsd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_min_epi32(a: __m128i, b: __m128i) -> __m128i {
    // Lane-wise `min` as compare + select; LLVM lowers it to pminsd.
    let a = a.as_i32x4();
    let b = b.as_i32x4();
    transmute(simd_select::<i32x4, _>(simd_lt(a, b), a, b))
}
396
/// Compares packed unsigned 32-bit integers in `a` and `b`, and returns packed
/// minimum values.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_min_epu32)
#[inline]
#[target_feature(enable = "sse4.1")]
#[cfg_attr(test, assert_instr(pminud))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_min_epu32(a: __m128i, b: __m128i) -> __m128i {
    // Unsigned lane-wise `min` as compare + select; LLVM lowers it to pminud.
    let a = a.as_u32x4();
    let b = b.as_u32x4();
    transmute(simd_select::<i32x4, _>(simd_lt(a, b), a, b))
}
410
/// Converts packed 32-bit integers from `a` and `b` to packed 16-bit integers
/// using unsigned saturation
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_packus_epi32)
#[inline]
#[target_feature(enable = "sse4.1")]
#[cfg_attr(test, assert_instr(packusdw))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_packus_epi32(a: __m128i, b: __m128i) -> __m128i {
    // Saturating narrowing has no portable simd_* form; call the LLVM
    // `packusdw` intrinsic directly.
    transmute(packusdw(a.as_i32x4(), b.as_i32x4()))
}
422
/// Compares packed 64-bit integers in `a` and `b` for equality
///
/// Equal lanes become all-ones (-1), unequal lanes become zero.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmpeq_epi64)
#[inline]
#[target_feature(enable = "sse4.1")]
#[cfg_attr(test, assert_instr(pcmpeqq))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_cmpeq_epi64(a: __m128i, b: __m128i) -> __m128i {
    transmute(simd_eq::<_, i64x2>(a.as_i64x2(), b.as_i64x2()))
}
433
/// Sign extend packed 8-bit integers in `a` to packed 16-bit integers
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cvtepi8_epi16)
#[inline]
#[target_feature(enable = "sse4.1")]
#[cfg_attr(test, assert_instr(pmovsxbw))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_cvtepi8_epi16(a: __m128i) -> __m128i {
    // Take the low 8 bytes, then widen; `simd_cast` on signed lanes
    // sign-extends, matching pmovsxbw.
    let a = a.as_i8x16();
    let a: i8x8 = simd_shuffle!(a, a, [0, 1, 2, 3, 4, 5, 6, 7]);
    transmute(simd_cast::<_, i16x8>(a))
}
446
/// Sign extend packed 8-bit integers in `a` to packed 32-bit integers
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cvtepi8_epi32)
#[inline]
#[target_feature(enable = "sse4.1")]
#[cfg_attr(test, assert_instr(pmovsxbd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_cvtepi8_epi32(a: __m128i) -> __m128i {
    // Take the low 4 bytes, then sign-extend each to 32 bits.
    let a = a.as_i8x16();
    let a: i8x4 = simd_shuffle!(a, a, [0, 1, 2, 3]);
    transmute(simd_cast::<_, i32x4>(a))
}
459
/// Sign extend packed 8-bit integers in the low 8 bytes of `a` to packed
/// 64-bit integers
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cvtepi8_epi64)
#[inline]
#[target_feature(enable = "sse4.1")]
#[cfg_attr(test, assert_instr(pmovsxbq))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_cvtepi8_epi64(a: __m128i) -> __m128i {
    // Take the low 2 bytes, then sign-extend each to 64 bits.
    let a = a.as_i8x16();
    let a: i8x2 = simd_shuffle!(a, a, [0, 1]);
    transmute(simd_cast::<_, i64x2>(a))
}
473
/// Sign extend packed 16-bit integers in `a` to packed 32-bit integers
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cvtepi16_epi32)
#[inline]
#[target_feature(enable = "sse4.1")]
#[cfg_attr(test, assert_instr(pmovsxwd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_cvtepi16_epi32(a: __m128i) -> __m128i {
    // Take the low 4 words, then sign-extend each to 32 bits.
    let a = a.as_i16x8();
    let a: i16x4 = simd_shuffle!(a, a, [0, 1, 2, 3]);
    transmute(simd_cast::<_, i32x4>(a))
}
486
/// Sign extend packed 16-bit integers in `a` to packed 64-bit integers
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cvtepi16_epi64)
#[inline]
#[target_feature(enable = "sse4.1")]
#[cfg_attr(test, assert_instr(pmovsxwq))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_cvtepi16_epi64(a: __m128i) -> __m128i {
    // Take the low 2 words, then sign-extend each to 64 bits.
    let a = a.as_i16x8();
    let a: i16x2 = simd_shuffle!(a, a, [0, 1]);
    transmute(simd_cast::<_, i64x2>(a))
}
499
/// Sign extend packed 32-bit integers in `a` to packed 64-bit integers
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cvtepi32_epi64)
#[inline]
#[target_feature(enable = "sse4.1")]
#[cfg_attr(test, assert_instr(pmovsxdq))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_cvtepi32_epi64(a: __m128i) -> __m128i {
    // Take the low 2 dwords, then sign-extend each to 64 bits.
    let a = a.as_i32x4();
    let a: i32x2 = simd_shuffle!(a, a, [0, 1]);
    transmute(simd_cast::<_, i64x2>(a))
}
512
/// Zeroes extend packed unsigned 8-bit integers in `a` to packed 16-bit integers
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cvtepu8_epi16)
#[inline]
#[target_feature(enable = "sse4.1")]
#[cfg_attr(test, assert_instr(pmovzxbw))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_cvtepu8_epi16(a: __m128i) -> __m128i {
    // Take the low 8 bytes; `simd_cast` on unsigned lanes zero-extends,
    // matching pmovzxbw.
    let a = a.as_u8x16();
    let a: u8x8 = simd_shuffle!(a, a, [0, 1, 2, 3, 4, 5, 6, 7]);
    transmute(simd_cast::<_, i16x8>(a))
}
525
/// Zeroes extend packed unsigned 8-bit integers in `a` to packed 32-bit integers
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cvtepu8_epi32)
#[inline]
#[target_feature(enable = "sse4.1")]
#[cfg_attr(test, assert_instr(pmovzxbd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_cvtepu8_epi32(a: __m128i) -> __m128i {
    // Take the low 4 bytes, then zero-extend each to 32 bits.
    let a = a.as_u8x16();
    let a: u8x4 = simd_shuffle!(a, a, [0, 1, 2, 3]);
    transmute(simd_cast::<_, i32x4>(a))
}
538
/// Zeroes extend packed unsigned 8-bit integers in `a` to packed 64-bit integers
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cvtepu8_epi64)
#[inline]
#[target_feature(enable = "sse4.1")]
#[cfg_attr(test, assert_instr(pmovzxbq))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_cvtepu8_epi64(a: __m128i) -> __m128i {
    // Take the low 2 bytes, then zero-extend each to 64 bits.
    let a = a.as_u8x16();
    let a: u8x2 = simd_shuffle!(a, a, [0, 1]);
    transmute(simd_cast::<_, i64x2>(a))
}
551
/// Zeroes extend packed unsigned 16-bit integers in `a`
/// to packed 32-bit integers
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cvtepu16_epi32)
#[inline]
#[target_feature(enable = "sse4.1")]
#[cfg_attr(test, assert_instr(pmovzxwd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_cvtepu16_epi32(a: __m128i) -> __m128i {
    // Take the low 4 words, then zero-extend each to 32 bits.
    let a = a.as_u16x8();
    let a: u16x4 = simd_shuffle!(a, a, [0, 1, 2, 3]);
    transmute(simd_cast::<_, i32x4>(a))
}
565
/// Zeroes extend packed unsigned 16-bit integers in `a`
/// to packed 64-bit integers
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cvtepu16_epi64)
#[inline]
#[target_feature(enable = "sse4.1")]
#[cfg_attr(test, assert_instr(pmovzxwq))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_cvtepu16_epi64(a: __m128i) -> __m128i {
    // Take the low 2 words, then zero-extend each to 64 bits.
    let a = a.as_u16x8();
    let a: u16x2 = simd_shuffle!(a, a, [0, 1]);
    transmute(simd_cast::<_, i64x2>(a))
}
579
/// Zeroes extend packed unsigned 32-bit integers in `a`
/// to packed 64-bit integers
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cvtepu32_epi64)
#[inline]
#[target_feature(enable = "sse4.1")]
#[cfg_attr(test, assert_instr(pmovzxdq))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_cvtepu32_epi64(a: __m128i) -> __m128i {
    // Take the low 2 dwords, then zero-extend each to 64 bits.
    let a = a.as_u32x4();
    let a: u32x2 = simd_shuffle!(a, a, [0, 1]);
    transmute(simd_cast::<_, i64x2>(a))
}
593
/// Returns the dot product of two __m128d vectors.
///
/// `IMM8[1:0]` is the broadcast mask, and `IMM8[5:4]` is the condition mask.
/// If a condition mask bit is zero, the corresponding multiplication is
/// replaced by a value of `0.0`. If a broadcast mask bit is one, the result of
/// the dot product will be stored in the return value component. Otherwise if
/// the broadcast mask bit is zero then the return component will be zero.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_dp_pd)
#[inline]
#[target_feature(enable = "sse4.1")]
#[cfg_attr(test, assert_instr(dppd, IMM8 = 0))]
#[rustc_legacy_const_generics(2)]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_dp_pd<const IMM8: i32>(a: __m128d, b: __m128d) -> __m128d {
    static_assert_uimm_bits!(IMM8, 8);
    // Delegates to the `dppd` LLVM intrinsic; the immediate drives the
    // condition/broadcast masking inside the instruction.
    dppd(a, b, IMM8 as u8)
}
612
/// Returns the dot product of two __m128 vectors.
///
/// `IMM8[3:0]` is the broadcast mask, and `IMM8[7:4]` is the condition mask.
/// If a condition mask bit is zero, the corresponding multiplication is
/// replaced by a value of `0.0`. If a broadcast mask bit is one, the result of
/// the dot product will be stored in the return value component. Otherwise if
/// the broadcast mask bit is zero then the return component will be zero.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_dp_ps)
#[inline]
#[target_feature(enable = "sse4.1")]
#[cfg_attr(test, assert_instr(dpps, IMM8 = 0))]
#[rustc_legacy_const_generics(2)]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_dp_ps<const IMM8: i32>(a: __m128, b: __m128) -> __m128 {
    static_assert_uimm_bits!(IMM8, 8);
    // Delegates to the `dpps` LLVM intrinsic; the immediate drives the
    // condition/broadcast masking inside the instruction.
    dpps(a, b, IMM8 as u8)
}
631
/// Round the packed double-precision (64-bit) floating-point elements in `a`
/// down to an integer value, and stores the results as packed double-precision
/// floating-point elements.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_floor_pd)
#[inline]
#[target_feature(enable = "sse4.1")]
#[cfg_attr(test, assert_instr(roundpd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_floor_pd(a: __m128d) -> __m128d {
    // Portable lane-wise floor; LLVM lowers it to `roundpd` under sse4.1.
    simd_floor(a)
}
644
/// Round the packed single-precision (32-bit) floating-point elements in `a`
/// down to an integer value, and stores the results as packed single-precision
/// floating-point elements.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_floor_ps)
#[inline]
#[target_feature(enable = "sse4.1")]
#[cfg_attr(test, assert_instr(roundps))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_floor_ps(a: __m128) -> __m128 {
    // Portable lane-wise floor; LLVM lowers it to `roundps` under sse4.1.
    simd_floor(a)
}
657
658/// Round the lower double-precision (64-bit) floating-point element in `b`
659/// down to an integer value, store the result as a double-precision
660/// floating-point element in the lower element of the intrinsic result,
532ac7d7 661/// and copies the upper element from `a` to the upper element of the intrinsic
0531ce1d 662/// result.
83c7162d 663///
353b0b11 664/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_floor_sd)
0531ce1d
XL
665#[inline]
666#[target_feature(enable = "sse4.1")]
667#[cfg_attr(test, assert_instr(roundsd))]
83c7162d 668#[stable(feature = "simd_x86", since = "1.27.0")]
0531ce1d
XL
669pub unsafe fn _mm_floor_sd(a: __m128d, b: __m128d) -> __m128d {
670 roundsd(a, b, _MM_FROUND_FLOOR)
671}
672
673/// Round the lower single-precision (32-bit) floating-point element in `b`
674/// down to an integer value, store the result as a single-precision
675/// floating-point element in the lower element of the intrinsic result,
532ac7d7 676/// and copies the upper 3 packed elements from `a` to the upper elements
0531ce1d 677/// of the intrinsic result.
83c7162d 678///
353b0b11 679/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_floor_ss)
0531ce1d
XL
680#[inline]
681#[target_feature(enable = "sse4.1")]
682#[cfg_attr(test, assert_instr(roundss))]
83c7162d 683#[stable(feature = "simd_x86", since = "1.27.0")]
0531ce1d
XL
684pub unsafe fn _mm_floor_ss(a: __m128, b: __m128) -> __m128 {
685 roundss(a, b, _MM_FROUND_FLOOR)
686}
687
688/// Round the packed double-precision (64-bit) floating-point elements in `a`
532ac7d7 689/// up to an integer value, and stores the results as packed double-precision
0531ce1d 690/// floating-point elements.
83c7162d 691///
353b0b11 692/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_ceil_pd)
0531ce1d
XL
693#[inline]
694#[target_feature(enable = "sse4.1")]
695#[cfg_attr(test, assert_instr(roundpd))]
83c7162d 696#[stable(feature = "simd_x86", since = "1.27.0")]
0531ce1d 697pub unsafe fn _mm_ceil_pd(a: __m128d) -> __m128d {
74b04a01 698 simd_ceil(a)
0531ce1d
XL
699}
700
701/// Round the packed single-precision (32-bit) floating-point elements in `a`
532ac7d7 702/// up to an integer value, and stores the results as packed single-precision
0531ce1d 703/// floating-point elements.
83c7162d 704///
353b0b11 705/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_ceil_ps)
0531ce1d
XL
706#[inline]
707#[target_feature(enable = "sse4.1")]
708#[cfg_attr(test, assert_instr(roundps))]
83c7162d 709#[stable(feature = "simd_x86", since = "1.27.0")]
0531ce1d 710pub unsafe fn _mm_ceil_ps(a: __m128) -> __m128 {
74b04a01 711 simd_ceil(a)
0531ce1d
XL
712}
713
714/// Round the lower double-precision (64-bit) floating-point element in `b`
715/// up to an integer value, store the result as a double-precision
353b0b11 716/// floating-point element in the lower element of the intrinsic result,
532ac7d7 717/// and copies the upper element from `a` to the upper element
0531ce1d 718/// of the intrinsic result.
83c7162d 719///
353b0b11 720/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_ceil_sd)
0531ce1d
XL
721#[inline]
722#[target_feature(enable = "sse4.1")]
723#[cfg_attr(test, assert_instr(roundsd))]
83c7162d 724#[stable(feature = "simd_x86", since = "1.27.0")]
0531ce1d
XL
725pub unsafe fn _mm_ceil_sd(a: __m128d, b: __m128d) -> __m128d {
726 roundsd(a, b, _MM_FROUND_CEIL)
727}
728
729/// Round the lower single-precision (32-bit) floating-point element in `b`
730/// up to an integer value, store the result as a single-precision
731/// floating-point element in the lower element of the intrinsic result,
532ac7d7 732/// and copies the upper 3 packed elements from `a` to the upper elements
0531ce1d 733/// of the intrinsic result.
83c7162d 734///
353b0b11 735/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_ceil_ss)
0531ce1d
XL
736#[inline]
737#[target_feature(enable = "sse4.1")]
738#[cfg_attr(test, assert_instr(roundss))]
83c7162d 739#[stable(feature = "simd_x86", since = "1.27.0")]
0531ce1d
XL
740pub unsafe fn _mm_ceil_ss(a: __m128, b: __m128) -> __m128 {
741 roundss(a, b, _MM_FROUND_CEIL)
742}
743
744/// Round the packed double-precision (64-bit) floating-point elements in `a`
17df50a5 745/// using the `ROUNDING` parameter, and stores the results as packed
0531ce1d
XL
746/// double-precision floating-point elements.
747/// Rounding is done according to the rounding parameter, which can be one of:
748///
749/// ```
0531ce1d
XL
750/// #[cfg(target_arch = "x86")]
751/// use std::arch::x86::*;
752/// #[cfg(target_arch = "x86_64")]
753/// use std::arch::x86_64::*;
754///
755/// # fn main() {
756/// // round to nearest, and suppress exceptions:
757/// # let _x =
758/// _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC;
759/// // round down, and suppress exceptions:
760/// # let _x =
761/// _MM_FROUND_TO_NEG_INF | _MM_FROUND_NO_EXC;
762/// // round up, and suppress exceptions:
763/// # let _x =
764/// _MM_FROUND_TO_POS_INF | _MM_FROUND_NO_EXC;
765/// // truncate, and suppress exceptions:
766/// # let _x =
767/// _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC;
768/// // use MXCSR.RC; see `_MM_SET_ROUNDING_MODE`:
769/// # let _x =
770/// _MM_FROUND_CUR_DIRECTION;
771/// # }
772/// ```
83c7162d 773///
353b0b11 774/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_round_pd)
0531ce1d
XL
775#[inline]
776#[target_feature(enable = "sse4.1")]
17df50a5
XL
777#[cfg_attr(test, assert_instr(roundpd, ROUNDING = 0))]
778#[rustc_legacy_const_generics(1)]
779#[stable(feature = "simd_x86", since = "1.27.0")]
780pub unsafe fn _mm_round_pd<const ROUNDING: i32>(a: __m128d) -> __m128d {
353b0b11 781 static_assert_uimm_bits!(ROUNDING, 4);
17df50a5 782 roundpd(a, ROUNDING)
0531ce1d
XL
783}
784
785/// Round the packed single-precision (32-bit) floating-point elements in `a`
17df50a5 786/// using the `ROUNDING` parameter, and stores the results as packed
0531ce1d
XL
787/// single-precision floating-point elements.
788/// Rounding is done according to the rounding parameter, which can be one of:
789///
790/// ```
0531ce1d
XL
791/// #[cfg(target_arch = "x86")]
792/// use std::arch::x86::*;
793/// #[cfg(target_arch = "x86_64")]
794/// use std::arch::x86_64::*;
795///
796/// # fn main() {
797/// // round to nearest, and suppress exceptions:
798/// # let _x =
799/// _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC;
800/// // round down, and suppress exceptions:
801/// # let _x =
802/// _MM_FROUND_TO_NEG_INF | _MM_FROUND_NO_EXC;
803/// // round up, and suppress exceptions:
804/// # let _x =
805/// _MM_FROUND_TO_POS_INF | _MM_FROUND_NO_EXC;
806/// // truncate, and suppress exceptions:
807/// # let _x =
808/// _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC;
809/// // use MXCSR.RC; see `_MM_SET_ROUNDING_MODE`:
810/// # let _x =
811/// _MM_FROUND_CUR_DIRECTION;
812/// # }
813/// ```
83c7162d 814///
353b0b11 815/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_round_ps)
0531ce1d
XL
816#[inline]
817#[target_feature(enable = "sse4.1")]
17df50a5
XL
818#[cfg_attr(test, assert_instr(roundps, ROUNDING = 0))]
819#[rustc_legacy_const_generics(1)]
820#[stable(feature = "simd_x86", since = "1.27.0")]
821pub unsafe fn _mm_round_ps<const ROUNDING: i32>(a: __m128) -> __m128 {
353b0b11 822 static_assert_uimm_bits!(ROUNDING, 4);
17df50a5 823 roundps(a, ROUNDING)
0531ce1d
XL
824}
825
826/// Round the lower double-precision (64-bit) floating-point element in `b`
17df50a5 827/// using the `ROUNDING` parameter, store the result as a double-precision
0531ce1d 828/// floating-point element in the lower element of the intrinsic result,
532ac7d7 829/// and copies the upper element from `a` to the upper element of the intrinsic
0531ce1d
XL
830/// result.
831/// Rounding is done according to the rounding parameter, which can be one of:
832///
833/// ```
0531ce1d
XL
834/// #[cfg(target_arch = "x86")]
835/// use std::arch::x86::*;
836/// #[cfg(target_arch = "x86_64")]
837/// use std::arch::x86_64::*;
838///
839/// # fn main() {
840/// // round to nearest, and suppress exceptions:
841/// # let _x =
842/// _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC;
843/// // round down, and suppress exceptions:
844/// # let _x =
845/// _MM_FROUND_TO_NEG_INF | _MM_FROUND_NO_EXC;
846/// // round up, and suppress exceptions:
847/// # let _x =
848/// _MM_FROUND_TO_POS_INF | _MM_FROUND_NO_EXC;
849/// // truncate, and suppress exceptions:
850/// # let _x =
851/// _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC;
852/// // use MXCSR.RC; see `_MM_SET_ROUNDING_MODE`:
853/// # let _x =
854/// _MM_FROUND_CUR_DIRECTION;
855/// # }
856/// ```
83c7162d 857///
353b0b11 858/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_round_sd)
0531ce1d
XL
859#[inline]
860#[target_feature(enable = "sse4.1")]
17df50a5
XL
861#[cfg_attr(test, assert_instr(roundsd, ROUNDING = 0))]
862#[rustc_legacy_const_generics(2)]
863#[stable(feature = "simd_x86", since = "1.27.0")]
864pub unsafe fn _mm_round_sd<const ROUNDING: i32>(a: __m128d, b: __m128d) -> __m128d {
353b0b11 865 static_assert_uimm_bits!(ROUNDING, 4);
17df50a5 866 roundsd(a, b, ROUNDING)
0531ce1d
XL
867}
868
869/// Round the lower single-precision (32-bit) floating-point element in `b`
17df50a5 870/// using the `ROUNDING` parameter, store the result as a single-precision
0531ce1d 871/// floating-point element in the lower element of the intrinsic result,
532ac7d7 872/// and copies the upper 3 packed elements from `a` to the upper elements
a2a8927a 873/// of the intrinsic result.
0531ce1d
XL
874/// Rounding is done according to the rounding parameter, which can be one of:
875///
876/// ```
0531ce1d
XL
877/// #[cfg(target_arch = "x86")]
878/// use std::arch::x86::*;
879/// #[cfg(target_arch = "x86_64")]
880/// use std::arch::x86_64::*;
881///
882/// # fn main() {
883/// // round to nearest, and suppress exceptions:
884/// # let _x =
885/// _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC;
886/// // round down, and suppress exceptions:
887/// # let _x =
888/// _MM_FROUND_TO_NEG_INF | _MM_FROUND_NO_EXC;
889/// // round up, and suppress exceptions:
890/// # let _x =
891/// _MM_FROUND_TO_POS_INF | _MM_FROUND_NO_EXC;
892/// // truncate, and suppress exceptions:
893/// # let _x =
894/// _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC;
895/// // use MXCSR.RC; see `_MM_SET_ROUNDING_MODE`:
896/// # let _x =
897/// _MM_FROUND_CUR_DIRECTION;
898/// # }
899/// ```
83c7162d 900///
353b0b11 901/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_round_ss)
0531ce1d
XL
902#[inline]
903#[target_feature(enable = "sse4.1")]
17df50a5
XL
904#[cfg_attr(test, assert_instr(roundss, ROUNDING = 0))]
905#[rustc_legacy_const_generics(2)]
906#[stable(feature = "simd_x86", since = "1.27.0")]
907pub unsafe fn _mm_round_ss<const ROUNDING: i32>(a: __m128, b: __m128) -> __m128 {
353b0b11 908 static_assert_uimm_bits!(ROUNDING, 4);
17df50a5 909 roundss(a, b, ROUNDING)
0531ce1d
XL
910}
911
912/// Finds the minimum unsigned 16-bit element in the 128-bit __m128i vector,
913/// returning a vector containing its value in its first position, and its
914/// index
915/// in its second position; all other elements are set to zero.
916///
fc512014 917/// This intrinsic corresponds to the `VPHMINPOSUW` / `PHMINPOSUW`
0531ce1d
XL
918/// instruction.
919///
920/// Arguments:
921///
922/// * `a` - A 128-bit vector of type `__m128i`.
923///
924/// Returns:
925///
926/// A 128-bit value where:
927///
928/// * bits `[15:0]` - contain the minimum value found in parameter `a`,
929/// * bits `[18:16]` - contain the index of the minimum value
930/// * remaining bits are set to `0`.
83c7162d 931///
353b0b11 932/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_minpos_epu16)
0531ce1d
XL
933#[inline]
934#[target_feature(enable = "sse4.1")]
935#[cfg_attr(test, assert_instr(phminposuw))]
83c7162d 936#[stable(feature = "simd_x86", since = "1.27.0")]
0531ce1d 937pub unsafe fn _mm_minpos_epu16(a: __m128i) -> __m128i {
532ac7d7 938 transmute(phminposuw(a.as_u16x8()))
0531ce1d
XL
939}
940
532ac7d7
XL
941/// Multiplies the low 32-bit integers from each packed 64-bit
942/// element in `a` and `b`, and returns the signed 64-bit result.
83c7162d 943///
353b0b11 944/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mul_epi32)
0531ce1d
XL
945#[inline]
946#[target_feature(enable = "sse4.1")]
947#[cfg_attr(test, assert_instr(pmuldq))]
83c7162d 948#[stable(feature = "simd_x86", since = "1.27.0")]
0531ce1d 949pub unsafe fn _mm_mul_epi32(a: __m128i, b: __m128i) -> __m128i {
ed00b5ec
FG
950 let a = simd_cast::<_, i64x2>(simd_cast::<_, i32x2>(a.as_i64x2()));
951 let b = simd_cast::<_, i64x2>(simd_cast::<_, i32x2>(b.as_i64x2()));
952 transmute(simd_mul(a, b))
0531ce1d
XL
953}
954
532ac7d7 955/// Multiplies the packed 32-bit integers in `a` and `b`, producing intermediate
0531ce1d
XL
956/// 64-bit integers, and returns the lowest 32-bit, whatever they might be,
957/// reinterpreted as a signed integer. While `pmulld __m128i::splat(2),
958/// __m128i::splat(2)` returns the obvious `__m128i::splat(4)`, due to wrapping
959/// arithmetic `pmulld __m128i::splat(i32::MAX), __m128i::splat(2)` would
960/// return a negative number.
83c7162d 961///
353b0b11 962/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mullo_epi32)
0531ce1d
XL
963#[inline]
964#[target_feature(enable = "sse4.1")]
965#[cfg_attr(test, assert_instr(pmulld))]
83c7162d 966#[stable(feature = "simd_x86", since = "1.27.0")]
0531ce1d 967pub unsafe fn _mm_mullo_epi32(a: __m128i, b: __m128i) -> __m128i {
532ac7d7 968 transmute(simd_mul(a.as_i32x4(), b.as_i32x4()))
0531ce1d
XL
969}
970
971/// Subtracts 8-bit unsigned integer values and computes the absolute
972/// values of the differences to the corresponding bits in the destination.
973/// Then sums of the absolute differences are returned according to the bit
974/// fields in the immediate operand.
975///
976/// The following algorithm is performed:
977///
978/// ```ignore
17df50a5
XL
979/// i = IMM8[2] * 4
980/// j = IMM8[1:0] * 4
0531ce1d
XL
981/// for k := 0 to 7
982/// d0 = abs(a[i + k + 0] - b[j + 0])
983/// d1 = abs(a[i + k + 1] - b[j + 1])
984/// d2 = abs(a[i + k + 2] - b[j + 2])
985/// d3 = abs(a[i + k + 3] - b[j + 3])
986/// r[k] = d0 + d1 + d2 + d3
987/// ```
988///
989/// Arguments:
990///
991/// * `a` - A 128-bit vector of type `__m128i`.
992/// * `b` - A 128-bit vector of type `__m128i`.
17df50a5 993/// * `IMM8` - An 8-bit immediate operand specifying how the absolute
0731742a 994/// differences are to be calculated
0531ce1d
XL
995/// * Bit `[2]` specify the offset for operand `a`
996/// * Bits `[1:0]` specify the offset for operand `b`
997///
998/// Returns:
999///
0731742a
XL
1000/// * A `__m128i` vector containing the sums of the sets of absolute
1001/// differences between both operands.
83c7162d 1002///
353b0b11 1003/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mpsadbw_epu8)
0531ce1d
XL
1004#[inline]
1005#[target_feature(enable = "sse4.1")]
17df50a5
XL
1006#[cfg_attr(test, assert_instr(mpsadbw, IMM8 = 0))]
1007#[rustc_legacy_const_generics(2)]
83c7162d 1008#[stable(feature = "simd_x86", since = "1.27.0")]
17df50a5 1009pub unsafe fn _mm_mpsadbw_epu8<const IMM8: i32>(a: __m128i, b: __m128i) -> __m128i {
353b0b11 1010 static_assert_uimm_bits!(IMM8, 3);
17df50a5 1011 transmute(mpsadbw(a.as_u8x16(), b.as_u8x16(), IMM8 as u8))
0531ce1d
XL
1012}
1013
1014/// Tests whether the specified bits in a 128-bit integer vector are all
1015/// zeros.
1016///
1017/// Arguments:
1018///
1019/// * `a` - A 128-bit integer vector containing the bits to be tested.
1020/// * `mask` - A 128-bit integer vector selecting which bits to test in
0731742a 1021/// operand `a`.
0531ce1d
XL
1022///
1023/// Returns:
1024///
1025/// * `1` - if the specified bits are all zeros,
1026/// * `0` - otherwise.
83c7162d 1027///
353b0b11 1028/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_testz_si128)
0531ce1d
XL
1029#[inline]
1030#[target_feature(enable = "sse4.1")]
1031#[cfg_attr(test, assert_instr(ptest))]
83c7162d 1032#[stable(feature = "simd_x86", since = "1.27.0")]
0531ce1d
XL
1033pub unsafe fn _mm_testz_si128(a: __m128i, mask: __m128i) -> i32 {
1034 ptestz(a.as_i64x2(), mask.as_i64x2())
1035}
1036
1037/// Tests whether the specified bits in a 128-bit integer vector are all
1038/// ones.
1039///
1040/// Arguments:
1041///
1042/// * `a` - A 128-bit integer vector containing the bits to be tested.
1043/// * `mask` - A 128-bit integer vector selecting which bits to test in
0731742a 1044/// operand `a`.
0531ce1d
XL
1045///
1046/// Returns:
1047///
1048/// * `1` - if the specified bits are all ones,
1049/// * `0` - otherwise.
83c7162d 1050///
353b0b11 1051/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_testc_si128)
0531ce1d
XL
1052#[inline]
1053#[target_feature(enable = "sse4.1")]
1054#[cfg_attr(test, assert_instr(ptest))]
83c7162d 1055#[stable(feature = "simd_x86", since = "1.27.0")]
0531ce1d
XL
1056pub unsafe fn _mm_testc_si128(a: __m128i, mask: __m128i) -> i32 {
1057 ptestc(a.as_i64x2(), mask.as_i64x2())
1058}
1059
1060/// Tests whether the specified bits in a 128-bit integer vector are
1061/// neither all zeros nor all ones.
1062///
1063/// Arguments:
1064///
1065/// * `a` - A 128-bit integer vector containing the bits to be tested.
1066/// * `mask` - A 128-bit integer vector selecting which bits to test in
0731742a 1067/// operand `a`.
0531ce1d
XL
1068///
1069/// Returns:
1070///
1071/// * `1` - if the specified bits are neither all zeros nor all ones,
1072/// * `0` - otherwise.
83c7162d 1073///
353b0b11 1074/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_testnzc_si128)
0531ce1d
XL
1075#[inline]
1076#[target_feature(enable = "sse4.1")]
1077#[cfg_attr(test, assert_instr(ptest))]
83c7162d 1078#[stable(feature = "simd_x86", since = "1.27.0")]
0531ce1d
XL
1079pub unsafe fn _mm_testnzc_si128(a: __m128i, mask: __m128i) -> i32 {
1080 ptestnzc(a.as_i64x2(), mask.as_i64x2())
1081}
1082
1083/// Tests whether the specified bits in a 128-bit integer vector are all
1084/// zeros.
1085///
1086/// Arguments:
1087///
1088/// * `a` - A 128-bit integer vector containing the bits to be tested.
1089/// * `mask` - A 128-bit integer vector selecting which bits to test in
0731742a 1090/// operand `a`.
0531ce1d
XL
1091///
1092/// Returns:
1093///
1094/// * `1` - if the specified bits are all zeros,
1095/// * `0` - otherwise.
83c7162d 1096///
353b0b11 1097/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_test_all_zeros)
0531ce1d
XL
1098#[inline]
1099#[target_feature(enable = "sse4.1")]
1100#[cfg_attr(test, assert_instr(ptest))]
83c7162d 1101#[stable(feature = "simd_x86", since = "1.27.0")]
0531ce1d
XL
1102pub unsafe fn _mm_test_all_zeros(a: __m128i, mask: __m128i) -> i32 {
1103 _mm_testz_si128(a, mask)
1104}
1105
1106/// Tests whether the specified bits in `a` 128-bit integer vector are all
1107/// ones.
1108///
1109/// Argument:
1110///
1111/// * `a` - A 128-bit integer vector containing the bits to be tested.
1112///
1113/// Returns:
1114///
1115/// * `1` - if the bits specified in the operand are all set to 1,
1116/// * `0` - otherwise.
83c7162d 1117///
353b0b11 1118/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_test_all_ones)
0531ce1d
XL
1119#[inline]
1120#[target_feature(enable = "sse4.1")]
1121#[cfg_attr(test, assert_instr(pcmpeqd))]
1122#[cfg_attr(test, assert_instr(ptest))]
83c7162d 1123#[stable(feature = "simd_x86", since = "1.27.0")]
0531ce1d
XL
1124pub unsafe fn _mm_test_all_ones(a: __m128i) -> i32 {
1125 _mm_testc_si128(a, _mm_cmpeq_epi32(a, a))
1126}
1127
1128/// Tests whether the specified bits in a 128-bit integer vector are
1129/// neither all zeros nor all ones.
1130///
1131/// Arguments:
1132///
1133/// * `a` - A 128-bit integer vector containing the bits to be tested.
1134/// * `mask` - A 128-bit integer vector selecting which bits to test in
0731742a 1135/// operand `a`.
0531ce1d
XL
1136///
1137/// Returns:
1138///
1139/// * `1` - if the specified bits are neither all zeros nor all ones,
1140/// * `0` - otherwise.
83c7162d 1141///
353b0b11 1142/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_test_mix_ones_zeros)
0531ce1d
XL
1143#[inline]
1144#[target_feature(enable = "sse4.1")]
1145#[cfg_attr(test, assert_instr(ptest))]
83c7162d 1146#[stable(feature = "simd_x86", since = "1.27.0")]
0531ce1d
XL
1147pub unsafe fn _mm_test_mix_ones_zeros(a: __m128i, mask: __m128i) -> i32 {
1148 _mm_testnzc_si128(a, mask)
1149}
1150
1151#[allow(improper_ctypes)]
1152extern "C" {
0531ce1d
XL
1153 #[link_name = "llvm.x86.sse41.insertps"]
1154 fn insertps(a: __m128, b: __m128, imm8: u8) -> __m128;
0531ce1d
XL
1155 #[link_name = "llvm.x86.sse41.packusdw"]
1156 fn packusdw(a: i32x4, b: i32x4) -> u16x8;
1157 #[link_name = "llvm.x86.sse41.dppd"]
1158 fn dppd(a: __m128d, b: __m128d, imm8: u8) -> __m128d;
1159 #[link_name = "llvm.x86.sse41.dpps"]
1160 fn dpps(a: __m128, b: __m128, imm8: u8) -> __m128;
1161 #[link_name = "llvm.x86.sse41.round.pd"]
1162 fn roundpd(a: __m128d, rounding: i32) -> __m128d;
1163 #[link_name = "llvm.x86.sse41.round.ps"]
1164 fn roundps(a: __m128, rounding: i32) -> __m128;
1165 #[link_name = "llvm.x86.sse41.round.sd"]
1166 fn roundsd(a: __m128d, b: __m128d, rounding: i32) -> __m128d;
1167 #[link_name = "llvm.x86.sse41.round.ss"]
1168 fn roundss(a: __m128, b: __m128, rounding: i32) -> __m128;
1169 #[link_name = "llvm.x86.sse41.phminposuw"]
1170 fn phminposuw(a: u16x8) -> u16x8;
0531ce1d
XL
1171 #[link_name = "llvm.x86.sse41.mpsadbw"]
1172 fn mpsadbw(a: u8x16, b: u8x16, imm8: u8) -> u16x8;
1173 #[link_name = "llvm.x86.sse41.ptestz"]
1174 fn ptestz(a: i64x2, mask: i64x2) -> i32;
1175 #[link_name = "llvm.x86.sse41.ptestc"]
1176 fn ptestc(a: i64x2, mask: i64x2) -> i32;
1177 #[link_name = "llvm.x86.sse41.ptestnzc"]
1178 fn ptestnzc(a: i64x2, mask: i64x2) -> i32;
1179}
1180
1181#[cfg(test)]
1182mod tests {
532ac7d7 1183 use crate::core_arch::x86::*;
0531ce1d 1184 use std::mem;
416331ca 1185 use stdarch_test::simd_test;
0531ce1d 1186
83c7162d 1187 #[simd_test(enable = "sse4.1")]
0531ce1d 1188 unsafe fn test_mm_blendv_epi8() {
0731742a 1189 #[rustfmt::skip]
0531ce1d
XL
1190 let a = _mm_setr_epi8(
1191 0, 1, 2, 3, 4, 5, 6, 7,
1192 8, 9, 10, 11, 12, 13, 14, 15,
1193 );
0731742a 1194 #[rustfmt::skip]
0531ce1d
XL
1195 let b = _mm_setr_epi8(
1196 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31,
1197 );
0731742a 1198 #[rustfmt::skip]
0531ce1d
XL
1199 let mask = _mm_setr_epi8(
1200 0, -1, 0, -1, 0, -1, 0, -1,
1201 0, -1, 0, -1, 0, -1, 0, -1,
1202 );
0731742a 1203 #[rustfmt::skip]
0531ce1d
XL
1204 let e = _mm_setr_epi8(
1205 0, 17, 2, 19, 4, 21, 6, 23, 8, 25, 10, 27, 12, 29, 14, 31,
1206 );
1207 assert_eq_m128i(_mm_blendv_epi8(a, b, mask), e);
1208 }
1209
83c7162d 1210 #[simd_test(enable = "sse4.1")]
0531ce1d
XL
1211 unsafe fn test_mm_blendv_pd() {
1212 let a = _mm_set1_pd(0.0);
1213 let b = _mm_set1_pd(1.0);
532ac7d7 1214 let mask = transmute(_mm_setr_epi64x(0, -1));
0531ce1d
XL
1215 let r = _mm_blendv_pd(a, b, mask);
1216 let e = _mm_setr_pd(0.0, 1.0);
1217 assert_eq_m128d(r, e);
1218 }
1219
83c7162d 1220 #[simd_test(enable = "sse4.1")]
0531ce1d
XL
1221 unsafe fn test_mm_blendv_ps() {
1222 let a = _mm_set1_ps(0.0);
1223 let b = _mm_set1_ps(1.0);
532ac7d7 1224 let mask = transmute(_mm_setr_epi32(0, -1, 0, -1));
0531ce1d
XL
1225 let r = _mm_blendv_ps(a, b, mask);
1226 let e = _mm_setr_ps(0.0, 1.0, 0.0, 1.0);
1227 assert_eq_m128(r, e);
1228 }
1229
83c7162d 1230 #[simd_test(enable = "sse4.1")]
0531ce1d
XL
1231 unsafe fn test_mm_blend_pd() {
1232 let a = _mm_set1_pd(0.0);
1233 let b = _mm_set1_pd(1.0);
17df50a5 1234 let r = _mm_blend_pd::<0b10>(a, b);
0531ce1d
XL
1235 let e = _mm_setr_pd(0.0, 1.0);
1236 assert_eq_m128d(r, e);
1237 }
1238
83c7162d 1239 #[simd_test(enable = "sse4.1")]
0531ce1d
XL
1240 unsafe fn test_mm_blend_ps() {
1241 let a = _mm_set1_ps(0.0);
1242 let b = _mm_set1_ps(1.0);
17df50a5 1243 let r = _mm_blend_ps::<0b1010>(a, b);
0531ce1d
XL
1244 let e = _mm_setr_ps(0.0, 1.0, 0.0, 1.0);
1245 assert_eq_m128(r, e);
1246 }
1247
83c7162d 1248 #[simd_test(enable = "sse4.1")]
0531ce1d
XL
1249 unsafe fn test_mm_blend_epi16() {
1250 let a = _mm_set1_epi16(0);
1251 let b = _mm_set1_epi16(1);
17df50a5 1252 let r = _mm_blend_epi16::<0b1010_1100>(a, b);
0531ce1d
XL
1253 let e = _mm_setr_epi16(0, 0, 1, 1, 0, 1, 0, 1);
1254 assert_eq_m128i(r, e);
1255 }
1256
83c7162d 1257 #[simd_test(enable = "sse4.1")]
0531ce1d
XL
1258 unsafe fn test_mm_extract_ps() {
1259 let a = _mm_setr_ps(0.0, 1.0, 2.0, 3.0);
ed00b5ec 1260 let r: f32 = f32::from_bits(_mm_extract_ps::<1>(a) as u32);
0531ce1d 1261 assert_eq!(r, 1.0);
ed00b5ec 1262 let r: f32 = f32::from_bits(_mm_extract_ps::<3>(a) as u32);
17df50a5 1263 assert_eq!(r, 3.0);
0531ce1d
XL
1264 }
1265
83c7162d 1266 #[simd_test(enable = "sse4.1")]
0531ce1d 1267 unsafe fn test_mm_extract_epi8() {
0731742a 1268 #[rustfmt::skip]
0531ce1d
XL
1269 let a = _mm_setr_epi8(
1270 -1, 1, 2, 3, 4, 5, 6, 7,
1271 8, 9, 10, 11, 12, 13, 14, 15
1272 );
17df50a5
XL
1273 let r1 = _mm_extract_epi8::<0>(a);
1274 let r2 = _mm_extract_epi8::<3>(a);
0531ce1d
XL
1275 assert_eq!(r1, 0xFF);
1276 assert_eq!(r2, 3);
1277 }
1278
83c7162d 1279 #[simd_test(enable = "sse4.1")]
0531ce1d
XL
1280 unsafe fn test_mm_extract_epi32() {
1281 let a = _mm_setr_epi32(0, 1, 2, 3);
17df50a5 1282 let r = _mm_extract_epi32::<1>(a);
0531ce1d 1283 assert_eq!(r, 1);
17df50a5
XL
1284 let r = _mm_extract_epi32::<3>(a);
1285 assert_eq!(r, 3);
0531ce1d
XL
1286 }
1287
83c7162d 1288 #[simd_test(enable = "sse4.1")]
0531ce1d
XL
1289 unsafe fn test_mm_insert_ps() {
1290 let a = _mm_set1_ps(1.0);
1291 let b = _mm_setr_ps(1.0, 2.0, 3.0, 4.0);
17df50a5 1292 let r = _mm_insert_ps::<0b11_00_1100>(a, b);
0531ce1d
XL
1293 let e = _mm_setr_ps(4.0, 1.0, 0.0, 0.0);
1294 assert_eq_m128(r, e);
c620b35d
FG
1295
1296 // Zeroing takes precedence over copied value
1297 let a = _mm_set1_ps(1.0);
1298 let b = _mm_setr_ps(1.0, 2.0, 3.0, 4.0);
1299 let r = _mm_insert_ps::<0b11_00_0001>(a, b);
1300 let e = _mm_setr_ps(0.0, 1.0, 1.0, 1.0);
1301 assert_eq_m128(r, e);
0531ce1d
XL
1302 }
1303
83c7162d 1304 #[simd_test(enable = "sse4.1")]
0531ce1d
XL
1305 unsafe fn test_mm_insert_epi8() {
1306 let a = _mm_set1_epi8(0);
1307 let e = _mm_setr_epi8(0, 32, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0);
17df50a5 1308 let r = _mm_insert_epi8::<1>(a, 32);
0531ce1d 1309 assert_eq_m128i(r, e);
17df50a5
XL
1310 let e = _mm_setr_epi8(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 32, 0);
1311 let r = _mm_insert_epi8::<14>(a, 32);
0531ce1d
XL
1312 assert_eq_m128i(r, e);
1313 }
1314
83c7162d 1315 #[simd_test(enable = "sse4.1")]
0531ce1d
XL
1316 unsafe fn test_mm_insert_epi32() {
1317 let a = _mm_set1_epi32(0);
1318 let e = _mm_setr_epi32(0, 32, 0, 0);
17df50a5 1319 let r = _mm_insert_epi32::<1>(a, 32);
0531ce1d 1320 assert_eq_m128i(r, e);
17df50a5
XL
1321 let e = _mm_setr_epi32(0, 0, 0, 32);
1322 let r = _mm_insert_epi32::<3>(a, 32);
0531ce1d
XL
1323 assert_eq_m128i(r, e);
1324 }
1325
83c7162d 1326 #[simd_test(enable = "sse4.1")]
0531ce1d 1327 unsafe fn test_mm_max_epi8() {
0731742a 1328 #[rustfmt::skip]
0531ce1d
XL
1329 let a = _mm_setr_epi8(
1330 1, 4, 5, 8, 9, 12, 13, 16,
1331 17, 20, 21, 24, 25, 28, 29, 32,
1332 );
0731742a 1333 #[rustfmt::skip]
0531ce1d
XL
1334 let b = _mm_setr_epi8(
1335 2, 3, 6, 7, 10, 11, 14, 15,
1336 18, 19, 22, 23, 26, 27, 30, 31,
1337 );
1338 let r = _mm_max_epi8(a, b);
0731742a 1339 #[rustfmt::skip]
0531ce1d
XL
1340 let e = _mm_setr_epi8(
1341 2, 4, 6, 8, 10, 12, 14, 16,
1342 18, 20, 22, 24, 26, 28, 30, 32,
1343 );
1344 assert_eq_m128i(r, e);
1345 }
1346
83c7162d 1347 #[simd_test(enable = "sse4.1")]
0531ce1d
XL
1348 unsafe fn test_mm_max_epu16() {
1349 let a = _mm_setr_epi16(1, 4, 5, 8, 9, 12, 13, 16);
1350 let b = _mm_setr_epi16(2, 3, 6, 7, 10, 11, 14, 15);
1351 let r = _mm_max_epu16(a, b);
1352 let e = _mm_setr_epi16(2, 4, 6, 8, 10, 12, 14, 16);
1353 assert_eq_m128i(r, e);
1354 }
1355
83c7162d 1356 #[simd_test(enable = "sse4.1")]
0531ce1d
XL
1357 unsafe fn test_mm_max_epi32() {
1358 let a = _mm_setr_epi32(1, 4, 5, 8);
1359 let b = _mm_setr_epi32(2, 3, 6, 7);
1360 let r = _mm_max_epi32(a, b);
1361 let e = _mm_setr_epi32(2, 4, 6, 8);
1362 assert_eq_m128i(r, e);
1363 }
1364
83c7162d 1365 #[simd_test(enable = "sse4.1")]
0531ce1d
XL
1366 unsafe fn test_mm_max_epu32() {
1367 let a = _mm_setr_epi32(1, 4, 5, 8);
1368 let b = _mm_setr_epi32(2, 3, 6, 7);
1369 let r = _mm_max_epu32(a, b);
1370 let e = _mm_setr_epi32(2, 4, 6, 8);
1371 assert_eq_m128i(r, e);
1372 }
1373
83c7162d 1374 #[simd_test(enable = "sse4.1")]
0531ce1d 1375 unsafe fn test_mm_min_epi8_1() {
0731742a 1376 #[rustfmt::skip]
0531ce1d
XL
1377 let a = _mm_setr_epi8(
1378 1, 4, 5, 8, 9, 12, 13, 16,
1379 17, 20, 21, 24, 25, 28, 29, 32,
1380 );
0731742a 1381 #[rustfmt::skip]
0531ce1d
XL
1382 let b = _mm_setr_epi8(
1383 2, 3, 6, 7, 10, 11, 14, 15,
1384 18, 19, 22, 23, 26, 27, 30, 31,
1385 );
1386 let r = _mm_min_epi8(a, b);
0731742a 1387 #[rustfmt::skip]
0531ce1d
XL
1388 let e = _mm_setr_epi8(
1389 1, 3, 5, 7, 9, 11, 13, 15,
1390 17, 19, 21, 23, 25, 27, 29, 31,
1391 );
1392 assert_eq_m128i(r, e);
1393 }
1394
83c7162d 1395 #[simd_test(enable = "sse4.1")]
0531ce1d 1396 unsafe fn test_mm_min_epi8_2() {
0731742a 1397 #[rustfmt::skip]
0531ce1d
XL
1398 let a = _mm_setr_epi8(
1399 1, -4, -5, 8, -9, -12, 13, -16,
1400 17, 20, 21, 24, 25, 28, 29, 32,
1401 );
0731742a 1402 #[rustfmt::skip]
0531ce1d
XL
1403 let b = _mm_setr_epi8(
1404 2, -3, -6, 7, -10, -11, 14, -15,
1405 18, 19, 22, 23, 26, 27, 30, 31,
1406 );
1407 let r = _mm_min_epi8(a, b);
0731742a 1408 #[rustfmt::skip]
0531ce1d
XL
1409 let e = _mm_setr_epi8(
1410 1, -4, -6, 7, -10, -12, 13, -16,
1411 17, 19, 21, 23, 25, 27, 29, 31,
1412 );
1413 assert_eq_m128i(r, e);
1414 }
1415
83c7162d 1416 #[simd_test(enable = "sse4.1")]
0531ce1d
XL
1417 unsafe fn test_mm_min_epu16() {
1418 let a = _mm_setr_epi16(1, 4, 5, 8, 9, 12, 13, 16);
1419 let b = _mm_setr_epi16(2, 3, 6, 7, 10, 11, 14, 15);
1420 let r = _mm_min_epu16(a, b);
1421 let e = _mm_setr_epi16(1, 3, 5, 7, 9, 11, 13, 15);
1422 assert_eq_m128i(r, e);
1423 }
1424
83c7162d 1425 #[simd_test(enable = "sse4.1")]
0531ce1d
XL
1426 unsafe fn test_mm_min_epi32_1() {
1427 let a = _mm_setr_epi32(1, 4, 5, 8);
1428 let b = _mm_setr_epi32(2, 3, 6, 7);
1429 let r = _mm_min_epi32(a, b);
1430 let e = _mm_setr_epi32(1, 3, 5, 7);
1431 assert_eq_m128i(r, e);
1432 }
1433
83c7162d 1434 #[simd_test(enable = "sse4.1")]
0531ce1d
XL
1435 unsafe fn test_mm_min_epi32_2() {
1436 let a = _mm_setr_epi32(-1, 4, 5, -7);
1437 let b = _mm_setr_epi32(-2, 3, -6, 8);
1438 let r = _mm_min_epi32(a, b);
1439 let e = _mm_setr_epi32(-2, 3, -6, -7);
1440 assert_eq_m128i(r, e);
1441 }
1442
83c7162d 1443 #[simd_test(enable = "sse4.1")]
0531ce1d
XL
1444 unsafe fn test_mm_min_epu32() {
1445 let a = _mm_setr_epi32(1, 4, 5, 8);
1446 let b = _mm_setr_epi32(2, 3, 6, 7);
1447 let r = _mm_min_epu32(a, b);
1448 let e = _mm_setr_epi32(1, 3, 5, 7);
1449 assert_eq_m128i(r, e);
1450 }
1451
83c7162d 1452 #[simd_test(enable = "sse4.1")]
0531ce1d
XL
1453 unsafe fn test_mm_packus_epi32() {
1454 let a = _mm_setr_epi32(1, 2, 3, 4);
1455 let b = _mm_setr_epi32(-1, -2, -3, -4);
1456 let r = _mm_packus_epi32(a, b);
1457 let e = _mm_setr_epi16(1, 2, 3, 4, 0, 0, 0, 0);
1458 assert_eq_m128i(r, e);
1459 }
1460
83c7162d 1461 #[simd_test(enable = "sse4.1")]
0531ce1d
XL
1462 unsafe fn test_mm_cmpeq_epi64() {
1463 let a = _mm_setr_epi64x(0, 1);
1464 let b = _mm_setr_epi64x(0, 0);
1465 let r = _mm_cmpeq_epi64(a, b);
1466 let e = _mm_setr_epi64x(-1, 0);
1467 assert_eq_m128i(r, e);
1468 }
1469
83c7162d 1470 #[simd_test(enable = "sse4.1")]
0531ce1d
XL
1471 unsafe fn test_mm_cvtepi8_epi16() {
1472 let a = _mm_set1_epi8(10);
1473 let r = _mm_cvtepi8_epi16(a);
1474 let e = _mm_set1_epi16(10);
1475 assert_eq_m128i(r, e);
1476 let a = _mm_set1_epi8(-10);
1477 let r = _mm_cvtepi8_epi16(a);
1478 let e = _mm_set1_epi16(-10);
1479 assert_eq_m128i(r, e);
1480 }
1481
83c7162d 1482 #[simd_test(enable = "sse4.1")]
0531ce1d
XL
1483 unsafe fn test_mm_cvtepi8_epi32() {
1484 let a = _mm_set1_epi8(10);
1485 let r = _mm_cvtepi8_epi32(a);
1486 let e = _mm_set1_epi32(10);
1487 assert_eq_m128i(r, e);
1488 let a = _mm_set1_epi8(-10);
1489 let r = _mm_cvtepi8_epi32(a);
1490 let e = _mm_set1_epi32(-10);
1491 assert_eq_m128i(r, e);
1492 }
1493
83c7162d 1494 #[simd_test(enable = "sse4.1")]
0531ce1d
XL
1495 unsafe fn test_mm_cvtepi8_epi64() {
1496 let a = _mm_set1_epi8(10);
1497 let r = _mm_cvtepi8_epi64(a);
1498 let e = _mm_set1_epi64x(10);
1499 assert_eq_m128i(r, e);
1500 let a = _mm_set1_epi8(-10);
1501 let r = _mm_cvtepi8_epi64(a);
1502 let e = _mm_set1_epi64x(-10);
1503 assert_eq_m128i(r, e);
1504 }
1505
83c7162d 1506 #[simd_test(enable = "sse4.1")]
0531ce1d
XL
1507 unsafe fn test_mm_cvtepi16_epi32() {
1508 let a = _mm_set1_epi16(10);
1509 let r = _mm_cvtepi16_epi32(a);
1510 let e = _mm_set1_epi32(10);
1511 assert_eq_m128i(r, e);
1512 let a = _mm_set1_epi16(-10);
1513 let r = _mm_cvtepi16_epi32(a);
1514 let e = _mm_set1_epi32(-10);
1515 assert_eq_m128i(r, e);
1516 }
1517
83c7162d 1518 #[simd_test(enable = "sse4.1")]
0531ce1d
XL
1519 unsafe fn test_mm_cvtepi16_epi64() {
1520 let a = _mm_set1_epi16(10);
1521 let r = _mm_cvtepi16_epi64(a);
1522 let e = _mm_set1_epi64x(10);
1523 assert_eq_m128i(r, e);
1524 let a = _mm_set1_epi16(-10);
1525 let r = _mm_cvtepi16_epi64(a);
1526 let e = _mm_set1_epi64x(-10);
1527 assert_eq_m128i(r, e);
1528 }
1529
83c7162d 1530 #[simd_test(enable = "sse4.1")]
0531ce1d
XL
1531 unsafe fn test_mm_cvtepi32_epi64() {
1532 let a = _mm_set1_epi32(10);
1533 let r = _mm_cvtepi32_epi64(a);
1534 let e = _mm_set1_epi64x(10);
1535 assert_eq_m128i(r, e);
1536 let a = _mm_set1_epi32(-10);
1537 let r = _mm_cvtepi32_epi64(a);
1538 let e = _mm_set1_epi64x(-10);
1539 assert_eq_m128i(r, e);
1540 }
1541
83c7162d 1542 #[simd_test(enable = "sse4.1")]
0531ce1d
XL
1543 unsafe fn test_mm_cvtepu8_epi16() {
1544 let a = _mm_set1_epi8(10);
1545 let r = _mm_cvtepu8_epi16(a);
1546 let e = _mm_set1_epi16(10);
1547 assert_eq_m128i(r, e);
1548 }
1549
83c7162d 1550 #[simd_test(enable = "sse4.1")]
0531ce1d
XL
1551 unsafe fn test_mm_cvtepu8_epi32() {
1552 let a = _mm_set1_epi8(10);
1553 let r = _mm_cvtepu8_epi32(a);
1554 let e = _mm_set1_epi32(10);
1555 assert_eq_m128i(r, e);
1556 }
1557
83c7162d 1558 #[simd_test(enable = "sse4.1")]
0531ce1d
XL
1559 unsafe fn test_mm_cvtepu8_epi64() {
1560 let a = _mm_set1_epi8(10);
1561 let r = _mm_cvtepu8_epi64(a);
1562 let e = _mm_set1_epi64x(10);
1563 assert_eq_m128i(r, e);
1564 }
1565
83c7162d 1566 #[simd_test(enable = "sse4.1")]
0531ce1d
XL
1567 unsafe fn test_mm_cvtepu16_epi32() {
1568 let a = _mm_set1_epi16(10);
1569 let r = _mm_cvtepu16_epi32(a);
1570 let e = _mm_set1_epi32(10);
1571 assert_eq_m128i(r, e);
1572 }
1573
83c7162d 1574 #[simd_test(enable = "sse4.1")]
0531ce1d
XL
1575 unsafe fn test_mm_cvtepu16_epi64() {
1576 let a = _mm_set1_epi16(10);
1577 let r = _mm_cvtepu16_epi64(a);
1578 let e = _mm_set1_epi64x(10);
1579 assert_eq_m128i(r, e);
1580 }
1581
83c7162d 1582 #[simd_test(enable = "sse4.1")]
0531ce1d
XL
1583 unsafe fn test_mm_cvtepu32_epi64() {
1584 let a = _mm_set1_epi32(10);
1585 let r = _mm_cvtepu32_epi64(a);
1586 let e = _mm_set1_epi64x(10);
1587 assert_eq_m128i(r, e);
1588 }
1589
83c7162d 1590 #[simd_test(enable = "sse4.1")]
0531ce1d
XL
1591 unsafe fn test_mm_dp_pd() {
1592 let a = _mm_setr_pd(2.0, 3.0);
1593 let b = _mm_setr_pd(1.0, 4.0);
1594 let e = _mm_setr_pd(14.0, 0.0);
17df50a5 1595 assert_eq_m128d(_mm_dp_pd::<0b00110001>(a, b), e);
0531ce1d
XL
1596 }
1597
83c7162d 1598 #[simd_test(enable = "sse4.1")]
0531ce1d
XL
1599 unsafe fn test_mm_dp_ps() {
1600 let a = _mm_setr_ps(2.0, 3.0, 1.0, 10.0);
1601 let b = _mm_setr_ps(1.0, 4.0, 0.5, 10.0);
1602 let e = _mm_setr_ps(14.5, 0.0, 14.5, 0.0);
17df50a5 1603 assert_eq_m128(_mm_dp_ps::<0b01110101>(a, b), e);
0531ce1d
XL
1604 }
1605
83c7162d 1606 #[simd_test(enable = "sse4.1")]
0531ce1d
XL
1607 unsafe fn test_mm_floor_pd() {
1608 let a = _mm_setr_pd(2.5, 4.5);
1609 let r = _mm_floor_pd(a);
1610 let e = _mm_setr_pd(2.0, 4.0);
1611 assert_eq_m128d(r, e);
1612 }
1613
83c7162d 1614 #[simd_test(enable = "sse4.1")]
0531ce1d
XL
1615 unsafe fn test_mm_floor_ps() {
1616 let a = _mm_setr_ps(2.5, 4.5, 8.5, 16.5);
1617 let r = _mm_floor_ps(a);
1618 let e = _mm_setr_ps(2.0, 4.0, 8.0, 16.0);
1619 assert_eq_m128(r, e);
1620 }
1621
83c7162d 1622 #[simd_test(enable = "sse4.1")]
0531ce1d
XL
1623 unsafe fn test_mm_floor_sd() {
1624 let a = _mm_setr_pd(2.5, 4.5);
1625 let b = _mm_setr_pd(-1.5, -3.5);
1626 let r = _mm_floor_sd(a, b);
1627 let e = _mm_setr_pd(-2.0, 4.5);
1628 assert_eq_m128d(r, e);
1629 }
1630
83c7162d 1631 #[simd_test(enable = "sse4.1")]
0531ce1d
XL
1632 unsafe fn test_mm_floor_ss() {
1633 let a = _mm_setr_ps(2.5, 4.5, 8.5, 16.5);
1634 let b = _mm_setr_ps(-1.5, -3.5, -7.5, -15.5);
1635 let r = _mm_floor_ss(a, b);
1636 let e = _mm_setr_ps(-2.0, 4.5, 8.5, 16.5);
1637 assert_eq_m128(r, e);
1638 }
1639
83c7162d 1640 #[simd_test(enable = "sse4.1")]
0531ce1d
XL
1641 unsafe fn test_mm_ceil_pd() {
1642 let a = _mm_setr_pd(1.5, 3.5);
1643 let r = _mm_ceil_pd(a);
1644 let e = _mm_setr_pd(2.0, 4.0);
1645 assert_eq_m128d(r, e);
1646 }
1647
83c7162d 1648 #[simd_test(enable = "sse4.1")]
0531ce1d
XL
1649 unsafe fn test_mm_ceil_ps() {
1650 let a = _mm_setr_ps(1.5, 3.5, 7.5, 15.5);
1651 let r = _mm_ceil_ps(a);
1652 let e = _mm_setr_ps(2.0, 4.0, 8.0, 16.0);
1653 assert_eq_m128(r, e);
1654 }
1655
83c7162d 1656 #[simd_test(enable = "sse4.1")]
0531ce1d
XL
1657 unsafe fn test_mm_ceil_sd() {
1658 let a = _mm_setr_pd(1.5, 3.5);
1659 let b = _mm_setr_pd(-2.5, -4.5);
1660 let r = _mm_ceil_sd(a, b);
1661 let e = _mm_setr_pd(-2.0, 3.5);
1662 assert_eq_m128d(r, e);
1663 }
1664
83c7162d 1665 #[simd_test(enable = "sse4.1")]
0531ce1d
XL
1666 unsafe fn test_mm_ceil_ss() {
1667 let a = _mm_setr_ps(1.5, 3.5, 7.5, 15.5);
1668 let b = _mm_setr_ps(-2.5, -4.5, -8.5, -16.5);
1669 let r = _mm_ceil_ss(a, b);
1670 let e = _mm_setr_ps(-2.0, 3.5, 7.5, 15.5);
1671 assert_eq_m128(r, e);
1672 }
1673
83c7162d 1674 #[simd_test(enable = "sse4.1")]
0531ce1d
XL
1675 unsafe fn test_mm_round_pd() {
1676 let a = _mm_setr_pd(1.25, 3.75);
17df50a5 1677 let r = _mm_round_pd::<_MM_FROUND_TO_NEAREST_INT>(a);
0531ce1d
XL
1678 let e = _mm_setr_pd(1.0, 4.0);
1679 assert_eq_m128d(r, e);
1680 }
1681
83c7162d 1682 #[simd_test(enable = "sse4.1")]
0531ce1d
XL
1683 unsafe fn test_mm_round_ps() {
1684 let a = _mm_setr_ps(2.25, 4.75, -1.75, -4.25);
17df50a5 1685 let r = _mm_round_ps::<_MM_FROUND_TO_ZERO>(a);
0531ce1d
XL
1686 let e = _mm_setr_ps(2.0, 4.0, -1.0, -4.0);
1687 assert_eq_m128(r, e);
1688 }
1689
83c7162d 1690 #[simd_test(enable = "sse4.1")]
0531ce1d
XL
1691 unsafe fn test_mm_round_sd() {
1692 let a = _mm_setr_pd(1.5, 3.5);
1693 let b = _mm_setr_pd(-2.5, -4.5);
c620b35d
FG
1694 let r = _mm_round_sd::<_MM_FROUND_TO_NEAREST_INT>(a, b);
1695 let e = _mm_setr_pd(-2.0, 3.5);
1696 assert_eq_m128d(r, e);
1697
1698 let a = _mm_setr_pd(1.5, 3.5);
1699 let b = _mm_setr_pd(-2.5, -4.5);
1700 let r = _mm_round_sd::<_MM_FROUND_TO_NEG_INF>(a, b);
1701 let e = _mm_setr_pd(-3.0, 3.5);
1702 assert_eq_m128d(r, e);
1703
1704 let a = _mm_setr_pd(1.5, 3.5);
1705 let b = _mm_setr_pd(-2.5, -4.5);
1706 let r = _mm_round_sd::<_MM_FROUND_TO_POS_INF>(a, b);
1707 let e = _mm_setr_pd(-2.0, 3.5);
1708 assert_eq_m128d(r, e);
1709
1710 let a = _mm_setr_pd(1.5, 3.5);
1711 let b = _mm_setr_pd(-2.5, -4.5);
1712 let r = _mm_round_sd::<_MM_FROUND_TO_ZERO>(a, b);
0531ce1d
XL
1713 let e = _mm_setr_pd(-2.0, 3.5);
1714 assert_eq_m128d(r, e);
1715 }
1716
83c7162d 1717 #[simd_test(enable = "sse4.1")]
0531ce1d
XL
1718 unsafe fn test_mm_round_ss() {
1719 let a = _mm_setr_ps(1.5, 3.5, 7.5, 15.5);
1720 let b = _mm_setr_ps(-1.75, -4.5, -8.5, -16.5);
c620b35d 1721 let r = _mm_round_ss::<_MM_FROUND_TO_NEAREST_INT>(a, b);
0531ce1d
XL
1722 let e = _mm_setr_ps(-2.0, 3.5, 7.5, 15.5);
1723 assert_eq_m128(r, e);
c620b35d
FG
1724
1725 let a = _mm_setr_ps(1.5, 3.5, 7.5, 15.5);
1726 let b = _mm_setr_ps(-1.75, -4.5, -8.5, -16.5);
1727 let r = _mm_round_ss::<_MM_FROUND_TO_NEG_INF>(a, b);
1728 let e = _mm_setr_ps(-2.0, 3.5, 7.5, 15.5);
1729 assert_eq_m128(r, e);
1730
1731 let a = _mm_setr_ps(1.5, 3.5, 7.5, 15.5);
1732 let b = _mm_setr_ps(-1.75, -4.5, -8.5, -16.5);
1733 let r = _mm_round_ss::<_MM_FROUND_TO_POS_INF>(a, b);
1734 let e = _mm_setr_ps(-1.0, 3.5, 7.5, 15.5);
1735 assert_eq_m128(r, e);
1736
1737 let a = _mm_setr_ps(1.5, 3.5, 7.5, 15.5);
1738 let b = _mm_setr_ps(-1.75, -4.5, -8.5, -16.5);
1739 let r = _mm_round_ss::<_MM_FROUND_TO_ZERO>(a, b);
1740 let e = _mm_setr_ps(-1.0, 3.5, 7.5, 15.5);
1741 assert_eq_m128(r, e);
0531ce1d
XL
1742 }
1743
83c7162d 1744 #[simd_test(enable = "sse4.1")]
0531ce1d
XL
1745 unsafe fn test_mm_minpos_epu16_1() {
1746 let a = _mm_setr_epi16(23, 18, 44, 97, 50, 13, 67, 66);
1747 let r = _mm_minpos_epu16(a);
1748 let e = _mm_setr_epi16(13, 5, 0, 0, 0, 0, 0, 0);
1749 assert_eq_m128i(r, e);
1750 }
1751
83c7162d 1752 #[simd_test(enable = "sse4.1")]
0531ce1d
XL
1753 unsafe fn test_mm_minpos_epu16_2() {
1754 let a = _mm_setr_epi16(0, 18, 44, 97, 50, 13, 67, 66);
1755 let r = _mm_minpos_epu16(a);
1756 let e = _mm_setr_epi16(0, 0, 0, 0, 0, 0, 0, 0);
1757 assert_eq_m128i(r, e);
1758 }
1759
c620b35d
FG
1760 #[simd_test(enable = "sse4.1")]
1761 unsafe fn test_mm_minpos_epu16_3() {
1762 // Case where the minimum value is repeated
1763 let a = _mm_setr_epi16(23, 18, 44, 97, 50, 13, 67, 13);
1764 let r = _mm_minpos_epu16(a);
1765 let e = _mm_setr_epi16(13, 5, 0, 0, 0, 0, 0, 0);
1766 assert_eq_m128i(r, e);
1767 }
1768
83c7162d 1769 #[simd_test(enable = "sse4.1")]
0531ce1d
XL
1770 unsafe fn test_mm_mul_epi32() {
1771 {
1772 let a = _mm_setr_epi32(1, 1, 1, 1);
1773 let b = _mm_setr_epi32(1, 2, 3, 4);
1774 let r = _mm_mul_epi32(a, b);
1775 let e = _mm_setr_epi64x(1, 3);
1776 assert_eq_m128i(r, e);
1777 }
1778 {
0731742a 1779 let a = _mm_setr_epi32(15, 2 /* ignored */, 1234567, 4 /* ignored */);
0531ce1d 1780 let b = _mm_setr_epi32(
8faf50e0
XL
1781 -20, -256, /* ignored */
1782 666666, 666666, /* ignored */
0531ce1d
XL
1783 );
1784 let r = _mm_mul_epi32(a, b);
1785 let e = _mm_setr_epi64x(-300, 823043843622);
1786 assert_eq_m128i(r, e);
1787 }
1788 }
1789
83c7162d 1790 #[simd_test(enable = "sse4.1")]
0531ce1d
XL
1791 unsafe fn test_mm_mullo_epi32() {
1792 {
1793 let a = _mm_setr_epi32(1, 1, 1, 1);
1794 let b = _mm_setr_epi32(1, 2, 3, 4);
1795 let r = _mm_mullo_epi32(a, b);
1796 let e = _mm_setr_epi32(1, 2, 3, 4);
1797 assert_eq_m128i(r, e);
1798 }
1799 {
1800 let a = _mm_setr_epi32(15, -2, 1234567, 99999);
1801 let b = _mm_setr_epi32(-20, -256, 666666, -99999);
1802 let r = _mm_mullo_epi32(a, b);
1803 // Attention, most significant bit in r[2] is treated
1804 // as a sign bit:
1805 // 1234567 * 666666 = -1589877210
1806 let e = _mm_setr_epi32(-300, 512, -1589877210, -1409865409);
1807 assert_eq_m128i(r, e);
1808 }
1809 }
1810
83c7162d 1811 #[simd_test(enable = "sse4.1")]
0531ce1d
XL
1812 unsafe fn test_mm_minpos_epu16() {
1813 let a = _mm_setr_epi16(8, 7, 6, 5, 4, 1, 2, 3);
1814 let r = _mm_minpos_epu16(a);
1815 let e = _mm_setr_epi16(1, 5, 0, 0, 0, 0, 0, 0);
1816 assert_eq_m128i(r, e);
1817 }
1818
83c7162d 1819 #[simd_test(enable = "sse4.1")]
0531ce1d 1820 unsafe fn test_mm_mpsadbw_epu8() {
0731742a 1821 #[rustfmt::skip]
0531ce1d
XL
1822 let a = _mm_setr_epi8(
1823 0, 1, 2, 3, 4, 5, 6, 7,
1824 8, 9, 10, 11, 12, 13, 14, 15,
1825 );
1826
17df50a5 1827 let r = _mm_mpsadbw_epu8::<0b000>(a, a);
0531ce1d
XL
1828 let e = _mm_setr_epi16(0, 4, 8, 12, 16, 20, 24, 28);
1829 assert_eq_m128i(r, e);
1830
17df50a5 1831 let r = _mm_mpsadbw_epu8::<0b001>(a, a);
0531ce1d
XL
1832 let e = _mm_setr_epi16(16, 12, 8, 4, 0, 4, 8, 12);
1833 assert_eq_m128i(r, e);
1834
17df50a5 1835 let r = _mm_mpsadbw_epu8::<0b100>(a, a);
0531ce1d
XL
1836 let e = _mm_setr_epi16(16, 20, 24, 28, 32, 36, 40, 44);
1837 assert_eq_m128i(r, e);
1838
17df50a5 1839 let r = _mm_mpsadbw_epu8::<0b101>(a, a);
0531ce1d
XL
1840 let e = _mm_setr_epi16(0, 4, 8, 12, 16, 20, 24, 28);
1841 assert_eq_m128i(r, e);
1842
17df50a5 1843 let r = _mm_mpsadbw_epu8::<0b111>(a, a);
0531ce1d
XL
1844 let e = _mm_setr_epi16(32, 28, 24, 20, 16, 12, 8, 4);
1845 assert_eq_m128i(r, e);
1846 }
1847
83c7162d 1848 #[simd_test(enable = "sse4.1")]
0531ce1d
XL
1849 unsafe fn test_mm_testz_si128() {
1850 let a = _mm_set1_epi8(1);
1851 let mask = _mm_set1_epi8(0);
1852 let r = _mm_testz_si128(a, mask);
1853 assert_eq!(r, 1);
1854 let a = _mm_set1_epi8(0b101);
1855 let mask = _mm_set1_epi8(0b110);
1856 let r = _mm_testz_si128(a, mask);
1857 assert_eq!(r, 0);
1858 let a = _mm_set1_epi8(0b011);
1859 let mask = _mm_set1_epi8(0b100);
1860 let r = _mm_testz_si128(a, mask);
1861 assert_eq!(r, 1);
1862 }
1863
83c7162d 1864 #[simd_test(enable = "sse4.1")]
0531ce1d
XL
1865 unsafe fn test_mm_testc_si128() {
1866 let a = _mm_set1_epi8(-1);
1867 let mask = _mm_set1_epi8(0);
1868 let r = _mm_testc_si128(a, mask);
1869 assert_eq!(r, 1);
1870 let a = _mm_set1_epi8(0b101);
1871 let mask = _mm_set1_epi8(0b110);
1872 let r = _mm_testc_si128(a, mask);
1873 assert_eq!(r, 0);
1874 let a = _mm_set1_epi8(0b101);
1875 let mask = _mm_set1_epi8(0b100);
1876 let r = _mm_testc_si128(a, mask);
1877 assert_eq!(r, 1);
1878 }
1879
83c7162d 1880 #[simd_test(enable = "sse4.1")]
0531ce1d
XL
1881 unsafe fn test_mm_testnzc_si128() {
1882 let a = _mm_set1_epi8(0);
1883 let mask = _mm_set1_epi8(1);
1884 let r = _mm_testnzc_si128(a, mask);
1885 assert_eq!(r, 0);
1886 let a = _mm_set1_epi8(-1);
1887 let mask = _mm_set1_epi8(0);
1888 let r = _mm_testnzc_si128(a, mask);
1889 assert_eq!(r, 0);
1890 let a = _mm_set1_epi8(0b101);
1891 let mask = _mm_set1_epi8(0b110);
1892 let r = _mm_testnzc_si128(a, mask);
1893 assert_eq!(r, 1);
1894 let a = _mm_set1_epi8(0b101);
1895 let mask = _mm_set1_epi8(0b101);
1896 let r = _mm_testnzc_si128(a, mask);
1897 assert_eq!(r, 0);
1898 }
1899
83c7162d 1900 #[simd_test(enable = "sse4.1")]
0531ce1d
XL
1901 unsafe fn test_mm_test_all_zeros() {
1902 let a = _mm_set1_epi8(1);
1903 let mask = _mm_set1_epi8(0);
1904 let r = _mm_test_all_zeros(a, mask);
1905 assert_eq!(r, 1);
1906 let a = _mm_set1_epi8(0b101);
1907 let mask = _mm_set1_epi8(0b110);
1908 let r = _mm_test_all_zeros(a, mask);
1909 assert_eq!(r, 0);
1910 let a = _mm_set1_epi8(0b011);
1911 let mask = _mm_set1_epi8(0b100);
1912 let r = _mm_test_all_zeros(a, mask);
1913 assert_eq!(r, 1);
1914 }
1915
83c7162d 1916 #[simd_test(enable = "sse4.1")]
0531ce1d
XL
1917 unsafe fn test_mm_test_all_ones() {
1918 let a = _mm_set1_epi8(-1);
1919 let r = _mm_test_all_ones(a);
1920 assert_eq!(r, 1);
1921 let a = _mm_set1_epi8(0b101);
1922 let r = _mm_test_all_ones(a);
1923 assert_eq!(r, 0);
1924 }
1925
83c7162d 1926 #[simd_test(enable = "sse4.1")]
0531ce1d
XL
1927 unsafe fn test_mm_test_mix_ones_zeros() {
1928 let a = _mm_set1_epi8(0);
1929 let mask = _mm_set1_epi8(1);
1930 let r = _mm_test_mix_ones_zeros(a, mask);
1931 assert_eq!(r, 0);
1932 let a = _mm_set1_epi8(-1);
1933 let mask = _mm_set1_epi8(0);
1934 let r = _mm_test_mix_ones_zeros(a, mask);
1935 assert_eq!(r, 0);
1936 let a = _mm_set1_epi8(0b101);
1937 let mask = _mm_set1_epi8(0b110);
1938 let r = _mm_test_mix_ones_zeros(a, mask);
1939 assert_eq!(r, 1);
1940 let a = _mm_set1_epi8(0b101);
1941 let mask = _mm_set1_epi8(0b101);
1942 let r = _mm_test_mix_ones_zeros(a, mask);
1943 assert_eq!(r, 0);
1944 }
1945}