]>
Commit | Line | Data |
---|---|---|
0531ce1d XL |
1 | //! Streaming SIMD Extensions 4.2 (SSE4.2) |
2 | //! | |
3 | //! Extends SSE4.1 with STTNI (String and Text New Instructions). | |
4 | ||
5 | #[cfg(test)] | |
416331ca | 6 | use stdarch_test::assert_instr; |
0531ce1d | 7 | |
532ac7d7 | 8 | use crate::{ |
c620b35d FG |
9 | core_arch::{simd::*, x86::*}, |
10 | intrinsics::simd::*, | |
532ac7d7 | 11 | }; |
0531ce1d XL |
12 | |
13 | /// String contains unsigned 8-bit characters *(Default)* | |
83c7162d | 14 | #[stable(feature = "simd_x86", since = "1.27.0")] |
0531ce1d XL |
15 | pub const _SIDD_UBYTE_OPS: i32 = 0b0000_0000; |
16 | /// String contains unsigned 16-bit characters | |
83c7162d | 17 | #[stable(feature = "simd_x86", since = "1.27.0")] |
0531ce1d XL |
18 | pub const _SIDD_UWORD_OPS: i32 = 0b0000_0001; |
19 | /// String contains signed 8-bit characters | |
83c7162d | 20 | #[stable(feature = "simd_x86", since = "1.27.0")] |
0531ce1d XL |
21 | pub const _SIDD_SBYTE_OPS: i32 = 0b0000_0010; |
22 | /// String contains signed 16-bit characters | |
83c7162d | 23 | #[stable(feature = "simd_x86", since = "1.27.0")] |
0531ce1d XL |
24 | pub const _SIDD_SWORD_OPS: i32 = 0b0000_0011; |
25 | ||
26 | /// For each character in `a`, find if it is in `b` *(Default)* | |
83c7162d | 27 | #[stable(feature = "simd_x86", since = "1.27.0")] |
0531ce1d XL |
28 | pub const _SIDD_CMP_EQUAL_ANY: i32 = 0b0000_0000; |
29 | /// For each character in `a`, determine if | |
30 | /// `b[0] <= c <= b[1] or b[2] <= c <= b[3]...` | |
83c7162d | 31 | #[stable(feature = "simd_x86", since = "1.27.0")] |
0531ce1d XL |
32 | pub const _SIDD_CMP_RANGES: i32 = 0b0000_0100; |
33 | /// The strings defined by `a` and `b` are equal | |
83c7162d | 34 | #[stable(feature = "simd_x86", since = "1.27.0")] |
0531ce1d XL |
35 | pub const _SIDD_CMP_EQUAL_EACH: i32 = 0b0000_1000; |
36 | /// Search for the defined substring in the target | |
83c7162d | 37 | #[stable(feature = "simd_x86", since = "1.27.0")] |
0531ce1d XL |
38 | pub const _SIDD_CMP_EQUAL_ORDERED: i32 = 0b0000_1100; |
39 | ||
40 | /// Do not negate results *(Default)* | |
83c7162d | 41 | #[stable(feature = "simd_x86", since = "1.27.0")] |
0531ce1d | 42 | pub const _SIDD_POSITIVE_POLARITY: i32 = 0b0000_0000; |
532ac7d7 | 43 | /// Negates results |
83c7162d | 44 | #[stable(feature = "simd_x86", since = "1.27.0")] |
0531ce1d XL |
45 | pub const _SIDD_NEGATIVE_POLARITY: i32 = 0b0001_0000; |
46 | /// Do not negate results before the end of the string | |
83c7162d | 47 | #[stable(feature = "simd_x86", since = "1.27.0")] |
0531ce1d | 48 | pub const _SIDD_MASKED_POSITIVE_POLARITY: i32 = 0b0010_0000; |
532ac7d7 | 49 | /// Negates results only before the end of the string |
83c7162d | 50 | #[stable(feature = "simd_x86", since = "1.27.0")] |
0531ce1d XL |
51 | pub const _SIDD_MASKED_NEGATIVE_POLARITY: i32 = 0b0011_0000; |
52 | ||
53 | /// **Index only**: return the least significant bit *(Default)* | |
83c7162d | 54 | #[stable(feature = "simd_x86", since = "1.27.0")] |
0531ce1d XL |
55 | pub const _SIDD_LEAST_SIGNIFICANT: i32 = 0b0000_0000; |
56 | /// **Index only**: return the most significant bit | |
83c7162d | 57 | #[stable(feature = "simd_x86", since = "1.27.0")] |
0531ce1d XL |
58 | pub const _SIDD_MOST_SIGNIFICANT: i32 = 0b0100_0000; |
59 | ||
60 | /// **Mask only**: return the bit mask | |
83c7162d | 61 | #[stable(feature = "simd_x86", since = "1.27.0")] |
0531ce1d XL |
62 | pub const _SIDD_BIT_MASK: i32 = 0b0000_0000; |
63 | /// **Mask only**: return the byte mask | |
83c7162d | 64 | #[stable(feature = "simd_x86", since = "1.27.0")] |
0531ce1d XL |
65 | pub const _SIDD_UNIT_MASK: i32 = 0b0100_0000; |
66 | ||
532ac7d7 | 67 | /// Compares packed strings with implicit lengths in `a` and `b` using the |
17df50a5 | 68 | /// control in `IMM8`, and return the generated mask. |
83c7162d | 69 | /// |
353b0b11 | 70 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmpistrm) |
0531ce1d XL |
71 | #[inline] |
72 | #[target_feature(enable = "sse4.2")] | |
17df50a5 XL |
73 | #[cfg_attr(test, assert_instr(pcmpistrm, IMM8 = 0))] |
74 | #[rustc_legacy_const_generics(2)] | |
75 | #[stable(feature = "simd_x86", since = "1.27.0")] | |
76 | pub unsafe fn _mm_cmpistrm<const IMM8: i32>(a: __m128i, b: __m128i) -> __m128i { | |
353b0b11 | 77 | static_assert_uimm_bits!(IMM8, 8); |
17df50a5 | 78 | transmute(pcmpistrm128(a.as_i8x16(), b.as_i8x16(), IMM8 as i8)) |
0531ce1d XL |
79 | } |
80 | ||
532ac7d7 | 81 | /// Compares packed strings with implicit lengths in `a` and `b` using the |
17df50a5 | 82 | /// control in `IMM8` and return the generated index. Similar to |
0531ce1d XL |
83 | /// [`_mm_cmpestri`] with the exception that [`_mm_cmpestri`] requires the |
84 | /// lengths of `a` and `b` to be explicitly specified. | |
85 | /// | |
86 | /// # Control modes | |
87 | /// | |
17df50a5 | 88 | /// The control specified by `IMM8` may be one or more of the following. |
0531ce1d XL |
89 | /// |
90 | /// ## Data size and signedness | |
91 | /// | |
92 | /// - [`_SIDD_UBYTE_OPS`] - Default | |
93 | /// - [`_SIDD_UWORD_OPS`] | |
94 | /// - [`_SIDD_SBYTE_OPS`] | |
95 | /// - [`_SIDD_SWORD_OPS`] | |
96 | /// | |
97 | /// ## Comparison options | |
98 | /// - [`_SIDD_CMP_EQUAL_ANY`] - Default | |
99 | /// - [`_SIDD_CMP_RANGES`] | |
100 | /// - [`_SIDD_CMP_EQUAL_EACH`] | |
101 | /// - [`_SIDD_CMP_EQUAL_ORDERED`] | |
102 | /// | |
103 | /// ## Result polarity | |
104 | /// - [`_SIDD_POSITIVE_POLARITY`] - Default | |
105 | /// - [`_SIDD_NEGATIVE_POLARITY`] | |
106 | /// | |
107 | /// ## Bit returned | |
108 | /// - [`_SIDD_LEAST_SIGNIFICANT`] - Default | |
109 | /// - [`_SIDD_MOST_SIGNIFICANT`] | |
110 | /// | |
111 | /// # Examples | |
112 | /// | |
532ac7d7 | 113 | /// Finds a substring using [`_SIDD_CMP_EQUAL_ORDERED`] |
0531ce1d XL |
114 | /// |
115 | /// ``` | |
0531ce1d XL |
116 | /// #[cfg(target_arch = "x86")] |
117 | /// use std::arch::x86::*; | |
118 | /// #[cfg(target_arch = "x86_64")] | |
119 | /// use std::arch::x86_64::*; | |
120 | /// | |
121 | /// # fn main() { | |
122 | /// # if is_x86_feature_detected!("sse4.2") { | |
123 | /// # #[target_feature(enable = "sse4.2")] | |
124 | /// # unsafe fn worker() { | |
125 | /// let haystack = b"This is a long string of text data\r\n\tthat extends | |
126 | /// multiple lines"; | |
127 | /// let needle = b"\r\n\t\0\0\0\0\0\0\0\0\0\0\0\0\0"; | |
128 | /// | |
129 | /// let a = _mm_loadu_si128(needle.as_ptr() as *const _); | |
130 | /// let hop = 16; | |
131 | /// let mut indexes = Vec::new(); | |
132 | /// | |
133 | /// // Chunk the haystack into 16 byte chunks and find | |
134 | /// // the first "\r\n\t" in the chunk. | |
135 | /// for (i, chunk) in haystack.chunks(hop).enumerate() { | |
136 | /// let b = _mm_loadu_si128(chunk.as_ptr() as *const _); | |
137 | /// let idx = _mm_cmpistri(a, b, _SIDD_CMP_EQUAL_ORDERED); | |
138 | /// if idx != 16 { | |
8faf50e0 | 139 | /// indexes.push((idx as usize) + (i * hop)); |
0531ce1d XL |
140 | /// } |
141 | /// } | |
142 | /// assert_eq!(indexes, vec![34]); | |
143 | /// # } | |
144 | /// # unsafe { worker(); } | |
145 | /// # } | |
146 | /// # } | |
147 | /// ``` | |
148 | /// | |
a2a8927a | 149 | /// The `_mm_cmpistri` intrinsic may also be used to find the existence of |
0531ce1d XL |
150 | /// one or more of a given set of characters in the haystack. |
151 | /// | |
152 | /// ``` | |
0531ce1d XL |
153 | /// #[cfg(target_arch = "x86")] |
154 | /// use std::arch::x86::*; | |
155 | /// #[cfg(target_arch = "x86_64")] | |
156 | /// use std::arch::x86_64::*; | |
157 | /// | |
158 | /// # fn main() { | |
159 | /// # if is_x86_feature_detected!("sse4.2") { | |
160 | /// # #[target_feature(enable = "sse4.2")] | |
161 | /// # unsafe fn worker() { | |
162 | /// // Ensure each input is at least 16 bytes long (the loads below are unaligned) | |
163 | /// let password = b"hunter2\0\0\0\0\0\0\0\0\0"; | |
164 | /// let special_chars = b"!@#$%^&*()[]:;<>"; | |
165 | /// | |
166 | /// // Load the input | |
167 | /// let a = _mm_loadu_si128(special_chars.as_ptr() as *const _); | |
168 | /// let b = _mm_loadu_si128(password.as_ptr() as *const _); | |
169 | /// | |
170 | /// // Use _SIDD_CMP_EQUAL_ANY to find the index of any bytes in b | |
171 | /// let idx = _mm_cmpistri(a.into(), b.into(), _SIDD_CMP_EQUAL_ANY); | |
172 | /// | |
173 | /// if idx < 16 { | |
174 | /// println!("Congrats! Your password contains a special character"); | |
175 | /// # panic!("{:?} does not contain a special character", password); | |
176 | /// } else { | |
177 | /// println!("Your password should contain a special character"); | |
178 | /// } | |
179 | /// # } | |
180 | /// # unsafe { worker(); } | |
181 | /// # } | |
182 | /// # } | |
183 | /// ``` | |
184 | /// | |
532ac7d7 | 185 | /// Finds the index of the first character in the haystack that is within a |
0531ce1d XL |
186 | /// range of characters. |
187 | /// | |
188 | /// ``` | |
0531ce1d XL |
189 | /// #[cfg(target_arch = "x86")] |
190 | /// use std::arch::x86::*; | |
191 | /// #[cfg(target_arch = "x86_64")] | |
192 | /// use std::arch::x86_64::*; | |
193 | /// | |
194 | /// # fn main() { | |
195 | /// # if is_x86_feature_detected!("sse4.2") { | |
196 | /// # #[target_feature(enable = "sse4.2")] | |
197 | /// # unsafe fn worker() { | |
198 | /// # let b = b":;<=>?@[\\]^_`abc"; | |
199 | /// # let b = _mm_loadu_si128(b.as_ptr() as *const _); | |
200 | /// | |
201 | /// // Specify the ranges of values to be searched for [A-Za-z0-9]. | |
202 | /// let a = b"AZaz09\0\0\0\0\0\0\0\0\0\0"; | |
203 | /// let a = _mm_loadu_si128(a.as_ptr() as *const _); | |
204 | /// | |
205 | /// // Use _SIDD_CMP_RANGES to find the index of first byte in ranges. | |
206 | /// // Which in this case will be the first alpha numeric byte found | |
207 | /// // in the string. | |
208 | /// let idx = _mm_cmpistri(a, b, _SIDD_CMP_RANGES); | |
209 | /// | |
210 | /// if idx < 16 { | |
211 | /// println!("Found an alpha numeric character"); | |
212 | /// # assert_eq!(idx, 13); | |
213 | /// } else { | |
214 | /// println!("Did not find an alpha numeric character"); | |
215 | /// } | |
216 | /// # } | |
217 | /// # unsafe { worker(); } | |
218 | /// # } | |
219 | /// # } | |
220 | /// ``` | |
221 | /// | |
222 | /// Working with 16-bit characters. | |
223 | /// | |
224 | /// ``` | |
0531ce1d XL |
225 | /// #[cfg(target_arch = "x86")] |
226 | /// use std::arch::x86::*; | |
227 | /// #[cfg(target_arch = "x86_64")] | |
228 | /// use std::arch::x86_64::*; | |
229 | /// | |
230 | /// # fn main() { | |
231 | /// # if is_x86_feature_detected!("sse4.2") { | |
232 | /// # #[target_feature(enable = "sse4.2")] | |
233 | /// # unsafe fn worker() { | |
234 | /// # let mut some_utf16_words = [0u16; 8]; | |
235 | /// # let mut more_utf16_words = [0u16; 8]; | |
236 | /// # '❤'.encode_utf16(&mut some_utf16_words); | |
237 | /// # '𝕊'.encode_utf16(&mut more_utf16_words); | |
238 | /// // Load the input | |
239 | /// let a = _mm_loadu_si128(some_utf16_words.as_ptr() as *const _); | |
240 | /// let b = _mm_loadu_si128(more_utf16_words.as_ptr() as *const _); | |
241 | /// | |
242 | /// // Specify _SIDD_UWORD_OPS to compare words instead of bytes, and | |
243 | /// // use _SIDD_CMP_EQUAL_EACH to compare the two strings. | |
244 | /// let idx = _mm_cmpistri(a, b, _SIDD_UWORD_OPS | _SIDD_CMP_EQUAL_EACH); | |
245 | /// | |
246 | /// if idx == 0 { | |
247 | /// println!("16-bit unicode strings were equal!"); | |
248 | /// # panic!("Strings should not be equal!") | |
249 | /// } else { | |
250 | /// println!("16-bit unicode strings were not equal!"); | |
251 | /// } | |
252 | /// # } | |
253 | /// # unsafe { worker(); } | |
254 | /// # } | |
255 | /// # } | |
256 | /// ``` | |
257 | /// | |
353b0b11 | 258 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmpistri) |
0531ce1d XL |
259 | #[inline] |
260 | #[target_feature(enable = "sse4.2")] | |
17df50a5 XL |
261 | #[cfg_attr(test, assert_instr(pcmpistri, IMM8 = 0))] |
262 | #[rustc_legacy_const_generics(2)] | |
263 | #[stable(feature = "simd_x86", since = "1.27.0")] | |
264 | pub unsafe fn _mm_cmpistri<const IMM8: i32>(a: __m128i, b: __m128i) -> i32 { | |
353b0b11 | 265 | static_assert_uimm_bits!(IMM8, 8); |
17df50a5 | 266 | pcmpistri128(a.as_i8x16(), b.as_i8x16(), IMM8 as i8) |
0531ce1d XL |
267 | } |
268 | ||
532ac7d7 | 269 | /// Compares packed strings with implicit lengths in `a` and `b` using the |
17df50a5 | 270 | /// control in `IMM8`, and return `1` if any character in `b` was null,
0531ce1d | 271 | /// and `0` otherwise. |
83c7162d | 272 | /// |
353b0b11 | 273 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmpistrz) |
0531ce1d XL |
274 | #[inline] |
275 | #[target_feature(enable = "sse4.2")] | |
17df50a5 XL |
276 | #[cfg_attr(test, assert_instr(pcmpistri, IMM8 = 0))] |
277 | #[rustc_legacy_const_generics(2)] | |
278 | #[stable(feature = "simd_x86", since = "1.27.0")] | |
279 | pub unsafe fn _mm_cmpistrz<const IMM8: i32>(a: __m128i, b: __m128i) -> i32 { | |
353b0b11 | 280 | static_assert_uimm_bits!(IMM8, 8); |
17df50a5 | 281 | pcmpistriz128(a.as_i8x16(), b.as_i8x16(), IMM8 as i8) |
0531ce1d XL |
282 | } |
283 | ||
532ac7d7 | 284 | /// Compares packed strings with implicit lengths in `a` and `b` using the |
17df50a5 | 285 | /// control in `IMM8`, and return `1` if the resulting mask was non-zero, |
0531ce1d | 286 | /// and `0` otherwise. |
83c7162d | 287 | /// |
353b0b11 | 288 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmpistrc) |
0531ce1d XL |
289 | #[inline] |
290 | #[target_feature(enable = "sse4.2")] | |
17df50a5 XL |
291 | #[cfg_attr(test, assert_instr(pcmpistri, IMM8 = 0))] |
292 | #[rustc_legacy_const_generics(2)] | |
293 | #[stable(feature = "simd_x86", since = "1.27.0")] | |
294 | pub unsafe fn _mm_cmpistrc<const IMM8: i32>(a: __m128i, b: __m128i) -> i32 { | |
353b0b11 | 295 | static_assert_uimm_bits!(IMM8, 8); |
17df50a5 | 296 | pcmpistric128(a.as_i8x16(), b.as_i8x16(), IMM8 as i8) |
0531ce1d XL |
297 | } |
298 | ||
532ac7d7 | 299 | /// Compares packed strings with implicit lengths in `a` and `b` using the |
17df50a5 | 300 | /// control in `IMM8`, and returns `1` if any character in `a` was null, |
0531ce1d | 301 | /// and `0` otherwise. |
83c7162d | 302 | /// |
353b0b11 | 303 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmpistrs) |
0531ce1d XL |
304 | #[inline] |
305 | #[target_feature(enable = "sse4.2")] | |
17df50a5 XL |
306 | #[cfg_attr(test, assert_instr(pcmpistri, IMM8 = 0))] |
307 | #[rustc_legacy_const_generics(2)] | |
308 | #[stable(feature = "simd_x86", since = "1.27.0")] | |
309 | pub unsafe fn _mm_cmpistrs<const IMM8: i32>(a: __m128i, b: __m128i) -> i32 { | |
353b0b11 | 310 | static_assert_uimm_bits!(IMM8, 8); |
17df50a5 | 311 | pcmpistris128(a.as_i8x16(), b.as_i8x16(), IMM8 as i8) |
0531ce1d XL |
312 | } |
313 | ||
532ac7d7 | 314 | /// Compares packed strings with implicit lengths in `a` and `b` using the |
17df50a5 | 315 | /// control in `IMM8`, and return bit `0` of the resulting bit mask. |
83c7162d | 316 | /// |
353b0b11 | 317 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmpistro) |
0531ce1d XL |
318 | #[inline] |
319 | #[target_feature(enable = "sse4.2")] | |
17df50a5 XL |
320 | #[cfg_attr(test, assert_instr(pcmpistri, IMM8 = 0))] |
321 | #[rustc_legacy_const_generics(2)] | |
322 | #[stable(feature = "simd_x86", since = "1.27.0")] | |
323 | pub unsafe fn _mm_cmpistro<const IMM8: i32>(a: __m128i, b: __m128i) -> i32 { | |
353b0b11 | 324 | static_assert_uimm_bits!(IMM8, 8); |
17df50a5 | 325 | pcmpistrio128(a.as_i8x16(), b.as_i8x16(), IMM8 as i8) |
0531ce1d XL |
326 | } |
327 | ||
532ac7d7 | 328 | /// Compares packed strings with implicit lengths in `a` and `b` using the |
17df50a5 | 329 | /// control in `IMM8`, and return `1` if `b` did not contain a null |
0531ce1d | 330 | /// character and the resulting mask was zero, and `0` otherwise. |
83c7162d | 331 | /// |
353b0b11 | 332 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmpistra) |
0531ce1d XL |
333 | #[inline] |
334 | #[target_feature(enable = "sse4.2")] | |
17df50a5 XL |
335 | #[cfg_attr(test, assert_instr(pcmpistri, IMM8 = 0))] |
336 | #[rustc_legacy_const_generics(2)] | |
337 | #[stable(feature = "simd_x86", since = "1.27.0")] | |
338 | pub unsafe fn _mm_cmpistra<const IMM8: i32>(a: __m128i, b: __m128i) -> i32 { | |
353b0b11 | 339 | static_assert_uimm_bits!(IMM8, 8); |
17df50a5 | 340 | pcmpistria128(a.as_i8x16(), b.as_i8x16(), IMM8 as i8) |
0531ce1d XL |
341 | } |
342 | ||
532ac7d7 | 343 | /// Compares packed strings in `a` and `b` with lengths `la` and `lb` |
17df50a5 | 344 | /// using the control in `IMM8`, and return the generated mask. |
83c7162d | 345 | /// |
353b0b11 | 346 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmpestrm) |
0531ce1d XL |
347 | #[inline] |
348 | #[target_feature(enable = "sse4.2")] | |
17df50a5 XL |
349 | #[cfg_attr(test, assert_instr(pcmpestrm, IMM8 = 0))] |
350 | #[rustc_legacy_const_generics(4)] | |
351 | #[stable(feature = "simd_x86", since = "1.27.0")] | |
352 | pub unsafe fn _mm_cmpestrm<const IMM8: i32>(a: __m128i, la: i32, b: __m128i, lb: i32) -> __m128i { | |
353b0b11 | 353 | static_assert_uimm_bits!(IMM8, 8); |
17df50a5 | 354 | transmute(pcmpestrm128(a.as_i8x16(), la, b.as_i8x16(), lb, IMM8 as i8)) |
0531ce1d XL |
355 | } |
356 | ||
532ac7d7 | 357 | /// Compares packed strings `a` and `b` with lengths `la` and `lb` using the |
17df50a5 | 358 | /// control in `IMM8` and return the generated index. Similar to |
0531ce1d XL |
359 | /// [`_mm_cmpistri`] with the exception that [`_mm_cmpistri`] implicitly |
360 | /// determines the length of `a` and `b`. | |
361 | /// | |
362 | /// # Control modes | |
363 | /// | |
17df50a5 | 364 | /// The control specified by `IMM8` may be one or more of the following. |
0531ce1d XL |
365 | /// |
366 | /// ## Data size and signedness | |
367 | /// | |
368 | /// - [`_SIDD_UBYTE_OPS`] - Default | |
369 | /// - [`_SIDD_UWORD_OPS`] | |
370 | /// - [`_SIDD_SBYTE_OPS`] | |
371 | /// - [`_SIDD_SWORD_OPS`] | |
372 | /// | |
373 | /// ## Comparison options | |
374 | /// - [`_SIDD_CMP_EQUAL_ANY`] - Default | |
375 | /// - [`_SIDD_CMP_RANGES`] | |
376 | /// - [`_SIDD_CMP_EQUAL_EACH`] | |
377 | /// - [`_SIDD_CMP_EQUAL_ORDERED`] | |
378 | /// | |
379 | /// ## Result polarity | |
380 | /// - [`_SIDD_POSITIVE_POLARITY`] - Default | |
381 | /// - [`_SIDD_NEGATIVE_POLARITY`] | |
382 | /// | |
383 | /// ## Bit returned | |
384 | /// - [`_SIDD_LEAST_SIGNIFICANT`] - Default | |
385 | /// - [`_SIDD_MOST_SIGNIFICANT`] | |
386 | /// | |
387 | /// # Examples | |
388 | /// | |
389 | /// ``` | |
0531ce1d XL |
390 | /// #[cfg(target_arch = "x86")] |
391 | /// use std::arch::x86::*; | |
392 | /// #[cfg(target_arch = "x86_64")] | |
393 | /// use std::arch::x86_64::*; | |
394 | /// | |
395 | /// # fn main() { | |
396 | /// # if is_x86_feature_detected!("sse4.2") { | |
397 | /// # #[target_feature(enable = "sse4.2")] | |
398 | /// # unsafe fn worker() { | |
399 | /// | |
400 | /// // The string we want to find a substring in | |
401 | /// let haystack = b"Split \r\n\t line "; | |
402 | /// | |
403 | /// // The string we want to search for with some | |
404 | /// // extra bytes we do not want to search for. | |
405 | /// let needle = b"\r\n\t ignore this "; | |
406 | /// | |
407 | /// let a = _mm_loadu_si128(needle.as_ptr() as *const _); | |
408 | /// let b = _mm_loadu_si128(haystack.as_ptr() as *const _); | |
409 | /// | |
410 | /// // Note: We explicitly specify we only want to search `b` for the | |
411 | /// // first 3 characters of a. | |
412 | /// let idx = _mm_cmpestri(a, 3, b, 15, _SIDD_CMP_EQUAL_ORDERED); | |
413 | /// | |
414 | /// assert_eq!(idx, 6); | |
415 | /// # } | |
416 | /// # unsafe { worker(); } | |
417 | /// # } | |
418 | /// # } | |
419 | /// ``` | |
420 | /// | |
421 | /// [`_SIDD_UBYTE_OPS`]: constant._SIDD_UBYTE_OPS.html | |
422 | /// [`_SIDD_UWORD_OPS`]: constant._SIDD_UWORD_OPS.html | |
423 | /// [`_SIDD_SBYTE_OPS`]: constant._SIDD_SBYTE_OPS.html | |
424 | /// [`_SIDD_SWORD_OPS`]: constant._SIDD_SWORD_OPS.html | |
425 | /// [`_SIDD_CMP_EQUAL_ANY`]: constant._SIDD_CMP_EQUAL_ANY.html | |
426 | /// [`_SIDD_CMP_RANGES`]: constant._SIDD_CMP_RANGES.html | |
427 | /// [`_SIDD_CMP_EQUAL_EACH`]: constant._SIDD_CMP_EQUAL_EACH.html | |
428 | /// [`_SIDD_CMP_EQUAL_ORDERED`]: constant._SIDD_CMP_EQUAL_ORDERED.html | |
429 | /// [`_SIDD_POSITIVE_POLARITY`]: constant._SIDD_POSITIVE_POLARITY.html | |
430 | /// [`_SIDD_NEGATIVE_POLARITY`]: constant._SIDD_NEGATIVE_POLARITY.html | |
431 | /// [`_SIDD_LEAST_SIGNIFICANT`]: constant._SIDD_LEAST_SIGNIFICANT.html | |
432 | /// [`_SIDD_MOST_SIGNIFICANT`]: constant._SIDD_MOST_SIGNIFICANT.html | |
433 | /// [`_mm_cmpistri`]: fn._mm_cmpistri.html | |
83c7162d | 434 | /// |
353b0b11 | 435 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmpestri) |
0531ce1d XL |
436 | #[inline] |
437 | #[target_feature(enable = "sse4.2")] | |
17df50a5 XL |
438 | #[cfg_attr(test, assert_instr(pcmpestri, IMM8 = 0))] |
439 | #[rustc_legacy_const_generics(4)] | |
440 | #[stable(feature = "simd_x86", since = "1.27.0")] | |
441 | pub unsafe fn _mm_cmpestri<const IMM8: i32>(a: __m128i, la: i32, b: __m128i, lb: i32) -> i32 { | |
353b0b11 | 442 | static_assert_uimm_bits!(IMM8, 8); |
17df50a5 | 443 | pcmpestri128(a.as_i8x16(), la, b.as_i8x16(), lb, IMM8 as i8) |
0531ce1d XL |
444 | } |
445 | ||
532ac7d7 | 446 | /// Compares packed strings in `a` and `b` with lengths `la` and `lb` |
17df50a5 | 447 | /// using the control in `IMM8`, and return `1` if any character in |
0531ce1d | 448 | /// `b` was null, and `0` otherwise. |
83c7162d | 449 | /// |
353b0b11 | 450 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmpestrz) |
0531ce1d XL |
451 | #[inline] |
452 | #[target_feature(enable = "sse4.2")] | |
17df50a5 XL |
453 | #[cfg_attr(test, assert_instr(pcmpestri, IMM8 = 0))] |
454 | #[rustc_legacy_const_generics(4)] | |
455 | #[stable(feature = "simd_x86", since = "1.27.0")] | |
456 | pub unsafe fn _mm_cmpestrz<const IMM8: i32>(a: __m128i, la: i32, b: __m128i, lb: i32) -> i32 { | |
353b0b11 | 457 | static_assert_uimm_bits!(IMM8, 8); |
17df50a5 | 458 | pcmpestriz128(a.as_i8x16(), la, b.as_i8x16(), lb, IMM8 as i8) |
0531ce1d XL |
459 | } |
460 | ||
532ac7d7 | 461 | /// Compares packed strings in `a` and `b` with lengths `la` and `lb` |
17df50a5 | 462 | /// using the control in `IMM8`, and return `1` if the resulting mask |
0531ce1d | 463 | /// was non-zero, and `0` otherwise. |
83c7162d | 464 | /// |
353b0b11 | 465 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmpestrc) |
0531ce1d XL |
466 | #[inline] |
467 | #[target_feature(enable = "sse4.2")] | |
17df50a5 XL |
468 | #[cfg_attr(test, assert_instr(pcmpestri, IMM8 = 0))] |
469 | #[rustc_legacy_const_generics(4)] | |
470 | #[stable(feature = "simd_x86", since = "1.27.0")] | |
471 | pub unsafe fn _mm_cmpestrc<const IMM8: i32>(a: __m128i, la: i32, b: __m128i, lb: i32) -> i32 { | |
353b0b11 | 472 | static_assert_uimm_bits!(IMM8, 8); |
17df50a5 | 473 | pcmpestric128(a.as_i8x16(), la, b.as_i8x16(), lb, IMM8 as i8) |
0531ce1d XL |
474 | } |
475 | ||
532ac7d7 | 476 | /// Compares packed strings in `a` and `b` with lengths `la` and `lb` |
17df50a5 | 477 | /// using the control in `IMM8`, and return `1` if any character in |
0531ce1d | 478 | /// `a` was null, and `0` otherwise.
83c7162d | 479 | /// |
353b0b11 | 480 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmpestrs) |
0531ce1d XL |
481 | #[inline] |
482 | #[target_feature(enable = "sse4.2")] | |
17df50a5 XL |
483 | #[cfg_attr(test, assert_instr(pcmpestri, IMM8 = 0))] |
484 | #[rustc_legacy_const_generics(4)] | |
485 | #[stable(feature = "simd_x86", since = "1.27.0")] | |
486 | pub unsafe fn _mm_cmpestrs<const IMM8: i32>(a: __m128i, la: i32, b: __m128i, lb: i32) -> i32 { | |
353b0b11 | 487 | static_assert_uimm_bits!(IMM8, 8); |
17df50a5 | 488 | pcmpestris128(a.as_i8x16(), la, b.as_i8x16(), lb, IMM8 as i8) |
0531ce1d XL |
489 | } |
490 | ||
532ac7d7 | 491 | /// Compares packed strings in `a` and `b` with lengths `la` and `lb` |
17df50a5 | 492 | /// using the control in `IMM8`, and return bit `0` of the resulting |
0531ce1d | 493 | /// bit mask. |
83c7162d | 494 | /// |
353b0b11 | 495 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmpestro) |
0531ce1d XL |
496 | #[inline] |
497 | #[target_feature(enable = "sse4.2")] | |
17df50a5 XL |
498 | #[cfg_attr(test, assert_instr(pcmpestri, IMM8 = 0))] |
499 | #[rustc_legacy_const_generics(4)] | |
500 | #[stable(feature = "simd_x86", since = "1.27.0")] | |
501 | pub unsafe fn _mm_cmpestro<const IMM8: i32>(a: __m128i, la: i32, b: __m128i, lb: i32) -> i32 { | |
353b0b11 | 502 | static_assert_uimm_bits!(IMM8, 8); |
17df50a5 | 503 | pcmpestrio128(a.as_i8x16(), la, b.as_i8x16(), lb, IMM8 as i8) |
0531ce1d XL |
504 | } |
505 | ||
532ac7d7 | 506 | /// Compares packed strings in `a` and `b` with lengths `la` and `lb` |
17df50a5 | 507 | /// using the control in `IMM8`, and return `1` if `b` did not |
0531ce1d XL |
508 | /// contain a null character and the resulting mask was zero, and `0` |
509 | /// otherwise. | |
83c7162d | 510 | /// |
353b0b11 | 511 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmpestra) |
0531ce1d XL |
512 | #[inline] |
513 | #[target_feature(enable = "sse4.2")] | |
17df50a5 XL |
514 | #[cfg_attr(test, assert_instr(pcmpestri, IMM8 = 0))] |
515 | #[rustc_legacy_const_generics(4)] | |
516 | #[stable(feature = "simd_x86", since = "1.27.0")] | |
517 | pub unsafe fn _mm_cmpestra<const IMM8: i32>(a: __m128i, la: i32, b: __m128i, lb: i32) -> i32 { | |
353b0b11 | 518 | static_assert_uimm_bits!(IMM8, 8); |
17df50a5 | 519 | pcmpestria128(a.as_i8x16(), la, b.as_i8x16(), lb, IMM8 as i8) |
0531ce1d XL |
520 | } |
521 | ||
522 | /// Starting with the initial value in `crc`, return the accumulated | |
5099ac24 | 523 | /// CRC32-C value for unsigned 8-bit integer `v`. |
83c7162d | 524 | /// |
353b0b11 | 525 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_crc32_u8) |
0531ce1d XL |
526 | #[inline] |
527 | #[target_feature(enable = "sse4.2")] | |
528 | #[cfg_attr(test, assert_instr(crc32))] | |
83c7162d | 529 | #[stable(feature = "simd_x86", since = "1.27.0")] |
0531ce1d XL |
530 | pub unsafe fn _mm_crc32_u8(crc: u32, v: u8) -> u32 { |
531 | crc32_32_8(crc, v) | |
532 | } | |
533 | ||
534 | /// Starting with the initial value in `crc`, return the accumulated | |
5099ac24 | 535 | /// CRC32-C value for unsigned 16-bit integer `v`. |
83c7162d | 536 | /// |
353b0b11 | 537 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_crc32_u16) |
0531ce1d XL |
538 | #[inline] |
539 | #[target_feature(enable = "sse4.2")] | |
540 | #[cfg_attr(test, assert_instr(crc32))] | |
83c7162d | 541 | #[stable(feature = "simd_x86", since = "1.27.0")] |
0531ce1d XL |
542 | pub unsafe fn _mm_crc32_u16(crc: u32, v: u16) -> u32 { |
543 | crc32_32_16(crc, v) | |
544 | } | |
545 | ||
546 | /// Starting with the initial value in `crc`, return the accumulated | |
5099ac24 | 547 | /// CRC32-C value for unsigned 32-bit integer `v`. |
83c7162d | 548 | /// |
353b0b11 | 549 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_crc32_u32) |
0531ce1d XL |
550 | #[inline] |
551 | #[target_feature(enable = "sse4.2")] | |
552 | #[cfg_attr(test, assert_instr(crc32))] | |
83c7162d | 553 | #[stable(feature = "simd_x86", since = "1.27.0")] |
0531ce1d XL |
554 | pub unsafe fn _mm_crc32_u32(crc: u32, v: u32) -> u32 { |
555 | crc32_32_32(crc, v) | |
556 | } | |
557 | ||
532ac7d7 | 558 | /// Compares packed 64-bit integers in `a` and `b` for greater-than, |
0531ce1d | 559 | /// return the results. |
83c7162d | 560 | /// |
353b0b11 | 561 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmpgt_epi64) |
0531ce1d XL |
562 | #[inline] |
563 | #[target_feature(enable = "sse4.2")] | |
564 | #[cfg_attr(test, assert_instr(pcmpgtq))] | |
83c7162d | 565 | #[stable(feature = "simd_x86", since = "1.27.0")] |
0531ce1d | 566 | pub unsafe fn _mm_cmpgt_epi64(a: __m128i, b: __m128i) -> __m128i { |
532ac7d7 | 567 | transmute(simd_gt::<_, i64x2>(a.as_i64x2(), b.as_i64x2())) |
0531ce1d XL |
568 | } |
569 | ||
570 | #[allow(improper_ctypes)] | |
571 | extern "C" { | |
572 | // SSE 4.2 string and text comparison ops | |
573 | #[link_name = "llvm.x86.sse42.pcmpestrm128"] | |
574 | fn pcmpestrm128(a: i8x16, la: i32, b: i8x16, lb: i32, imm8: i8) -> u8x16; | |
575 | #[link_name = "llvm.x86.sse42.pcmpestri128"] | |
576 | fn pcmpestri128(a: i8x16, la: i32, b: i8x16, lb: i32, imm8: i8) -> i32; | |
577 | #[link_name = "llvm.x86.sse42.pcmpestriz128"] | |
578 | fn pcmpestriz128(a: i8x16, la: i32, b: i8x16, lb: i32, imm8: i8) -> i32; | |
579 | #[link_name = "llvm.x86.sse42.pcmpestric128"] | |
580 | fn pcmpestric128(a: i8x16, la: i32, b: i8x16, lb: i32, imm8: i8) -> i32; | |
581 | #[link_name = "llvm.x86.sse42.pcmpestris128"] | |
582 | fn pcmpestris128(a: i8x16, la: i32, b: i8x16, lb: i32, imm8: i8) -> i32; | |
583 | #[link_name = "llvm.x86.sse42.pcmpestrio128"] | |
584 | fn pcmpestrio128(a: i8x16, la: i32, b: i8x16, lb: i32, imm8: i8) -> i32; | |
585 | #[link_name = "llvm.x86.sse42.pcmpestria128"] | |
586 | fn pcmpestria128(a: i8x16, la: i32, b: i8x16, lb: i32, imm8: i8) -> i32; | |
587 | #[link_name = "llvm.x86.sse42.pcmpistrm128"] | |
588 | fn pcmpistrm128(a: i8x16, b: i8x16, imm8: i8) -> i8x16; | |
589 | #[link_name = "llvm.x86.sse42.pcmpistri128"] | |
590 | fn pcmpistri128(a: i8x16, b: i8x16, imm8: i8) -> i32; | |
591 | #[link_name = "llvm.x86.sse42.pcmpistriz128"] | |
592 | fn pcmpistriz128(a: i8x16, b: i8x16, imm8: i8) -> i32; | |
593 | #[link_name = "llvm.x86.sse42.pcmpistric128"] | |
594 | fn pcmpistric128(a: i8x16, b: i8x16, imm8: i8) -> i32; | |
595 | #[link_name = "llvm.x86.sse42.pcmpistris128"] | |
596 | fn pcmpistris128(a: i8x16, b: i8x16, imm8: i8) -> i32; | |
597 | #[link_name = "llvm.x86.sse42.pcmpistrio128"] | |
598 | fn pcmpistrio128(a: i8x16, b: i8x16, imm8: i8) -> i32; | |
599 | #[link_name = "llvm.x86.sse42.pcmpistria128"] | |
600 | fn pcmpistria128(a: i8x16, b: i8x16, imm8: i8) -> i32; | |
601 | // SSE 4.2 CRC instructions | |
602 | #[link_name = "llvm.x86.sse42.crc32.32.8"] | |
603 | fn crc32_32_8(crc: u32, v: u8) -> u32; | |
604 | #[link_name = "llvm.x86.sse42.crc32.32.16"] | |
605 | fn crc32_32_16(crc: u32, v: u16) -> u32; | |
606 | #[link_name = "llvm.x86.sse42.crc32.32.32"] | |
607 | fn crc32_32_32(crc: u32, v: u32) -> u32; | |
608 | } | |
609 | ||
#[cfg(test)]
mod tests {
    use stdarch_test::simd_test;

    use crate::core_arch::x86::*;

    /// Builds a `__m128i` from a byte string of at most 16 bytes.
    ///
    /// `_mm_loadu_si128` always reads a full 16 bytes, so inputs shorter than
    /// that cannot be loaded directly. The bytes are first copied into a
    /// zero-filled 16-byte buffer and the buffer is loaded instead; any unused
    /// tail therefore stays zero.
    ///
    /// # Panics
    ///
    /// Panics if `s` is longer than 16 bytes.
    #[target_feature(enable = "sse4.2")]
    unsafe fn str_to_m128i(s: &[u8]) -> __m128i {
        assert!(s.len() <= 16);
        let mut buf = [0u8; 16];
        // Safe slice copy; the assert above guarantees the range is in bounds.
        buf[..s.len()].copy_from_slice(s);
        _mm_loadu_si128(buf.as_ptr() as *const _)
    }

    /// The two inputs differ only in the case of the letters at byte
    /// positions 0, 7 and 12; the expected unit mask is zero exactly there.
    #[simd_test(enable = "sse4.2")]
    unsafe fn test_mm_cmpistrm() {
        let a = str_to_m128i(b"Hello! Good-Bye!");
        let b = str_to_m128i(b"hello! good-bye!");
        let mask = _mm_cmpistrm::<_SIDD_UNIT_MASK>(a, b);
        #[rustfmt::skip]
        let expected = _mm_setr_epi8(
            0x00, !0, !0, !0, !0, !0, !0, 0x00,
            !0, !0, !0, !0, 0x00, !0, !0, !0,
        );
        assert_eq_m128i(mask, expected);
    }

    /// Ordered (substring) search: the needle `a` starts at byte index 3 of
    /// the haystack `b`, after three bytes of leading padding.
    #[simd_test(enable = "sse4.2")]
    unsafe fn test_mm_cmpistri() {
        let a = str_to_m128i(b"Hello");
        // Exactly three leading spaces so the match begins at index 3.
        let b = str_to_m128i(b"   Hello   ");
        let index = _mm_cmpistri::<_SIDD_CMP_EQUAL_ORDERED>(a, b);
        assert_eq!(3, index);
    }

    /// `b` is shorter than 16 bytes, so the register holds a terminating
    /// null; the test expects `1`.
    #[simd_test(enable = "sse4.2")]
    unsafe fn test_mm_cmpistrz() {
        let a = str_to_m128i(b"");
        let b = str_to_m128i(b"Hello");
        let flag = _mm_cmpistrz::<_SIDD_CMP_EQUAL_ORDERED>(a, b);
        assert_eq!(1, flag);
    }

    /// `b` begins with a byte that also occurs in `a`, so the comparison
    /// result is non-zero; the test expects `1`.
    #[simd_test(enable = "sse4.2")]
    unsafe fn test_mm_cmpistrc() {
        let a = str_to_m128i(b" ");
        let b = str_to_m128i(b" ! ");
        let flag = _mm_cmpistrc::<_SIDD_UNIT_MASK>(a, b);
        assert_eq!(1, flag);
    }

    /// `a` is shorter than 16 bytes, so the register holds a terminating
    /// null; the test expects `1`.
    #[simd_test(enable = "sse4.2")]
    unsafe fn test_mm_cmpistrs() {
        let a = str_to_m128i(b"Hello");
        let b = str_to_m128i(b"");
        let flag = _mm_cmpistrs::<_SIDD_CMP_EQUAL_ORDERED>(a, b);
        assert_eq!(1, flag);
    }

    /// 16-bit character comparison: the first word of `b` (`0x48` in its
    /// second byte) does not occur among the words of `a` (which starts with
    /// `0x47`), so the test expects `0`.
    #[simd_test(enable = "sse4.2")]
    unsafe fn test_mm_cmpistro() {
        #[rustfmt::skip]
        let a = _mm_setr_epi8(
            0x00, 0x47, 0x00, 0x65, 0x00, 0x6c, 0x00, 0x6c,
            0x00, 0x6f, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
        );
        #[rustfmt::skip]
        let b = _mm_setr_epi8(
            0x00, 0x48, 0x00, 0x65, 0x00, 0x6c, 0x00, 0x6c,
            0x00, 0x6f, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
        );
        let flag = _mm_cmpistro::<{ _SIDD_UWORD_OPS | _SIDD_UNIT_MASK }>(a, b);
        assert_eq!(0, flag);
    }

    /// `a` is empty and `b` fills all 16 bytes (no terminating null in the
    /// register); the test expects `1`.
    #[simd_test(enable = "sse4.2")]
    unsafe fn test_mm_cmpistra() {
        let a = str_to_m128i(b"");
        let b = str_to_m128i(b"Hello!!!!!!!!!!!");
        let flag = _mm_cmpistra::<_SIDD_UNIT_MASK>(a, b);
        assert_eq!(1, flag);
    }

    /// Explicit lengths of 5 restrict the comparison to `"Hello"` on both
    /// sides, so only the first five mask bytes are expected to be set.
    #[simd_test(enable = "sse4.2")]
    unsafe fn test_mm_cmpestrm() {
        let a = str_to_m128i(b"Hello!");
        let b = str_to_m128i(b"Hello.");
        let mask = _mm_cmpestrm::<_SIDD_UNIT_MASK>(a, 5, b, 5);
        #[rustfmt::skip]
        let expected = _mm_setr_epi8(
            !0, !0, !0, !0, !0, 0x00, 0x00, 0x00,
            0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00
        );
        assert_eq_m128i(mask, expected);
    }

    /// Ordered search with explicit lengths: only the first 3 bytes of `a`
    /// (`"bar"`) are used, and they start at index 3 of `"foobar"`.
    #[simd_test(enable = "sse4.2")]
    unsafe fn test_mm_cmpestri() {
        let a = str_to_m128i(b"bar - garbage");
        let b = str_to_m128i(b"foobar");
        let index = _mm_cmpestri::<_SIDD_CMP_EQUAL_ORDERED>(a, 3, b, 6);
        assert_eq!(3, index);
    }

    /// The explicit length passed for `b` (6) is below 16; the test expects `1`.
    #[simd_test(enable = "sse4.2")]
    unsafe fn test_mm_cmpestrz() {
        let a = str_to_m128i(b"");
        let b = str_to_m128i(b"Hello");
        let flag = _mm_cmpestrz::<_SIDD_CMP_EQUAL_ORDERED>(a, 16, b, 6);
        assert_eq!(1, flag);
    }

    /// No byte within the first 7 of `vb` occurs within the first 7 of `va`,
    /// so the comparison result is zero; the test expects `0`.
    #[simd_test(enable = "sse4.2")]
    unsafe fn test_mm_cmpestrc() {
        let va = str_to_m128i(b"!!!!!!!!");
        let vb = str_to_m128i(b" ");
        let flag = _mm_cmpestrc::<_SIDD_UNIT_MASK>(va, 7, vb, 7);
        assert_eq!(0, flag);
    }

    /// With 16-bit characters a register holds 8 elements, and the explicit
    /// length passed for `a` is 8; the test expects `0`.
    #[simd_test(enable = "sse4.2")]
    unsafe fn test_mm_cmpestrs() {
        #[rustfmt::skip]
        let a = _mm_setr_epi8(
            0x00, 0x48, 0x00, 0x65, 0x00, 0x6c, 0x00, 0x6c,
            0x00, 0x6f, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
        );
        let b = _mm_set1_epi8(0x00);
        let flag = _mm_cmpestrs::<_SIDD_UWORD_OPS>(a, 8, b, 0);
        assert_eq!(0, flag);
    }

    /// The first byte of `b` (`'W'`) does not occur in `"Hello"`; the test
    /// expects `0`.
    #[simd_test(enable = "sse4.2")]
    unsafe fn test_mm_cmpestro() {
        let a = str_to_m128i(b"Hello");
        let b = str_to_m128i(b"World");
        let flag = _mm_cmpestro::<_SIDD_UBYTE_OPS>(a, 5, b, 5);
        assert_eq!(0, flag);
    }

    /// Position-wise comparison of two strings that share no byte at the same
    /// index, with `b` given the full length of 16; the test expects `1`.
    #[simd_test(enable = "sse4.2")]
    unsafe fn test_mm_cmpestra() {
        let a = str_to_m128i(b"Cannot match a");
        let b = str_to_m128i(b"Null after 14");
        let flag = _mm_cmpestra::<{ _SIDD_CMP_EQUAL_EACH | _SIDD_UNIT_MASK }>(a, 14, b, 16);
        assert_eq!(1, flag);
    }

    /// Known-answer check for accumulating one byte into a CRC32 value.
    #[simd_test(enable = "sse4.2")]
    unsafe fn test_mm_crc32_u8() {
        let crc = 0x2aa1e72b;
        let v = 0x2a;
        let updated = _mm_crc32_u8(crc, v);
        assert_eq!(updated, 0xf24122e4);
    }

    /// Known-answer check for accumulating a 16-bit value into a CRC32 value.
    #[simd_test(enable = "sse4.2")]
    unsafe fn test_mm_crc32_u16() {
        let crc = 0x8ecec3b5;
        let v = 0x22b;
        let updated = _mm_crc32_u16(crc, v);
        assert_eq!(updated, 0x13bb2fb);
    }

    /// Known-answer check for accumulating a 32-bit value into a CRC32 value.
    #[simd_test(enable = "sse4.2")]
    unsafe fn test_mm_crc32_u32() {
        let crc = 0xae2912c8;
        let v = 0x845fed;
        let updated = _mm_crc32_u32(crc, v);
        assert_eq!(updated, 0xffae2ed1);
    }

    /// Lane-wise signed 64-bit `>`: `0 > 0` is false (all bits clear) and
    /// `0x2a > 0` is true (all bits set).
    #[simd_test(enable = "sse4.2")]
    unsafe fn test_mm_cmpgt_epi64() {
        let a = _mm_setr_epi64x(0, 0x2a);
        let b = _mm_set1_epi64x(0x00);
        let mask = _mm_cmpgt_epi64(a, b);
        // `-1` as `i64` is the all-ones bit pattern.
        assert_eq_m128i(mask, _mm_setr_epi64x(0x00, -1));
    }
}