]> git.proxmox.com Git - rustc.git/blob - src/stdsimd/coresimd/x86/sse42.rs
New upstream version 1.29.0+dfsg1
[rustc.git] / src / stdsimd / coresimd / x86 / sse42.rs
1 //! Streaming SIMD Extensions 4.2 (SSE4.2)
2 //!
3 //! Extends SSE4.1 with STTNI (String and Text New Instructions).
4
5 #[cfg(test)]
6 use stdsimd_test::assert_instr;
7
8 use coresimd::simd::*;
9 use coresimd::simd_llvm::*;
10 use coresimd::x86::*;
11
// Element-format selectors: bits 0-1 of the `imm8` control. Bit 0 picks
// 16-bit elements over 8-bit ones, bit 1 picks signed over unsigned.

/// String contains unsigned 8-bit characters *(Default)*
#[stable(feature = "simd_x86", since = "1.27.0")]
pub const _SIDD_UBYTE_OPS: i32 = 0b0000_0000;
/// String contains unsigned 16-bit characters
#[stable(feature = "simd_x86", since = "1.27.0")]
pub const _SIDD_UWORD_OPS: i32 = 0b0000_0001;
/// String contains signed 8-bit characters
#[stable(feature = "simd_x86", since = "1.27.0")]
pub const _SIDD_SBYTE_OPS: i32 = 0b0000_0010;
/// String contains signed 16-bit characters
#[stable(feature = "simd_x86", since = "1.27.0")]
pub const _SIDD_SWORD_OPS: i32 = 0b0000_0011;
24
// Comparison-mode selectors: bits 2-3 of the `imm8` control.

/// For each character in `b`, find whether it also occurs in the set of
/// characters given by `a` *(Default)*
#[stable(feature = "simd_x86", since = "1.27.0")]
pub const _SIDD_CMP_EQUAL_ANY: i32 = 0b0000_0000;
/// For each character `c` in `b`, determine whether it lies in one of the
/// inclusive ranges stored pairwise in `a`:
/// `a[0] <= c <= a[1] or a[2] <= c <= a[3]...`
#[stable(feature = "simd_x86", since = "1.27.0")]
pub const _SIDD_CMP_RANGES: i32 = 0b0000_0100;
/// The strings defined by `a` and `b` are equal
#[stable(feature = "simd_x86", since = "1.27.0")]
pub const _SIDD_CMP_EQUAL_EACH: i32 = 0b0000_1000;
/// Search for the defined substring in the target (`a` holds the
/// substring, `b` the text being searched)
#[stable(feature = "simd_x86", since = "1.27.0")]
pub const _SIDD_CMP_EQUAL_ORDERED: i32 = 0b0000_1100;
38
// Result-polarity selectors: bits 4-5 of the `imm8` control.

/// Do not negate results *(Default)*
#[stable(feature = "simd_x86", since = "1.27.0")]
pub const _SIDD_POSITIVE_POLARITY: i32 = 0b0000_0000;
/// Negate results
#[stable(feature = "simd_x86", since = "1.27.0")]
pub const _SIDD_NEGATIVE_POLARITY: i32 = 0b0001_0000;
/// Do not negate results before the end of the string
#[stable(feature = "simd_x86", since = "1.27.0")]
pub const _SIDD_MASKED_POSITIVE_POLARITY: i32 = 0b0010_0000;
/// Negate results only before the end of the string
#[stable(feature = "simd_x86", since = "1.27.0")]
pub const _SIDD_MASKED_NEGATIVE_POLARITY: i32 = 0b0011_0000;
51
// Bit 6 of the `imm8` control, as read by the index-returning intrinsics.
// The mask-returning intrinsics give the same bit a different meaning
// (`_SIDD_BIT_MASK` / `_SIDD_UNIT_MASK`).

/// **Index only**: return the least significant bit *(Default)*
#[stable(feature = "simd_x86", since = "1.27.0")]
pub const _SIDD_LEAST_SIGNIFICANT: i32 = 0b0000_0000;
/// **Index only**: return the most significant bit
#[stable(feature = "simd_x86", since = "1.27.0")]
pub const _SIDD_MOST_SIGNIFICANT: i32 = 0b0100_0000;
58
// Bit 6 of the `imm8` control, as read by the mask-returning intrinsics.
// The index-returning intrinsics give the same bit a different meaning
// (`_SIDD_LEAST_SIGNIFICANT` / `_SIDD_MOST_SIGNIFICANT`).

/// **Mask only**: return the bit mask
#[stable(feature = "simd_x86", since = "1.27.0")]
pub const _SIDD_BIT_MASK: i32 = 0b0000_0000;
/// **Mask only**: return the byte mask
#[stable(feature = "simd_x86", since = "1.27.0")]
pub const _SIDD_UNIT_MASK: i32 = 0b0100_0000;
65
66 /// Compare packed strings with implicit lengths in `a` and `b` using the
67 /// control in `imm8`, and return the generated mask.
68 ///
69 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cmpistrm)
70 #[inline]
71 #[target_feature(enable = "sse4.2")]
72 #[cfg_attr(test, assert_instr(pcmpistrm, imm8 = 0))]
73 #[rustc_args_required_const(2)]
74 #[stable(feature = "simd_x86", since = "1.27.0")]
75 pub unsafe fn _mm_cmpistrm(a: __m128i, b: __m128i, imm8: i32) -> __m128i {
76 let a = a.as_i8x16();
77 let b = b.as_i8x16();
78 macro_rules! call {
79 ($imm8:expr) => {
80 pcmpistrm128(a, b, $imm8)
81 };
82 }
83 mem::transmute(constify_imm8!(imm8, call))
84 }
85
86 /// Compare packed strings with implicit lengths in `a` and `b` using the
87 /// control in `imm8` and return the generated index. Similar to
88 /// [`_mm_cmpestri`] with the exception that [`_mm_cmpestri`] requires the
89 /// lengths of `a` and `b` to be explicitly specified.
90 ///
91 /// # Control modes
92 ///
93 /// The control specified by `imm8` may be one or more of the following.
94 ///
95 /// ## Data size and signedness
96 ///
97 /// - [`_SIDD_UBYTE_OPS`] - Default
98 /// - [`_SIDD_UWORD_OPS`]
99 /// - [`_SIDD_SBYTE_OPS`]
100 /// - [`_SIDD_SWORD_OPS`]
101 ///
102 /// ## Comparison options
103 /// - [`_SIDD_CMP_EQUAL_ANY`] - Default
104 /// - [`_SIDD_CMP_RANGES`]
105 /// - [`_SIDD_CMP_EQUAL_EACH`]
106 /// - [`_SIDD_CMP_EQUAL_ORDERED`]
107 ///
108 /// ## Result polarity
109 /// - [`_SIDD_POSITIVE_POLARITY`] - Default
110 /// - [`_SIDD_NEGATIVE_POLARITY`]
111 ///
112 /// ## Bit returned
113 /// - [`_SIDD_LEAST_SIGNIFICANT`] - Default
114 /// - [`_SIDD_MOST_SIGNIFICANT`]
115 ///
116 /// # Examples
117 ///
118 /// Find a substring using [`_SIDD_CMP_EQUAL_ORDERED`]
119 ///
120 /// ```
121 /// # #![feature(stdsimd)]
122 /// # #![cfg_attr(not(dox), no_std)]
123 /// # #[cfg(not(dox))]
124 /// # #[macro_use]
125 /// # extern crate std as real_std;
126 /// # #[cfg(not(dox))]
127 /// # #[macro_use]
128 /// # extern crate stdsimd as std;
129 /// # #[cfg(not(dox))]
130 /// # use real_std::prelude::v1::*;
131 /// #[cfg(target_arch = "x86")]
132 /// use std::arch::x86::*;
133 /// #[cfg(target_arch = "x86_64")]
134 /// use std::arch::x86_64::*;
135 ///
136 /// # fn main() {
137 /// # if is_x86_feature_detected!("sse4.2") {
138 /// # #[target_feature(enable = "sse4.2")]
139 /// # unsafe fn worker() {
140 /// let haystack = b"This is a long string of text data\r\n\tthat extends
141 /// multiple lines";
142 /// let needle = b"\r\n\t\0\0\0\0\0\0\0\0\0\0\0\0\0";
143 ///
144 /// let a = _mm_loadu_si128(needle.as_ptr() as *const _);
145 /// let hop = 16;
146 /// let mut indexes = Vec::new();
147 ///
148 /// // Chunk the haystack into 16 byte chunks and find
149 /// // the first "\r\n\t" in the chunk.
150 /// for (i, chunk) in haystack.chunks(hop).enumerate() {
151 /// let b = _mm_loadu_si128(chunk.as_ptr() as *const _);
152 /// let idx = _mm_cmpistri(a, b, _SIDD_CMP_EQUAL_ORDERED);
153 /// if idx != 16 {
154 /// indexes.push((idx as usize) + (i * hop));
155 /// }
156 /// }
157 /// assert_eq!(indexes, vec![34]);
158 /// # }
159 /// # unsafe { worker(); }
160 /// # }
161 /// # }
162 /// ```
163 ///
164 /// The `_mm_cmpistri` intrinsic may also be used to find the existance of
165 /// one or more of a given set of characters in the haystack.
166 ///
167 /// ```
168 /// # #![feature(stdsimd)]
169 /// # #![cfg_attr(not(dox), no_std)]
170 /// # #[cfg(not(dox))]
171 /// # #[macro_use]
172 /// # extern crate std as real_std;
173 /// # #[cfg(not(dox))]
174 /// # #[macro_use]
175 /// # extern crate stdsimd as std;
176 /// #[cfg(target_arch = "x86")]
177 /// use std::arch::x86::*;
178 /// #[cfg(target_arch = "x86_64")]
179 /// use std::arch::x86_64::*;
180 ///
181 /// # fn main() {
182 /// # if is_x86_feature_detected!("sse4.2") {
183 /// # #[target_feature(enable = "sse4.2")]
184 /// # unsafe fn worker() {
185 /// // Ensure your input is 16 byte aligned
186 /// let password = b"hunter2\0\0\0\0\0\0\0\0\0";
187 /// let special_chars = b"!@#$%^&*()[]:;<>";
188 ///
189 /// // Load the input
190 /// let a = _mm_loadu_si128(special_chars.as_ptr() as *const _);
191 /// let b = _mm_loadu_si128(password.as_ptr() as *const _);
192 ///
193 /// // Use _SIDD_CMP_EQUAL_ANY to find the index of any bytes in b
194 /// let idx = _mm_cmpistri(a.into(), b.into(), _SIDD_CMP_EQUAL_ANY);
195 ///
196 /// if idx < 16 {
197 /// println!("Congrats! Your password contains a special character");
198 /// # panic!("{:?} does not contain a special character", password);
199 /// } else {
200 /// println!("Your password should contain a special character");
201 /// }
202 /// # }
203 /// # unsafe { worker(); }
204 /// # }
205 /// # }
206 /// ```
207 ///
208 /// Find the index of the first character in the haystack that is within a
209 /// range of characters.
210 ///
211 /// ```
212 /// # #![feature(stdsimd)]
213 /// # #![cfg_attr(not(dox), no_std)]
214 /// # #[cfg(not(dox))]
215 /// # #[macro_use]
216 /// # extern crate std as real_std;
217 /// # #[cfg(not(dox))]
218 /// # #[macro_use]
219 /// # extern crate stdsimd as std;
220 /// #[cfg(target_arch = "x86")]
221 /// use std::arch::x86::*;
222 /// #[cfg(target_arch = "x86_64")]
223 /// use std::arch::x86_64::*;
224 ///
225 /// # fn main() {
226 /// # if is_x86_feature_detected!("sse4.2") {
227 /// # #[target_feature(enable = "sse4.2")]
228 /// # unsafe fn worker() {
229 /// # let b = b":;<=>?@[\\]^_`abc";
230 /// # let b = _mm_loadu_si128(b.as_ptr() as *const _);
231 ///
232 /// // Specify the ranges of values to be searched for [A-Za-z0-9].
233 /// let a = b"AZaz09\0\0\0\0\0\0\0\0\0\0";
234 /// let a = _mm_loadu_si128(a.as_ptr() as *const _);
235 ///
236 /// // Use _SIDD_CMP_RANGES to find the index of first byte in ranges.
237 /// // Which in this case will be the first alpha numeric byte found
238 /// // in the string.
239 /// let idx = _mm_cmpistri(a, b, _SIDD_CMP_RANGES);
240 ///
241 /// if idx < 16 {
242 /// println!("Found an alpha numeric character");
243 /// # assert_eq!(idx, 13);
244 /// } else {
245 /// println!("Did not find an alpha numeric character");
246 /// }
247 /// # }
248 /// # unsafe { worker(); }
249 /// # }
250 /// # }
251 /// ```
252 ///
253 /// Working with 16-bit characters.
254 ///
255 /// ```
256 /// # #![feature(stdsimd)]
257 /// # #![cfg_attr(not(dox), no_std)]
258 /// # #[cfg(not(dox))]
259 /// # #[macro_use]
260 /// # extern crate std as real_std;
261 /// # #[cfg(not(dox))]
262 /// # #[macro_use]
263 /// # extern crate stdsimd as std;
264 /// #[cfg(target_arch = "x86")]
265 /// use std::arch::x86::*;
266 /// #[cfg(target_arch = "x86_64")]
267 /// use std::arch::x86_64::*;
268 ///
269 /// # fn main() {
270 /// # if is_x86_feature_detected!("sse4.2") {
271 /// # #[target_feature(enable = "sse4.2")]
272 /// # unsafe fn worker() {
273 /// # let mut some_utf16_words = [0u16; 8];
274 /// # let mut more_utf16_words = [0u16; 8];
275 /// # '❤'.encode_utf16(&mut some_utf16_words);
276 /// # '𝕊'.encode_utf16(&mut more_utf16_words);
277 /// // Load the input
278 /// let a = _mm_loadu_si128(some_utf16_words.as_ptr() as *const _);
279 /// let b = _mm_loadu_si128(more_utf16_words.as_ptr() as *const _);
280 ///
281 /// // Specify _SIDD_UWORD_OPS to compare words instead of bytes, and
282 /// // use _SIDD_CMP_EQUAL_EACH to compare the two strings.
283 /// let idx = _mm_cmpistri(a, b, _SIDD_UWORD_OPS | _SIDD_CMP_EQUAL_EACH);
284 ///
285 /// if idx == 0 {
286 /// println!("16-bit unicode strings were equal!");
287 /// # panic!("Strings should not be equal!")
288 /// } else {
289 /// println!("16-bit unicode strings were not equal!");
290 /// }
291 /// # }
292 /// # unsafe { worker(); }
293 /// # }
294 /// # }
295 /// ```
296 ///
297 /// [`_SIDD_UBYTE_OPS`]: constant._SIDD_UBYTE_OPS.html
298 /// [`_SIDD_UWORD_OPS`]: constant._SIDD_UWORD_OPS.html
299 /// [`_SIDD_SBYTE_OPS`]: constant._SIDD_SBYTE_OPS.html
300 /// [`_SIDD_SWORD_OPS`]: constant._SIDD_SWORD_OPS.html
301 /// [`_SIDD_CMP_EQUAL_ANY`]: constant._SIDD_CMP_EQUAL_ANY.html
302 /// [`_SIDD_CMP_RANGES`]: constant._SIDD_CMP_RANGES.html
303 /// [`_SIDD_CMP_EQUAL_EACH`]: constant._SIDD_CMP_EQUAL_EACH.html
304 /// [`_SIDD_CMP_EQUAL_ORDERED`]: constant._SIDD_CMP_EQUAL_ORDERED.html
305 /// [`_SIDD_POSITIVE_POLARITY`]: constant._SIDD_POSITIVE_POLARITY.html
306 /// [`_SIDD_NEGATIVE_POLARITY`]: constant._SIDD_NEGATIVE_POLARITY.html
307 /// [`_SIDD_LEAST_SIGNIFICANT`]: constant._SIDD_LEAST_SIGNIFICANT.html
308 /// [`_SIDD_MOST_SIGNIFICANT`]: constant._SIDD_MOST_SIGNIFICANT.html
309 /// [`_mm_cmpestri`]: fn._mm_cmpestri.html
310 ///
311 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cmpistri)
312 #[inline]
313 #[target_feature(enable = "sse4.2")]
314 #[cfg_attr(test, assert_instr(pcmpistri, imm8 = 0))]
315 #[rustc_args_required_const(2)]
316 #[stable(feature = "simd_x86", since = "1.27.0")]
317 pub unsafe fn _mm_cmpistri(a: __m128i, b: __m128i, imm8: i32) -> i32 {
318 let a = a.as_i8x16();
319 let b = b.as_i8x16();
320 macro_rules! call {
321 ($imm8:expr) => {
322 pcmpistri128(a, b, $imm8)
323 };
324 }
325 constify_imm8!(imm8, call)
326 }
327
328 /// Compare packed strings with implicit lengths in `a` and `b` using the
329 /// control in `imm8`, and return `1` if any character in `b` was null.
330 /// and `0` otherwise.
331 ///
332 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cmpistrz)
333 #[inline]
334 #[target_feature(enable = "sse4.2")]
335 #[cfg_attr(test, assert_instr(pcmpistri, imm8 = 0))]
336 #[rustc_args_required_const(2)]
337 #[stable(feature = "simd_x86", since = "1.27.0")]
338 pub unsafe fn _mm_cmpistrz(a: __m128i, b: __m128i, imm8: i32) -> i32 {
339 let a = a.as_i8x16();
340 let b = b.as_i8x16();
341 macro_rules! call {
342 ($imm8:expr) => {
343 pcmpistriz128(a, b, $imm8)
344 };
345 }
346 constify_imm8!(imm8, call)
347 }
348
349 /// Compare packed strings with implicit lengths in `a` and `b` using the
350 /// control in `imm8`, and return `1` if the resulting mask was non-zero,
351 /// and `0` otherwise.
352 ///
353 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cmpistrc)
354 #[inline]
355 #[target_feature(enable = "sse4.2")]
356 #[cfg_attr(test, assert_instr(pcmpistri, imm8 = 0))]
357 #[rustc_args_required_const(2)]
358 #[stable(feature = "simd_x86", since = "1.27.0")]
359 pub unsafe fn _mm_cmpistrc(a: __m128i, b: __m128i, imm8: i32) -> i32 {
360 let a = a.as_i8x16();
361 let b = b.as_i8x16();
362 macro_rules! call {
363 ($imm8:expr) => {
364 pcmpistric128(a, b, $imm8)
365 };
366 }
367 constify_imm8!(imm8, call)
368 }
369
370 /// Compare packed strings with implicit lengths in `a` and `b` using the
371 /// control in `imm8`, and returns `1` if any character in `a` was null,
372 /// and `0` otherwise.
373 ///
374 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cmpistrs)
375 #[inline]
376 #[target_feature(enable = "sse4.2")]
377 #[cfg_attr(test, assert_instr(pcmpistri, imm8 = 0))]
378 #[rustc_args_required_const(2)]
379 #[stable(feature = "simd_x86", since = "1.27.0")]
380 pub unsafe fn _mm_cmpistrs(a: __m128i, b: __m128i, imm8: i32) -> i32 {
381 let a = a.as_i8x16();
382 let b = b.as_i8x16();
383 macro_rules! call {
384 ($imm8:expr) => {
385 pcmpistris128(a, b, $imm8)
386 };
387 }
388 constify_imm8!(imm8, call)
389 }
390
391 /// Compare packed strings with implicit lengths in `a` and `b` using the
392 /// control in `imm8`, and return bit `0` of the resulting bit mask.
393 ///
394 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cmpistro)
395 #[inline]
396 #[target_feature(enable = "sse4.2")]
397 #[cfg_attr(test, assert_instr(pcmpistri, imm8 = 0))]
398 #[rustc_args_required_const(2)]
399 #[stable(feature = "simd_x86", since = "1.27.0")]
400 pub unsafe fn _mm_cmpistro(a: __m128i, b: __m128i, imm8: i32) -> i32 {
401 let a = a.as_i8x16();
402 let b = b.as_i8x16();
403 macro_rules! call {
404 ($imm8:expr) => {
405 pcmpistrio128(a, b, $imm8)
406 };
407 }
408 constify_imm8!(imm8, call)
409 }
410
411 /// Compare packed strings with implicit lengths in `a` and `b` using the
412 /// control in `imm8`, and return `1` if `b` did not contain a null
413 /// character and the resulting mask was zero, and `0` otherwise.
414 ///
415 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cmpistra)
416 #[inline]
417 #[target_feature(enable = "sse4.2")]
418 #[cfg_attr(test, assert_instr(pcmpistri, imm8 = 0))]
419 #[rustc_args_required_const(2)]
420 #[stable(feature = "simd_x86", since = "1.27.0")]
421 pub unsafe fn _mm_cmpistra(a: __m128i, b: __m128i, imm8: i32) -> i32 {
422 let a = a.as_i8x16();
423 let b = b.as_i8x16();
424 macro_rules! call {
425 ($imm8:expr) => {
426 pcmpistria128(a, b, $imm8)
427 };
428 }
429 constify_imm8!(imm8, call)
430 }
431
432 /// Compare packed strings in `a` and `b` with lengths `la` and `lb`
433 /// using the control in `imm8`, and return the generated mask.
434 ///
435 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cmpestrm)
436 #[inline]
437 #[target_feature(enable = "sse4.2")]
438 #[cfg_attr(test, assert_instr(pcmpestrm, imm8 = 0))]
439 #[rustc_args_required_const(4)]
440 #[stable(feature = "simd_x86", since = "1.27.0")]
441 pub unsafe fn _mm_cmpestrm(
442 a: __m128i, la: i32, b: __m128i, lb: i32, imm8: i32,
443 ) -> __m128i {
444 let a = a.as_i8x16();
445 let b = b.as_i8x16();
446 macro_rules! call {
447 ($imm8:expr) => {
448 pcmpestrm128(a, la, b, lb, $imm8)
449 };
450 }
451 mem::transmute(constify_imm8!(imm8, call))
452 }
453
454 /// Compare packed strings `a` and `b` with lengths `la` and `lb` using the
455 /// control in `imm8` and return the generated index. Similar to
456 /// [`_mm_cmpistri`] with the exception that [`_mm_cmpistri`] implicitly
457 /// determines the length of `a` and `b`.
458 ///
459 /// # Control modes
460 ///
461 /// The control specified by `imm8` may be one or more of the following.
462 ///
463 /// ## Data size and signedness
464 ///
465 /// - [`_SIDD_UBYTE_OPS`] - Default
466 /// - [`_SIDD_UWORD_OPS`]
467 /// - [`_SIDD_SBYTE_OPS`]
468 /// - [`_SIDD_SWORD_OPS`]
469 ///
470 /// ## Comparison options
471 /// - [`_SIDD_CMP_EQUAL_ANY`] - Default
472 /// - [`_SIDD_CMP_RANGES`]
473 /// - [`_SIDD_CMP_EQUAL_EACH`]
474 /// - [`_SIDD_CMP_EQUAL_ORDERED`]
475 ///
476 /// ## Result polarity
477 /// - [`_SIDD_POSITIVE_POLARITY`] - Default
478 /// - [`_SIDD_NEGATIVE_POLARITY`]
479 ///
480 /// ## Bit returned
481 /// - [`_SIDD_LEAST_SIGNIFICANT`] - Default
482 /// - [`_SIDD_MOST_SIGNIFICANT`]
483 ///
484 /// # Examples
485 ///
486 /// ```
487 /// # #![feature(stdsimd)]
488 /// # #![cfg_attr(not(dox), no_std)]
489 /// # #[cfg(not(dox))]
490 /// # #[macro_use]
491 /// # extern crate std as real_std;
492 /// # #[cfg(not(dox))]
493 /// # #[macro_use]
494 /// # extern crate stdsimd as std;
495 /// #[cfg(target_arch = "x86")]
496 /// use std::arch::x86::*;
497 /// #[cfg(target_arch = "x86_64")]
498 /// use std::arch::x86_64::*;
499 ///
500 /// # fn main() {
501 /// # if is_x86_feature_detected!("sse4.2") {
502 /// # #[target_feature(enable = "sse4.2")]
503 /// # unsafe fn worker() {
504 ///
505 /// // The string we want to find a substring in
506 /// let haystack = b"Split \r\n\t line ";
507 ///
508 /// // The string we want to search for with some
509 /// // extra bytes we do not want to search for.
510 /// let needle = b"\r\n\t ignore this ";
511 ///
512 /// let a = _mm_loadu_si128(needle.as_ptr() as *const _);
513 /// let b = _mm_loadu_si128(haystack.as_ptr() as *const _);
514 ///
515 /// // Note: We explicitly specify we only want to search `b` for the
516 /// // first 3 characters of a.
517 /// let idx = _mm_cmpestri(a, 3, b, 15, _SIDD_CMP_EQUAL_ORDERED);
518 ///
519 /// assert_eq!(idx, 6);
520 /// # }
521 /// # unsafe { worker(); }
522 /// # }
523 /// # }
524 /// ```
525 ///
526 /// [`_SIDD_UBYTE_OPS`]: constant._SIDD_UBYTE_OPS.html
527 /// [`_SIDD_UWORD_OPS`]: constant._SIDD_UWORD_OPS.html
528 /// [`_SIDD_SBYTE_OPS`]: constant._SIDD_SBYTE_OPS.html
529 /// [`_SIDD_SWORD_OPS`]: constant._SIDD_SWORD_OPS.html
530 /// [`_SIDD_CMP_EQUAL_ANY`]: constant._SIDD_CMP_EQUAL_ANY.html
531 /// [`_SIDD_CMP_RANGES`]: constant._SIDD_CMP_RANGES.html
532 /// [`_SIDD_CMP_EQUAL_EACH`]: constant._SIDD_CMP_EQUAL_EACH.html
533 /// [`_SIDD_CMP_EQUAL_ORDERED`]: constant._SIDD_CMP_EQUAL_ORDERED.html
534 /// [`_SIDD_POSITIVE_POLARITY`]: constant._SIDD_POSITIVE_POLARITY.html
535 /// [`_SIDD_NEGATIVE_POLARITY`]: constant._SIDD_NEGATIVE_POLARITY.html
536 /// [`_SIDD_LEAST_SIGNIFICANT`]: constant._SIDD_LEAST_SIGNIFICANT.html
537 /// [`_SIDD_MOST_SIGNIFICANT`]: constant._SIDD_MOST_SIGNIFICANT.html
538 /// [`_mm_cmpistri`]: fn._mm_cmpistri.html
539 ///
540 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cmpestri)
541 #[inline]
542 #[target_feature(enable = "sse4.2")]
543 #[cfg_attr(test, assert_instr(pcmpestri, imm8 = 0))]
544 #[rustc_args_required_const(4)]
545 #[stable(feature = "simd_x86", since = "1.27.0")]
546 pub unsafe fn _mm_cmpestri(
547 a: __m128i, la: i32, b: __m128i, lb: i32, imm8: i32,
548 ) -> i32 {
549 let a = a.as_i8x16();
550 let b = b.as_i8x16();
551 macro_rules! call {
552 ($imm8:expr) => {
553 pcmpestri128(a, la, b, lb, $imm8)
554 };
555 }
556 constify_imm8!(imm8, call)
557 }
558
559 /// Compare packed strings in `a` and `b` with lengths `la` and `lb`
560 /// using the control in `imm8`, and return `1` if any character in
561 /// `b` was null, and `0` otherwise.
562 ///
563 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cmpestrz)
564 #[inline]
565 #[target_feature(enable = "sse4.2")]
566 #[cfg_attr(test, assert_instr(pcmpestri, imm8 = 0))]
567 #[rustc_args_required_const(4)]
568 #[stable(feature = "simd_x86", since = "1.27.0")]
569 pub unsafe fn _mm_cmpestrz(
570 a: __m128i, la: i32, b: __m128i, lb: i32, imm8: i32,
571 ) -> i32 {
572 let a = a.as_i8x16();
573 let b = b.as_i8x16();
574 macro_rules! call {
575 ($imm8:expr) => {
576 pcmpestriz128(a, la, b, lb, $imm8)
577 };
578 }
579 constify_imm8!(imm8, call)
580 }
581
582 /// Compare packed strings in `a` and `b` with lengths `la` and `lb`
583 /// using the control in `imm8`, and return `1` if the resulting mask
584 /// was non-zero, and `0` otherwise.
585 ///
586 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cmpestrc)
587 #[inline]
588 #[target_feature(enable = "sse4.2")]
589 #[cfg_attr(test, assert_instr(pcmpestri, imm8 = 0))]
590 #[rustc_args_required_const(4)]
591 #[stable(feature = "simd_x86", since = "1.27.0")]
592 pub unsafe fn _mm_cmpestrc(
593 a: __m128i, la: i32, b: __m128i, lb: i32, imm8: i32,
594 ) -> i32 {
595 let a = a.as_i8x16();
596 let b = b.as_i8x16();
597 macro_rules! call {
598 ($imm8:expr) => {
599 pcmpestric128(a, la, b, lb, $imm8)
600 };
601 }
602 constify_imm8!(imm8, call)
603 }
604
605 /// Compare packed strings in `a` and `b` with lengths `la` and `lb`
606 /// using the control in `imm8`, and return `1` if any character in
607 /// a was null, and `0` otherwise.
608 ///
609 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cmpestrs)
610 #[inline]
611 #[target_feature(enable = "sse4.2")]
612 #[cfg_attr(test, assert_instr(pcmpestri, imm8 = 0))]
613 #[rustc_args_required_const(4)]
614 #[stable(feature = "simd_x86", since = "1.27.0")]
615 pub unsafe fn _mm_cmpestrs(
616 a: __m128i, la: i32, b: __m128i, lb: i32, imm8: i32,
617 ) -> i32 {
618 let a = a.as_i8x16();
619 let b = b.as_i8x16();
620 macro_rules! call {
621 ($imm8:expr) => {
622 pcmpestris128(a, la, b, lb, $imm8)
623 };
624 }
625 constify_imm8!(imm8, call)
626 }
627
628 /// Compare packed strings in `a` and `b` with lengths `la` and `lb`
629 /// using the control in `imm8`, and return bit `0` of the resulting
630 /// bit mask.
631 ///
632 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cmpestro)
633 #[inline]
634 #[target_feature(enable = "sse4.2")]
635 #[cfg_attr(test, assert_instr(pcmpestri, imm8 = 0))]
636 #[rustc_args_required_const(4)]
637 #[stable(feature = "simd_x86", since = "1.27.0")]
638 pub unsafe fn _mm_cmpestro(
639 a: __m128i, la: i32, b: __m128i, lb: i32, imm8: i32,
640 ) -> i32 {
641 let a = a.as_i8x16();
642 let b = b.as_i8x16();
643 macro_rules! call {
644 ($imm8:expr) => {
645 pcmpestrio128(a, la, b, lb, $imm8)
646 };
647 }
648 constify_imm8!(imm8, call)
649 }
650
651 /// Compare packed strings in `a` and `b` with lengths `la` and `lb`
652 /// using the control in `imm8`, and return `1` if `b` did not
653 /// contain a null character and the resulting mask was zero, and `0`
654 /// otherwise.
655 ///
656 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cmpestra)
657 #[inline]
658 #[target_feature(enable = "sse4.2")]
659 #[cfg_attr(test, assert_instr(pcmpestri, imm8 = 0))]
660 #[rustc_args_required_const(4)]
661 #[stable(feature = "simd_x86", since = "1.27.0")]
662 pub unsafe fn _mm_cmpestra(
663 a: __m128i, la: i32, b: __m128i, lb: i32, imm8: i32,
664 ) -> i32 {
665 let a = a.as_i8x16();
666 let b = b.as_i8x16();
667 macro_rules! call {
668 ($imm8:expr) => {
669 pcmpestria128(a, la, b, lb, $imm8)
670 };
671 }
672 constify_imm8!(imm8, call)
673 }
674
675 /// Starting with the initial value in `crc`, return the accumulated
676 /// CRC32 value for unsigned 8-bit integer `v`.
677 ///
678 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_crc32_u8)
679 #[inline]
680 #[target_feature(enable = "sse4.2")]
681 #[cfg_attr(test, assert_instr(crc32))]
682 #[stable(feature = "simd_x86", since = "1.27.0")]
683 pub unsafe fn _mm_crc32_u8(crc: u32, v: u8) -> u32 {
684 crc32_32_8(crc, v)
685 }
686
687 /// Starting with the initial value in `crc`, return the accumulated
688 /// CRC32 value for unsigned 16-bit integer `v`.
689 ///
690 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_crc32_u16)
691 #[inline]
692 #[target_feature(enable = "sse4.2")]
693 #[cfg_attr(test, assert_instr(crc32))]
694 #[stable(feature = "simd_x86", since = "1.27.0")]
695 pub unsafe fn _mm_crc32_u16(crc: u32, v: u16) -> u32 {
696 crc32_32_16(crc, v)
697 }
698
699 /// Starting with the initial value in `crc`, return the accumulated
700 /// CRC32 value for unsigned 32-bit integer `v`.
701 ///
702 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_crc32_u32)
703 #[inline]
704 #[target_feature(enable = "sse4.2")]
705 #[cfg_attr(test, assert_instr(crc32))]
706 #[stable(feature = "simd_x86", since = "1.27.0")]
707 pub unsafe fn _mm_crc32_u32(crc: u32, v: u32) -> u32 {
708 crc32_32_32(crc, v)
709 }
710
711 /// Compare packed 64-bit integers in `a` and `b` for greater-than,
712 /// return the results.
713 ///
714 /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cmpgt_epi64)
715 #[inline]
716 #[target_feature(enable = "sse4.2")]
717 #[cfg_attr(test, assert_instr(pcmpgtq))]
718 #[stable(feature = "simd_x86", since = "1.27.0")]
719 pub unsafe fn _mm_cmpgt_epi64(a: __m128i, b: __m128i) -> __m128i {
720 mem::transmute(simd_gt::<_, i64x2>(a.as_i64x2(), b.as_i64x2()))
721 }
722
723 #[allow(improper_ctypes)]
724 extern "C" {
725 // SSE 4.2 string and text comparison ops
726 #[link_name = "llvm.x86.sse42.pcmpestrm128"]
727 fn pcmpestrm128(a: i8x16, la: i32, b: i8x16, lb: i32, imm8: i8) -> u8x16;
728 #[link_name = "llvm.x86.sse42.pcmpestri128"]
729 fn pcmpestri128(a: i8x16, la: i32, b: i8x16, lb: i32, imm8: i8) -> i32;
730 #[link_name = "llvm.x86.sse42.pcmpestriz128"]
731 fn pcmpestriz128(a: i8x16, la: i32, b: i8x16, lb: i32, imm8: i8) -> i32;
732 #[link_name = "llvm.x86.sse42.pcmpestric128"]
733 fn pcmpestric128(a: i8x16, la: i32, b: i8x16, lb: i32, imm8: i8) -> i32;
734 #[link_name = "llvm.x86.sse42.pcmpestris128"]
735 fn pcmpestris128(a: i8x16, la: i32, b: i8x16, lb: i32, imm8: i8) -> i32;
736 #[link_name = "llvm.x86.sse42.pcmpestrio128"]
737 fn pcmpestrio128(a: i8x16, la: i32, b: i8x16, lb: i32, imm8: i8) -> i32;
738 #[link_name = "llvm.x86.sse42.pcmpestria128"]
739 fn pcmpestria128(a: i8x16, la: i32, b: i8x16, lb: i32, imm8: i8) -> i32;
740 #[link_name = "llvm.x86.sse42.pcmpistrm128"]
741 fn pcmpistrm128(a: i8x16, b: i8x16, imm8: i8) -> i8x16;
742 #[link_name = "llvm.x86.sse42.pcmpistri128"]
743 fn pcmpistri128(a: i8x16, b: i8x16, imm8: i8) -> i32;
744 #[link_name = "llvm.x86.sse42.pcmpistriz128"]
745 fn pcmpistriz128(a: i8x16, b: i8x16, imm8: i8) -> i32;
746 #[link_name = "llvm.x86.sse42.pcmpistric128"]
747 fn pcmpistric128(a: i8x16, b: i8x16, imm8: i8) -> i32;
748 #[link_name = "llvm.x86.sse42.pcmpistris128"]
749 fn pcmpistris128(a: i8x16, b: i8x16, imm8: i8) -> i32;
750 #[link_name = "llvm.x86.sse42.pcmpistrio128"]
751 fn pcmpistrio128(a: i8x16, b: i8x16, imm8: i8) -> i32;
752 #[link_name = "llvm.x86.sse42.pcmpistria128"]
753 fn pcmpistria128(a: i8x16, b: i8x16, imm8: i8) -> i32;
754 // SSE 4.2 CRC instructions
755 #[link_name = "llvm.x86.sse42.crc32.32.8"]
756 fn crc32_32_8(crc: u32, v: u8) -> u32;
757 #[link_name = "llvm.x86.sse42.crc32.32.16"]
758 fn crc32_32_16(crc: u32, v: u16) -> u32;
759 #[link_name = "llvm.x86.sse42.crc32.32.32"]
760 fn crc32_32_32(crc: u32, v: u32) -> u32;
761 }
762
#[cfg(test)]
mod tests {
    use stdsimd_test::simd_test;

    use coresimd::x86::*;

    // `_mm_loadu_si128` reads a full 16 bytes, so a byte string shorter
    // than that cannot be loaded directly. Rather than loading and then
    // patching the `__m128i`, copy the string into a zero-filled 16-byte
    // buffer and load the buffer instead.
    #[target_feature(enable = "sse4.2")]
    unsafe fn str_to_m128i(s: &[u8]) -> __m128i {
        assert!(s.len() <= 16);
        let mut padded = [0u8; 16];
        // A checked slice copy is well-defined for the empty strings that
        // several tests pass in; indexing element 0 of an empty slice
        // (even unchecked) is not.
        padded[..s.len()].copy_from_slice(s);
        _mm_loadu_si128(padded.as_ptr() as *const _)
    }

    #[simd_test(enable = "sse4.2")]
    unsafe fn test_mm_cmpistrm() {
        let a = str_to_m128i(b"Hello! Good-Bye!");
        let b = str_to_m128i(b"hello! good-bye!");
        let i = _mm_cmpistrm(a, b, _SIDD_UNIT_MASK);
        #[cfg_attr(rustfmt, rustfmt_skip)]
        let res = _mm_setr_epi8(
            0x00, !0, !0, !0, !0, !0, !0, 0x00,
            !0, !0, !0, !0, 0x00, !0, !0, !0,
        );
        assert_eq_m128i(i, res);
    }

    #[simd_test(enable = "sse4.2")]
    unsafe fn test_mm_cmpistri() {
        let a = str_to_m128i(b"Hello");
        // Three leading spaces, so the needle starts at index 3.
        let b = str_to_m128i(b"   Hello   ");
        let i = _mm_cmpistri(a, b, _SIDD_CMP_EQUAL_ORDERED);
        assert_eq!(3, i);
    }

    #[simd_test(enable = "sse4.2")]
    unsafe fn test_mm_cmpistrz() {
        let a = str_to_m128i(b"");
        let b = str_to_m128i(b"Hello");
        let i = _mm_cmpistrz(a, b, _SIDD_CMP_EQUAL_ORDERED);
        assert_eq!(1, i);
    }

    #[simd_test(enable = "sse4.2")]
    unsafe fn test_mm_cmpistrc() {
        let a = str_to_m128i(b" ");
        let b = str_to_m128i(b" ! ");
        let i = _mm_cmpistrc(a, b, _SIDD_UNIT_MASK);
        assert_eq!(1, i);
    }

    #[simd_test(enable = "sse4.2")]
    unsafe fn test_mm_cmpistrs() {
        let a = str_to_m128i(b"Hello");
        let b = str_to_m128i(b"");
        let i = _mm_cmpistrs(a, b, _SIDD_CMP_EQUAL_ORDERED);
        assert_eq!(1, i);
    }

    #[simd_test(enable = "sse4.2")]
    unsafe fn test_mm_cmpistro() {
        #[cfg_attr(rustfmt, rustfmt_skip)]
        let a = _mm_setr_epi8(
            0x00, 0x47, 0x00, 0x65, 0x00, 0x6c, 0x00, 0x6c,
            0x00, 0x6f, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
        );
        #[cfg_attr(rustfmt, rustfmt_skip)]
        let b = _mm_setr_epi8(
            0x00, 0x48, 0x00, 0x65, 0x00, 0x6c, 0x00, 0x6c,
            0x00, 0x6f, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
        );
        let i = _mm_cmpistro(a, b, _SIDD_UWORD_OPS | _SIDD_UNIT_MASK);
        assert_eq!(0, i);
    }

    #[simd_test(enable = "sse4.2")]
    unsafe fn test_mm_cmpistra() {
        let a = str_to_m128i(b"");
        let b = str_to_m128i(b"Hello!!!!!!!!!!!");
        let i = _mm_cmpistra(a, b, _SIDD_UNIT_MASK);
        assert_eq!(1, i);
    }

    #[simd_test(enable = "sse4.2")]
    unsafe fn test_mm_cmpestrm() {
        let a = str_to_m128i(b"Hello!");
        let b = str_to_m128i(b"Hello.");
        let i = _mm_cmpestrm(a, 5, b, 5, _SIDD_UNIT_MASK);
        #[cfg_attr(rustfmt, rustfmt_skip)]
        let r = _mm_setr_epi8(
            !0, !0, !0, !0, !0, 0x00, 0x00, 0x00,
            0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00
        );
        assert_eq_m128i(i, r);
    }

    #[simd_test(enable = "sse4.2")]
    unsafe fn test_mm_cmpestri() {
        let a = str_to_m128i(b"bar - garbage");
        let b = str_to_m128i(b"foobar");
        let i = _mm_cmpestri(a, 3, b, 6, _SIDD_CMP_EQUAL_ORDERED);
        assert_eq!(3, i);
    }

    #[simd_test(enable = "sse4.2")]
    unsafe fn test_mm_cmpestrz() {
        let a = str_to_m128i(b"");
        let b = str_to_m128i(b"Hello");
        let i = _mm_cmpestrz(a, 16, b, 6, _SIDD_CMP_EQUAL_ORDERED);
        assert_eq!(1, i);
    }

    #[simd_test(enable = "sse4.2")]
    unsafe fn test_mm_cmpestrc() {
        let va = str_to_m128i(b"!!!!!!!!");
        let vb = str_to_m128i(b" ");
        let i = _mm_cmpestrc(va, 7, vb, 7, _SIDD_UNIT_MASK);
        assert_eq!(0, i);
    }

    #[simd_test(enable = "sse4.2")]
    unsafe fn test_mm_cmpestrs() {
        #[cfg_attr(rustfmt, rustfmt_skip)]
        let a = _mm_setr_epi8(
            0x00, 0x48, 0x00, 0x65, 0x00, 0x6c, 0x00, 0x6c,
            0x00, 0x6f, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
        );
        let b = _mm_set1_epi8(0x00);
        let i = _mm_cmpestrs(a, 8, b, 0, _SIDD_UWORD_OPS);
        assert_eq!(0, i);
    }

    #[simd_test(enable = "sse4.2")]
    unsafe fn test_mm_cmpestro() {
        let a = str_to_m128i(b"Hello");
        let b = str_to_m128i(b"World");
        let i = _mm_cmpestro(a, 5, b, 5, _SIDD_UBYTE_OPS);
        assert_eq!(0, i);
    }

    #[simd_test(enable = "sse4.2")]
    unsafe fn test_mm_cmpestra() {
        let a = str_to_m128i(b"Cannot match a");
        let b = str_to_m128i(b"Null after 14");
        let i =
            _mm_cmpestra(a, 14, b, 16, _SIDD_CMP_EQUAL_EACH | _SIDD_UNIT_MASK);
        assert_eq!(1, i);
    }

    #[simd_test(enable = "sse4.2")]
    unsafe fn test_mm_crc32_u8() {
        let crc = 0x2aa1e72b;
        let v = 0x2a;
        let i = _mm_crc32_u8(crc, v);
        assert_eq!(i, 0xf24122e4);
    }

    #[simd_test(enable = "sse4.2")]
    unsafe fn test_mm_crc32_u16() {
        let crc = 0x8ecec3b5;
        let v = 0x22b;
        let i = _mm_crc32_u16(crc, v);
        assert_eq!(i, 0x13bb2fb);
    }

    #[simd_test(enable = "sse4.2")]
    unsafe fn test_mm_crc32_u32() {
        let crc = 0xae2912c8;
        let v = 0x845fed;
        let i = _mm_crc32_u32(crc, v);
        assert_eq!(i, 0xffae2ed1);
    }

    #[simd_test(enable = "sse4.2")]
    unsafe fn test_mm_cmpgt_epi64() {
        let a = _mm_setr_epi64x(0, 0x2a);
        let b = _mm_set1_epi64x(0x00);
        let i = _mm_cmpgt_epi64(a, b);
        assert_eq_m128i(
            i,
            _mm_setr_epi64x(0x00, 0xffffffffffffffffu64 as i64),
        );
    }
}