]>
Commit | Line | Data |
---|---|---|
0531ce1d XL |
1 | //! Bit Manipulation Instruction (BMI) Set 2.0. |
2 | //! | |
3 | //! The reference is [Intel 64 and IA-32 Architectures Software Developer's | |
4 | //! Manual Volume 2: Instruction Set Reference, A-Z][intel64_ref]. | |
5 | //! | |
6 | //! [Wikipedia][wikipedia_bmi] provides a quick overview of the instructions | |
7 | //! available. | |
8 | //! | |
9 | //! [intel64_ref]: http://www.intel.de/content/dam/www/public/us/en/documents/manuals/64-ia-32-architectures-software-developer-instruction-set-reference-manual-325383.pdf | |
10 | //! [wikipedia_bmi]: | |
11 | //! https://en.wikipedia.org/wiki/Bit_Manipulation_Instruction_Sets#ABM_.28Advanced_Bit_Manipulation.29 | |
12 | ||
13 | #[cfg(test)] | |
14 | use stdsimd_test::assert_instr; | |
15 | ||
16 | /// Unsigned multiply without affecting flags. | |
17 | /// | |
18 | /// Unsigned multiplication of `a` with `b` returning a pair `(lo, hi)` with | |
19 | /// the low half and the high half of the result. | |
83c7162d XL |
20 | /// |
21 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mulx_u64) | |
0531ce1d | 22 | #[inline] |
9fa01778 | 23 | #[cfg_attr(test, assert_instr(mul))] |
0531ce1d XL |
24 | #[target_feature(enable = "bmi2")] |
25 | #[cfg(not(target_arch = "x86"))] // calls an intrinsic | |
83c7162d | 26 | #[stable(feature = "simd_x86", since = "1.27.0")] |
0531ce1d XL |
27 | pub unsafe fn _mulx_u64(a: u64, b: u64, hi: &mut u64) -> u64 { |
28 | let result: u128 = (a as u128) * (b as u128); | |
29 | *hi = (result >> 64) as u64; | |
30 | result as u64 | |
31 | } | |
32 | ||
/// Zeroes higher bits of `a` >= `index`.
///
/// Bits of `a` at positions `index` and above are cleared; the bits below
/// `index` are returned unchanged.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_bzhi_u64)
#[inline]
#[target_feature(enable = "bmi2")]
#[cfg_attr(test, assert_instr(bzhi))]
#[cfg(not(target_arch = "x86"))] // calls a 64-bit-only intrinsic
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _bzhi_u64(a: u64, index: u32) -> u64 {
    // The LLVM intrinsic takes the index as a 64-bit operand, so widen it.
    x86_bmi2_bzhi_64(a, index as u64)
}
44 | ||
/// Scatter contiguous low order bits of `a` to the result at the positions
/// specified by the `mask`.
///
/// All other bits (bits not set in `mask`) of the result are zero.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_pdep_u64)
#[inline]
#[target_feature(enable = "bmi2")]
#[cfg_attr(test, assert_instr(pdep))]
#[cfg(not(target_arch = "x86"))] // calls a 64-bit-only intrinsic
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _pdep_u64(a: u64, mask: u64) -> u64 {
    // Thin wrapper: lowers directly to the PDEP instruction via LLVM.
    x86_bmi2_pdep_64(a, mask)
}
57 | ||
/// Gathers the bits of `x` specified by the `mask` into the contiguous low
/// order bit positions of the result.
///
/// The remaining high-order bits of the result are zero.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_pext_u64)
#[inline]
#[target_feature(enable = "bmi2")]
#[cfg_attr(test, assert_instr(pext))]
#[cfg(not(target_arch = "x86"))] // calls a 64-bit-only intrinsic
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _pext_u64(a: u64, mask: u64) -> u64 {
    // Thin wrapper: lowers directly to the PEXT instruction via LLVM.
    x86_bmi2_pext_64(a, mask)
}
70 | ||
// Raw bindings to the LLVM intrinsics that lower to the 64-bit forms of the
// BMI2 BZHI, PDEP and PEXT instructions. These are only declared here; the
// safe-to-call-with-feature wrappers above are the public surface.
extern "C" {
    #[link_name = "llvm.x86.bmi.bzhi.64"]
    fn x86_bmi2_bzhi_64(x: u64, y: u64) -> u64;
    #[link_name = "llvm.x86.bmi.pdep.64"]
    fn x86_bmi2_pdep_64(x: u64, y: u64) -> u64;
    #[link_name = "llvm.x86.bmi.pext.64"]
    fn x86_bmi2_pext_64(x: u64, y: u64) -> u64;
}
79 | ||
#[cfg(test)]
mod tests {
    use stdsimd_test::simd_test;

    use crate::core_arch::x86_64::*;

    #[simd_test(enable = "bmi2")]
    unsafe fn test_pext_u64() {
        // Source bit pattern the masked bits are gathered from.
        let n = 0b1011_1110_1001_0011u64;

        // For each mask, `s*` is the expected result: the bits of `n`
        // selected by the mask, packed into the low-order positions.
        let m0 = 0b0110_0011_1000_0101u64;
        let s0 = 0b0000_0000_0011_0101u64;

        let m1 = 0b1110_1011_1110_1111u64;
        let s1 = 0b0001_0111_0100_0011u64;

        assert_eq!(_pext_u64(n, m0), s0);
        assert_eq!(_pext_u64(n, m1), s1);
    }

    #[simd_test(enable = "bmi2")]
    unsafe fn test_pdep_u64() {
        // Source bit pattern whose low-order bits are scattered.
        let n = 0b1011_1110_1001_0011u64;

        // For each mask, `s*` is the expected result: the low bits of `n`
        // deposited at the positions of the mask's set bits.
        let m0 = 0b0110_0011_1000_0101u64;
        let s0 = 0b0000_0010_0000_0101u64;

        let m1 = 0b1110_1011_1110_1111u64;
        let s1 = 0b1110_1001_0010_0011u64;

        assert_eq!(_pdep_u64(n, m0), s0);
        assert_eq!(_pdep_u64(n, m1), s1);
    }

    #[simd_test(enable = "bmi2")]
    unsafe fn test_bzhi_u64() {
        let n = 0b1111_0010u64;
        // index = 5: bits 5 and above are cleared, bits 0..=4 kept.
        let s = 0b0001_0010u64;
        assert_eq!(_bzhi_u64(n, 5), s);
    }

    #[simd_test(enable = "bmi2")]
    #[rustfmt::skip]
    unsafe fn test_mulx_u64() {
        let a: u64 = 9_223_372_036_854_775_800;
        let b: u64 = 100;
        let mut hi = 0;
        let lo = _mulx_u64(a, b, &mut hi);
        /*
        result = 922337203685477580000 =
        0b00110001_1111111111111111_1111111111111111_1111111111111111_1111110011100000
          ^~hi~~~~ ^~lo~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
        */
        assert_eq!(
            lo,
            0b11111111_11111111_11111111_11111111_11111111_11111111_11111100_11100000u64
        );
        assert_eq!(hi, 0b00110001u64);
    }
}