]>
Commit | Line | Data |
---|---|---|
0531ce1d XL |
1 | //! Bit Manipulation Instruction (BMI) Set 1.0. |
2 | //! | |
3 | //! The reference is [Intel 64 and IA-32 Architectures Software Developer's | |
4 | //! Manual Volume 2: Instruction Set Reference, A-Z][intel64_ref]. | |
5 | //! | |
6 | //! [Wikipedia][wikipedia_bmi] provides a quick overview of the instructions | |
7 | //! available. | |
8 | //! | |
9 | //! [intel64_ref]: http://www.intel.de/content/dam/www/public/us/en/documents/manuals/64-ia-32-architectures-software-developer-instruction-set-reference-manual-325383.pdf | |
10 | //! [wikipedia_bmi]: https://en.wikipedia.org/wiki/Bit_Manipulation_Instruction_Sets#BMI1_(Bit_Manipulation_Instruction_Set_1) | |
11 | ||
12 | #[cfg(test)] | |
13 | use stdsimd_test::assert_instr; | |
14 | ||
15 | /// Extracts bits in range [`start`, `start` + `length`) from `a` into | |
16 | /// the least significant bits of the result. | |
83c7162d XL |
17 | /// |
18 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_bextr_u64) | |
0531ce1d XL |
19 | #[inline] |
20 | #[target_feature(enable = "bmi1")] | |
21 | #[cfg_attr(test, assert_instr(bextr))] | |
22 | #[cfg(not(target_arch = "x86"))] | |
83c7162d | 23 | #[stable(feature = "simd_x86", since = "1.27.0")] |
0531ce1d XL |
24 | pub unsafe fn _bextr_u64(a: u64, start: u32, len: u32) -> u64 { |
25 | _bextr2_u64(a, ((start & 0xff) | ((len & 0xff) << 8)) as u64) | |
26 | } | |
27 | ||
28 | /// Extracts bits of `a` specified by `control` into | |
29 | /// the least significant bits of the result. | |
30 | /// | |
8faf50e0 XL |
31 | /// Bits `[7,0]` of `control` specify the index to the first bit in the range |
32 | /// to be extracted, and bits `[15,8]` specify the length of the range. | |
83c7162d XL |
33 | /// |
34 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_bextr2_u64) | |
0531ce1d XL |
35 | #[inline] |
36 | #[target_feature(enable = "bmi1")] | |
37 | #[cfg_attr(test, assert_instr(bextr))] | |
38 | #[cfg(not(target_arch = "x86"))] | |
83c7162d | 39 | #[stable(feature = "simd_x86", since = "1.27.0")] |
0531ce1d XL |
40 | pub unsafe fn _bextr2_u64(a: u64, control: u64) -> u64 { |
41 | x86_bmi_bextr_64(a, control) | |
42 | } | |
43 | ||
44 | /// Bitwise logical `AND` of inverted `a` with `b`. | |
83c7162d XL |
45 | /// |
46 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_andn_u64) | |
0531ce1d XL |
47 | #[inline] |
48 | #[target_feature(enable = "bmi1")] | |
49 | #[cfg_attr(test, assert_instr(andn))] | |
83c7162d | 50 | #[stable(feature = "simd_x86", since = "1.27.0")] |
0531ce1d XL |
pub unsafe fn _andn_u64(a: u64, b: u64) -> u64 {
    // Keep only the bits of `b` that are clear in `a`.
    let inverted_a = !a;
    b & inverted_a
}
54 | ||
55 | /// Extract lowest set isolated bit. | |
83c7162d XL |
56 | /// |
57 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_blsi_u64) | |
0531ce1d XL |
58 | #[inline] |
59 | #[target_feature(enable = "bmi1")] | |
60 | #[cfg_attr(test, assert_instr(blsi))] | |
61 | #[cfg(not(target_arch = "x86"))] // generates lots of instructions | |
83c7162d | 62 | #[stable(feature = "simd_x86", since = "1.27.0")] |
0531ce1d XL |
pub unsafe fn _blsi_u64(x: u64) -> u64 {
    // In two's complement, `-x` agrees with `x` only at the lowest set bit
    // (and nowhere at all when `x` is 0), so the AND isolates that bit.
    let negated = x.wrapping_neg();
    negated & x
}
66 | ||
67 | /// Get mask up to lowest set bit. | |
83c7162d XL |
68 | /// |
69 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_blsmsk_u64) | |
0531ce1d XL |
70 | #[inline] |
71 | #[target_feature(enable = "bmi1")] | |
72 | #[cfg_attr(test, assert_instr(blsmsk))] | |
73 | #[cfg(not(target_arch = "x86"))] // generates lots of instructions | |
83c7162d | 74 | #[stable(feature = "simd_x86", since = "1.27.0")] |
0531ce1d XL |
pub unsafe fn _blsmsk_u64(x: u64) -> u64 {
    // `x - 1` flips the lowest set bit and every bit below it; XOR-ing with
    // `x` therefore yields a mask covering exactly those positions.
    // For `x == 0` the subtraction wraps and the mask is all ones.
    let borrowed = x.wrapping_sub(1);
    borrowed ^ x
}
78 | ||
79 | /// Resets the lowest set bit of `x`. | |
80 | /// | |
81 | /// If `x` is 0, sets CF. | |
83c7162d XL |
82 | /// |
83 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_blsr_u64) | |
0531ce1d XL |
84 | #[inline] |
85 | #[target_feature(enable = "bmi1")] | |
86 | #[cfg_attr(test, assert_instr(blsr))] | |
87 | #[cfg(not(target_arch = "x86"))] // generates lots of instructions | |
83c7162d | 88 | #[stable(feature = "simd_x86", since = "1.27.0")] |
0531ce1d XL |
pub unsafe fn _blsr_u64(x: u64) -> u64 {
    // `x - 1` clears the lowest set bit and sets every bit below it; the
    // AND with `x` drops those lower bits again, leaving `x` without its
    // lowest set bit. A zero input wraps to all ones and the AND gives 0.
    let borrowed = x.wrapping_sub(1);
    borrowed & x
}
92 | ||
93 | /// Counts the number of trailing least significant zero bits. | |
94 | /// | |
95 | /// When the source operand is 0, it returns its size in bits. | |
83c7162d XL |
96 | /// |
97 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_tzcnt_u64) | |
0531ce1d XL |
98 | #[inline] |
99 | #[target_feature(enable = "bmi1")] | |
100 | #[cfg_attr(test, assert_instr(tzcnt))] | |
83c7162d | 101 | #[stable(feature = "simd_x86", since = "1.27.0")] |
0531ce1d XL |
pub unsafe fn _tzcnt_u64(x: u64) -> u64 {
    // `trailing_zeros` yields a `u32` (64 for a zero input); widening it to
    // `u64` is lossless.
    let count = x.trailing_zeros();
    u64::from(count)
}
105 | ||
106 | /// Counts the number of trailing least significant zero bits. | |
107 | /// | |
108 | /// When the source operand is 0, it returns its size in bits. | |
83c7162d XL |
109 | /// |
110 | /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_tzcnt_64) | |
0531ce1d XL |
111 | #[inline] |
112 | #[target_feature(enable = "bmi1")] | |
113 | #[cfg_attr(test, assert_instr(tzcnt))] | |
83c7162d | 114 | #[stable(feature = "simd_x86", since = "1.27.0")] |
0531ce1d XL |
pub unsafe fn _mm_tzcnt_64(x: u64) -> i64 {
    // The count is at most 64, so converting the `u32` result to `i64`
    // can never lose information or change sign.
    let count = x.trailing_zeros();
    i64::from(count)
}
118 | ||
// Binding to the LLVM intrinsic that backs `_bextr2_u64`.
extern "C" {
    // `x` receives the source value and `y` the control word, in the order
    // `_bextr2_u64` passes them.
    #[link_name = "llvm.x86.bmi.bextr.64"]
    fn x86_bmi_bextr_64(x: u64, y: u64) -> u64;
}
123 | ||
#[cfg(test)]
mod tests {
    //! Unit tests for the 64-bit BMI1 intrinsics defined in this file.

    use stdsimd_test::simd_test;

    use coresimd::x86::*;
    use coresimd::x86_64::*;

    /// Extracting 4 bits starting at bit 4 yields the upper nibble.
    #[simd_test(enable = "bmi1")]
    unsafe fn test_bextr_u64() {
        let r = _bextr_u64(0b0101_0000u64, 4, 4);
        assert_eq!(r, 0b0000_0101u64);
    }

    /// Same extraction as `test_bextr_u64`, but with the control word
    /// packed by hand: start index 4 in bits 7:0, length 4 in bits 15:8.
    #[simd_test(enable = "bmi1")]
    unsafe fn test_bextr2_u64() {
        let r = _bextr2_u64(0b0101_0000u64, 0x0404u64);
        assert_eq!(r, 0b0000_0101u64);
    }

    /// Truth table plus a few byte-wide patterns for `!a & b`.
    #[simd_test(enable = "bmi1")]
    unsafe fn test_andn_u64() {
        assert_eq!(_andn_u64(0, 0), 0);
        assert_eq!(_andn_u64(0, 1), 1);
        assert_eq!(_andn_u64(1, 0), 0);
        assert_eq!(_andn_u64(1, 1), 0);

        let r = _andn_u64(0b0000_0000u64, 0b0000_0000u64);
        assert_eq!(r, 0b0000_0000u64);

        let r = _andn_u64(0b0000_0000u64, 0b1111_1111u64);
        assert_eq!(r, 0b1111_1111u64);

        let r = _andn_u64(0b1111_1111u64, 0b0000_0000u64);
        assert_eq!(r, 0b0000_0000u64);

        let r = _andn_u64(0b1111_1111u64, 0b1111_1111u64);
        assert_eq!(r, 0b0000_0000u64);

        let r = _andn_u64(0b0100_0000u64, 0b0101_1101u64);
        assert_eq!(r, 0b0001_1101u64);
    }

    /// Only the lowest set bit survives.
    #[simd_test(enable = "bmi1")]
    unsafe fn test_blsi_u64() {
        assert_eq!(_blsi_u64(0b1101_0000u64), 0b0001_0000u64);
    }

    /// The mask covers the lowest set bit and everything below it.
    #[simd_test(enable = "bmi1")]
    unsafe fn test_blsmsk_u64() {
        let r = _blsmsk_u64(0b0011_0000u64);
        assert_eq!(r, 0b0001_1111u64);
    }

    /// The lowest set bit is cleared; a zero input stays zero.
    #[simd_test(enable = "bmi1")]
    unsafe fn test_blsr_u64() {
        let r = _blsr_u64(0b0011_0000u64);
        assert_eq!(r, 0b0010_0000u64);

        // Zero input: there is no set bit to reset, so the value result is
        // 0. Only the returned value is checked here; the carry flag is not
        // observable through this API.
        assert_eq!(_blsr_u64(0u64), 0u64);
    }

    /// Trailing-zero count, including the all-zero input (returns 64).
    #[simd_test(enable = "bmi1")]
    unsafe fn test_tzcnt_u64() {
        assert_eq!(_tzcnt_u64(0b0000_0001u64), 0u64);
        assert_eq!(_tzcnt_u64(0b0000_0000u64), 64u64);
        assert_eq!(_tzcnt_u64(0b1001_0000u64), 4u64);
    }

    /// Signed-return variant must agree with `_tzcnt_u64` on the same inputs.
    #[simd_test(enable = "bmi1")]
    unsafe fn test_mm_tzcnt_64() {
        assert_eq!(_mm_tzcnt_64(0b0000_0001u64), 0i64);
        assert_eq!(_mm_tzcnt_64(0b0000_0000u64), 64i64);
        assert_eq!(_mm_tzcnt_64(0b1001_0000u64), 4i64);
    }
}