]>
Commit | Line | Data |
---|---|---|
f20569fa | 1 | //! LLVM bit manipulation intrinsics. |
cdc7bbd5 | 2 | #[rustfmt::skip] |
f20569fa XL |
3 | |
4 | use crate::*; | |
5 | ||
// Raw bindings to LLVM's vector bit-counting intrinsics, selected through
// their `llvm.*` link names. There is one binding per unsigned vector type;
// the `BitManip` impls in this file are the only callers visible here.
//
// `dead_code` is allowed because some bindings go unused on targets where the
// intrinsic-based impls are compiled out (the s390x scalar fallback, and
// `cttz_u8x8` on aarch64).
// NOTE(review): `improper_ctypes` is presumably needed because the crate's
// SIMD vector types are not recognised as FFI-safe by the lint — confirm.
#[allow(improper_ctypes, dead_code)]
extern "C" {
    // ---- llvm.ctlz ----
    // The scalar fallback in this file uses `leading_zeros()` per lane in
    // place of these bindings.
    // `is_zero_undef` has to be a compile-time constant; every call in this
    // file passes `false`. (Per the LLVM LangRef, `true` would leave the
    // result for a zero lane undefined.)
    #[link_name = "llvm.ctlz.v2i8"]
    fn ctlz_u8x2(x: u8x2, is_zero_undef: bool) -> u8x2;
    #[link_name = "llvm.ctlz.v4i8"]
    fn ctlz_u8x4(x: u8x4, is_zero_undef: bool) -> u8x4;
    #[link_name = "llvm.ctlz.v8i8"]
    fn ctlz_u8x8(x: u8x8, is_zero_undef: bool) -> u8x8;
    #[link_name = "llvm.ctlz.v16i8"]
    fn ctlz_u8x16(x: u8x16, is_zero_undef: bool) -> u8x16;
    #[link_name = "llvm.ctlz.v32i8"]
    fn ctlz_u8x32(x: u8x32, is_zero_undef: bool) -> u8x32;
    #[link_name = "llvm.ctlz.v64i8"]
    fn ctlz_u8x64(x: u8x64, is_zero_undef: bool) -> u8x64;

    #[link_name = "llvm.ctlz.v2i16"]
    fn ctlz_u16x2(x: u16x2, is_zero_undef: bool) -> u16x2;
    #[link_name = "llvm.ctlz.v4i16"]
    fn ctlz_u16x4(x: u16x4, is_zero_undef: bool) -> u16x4;
    #[link_name = "llvm.ctlz.v8i16"]
    fn ctlz_u16x8(x: u16x8, is_zero_undef: bool) -> u16x8;
    #[link_name = "llvm.ctlz.v16i16"]
    fn ctlz_u16x16(x: u16x16, is_zero_undef: bool) -> u16x16;
    #[link_name = "llvm.ctlz.v32i16"]
    fn ctlz_u16x32(x: u16x32, is_zero_undef: bool) -> u16x32;

    #[link_name = "llvm.ctlz.v2i32"]
    fn ctlz_u32x2(x: u32x2, is_zero_undef: bool) -> u32x2;
    #[link_name = "llvm.ctlz.v4i32"]
    fn ctlz_u32x4(x: u32x4, is_zero_undef: bool) -> u32x4;
    #[link_name = "llvm.ctlz.v8i32"]
    fn ctlz_u32x8(x: u32x8, is_zero_undef: bool) -> u32x8;
    #[link_name = "llvm.ctlz.v16i32"]
    fn ctlz_u32x16(x: u32x16, is_zero_undef: bool) -> u32x16;

    #[link_name = "llvm.ctlz.v2i64"]
    fn ctlz_u64x2(x: u64x2, is_zero_undef: bool) -> u64x2;
    #[link_name = "llvm.ctlz.v4i64"]
    fn ctlz_u64x4(x: u64x4, is_zero_undef: bool) -> u64x4;
    #[link_name = "llvm.ctlz.v8i64"]
    fn ctlz_u64x8(x: u64x8, is_zero_undef: bool) -> u64x8;

    #[link_name = "llvm.ctlz.v1i128"]
    fn ctlz_u128x1(x: u128x1, is_zero_undef: bool) -> u128x1;
    #[link_name = "llvm.ctlz.v2i128"]
    fn ctlz_u128x2(x: u128x2, is_zero_undef: bool) -> u128x2;
    #[link_name = "llvm.ctlz.v4i128"]
    fn ctlz_u128x4(x: u128x4, is_zero_undef: bool) -> u128x4;

    // ---- llvm.cttz ----
    // The scalar fallback in this file uses `trailing_zeros()` per lane in
    // place of these bindings. `is_zero_undef` is handled as for `ctlz`.
    #[link_name = "llvm.cttz.v2i8"]
    fn cttz_u8x2(x: u8x2, is_zero_undef: bool) -> u8x2;
    #[link_name = "llvm.cttz.v4i8"]
    fn cttz_u8x4(x: u8x4, is_zero_undef: bool) -> u8x4;
    #[link_name = "llvm.cttz.v8i8"]
    fn cttz_u8x8(x: u8x8, is_zero_undef: bool) -> u8x8;
    #[link_name = "llvm.cttz.v16i8"]
    fn cttz_u8x16(x: u8x16, is_zero_undef: bool) -> u8x16;
    #[link_name = "llvm.cttz.v32i8"]
    fn cttz_u8x32(x: u8x32, is_zero_undef: bool) -> u8x32;
    #[link_name = "llvm.cttz.v64i8"]
    fn cttz_u8x64(x: u8x64, is_zero_undef: bool) -> u8x64;

    #[link_name = "llvm.cttz.v2i16"]
    fn cttz_u16x2(x: u16x2, is_zero_undef: bool) -> u16x2;
    #[link_name = "llvm.cttz.v4i16"]
    fn cttz_u16x4(x: u16x4, is_zero_undef: bool) -> u16x4;
    #[link_name = "llvm.cttz.v8i16"]
    fn cttz_u16x8(x: u16x8, is_zero_undef: bool) -> u16x8;
    #[link_name = "llvm.cttz.v16i16"]
    fn cttz_u16x16(x: u16x16, is_zero_undef: bool) -> u16x16;
    #[link_name = "llvm.cttz.v32i16"]
    fn cttz_u16x32(x: u16x32, is_zero_undef: bool) -> u16x32;

    #[link_name = "llvm.cttz.v2i32"]
    fn cttz_u32x2(x: u32x2, is_zero_undef: bool) -> u32x2;
    #[link_name = "llvm.cttz.v4i32"]
    fn cttz_u32x4(x: u32x4, is_zero_undef: bool) -> u32x4;
    #[link_name = "llvm.cttz.v8i32"]
    fn cttz_u32x8(x: u32x8, is_zero_undef: bool) -> u32x8;
    #[link_name = "llvm.cttz.v16i32"]
    fn cttz_u32x16(x: u32x16, is_zero_undef: bool) -> u32x16;

    #[link_name = "llvm.cttz.v2i64"]
    fn cttz_u64x2(x: u64x2, is_zero_undef: bool) -> u64x2;
    #[link_name = "llvm.cttz.v4i64"]
    fn cttz_u64x4(x: u64x4, is_zero_undef: bool) -> u64x4;
    #[link_name = "llvm.cttz.v8i64"]
    fn cttz_u64x8(x: u64x8, is_zero_undef: bool) -> u64x8;

    #[link_name = "llvm.cttz.v1i128"]
    fn cttz_u128x1(x: u128x1, is_zero_undef: bool) -> u128x1;
    #[link_name = "llvm.cttz.v2i128"]
    fn cttz_u128x2(x: u128x2, is_zero_undef: bool) -> u128x2;
    #[link_name = "llvm.cttz.v4i128"]
    fn cttz_u128x4(x: u128x4, is_zero_undef: bool) -> u128x4;

    // ---- llvm.ctpop ----
    // The scalar fallback in this file uses `count_ones()` per lane in place
    // of these bindings. No zero-handling flag is taken.
    #[link_name = "llvm.ctpop.v2i8"]
    fn ctpop_u8x2(x: u8x2) -> u8x2;
    #[link_name = "llvm.ctpop.v4i8"]
    fn ctpop_u8x4(x: u8x4) -> u8x4;
    #[link_name = "llvm.ctpop.v8i8"]
    fn ctpop_u8x8(x: u8x8) -> u8x8;
    #[link_name = "llvm.ctpop.v16i8"]
    fn ctpop_u8x16(x: u8x16) -> u8x16;
    #[link_name = "llvm.ctpop.v32i8"]
    fn ctpop_u8x32(x: u8x32) -> u8x32;
    #[link_name = "llvm.ctpop.v64i8"]
    fn ctpop_u8x64(x: u8x64) -> u8x64;

    #[link_name = "llvm.ctpop.v2i16"]
    fn ctpop_u16x2(x: u16x2) -> u16x2;
    #[link_name = "llvm.ctpop.v4i16"]
    fn ctpop_u16x4(x: u16x4) -> u16x4;
    #[link_name = "llvm.ctpop.v8i16"]
    fn ctpop_u16x8(x: u16x8) -> u16x8;
    #[link_name = "llvm.ctpop.v16i16"]
    fn ctpop_u16x16(x: u16x16) -> u16x16;
    #[link_name = "llvm.ctpop.v32i16"]
    fn ctpop_u16x32(x: u16x32) -> u16x32;

    #[link_name = "llvm.ctpop.v2i32"]
    fn ctpop_u32x2(x: u32x2) -> u32x2;
    #[link_name = "llvm.ctpop.v4i32"]
    fn ctpop_u32x4(x: u32x4) -> u32x4;
    #[link_name = "llvm.ctpop.v8i32"]
    fn ctpop_u32x8(x: u32x8) -> u32x8;
    #[link_name = "llvm.ctpop.v16i32"]
    fn ctpop_u32x16(x: u32x16) -> u32x16;

    #[link_name = "llvm.ctpop.v2i64"]
    fn ctpop_u64x2(x: u64x2) -> u64x2;
    #[link_name = "llvm.ctpop.v4i64"]
    fn ctpop_u64x4(x: u64x4) -> u64x4;
    #[link_name = "llvm.ctpop.v8i64"]
    fn ctpop_u64x8(x: u64x8) -> u64x8;

    #[link_name = "llvm.ctpop.v1i128"]
    fn ctpop_u128x1(x: u128x1) -> u128x1;
    #[link_name = "llvm.ctpop.v2i128"]
    fn ctpop_u128x2(x: u128x2) -> u128x2;
    #[link_name = "llvm.ctpop.v4i128"]
    fn ctpop_u128x4(x: u128x4) -> u128x4;
}
149 | ||
/// Lane-wise bit-counting operations, implemented in this file for the
/// crate's integer vector types.
///
/// Each method consumes the vector and returns a vector of the same type
/// whose lanes hold the count computed for the corresponding input lane.
//
// `pub(crate)` is the stable spelling of the former bare `crate` visibility
// modifier, an unstable feature that has been removed from the compiler; the
// resulting visibility is the same.
pub(crate) trait BitManip {
    /// Number of set bits in each lane.
    fn ctpop(self) -> Self;
    /// Number of leading zero bits in each lane.
    fn ctlz(self) -> Self;
    /// Number of trailing zero bits in each lane.
    fn cttz(self) -> Self;
}
155 | ||
// Generates `BitManip` impls.
//
// Arms, in matching order:
// * `inner:`        one impl backed by the three named intrinsic bindings;
// * `sized_inner:`  one impl that delegates to another vector type's impl;
// * `scalar:`       one impl that computes every lane with scalar integer ops;
// * (unlabelled)    an unsigned/signed pair, both through `inner:`;
// * `sized:`        an unsigned/signed pair, both through `sized_inner:`.
//
// The first two arms are replaced by the `scalar:` arm on s390x.
macro_rules! impl_bit_manip {
    (inner: $vec:ident, $elem:ty, $uvec:ident,
     $popcnt:ident, $lzcnt:ident, $tzcnt:ident) => {
        // FIXME: several LLVM intrinsics break on s390x https://github.com/rust-lang-nursery/packed_simd/issues/192
        #[cfg(target_arch = "s390x")]
        impl_bit_manip! { scalar: $vec, $elem }
        #[cfg(not(target_arch = "s390x"))]
        impl BitManip for $vec {
            // Each method casts to the unsigned vector the intrinsic is
            // declared for, calls the intrinsic, and casts the result back.
            // NOTE(review): soundness of the `unsafe` calls relies on the
            // binding's signature matching the LLVM intrinsic — confirm.
            #[inline]
            fn ctpop(self) -> Self {
                let bits: $uvec = self.cast();
                let counts = unsafe { $popcnt(bits) };
                counts.cast()
            }

            #[inline]
            fn ctlz(self) -> Self {
                let bits: $uvec = self.cast();
                // `is_zero_undef` must be a compile-time constant for the
                // ctlz/cttz intrinsics, hence the literal.
                let counts = unsafe { $lzcnt(bits, false) };
                counts.cast()
            }

            #[inline]
            fn cttz(self) -> Self {
                let bits: $uvec = self.cast();
                let counts = unsafe { $tzcnt(bits, false) };
                counts.cast()
            }
        }
    };
    (sized_inner: $vec:ident, $elem:ty, $uvec:ident) => {
        #[cfg(target_arch = "s390x")]
        impl_bit_manip! { scalar: $vec, $elem }
        #[cfg(not(target_arch = "s390x"))]
        impl BitManip for $vec {
            // Cast to `$uvec`, run the operation there, cast back.
            #[inline]
            fn ctpop(self) -> Self {
                let fixed: $uvec = self.cast();
                let counts = $uvec::ctpop(fixed);
                counts.cast()
            }

            #[inline]
            fn ctlz(self) -> Self {
                let fixed: $uvec = self.cast();
                let counts = $uvec::ctlz(fixed);
                counts.cast()
            }

            #[inline]
            fn cttz(self) -> Self {
                let fixed: $uvec = self.cast();
                let counts = $uvec::cttz(fixed);
                counts.cast()
            }
        }
    };
    (scalar: $vec:ident, $elem:ty) => {
        impl BitManip for $vec {
            // Start from a copy of `self` and overwrite one lane per step with
            // the scalar count for that lane of the original vector.
            #[inline]
            fn ctpop(self) -> Self {
                (0..Self::lanes()).fold(self, |acc, lane| {
                    acc.replace(lane, self.extract(lane).count_ones() as $elem)
                })
            }

            #[inline]
            fn ctlz(self) -> Self {
                (0..Self::lanes()).fold(self, |acc, lane| {
                    acc.replace(lane, self.extract(lane).leading_zeros() as $elem)
                })
            }

            #[inline]
            fn cttz(self) -> Self {
                (0..Self::lanes()).fold(self, |acc, lane| {
                    acc.replace(lane, self.extract(lane).trailing_zeros() as $elem)
                })
            }
        }
    };
    ($uvec:ident, $uelem:ty, $ivec:ident, $ielem:ty,
     $popcnt:ident, $lzcnt:ident, $tzcnt:ident) => {
        impl_bit_manip! { inner: $uvec, $uelem, $uvec, $popcnt, $lzcnt, $tzcnt }
        impl_bit_manip! { inner: $ivec, $ielem, $uvec, $popcnt, $lzcnt, $tzcnt }
    };
    (sized: $uvec:ident, $uelem:ty, $ivec:ident,
     $ielem:ty, $fixed:ident) => {
        impl_bit_manip! { sized_inner: $uvec, $uelem, $fixed }
        impl_bit_manip! { sized_inner: $ivec, $ielem, $fixed }
    };
}
257 | ||
// Instantiate `BitManip` for every fixed-width integer vector. Each line names
// the unsigned vector and its element type, the signed vector and its element
// type, and the ctpop/ctlz/cttz intrinsic bindings declared for the unsigned
// vector; the macro emits one impl for the unsigned and one for the signed
// type.
impl_bit_manip! { u8x2 , u8, i8x2, i8, ctpop_u8x2, ctlz_u8x2, cttz_u8x2 }
impl_bit_manip! { u8x4 , u8, i8x4, i8, ctpop_u8x4, ctlz_u8x4, cttz_u8x4 }
// On aarch64 the `u8x8`/`i8x8` impls are written by hand further down in this
// file (they avoid the `llvm.cttz.v8i8` intrinsic), so the macro-generated
// pair is skipped there.
#[cfg(not(target_arch = "aarch64"))] // see below
impl_bit_manip! { u8x8 , u8, i8x8, i8, ctpop_u8x8, ctlz_u8x8, cttz_u8x8 }
impl_bit_manip! { u8x16 , u8, i8x16, i8, ctpop_u8x16, ctlz_u8x16, cttz_u8x16 }
impl_bit_manip! { u8x32 , u8, i8x32, i8, ctpop_u8x32, ctlz_u8x32, cttz_u8x32 }
impl_bit_manip! { u8x64 , u8, i8x64, i8, ctpop_u8x64, ctlz_u8x64, cttz_u8x64 }
impl_bit_manip! { u16x2 , u16, i16x2, i16, ctpop_u16x2, ctlz_u16x2, cttz_u16x2 }
impl_bit_manip! { u16x4 , u16, i16x4, i16, ctpop_u16x4, ctlz_u16x4, cttz_u16x4 }
impl_bit_manip! { u16x8 , u16, i16x8, i16, ctpop_u16x8, ctlz_u16x8, cttz_u16x8 }
impl_bit_manip! { u16x16 , u16, i16x16, i16, ctpop_u16x16, ctlz_u16x16, cttz_u16x16 }
impl_bit_manip! { u16x32 , u16, i16x32, i16, ctpop_u16x32, ctlz_u16x32, cttz_u16x32 }
impl_bit_manip! { u32x2 , u32, i32x2, i32, ctpop_u32x2, ctlz_u32x2, cttz_u32x2 }
impl_bit_manip! { u32x4 , u32, i32x4, i32, ctpop_u32x4, ctlz_u32x4, cttz_u32x4 }
impl_bit_manip! { u32x8 , u32, i32x8, i32, ctpop_u32x8, ctlz_u32x8, cttz_u32x8 }
impl_bit_manip! { u32x16 , u32, i32x16, i32, ctpop_u32x16, ctlz_u32x16, cttz_u32x16 }
impl_bit_manip! { u64x2 , u64, i64x2, i64, ctpop_u64x2, ctlz_u64x2, cttz_u64x2 }
impl_bit_manip! { u64x4 , u64, i64x4, i64, ctpop_u64x4, ctlz_u64x4, cttz_u64x4 }
impl_bit_manip! { u64x8 , u64, i64x8, i64, ctpop_u64x8, ctlz_u64x8, cttz_u64x8 }
impl_bit_manip! { u128x1 , u128, i128x1, i128, ctpop_u128x1, ctlz_u128x1, cttz_u128x1 }
impl_bit_manip! { u128x2 , u128, i128x2, i128, ctpop_u128x2, ctlz_u128x2, cttz_u128x2 }
impl_bit_manip! { u128x4 , u128, i128x4, i128, ctpop_u128x4, ctlz_u128x4, cttz_u128x4 }
280 | ||
// aarch64-only `BitManip` for `u8x8`: `ctpop` and `ctlz` call the LLVM
// intrinsic bindings, `cttz` is computed one lane at a time.
#[cfg(target_arch = "aarch64")]
impl BitManip for u8x8 {
    #[inline]
    fn ctpop(self) -> Self {
        let bits: u8x8 = self.cast();
        let counts = unsafe { ctpop_u8x8(bits) };
        counts.cast()
    }

    #[inline]
    fn ctlz(self) -> Self {
        let bits: u8x8 = self.cast();
        let counts = unsafe { ctlz_u8x8(bits, false) };
        counts.cast()
    }

    #[inline]
    fn cttz(self) -> Self {
        // FIXME: LLVM cttz.v8i8 broken on aarch64 https://github.com/rust-lang-nursery/packed_simd/issues/191
        // OPTIMIZE: adapt the algorithm used for v8i16/etc to Rust's aarch64
        // intrinsics
        // Until then: start from a copy of `self` and overwrite each lane
        // with the scalar trailing-zero count of the original lane.
        (0..Self::lanes()).fold(self, |acc, lane| {
            acc.replace(lane, self.extract(lane).trailing_zeros() as u8)
        })
    }
}
// aarch64-only `BitManip` for `i8x8`: `ctpop` and `ctlz` cast to `u8x8`, call
// the LLVM intrinsic bindings and cast back; `cttz` is computed one lane at a
// time.
#[cfg(target_arch = "aarch64")]
impl BitManip for i8x8 {
    #[inline]
    fn ctpop(self) -> Self {
        let bits: u8x8 = self.cast();
        let counts = unsafe { ctpop_u8x8(bits) };
        counts.cast()
    }

    #[inline]
    fn ctlz(self) -> Self {
        let bits: u8x8 = self.cast();
        let counts = unsafe { ctlz_u8x8(bits, false) };
        counts.cast()
    }

    #[inline]
    fn cttz(self) -> Self {
        // FIXME: LLVM cttz.v8i8 broken on aarch64 https://github.com/rust-lang-nursery/packed_simd/issues/191
        // OPTIMIZE: adapt the algorithm used for v8i16/etc to Rust's aarch64
        // intrinsics
        // Until then: start from a copy of `self` and overwrite each lane
        // with the scalar trailing-zero count of the original lane.
        (0..Self::lanes()).fold(self, |acc, lane| {
            acc.replace(lane, self.extract(lane).trailing_zeros() as i8)
        })
    }
}
333 | ||
// Pointer-sized vectors (`usizexN` / `isizexN`) reuse the impl of the unsigned
// fixed-width vector whose lane width equals `target_pointer_width`: the
// `sized:` arm of `impl_bit_manip!` casts to the type named last, runs the
// operation there, and casts the result back. Any pointer width not listed
// here is rejected at compile time.
cfg_if! {
    if #[cfg(target_pointer_width = "8")] {
        impl_bit_manip! { sized: usizex2, usize, isizex2, isize, u8x2 }
        impl_bit_manip! { sized: usizex4, usize, isizex4, isize, u8x4 }
        impl_bit_manip! { sized: usizex8, usize, isizex8, isize, u8x8 }
    } else if #[cfg(target_pointer_width = "16")] {
        impl_bit_manip! { sized: usizex2, usize, isizex2, isize, u16x2 }
        impl_bit_manip! { sized: usizex4, usize, isizex4, isize, u16x4 }
        impl_bit_manip! { sized: usizex8, usize, isizex8, isize, u16x8 }
    } else if #[cfg(target_pointer_width = "32")] {
        impl_bit_manip! { sized: usizex2, usize, isizex2, isize, u32x2 }
        impl_bit_manip! { sized: usizex4, usize, isizex4, isize, u32x4 }
        impl_bit_manip! { sized: usizex8, usize, isizex8, isize, u32x8 }
    } else if #[cfg(target_pointer_width = "64")] {
        impl_bit_manip! { sized: usizex2, usize, isizex2, isize, u64x2 }
        impl_bit_manip! { sized: usizex4, usize, isizex4, isize, u64x4 }
        impl_bit_manip! { sized: usizex8, usize, isizex8, isize, u64x8 }
    } else {
        // Fail the build rather than silently leave the pointer-sized
        // vectors without a `BitManip` impl.
        compile_error!("unsupported target_pointer_width");
    }
}