]> git.proxmox.com Git - rustc.git/blame - vendor/packed_simd_2/src/codegen/bit_manip.rs
New upstream version 1.68.2+dfsg1
[rustc.git] / vendor / packed_simd_2 / src / codegen / bit_manip.rs
CommitLineData
//! LLVM bit manipulation intrinsics.
cdc7bbd5 2#[rustfmt::skip]
f20569fa 3
f25598a0 4pub(crate) use crate::*;
f20569fa
XL
5
6#[allow(improper_ctypes, dead_code)]
7extern "C" {
8 #[link_name = "llvm.ctlz.v2i8"]
9 fn ctlz_u8x2(x: u8x2, is_zero_undef: bool) -> u8x2;
10 #[link_name = "llvm.ctlz.v4i8"]
11 fn ctlz_u8x4(x: u8x4, is_zero_undef: bool) -> u8x4;
12 #[link_name = "llvm.ctlz.v8i8"]
13 fn ctlz_u8x8(x: u8x8, is_zero_undef: bool) -> u8x8;
14 #[link_name = "llvm.ctlz.v16i8"]
15 fn ctlz_u8x16(x: u8x16, is_zero_undef: bool) -> u8x16;
16 #[link_name = "llvm.ctlz.v32i8"]
17 fn ctlz_u8x32(x: u8x32, is_zero_undef: bool) -> u8x32;
18 #[link_name = "llvm.ctlz.v64i8"]
19 fn ctlz_u8x64(x: u8x64, is_zero_undef: bool) -> u8x64;
20
21 #[link_name = "llvm.ctlz.v2i16"]
22 fn ctlz_u16x2(x: u16x2, is_zero_undef: bool) -> u16x2;
23 #[link_name = "llvm.ctlz.v4i16"]
24 fn ctlz_u16x4(x: u16x4, is_zero_undef: bool) -> u16x4;
25 #[link_name = "llvm.ctlz.v8i16"]
26 fn ctlz_u16x8(x: u16x8, is_zero_undef: bool) -> u16x8;
27 #[link_name = "llvm.ctlz.v16i16"]
28 fn ctlz_u16x16(x: u16x16, is_zero_undef: bool) -> u16x16;
29 #[link_name = "llvm.ctlz.v32i16"]
30 fn ctlz_u16x32(x: u16x32, is_zero_undef: bool) -> u16x32;
31
32 #[link_name = "llvm.ctlz.v2i32"]
33 fn ctlz_u32x2(x: u32x2, is_zero_undef: bool) -> u32x2;
34 #[link_name = "llvm.ctlz.v4i32"]
35 fn ctlz_u32x4(x: u32x4, is_zero_undef: bool) -> u32x4;
36 #[link_name = "llvm.ctlz.v8i32"]
37 fn ctlz_u32x8(x: u32x8, is_zero_undef: bool) -> u32x8;
38 #[link_name = "llvm.ctlz.v16i32"]
39 fn ctlz_u32x16(x: u32x16, is_zero_undef: bool) -> u32x16;
40
41 #[link_name = "llvm.ctlz.v2i64"]
42 fn ctlz_u64x2(x: u64x2, is_zero_undef: bool) -> u64x2;
43 #[link_name = "llvm.ctlz.v4i64"]
44 fn ctlz_u64x4(x: u64x4, is_zero_undef: bool) -> u64x4;
45 #[link_name = "llvm.ctlz.v8i64"]
46 fn ctlz_u64x8(x: u64x8, is_zero_undef: bool) -> u64x8;
47
48 #[link_name = "llvm.ctlz.v1i128"]
49 fn ctlz_u128x1(x: u128x1, is_zero_undef: bool) -> u128x1;
50 #[link_name = "llvm.ctlz.v2i128"]
51 fn ctlz_u128x2(x: u128x2, is_zero_undef: bool) -> u128x2;
52 #[link_name = "llvm.ctlz.v4i128"]
53 fn ctlz_u128x4(x: u128x4, is_zero_undef: bool) -> u128x4;
54
55 #[link_name = "llvm.cttz.v2i8"]
56 fn cttz_u8x2(x: u8x2, is_zero_undef: bool) -> u8x2;
57 #[link_name = "llvm.cttz.v4i8"]
58 fn cttz_u8x4(x: u8x4, is_zero_undef: bool) -> u8x4;
59 #[link_name = "llvm.cttz.v8i8"]
60 fn cttz_u8x8(x: u8x8, is_zero_undef: bool) -> u8x8;
61 #[link_name = "llvm.cttz.v16i8"]
62 fn cttz_u8x16(x: u8x16, is_zero_undef: bool) -> u8x16;
63 #[link_name = "llvm.cttz.v32i8"]
64 fn cttz_u8x32(x: u8x32, is_zero_undef: bool) -> u8x32;
65 #[link_name = "llvm.cttz.v64i8"]
66 fn cttz_u8x64(x: u8x64, is_zero_undef: bool) -> u8x64;
67
68 #[link_name = "llvm.cttz.v2i16"]
69 fn cttz_u16x2(x: u16x2, is_zero_undef: bool) -> u16x2;
70 #[link_name = "llvm.cttz.v4i16"]
71 fn cttz_u16x4(x: u16x4, is_zero_undef: bool) -> u16x4;
72 #[link_name = "llvm.cttz.v8i16"]
73 fn cttz_u16x8(x: u16x8, is_zero_undef: bool) -> u16x8;
74 #[link_name = "llvm.cttz.v16i16"]
75 fn cttz_u16x16(x: u16x16, is_zero_undef: bool) -> u16x16;
76 #[link_name = "llvm.cttz.v32i16"]
77 fn cttz_u16x32(x: u16x32, is_zero_undef: bool) -> u16x32;
78
79 #[link_name = "llvm.cttz.v2i32"]
80 fn cttz_u32x2(x: u32x2, is_zero_undef: bool) -> u32x2;
81 #[link_name = "llvm.cttz.v4i32"]
82 fn cttz_u32x4(x: u32x4, is_zero_undef: bool) -> u32x4;
83 #[link_name = "llvm.cttz.v8i32"]
84 fn cttz_u32x8(x: u32x8, is_zero_undef: bool) -> u32x8;
85 #[link_name = "llvm.cttz.v16i32"]
86 fn cttz_u32x16(x: u32x16, is_zero_undef: bool) -> u32x16;
87
88 #[link_name = "llvm.cttz.v2i64"]
89 fn cttz_u64x2(x: u64x2, is_zero_undef: bool) -> u64x2;
90 #[link_name = "llvm.cttz.v4i64"]
91 fn cttz_u64x4(x: u64x4, is_zero_undef: bool) -> u64x4;
92 #[link_name = "llvm.cttz.v8i64"]
93 fn cttz_u64x8(x: u64x8, is_zero_undef: bool) -> u64x8;
94
95 #[link_name = "llvm.cttz.v1i128"]
96 fn cttz_u128x1(x: u128x1, is_zero_undef: bool) -> u128x1;
97 #[link_name = "llvm.cttz.v2i128"]
98 fn cttz_u128x2(x: u128x2, is_zero_undef: bool) -> u128x2;
99 #[link_name = "llvm.cttz.v4i128"]
100 fn cttz_u128x4(x: u128x4, is_zero_undef: bool) -> u128x4;
101
102 #[link_name = "llvm.ctpop.v2i8"]
103 fn ctpop_u8x2(x: u8x2) -> u8x2;
104 #[link_name = "llvm.ctpop.v4i8"]
105 fn ctpop_u8x4(x: u8x4) -> u8x4;
106 #[link_name = "llvm.ctpop.v8i8"]
107 fn ctpop_u8x8(x: u8x8) -> u8x8;
108 #[link_name = "llvm.ctpop.v16i8"]
109 fn ctpop_u8x16(x: u8x16) -> u8x16;
110 #[link_name = "llvm.ctpop.v32i8"]
111 fn ctpop_u8x32(x: u8x32) -> u8x32;
112 #[link_name = "llvm.ctpop.v64i8"]
113 fn ctpop_u8x64(x: u8x64) -> u8x64;
114
115 #[link_name = "llvm.ctpop.v2i16"]
116 fn ctpop_u16x2(x: u16x2) -> u16x2;
117 #[link_name = "llvm.ctpop.v4i16"]
118 fn ctpop_u16x4(x: u16x4) -> u16x4;
119 #[link_name = "llvm.ctpop.v8i16"]
120 fn ctpop_u16x8(x: u16x8) -> u16x8;
121 #[link_name = "llvm.ctpop.v16i16"]
122 fn ctpop_u16x16(x: u16x16) -> u16x16;
123 #[link_name = "llvm.ctpop.v32i16"]
124 fn ctpop_u16x32(x: u16x32) -> u16x32;
125
126 #[link_name = "llvm.ctpop.v2i32"]
127 fn ctpop_u32x2(x: u32x2) -> u32x2;
128 #[link_name = "llvm.ctpop.v4i32"]
129 fn ctpop_u32x4(x: u32x4) -> u32x4;
130 #[link_name = "llvm.ctpop.v8i32"]
131 fn ctpop_u32x8(x: u32x8) -> u32x8;
132 #[link_name = "llvm.ctpop.v16i32"]
133 fn ctpop_u32x16(x: u32x16) -> u32x16;
134
135 #[link_name = "llvm.ctpop.v2i64"]
136 fn ctpop_u64x2(x: u64x2) -> u64x2;
137 #[link_name = "llvm.ctpop.v4i64"]
138 fn ctpop_u64x4(x: u64x4) -> u64x4;
139 #[link_name = "llvm.ctpop.v8i64"]
140 fn ctpop_u64x8(x: u64x8) -> u64x8;
141
142 #[link_name = "llvm.ctpop.v1i128"]
143 fn ctpop_u128x1(x: u128x1) -> u128x1;
144 #[link_name = "llvm.ctpop.v2i128"]
145 fn ctpop_u128x2(x: u128x2) -> u128x2;
146 #[link_name = "llvm.ctpop.v4i128"]
147 fn ctpop_u128x4(x: u128x4) -> u128x4;
148}
149
/// Lane-wise bit-counting operations on packed vectors.
///
/// Every method consumes the vector and returns a vector of the same type
/// whose lanes hold the count computed from the corresponding input lane.
pub(crate) trait BitManip {
    /// Number of set bits in each lane (population count).
    fn ctpop(self) -> Self;
    /// Number of leading zero bits in each lane.
    fn ctlz(self) -> Self;
    /// Number of trailing zero bits in each lane.
    fn cttz(self) -> Self;
}
155
// Implements `BitManip` for packed vector types.
//
// Arms:
// * `inner:`       - one vector type `$ty`, backed by the LLVM intrinsic
//                    bindings declared for the unsigned vector type `$uty`
//                    (lane-by-lane fallback on s390x).
// * `sized_inner:` - one pointer-sized vector type `$ty`, delegating to the
//                    `ctpop`/`ctlz`/`cttz` of the fixed-width vector `$uty`
//                    (lane-by-lane fallback on s390x).
// * `scalar:`      - lane-by-lane fallback built on the scalar integer
//                    methods `count_ones`/`leading_zeros`/`trailing_zeros`.
// * (no prefix)    - an unsigned/signed pair sharing one set of intrinsics.
// * `sized:`       - a `usize`/`isize` pair sharing one fixed-width vector.
macro_rules! impl_bit_manip {
    (inner: $ty:ident, $scalar:ty, $uty:ident,
     $ctpop:ident, $ctlz:ident, $cttz:ident) => {
        // FIXME: several LLVM intrinsics break on s390x https://github.com/rust-lang-nursery/packed_simd/issues/192
        #[cfg(target_arch = "s390x")]
        impl_bit_manip! { scalar: $ty, $scalar }
        #[cfg(not(target_arch = "s390x"))]
        impl BitManip for $ty {
            #[inline]
            fn ctpop(self) -> Self {
                let y: $uty = self.cast();
                // SAFETY: calls the foreign intrinsic binding passed to the
                // macro; the invocation is responsible for pairing `$uty`
                // with the binding declared for that vector type.
                unsafe { $ctpop(y).cast() }
            }

            #[inline]
            fn ctlz(self) -> Self {
                let y: $uty = self.cast();
                // the ctxx intrinsics need compile-time constant
                // `is_zero_undef`
                // SAFETY: same contract as `ctpop`; `is_zero_undef` is the
                // literal `false`.
                unsafe { $ctlz(y, false).cast() }
            }

            #[inline]
            fn cttz(self) -> Self {
                let y: $uty = self.cast();
                // SAFETY: same contract as `ctpop`; `is_zero_undef` is the
                // literal `false`.
                unsafe { $cttz(y, false).cast() }
            }
        }
    };
    (sized_inner: $ty:ident, $scalar:ty, $uty:ident) => {
        #[cfg(target_arch = "s390x")]
        impl_bit_manip! { scalar: $ty, $scalar }
        #[cfg(not(target_arch = "s390x"))]
        impl BitManip for $ty {
            #[inline]
            fn ctpop(self) -> Self {
                let y: $uty = self.cast();
                $uty::ctpop(y).cast()
            }

            #[inline]
            fn ctlz(self) -> Self {
                let y: $uty = self.cast();
                $uty::ctlz(y).cast()
            }

            #[inline]
            fn cttz(self) -> Self {
                let y: $uty = self.cast();
                $uty::cttz(y).cast()
            }
        }
    };
    (scalar: $ty:ident, $scalar:ty) => {
        impl BitManip for $ty {
            #[inline]
            fn ctpop(self) -> Self {
                // Start from a copy of `self` and overwrite every lane with
                // its bit count.
                let mut ones = self;
                for i in 0..Self::lanes() {
                    ones = ones.replace(i, self.extract(i).count_ones() as $scalar);
                }
                ones
            }

            #[inline]
            fn ctlz(self) -> Self {
                let mut lz = self;
                for i in 0..Self::lanes() {
                    lz = lz.replace(i, self.extract(i).leading_zeros() as $scalar);
                }
                lz
            }

            #[inline]
            fn cttz(self) -> Self {
                let mut tz = self;
                for i in 0..Self::lanes() {
                    tz = tz.replace(i, self.extract(i).trailing_zeros() as $scalar);
                }
                tz
            }
        }
    };
    ($uty:ident, $uscalar:ty, $ity:ident, $iscalar:ty,
     $ctpop:ident, $ctlz:ident, $cttz:ident) => {
        // Both the unsigned and the signed vector go through the intrinsics
        // of the unsigned vector type.
        impl_bit_manip! { inner: $uty, $uscalar, $uty, $ctpop, $ctlz, $cttz }
        impl_bit_manip! { inner: $ity, $iscalar, $uty, $ctpop, $ctlz, $cttz }
    };
    (sized: $usize:ident, $uscalar:ty, $isize:ident,
     $iscalar:ty, $ty:ident) => {
        impl_bit_manip! { sized_inner: $usize, $uscalar, $ty }
        impl_bit_manip! { sized_inner: $isize, $iscalar, $ty }
    };
}
250
251impl_bit_manip! { u8x2 , u8, i8x2, i8, ctpop_u8x2, ctlz_u8x2, cttz_u8x2 }
252impl_bit_manip! { u8x4 , u8, i8x4, i8, ctpop_u8x4, ctlz_u8x4, cttz_u8x4 }
253#[cfg(not(target_arch = "aarch64"))] // see below
254impl_bit_manip! { u8x8 , u8, i8x8, i8, ctpop_u8x8, ctlz_u8x8, cttz_u8x8 }
255impl_bit_manip! { u8x16 , u8, i8x16, i8, ctpop_u8x16, ctlz_u8x16, cttz_u8x16 }
256impl_bit_manip! { u8x32 , u8, i8x32, i8, ctpop_u8x32, ctlz_u8x32, cttz_u8x32 }
257impl_bit_manip! { u8x64 , u8, i8x64, i8, ctpop_u8x64, ctlz_u8x64, cttz_u8x64 }
258impl_bit_manip! { u16x2 , u16, i16x2, i16, ctpop_u16x2, ctlz_u16x2, cttz_u16x2 }
259impl_bit_manip! { u16x4 , u16, i16x4, i16, ctpop_u16x4, ctlz_u16x4, cttz_u16x4 }
260impl_bit_manip! { u16x8 , u16, i16x8, i16, ctpop_u16x8, ctlz_u16x8, cttz_u16x8 }
261impl_bit_manip! { u16x16 , u16, i16x16, i16, ctpop_u16x16, ctlz_u16x16, cttz_u16x16 }
262impl_bit_manip! { u16x32 , u16, i16x32, i16, ctpop_u16x32, ctlz_u16x32, cttz_u16x32 }
263impl_bit_manip! { u32x2 , u32, i32x2, i32, ctpop_u32x2, ctlz_u32x2, cttz_u32x2 }
264impl_bit_manip! { u32x4 , u32, i32x4, i32, ctpop_u32x4, ctlz_u32x4, cttz_u32x4 }
265impl_bit_manip! { u32x8 , u32, i32x8, i32, ctpop_u32x8, ctlz_u32x8, cttz_u32x8 }
266impl_bit_manip! { u32x16 , u32, i32x16, i32, ctpop_u32x16, ctlz_u32x16, cttz_u32x16 }
267impl_bit_manip! { u64x2 , u64, i64x2, i64, ctpop_u64x2, ctlz_u64x2, cttz_u64x2 }
268impl_bit_manip! { u64x4 , u64, i64x4, i64, ctpop_u64x4, ctlz_u64x4, cttz_u64x4 }
269impl_bit_manip! { u64x8 , u64, i64x8, i64, ctpop_u64x8, ctlz_u64x8, cttz_u64x8 }
270impl_bit_manip! { u128x1 , u128, i128x1, i128, ctpop_u128x1, ctlz_u128x1, cttz_u128x1 }
271impl_bit_manip! { u128x2 , u128, i128x2, i128, ctpop_u128x2, ctlz_u128x2, cttz_u128x2 }
272impl_bit_manip! { u128x4 , u128, i128x4, i128, ctpop_u128x4, ctlz_u128x4, cttz_u128x4 }
273
// aarch64-only `BitManip` for `u8x8`: `ctpop` and `ctlz` use the LLVM
// intrinsics, `cttz` is computed lane by lane (see the FIXME inside).
#[cfg(target_arch = "aarch64")]
impl BitManip for u8x8 {
    #[inline]
    fn ctpop(self) -> Self {
        // SAFETY: `ctpop_u8x8` is the binding declared for `u8x8`, which is
        // exactly the type of `self`.
        unsafe { ctpop_u8x8(self) }
    }

    #[inline]
    fn ctlz(self) -> Self {
        // SAFETY: `ctlz_u8x8` is the binding declared for `u8x8`;
        // `is_zero_undef` is the literal `false`.
        unsafe { ctlz_u8x8(self, false) }
    }

    #[inline]
    fn cttz(self) -> Self {
        // FIXME: LLVM cttz.v8i8 broken on aarch64 https://github.com/rust-lang-nursery/packed_simd/issues/191
        // OPTIMIZE: adapt the algorithm used for v8i16/etc to Rust's aarch64
        // intrinsics
        let mut tz = self;
        for i in 0..Self::lanes() {
            tz = tz.replace(i, self.extract(i).trailing_zeros() as u8);
        }
        tz
    }
}
// aarch64-only `BitManip` for `i8x8`: `ctpop` and `ctlz` go through the
// `u8x8` LLVM intrinsics, `cttz` is computed lane by lane (see the FIXME
// inside).
#[cfg(target_arch = "aarch64")]
impl BitManip for i8x8 {
    #[inline]
    fn ctpop(self) -> Self {
        let y: u8x8 = self.cast();
        // SAFETY: `ctpop_u8x8` is the binding declared for `u8x8`, the type
        // of `y`.
        unsafe { ctpop_u8x8(y).cast() }
    }

    #[inline]
    fn ctlz(self) -> Self {
        let y: u8x8 = self.cast();
        // SAFETY: `ctlz_u8x8` is the binding declared for `u8x8`;
        // `is_zero_undef` is the literal `false`.
        unsafe { ctlz_u8x8(y, false).cast() }
    }

    #[inline]
    fn cttz(self) -> Self {
        // FIXME: LLVM cttz.v8i8 broken on aarch64 https://github.com/rust-lang-nursery/packed_simd/issues/191
        // OPTIMIZE: adapt the algorithm used for v8i16/etc to Rust's aarch64
        // intrinsics
        let mut tz = self;
        for i in 0..Self::lanes() {
            tz = tz.replace(i, self.extract(i).trailing_zeros() as i8);
        }
        tz
    }
}
326
327cfg_if! {
328 if #[cfg(target_pointer_width = "8")] {
329 impl_bit_manip! { sized: usizex2, usize, isizex2, isize, u8x2 }
330 impl_bit_manip! { sized: usizex4, usize, isizex4, isize, u8x4 }
331 impl_bit_manip! { sized: usizex8, usize, isizex8, isize, u8x8 }
332 } else if #[cfg(target_pointer_width = "16")] {
333 impl_bit_manip! { sized: usizex2, usize, isizex2, isize, u16x2 }
334 impl_bit_manip! { sized: usizex4, usize, isizex4, isize, u16x4 }
335 impl_bit_manip! { sized: usizex8, usize, isizex8, isize, u16x8 }
336 } else if #[cfg(target_pointer_width = "32")] {
337 impl_bit_manip! { sized: usizex2, usize, isizex2, isize, u32x2 }
338 impl_bit_manip! { sized: usizex4, usize, isizex4, isize, u32x4 }
339 impl_bit_manip! { sized: usizex8, usize, isizex8, isize, u32x8 }
340 } else if #[cfg(target_pointer_width = "64")] {
341 impl_bit_manip! { sized: usizex2, usize, isizex2, isize, u64x2 }
342 impl_bit_manip! { sized: usizex4, usize, isizex4, isize, u64x4 }
343 impl_bit_manip! { sized: usizex8, usize, isizex8, isize, u64x8 }
344 } else {
345 compile_error!("unsupported target_pointer_width");
346 }
347}