1 //! LLVM bit manipulation intrinsics.
6 #[allow(improper_ctypes, dead_code)]
8 #[link_name = "llvm.ctlz.v2i8"]
9 fn ctlz_u8x2(x
: u8x2
, is_zero_undef
: bool
) -> u8x2
;
10 #[link_name = "llvm.ctlz.v4i8"]
11 fn ctlz_u8x4(x
: u8x4
, is_zero_undef
: bool
) -> u8x4
;
12 #[link_name = "llvm.ctlz.v8i8"]
13 fn ctlz_u8x8(x
: u8x8
, is_zero_undef
: bool
) -> u8x8
;
14 #[link_name = "llvm.ctlz.v16i8"]
15 fn ctlz_u8x16(x
: u8x16
, is_zero_undef
: bool
) -> u8x16
;
16 #[link_name = "llvm.ctlz.v32i8"]
17 fn ctlz_u8x32(x
: u8x32
, is_zero_undef
: bool
) -> u8x32
;
18 #[link_name = "llvm.ctlz.v64i8"]
19 fn ctlz_u8x64(x
: u8x64
, is_zero_undef
: bool
) -> u8x64
;
21 #[link_name = "llvm.ctlz.v2i16"]
22 fn ctlz_u16x2(x
: u16x2
, is_zero_undef
: bool
) -> u16x2
;
23 #[link_name = "llvm.ctlz.v4i16"]
24 fn ctlz_u16x4(x
: u16x4
, is_zero_undef
: bool
) -> u16x4
;
25 #[link_name = "llvm.ctlz.v8i16"]
26 fn ctlz_u16x8(x
: u16x8
, is_zero_undef
: bool
) -> u16x8
;
27 #[link_name = "llvm.ctlz.v16i16"]
28 fn ctlz_u16x16(x
: u16x16
, is_zero_undef
: bool
) -> u16x16
;
29 #[link_name = "llvm.ctlz.v32i16"]
30 fn ctlz_u16x32(x
: u16x32
, is_zero_undef
: bool
) -> u16x32
;
32 #[link_name = "llvm.ctlz.v2i32"]
33 fn ctlz_u32x2(x
: u32x2
, is_zero_undef
: bool
) -> u32x2
;
34 #[link_name = "llvm.ctlz.v4i32"]
35 fn ctlz_u32x4(x
: u32x4
, is_zero_undef
: bool
) -> u32x4
;
36 #[link_name = "llvm.ctlz.v8i32"]
37 fn ctlz_u32x8(x
: u32x8
, is_zero_undef
: bool
) -> u32x8
;
38 #[link_name = "llvm.ctlz.v16i32"]
39 fn ctlz_u32x16(x
: u32x16
, is_zero_undef
: bool
) -> u32x16
;
41 #[link_name = "llvm.ctlz.v2i64"]
42 fn ctlz_u64x2(x
: u64x2
, is_zero_undef
: bool
) -> u64x2
;
43 #[link_name = "llvm.ctlz.v4i64"]
44 fn ctlz_u64x4(x
: u64x4
, is_zero_undef
: bool
) -> u64x4
;
45 #[link_name = "llvm.ctlz.v8i64"]
46 fn ctlz_u64x8(x
: u64x8
, is_zero_undef
: bool
) -> u64x8
;
48 #[link_name = "llvm.ctlz.v1i128"]
49 fn ctlz_u128x1(x
: u128x1
, is_zero_undef
: bool
) -> u128x1
;
50 #[link_name = "llvm.ctlz.v2i128"]
51 fn ctlz_u128x2(x
: u128x2
, is_zero_undef
: bool
) -> u128x2
;
52 #[link_name = "llvm.ctlz.v4i128"]
53 fn ctlz_u128x4(x
: u128x4
, is_zero_undef
: bool
) -> u128x4
;
55 #[link_name = "llvm.cttz.v2i8"]
56 fn cttz_u8x2(x
: u8x2
, is_zero_undef
: bool
) -> u8x2
;
57 #[link_name = "llvm.cttz.v4i8"]
58 fn cttz_u8x4(x
: u8x4
, is_zero_undef
: bool
) -> u8x4
;
59 #[link_name = "llvm.cttz.v8i8"]
60 fn cttz_u8x8(x
: u8x8
, is_zero_undef
: bool
) -> u8x8
;
61 #[link_name = "llvm.cttz.v16i8"]
62 fn cttz_u8x16(x
: u8x16
, is_zero_undef
: bool
) -> u8x16
;
63 #[link_name = "llvm.cttz.v32i8"]
64 fn cttz_u8x32(x
: u8x32
, is_zero_undef
: bool
) -> u8x32
;
65 #[link_name = "llvm.cttz.v64i8"]
66 fn cttz_u8x64(x
: u8x64
, is_zero_undef
: bool
) -> u8x64
;
68 #[link_name = "llvm.cttz.v2i16"]
69 fn cttz_u16x2(x
: u16x2
, is_zero_undef
: bool
) -> u16x2
;
70 #[link_name = "llvm.cttz.v4i16"]
71 fn cttz_u16x4(x
: u16x4
, is_zero_undef
: bool
) -> u16x4
;
72 #[link_name = "llvm.cttz.v8i16"]
73 fn cttz_u16x8(x
: u16x8
, is_zero_undef
: bool
) -> u16x8
;
74 #[link_name = "llvm.cttz.v16i16"]
75 fn cttz_u16x16(x
: u16x16
, is_zero_undef
: bool
) -> u16x16
;
76 #[link_name = "llvm.cttz.v32i16"]
77 fn cttz_u16x32(x
: u16x32
, is_zero_undef
: bool
) -> u16x32
;
79 #[link_name = "llvm.cttz.v2i32"]
80 fn cttz_u32x2(x
: u32x2
, is_zero_undef
: bool
) -> u32x2
;
81 #[link_name = "llvm.cttz.v4i32"]
82 fn cttz_u32x4(x
: u32x4
, is_zero_undef
: bool
) -> u32x4
;
83 #[link_name = "llvm.cttz.v8i32"]
84 fn cttz_u32x8(x
: u32x8
, is_zero_undef
: bool
) -> u32x8
;
85 #[link_name = "llvm.cttz.v16i32"]
86 fn cttz_u32x16(x
: u32x16
, is_zero_undef
: bool
) -> u32x16
;
88 #[link_name = "llvm.cttz.v2i64"]
89 fn cttz_u64x2(x
: u64x2
, is_zero_undef
: bool
) -> u64x2
;
90 #[link_name = "llvm.cttz.v4i64"]
91 fn cttz_u64x4(x
: u64x4
, is_zero_undef
: bool
) -> u64x4
;
92 #[link_name = "llvm.cttz.v8i64"]
93 fn cttz_u64x8(x
: u64x8
, is_zero_undef
: bool
) -> u64x8
;
95 #[link_name = "llvm.cttz.v1i128"]
96 fn cttz_u128x1(x
: u128x1
, is_zero_undef
: bool
) -> u128x1
;
97 #[link_name = "llvm.cttz.v2i128"]
98 fn cttz_u128x2(x
: u128x2
, is_zero_undef
: bool
) -> u128x2
;
99 #[link_name = "llvm.cttz.v4i128"]
100 fn cttz_u128x4(x
: u128x4
, is_zero_undef
: bool
) -> u128x4
;
102 #[link_name = "llvm.ctpop.v2i8"]
103 fn ctpop_u8x2(x
: u8x2
) -> u8x2
;
104 #[link_name = "llvm.ctpop.v4i8"]
105 fn ctpop_u8x4(x
: u8x4
) -> u8x4
;
106 #[link_name = "llvm.ctpop.v8i8"]
107 fn ctpop_u8x8(x
: u8x8
) -> u8x8
;
108 #[link_name = "llvm.ctpop.v16i8"]
109 fn ctpop_u8x16(x
: u8x16
) -> u8x16
;
110 #[link_name = "llvm.ctpop.v32i8"]
111 fn ctpop_u8x32(x
: u8x32
) -> u8x32
;
112 #[link_name = "llvm.ctpop.v64i8"]
113 fn ctpop_u8x64(x
: u8x64
) -> u8x64
;
115 #[link_name = "llvm.ctpop.v2i16"]
116 fn ctpop_u16x2(x
: u16x2
) -> u16x2
;
117 #[link_name = "llvm.ctpop.v4i16"]
118 fn ctpop_u16x4(x
: u16x4
) -> u16x4
;
119 #[link_name = "llvm.ctpop.v8i16"]
120 fn ctpop_u16x8(x
: u16x8
) -> u16x8
;
121 #[link_name = "llvm.ctpop.v16i16"]
122 fn ctpop_u16x16(x
: u16x16
) -> u16x16
;
123 #[link_name = "llvm.ctpop.v32i16"]
124 fn ctpop_u16x32(x
: u16x32
) -> u16x32
;
126 #[link_name = "llvm.ctpop.v2i32"]
127 fn ctpop_u32x2(x
: u32x2
) -> u32x2
;
128 #[link_name = "llvm.ctpop.v4i32"]
129 fn ctpop_u32x4(x
: u32x4
) -> u32x4
;
130 #[link_name = "llvm.ctpop.v8i32"]
131 fn ctpop_u32x8(x
: u32x8
) -> u32x8
;
132 #[link_name = "llvm.ctpop.v16i32"]
133 fn ctpop_u32x16(x
: u32x16
) -> u32x16
;
135 #[link_name = "llvm.ctpop.v2i64"]
136 fn ctpop_u64x2(x
: u64x2
) -> u64x2
;
137 #[link_name = "llvm.ctpop.v4i64"]
138 fn ctpop_u64x4(x
: u64x4
) -> u64x4
;
139 #[link_name = "llvm.ctpop.v8i64"]
140 fn ctpop_u64x8(x
: u64x8
) -> u64x8
;
142 #[link_name = "llvm.ctpop.v1i128"]
143 fn ctpop_u128x1(x
: u128x1
) -> u128x1
;
144 #[link_name = "llvm.ctpop.v2i128"]
145 fn ctpop_u128x2(x
: u128x2
) -> u128x2
;
146 #[link_name = "llvm.ctpop.v4i128"]
147 fn ctpop_u128x4(x
: u128x4
) -> u128x4
;
/// Bit-counting operations on packed vector types.
///
/// Each method consumes the vector and returns a vector of the same type
/// whose lanes hold the corresponding count for the matching input lane.
// `pub(crate)` is the stable spelling of the former unstable `crate`
// visibility modifier; the visibility itself is unchanged.
pub(crate) trait BitManip {
    /// Number of set bits in each lane.
    fn ctpop(self) -> Self;
    /// Number of leading zero bits in each lane.
    fn ctlz(self) -> Self;
    /// Number of trailing zero bits in each lane.
    fn cttz(self) -> Self;
}
// Generates `BitManip` implementations for packed vector types.
//
// Arms (matched in this order):
// * `inner:`       - implement for `$ty` by casting to the unsigned vector
//                    `$uty` and calling the given LLVM intrinsics.
// * `sized_inner:` - implement for `$ty` by casting to `$uty` and delegating
//                    to `$uty`'s own `BitManip` implementation.
// * `scalar:`      - portable fallback that processes one lane at a time with
//                    the scalar integer methods.
// * (no prefix)    - implement for an unsigned vector and its signed
//                    counterpart, both through the unsigned intrinsics.
// * `sized:`       - implement for a `usize`/`isize` vector pair by
//                    delegating to the fixed-width vector `$ty`.
macro_rules! impl_bit_manip {
    (inner: $ty:ident, $scalar:ty, $uty:ident,
     $ctpop:ident, $ctlz:ident, $cttz:ident) => {
        // FIXME: several LLVM intrinsics break on s390x https://github.com/rust-lang-nursery/packed_simd/issues/192
        #[cfg(target_arch = "s390x")]
        impl_bit_manip! { scalar: $ty, $scalar }
        #[cfg(not(target_arch = "s390x"))]
        impl BitManip for $ty {
            #[inline]
            fn ctpop(self) -> Self {
                let y: $uty = self.cast();
                // SAFETY: `$ctpop` is expected to be the intrinsic declared
                // for `$uty`; it receives its operand by value.
                unsafe { $ctpop(y).cast() }
            }

            #[inline]
            fn ctlz(self) -> Self {
                let y: $uty = self.cast();
                // The ctlz/cttz intrinsics need `is_zero_undef` to be a
                // compile-time constant; `false` requests a defined result
                // for zero lanes.
                // SAFETY: `$ctlz` is expected to be the intrinsic declared
                // for `$uty`; it receives its operands by value.
                unsafe { $ctlz(y, false).cast() }
            }

            #[inline]
            fn cttz(self) -> Self {
                let y: $uty = self.cast();
                // SAFETY: as for `ctlz`; `false` keeps zero lanes defined.
                unsafe { $cttz(y, false).cast() }
            }
        }
    };
    (sized_inner: $ty:ident, $scalar:ty, $uty:ident) => {
        #[cfg(target_arch = "s390x")]
        impl_bit_manip! { scalar: $ty, $scalar }
        #[cfg(not(target_arch = "s390x"))]
        impl BitManip for $ty {
            #[inline]
            fn ctpop(self) -> Self {
                let y: $uty = self.cast();
                $uty::ctpop(y).cast()
            }

            #[inline]
            fn ctlz(self) -> Self {
                let y: $uty = self.cast();
                $uty::ctlz(y).cast()
            }

            #[inline]
            fn cttz(self) -> Self {
                let y: $uty = self.cast();
                $uty::cttz(y).cast()
            }
        }
    };
    (scalar: $ty:ident, $scalar:ty) => {
        impl BitManip for $ty {
            #[inline]
            fn ctpop(self) -> Self {
                // Start from `self`; every lane is overwritten below.
                let mut ones = self;
                for i in 0..Self::lanes() {
                    // The count never exceeds the lane's bit width, so the
                    // cast back to the lane type cannot lose information.
                    ones = ones
                        .replace(i, self.extract(i).count_ones() as $scalar);
                }
                ones
            }

            #[inline]
            fn ctlz(self) -> Self {
                let mut lz = self;
                for i in 0..Self::lanes() {
                    lz = lz.replace(
                        i,
                        self.extract(i).leading_zeros() as $scalar,
                    );
                }
                lz
            }

            #[inline]
            fn cttz(self) -> Self {
                let mut tz = self;
                for i in 0..Self::lanes() {
                    tz = tz.replace(
                        i,
                        self.extract(i).trailing_zeros() as $scalar,
                    );
                }
                tz
            }
        }
    };
    ($uty:ident, $uscalar:ty, $ity:ident, $iscalar:ty,
     $ctpop:ident, $ctlz:ident, $cttz:ident) => {
        impl_bit_manip! { inner: $uty, $uscalar, $uty, $ctpop, $ctlz, $cttz }
        impl_bit_manip! { inner: $ity, $iscalar, $uty, $ctpop, $ctlz, $cttz }
    };
    (sized: $uvec:ident, $uscalar:ty, $ivec:ident,
     $iscalar:ty, $ty:ident) => {
        impl_bit_manip! { sized_inner: $uvec, $uscalar, $ty }
        impl_bit_manip! { sized_inner: $ivec, $iscalar, $ty }
    };
}
258 impl_bit_manip
! { u8x2 , u8, i8x2, i8, ctpop_u8x2, ctlz_u8x2, cttz_u8x2 }
259 impl_bit_manip
! { u8x4 , u8, i8x4, i8, ctpop_u8x4, ctlz_u8x4, cttz_u8x4 }
260 #[cfg(not(target_arch = "aarch64"))] // see below
261 impl_bit_manip
! { u8x8 , u8, i8x8, i8, ctpop_u8x8, ctlz_u8x8, cttz_u8x8 }
262 impl_bit_manip
! { u8x16 , u8, i8x16, i8, ctpop_u8x16, ctlz_u8x16, cttz_u8x16 }
263 impl_bit_manip
! { u8x32 , u8, i8x32, i8, ctpop_u8x32, ctlz_u8x32, cttz_u8x32 }
264 impl_bit_manip
! { u8x64 , u8, i8x64, i8, ctpop_u8x64, ctlz_u8x64, cttz_u8x64 }
265 impl_bit_manip
! { u16x2 , u16, i16x2, i16, ctpop_u16x2, ctlz_u16x2, cttz_u16x2 }
266 impl_bit_manip
! { u16x4 , u16, i16x4, i16, ctpop_u16x4, ctlz_u16x4, cttz_u16x4 }
267 impl_bit_manip
! { u16x8 , u16, i16x8, i16, ctpop_u16x8, ctlz_u16x8, cttz_u16x8 }
268 impl_bit_manip
! { u16x16 , u16, i16x16, i16, ctpop_u16x16, ctlz_u16x16, cttz_u16x16 }
269 impl_bit_manip
! { u16x32 , u16, i16x32, i16, ctpop_u16x32, ctlz_u16x32, cttz_u16x32 }
270 impl_bit_manip
! { u32x2 , u32, i32x2, i32, ctpop_u32x2, ctlz_u32x2, cttz_u32x2 }
271 impl_bit_manip
! { u32x4 , u32, i32x4, i32, ctpop_u32x4, ctlz_u32x4, cttz_u32x4 }
272 impl_bit_manip
! { u32x8 , u32, i32x8, i32, ctpop_u32x8, ctlz_u32x8, cttz_u32x8 }
273 impl_bit_manip
! { u32x16 , u32, i32x16, i32, ctpop_u32x16, ctlz_u32x16, cttz_u32x16 }
274 impl_bit_manip
! { u64x2 , u64, i64x2, i64, ctpop_u64x2, ctlz_u64x2, cttz_u64x2 }
275 impl_bit_manip
! { u64x4 , u64, i64x4, i64, ctpop_u64x4, ctlz_u64x4, cttz_u64x4 }
276 impl_bit_manip
! { u64x8 , u64, i64x8, i64, ctpop_u64x8, ctlz_u64x8, cttz_u64x8 }
277 impl_bit_manip
! { u128x1 , u128, i128x1, i128, ctpop_u128x1, ctlz_u128x1, cttz_u128x1 }
278 impl_bit_manip
! { u128x2 , u128, i128x2, i128, ctpop_u128x2, ctlz_u128x2, cttz_u128x2 }
279 impl_bit_manip
! { u128x4 , u128, i128x4, i128, ctpop_u128x4, ctlz_u128x4, cttz_u128x4 }
// aarch64-only `BitManip` for `u8x8`: `ctpop` and `ctlz` use the LLVM
// intrinsics, `cttz` is computed one lane at a time (see the FIXME below).
#[cfg(target_arch = "aarch64")]
impl BitManip for u8x8 {
    #[inline]
    fn ctpop(self) -> Self {
        let y: u8x8 = self.cast();
        // SAFETY: `ctpop_u8x8` is the intrinsic declared for `u8x8` and
        // receives its operand by value.
        unsafe { ctpop_u8x8(y).cast() }
    }

    #[inline]
    fn ctlz(self) -> Self {
        let y: u8x8 = self.cast();
        // SAFETY: `ctlz_u8x8` is the intrinsic declared for `u8x8`; passing
        // `false` requests a defined result for zero lanes.
        unsafe { ctlz_u8x8(y, false).cast() }
    }

    #[inline]
    fn cttz(self) -> Self {
        // FIXME: LLVM cttz.v8i8 broken on aarch64 https://github.com/rust-lang-nursery/packed_simd/issues/191
        // OPTIMIZE: adapt the algorithm used for v8i16/etc to Rust's aarch64
        // intrinsics
        // Start from `self`; every lane is overwritten below.
        let mut tz = self;
        for i in 0..Self::lanes() {
            tz = tz.replace(i, self.extract(i).trailing_zeros() as u8);
        }
        tz
    }
}
// aarch64-only `BitManip` for `i8x8`: `ctpop` and `ctlz` cast to `u8x8` and
// use the LLVM intrinsics, `cttz` is computed one lane at a time (see the
// FIXME below).
#[cfg(target_arch = "aarch64")]
impl BitManip for i8x8 {
    #[inline]
    fn ctpop(self) -> Self {
        let y: u8x8 = self.cast();
        // SAFETY: `ctpop_u8x8` is the intrinsic declared for `u8x8` and
        // receives its operand by value.
        unsafe { ctpop_u8x8(y).cast() }
    }

    #[inline]
    fn ctlz(self) -> Self {
        let y: u8x8 = self.cast();
        // SAFETY: `ctlz_u8x8` is the intrinsic declared for `u8x8`; passing
        // `false` requests a defined result for zero lanes.
        unsafe { ctlz_u8x8(y, false).cast() }
    }

    #[inline]
    fn cttz(self) -> Self {
        // FIXME: LLVM cttz.v8i8 broken on aarch64 https://github.com/rust-lang-nursery/packed_simd/issues/191
        // OPTIMIZE: adapt the algorithm used for v8i16/etc to Rust's aarch64
        // intrinsics
        // Start from `self`; every lane is overwritten below.
        let mut tz = self;
        for i in 0..Self::lanes() {
            // The count is at most 8, so it always fits in `i8`.
            tz = tz.replace(i, self.extract(i).trailing_zeros() as i8);
        }
        tz
    }
}
335 if #[cfg(target_pointer_width = "8")] {
336 impl_bit_manip
! { sized: usizex2, usize, isizex2, isize, u8x2 }
337 impl_bit_manip
! { sized: usizex4, usize, isizex4, isize, u8x4 }
338 impl_bit_manip
! { sized: usizex8, usize, isizex8, isize, u8x8 }
339 } else if #[cfg(target_pointer_width = "16")] {
340 impl_bit_manip
! { sized: usizex2, usize, isizex2, isize, u16x2 }
341 impl_bit_manip
! { sized: usizex4, usize, isizex4, isize, u16x4 }
342 impl_bit_manip
! { sized: usizex8, usize, isizex8, isize, u16x8 }
343 } else if #[cfg(target_pointer_width = "32")] {
344 impl_bit_manip
! { sized: usizex2, usize, isizex2, isize, u32x2 }
345 impl_bit_manip
! { sized: usizex4, usize, isizex4, isize, u32x4 }
346 impl_bit_manip
! { sized: usizex8, usize, isizex8, isize, u32x8 }
347 } else if #[cfg(target_pointer_width = "64")] {
348 impl_bit_manip
! { sized: usizex2, usize, isizex2, isize, u64x2 }
349 impl_bit_manip
! { sized: usizex4, usize, isizex4, isize, u64x4 }
350 impl_bit_manip
! { sized: usizex8, usize, isizex8, isize, u64x8 }
352 compile_error
!("unsupported target_pointer_width");