]>
Commit | Line | Data |
---|---|---|
f20569fa XL |
1 | //! Mask reductions implementation for `aarch64` targets |
2 | ||
/// 128-bit wide vectors
///
/// Implements `All`/`Any` for a 128-bit mask type via the NEON
/// horizontal min/max reduction intrinsics.
macro_rules! aarch64_128_neon_impl {
    ($mask:ident, $minv:ident, $maxv:ident) => {
        impl All for $mask {
            #[inline]
            #[target_feature(enable = "neon")]
            unsafe fn all(self) -> bool {
                use crate::arch::aarch64::$minv;
                // A false lane is all-zeros, so every lane is true
                // exactly when the horizontal minimum is non-zero.
                0 != $minv(crate::mem::transmute(self))
            }
        }
        impl Any for $mask {
            #[inline]
            #[target_feature(enable = "neon")]
            unsafe fn any(self) -> bool {
                use crate::arch::aarch64::$maxv;
                // A true lane is all-ones, so some lane is true
                // exactly when the horizontal maximum is non-zero.
                0 != $maxv(crate::mem::transmute(self))
            }
        }
    };
}
24 | ||
/// 64-bit wide vectors
///
/// Implements `All`/`Any` for a 64-bit mask type by widening it into
/// the corresponding 128-bit mask type and reducing there.
macro_rules! aarch64_64_neon_impl {
    ($mask:ident, $wide:ident) => {
        impl All for $mask {
            #[inline]
            #[target_feature(enable = "neon")]
            unsafe fn all(self) -> bool {
                // Place the 64-bit mask in both halves of a 128-bit
                // vector; `all` over the duplicate is unchanged.
                union Widen {
                    halves: ($mask, $mask),
                    wide: $wide,
                }
                let widened = Widen { halves: (self, self) };
                widened.wide.all()
            }
        }
        impl Any for $mask {
            #[inline]
            #[target_feature(enable = "neon")]
            unsafe fn any(self) -> bool {
                // Same widening trick; `any` over the duplicate is
                // unchanged as well.
                union Widen {
                    halves: ($mask, $mask),
                    wide: $wide,
                }
                let widened = Widen { halves: (self, self) };
                widened.wide.any()
            }
        }
    };
}
58 | ||
/// Mask reduction implementation for `aarch64` targets
macro_rules! impl_mask_reductions {
    // 64-bit wide masks: widen into the matching 128-bit mask type.
    (m8x8) => {
        aarch64_64_neon_impl!(m8x8, m8x16);
    };
    (m16x4) => {
        aarch64_64_neon_impl!(m16x4, m16x8);
    };
    (m32x2) => {
        aarch64_64_neon_impl!(m32x2, m32x4);
    };
    // 128-bit wide masks: reduce directly with vminvq/vmaxvq.
    (m8x16) => {
        aarch64_128_neon_impl!(m8x16, vminvq_u8, vmaxvq_u8);
    };
    (m16x8) => {
        aarch64_128_neon_impl!(m16x8, vminvq_u16, vmaxvq_u16);
    };
    (m32x4) => {
        aarch64_128_neon_impl!(m32x4, vminvq_u32, vmaxvq_u32);
    };
    // Everything else: LLVM's default code-generation.
    ($other:ident) => {
        fallback_impl!($other);
    };
}