]>
Commit | Line | Data |
---|---|---|
f20569fa XL |
1 | //! Whole-vector byte-order reversal (`swap_bytes`) for packed SIMD vectors. |
2 | ||
3 | // FIXME: investigate using `llvm.bswap` | |
4 | // https://github.com/rust-lang-nursery/packed_simd/issues/19 | |
5 | ||
6 | use crate::*; | |
7 | ||
/// Crate-internal byte-order reversal for packed vector types.
///
/// The implementations generated in this module reverse the order of *all*
/// bytes of the vector viewed as one flat byte sequence: byte `i` of the
/// result is byte `len - 1 - i` of the input. For vectors whose lanes are
/// wider than one byte this therefore also reverses the order of the lanes.
///
/// Visibility is spelled `pub(crate)` rather than the bare `crate` modifier:
/// the two are equivalent, but the bare form depended on an unstable feature
/// that is no longer accepted by the compiler.
pub(crate) trait SwapBytes {
    /// Returns `self` with its bytes in reverse order.
    fn swap_bytes(self) -> Self;
}
11 | ||
// Implements `SwapBytes` for every vector type in a comma-terminated list.
//
// The leading tag (`v16`, `v32`, ..., `v512`) names the total width in bits
// of the listed types and selects how many byte lanes get reversed. Every
// type passed to an arm must be exactly that wide: apart from `v16`, each arm
// reinterprets the value as the `u8xN` vector of the same size, and
// `transmute` refuses to compile if the sizes differ.
macro_rules! impl_swap_bytes {
    // Two bytes in total. The vector is shuffled directly, without going
    // through a byte-vector reinterpretation, so this arm only reverses bytes
    // correctly for types with two one-byte lanes.
    (v16: $($id:ident,)+) => {
        $(
            impl SwapBytes for $id {
                #[inline]
                fn swap_bytes(self) -> Self {
                    // NOTE(review): `shuffle!` is defined elsewhere in the
                    // crate; presumably it requires `unsafe` - confirm. The
                    // indices [1, 0] are both in range for two lanes.
                    unsafe { shuffle!(self, [1, 0]) }
                }
            }
        )+
    };
    (v32: $($id:ident,)+) => {
        impl_swap_bytes!(@via_bytes u8x4, [3, 2, 1, 0], $($id,)+);
    };
    (v64: $($id:ident,)+) => {
        impl_swap_bytes!(@via_bytes u8x8, [7, 6, 5, 4, 3, 2, 1, 0], $($id,)+);
    };
    (v128: $($id:ident,)+) => {
        impl_swap_bytes!(
            @via_bytes u8x16,
            [
                15, 14, 13, 12, 11, 10, 9, 8,
                7, 6, 5, 4, 3, 2, 1, 0
            ],
            $($id,)+
        );
    };
    (v256: $($id:ident,)+) => {
        impl_swap_bytes!(
            @via_bytes u8x32,
            [
                31, 30, 29, 28, 27, 26, 25, 24,
                23, 22, 21, 20, 19, 18, 17, 16,
                15, 14, 13, 12, 11, 10, 9, 8,
                7, 6, 5, 4, 3, 2, 1, 0
            ],
            $($id,)+
        );
    };
    (v512: $($id:ident,)+) => {
        impl_swap_bytes!(
            @via_bytes u8x64,
            [
                63, 62, 61, 60, 59, 58, 57, 56,
                55, 54, 53, 52, 51, 50, 49, 48,
                47, 46, 45, 44, 43, 42, 41, 40,
                39, 38, 37, 36, 35, 34, 33, 32,
                31, 30, 29, 28, 27, 26, 25, 24,
                23, 22, 21, 20, 19, 18, 17, 16,
                15, 14, 13, 12, 11, 10, 9, 8,
                7, 6, 5, 4, 3, 2, 1, 0
            ],
            $($id,)+
        );
    };
    // Internal rule shared by the v32..v512 arms; not meant to be invoked
    // from outside this macro. `$bytes` is the `u8xN` vector with the same
    // total size as every `$id`, and `$rev` is the bracketed list of lane
    // indices N-1 down to 0, forwarded to `shuffle!` as a single token tree.
    (@via_bytes $bytes:ident, $rev:tt, $($id:ident,)+) => {
        $(
            impl SwapBytes for $id {
                #[inline]
                #[cfg_attr(feature = "cargo-clippy", allow(clippy::useless_transmute))]
                fn swap_bytes(self) -> Self {
                    // SAFETY: `transmute` only compiles when source and
                    // destination have the same size, so `$id` is exactly as
                    // wide as `$bytes`. The value is viewed as plain bytes,
                    // the bytes are permuted, and the same number of bytes is
                    // viewed as `$id` again. This relies on every bit pattern
                    // being valid for `$id`, which holds for the integer
                    // vectors this macro is invoked with in this file.
                    unsafe {
                        // When `$id` already is `$bytes` the transmutes are
                        // no-ops, hence the clippy allowance above.
                        let lanes: $bytes = crate::mem::transmute(self);
                        let reversed: $bytes = shuffle!(lanes, $rev);
                        crate::mem::transmute(reversed)
                    }
                }
            }
        )+
    };
}
118 | ||
119 | impl_swap_bytes!(v16: u8x2, i8x2,); | |
120 | impl_swap_bytes!(v32: u8x4, i8x4, u16x2, i16x2,); | |
121 | // FIXME: 64-bit single element vector | |
122 | impl_swap_bytes!( | |
123 | v64: u8x8, | |
124 | i8x8, | |
125 | u16x4, | |
126 | i16x4, | |
127 | u32x2, | |
128 | i32x2, /* u64x1, i64x1, */ | |
129 | ); | |
130 | ||
131 | impl_swap_bytes!( | |
132 | v128: u8x16, | |
133 | i8x16, | |
134 | u16x8, | |
135 | i16x8, | |
136 | u32x4, | |
137 | i32x4, | |
138 | u64x2, | |
139 | i64x2, | |
140 | u128x1, | |
141 | i128x1, | |
142 | ); | |
143 | impl_swap_bytes!( | |
144 | v256: u8x32, | |
145 | i8x32, | |
146 | u16x16, | |
147 | i16x16, | |
148 | u32x8, | |
149 | i32x8, | |
150 | u64x4, | |
151 | i64x4, | |
152 | u128x2, | |
153 | i128x2, | |
154 | ); | |
155 | ||
156 | impl_swap_bytes!( | |
157 | v512: u8x64, | |
158 | i8x64, | |
159 | u16x32, | |
160 | i16x32, | |
161 | u32x16, | |
162 | i32x16, | |
163 | u64x8, | |
164 | i64x8, | |
165 | u128x4, | |
166 | i128x4, | |
167 | ); | |
168 | ||
169 | cfg_if! { | |
170 | if #[cfg(target_pointer_width = "8")] { | |
171 | impl_swap_bytes!(v16: isizex2, usizex2,); | |
172 | impl_swap_bytes!(v32: isizex4, usizex4,); | |
173 | impl_swap_bytes!(v64: isizex8, usizex8,); | |
174 | } else if #[cfg(target_pointer_width = "16")] { | |
175 | impl_swap_bytes!(v32: isizex2, usizex2,); | |
176 | impl_swap_bytes!(v64: isizex4, usizex4,); | |
177 | impl_swap_bytes!(v128: isizex8, usizex8,); | |
178 | } else if #[cfg(target_pointer_width = "32")] { | |
179 | impl_swap_bytes!(v64: isizex2, usizex2,); | |
180 | impl_swap_bytes!(v128: isizex4, usizex4,); | |
181 | impl_swap_bytes!(v256: isizex8, usizex8,); | |
182 | } else if #[cfg(target_pointer_width = "64")] { | |
183 | impl_swap_bytes!(v128: isizex2, usizex2,); | |
184 | impl_swap_bytes!(v256: isizex4, usizex4,); | |
185 | impl_swap_bytes!(v512: isizex8, usizex8,); | |
186 | } else { | |
187 | compile_error!("unsupported target_pointer_width"); | |
188 | } | |
189 | } |