]> git.proxmox.com Git - rustc.git/blob - src/stdsimd/examples/hex.rs
New upstream version 1.26.0+dfsg1
[rustc.git] / src / stdsimd / examples / hex.rs
1 //! An example showing runtime dispatch to an architecture-optimized
2 //! implementation.
3 //!
4 //! This program implements hex encoding a slice into a predetermined
5 //! destination using various different instruction sets. This selects at
6 //! runtime the most optimized implementation and uses that rather than being
7 //! required to be compiled differently.
8 //!
9 //! You can test out this program via:
10 //!
11 //! echo test | cargo +nightly run --release --example hex -p stdsimd
12 //!
13 //! and you should see `746573740a` get printed out.
14
15 #![feature(cfg_target_feature, target_feature, stdsimd)]
16 #![cfg_attr(test, feature(test))]
17 #![cfg_attr(feature = "cargo-clippy",
18 allow(result_unwrap_used, print_stdout, option_unwrap_used,
19 shadow_reuse, cast_possible_wrap, cast_sign_loss,
20 missing_docs_in_private_items))]
21
22 #[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
23 #[macro_use]
24 extern crate stdsimd;
25
26 #[cfg(not(any(target_arch = "x86", target_arch = "x86_64")))]
27 extern crate stdsimd;
28
29 #[cfg(test)]
30 #[macro_use]
31 extern crate quickcheck;
32
33 use std::str;
34 use std::io::{self, Read};
35
36 #[cfg(target_arch = "x86")]
37 use stdsimd::arch::x86::*;
38 #[cfg(target_arch = "x86_64")]
39 use stdsimd::arch::x86_64::*;
40
41 fn main() {
42 let mut input = Vec::new();
43 io::stdin().read_to_end(&mut input).unwrap();
44 let mut dst = vec![0; 2 * input.len()];
45 let s = hex_encode(&input, &mut dst).unwrap();
46 println!("{}", s);
47 }
48
49 fn hex_encode<'a>(src: &[u8], dst: &'a mut [u8]) -> Result<&'a str, usize> {
50 let len = src.len().checked_mul(2).unwrap();
51 if dst.len() < len {
52 return Err(len);
53 }
54
55 #[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
56 {
57 if is_x86_feature_detected!("avx2") {
58 return unsafe { hex_encode_avx2(src, dst) };
59 }
60 if is_x86_feature_detected!("sse4.1") {
61 return unsafe { hex_encode_sse41(src, dst) };
62 }
63 }
64
65 hex_encode_fallback(src, dst)
66 }
67
68 #[target_feature(enable = "avx2")]
69 #[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
70 unsafe fn hex_encode_avx2<'a>(
71 mut src: &[u8], dst: &'a mut [u8]
72 ) -> Result<&'a str, usize> {
73 let ascii_zero = _mm256_set1_epi8(b'0' as i8);
74 let nines = _mm256_set1_epi8(9);
75 let ascii_a = _mm256_set1_epi8((b'a' - 9 - 1) as i8);
76 let and4bits = _mm256_set1_epi8(0xf);
77
78 let mut i = 0_isize;
79 while src.len() >= 32 {
80 let invec = _mm256_loadu_si256(src.as_ptr() as *const _);
81
82 let masked1 = _mm256_and_si256(invec, and4bits);
83 let masked2 = _mm256_and_si256(_mm256_srli_epi64(invec, 4), and4bits);
84
85 // return 0xff corresponding to the elements > 9, or 0x00 otherwise
86 let cmpmask1 = _mm256_cmpgt_epi8(masked1, nines);
87 let cmpmask2 = _mm256_cmpgt_epi8(masked2, nines);
88
89 // add '0' or the offset depending on the masks
90 let masked1 = _mm256_add_epi8(
91 masked1,
92 _mm256_blendv_epi8(ascii_zero, ascii_a, cmpmask1),
93 );
94 let masked2 = _mm256_add_epi8(
95 masked2,
96 _mm256_blendv_epi8(ascii_zero, ascii_a, cmpmask2),
97 );
98
99 // interleave masked1 and masked2 bytes
100 let res1 = _mm256_unpacklo_epi8(masked2, masked1);
101 let res2 = _mm256_unpackhi_epi8(masked2, masked1);
102
103 // Store everything into the right destination now
104 let base = dst.as_mut_ptr().offset(i * 2);
105 let base1 = base.offset(0) as *mut _;
106 let base2 = base.offset(16) as *mut _;
107 let base3 = base.offset(32) as *mut _;
108 let base4 = base.offset(48) as *mut _;
109 _mm256_storeu2_m128i(base3, base1, res1);
110 _mm256_storeu2_m128i(base4, base2, res2);
111 src = &src[32..];
112 i += 32;
113 }
114
115 let i = i as usize;
116 let _ = hex_encode_sse41(src, &mut dst[i * 2..]);
117
118 Ok(str::from_utf8_unchecked(&dst[..src.len() * 2 + i * 2]))
119 }
120
121 // copied from https://github.com/Matherunner/bin2hex-sse/blob/master/base16_sse4.cpp
122 #[target_feature(enable = "sse4.1")]
123 #[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
124 unsafe fn hex_encode_sse41<'a>(
125 mut src: &[u8], dst: &'a mut [u8]
126 ) -> Result<&'a str, usize> {
127 let ascii_zero = _mm_set1_epi8(b'0' as i8);
128 let nines = _mm_set1_epi8(9);
129 let ascii_a = _mm_set1_epi8((b'a' - 9 - 1) as i8);
130 let and4bits = _mm_set1_epi8(0xf);
131
132 let mut i = 0_isize;
133 while src.len() >= 16 {
134 let invec = _mm_loadu_si128(src.as_ptr() as *const _);
135
136 let masked1 = _mm_and_si128(invec, and4bits);
137 let masked2 = _mm_and_si128(_mm_srli_epi64(invec, 4), and4bits);
138
139 // return 0xff corresponding to the elements > 9, or 0x00 otherwise
140 let cmpmask1 = _mm_cmpgt_epi8(masked1, nines);
141 let cmpmask2 = _mm_cmpgt_epi8(masked2, nines);
142
143 // add '0' or the offset depending on the masks
144 let masked1 = _mm_add_epi8(
145 masked1,
146 _mm_blendv_epi8(ascii_zero, ascii_a, cmpmask1),
147 );
148 let masked2 = _mm_add_epi8(
149 masked2,
150 _mm_blendv_epi8(ascii_zero, ascii_a, cmpmask2),
151 );
152
153 // interleave masked1 and masked2 bytes
154 let res1 = _mm_unpacklo_epi8(masked2, masked1);
155 let res2 = _mm_unpackhi_epi8(masked2, masked1);
156
157 _mm_storeu_si128(dst.as_mut_ptr().offset(i * 2) as *mut _, res1);
158 _mm_storeu_si128(dst.as_mut_ptr().offset(i * 2 + 16) as *mut _, res2);
159 src = &src[16..];
160 i += 16;
161 }
162
163 let i = i as usize;
164 let _ = hex_encode_fallback(src, &mut dst[i * 2..]);
165
166 Ok(str::from_utf8_unchecked(&dst[..src.len() * 2 + i * 2]))
167 }
168
/// Portable, scalar hex encoder used when no SIMD implementation applies
/// and for the tails the SIMD encoders cannot process.
///
/// Writes two lowercase hex digits per byte of `src` into the start of `dst`
/// and returns that prefix as a string slice. If `dst` is shorter than
/// `2 * src.len()` bytes nothing is written and `Err` carries the required
/// length, matching the contract of `hex_encode`.
fn hex_encode_fallback<'a>(
    src: &[u8], dst: &'a mut [u8]
) -> Result<&'a str, usize> {
    /// Maps a nibble (`0..=15`) to its lowercase ASCII hex digit.
    fn hex(byte: u8) -> u8 {
        static TABLE: &[u8] = b"0123456789abcdef";
        TABLE[byte as usize]
    }

    // Report a short destination through the error channel rather than
    // encoding a truncated prefix and then panicking on the final slice.
    let needed = src.len() * 2;
    if dst.len() < needed {
        return Err(needed);
    }

    for (byte, slots) in src.iter().zip(dst.chunks_mut(2)) {
        slots[0] = hex((*byte >> 4) & 0xf);
        slots[1] = hex(*byte & 0xf);
    }

    // SAFETY: every byte of `dst[..needed]` was just written from `TABLE`,
    // which contains only ASCII characters.
    unsafe { Ok(str::from_utf8_unchecked(&dst[..needed])) }
}
184
// Run these with `cargo +nightly test --example hex -p stdsimd`
#[cfg(test)]
mod tests {
    use std::iter;

    use super::*;

    /// Asserts that the fallback, the dispatching entry point and every SIMD
    /// encoder supported by the running CPU all encode `input` as `output`.
    fn test(input: &[u8], output: &str) {
        // Fresh, exactly-sized destination buffer for each encoder.
        let tmp = || vec![0; input.len() * 2];

        assert_eq!(hex_encode_fallback(input, &mut tmp()).unwrap(), output);
        assert_eq!(hex_encode(input, &mut tmp()).unwrap(), output);

        #[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
        unsafe {
            if is_x86_feature_detected!("avx2") {
                assert_eq!(
                    hex_encode_avx2(input, &mut tmp()).unwrap(),
                    output
                );
            }
            if is_x86_feature_detected!("sse4.1") {
                assert_eq!(
                    hex_encode_sse41(input, &mut tmp()).unwrap(),
                    output
                );
            }
        }
    }

    #[test]
    fn empty() {
        test(b"", "");
    }

    #[test]
    fn big() {
        test(
            &[0; 1024],
            &iter::repeat('0').take(2048).collect::<String>(),
        );
    }

    // A length that is not a multiple of 16 or 32, so the tail paths run.
    #[test]
    fn odd() {
        test(
            &[0; 313],
            &iter::repeat('0').take(313 * 2).collect::<String>(),
        );
    }

    // 33 bytes: one full 32-byte AVX2 iteration plus a one-byte tail, with
    // non-zero bytes placed in different 8-byte groups of the vector.
    #[test]
    fn avx_works() {
        let mut input = [0; 33];
        input[4] = 3;
        input[16] = 3;
        input[17] = 0x30;
        input[21] = 1;
        input[31] = 0x24;
        test(
            &input,
            "\
             0000000003000000\
             0000000000000000\
             0330000000010000\
             0000000000000024\
             00\
             ",
        );
    }

    quickcheck! {
        fn encode_equals_fallback(input: Vec<u8>) -> bool {
            let mut space1 = vec![0; input.len() * 2];
            let mut space2 = vec![0; input.len() * 2];
            let a = hex_encode(&input, &mut space1).unwrap();
            let b = hex_encode_fallback(&input, &mut space2).unwrap();
            a == b
        }

        #[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
        fn avx_equals_fallback(input: Vec<u8>) -> bool {
            if !is_x86_feature_detected!("avx2") {
                return true
            }
            let mut space1 = vec![0; input.len() * 2];
            let mut space2 = vec![0; input.len() * 2];
            let a = unsafe { hex_encode_avx2(&input, &mut space1).unwrap() };
            let b = hex_encode_fallback(&input, &mut space2).unwrap();
            a == b
        }

        #[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
        fn sse41_equals_fallback(input: Vec<u8>) -> bool {
            // Gate on the feature this property actually exercises, so the
            // check still runs on CPUs that have SSE4.1 but not AVX2.
            if !is_x86_feature_detected!("sse4.1") {
                return true
            }
            let mut space1 = vec![0; input.len() * 2];
            let mut space2 = vec![0; input.len() * 2];
            let a = unsafe { hex_encode_sse41(&input, &mut space1).unwrap() };
            let b = hex_encode_fallback(&input, &mut space2).unwrap();
            a == b
        }
    }
}
290
// Run these with `cargo +nightly bench --example hex -p stdsimd`
#[cfg(test)]
mod benches {
    extern crate rand;
    extern crate test;

    use self::rand::Rng;

    use super::*;

    /// Input size, in bytes, for the `small_*` benchmarks.
    const SMALL_LEN: usize = 117;
    /// Input size, in bytes, for the `large_*` benchmarks (1 MiB).
    const LARGE_LEN: usize = 1024 * 1024;

    /// Benchmarks encoder `f` on `len` random bytes.
    ///
    /// The input and the output buffer are allocated once, outside the timed
    /// closure; throughput is reported relative to `len` input bytes.
    fn doit(
        b: &mut test::Bencher, len: usize,
        f: for<'a> unsafe fn(&[u8], &'a mut [u8]) -> Result<&'a str, usize>,
    ) {
        let payload: Vec<u8> =
            rand::thread_rng().gen_iter::<u8>().take(len).collect();
        let mut out = vec![0; payload.len() * 2];
        b.bytes = len as u64;
        b.iter(|| {
            unsafe {
                f(&payload, &mut out).unwrap();
            }
            // Hand a byte of the output back to the bench harness.
            out[0]
        });
    }

    #[bench]
    fn small_default(bencher: &mut test::Bencher) {
        doit(bencher, SMALL_LEN, hex_encode);
    }

    #[bench]
    fn small_fallback(bencher: &mut test::Bencher) {
        doit(bencher, SMALL_LEN, hex_encode_fallback);
    }

    #[bench]
    fn large_default(bencher: &mut test::Bencher) {
        doit(bencher, LARGE_LEN, hex_encode);
    }

    #[bench]
    fn large_fallback(bencher: &mut test::Bencher) {
        doit(bencher, LARGE_LEN, hex_encode_fallback);
    }

    /// Benchmarks that call the SIMD encoders directly. Each one does
    /// nothing when the CPU lacks the required feature.
    #[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
    mod x86 {
        use super::*;

        #[bench]
        fn small_avx2(bencher: &mut test::Bencher) {
            if !is_x86_feature_detected!("avx2") {
                return;
            }
            doit(bencher, SMALL_LEN, hex_encode_avx2);
        }

        #[bench]
        fn small_sse41(bencher: &mut test::Bencher) {
            if !is_x86_feature_detected!("sse4.1") {
                return;
            }
            doit(bencher, SMALL_LEN, hex_encode_sse41);
        }

        #[bench]
        fn large_avx2(bencher: &mut test::Bencher) {
            if !is_x86_feature_detected!("avx2") {
                return;
            }
            doit(bencher, LARGE_LEN, hex_encode_avx2);
        }

        #[bench]
        fn large_sse41(bencher: &mut test::Bencher) {
            if !is_x86_feature_detected!("sse4.1") {
                return;
            }
            doit(bencher, LARGE_LEN, hex_encode_sse41);
        }
    }
}