]> git.proxmox.com Git - rustc.git/blame - src/stdarch/examples/hex.rs
New upstream version 1.46.0+dfsg1
[rustc.git] / src / stdarch / examples / hex.rs
CommitLineData
0531ce1d
XL
//! An example showing runtime dispatch to an architecture-optimized
//! implementation.
//!
//! This program hex-encodes a slice into a caller-provided destination
//! buffer using several different instruction sets. The most optimized
//! implementation supported by the running CPU is selected at runtime, so the
//! program does not need to be compiled differently for each target CPU.
//!
//! You can test out this program via:
//!
//!     echo test | cargo +nightly run --release hex
//!
//! and you should see `746573740a` get printed out.
14
83c7162d 15#![feature(stdsimd)]
0531ce1d 16#![cfg_attr(test, feature(test))]
48663c56
XL
17#![allow(
18 clippy::result_unwrap_used,
19 clippy::print_stdout,
20 clippy::option_unwrap_used,
21 clippy::shadow_reuse,
22 clippy::cast_possible_wrap,
23 clippy::cast_ptr_alignment,
24 clippy::cast_sign_loss,
25 clippy::missing_docs_in_private_items
8faf50e0 26)]
0531ce1d
XL
27
28#[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
9fa01778
XL
29#[macro_use(is_x86_feature_detected)]
30extern crate std_detect;
0531ce1d 31
9fa01778 32extern crate core_arch;
0531ce1d
XL
33
34#[cfg(test)]
35#[macro_use]
36extern crate quickcheck;
37
48663c56
XL
38use std::{
39 io::{self, Read},
40 str,
41};
0531ce1d
XL
42
43#[cfg(target_arch = "x86")]
9fa01778 44use core_arch::x86::*;
0531ce1d 45#[cfg(target_arch = "x86_64")]
9fa01778 46use core_arch::x86_64::*;
0531ce1d
XL
47
48fn main() {
49 let mut input = Vec::new();
50 io::stdin().read_to_end(&mut input).unwrap();
51 let mut dst = vec![0; 2 * input.len()];
52 let s = hex_encode(&input, &mut dst).unwrap();
53 println!("{}", s);
54}
55
56fn hex_encode<'a>(src: &[u8], dst: &'a mut [u8]) -> Result<&'a str, usize> {
57 let len = src.len().checked_mul(2).unwrap();
58 if dst.len() < len {
59 return Err(len);
60 }
61
62 #[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
63 {
64 if is_x86_feature_detected!("avx2") {
65 return unsafe { hex_encode_avx2(src, dst) };
66 }
67 if is_x86_feature_detected!("sse4.1") {
68 return unsafe { hex_encode_sse41(src, dst) };
69 }
70 }
71
72 hex_encode_fallback(src, dst)
73}
74
75#[target_feature(enable = "avx2")]
76#[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
0731742a 77unsafe fn hex_encode_avx2<'a>(mut src: &[u8], dst: &'a mut [u8]) -> Result<&'a str, usize> {
0531ce1d
XL
78 let ascii_zero = _mm256_set1_epi8(b'0' as i8);
79 let nines = _mm256_set1_epi8(9);
80 let ascii_a = _mm256_set1_epi8((b'a' - 9 - 1) as i8);
81 let and4bits = _mm256_set1_epi8(0xf);
82
83 let mut i = 0_isize;
84 while src.len() >= 32 {
85 let invec = _mm256_loadu_si256(src.as_ptr() as *const _);
86
87 let masked1 = _mm256_and_si256(invec, and4bits);
88 let masked2 = _mm256_and_si256(_mm256_srli_epi64(invec, 4), and4bits);
89
90 // return 0xff corresponding to the elements > 9, or 0x00 otherwise
91 let cmpmask1 = _mm256_cmpgt_epi8(masked1, nines);
92 let cmpmask2 = _mm256_cmpgt_epi8(masked2, nines);
93
94 // add '0' or the offset depending on the masks
0731742a
XL
95 let masked1 = _mm256_add_epi8(masked1, _mm256_blendv_epi8(ascii_zero, ascii_a, cmpmask1));
96 let masked2 = _mm256_add_epi8(masked2, _mm256_blendv_epi8(ascii_zero, ascii_a, cmpmask2));
0531ce1d
XL
97
98 // interleave masked1 and masked2 bytes
99 let res1 = _mm256_unpacklo_epi8(masked2, masked1);
100 let res2 = _mm256_unpackhi_epi8(masked2, masked1);
101
102 // Store everything into the right destination now
103 let base = dst.as_mut_ptr().offset(i * 2);
104 let base1 = base.offset(0) as *mut _;
105 let base2 = base.offset(16) as *mut _;
106 let base3 = base.offset(32) as *mut _;
107 let base4 = base.offset(48) as *mut _;
108 _mm256_storeu2_m128i(base3, base1, res1);
109 _mm256_storeu2_m128i(base4, base2, res2);
110 src = &src[32..];
111 i += 32;
112 }
113
114 let i = i as usize;
115 let _ = hex_encode_sse41(src, &mut dst[i * 2..]);
116
8faf50e0 117 Ok(str::from_utf8_unchecked(&dst[..src.len() * 2 + i * 2]))
0531ce1d
XL
118}
119
120// copied from https://github.com/Matherunner/bin2hex-sse/blob/master/base16_sse4.cpp
121#[target_feature(enable = "sse4.1")]
122#[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
0731742a 123unsafe fn hex_encode_sse41<'a>(mut src: &[u8], dst: &'a mut [u8]) -> Result<&'a str, usize> {
0531ce1d
XL
124 let ascii_zero = _mm_set1_epi8(b'0' as i8);
125 let nines = _mm_set1_epi8(9);
126 let ascii_a = _mm_set1_epi8((b'a' - 9 - 1) as i8);
127 let and4bits = _mm_set1_epi8(0xf);
128
129 let mut i = 0_isize;
130 while src.len() >= 16 {
131 let invec = _mm_loadu_si128(src.as_ptr() as *const _);
132
133 let masked1 = _mm_and_si128(invec, and4bits);
134 let masked2 = _mm_and_si128(_mm_srli_epi64(invec, 4), and4bits);
135
136 // return 0xff corresponding to the elements > 9, or 0x00 otherwise
137 let cmpmask1 = _mm_cmpgt_epi8(masked1, nines);
138 let cmpmask2 = _mm_cmpgt_epi8(masked2, nines);
139
140 // add '0' or the offset depending on the masks
0731742a
XL
141 let masked1 = _mm_add_epi8(masked1, _mm_blendv_epi8(ascii_zero, ascii_a, cmpmask1));
142 let masked2 = _mm_add_epi8(masked2, _mm_blendv_epi8(ascii_zero, ascii_a, cmpmask2));
0531ce1d
XL
143
144 // interleave masked1 and masked2 bytes
145 let res1 = _mm_unpacklo_epi8(masked2, masked1);
146 let res2 = _mm_unpackhi_epi8(masked2, masked1);
147
148 _mm_storeu_si128(dst.as_mut_ptr().offset(i * 2) as *mut _, res1);
8faf50e0 149 _mm_storeu_si128(dst.as_mut_ptr().offset(i * 2 + 16) as *mut _, res2);
0531ce1d
XL
150 src = &src[16..];
151 i += 16;
152 }
153
154 let i = i as usize;
155 let _ = hex_encode_fallback(src, &mut dst[i * 2..]);
156
8faf50e0 157 Ok(str::from_utf8_unchecked(&dst[..src.len() * 2 + i * 2]))
0531ce1d
XL
158}
159
/// Portable scalar hex encoder used when no SIMD implementation applies and
/// for the tail that the SIMD implementations leave over.
///
/// Writes two lowercase ASCII hex digits per input byte (high nibble first)
/// into the front of `dst` and returns them as a `&str` borrowed from `dst`.
/// Bytes of `dst` beyond `2 * src.len()` are left untouched.
///
/// Returns `Err(n)` when `dst` is shorter than the `n = 2 * src.len()` bytes
/// required, matching the error convention of `hex_encode`.
///
/// Panics if `2 * src.len()` overflows `usize`.
fn hex_encode_fallback<'a>(src: &[u8], dst: &'a mut [u8]) -> Result<&'a str, usize> {
    const TABLE: &[u8; 16] = b"0123456789abcdef";

    let len = src.len().checked_mul(2).unwrap();
    if dst.len() < len {
        return Err(len);
    }

    // `dst[..len]` has even length, so every chunk is exactly two bytes.
    for (&byte, slots) in src.iter().zip(dst[..len].chunks_exact_mut(2)) {
        slots[0] = TABLE[usize::from(byte >> 4)];
        slots[1] = TABLE[usize::from(byte & 0xf)];
    }

    // SAFETY: every byte of `dst[..len]` was just overwritten with an ASCII
    // character taken from `TABLE`, so the slice is valid UTF-8.
    Ok(unsafe { str::from_utf8_unchecked(&dst[..len]) })
}
173
416331ca 174// Run these with `cargo +nightly test --example hex -p stdarch`
0531ce1d
XL
#[cfg(test)]
mod tests {
    use super::*;

    /// Asserts that every implementation available on this machine encodes
    /// `input` as `output`.
    fn test(input: &[u8], output: &str) {
        // Fresh, exactly-sized destination buffer for each implementation.
        let tmp = || vec![0; input.len() * 2];

        assert_eq!(hex_encode_fallback(input, &mut tmp()).unwrap(), output);
        assert_eq!(hex_encode(input, &mut tmp()).unwrap(), output);

        // The SIMD implementations are only called after the matching CPU
        // feature has been detected.
        #[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
        unsafe {
            if is_x86_feature_detected!("avx2") {
                assert_eq!(hex_encode_avx2(input, &mut tmp()).unwrap(), output);
            }
            if is_x86_feature_detected!("sse4.1") {
                assert_eq!(hex_encode_sse41(input, &mut tmp()).unwrap(), output);
            }
        }
    }

    #[test]
    fn empty() {
        test(b"", "");
    }

    #[test]
    fn big() {
        test(&[0; 1024], &"0".repeat(2048));
    }

    // 313 is not a multiple of 16 or 32, so the tail paths are exercised too.
    #[test]
    fn odd() {
        test(&[0; 313], &"0".repeat(313 * 2));
    }

    // 33 bytes: one full 32-byte AVX2 iteration plus a one-byte tail.
    #[test]
    fn avx_works() {
        let mut input = [0; 33];
        input[4] = 3;
        input[16] = 3;
        input[17] = 0x30;
        input[21] = 1;
        input[31] = 0x24;
        test(
            &input,
            "\
             0000000003000000\
             0000000000000000\
             0330000000010000\
             0000000000000024\
             00\
             ",
        );
    }

    quickcheck! {
        fn encode_equals_fallback(input: Vec<u8>) -> bool {
            let mut space1 = vec![0; input.len() * 2];
            let mut space2 = vec![0; input.len() * 2];
            let a = hex_encode(&input, &mut space1).unwrap();
            let b = hex_encode_fallback(&input, &mut space2).unwrap();
            a == b
        }

        #[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
        fn avx_equals_fallback(input: Vec<u8>) -> bool {
            if !is_x86_feature_detected!("avx2") {
                return true;
            }
            let mut space1 = vec![0; input.len() * 2];
            let mut space2 = vec![0; input.len() * 2];
            let a = unsafe { hex_encode_avx2(&input, &mut space1).unwrap() };
            let b = hex_encode_fallback(&input, &mut space2).unwrap();
            a == b
        }

        #[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
        fn sse41_equals_fallback(input: Vec<u8>) -> bool {
            // Guard on the feature this property actually exercises, so the
            // check still runs on CPUs that have SSE4.1 but not AVX2.
            if !is_x86_feature_detected!("sse4.1") {
                return true;
            }
            let mut space1 = vec![0; input.len() * 2];
            let mut space2 = vec![0; input.len() * 2];
            let a = unsafe { hex_encode_sse41(&input, &mut space1).unwrap() };
            let b = hex_encode_fallback(&input, &mut space2).unwrap();
            a == b
        }
    }
}
273
416331ca 274// Run these with `cargo +nightly bench --example hex -p stdarch`
0531ce1d
XL
#[cfg(test)]
mod benches {
    extern crate rand;
    extern crate test;

    use self::rand::Rng;

    use super::*;

    /// Input size, in bytes, for the `small_*` benchmarks.
    const SMALL_LEN: usize = 117;
    /// Input size, in bytes, for the `large_*` benchmarks (1 MiB).
    const LARGE_LEN: usize = 1024 * 1024;

    /// Benchmarks encoder `f` on `len` random input bytes.
    ///
    /// `f` is typed as an `unsafe fn` pointer so that both the safe encoders
    /// and the `#[target_feature]` ones can be passed in.
    fn doit(
        b: &mut test::Bencher,
        len: usize,
        f: for<'a> unsafe fn(&[u8], &'a mut [u8]) -> Result<&'a str, usize>,
    ) {
        let mut rng = rand::thread_rng();
        let input: Vec<u8> = (0..len).map(|_| rng.gen::<u8>()).collect();
        let mut dst = vec![0; input.len() * 2];
        b.bytes = len as u64;
        b.iter(|| {
            // The callers in this module pass either a safe encoder or a SIMD
            // encoder whose CPU feature they have just detected.
            let outcome = unsafe { f(&input, &mut dst) };
            outcome.unwrap();
            // Return a byte of the output from the closure.
            dst[0]
        });
    }

    #[bench]
    fn small_default(b: &mut test::Bencher) {
        doit(b, SMALL_LEN, hex_encode);
    }

    #[bench]
    fn small_fallback(b: &mut test::Bencher) {
        doit(b, SMALL_LEN, hex_encode_fallback);
    }

    #[bench]
    fn large_default(b: &mut test::Bencher) {
        doit(b, LARGE_LEN, hex_encode);
    }

    #[bench]
    fn large_fallback(b: &mut test::Bencher) {
        doit(b, LARGE_LEN, hex_encode_fallback);
    }

    /// Benchmarks of the x86 SIMD encoders; each one does nothing when the
    /// required CPU feature is not detected.
    #[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
    mod x86 {
        use super::*;

        #[bench]
        fn small_avx2(b: &mut test::Bencher) {
            if is_x86_feature_detected!("avx2") {
                doit(b, SMALL_LEN, hex_encode_avx2);
            }
        }

        #[bench]
        fn small_sse41(b: &mut test::Bencher) {
            if is_x86_feature_detected!("sse4.1") {
                doit(b, SMALL_LEN, hex_encode_sse41);
            }
        }

        #[bench]
        fn large_avx2(b: &mut test::Bencher) {
            if is_x86_feature_detected!("avx2") {
                doit(b, LARGE_LEN, hex_encode_avx2);
            }
        }

        #[bench]
        fn large_sse41(b: &mut test::Bencher) {
            if is_x86_feature_detected!("sse4.1") {
                doit(b, LARGE_LEN, hex_encode_sse41);
            }
        }
    }
}