]> git.proxmox.com Git - rustc.git/blame - src/stdsimd/examples/hex.rs
New upstream version 1.29.0+dfsg1
[rustc.git] / src / stdsimd / examples / hex.rs
CommitLineData
0531ce1d
XL
//! An example showing runtime dispatch to an architecture-optimized
//! implementation.
//!
//! This program hex-encodes a slice into a caller-provided destination
//! buffer using several different instruction sets. The most optimized
//! implementation supported by the CPU is selected at runtime, so the
//! program does not have to be compiled differently for each target.
//!
//! You can test out this program via:
//!
//!     echo test | cargo +nightly run --release --example hex -p stdsimd
//!
//! and you should see `746573740a` get printed out.
14
83c7162d 15#![feature(stdsimd)]
0531ce1d 16#![cfg_attr(test, feature(test))]
8faf50e0
XL
17#![cfg_attr(
18 feature = "cargo-clippy",
19 allow(
20 result_unwrap_used, print_stdout, option_unwrap_used, shadow_reuse,
21 cast_possible_wrap, cast_sign_loss, missing_docs_in_private_items
22 )
23)]
0531ce1d
XL
24
25#[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
26#[macro_use]
27extern crate stdsimd;
28
29#[cfg(not(any(target_arch = "x86", target_arch = "x86_64")))]
30extern crate stdsimd;
31
32#[cfg(test)]
33#[macro_use]
34extern crate quickcheck;
35
0531ce1d 36use std::io::{self, Read};
83c7162d 37use std::str;
0531ce1d
XL
38
39#[cfg(target_arch = "x86")]
40use stdsimd::arch::x86::*;
41#[cfg(target_arch = "x86_64")]
42use stdsimd::arch::x86_64::*;
43
44fn main() {
45 let mut input = Vec::new();
46 io::stdin().read_to_end(&mut input).unwrap();
47 let mut dst = vec![0; 2 * input.len()];
48 let s = hex_encode(&input, &mut dst).unwrap();
49 println!("{}", s);
50}
51
52fn hex_encode<'a>(src: &[u8], dst: &'a mut [u8]) -> Result<&'a str, usize> {
53 let len = src.len().checked_mul(2).unwrap();
54 if dst.len() < len {
55 return Err(len);
56 }
57
58 #[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
59 {
60 if is_x86_feature_detected!("avx2") {
61 return unsafe { hex_encode_avx2(src, dst) };
62 }
63 if is_x86_feature_detected!("sse4.1") {
64 return unsafe { hex_encode_sse41(src, dst) };
65 }
66 }
67
68 hex_encode_fallback(src, dst)
69}
70
71#[target_feature(enable = "avx2")]
72#[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
73unsafe fn hex_encode_avx2<'a>(
8faf50e0 74 mut src: &[u8], dst: &'a mut [u8],
0531ce1d
XL
75) -> Result<&'a str, usize> {
76 let ascii_zero = _mm256_set1_epi8(b'0' as i8);
77 let nines = _mm256_set1_epi8(9);
78 let ascii_a = _mm256_set1_epi8((b'a' - 9 - 1) as i8);
79 let and4bits = _mm256_set1_epi8(0xf);
80
81 let mut i = 0_isize;
82 while src.len() >= 32 {
83 let invec = _mm256_loadu_si256(src.as_ptr() as *const _);
84
85 let masked1 = _mm256_and_si256(invec, and4bits);
86 let masked2 = _mm256_and_si256(_mm256_srli_epi64(invec, 4), and4bits);
87
88 // return 0xff corresponding to the elements > 9, or 0x00 otherwise
89 let cmpmask1 = _mm256_cmpgt_epi8(masked1, nines);
90 let cmpmask2 = _mm256_cmpgt_epi8(masked2, nines);
91
92 // add '0' or the offset depending on the masks
93 let masked1 = _mm256_add_epi8(
94 masked1,
95 _mm256_blendv_epi8(ascii_zero, ascii_a, cmpmask1),
96 );
97 let masked2 = _mm256_add_epi8(
98 masked2,
99 _mm256_blendv_epi8(ascii_zero, ascii_a, cmpmask2),
100 );
101
102 // interleave masked1 and masked2 bytes
103 let res1 = _mm256_unpacklo_epi8(masked2, masked1);
104 let res2 = _mm256_unpackhi_epi8(masked2, masked1);
105
106 // Store everything into the right destination now
107 let base = dst.as_mut_ptr().offset(i * 2);
108 let base1 = base.offset(0) as *mut _;
109 let base2 = base.offset(16) as *mut _;
110 let base3 = base.offset(32) as *mut _;
111 let base4 = base.offset(48) as *mut _;
112 _mm256_storeu2_m128i(base3, base1, res1);
113 _mm256_storeu2_m128i(base4, base2, res2);
114 src = &src[32..];
115 i += 32;
116 }
117
118 let i = i as usize;
119 let _ = hex_encode_sse41(src, &mut dst[i * 2..]);
120
8faf50e0 121 Ok(str::from_utf8_unchecked(&dst[..src.len() * 2 + i * 2]))
0531ce1d
XL
122}
123
124// copied from https://github.com/Matherunner/bin2hex-sse/blob/master/base16_sse4.cpp
125#[target_feature(enable = "sse4.1")]
126#[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
127unsafe fn hex_encode_sse41<'a>(
8faf50e0 128 mut src: &[u8], dst: &'a mut [u8],
0531ce1d
XL
129) -> Result<&'a str, usize> {
130 let ascii_zero = _mm_set1_epi8(b'0' as i8);
131 let nines = _mm_set1_epi8(9);
132 let ascii_a = _mm_set1_epi8((b'a' - 9 - 1) as i8);
133 let and4bits = _mm_set1_epi8(0xf);
134
135 let mut i = 0_isize;
136 while src.len() >= 16 {
137 let invec = _mm_loadu_si128(src.as_ptr() as *const _);
138
139 let masked1 = _mm_and_si128(invec, and4bits);
140 let masked2 = _mm_and_si128(_mm_srli_epi64(invec, 4), and4bits);
141
142 // return 0xff corresponding to the elements > 9, or 0x00 otherwise
143 let cmpmask1 = _mm_cmpgt_epi8(masked1, nines);
144 let cmpmask2 = _mm_cmpgt_epi8(masked2, nines);
145
146 // add '0' or the offset depending on the masks
147 let masked1 = _mm_add_epi8(
148 masked1,
149 _mm_blendv_epi8(ascii_zero, ascii_a, cmpmask1),
150 );
151 let masked2 = _mm_add_epi8(
152 masked2,
153 _mm_blendv_epi8(ascii_zero, ascii_a, cmpmask2),
154 );
155
156 // interleave masked1 and masked2 bytes
157 let res1 = _mm_unpacklo_epi8(masked2, masked1);
158 let res2 = _mm_unpackhi_epi8(masked2, masked1);
159
160 _mm_storeu_si128(dst.as_mut_ptr().offset(i * 2) as *mut _, res1);
8faf50e0 161 _mm_storeu_si128(dst.as_mut_ptr().offset(i * 2 + 16) as *mut _, res2);
0531ce1d
XL
162 src = &src[16..];
163 i += 16;
164 }
165
166 let i = i as usize;
167 let _ = hex_encode_fallback(src, &mut dst[i * 2..]);
168
8faf50e0 169 Ok(str::from_utf8_unchecked(&dst[..src.len() * 2 + i * 2]))
0531ce1d
XL
170}
171
/// Portable hex encoder: writes two lowercase hex digits per byte of `src`
/// into the front of `dst`.
///
/// Returns the encoded text, which occupies the first `2 * src.len()` bytes
/// of `dst`; any bytes of `dst` beyond that are left untouched. When `dst` is
/// too small nothing is written and `Err(needed)` carries the required
/// destination length.
///
/// Panics if `2 * src.len()` overflows `usize`.
fn hex_encode_fallback<'a>(
    src: &[u8], dst: &'a mut [u8],
) -> Result<&'a str, usize> {
    const DIGITS: &[u8; 16] = b"0123456789abcdef";

    // Validate before writing so a short buffer is reported through the
    // `Result` instead of panicking with `dst` partially filled.
    let len = src.len().checked_mul(2).unwrap();
    if dst.len() < len {
        return Err(len);
    }

    for (byte, slots) in src.iter().zip(dst[..len].chunks_mut(2)) {
        slots[0] = DIGITS[usize::from(*byte >> 4)];
        slots[1] = DIGITS[usize::from(*byte & 0xf)];
    }

    // Every byte of `dst[..len]` was just set to an ASCII digit from
    // `DIGITS`, so the slice is valid UTF-8.
    unsafe { Ok(str::from_utf8_unchecked(&dst[..len])) }
}
187
// Run these with `cargo +nightly test --example hex -p stdsimd`
#[cfg(test)]
mod tests {
    use std::iter;

    use super::*;

    /// Asserts that every implementation usable on the current CPU encodes
    /// `input` to `output`.
    fn test(input: &[u8], output: &str) {
        // Fresh, exactly-sized destination buffer for each implementation.
        let tmp = || vec![0; input.len() * 2];

        assert_eq!(hex_encode_fallback(input, &mut tmp()).unwrap(), output);
        assert_eq!(hex_encode(input, &mut tmp()).unwrap(), output);

        #[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
        unsafe {
            if is_x86_feature_detected!("avx2") {
                assert_eq!(
                    hex_encode_avx2(input, &mut tmp()).unwrap(),
                    output
                );
            }
            if is_x86_feature_detected!("sse4.1") {
                assert_eq!(
                    hex_encode_sse41(input, &mut tmp()).unwrap(),
                    output
                );
            }
        }
    }

    #[test]
    fn empty() {
        test(b"", "");
    }

    #[test]
    fn big() {
        test(
            &[0; 1024],
            &iter::repeat('0').take(2048).collect::<String>(),
        );
    }

    // A length that is not a multiple of 16 or 32, so the tail paths run too.
    #[test]
    fn odd() {
        test(
            &[0; 313],
            &iter::repeat('0').take(313 * 2).collect::<String>(),
        );
    }

    // One full 32-byte vector plus a one-byte tail, with non-zero bytes in
    // different parts of the vector to catch ordering mistakes.
    #[test]
    fn avx_works() {
        let mut input = [0; 33];
        input[4] = 3;
        input[16] = 3;
        input[17] = 0x30;
        input[21] = 1;
        input[31] = 0x24;
        test(
            &input,
            "\
             0000000003000000\
             0000000000000000\
             0330000000010000\
             0000000000000024\
             00\
             ",
        );
    }

    quickcheck! {
        fn encode_equals_fallback(input: Vec<u8>) -> bool {
            let mut space1 = vec![0; input.len() * 2];
            let mut space2 = vec![0; input.len() * 2];
            let a = hex_encode(&input, &mut space1).unwrap();
            let b = hex_encode_fallback(&input, &mut space2).unwrap();
            a == b
        }

        #[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
        fn avx_equals_fallback(input: Vec<u8>) -> bool {
            if !is_x86_feature_detected!("avx2") {
                return true
            }
            let mut space1 = vec![0; input.len() * 2];
            let mut space2 = vec![0; input.len() * 2];
            let a = unsafe { hex_encode_avx2(&input, &mut space1).unwrap() };
            let b = hex_encode_fallback(&input, &mut space2).unwrap();
            a == b
        }

        #[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
        fn sse41_equals_fallback(input: Vec<u8>) -> bool {
            // Guard on the feature this property exercises. Checking "avx2"
            // here would skip the property on CPUs that have SSE4.1 but not
            // AVX2.
            if !is_x86_feature_detected!("sse4.1") {
                return true
            }
            let mut space1 = vec![0; input.len() * 2];
            let mut space2 = vec![0; input.len() * 2];
            let a = unsafe { hex_encode_sse41(&input, &mut space1).unwrap() };
            let b = hex_encode_fallback(&input, &mut space2).unwrap();
            a == b
        }
    }
}
293
// Run these with `cargo +nightly bench --example hex -p stdsimd`
#[cfg(test)]
mod benches {
    extern crate rand;
    extern crate test;

    use self::rand::Rng;

    use super::*;

    /// Input size, in bytes, for the `small_*` benchmarks.
    const SMALL_LEN: usize = 117;
    /// Input size, in bytes, for the `large_*` benchmarks.
    const LARGE_LEN: usize = 1 * 1024 * 1024;

    /// Benchmarks encoder `f` on `len` random bytes.
    ///
    /// The input and the destination buffer are created once, outside the
    /// timed closure; throughput is reported relative to `len` input bytes.
    fn doit(
        b: &mut test::Bencher, len: usize,
        f: for<'a> unsafe fn(&[u8], &'a mut [u8]) -> Result<&'a str, usize>,
    ) {
        let input: Vec<u8> = rand::thread_rng().gen_iter::<u8>().take(len).collect();
        let mut dst = vec![0; 2 * input.len()];
        b.bytes = len as u64;
        b.iter(|| {
            unsafe {
                f(&input, &mut dst).unwrap();
            }
            // Return a byte of the output as the closure's result.
            dst[0]
        });
    }

    #[bench]
    fn small_default(b: &mut test::Bencher) {
        doit(b, SMALL_LEN, hex_encode);
    }

    #[bench]
    fn small_fallback(b: &mut test::Bencher) {
        doit(b, SMALL_LEN, hex_encode_fallback);
    }

    #[bench]
    fn large_default(b: &mut test::Bencher) {
        doit(b, LARGE_LEN, hex_encode);
    }

    #[bench]
    fn large_fallback(b: &mut test::Bencher) {
        doit(b, LARGE_LEN, hex_encode_fallback);
    }

    /// Benchmarks of the x86-only implementations; each one does nothing when
    /// the CPU lacks the required feature.
    #[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
    mod x86 {
        use super::*;

        #[bench]
        fn small_avx2(b: &mut test::Bencher) {
            if is_x86_feature_detected!("avx2") {
                doit(b, SMALL_LEN, hex_encode_avx2);
            }
        }

        #[bench]
        fn small_sse41(b: &mut test::Bencher) {
            if is_x86_feature_detected!("sse4.1") {
                doit(b, SMALL_LEN, hex_encode_sse41);
            }
        }

        #[bench]
        fn large_avx2(b: &mut test::Bencher) {
            if is_x86_feature_detected!("avx2") {
                doit(b, LARGE_LEN, hex_encode_avx2);
            }
        }

        #[bench]
        fn large_sse41(b: &mut test::Bencher) {
            if is_x86_feature_detected!("sse4.1") {
                doit(b, LARGE_LEN, hex_encode_sse41);
            }
        }
    }
}