]>
Commit | Line | Data |
---|---|---|
0531ce1d XL |
1 | //! An example showing runtime dispatch to an architecture-optimized |
2 | //! implementation. | |
3 | //! | |
4 | //! This program implements hex encoding a slice into a predetermined | |
5 | //! destination using various different instruction sets. This selects at | |
6 | //! runtime the most optimized implementation and uses that rather than being | |
7 | //! required to be compiled differently. | |
8 | //! | |
9 | //! You can test out this program via: | |
10 | //! | |
9fa01778 | 11 | //! echo test | cargo +nightly run --release hex |
0531ce1d XL |
12 | //! |
13 | //! and you should see `746573740a` get printed out. | |
14 | ||
83c7162d | 15 | #![feature(stdsimd)] |
0531ce1d | 16 | #![cfg_attr(test, feature(test))] |
48663c56 XL |
17 | #![allow( |
18 | clippy::result_unwrap_used, | |
19 | clippy::print_stdout, | |
20 | clippy::option_unwrap_used, | |
21 | clippy::shadow_reuse, | |
22 | clippy::cast_possible_wrap, | |
23 | clippy::cast_ptr_alignment, | |
24 | clippy::cast_sign_loss, | |
25 | clippy::missing_docs_in_private_items | |
8faf50e0 | 26 | )] |
0531ce1d XL |
27 | |
28 | #[cfg(any(target_arch = "x86", target_arch = "x86_64"))] | |
9fa01778 XL |
29 | #[macro_use(is_x86_feature_detected)] |
30 | extern crate std_detect; | |
0531ce1d | 31 | |
9fa01778 | 32 | extern crate core_arch; |
0531ce1d XL |
33 | |
34 | #[cfg(test)] | |
35 | #[macro_use] | |
36 | extern crate quickcheck; | |
37 | ||
48663c56 XL |
38 | use std::{ |
39 | io::{self, Read}, | |
40 | str, | |
41 | }; | |
0531ce1d XL |
42 | |
43 | #[cfg(target_arch = "x86")] | |
9fa01778 | 44 | use core_arch::x86::*; |
0531ce1d | 45 | #[cfg(target_arch = "x86_64")] |
9fa01778 | 46 | use core_arch::x86_64::*; |
0531ce1d XL |
47 | |
48 | fn main() { | |
49 | let mut input = Vec::new(); | |
50 | io::stdin().read_to_end(&mut input).unwrap(); | |
51 | let mut dst = vec![0; 2 * input.len()]; | |
52 | let s = hex_encode(&input, &mut dst).unwrap(); | |
53 | println!("{}", s); | |
54 | } | |
55 | ||
56 | fn hex_encode<'a>(src: &[u8], dst: &'a mut [u8]) -> Result<&'a str, usize> { | |
57 | let len = src.len().checked_mul(2).unwrap(); | |
58 | if dst.len() < len { | |
59 | return Err(len); | |
60 | } | |
61 | ||
62 | #[cfg(any(target_arch = "x86", target_arch = "x86_64"))] | |
63 | { | |
64 | if is_x86_feature_detected!("avx2") { | |
65 | return unsafe { hex_encode_avx2(src, dst) }; | |
66 | } | |
67 | if is_x86_feature_detected!("sse4.1") { | |
68 | return unsafe { hex_encode_sse41(src, dst) }; | |
69 | } | |
70 | } | |
71 | ||
72 | hex_encode_fallback(src, dst) | |
73 | } | |
74 | ||
/// Hex-encodes `src` into `dst` 32 bytes at a time with AVX2, then hands
/// the (< 32-byte) remainder off to the SSE4.1 implementation.
///
/// Returns the encoded prefix of `dst` as a `&str`. The caller
/// (`hex_encode`) has already verified `dst.len() >= 2 * src.len()`.
///
/// # Safety
///
/// The CPU must support AVX2 (and SSE4.1 for the tail path); `hex_encode`
/// checks this with `is_x86_feature_detected!` before calling.
#[target_feature(enable = "avx2")]
#[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
unsafe fn hex_encode_avx2<'a>(mut src: &[u8], dst: &'a mut [u8]) -> Result<&'a str, usize> {
    // Broadcast constants: ASCII '0', the digit/letter threshold (9), the
    // offset turning values 10..=15 into 'a'..='f', and a low-nibble mask.
    let ascii_zero = _mm256_set1_epi8(b'0' as i8);
    let nines = _mm256_set1_epi8(9);
    let ascii_a = _mm256_set1_epi8((b'a' - 9 - 1) as i8);
    let and4bits = _mm256_set1_epi8(0xf);

    // `i` counts input bytes consumed so far; `i * 2` is the output offset.
    let mut i = 0_isize;
    while src.len() >= 32 {
        let invec = _mm256_loadu_si256(src.as_ptr() as *const _);

        // Split each byte into its low nibble (masked1) and its high nibble
        // (masked2: shift down 4, then mask off the bits that bled across
        // byte boundaries in the 64-bit shift).
        let masked1 = _mm256_and_si256(invec, and4bits);
        let masked2 = _mm256_and_si256(_mm256_srli_epi64(invec, 4), and4bits);

        // return 0xff corresponding to the elements > 9, or 0x00 otherwise
        let cmpmask1 = _mm256_cmpgt_epi8(masked1, nines);
        let cmpmask2 = _mm256_cmpgt_epi8(masked2, nines);

        // add '0' or the offset depending on the masks
        let masked1 = _mm256_add_epi8(masked1, _mm256_blendv_epi8(ascii_zero, ascii_a, cmpmask1));
        let masked2 = _mm256_add_epi8(masked2, _mm256_blendv_epi8(ascii_zero, ascii_a, cmpmask2));

        // interleave masked1 and masked2 bytes: high nibble first so the
        // output reads most-significant digit first
        let res1 = _mm256_unpacklo_epi8(masked2, masked1);
        let res2 = _mm256_unpackhi_epi8(masked2, masked1);

        // Store everything into the right destination now. The 256-bit
        // unpacks operate within each 128-bit lane, so the four 16-byte
        // halves must be stored to interleaved offsets (0/32 and 16/48)
        // to restore the correct overall byte order.
        let base = dst.as_mut_ptr().offset(i * 2);
        let base1 = base.offset(0) as *mut _;
        let base2 = base.offset(16) as *mut _;
        let base3 = base.offset(32) as *mut _;
        let base4 = base.offset(48) as *mut _;
        _mm256_storeu2_m128i(base3, base1, res1);
        _mm256_storeu2_m128i(base4, base2, res2);
        src = &src[32..];
        i += 32;
    }

    // Encode whatever is left (< 32 bytes) with the narrower SSE4.1 path;
    // its Ok result is recomputed below, so the return value is discarded.
    let i = i as usize;
    let _ = hex_encode_sse41(src, &mut dst[i * 2..]);

    // SAFETY(UTF-8): only ASCII hex digits were written to this prefix.
    Ok(str::from_utf8_unchecked(&dst[..src.len() * 2 + i * 2]))
}
119 | ||
// copied from https://github.com/Matherunner/bin2hex-sse/blob/master/base16_sse4.cpp
/// Hex-encodes `src` into `dst` 16 bytes at a time with SSE4.1, then hands
/// the (< 16-byte) remainder off to the scalar fallback.
///
/// Returns the encoded prefix of `dst` as a `&str`. Callers have already
/// verified `dst.len() >= 2 * src.len()`.
///
/// # Safety
///
/// The CPU must support SSE4.1 (required by `_mm_blendv_epi8`); callers
/// check this with `is_x86_feature_detected!` first.
#[target_feature(enable = "sse4.1")]
#[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
unsafe fn hex_encode_sse41<'a>(mut src: &[u8], dst: &'a mut [u8]) -> Result<&'a str, usize> {
    // Broadcast constants: ASCII '0', the digit/letter threshold (9), the
    // offset turning values 10..=15 into 'a'..='f', and a low-nibble mask.
    let ascii_zero = _mm_set1_epi8(b'0' as i8);
    let nines = _mm_set1_epi8(9);
    let ascii_a = _mm_set1_epi8((b'a' - 9 - 1) as i8);
    let and4bits = _mm_set1_epi8(0xf);

    // `i` counts input bytes consumed so far; `i * 2` is the output offset.
    let mut i = 0_isize;
    while src.len() >= 16 {
        let invec = _mm_loadu_si128(src.as_ptr() as *const _);

        // Split each byte into its low nibble (masked1) and its high nibble
        // (masked2: shift down 4, then mask off cross-byte shift residue).
        let masked1 = _mm_and_si128(invec, and4bits);
        let masked2 = _mm_and_si128(_mm_srli_epi64(invec, 4), and4bits);

        // return 0xff corresponding to the elements > 9, or 0x00 otherwise
        let cmpmask1 = _mm_cmpgt_epi8(masked1, nines);
        let cmpmask2 = _mm_cmpgt_epi8(masked2, nines);

        // add '0' or the offset depending on the masks
        let masked1 = _mm_add_epi8(masked1, _mm_blendv_epi8(ascii_zero, ascii_a, cmpmask1));
        let masked2 = _mm_add_epi8(masked2, _mm_blendv_epi8(ascii_zero, ascii_a, cmpmask2));

        // interleave masked1 and masked2 bytes: high nibble first so the
        // output reads most-significant digit first
        let res1 = _mm_unpacklo_epi8(masked2, masked1);
        let res2 = _mm_unpackhi_epi8(masked2, masked1);

        // 16 input bytes produce 32 output bytes: two 16-byte stores.
        _mm_storeu_si128(dst.as_mut_ptr().offset(i * 2) as *mut _, res1);
        _mm_storeu_si128(dst.as_mut_ptr().offset(i * 2 + 16) as *mut _, res2);
        src = &src[16..];
        i += 16;
    }

    // Encode whatever is left (< 16 bytes) with the scalar fallback; its
    // Ok result is recomputed below, so the return value is discarded.
    let i = i as usize;
    let _ = hex_encode_fallback(src, &mut dst[i * 2..]);

    // SAFETY(UTF-8): only ASCII hex digits were written to this prefix.
    Ok(str::from_utf8_unchecked(&dst[..src.len() * 2 + i * 2]))
}
159 | ||
/// Scalar hex encoding, used when no SIMD implementation is available and
/// for the tail bytes left over by the vectorized paths.
///
/// Writes two lowercase hex digits per input byte into `dst` and returns
/// the encoded prefix as a `&str`. Always returns `Ok`.
fn hex_encode_fallback<'a>(src: &[u8], dst: &'a mut [u8]) -> Result<&'a str, usize> {
    // Maps a nibble value (0..=15) to its lowercase ASCII hex digit.
    fn hex(byte: u8) -> u8 {
        static TABLE: &[u8] = b"0123456789abcdef";
        TABLE[byte as usize]
    }

    for (idx, byte) in src.iter().enumerate() {
        // High nibble first, then low nibble.
        dst[idx * 2] = hex(byte >> 4);
        dst[idx * 2 + 1] = hex(byte & 0xf);
    }

    // SAFETY: every byte written above is an ASCII hex digit, so the
    // encoded prefix is valid UTF-8.
    unsafe { Ok(str::from_utf8_unchecked(&dst[..src.len() * 2])) }
}
173 | ||
// Run these with `cargo +nightly test --example hex -p stdarch`
#[cfg(test)]
mod tests {
    use std::iter;

    use super::*;

    /// Asserts that every implementation available on this CPU encodes
    /// `input` to exactly `output`.
    fn test(input: &[u8], output: &str) {
        // Fresh, exactly-sized destination buffer for each implementation.
        let tmp = || vec![0; input.len() * 2];

        assert_eq!(hex_encode_fallback(input, &mut tmp()).unwrap(), output);
        assert_eq!(hex_encode(input, &mut tmp()).unwrap(), output);

        #[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
        unsafe {
            if is_x86_feature_detected!("avx2") {
                assert_eq!(hex_encode_avx2(input, &mut tmp()).unwrap(), output);
            }
            if is_x86_feature_detected!("sse4.1") {
                assert_eq!(hex_encode_sse41(input, &mut tmp()).unwrap(), output);
            }
        }
    }

    #[test]
    fn empty() {
        test(b"", "");
    }

    #[test]
    fn big() {
        test(
            &[0; 1024],
            &iter::repeat('0').take(2048).collect::<String>(),
        );
    }

    // 313 is deliberately not a multiple of 16 or 32, exercising the
    // SIMD tail-handling paths.
    #[test]
    fn odd() {
        test(
            &[0; 313],
            &iter::repeat('0').take(313 * 2).collect::<String>(),
        );
    }

    #[test]
    fn avx_works() {
        let mut input = [0; 33];
        input[4] = 3;
        input[16] = 3;
        input[17] = 0x30;
        input[21] = 1;
        input[31] = 0x24;
        test(
            &input,
            "\
             0000000003000000\
             0000000000000000\
             0330000000010000\
             0000000000000024\
             00\
             ",
        );
    }

    quickcheck! {
        fn encode_equals_fallback(input: Vec<u8>) -> bool {
            let mut space1 = vec![0; input.len() * 2];
            let mut space2 = vec![0; input.len() * 2];
            let a = hex_encode(&input, &mut space1).unwrap();
            let b = hex_encode_fallback(&input, &mut space2).unwrap();
            a == b
        }

        #[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
        fn avx_equals_fallback(input: Vec<u8>) -> bool {
            if !is_x86_feature_detected!("avx2") {
                return true
            }
            let mut space1 = vec![0; input.len() * 2];
            let mut space2 = vec![0; input.len() * 2];
            let a = unsafe { hex_encode_avx2(&input, &mut space1).unwrap() };
            let b = hex_encode_fallback(&input, &mut space2).unwrap();
            a == b
        }

        #[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
        fn sse41_equals_fallback(input: Vec<u8>) -> bool {
            // Fixed: previously this gated on "avx2" (a copy-paste of the
            // test above), silently skipping the SSE4.1 check on CPUs with
            // SSE4.1 but no AVX2 — and calling hex_encode_sse41 without
            // having verified SSE4.1 at all.
            if !is_x86_feature_detected!("sse4.1") {
                return true
            }
            let mut space1 = vec![0; input.len() * 2];
            let mut space2 = vec![0; input.len() * 2];
            let a = unsafe { hex_encode_sse41(&input, &mut space1).unwrap() };
            let b = hex_encode_fallback(&input, &mut space2).unwrap();
            a == b
        }
    }
}
273 | ||
// Run these with `cargo +nightly bench --example hex -p stdarch`
#[cfg(test)]
mod benches {
    extern crate rand;
    extern crate test;

    use self::rand::Rng;

    use super::*;

    // A small odd size (exercising the tail paths) and a 1 MiB buffer.
    const SMALL_LEN: usize = 117;
    const LARGE_LEN: usize = 1024 * 1024;

    /// Benchmarks encoder `f` over `len` random input bytes.
    fn doit(
        b: &mut test::Bencher,
        len: usize,
        f: for<'a> unsafe fn(&[u8], &'a mut [u8]) -> Result<&'a str, usize>,
    ) {
        let mut rng = rand::thread_rng();
        let input: Vec<u8> = (0..len).map(|_| rng.gen()).collect();
        let mut dst = vec![0; input.len() * 2];
        b.bytes = len as u64;
        b.iter(|| unsafe {
            f(&input, &mut dst).unwrap();
            // Return a byte of the output so the call isn't optimized away.
            dst[0]
        });
    }

    #[bench]
    fn small_default(b: &mut test::Bencher) {
        doit(b, SMALL_LEN, hex_encode);
    }

    #[bench]
    fn small_fallback(b: &mut test::Bencher) {
        doit(b, SMALL_LEN, hex_encode_fallback);
    }

    #[bench]
    fn large_default(b: &mut test::Bencher) {
        doit(b, LARGE_LEN, hex_encode);
    }

    #[bench]
    fn large_fallback(b: &mut test::Bencher) {
        doit(b, LARGE_LEN, hex_encode_fallback);
    }

    #[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
    mod x86 {
        use super::*;

        #[bench]
        fn small_avx2(b: &mut test::Bencher) {
            if is_x86_feature_detected!("avx2") {
                doit(b, SMALL_LEN, hex_encode_avx2);
            }
        }

        #[bench]
        fn small_sse41(b: &mut test::Bencher) {
            if is_x86_feature_detected!("sse4.1") {
                doit(b, SMALL_LEN, hex_encode_sse41);
            }
        }

        #[bench]
        fn large_avx2(b: &mut test::Bencher) {
            if is_x86_feature_detected!("avx2") {
                doit(b, LARGE_LEN, hex_encode_avx2);
            }
        }

        #[bench]
        fn large_sse41(b: &mut test::Bencher) {
            if is_x86_feature_detected!("sse4.1") {
                doit(b, LARGE_LEN, hex_encode_sse41);
            }
        }
    }
}