]>
Commit | Line | Data |
---|---|---|
0531ce1d XL |
1 | //! An example showing runtime dispatch to an architecture-optimized |
2 | //! implementation. | |
3 | //! | |
4 | //! This program implements hex encoding a slice into a predetermined | |
5 | //! destination using various different instruction sets. This selects at | |
6 | //! runtime the most optimized implementation and uses that rather than being | |
7 | //! required to be compiled differently. | |
8 | //! | |
9 | //! You can test out this program via: | |
10 | //! | |
11 | //! echo test | cargo +nightly run --release --example hex -p stdsimd | |
12 | //! | |
13 | //! and you should see `746573740a` get printed out. | |
14 | ||
83c7162d | 15 | #![feature(stdsimd)] |
0531ce1d | 16 | #![cfg_attr(test, feature(test))] |
8faf50e0 XL |
17 | #![cfg_attr( |
18 | feature = "cargo-clippy", | |
19 | allow( | |
20 | result_unwrap_used, print_stdout, option_unwrap_used, shadow_reuse, | |
21 | cast_possible_wrap, cast_sign_loss, missing_docs_in_private_items | |
22 | ) | |
23 | )] | |
0531ce1d XL |
24 | |
25 | #[cfg(any(target_arch = "x86", target_arch = "x86_64"))] | |
26 | #[macro_use] | |
27 | extern crate stdsimd; | |
28 | ||
29 | #[cfg(not(any(target_arch = "x86", target_arch = "x86_64")))] | |
30 | extern crate stdsimd; | |
31 | ||
32 | #[cfg(test)] | |
33 | #[macro_use] | |
34 | extern crate quickcheck; | |
35 | ||
0531ce1d | 36 | use std::io::{self, Read}; |
83c7162d | 37 | use std::str; |
0531ce1d XL |
38 | |
39 | #[cfg(target_arch = "x86")] | |
40 | use stdsimd::arch::x86::*; | |
41 | #[cfg(target_arch = "x86_64")] | |
42 | use stdsimd::arch::x86_64::*; | |
43 | ||
44 | fn main() { | |
45 | let mut input = Vec::new(); | |
46 | io::stdin().read_to_end(&mut input).unwrap(); | |
47 | let mut dst = vec![0; 2 * input.len()]; | |
48 | let s = hex_encode(&input, &mut dst).unwrap(); | |
49 | println!("{}", s); | |
50 | } | |
51 | ||
52 | fn hex_encode<'a>(src: &[u8], dst: &'a mut [u8]) -> Result<&'a str, usize> { | |
53 | let len = src.len().checked_mul(2).unwrap(); | |
54 | if dst.len() < len { | |
55 | return Err(len); | |
56 | } | |
57 | ||
58 | #[cfg(any(target_arch = "x86", target_arch = "x86_64"))] | |
59 | { | |
60 | if is_x86_feature_detected!("avx2") { | |
61 | return unsafe { hex_encode_avx2(src, dst) }; | |
62 | } | |
63 | if is_x86_feature_detected!("sse4.1") { | |
64 | return unsafe { hex_encode_sse41(src, dst) }; | |
65 | } | |
66 | } | |
67 | ||
68 | hex_encode_fallback(src, dst) | |
69 | } | |
70 | ||
/// Hex-encodes `src` into `dst` 32 input bytes at a time using AVX2,
/// delegating the remaining (< 32 byte) tail to the SSE4.1 version.
///
/// # Safety
///
/// The caller must ensure the CPU supports AVX2 (and SSE4.1 for the
/// tail call) and that `dst` holds at least `2 * src.len()` bytes; the
/// raw stores below do not re-check the destination length.
#[target_feature(enable = "avx2")]
#[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
unsafe fn hex_encode_avx2<'a>(
    mut src: &[u8], dst: &'a mut [u8],
) -> Result<&'a str, usize> {
    let ascii_zero = _mm256_set1_epi8(b'0' as i8);
    let nines = _mm256_set1_epi8(9);
    // Offset chosen so that `nibble + ascii_a` yields 'a'..'f' for 10..15.
    let ascii_a = _mm256_set1_epi8((b'a' - 9 - 1) as i8);
    let and4bits = _mm256_set1_epi8(0xf);

    // Count of input bytes consumed so far.
    let mut i = 0_isize;
    while src.len() >= 32 {
        let invec = _mm256_loadu_si256(src.as_ptr() as *const _);

        // Low and high nibble of every byte. The 64-bit-lane shift is
        // fine here because the 0x0f mask discards any bits that crossed
        // a byte boundary.
        let masked1 = _mm256_and_si256(invec, and4bits);
        let masked2 = _mm256_and_si256(_mm256_srli_epi64(invec, 4), and4bits);

        // return 0xff corresponding to the elements > 9, or 0x00 otherwise
        let cmpmask1 = _mm256_cmpgt_epi8(masked1, nines);
        let cmpmask2 = _mm256_cmpgt_epi8(masked2, nines);

        // add '0' or the offset depending on the masks
        let masked1 = _mm256_add_epi8(
            masked1,
            _mm256_blendv_epi8(ascii_zero, ascii_a, cmpmask1),
        );
        let masked2 = _mm256_add_epi8(
            masked2,
            _mm256_blendv_epi8(ascii_zero, ascii_a, cmpmask2),
        );

        // interleave masked1 and masked2 bytes (high nibble first)
        let res1 = _mm256_unpacklo_epi8(masked2, masked1);
        let res2 = _mm256_unpackhi_epi8(masked2, masked1);

        // Store everything into the right destination now.
        // unpack operates within each 128-bit lane, so the four 16-byte
        // halves are scattered via storeu2 to land contiguously in `dst`.
        let base = dst.as_mut_ptr().offset(i * 2);
        let base1 = base.offset(0) as *mut _;
        let base2 = base.offset(16) as *mut _;
        let base3 = base.offset(32) as *mut _;
        let base4 = base.offset(48) as *mut _;
        _mm256_storeu2_m128i(base3, base1, res1);
        _mm256_storeu2_m128i(base4, base2, res2);
        src = &src[32..];
        i += 32;
    }

    let i = i as usize;
    // Encode whatever is left (< 32 bytes) with the narrower implementation;
    // the dst slice was pre-checked by the caller, so the Err case is moot.
    let _ = hex_encode_sse41(src, &mut dst[i * 2..]);

    Ok(str::from_utf8_unchecked(&dst[..src.len() * 2 + i * 2]))
}
123 | ||
// copied from https://github.com/Matherunner/bin2hex-sse/blob/master/base16_sse4.cpp
/// Hex-encodes `src` into `dst` 16 input bytes at a time using SSE4.1,
/// delegating the remaining (< 16 byte) tail to the scalar fallback.
///
/// # Safety
///
/// The caller must ensure the CPU supports SSE4.1 and that `dst` holds
/// at least `2 * src.len()` bytes; the raw stores below do not re-check
/// the destination length.
#[target_feature(enable = "sse4.1")]
#[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
unsafe fn hex_encode_sse41<'a>(
    mut src: &[u8], dst: &'a mut [u8],
) -> Result<&'a str, usize> {
    let ascii_zero = _mm_set1_epi8(b'0' as i8);
    let nines = _mm_set1_epi8(9);
    // Offset chosen so that `nibble + ascii_a` yields 'a'..'f' for 10..15.
    let ascii_a = _mm_set1_epi8((b'a' - 9 - 1) as i8);
    let and4bits = _mm_set1_epi8(0xf);

    // Count of input bytes consumed so far.
    let mut i = 0_isize;
    while src.len() >= 16 {
        let invec = _mm_loadu_si128(src.as_ptr() as *const _);

        // Low and high nibble of every byte. The 64-bit-lane shift is
        // fine here because the 0x0f mask discards any bits that crossed
        // a byte boundary.
        let masked1 = _mm_and_si128(invec, and4bits);
        let masked2 = _mm_and_si128(_mm_srli_epi64(invec, 4), and4bits);

        // return 0xff corresponding to the elements > 9, or 0x00 otherwise
        let cmpmask1 = _mm_cmpgt_epi8(masked1, nines);
        let cmpmask2 = _mm_cmpgt_epi8(masked2, nines);

        // add '0' or the offset depending on the masks
        let masked1 = _mm_add_epi8(
            masked1,
            _mm_blendv_epi8(ascii_zero, ascii_a, cmpmask1),
        );
        let masked2 = _mm_add_epi8(
            masked2,
            _mm_blendv_epi8(ascii_zero, ascii_a, cmpmask2),
        );

        // interleave masked1 and masked2 bytes (high nibble first)
        let res1 = _mm_unpacklo_epi8(masked2, masked1);
        let res2 = _mm_unpackhi_epi8(masked2, masked1);

        // Every input byte produces two output bytes, hence `i * 2`.
        _mm_storeu_si128(dst.as_mut_ptr().offset(i * 2) as *mut _, res1);
        _mm_storeu_si128(dst.as_mut_ptr().offset(i * 2 + 16) as *mut _, res2);
        src = &src[16..];
        i += 16;
    }

    let i = i as usize;
    // Encode whatever is left (< 16 bytes) one byte at a time; the dst
    // slice was pre-checked by the caller, so the Err case is moot.
    let _ = hex_encode_fallback(src, &mut dst[i * 2..]);

    Ok(str::from_utf8_unchecked(&dst[..src.len() * 2 + i * 2]))
}
171 | ||
/// Scalar hex encoder: one table lookup per nibble.
///
/// Writes `2 * src.len()` ASCII bytes into the front of `dst` and
/// returns them as a `&str`. Like the SIMD variants, it assumes the
/// caller already verified `dst` is large enough.
fn hex_encode_fallback<'a>(
    src: &[u8], dst: &'a mut [u8],
) -> Result<&'a str, usize> {
    const TABLE: &[u8] = b"0123456789abcdef";

    for (pair, &byte) in dst.chunks_mut(2).zip(src.iter()) {
        pair[0] = TABLE[(byte >> 4) as usize];
        pair[1] = TABLE[(byte & 0xf) as usize];
    }

    // The table only ever emits ASCII, so the output is valid UTF-8.
    unsafe { Ok(str::from_utf8_unchecked(&dst[..src.len() * 2])) }
}
187 | ||
// Run these with `cargo +nightly test --example hex -p stdsimd`
#[cfg(test)]
mod tests {
    use std::iter;

    use super::*;

    /// Asserts that every implementation available on this CPU encodes
    /// `input` to exactly `output`.
    fn test(input: &[u8], output: &str) {
        // Fresh zeroed destination for each implementation under test.
        let tmp = || vec![0; input.len() * 2];

        assert_eq!(hex_encode_fallback(input, &mut tmp()).unwrap(), output);
        assert_eq!(hex_encode(input, &mut tmp()).unwrap(), output);

        #[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
        unsafe {
            if is_x86_feature_detected!("avx2") {
                assert_eq!(
                    hex_encode_avx2(input, &mut tmp()).unwrap(),
                    output
                );
            }
            if is_x86_feature_detected!("sse4.1") {
                assert_eq!(
                    hex_encode_sse41(input, &mut tmp()).unwrap(),
                    output
                );
            }
        }
    }

    #[test]
    fn empty() {
        test(b"", "");
    }

    // 1024 bytes: exercises many full 32-byte AVX2 iterations.
    #[test]
    fn big() {
        test(
            &[0; 1024],
            &iter::repeat('0').take(2048).collect::<String>(),
        );
    }

    // 313 = 9 * 32 + 25: leaves a tail for both SIMD remainders.
    #[test]
    fn odd() {
        test(
            &[0; 313],
            &iter::repeat('0').take(313 * 2).collect::<String>(),
        );
    }

    // 33 bytes: one full AVX2 chunk plus a 1-byte tail, with nonzero
    // values scattered to catch interleaving/ordering mistakes.
    #[test]
    fn avx_works() {
        let mut input = [0; 33];
        input[4] = 3;
        input[16] = 3;
        input[17] = 0x30;
        input[21] = 1;
        input[31] = 0x24;
        test(
            &input,
            "\
             0000000003000000\
             0000000000000000\
             0330000000010000\
             0000000000000024\
             00\
             ",
        );
    }

    quickcheck! {
        fn encode_equals_fallback(input: Vec<u8>) -> bool {
            let mut space1 = vec![0; input.len() * 2];
            let mut space2 = vec![0; input.len() * 2];
            let a = hex_encode(&input, &mut space1).unwrap();
            let b = hex_encode_fallback(&input, &mut space2).unwrap();
            a == b
        }

        #[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
        fn avx_equals_fallback(input: Vec<u8>) -> bool {
            if !is_x86_feature_detected!("avx2") {
                return true
            }
            let mut space1 = vec![0; input.len() * 2];
            let mut space2 = vec![0; input.len() * 2];
            let a = unsafe { hex_encode_avx2(&input, &mut space1).unwrap() };
            let b = hex_encode_fallback(&input, &mut space2).unwrap();
            a == b
        }

        #[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
        fn sse41_equals_fallback(input: Vec<u8>) -> bool {
            // Gate on the feature actually exercised. This previously
            // checked "avx2", which skipped the test entirely on
            // SSE4.1-only machines.
            if !is_x86_feature_detected!("sse4.1") {
                return true
            }
            let mut space1 = vec![0; input.len() * 2];
            let mut space2 = vec![0; input.len() * 2];
            let a = unsafe { hex_encode_sse41(&input, &mut space1).unwrap() };
            let b = hex_encode_fallback(&input, &mut space2).unwrap();
            a == b
        }
    }
}
293 | ||
// Run these with `cargo +nightly bench --example hex -p stdsimd`
#[cfg(test)]
mod benches {
    extern crate rand;
    extern crate test;

    use self::rand::Rng;

    use super::*;

    // One size that fits in cache and one large enough to stress
    // memory bandwidth.
    const SMALL_LEN: usize = 117;
    const LARGE_LEN: usize = 1024 * 1024;

    /// Benchmarks `f` over `len` random input bytes.
    ///
    /// Returning `dst[0]` from the closure keeps the optimizer from
    /// discarding the encoded output as dead.
    fn doit(
        b: &mut test::Bencher, len: usize,
        f: for<'a> unsafe fn(&[u8], &'a mut [u8]) -> Result<&'a str, usize>,
    ) {
        let input = rand::thread_rng()
            .gen_iter::<u8>()
            .take(len)
            .collect::<Vec<_>>();
        let mut dst = vec![0; input.len() * 2];
        b.bytes = len as u64;
        b.iter(|| unsafe {
            f(&input, &mut dst).unwrap();
            dst[0]
        });
    }

    #[bench]
    fn small_default(b: &mut test::Bencher) {
        doit(b, SMALL_LEN, hex_encode);
    }

    #[bench]
    fn small_fallback(b: &mut test::Bencher) {
        doit(b, SMALL_LEN, hex_encode_fallback);
    }

    #[bench]
    fn large_default(b: &mut test::Bencher) {
        doit(b, LARGE_LEN, hex_encode);
    }

    #[bench]
    fn large_fallback(b: &mut test::Bencher) {
        doit(b, LARGE_LEN, hex_encode_fallback);
    }

    #[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
    mod x86 {
        use super::*;

        #[bench]
        fn small_avx2(b: &mut test::Bencher) {
            if is_x86_feature_detected!("avx2") {
                doit(b, SMALL_LEN, hex_encode_avx2);
            }
        }

        #[bench]
        fn small_sse41(b: &mut test::Bencher) {
            if is_x86_feature_detected!("sse4.1") {
                doit(b, SMALL_LEN, hex_encode_sse41);
            }
        }

        #[bench]
        fn large_avx2(b: &mut test::Bencher) {
            if is_x86_feature_detected!("avx2") {
                doit(b, LARGE_LEN, hex_encode_avx2);
            }
        }

        #[bench]
        fn large_sse41(b: &mut test::Bencher) {
            if is_x86_feature_detected!("sse4.1") {
                doit(b, LARGE_LEN, hex_encode_sse41);
            }
        }
    }
}