//!
//! You can test out this program via:
//!
-//! echo test | cargo +nightly run --release --example hex -p stdsimd
+//! echo test | cargo +nightly run --release hex
//!
//! and you should see `746573740a` get printed out.
#![feature(stdsimd)]
#![cfg_attr(test, feature(test))]
-#![cfg_attr(feature = "cargo-clippy",
- allow(result_unwrap_used, print_stdout, option_unwrap_used,
- shadow_reuse, cast_possible_wrap, cast_sign_loss,
- missing_docs_in_private_items))]
+#![cfg_attr(
+ feature = "cargo-clippy",
+ allow(
+ clippy::result_unwrap_used,
+ clippy::print_stdout,
+ clippy::option_unwrap_used,
+ clippy::shadow_reuse,
+ clippy::cast_possible_wrap,
+ clippy::cast_ptr_alignment,
+ clippy::cast_sign_loss,
+ clippy::missing_docs_in_private_items
+ )
+)]
#[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
-#[macro_use]
-extern crate stdsimd;
+#[macro_use(is_x86_feature_detected)]
+extern crate std_detect;
-#[cfg(not(any(target_arch = "x86", target_arch = "x86_64")))]
-extern crate stdsimd;
+extern crate core_arch;
#[cfg(test)]
#[macro_use]
use std::str;
#[cfg(target_arch = "x86")]
-use stdsimd::arch::x86::*;
+use core_arch::x86::*;
#[cfg(target_arch = "x86_64")]
-use stdsimd::arch::x86_64::*;
+use core_arch::x86_64::*;
fn main() {
let mut input = Vec::new();
#[target_feature(enable = "avx2")]
#[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
-unsafe fn hex_encode_avx2<'a>(
- mut src: &[u8], dst: &'a mut [u8]
-) -> Result<&'a str, usize> {
+unsafe fn hex_encode_avx2<'a>(mut src: &[u8], dst: &'a mut [u8]) -> Result<&'a str, usize> {
let ascii_zero = _mm256_set1_epi8(b'0' as i8);
let nines = _mm256_set1_epi8(9);
let ascii_a = _mm256_set1_epi8((b'a' - 9 - 1) as i8);
let cmpmask2 = _mm256_cmpgt_epi8(masked2, nines);
// add '0' or the offset depending on the masks
- let masked1 = _mm256_add_epi8(
- masked1,
- _mm256_blendv_epi8(ascii_zero, ascii_a, cmpmask1),
- );
- let masked2 = _mm256_add_epi8(
- masked2,
- _mm256_blendv_epi8(ascii_zero, ascii_a, cmpmask2),
- );
+ let masked1 = _mm256_add_epi8(masked1, _mm256_blendv_epi8(ascii_zero, ascii_a, cmpmask1));
+ let masked2 = _mm256_add_epi8(masked2, _mm256_blendv_epi8(ascii_zero, ascii_a, cmpmask2));
// interleave masked1 and masked2 bytes
let res1 = _mm256_unpacklo_epi8(masked2, masked1);
let i = i as usize;
let _ = hex_encode_sse41(src, &mut dst[i * 2..]);
- Ok(str::from_utf8_unchecked(
- &dst[..src.len() * 2 + i * 2],
- ))
+ Ok(str::from_utf8_unchecked(&dst[..src.len() * 2 + i * 2]))
}
// copied from https://github.com/Matherunner/bin2hex-sse/blob/master/base16_sse4.cpp
#[target_feature(enable = "sse4.1")]
#[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
-unsafe fn hex_encode_sse41<'a>(
- mut src: &[u8], dst: &'a mut [u8]
-) -> Result<&'a str, usize> {
+unsafe fn hex_encode_sse41<'a>(mut src: &[u8], dst: &'a mut [u8]) -> Result<&'a str, usize> {
let ascii_zero = _mm_set1_epi8(b'0' as i8);
let nines = _mm_set1_epi8(9);
let ascii_a = _mm_set1_epi8((b'a' - 9 - 1) as i8);
let cmpmask2 = _mm_cmpgt_epi8(masked2, nines);
// add '0' or the offset depending on the masks
- let masked1 = _mm_add_epi8(
- masked1,
- _mm_blendv_epi8(ascii_zero, ascii_a, cmpmask1),
- );
- let masked2 = _mm_add_epi8(
- masked2,
- _mm_blendv_epi8(ascii_zero, ascii_a, cmpmask2),
- );
+ let masked1 = _mm_add_epi8(masked1, _mm_blendv_epi8(ascii_zero, ascii_a, cmpmask1));
+ let masked2 = _mm_add_epi8(masked2, _mm_blendv_epi8(ascii_zero, ascii_a, cmpmask2));
// interleave masked1 and masked2 bytes
let res1 = _mm_unpacklo_epi8(masked2, masked1);
let res2 = _mm_unpackhi_epi8(masked2, masked1);
_mm_storeu_si128(dst.as_mut_ptr().offset(i * 2) as *mut _, res1);
- _mm_storeu_si128(
- dst.as_mut_ptr().offset(i * 2 + 16) as *mut _,
- res2,
- );
+ _mm_storeu_si128(dst.as_mut_ptr().offset(i * 2 + 16) as *mut _, res2);
src = &src[16..];
i += 16;
}
let i = i as usize;
let _ = hex_encode_fallback(src, &mut dst[i * 2..]);
- Ok(str::from_utf8_unchecked(
- &dst[..src.len() * 2 + i * 2],
- ))
+ Ok(str::from_utf8_unchecked(&dst[..src.len() * 2 + i * 2]))
}
-fn hex_encode_fallback<'a>(
- src: &[u8], dst: &'a mut [u8]
-) -> Result<&'a str, usize> {
+fn hex_encode_fallback<'a>(src: &[u8], dst: &'a mut [u8]) -> Result<&'a str, usize> {
fn hex(byte: u8) -> u8 {
static TABLE: &[u8] = b"0123456789abcdef";
TABLE[byte as usize]
// Test helper: runs `input` through each available encoder and asserts that
// every result equals the expected hex string `output`.
fn test(input: &[u8], output: &str) {
// Builds a fresh zero-filled destination buffer on every call, sized at twice
// the input length.
let tmp = || vec![0; input.len() * 2];
- assert_eq!(
- hex_encode_fallback(input, &mut tmp()).unwrap(),
- output
- );
+ assert_eq!(hex_encode_fallback(input, &mut tmp()).unwrap(), output);
// `hex_encode` is defined outside this view; it is held to the same expected output.
assert_eq!(hex_encode(input, &mut tmp()).unwrap(), output);
// On x86/x86_64 the SIMD encoders are also called directly. They are
// `unsafe fn`s built with `#[target_feature]`, so each call is guarded by a
// runtime CPU feature check.
#[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
unsafe {
if is_x86_feature_detected!("avx2") {
- assert_eq!(
- hex_encode_avx2(input, &mut tmp()).unwrap(),
- output
- );
+ assert_eq!(hex_encode_avx2(input, &mut tmp()).unwrap(), output);
}
if is_x86_feature_detected!("sse4.1") {
- assert_eq!(
- hex_encode_sse41(input, &mut tmp()).unwrap(),
- output
- );
+ assert_eq!(hex_encode_sse41(input, &mut tmp()).unwrap(), output);
}
}
}
// Encodes 313 zero bytes (an odd-length input) and expects 626 ASCII '0'
// characters back, via the `test` helper.
fn odd() {
test(
&[0; 313],
// Expected output: two '0' characters per input byte, hence `313 * 2`.
- &iter::repeat('0')
- .take(313 * 2)
- .collect::<String>(),
+ &iter::repeat('0').take(313 * 2).collect::<String>(),
);
}
/// Length constant equal to `1024 * 1024` (1_048_576).
// NOTE(review): presumably the input length for the large benchmarks; its
// uses are outside this view, so confirm against callers.
const LARGE_LEN: usize = 1024 * 1024;
fn doit(
- b: &mut test::Bencher, len: usize,
+ b: &mut test::Bencher,
+ len: usize,
f: for<'a> unsafe fn(&[u8], &'a mut [u8]) -> Result<&'a str, usize>,
) {
- let input = rand::thread_rng()
- .gen_iter::<u8>()
+ let mut rng = rand::thread_rng();
+ let input = std::iter::repeat(())
+ .map(|()| rng.gen::<u8>())
.take(len)
.collect::<Vec<_>>();
let mut dst = vec![0; input.len() * 2];