// We only use AVX when we can detect at runtime whether it's available, which
// requires std.
#[cfg(feature = "use_std")]
// This macro employs a gcc-like "ifunc" trick whereby upon first calling
// `memchr` (for example), CPU feature detection will be performed at runtime
// to determine the best implementation to use. After CPU feature detection
// is done, we replace `memchr`'s function pointer with the selection. Upon
// subsequent invocations, the CPU-specific routine is invoked directly, which
// skips the CPU feature detection and subsequent branch that's required.
//
// While this typically doesn't matter for rare occurrences or when used on
// larger haystacks, `memchr` can be called in tight loops where the overhead
// of this branch can actually add up *and is measurable*. This trick was
// necessary to bring this implementation up to glibc's speeds for the 'tiny'
// benchmarks, for example.
//
// At some point, I expect the Rust ecosystem will get a nice macro for doing
// exactly this, at which point, we can replace our hand-jammed version of it.
//
// N.B. The ifunc strategy does prevent function inlining of course, but on
// modern CPUs, you'll probably end up with the AVX2 implementation, which
// probably can't be inlined anyway---unless you've compiled your entire
// program with AVX2 enabled. However, even then, the various memchr
// implementations aren't exactly small, so inlining might not help anyway!
#[cfg(feature = "use_std")]
macro_rules! ifunc {
    ($fnty:ty, $name:ident, $haystack:ident, $($needle:ident),+) => {{
        use std::mem;
        use std::sync::atomic::{AtomicPtr, Ordering};

        // Raw, type-erased function pointer. We store it in an `AtomicPtr`
        // and transmute back to `$fnty` at each call site.
        type FnRaw = *mut ();

        // Starts out pointing at `detect`. After the first call, it holds
        // the CPU-specific routine selected at runtime, so subsequent calls
        // skip feature detection entirely.
        static FN: AtomicPtr<()> = AtomicPtr::new(detect as FnRaw);

        // First-call path: select the best available implementation, cache
        // it in `FN`, then run it for this call.
        fn detect($($needle: u8),+, haystack: &[u8]) -> Option<usize> {
            let fun =
                if cfg!(memchr_runtime_avx) && is_x86_feature_detected!("avx2") {
                    avx::$name as FnRaw
                } else if cfg!(memchr_runtime_sse2) {
                    sse2::$name as FnRaw
                } else {
                    fallback::$name as FnRaw
                };
            FN.store(fun as FnRaw, Ordering::Relaxed);
            // SAFETY: `fun` was produced from a fn item whose signature is
            // exactly `$fnty`, so the transmute and call are sound.
            unsafe {
                mem::transmute::<FnRaw, $fnty>(fun)($($needle),+, haystack)
            }
        }

        // SAFETY: `FN` only ever holds pointers to fn items of type `$fnty`
        // (`detect` initially, then whatever `detect` stored).
        unsafe {
            let fun = FN.load(Ordering::Relaxed);
            mem::transmute::<FnRaw, $fnty>(fun)($($needle),+, $haystack)
        }
    }}
}
// When std isn't enabled (which provides runtime CPU feature detection), or if
// runtime CPU feature detection has been explicitly disabled, then just call
// our optimized SSE2 routine directly. SSE2 is available on all x86_64 targets,
// so no CPU feature detection is necessary.
#[cfg(not(feature = "use_std"))]
macro_rules! ifunc {
    ($fnty:ty, $name:ident, $haystack:ident, $($needle:ident),+) => {{
        if cfg!(memchr_runtime_sse2) {
            // SAFETY: SSE2 is guaranteed to be available on all x86_64
            // targets, so calling the SSE2 routine is always sound here.
            unsafe { sse2::$name($($needle),+, $haystack) }
        } else {
            fallback::$name($($needle),+, $haystack)
        }
    }}
}
78 pub fn memchr(n1
: u8, haystack
: &[u8]) -> Option
<usize> {
79 ifunc
!(fn(u8, &[u8]) -> Option
<usize>, memchr
, haystack
, n1
)
83 pub fn memchr2(n1
: u8, n2
: u8, haystack
: &[u8]) -> Option
<usize> {
84 ifunc
!(fn(u8, u8, &[u8]) -> Option
<usize>, memchr2
, haystack
, n1
, n2
)
88 pub fn memchr3(n1
: u8, n2
: u8, n3
: u8, haystack
: &[u8]) -> Option
<usize> {
89 ifunc
!(fn(u8, u8, u8, &[u8]) -> Option
<usize>, memchr3
, haystack
, n1
, n2
, n3
)
93 pub fn memrchr(n1
: u8, haystack
: &[u8]) -> Option
<usize> {
94 ifunc
!(fn(u8, &[u8]) -> Option
<usize>, memrchr
, haystack
, n1
)
98 pub fn memrchr2(n1
: u8, n2
: u8, haystack
: &[u8]) -> Option
<usize> {
99 ifunc
!(fn(u8, u8, &[u8]) -> Option
<usize>, memrchr2
, haystack
, n1
, n2
)
103 pub fn memrchr3(n1
: u8, n2
: u8, n3
: u8, haystack
: &[u8]) -> Option
<usize> {
104 ifunc
!(fn(u8, u8, u8, &[u8]) -> Option
<usize>, memrchr3
, haystack
, n1
, n2
, n3
)