1 use crate::memmem
::{rarebytes::RareNeedleBytes, NeedleInfo}
;
// NOTE(review): these three cfg attributes appear orphaned — the `mod` items
// they gate (SIMD prefilter submodules, judging by the `wasm`/`x86` paths used
// later in this file) are missing from this listing. Confirm against the
// upstream file before editing.
4 #[cfg(memchr_runtime_simd)]
6 #[cfg(all(not(miri), target_arch = "wasm32", memchr_runtime_simd))]
8 #[cfg(all(not(miri), target_arch = "x86_64", memchr_runtime_simd))]
/// The maximum frequency rank permitted for the fallback prefilter. If the
/// rarest byte in the needle has a frequency rank above this value, then no
/// prefilter is used if the fallback prefilter would otherwise be selected.
const MAX_FALLBACK_RANK: usize = 250;
16 /// A combination of prefilter effectiveness state, the prefilter function and
17 /// the needle info required to run a prefilter.
19 /// For the most part, these are grouped into a single type for convenience,
20 /// instead of needing to pass around all three as distinct function
22 pub(crate) struct Pre
<'a
> {
23 /// State that tracks the effectiveness of a prefilter.
24 pub(crate) state
: &'a
mut PrefilterState
,
25 /// The actual prefilter function.
26 pub(crate) prefn
: PrefilterFn
,
27 /// Information about a needle, such as its RK hash and rare byte offsets.
28 pub(crate) ninfo
: &'a NeedleInfo
,
// NOTE(review): this listing is missing lines here — the `impl<'a> Pre<'a>`
// header and the full signature of `call` (presumably taking haystack/needle
// slices and returning the prefilter's result) are not visible. Only the
// method bodies survive. Confirm against the upstream file before editing.
32 /// Call this prefilter on the given haystack with the given needle.
39 self.prefn
.call(self.state
, self.ninfo
, haystack
, needle
)
42 /// Return true if and only if this prefilter should be used.
44 pub(crate) fn should_call(&mut self) -> bool
{
45 self.state
.is_effective()
49 /// A prefilter function.
51 /// A prefilter function describes both forward and reverse searches.
52 /// (Although, we don't currently implement prefilters for reverse searching.)
53 /// In the case of a forward search, the position returned corresponds to
54 /// the starting offset of a match (confirmed or possible). Its minimum
55 /// value is `0`, and its maximum value is `haystack.len() - 1`. In the case
56 /// of a reverse search, the position returned corresponds to the position
57 /// immediately after a match (confirmed or possible). Its minimum value is `1`
58 /// and its maximum value is `haystack.len()`.
60 /// In both cases, the position returned is the starting (or ending) point of a
61 /// _possible_ match. That is, returning a false positive is okay. A prefilter,
62 /// however, must never return any false negatives. That is, if a match exists
63 /// at a particular position `i`, then a prefilter _must_ return that position.
64 /// It cannot skip past it.
68 /// A prefilter function is not safe to create, since not all prefilters are
69 /// safe to call in all contexts. (e.g., A prefilter that uses AVX instructions
70 /// may only be called on x86_64 CPUs with the relevant AVX feature enabled.)
71 /// Thus, callers must ensure that when a prefilter function is created that it
72 /// is safe to call for the current environment.
73 #[derive(Clone, Copy)]
74 pub(crate) struct PrefilterFn(PrefilterFnTy
);
// NOTE(review): the original line numbers jump repeatedly in this region.
// Missing from this listing: the remaining parameters and return type of
// `PrefilterFnTy`, the remaining parameters and closing brace of
// `simple_memchr_fallback`, the body of `PrefilterFn::new`, and the full
// signature of `PrefilterFn::call`. Confirm against the upstream file
// before editing any of these items.
76 /// The type of a prefilter function. All prefilters must satisfy this
79 /// Using a function pointer like this does inhibit inlining, but it does
80 /// eliminate branching and the extra costs associated with copying a larger
81 /// enum. Note also, that using Box<dyn SomePrefilterTrait> can't really work
82 /// here, since we want to work in contexts that don't have dynamic memory
83 /// allocation. Moreover, in the default configuration of this crate on x86_64
84 /// CPUs released in the past ~decade, we will use an AVX2-optimized prefilter,
85 /// which generally won't be inlineable into the surrounding code anyway.
86 /// (Unless AVX2 is enabled at compile time, but this is typically rare, since
87 /// it produces a non-portable binary.)
88 pub(crate) type PrefilterFnTy
= unsafe fn(
89 prestate
: &mut PrefilterState
,
95 // If the haystack is too small for SSE2, then just run memchr on the
96 // rarest byte and be done with it. (It is likely that this code path is
97 // rarely exercised, since a higher level routine will probably dispatch to
98 // Rabin-Karp for such a small haystack.)
99 #[cfg(memchr_runtime_simd)]
100 fn simple_memchr_fallback(
101 _prestate
: &mut PrefilterState
,
106 let (rare
, _
) = ninfo
.rarebytes
.as_rare_ordered_usize();
107 crate::memchr(needle
[rare
], haystack
).map(|i
| i
.saturating_sub(rare
))
111 /// Create a new prefilter function from the function pointer given.
115 /// Callers must ensure that the given prefilter function is safe to call
116 /// for all inputs in the current environment. For example, if the given
117 /// prefilter function uses AVX instructions, then the caller must ensure
118 /// that the appropriate AVX CPU features are enabled.
119 pub(crate) unsafe fn new(prefn
: PrefilterFnTy
) -> PrefilterFn
{
123 /// Call the underlying prefilter function with the given arguments.
126 prestate
: &mut PrefilterState
,
131 // SAFETY: Callers have the burden of ensuring that a prefilter
132 // function is safe to call for all inputs in the current environment.
133 unsafe { (self.0)(prestate, ninfo, haystack, needle) }
137 impl core
::fmt
::Debug
for PrefilterFn
{
138 fn fmt(&self, f
: &mut core
::fmt
::Formatter
) -> core
::fmt
::Result
{
139 "<prefilter-fn(...)>".fmt(f
)
143 /// Prefilter controls whether heuristics are used to accelerate searching.
145 /// A prefilter refers to the idea of detecting candidate matches very quickly,
146 /// and then confirming whether those candidates are full matches. This
147 /// idea can be quite effective since it's often the case that looking for
148 /// candidates can be a lot faster than running a complete substring search
149 /// over the entire input. Namely, looking for candidates can be done with
150 /// extremely fast vectorized code.
152 /// The downside of a prefilter is that it assumes false positives (which are
153 /// candidates generated by a prefilter that aren't matches) are somewhat rare
154 /// relative to the frequency of full matches. That is, if a lot of false
155 /// positives are generated, then it's possible for search time to be worse
156 /// than if the prefilter wasn't enabled in the first place.
158 /// Another downside of a prefilter is that it can result in highly variable
159 /// performance, where some cases are extraordinarily fast and others aren't.
160 /// Typically, variable performance isn't a problem, but it may be for your use case.
163 /// The use of prefilters in this implementation does use a heuristic to detect
164 /// when a prefilter might not be carrying its weight, and will dynamically
165 /// disable its use. Nevertheless, this configuration option gives callers
166 /// the ability to disable prefilters if you have knowledge that they won't be
// NOTE(review): missing from this listing: the `pub enum Prefilter { ... }`
// declaration itself (the `None`/`Auto` variants documented below), the body
// of `Default::default`, and the non-`None` arm plus closing braces of
// `is_none`. Confirm against the upstream file before editing.
168 #[derive(Clone, Copy, Debug)]
171 /// Never used a prefilter in substring search.
173 /// Automatically detect whether a heuristic prefilter should be used. If
174 /// it is used, then heuristics will be used to dynamically disable the
175 /// prefilter if it is believed to not be carrying its weight.
179 impl Default
for Prefilter
{
180 fn default() -> Prefilter
{
186 pub(crate) fn is_none(&self) -> bool
{
188 Prefilter
::None
=> true,
194 /// PrefilterState tracks state associated with the effectiveness of a
195 /// prefilter. It is used to track how many bytes, on average, are skipped by
196 /// the prefilter. If this average dips below a certain threshold over time,
197 /// then the state renders the prefilter inert and stops using it.
199 /// A prefilter state should be created for each search. (Where creating an
200 /// iterator is treated as a single search.) A prefilter state should only be
201 /// created from a `Freqy`. e.g., An inert `Freqy` will produce an inert
202 /// `PrefilterState`.
// NOTE(review): the actual field declarations of `PrefilterState` (documented
// below; usage elsewhere in this file suggests `skips` and `skipped`, both
// u32 — TODO confirm) are missing from this listing, as is part of the
// MIN_SKIP_BYTES doc comment. Confirm against the upstream file.
203 #[derive(Clone, Debug)]
204 pub(crate) struct PrefilterState
{
205 /// The number of skips that has been executed. This is always 1 greater
206 /// than the actual number of skips. The special sentinel value of 0
207 /// indicates that the prefilter is inert. This is useful to avoid
208 /// additional checks to determine whether the prefilter is still
209 /// "effective." Once a prefilter becomes inert, it should no longer be
210 /// used (according to our heuristics).
212 /// The total number of bytes that have been skipped.
216 impl PrefilterState
{
217 /// The minimum number of skip attempts to try before considering whether
218 /// a prefilter is effective or not.
219 const MIN_SKIPS
: u32 = 50;
221 /// The minimum amount of bytes that skipping must average.
223 /// This value was chosen based on varying it and checking
224 /// the microbenchmarks. In particular, this can impact the
225 /// pathological/repeated-{huge,small} benchmarks quite a bit if it's set
227 const MIN_SKIP_BYTES
: u32 = 8;
229 /// Create a fresh prefilter state.
230 pub(crate) fn new() -> PrefilterState
{
231 PrefilterState { skips: 1, skipped: 0 }
234 /// Create a fresh prefilter state that is always inert.
235 pub(crate) fn inert() -> PrefilterState
{
236 PrefilterState { skips: 0, skipped: 0 }
239 /// Update this state with the number of bytes skipped on the last
240 /// invocation of the prefilter.
242 pub(crate) fn update(&mut self, skipped
: usize) {
243 self.skips
= self.skips
.saturating_add(1);
244 // We need to do this dance since it's technically possible for
245 // `skipped` to overflow a `u32`. (And we use a `u32` to reduce the
246 // size of a prefilter state.)
247 if skipped
> core
::u32::MAX
as usize {
248 self.skipped
= core
::u32::MAX
;
250 self.skipped
= self.skipped
.saturating_add(skipped
as u32);
// NOTE(review): missing from this listing: the rest of the `is_effective`
// doc comment, the bodies of the two `if` branches (presumably early
// returns), whatever follows them (possibly marking the state inert), and
// the entire body of `is_inert`. Confirm against the upstream file before
// editing.
254 /// Return true if and only if this state indicates that a prefilter is
257 pub(crate) fn is_effective(&mut self) -> bool
{
261 if self.skips() < PrefilterState
::MIN_SKIPS
{
264 if self.skipped
>= PrefilterState
::MIN_SKIP_BYTES
* self.skips() {
274 fn is_inert(&self) -> bool
{
279 fn skips(&self) -> u32 {
280 self.skips
.saturating_sub(1)
// NOTE(review): the original line numbers jump throughout `forward`. Missing
// from this listing: at least two of the function's parameters (a `config`
// and a `needle` are used below but never declared here), the early-return
// body of the `is_none || len <= 1` check, the structure joining the cfg'd
// x86/wasm sections, and the function's final return path plus closing
// brace. Confirm against the upstream file before editing.
284 /// Determine which prefilter function, if any, to use.
286 /// This only applies to x86_64 when runtime SIMD detection is enabled (which
287 /// is the default). In general, we try to use an AVX prefilter, followed by
288 /// SSE and then followed by a generic one based on memchr.
290 pub(crate) fn forward(
292 rare
: &RareNeedleBytes
,
294 ) -> Option
<PrefilterFn
> {
295 if config
.is_none() || needle
.len() <= 1 {
299 #[cfg(all(not(miri), target_arch = "x86_64", memchr_runtime_simd))]
301 #[cfg(feature = "std")]
303 if cfg
!(memchr_runtime_avx
) {
304 if is_x86_feature_detected
!("avx2") {
305 // SAFETY: x86::avx::find only requires the avx2 feature,
306 // which we've just checked above.
307 return unsafe { Some(PrefilterFn::new(x86::avx::find)) }
;
311 if cfg
!(memchr_runtime_sse2
) {
312 // SAFETY: x86::sse::find only requires the sse2 feature, which is
313 // guaranteed to be available on x86_64.
314 return unsafe { Some(PrefilterFn::new(x86::sse::find)) }
;
317 #[cfg(all(not(miri), target_arch = "wasm32", memchr_runtime_simd))]
319 // SAFETY: `wasm::find` is actually a safe function
321 // Also note that the `if true` is here to prevent, on wasm with simd,
322 // rustc warning about the code below being dead code.
324 return unsafe { Some(PrefilterFn::new(wasm::find)) }
;
327 // Check that our rarest byte has a reasonably low rank. The main issue
328 // here is that the fallback prefilter can perform pretty poorly if it's
329 // given common bytes. So we try to avoid the worst cases here.
330 let (rare1_rank
, _
) = rare
.as_ranks(needle
);
331 if rare1_rank
<= MAX_FALLBACK_RANK
{
332 // SAFETY: fallback::find is safe to call in all environments.
333 return unsafe { Some(PrefilterFn::new(fallback::find)) }
;
/// Return the minimum length of the haystack in which a prefilter should be
/// used. If the haystack is below this length, then it's probably not worth
/// the overhead of running the prefilter.
///
/// We used to look at the length of a haystack here. That is, if it was too
/// small, then don't bother with the prefilter. But two things changed:
/// the prefilter falls back to memchr for small haystacks, and, at the
/// meta-searcher level, Rabin-Karp is employed for tiny haystacks anyway.
///
/// We keep it around for now in case we want to bring it back.
pub(crate) fn minimum_len(_haystack: &[u8], needle: &[u8]) -> usize {
    // If the haystack length isn't greater than needle.len() * FACTOR, then
    // no prefilter will be used. The presumption here is that since there
    // are so few bytes to check, it's not worth running the prefilter since
    // there will need to be a validation step anyway. Thus, the prefilter is
    // largely redundant work.
    //
    // Increasing the factor noticeably hurts the
    // memmem/krate/prebuilt/teeny-*/never-john-watson benchmarks.
    const PREFILTER_LENGTH_FACTOR: usize = 2;
    const VECTOR_MIN_LENGTH: usize = 16;
    let min = core::cmp::max(
        VECTOR_MIN_LENGTH,
        PREFILTER_LENGTH_FACTOR * needle.len(),
    );
    // For haystacks with length==min, we still want to avoid the prefilter,
    // so add 1.
    min + 1
}
369 #[cfg(all(test, feature = "std", not(miri)))]
370 pub(crate) mod tests
{
371 use std
::convert
::{TryFrom, TryInto}
;
375 prefilter
::PrefilterFnTy
, rabinkarp
, rarebytes
::RareNeedleBytes
,
378 // Below is a small jig that generates prefilter tests. The main purpose
379 // of this jig is to generate tests of varying needle/haystack lengths
380 // in order to try and exercise all code paths in our prefilters. And in
381 // particular, this is especially important for vectorized prefilters where
382 // certain code paths might only be exercised at certain lengths.
384 /// A test that represents the input and expected output to a prefilter
385 /// function. The test should be able to run with any prefilter function
386 /// and get the expected output.
387 pub(crate) struct PrefilterTest
{
388 // These fields represent the inputs and expected output of a forwards
389 // prefilter function.
390 pub(crate) ninfo
: NeedleInfo
,
391 pub(crate) haystack
: Vec
<u8>,
392 pub(crate) needle
: Vec
<u8>,
393 pub(crate) output
: Option
<usize>,
397 /// Run all generated forward prefilter tests on the given prefn.
401 /// Callers must ensure that the given prefilter function pointer is
402 /// safe to call for all inputs in the current environment.
403 pub(crate) unsafe fn run_all_tests(prefn
: PrefilterFnTy
) {
404 PrefilterTest
::run_all_tests_filter(prefn
, |_
| true)
// NOTE(review): the original line numbers jump throughout this region.
// Missing from this listing: the opening brace of `run_all_tests_filter`
// and the statement executed when `predicate` passes (presumably running
// the test) plus its closing braces; four parameters of `new` (the body
// uses `rare1i`, `rare2i`, `haystack_len` and `needle_len`, which are
// never declared here); several closing braces inside `new`; and most of
// the body of `run` (the prefilter invocation and the assertion using the
// format string below). Confirm against the upstream file before editing.
407 /// Run all generated forward prefilter tests that pass the given
408 /// predicate on the given prefn.
412 /// Callers must ensure that the given prefilter function pointer is
413 /// safe to call for all inputs in the current environment.
414 pub(crate) unsafe fn run_all_tests_filter(
415 prefn
: PrefilterFnTy
,
416 mut predicate
: impl FnMut(&PrefilterTest
) -> bool
,
418 for seed
in PREFILTER_TEST_SEEDS
{
419 for test
in seed
.generate() {
420 if predicate(&test
) {
427 /// Create a new prefilter test from a seed and some chosen offsets to
428 /// rare bytes in the seed's needle.
430 /// If a valid test could not be constructed, then None is returned.
431 /// (Currently, we take the approach of massaging tests to be valid
432 /// instead of rejecting them outright.)
434 seed
: PrefilterTestSeed
,
439 output
: Option
<usize>,
440 ) -> Option
<PrefilterTest
> {
441 let mut rare1i
: u8 = rare1i
.try_into().unwrap();
442 let mut rare2i
: u8 = rare2i
.try_into().unwrap();
443 // The '#' byte is never used in a haystack (unless we're expecting
444 // a match), while the '@' byte is never used in a needle.
445 let mut haystack
= vec
![b'@'
; haystack_len
];
446 let mut needle
= vec
![b'
#'; needle_len];
447 needle
[0] = seed
.first
;
448 needle
[rare1i
as usize] = seed
.rare1
;
449 needle
[rare2i
as usize] = seed
.rare2
;
450 // If we're expecting a match, then make sure the needle occurs
451 // in the haystack at the expected position.
452 if let Some(i
) = output
{
453 haystack
[i
..i
+ needle
.len()].copy_from_slice(&needle
);
455 // If the operations above lead to rare offsets pointing to the
456 // non-first occurrence of a byte, then adjust it. This might lead
457 // to redundant tests, but it's simpler than trying to change the
458 // generation process I think.
459 if let Some(i
) = crate::memchr(seed
.rare1
, &needle
) {
460 rare1i
= u8::try_from(i
).unwrap();
462 if let Some(i
) = crate::memchr(seed
.rare2
, &needle
) {
463 rare2i
= u8::try_from(i
).unwrap();
465 let ninfo
= NeedleInfo
{
466 rarebytes
: RareNeedleBytes
::new(rare1i
, rare2i
),
467 nhash
: rabinkarp
::NeedleHash
::forward(&needle
),
469 Some(PrefilterTest { ninfo, haystack, needle, output }
)
472 /// Run this specific test on the given prefilter function. If the
473 /// outputs do not match, then this routine panics with a failure
478 /// Callers must ensure that the given prefilter function pointer is
479 /// safe to call for all inputs in the current environment.
480 unsafe fn run(&self, prefn
: PrefilterFnTy
) {
481 let mut prestate
= PrefilterState
::new();
490 "ninfo: {:?}, haystack(len={}): {:?}, needle(len={}): {:?}",
493 std
::str::from_utf8(&self.haystack
).unwrap(),
495 std
::str::from_utf8(&self.needle
).unwrap(),
500 /// A set of prefilter test seeds. Each seed serves as the base for the
501 /// generation of many other tests. In essence, the seed captures the
502 /// "rare" and first bytes among our needle. The tests generated from each
503 /// seed essentially vary the length of the needle and haystack, while
504 /// using the rare/first byte configuration from the seed.
506 /// The purpose of this is to test many different needle/haystack lengths.
507 /// In particular, some of the vector optimizations might only have bugs
508 /// in haystacks of a certain size.
509 const PREFILTER_TEST_SEEDS
: &[PrefilterTestSeed
] = &[
510 PrefilterTestSeed { first: b'x', rare1: b'y', rare2: b'z' }
,
511 PrefilterTestSeed { first: b'x', rare1: b'x', rare2: b'z' }
,
512 PrefilterTestSeed { first: b'x', rare1: b'y', rare2: b'x' }
,
513 PrefilterTestSeed { first: b'x', rare1: b'x', rare2: b'x' }
,
514 PrefilterTestSeed { first: b'x', rare1: b'y', rare2: b'y' }
,
// NOTE(review): the field declarations of `PrefilterTestSeed` are missing
// from this listing. Usage elsewhere in this file shows `first`, `rare1`
// and `rare2`, all byte values — TODO confirm against the upstream file.
517 /// Data that describes a single prefilter test seed.
518 #[derive(Clone, Copy)]
519 struct PrefilterTestSeed
{
525 impl PrefilterTestSeed
{
526 /// Generate a series of prefilter tests from this seed.
527 fn generate(self) -> impl Iterator
<Item
= PrefilterTest
> {
529 // The iterator below generates *a lot* of tests. The number of
530 // tests was chosen somewhat empirically to be "bearable" when
531 // running the test suite.
533 // We use an iterator here because the collective haystacks of all
534 // these test cases add up to enough memory to OOM a conservative
535 // sandbox or a small laptop.
536 (len_start
..=40).flat_map(move |needle_len
| {
537 let rare_start
= len_start
- 1;
538 (rare_start
..needle_len
).flat_map(move |rare1i
| {
539 (rare1i
..needle_len
).flat_map(move |rare2i
| {
540 (needle_len
..=66).flat_map(move |haystack_len
| {
551 (0..=(haystack_len
- needle_len
)).flat_map(