3 panic
::{RefUnwindSafe, UnwindSafe}
,
8 use crate::packed
::{ext::Pointer, pattern::Patterns, teddy::generic::Match}
;
10 /// A builder for constructing a Teddy matcher.
12 /// The builder primarily permits fine grained configuration of the Teddy
13 /// matcher. Most options are made only available for testing/benchmarking
14 /// purposes. In reality, options are automatically determined by the nature
15 /// and number of patterns given to the builder.
16 #[derive(Clone, Debug)]
17 pub(crate) struct Builder
{
18 /// When none, this is automatically determined. Otherwise, `false` means
19 /// slim Teddy is used (8 buckets) and `true` means fat Teddy is used
20 /// (16 buckets). Fat Teddy requires AVX2, so if that CPU feature isn't
21 /// available and Fat Teddy was requested, no matcher will be built.
22 only_fat
: Option
<bool
>,
23 /// When none, this is automatically determined. Otherwise, `false` means
24 /// that 128-bit vectors will be used (up to SSSE3 instructions) where as
25 /// `true` means that 256-bit vectors will be used. As with `fat`, if
26 /// 256-bit vectors are requested and they aren't available, then a
27 /// searcher will not be built.
28 only_256bit
: Option
<bool
>,
29 /// When true (the default), the number of patterns will be used as a
30 /// heuristic for refusing construction of a Teddy searcher. The point here
31 /// is that too many patterns can overwhelm Teddy. But this can be disabled
32 /// in cases where the caller knows better.
33 heuristic_pattern_limits
: bool
,
36 impl Default
for Builder
{
37 fn default() -> Builder
{
43 /// Create a new builder for configuring a Teddy matcher.
44 pub(crate) fn new() -> Builder
{
48 heuristic_pattern_limits
: true,
52 /// Build a matcher for the set of patterns given. If a matcher could not
53 /// be built, then `None` is returned.
55 /// Generally, a matcher isn't built if the necessary CPU features aren't
56 /// available, an unsupported target or if the searcher is believed to be
57 /// slower than standard techniques (i.e., if there are too many literals).
58 pub(crate) fn build(&self, patterns
: Arc
<Patterns
>) -> Option
<Searcher
> {
59 self.build_imp(patterns
)
62 /// Require the use of Fat (true) or Slim (false) Teddy. Fat Teddy uses
63 /// 16 buckets where as Slim Teddy uses 8 buckets. More buckets are useful
64 /// for a larger set of literals.
66 /// `None` is the default, which results in an automatic selection based
67 /// on the number of literals and available CPU features.
68 pub(crate) fn only_fat(&mut self, yes
: Option
<bool
>) -> &mut Builder
{
73 /// Request the use of 256-bit vectors (true) or 128-bit vectors (false).
74 /// Generally, a larger vector size is better since it either permits
75 /// matching more patterns or matching more bytes in the haystack at once.
77 /// `None` is the default, which results in an automatic selection based on
78 /// the number of literals and available CPU features.
79 pub(crate) fn only_256bit(&mut self, yes
: Option
<bool
>) -> &mut Builder
{
80 self.only_256bit
= yes
;
84 /// Request that heuristic limitations on the number of patterns be
85 /// employed. This useful to disable for benchmarking where one wants to
86 /// explore how Teddy performs on large number of patterns even if the
87 /// heuristics would otherwise refuse construction.
89 /// This is enabled by default.
90 pub(crate) fn heuristic_pattern_limits(
94 self.heuristic_pattern_limits
= yes
;
98 fn build_imp(&self, patterns
: Arc
<Patterns
>) -> Option
<Searcher
> {
99 let patlimit
= self.heuristic_pattern_limits
;
100 // There's no particular reason why we limit ourselves to little endian
101 // here, but it seems likely that some parts of Teddy as they are
102 // currently written (e.g., the uses of `trailing_zeros`) are likely
103 // wrong on non-little-endian targets. Such things are likely easy to
104 // fix, but at the time of writing (2023/09/18), I actually do not know
105 // how to test this code on a big-endian target. So for now, we're
106 // conservative and just bail out.
107 if !cfg
!(target_endian
= "little") {
108 debug
!("skipping Teddy because target isn't little endian");
111 // Too many patterns will overwhelm Teddy and likely lead to slow
112 // downs, typically in the verification step.
113 if patlimit
&& patterns
.len() > 64 {
114 debug
!("skipping Teddy because of too many patterns");
118 #[cfg(all(target_arch = "x86_64", target_feature = "sse2"))]
120 use self::x86_64
::{FatAVX2, SlimAVX2, SlimSSSE3}
;
122 let mask_len
= core
::cmp
::min(4, patterns
.minimum_len());
123 let beefy
= patterns
.len() > 32;
124 let has_avx2
= self::x86_64
::is_available_avx2();
125 let has_ssse3
= has_avx2
|| self::x86_64
::is_available_ssse3();
126 let use_avx2
= if self.only_256bit
== Some(true) {
129 "skipping Teddy because avx2 was demanded but unavailable"
134 } else if self.only_256bit
== Some(false) {
137 "skipping Teddy because ssse3 was demanded but unavailable"
142 } else if !has_ssse3
&& !has_avx2
{
144 "skipping Teddy because ssse3 and avx2 are unavailable"
150 let fat
= match self.only_fat
{
151 None
=> use_avx2
&& beefy
,
152 Some(false) => false,
153 Some(true) if !use_avx2
=> {
155 "skipping Teddy because fat was demanded, but fat \
156 Teddy requires avx2 which is unavailable"
162 // Just like for aarch64, it's possible that too many patterns will
163 // overhwelm Teddy. Unlike aarch64 though, we have Fat teddy which
164 // helps things scale a bit more by spreading patterns over more
167 // These thresholds were determined by looking at the measurements
168 // for the rust/aho-corasick/packed/leftmost-first and
169 // rust/aho-corasick/dfa/leftmost-first engines on the `teddy/`
171 if patlimit
&& mask_len
== 1 && patterns
.len() > 16 {
173 "skipping Teddy (mask len: 1) because there are \
178 match (mask_len
, use_avx2
, fat
) {
180 debug
!("Teddy choice: 128-bit slim, 1 byte");
181 SlimSSSE3
::<1>::new(&patterns
)
183 (1, true, false) => {
184 debug
!("Teddy choice: 256-bit slim, 1 byte");
185 SlimAVX2
::<1>::new(&patterns
)
188 debug
!("Teddy choice: 256-bit fat, 1 byte");
189 FatAVX2
::<1>::new(&patterns
)
192 debug
!("Teddy choice: 128-bit slim, 2 bytes");
193 SlimSSSE3
::<2>::new(&patterns
)
195 (2, true, false) => {
196 debug
!("Teddy choice: 256-bit slim, 2 bytes");
197 SlimAVX2
::<2>::new(&patterns
)
200 debug
!("Teddy choice: 256-bit fat, 2 bytes");
201 FatAVX2
::<2>::new(&patterns
)
204 debug
!("Teddy choice: 128-bit slim, 3 bytes");
205 SlimSSSE3
::<3>::new(&patterns
)
207 (3, true, false) => {
208 debug
!("Teddy choice: 256-bit slim, 3 bytes");
209 SlimAVX2
::<3>::new(&patterns
)
212 debug
!("Teddy choice: 256-bit fat, 3 bytes");
213 FatAVX2
::<3>::new(&patterns
)
216 debug
!("Teddy choice: 128-bit slim, 4 bytes");
217 SlimSSSE3
::<4>::new(&patterns
)
219 (4, true, false) => {
220 debug
!("Teddy choice: 256-bit slim, 4 bytes");
221 SlimAVX2
::<4>::new(&patterns
)
224 debug
!("Teddy choice: 256-bit fat, 4 bytes");
225 FatAVX2
::<4>::new(&patterns
)
228 debug
!("no supported Teddy configuration found");
233 #[cfg(target_arch = "aarch64")]
235 use self::aarch64
::SlimNeon
;
237 let mask_len
= core
::cmp
::min(4, patterns
.minimum_len());
238 if self.only_256bit
== Some(true) {
240 "skipping Teddy because 256-bits were demanded \
245 if self.only_fat
== Some(true) {
247 "skipping Teddy because fat was demanded but unavailable"
250 // Since we don't have Fat teddy in aarch64 (I think we'd want at
251 // least 256-bit vectors for that), we need to be careful not to
252 // allow too many patterns as it might overwhelm Teddy. Generally
253 // speaking, as the mask length goes up, the more patterns we can
254 // handle because the mask length results in fewer candidates
257 // These thresholds were determined by looking at the measurements
258 // for the rust/aho-corasick/packed/leftmost-first and
259 // rust/aho-corasick/dfa/leftmost-first engines on the `teddy/`
263 if patlimit
&& patterns
.len() > 16 {
265 "skipping Teddy (mask len: 1) because there are \
269 debug
!("Teddy choice: 128-bit slim, 1 byte");
270 SlimNeon
::<1>::new(&patterns
)
273 if patlimit
&& patterns
.len() > 32 {
275 "skipping Teddy (mask len: 2) because there are \
279 debug
!("Teddy choice: 128-bit slim, 2 bytes");
280 SlimNeon
::<2>::new(&patterns
)
283 if patlimit
&& patterns
.len() > 48 {
285 "skipping Teddy (mask len: 3) because there are \
289 debug
!("Teddy choice: 128-bit slim, 3 bytes");
290 SlimNeon
::<3>::new(&patterns
)
293 debug
!("Teddy choice: 128-bit slim, 4 bytes");
294 SlimNeon
::<4>::new(&patterns
)
297 debug
!("no supported Teddy configuration found");
303 all(target_arch
= "x86_64", target_feature
= "sse2"),
304 target_arch
= "aarch64"
312 /// A searcher that dispatches to one of several possible Teddy variants.
313 #[derive(Clone, Debug)]
314 pub(crate) struct Searcher
{
315 /// The Teddy variant we use. We use dynamic dispatch under the theory that
316 /// it results in better codegen then a enum, although this is a specious
319 /// This `Searcher` is essentially a wrapper for a `SearcherT` trait
320 /// object. We just make `memory_usage` and `minimum_len` available without
321 /// going through dynamic dispatch.
322 imp
: Arc
<dyn SearcherT
>,
323 /// Total heap memory used by the Teddy variant.
325 /// The minimum haystack length this searcher can handle. It is intended
326 /// for callers to use some other search routine (such as Rabin-Karp) in
327 /// cases where the haystack (or remainer of the haystack) is too short.
332 /// Look for the leftmost occurrence of any pattern in this search in the
333 /// given haystack starting at the given position.
337 /// This panics when `haystack[at..].len()` is less than the minimum length
338 /// for this haystack.
344 ) -> Option
<crate::Match
> {
345 // SAFETY: The Teddy implementations all require a minimum haystack
346 // length, and this is required for safety. Therefore, we assert it
347 // here in order to make this method sound.
348 assert
!(haystack
[at
..].len() >= self.minimum_len
);
349 let hayptr
= haystack
.as_ptr();
350 // SAFETY: Construction of the searcher guarantees that we are able
351 // to run it in the current environment (i.e., we won't get an AVX2
352 // searcher on a x86-64 CPU without AVX2 support). Also, the pointers
353 // are valid as they are derived directly from a borrowed slice.
354 let teddym
= unsafe {
355 self.imp
.find(hayptr
.add(at
), hayptr
.add(haystack
.len()))?
357 let start
= teddym
.start().as_usize().wrapping_sub(hayptr
.as_usize());
358 let end
= teddym
.end().as_usize().wrapping_sub(hayptr
.as_usize());
359 let span
= crate::Span { start, end }
;
360 // OK because we won't permit the construction of a searcher that
361 // could report a pattern ID bigger than what can fit in the crate-wide
363 let pid
= crate::PatternID
::new_unchecked(teddym
.pattern().as_usize());
364 let m
= crate::Match
::new(pid
, span
);
368 /// Returns the approximate total amount of heap used by this type, in
371 pub(crate) fn memory_usage(&self) -> usize {
375 /// Returns the minimum length, in bytes, that a haystack must be in order
376 /// to use it with this searcher.
378 pub(crate) fn minimum_len(&self) -> usize {
383 /// A trait that provides dynamic dispatch over the different possible Teddy
384 /// variants on the same algorithm.
386 /// On `x86_64` for example, it isn't known until runtime which of 12 possible
387 /// variants will be used. One might use one of the four slim 128-bit vector
388 /// variants, or one of the four 256-bit vector variants or even one of the
389 /// four fat 256-bit vector variants.
391 /// Since this choice is generally made when the Teddy searcher is constructed
392 /// and this choice is based on the patterns given and what the current CPU
393 /// supports, it follows that there must be some kind of indirection at search
394 /// time that "selects" the variant chosen at build time.
396 /// There are a few different ways to go about this. One approach is to use an
397 /// enum. It works fine, but in my experiments, this generally results in worse
398 /// codegen. Another approach, which is what we use here, is dynamic dispatch
399 /// via a trait object. We basically implement this trait for each possible
400 /// variant, select the variant we want at build time and convert it to a
401 /// trait object for use at search time.
403 /// Another approach is to use function pointers and stick each of the possible
404 /// variants into a union. This is essentially isomorphic to the dynamic
405 /// dispatch approach, but doesn't require any allocations. Since this crate
406 /// requires `alloc`, there's no real reason (AFAIK) to go down this path. (The
407 /// `memchr` crate does this.)
409 Debug
+ Send
+ Sync
+ UnwindSafe
+ RefUnwindSafe
+ '
static
411 /// Execute a search on the given haystack (identified by `start` and `end`
416 /// Essentially, the `start` and `end` pointers must be valid and point
417 /// to a haystack one can read. As long as you derive them from, for
418 /// example, a `&[u8]`, they should automatically satisfy all of the safety
421 /// * Both `start` and `end` must be valid for reads.
422 /// * Both `start` and `end` must point to an initialized value.
423 /// * Both `start` and `end` must point to the same allocated object and
424 /// must either be in bounds or at most one byte past the end of the
425 /// allocated object.
426 /// * Both `start` and `end` must be _derived from_ a pointer to the same
428 /// * The distance between `start` and `end` must not overflow `isize`.
429 /// * The distance being in bounds must not rely on "wrapping around" the
431 /// * It must be the case that `start <= end`.
432 /// * `end - start` must be greater than the minimum length for this
435 /// Also, it is expected that implementations of this trait will tag this
436 /// method with a `target_feature` attribute. Callers must ensure that
437 /// they are executing this method in an environment where that attribute
439 unsafe fn find(&self, start
: *const u8, end
: *const u8) -> Option
<Match
>;
442 #[cfg(all(target_arch = "x86_64", target_feature = "sse2"))]
444 use core
::arch
::x86_64
::{__m128i, __m256i}
;
446 use alloc
::sync
::Arc
;
451 teddy
::generic
::{self, Match}
,
454 use super::{Searcher, SearcherT}
;
456 #[derive(Clone, Debug)]
457 pub(super) struct SlimSSSE3
<const BYTES
: usize> {
458 slim128
: generic
::Slim
<__m128i
, BYTES
>,
461 // Defines SlimSSSE3 wrapper functions for 1, 2, 3 and 4 bytes.
462 macro_rules
! slim_ssse3
{
464 impl SlimSSSE3
<$len
> {
465 /// Creates a new searcher using "slim" Teddy with 128-bit
466 /// vectors. If SSSE3 is not available in the current
467 /// environment, then this returns `None`.
469 patterns
: &Arc
<Patterns
>,
470 ) -> Option
<Searcher
> {
471 if !is_available_ssse3() {
474 Some(unsafe { SlimSSSE3::<$len>::new_unchecked(patterns) }
)
477 /// Creates a new searcher using "slim" Teddy with 256-bit
478 /// vectors without checking whether SSSE3 is available or not.
482 /// Callers must ensure that SSSE3 is available in the current
484 #[target_feature(enable = "ssse3")]
485 unsafe fn new_unchecked(patterns
: &Arc
<Patterns
>) -> Searcher
{
486 let slim128
= generic
::Slim
::<__m128i
, $len
>::new(
487 Arc
::clone(patterns
),
489 let memory_usage
= slim128
.memory_usage();
490 let minimum_len
= slim128
.minimum_len();
491 let imp
= Arc
::new(SlimSSSE3 { slim128 }
);
492 Searcher { imp, memory_usage, minimum_len }
496 impl SearcherT
for SlimSSSE3
<$len
> {
497 #[target_feature(enable = "ssse3")]
504 // SAFETY: All obligations except for `target_feature` are
505 // passed to the caller. Our use of `target_feature` is
506 // safe because construction of this type requires that the
507 // requisite target features are available.
508 self.slim128
.find(start
, end
)
519 #[derive(Clone, Debug)]
520 pub(super) struct SlimAVX2
<const BYTES
: usize> {
521 slim128
: generic
::Slim
<__m128i
, BYTES
>,
522 slim256
: generic
::Slim
<__m256i
, BYTES
>,
525 // Defines SlimAVX2 wrapper functions for 1, 2, 3 and 4 bytes.
526 macro_rules
! slim_avx2
{
528 impl SlimAVX2
<$len
> {
529 /// Creates a new searcher using "slim" Teddy with 256-bit
530 /// vectors. If AVX2 is not available in the current
531 /// environment, then this returns `None`.
533 patterns
: &Arc
<Patterns
>,
534 ) -> Option
<Searcher
> {
535 if !is_available_avx2() {
538 Some(unsafe { SlimAVX2::<$len>::new_unchecked(patterns) }
)
541 /// Creates a new searcher using "slim" Teddy with 256-bit
542 /// vectors without checking whether AVX2 is available or not.
546 /// Callers must ensure that AVX2 is available in the current
548 #[target_feature(enable = "avx2")]
549 unsafe fn new_unchecked(patterns
: &Arc
<Patterns
>) -> Searcher
{
550 let slim128
= generic
::Slim
::<__m128i
, $len
>::new(
551 Arc
::clone(&patterns
),
553 let slim256
= generic
::Slim
::<__m256i
, $len
>::new(
554 Arc
::clone(&patterns
),
557 slim128
.memory_usage() + slim256
.memory_usage();
558 let minimum_len
= slim128
.minimum_len();
559 let imp
= Arc
::new(SlimAVX2 { slim128, slim256 }
);
560 Searcher { imp, memory_usage, minimum_len }
564 impl SearcherT
for SlimAVX2
<$len
> {
565 #[target_feature(enable = "avx2")]
572 // SAFETY: All obligations except for `target_feature` are
573 // passed to the caller. Our use of `target_feature` is
574 // safe because construction of this type requires that the
575 // requisite target features are available.
576 let len
= end
.distance(start
);
577 if len
< self.slim256
.minimum_len() {
578 self.slim128
.find(start
, end
)
580 self.slim256
.find(start
, end
)
592 #[derive(Clone, Debug)]
593 pub(super) struct FatAVX2
<const BYTES
: usize> {
594 fat256
: generic
::Fat
<__m256i
, BYTES
>,
597 // Defines SlimAVX2 wrapper functions for 1, 2, 3 and 4 bytes.
598 macro_rules
! fat_avx2
{
601 /// Creates a new searcher using "slim" Teddy with 256-bit
602 /// vectors. If AVX2 is not available in the current
603 /// environment, then this returns `None`.
605 patterns
: &Arc
<Patterns
>,
606 ) -> Option
<Searcher
> {
607 if !is_available_avx2() {
610 Some(unsafe { FatAVX2::<$len>::new_unchecked(patterns) }
)
613 /// Creates a new searcher using "slim" Teddy with 256-bit
614 /// vectors without checking whether AVX2 is available or not.
618 /// Callers must ensure that AVX2 is available in the current
620 #[target_feature(enable = "avx2")]
621 unsafe fn new_unchecked(patterns
: &Arc
<Patterns
>) -> Searcher
{
622 let fat256
= generic
::Fat
::<__m256i
, $len
>::new(
623 Arc
::clone(&patterns
),
625 let memory_usage
= fat256
.memory_usage();
626 let minimum_len
= fat256
.minimum_len();
627 let imp
= Arc
::new(FatAVX2 { fat256 }
);
628 Searcher { imp, memory_usage, minimum_len }
632 impl SearcherT
for FatAVX2
<$len
> {
633 #[target_feature(enable = "avx2")]
640 // SAFETY: All obligations except for `target_feature` are
641 // passed to the caller. Our use of `target_feature` is
642 // safe because construction of this type requires that the
643 // requisite target features are available.
644 self.fat256
.find(start
, end
)
656 pub(super) fn is_available_ssse3() -> bool
{
657 #[cfg(not(target_feature = "sse2"))]
661 #[cfg(target_feature = "sse2")]
663 #[cfg(target_feature = "ssse3")]
667 #[cfg(not(target_feature = "ssse3"))]
669 #[cfg(feature = "std")]
671 std
::is_x86_feature_detected
!("ssse3")
673 #[cfg(not(feature = "std"))]
682 pub(super) fn is_available_avx2() -> bool
{
683 #[cfg(not(target_feature = "sse2"))]
687 #[cfg(target_feature = "sse2")]
689 #[cfg(target_feature = "avx2")]
693 #[cfg(not(target_feature = "avx2"))]
695 #[cfg(feature = "std")]
697 std
::is_x86_feature_detected
!("avx2")
699 #[cfg(not(feature = "std"))]
708 #[cfg(target_arch = "aarch64")]
710 use core
::arch
::aarch64
::uint8x16_t
;
712 use alloc
::sync
::Arc
;
716 teddy
::generic
::{self, Match}
,
719 use super::{Searcher, SearcherT}
;
721 #[derive(Clone, Debug)]
722 pub(super) struct SlimNeon
<const BYTES
: usize> {
723 slim128
: generic
::Slim
<uint8x16_t
, BYTES
>,
726 // Defines SlimSSSE3 wrapper functions for 1, 2, 3 and 4 bytes.
727 macro_rules
! slim_neon
{
729 impl SlimNeon
<$len
> {
730 /// Creates a new searcher using "slim" Teddy with 128-bit
731 /// vectors. If SSSE3 is not available in the current
732 /// environment, then this returns `None`.
734 patterns
: &Arc
<Patterns
>,
735 ) -> Option
<Searcher
> {
736 Some(unsafe { SlimNeon::<$len>::new_unchecked(patterns) }
)
739 /// Creates a new searcher using "slim" Teddy with 256-bit
740 /// vectors without checking whether SSSE3 is available or not.
744 /// Callers must ensure that SSSE3 is available in the current
746 #[target_feature(enable = "neon")]
747 unsafe fn new_unchecked(patterns
: &Arc
<Patterns
>) -> Searcher
{
748 let slim128
= generic
::Slim
::<uint8x16_t
, $len
>::new(
749 Arc
::clone(patterns
),
751 let memory_usage
= slim128
.memory_usage();
752 let minimum_len
= slim128
.minimum_len();
753 let imp
= Arc
::new(SlimNeon { slim128 }
);
754 Searcher { imp, memory_usage, minimum_len }
758 impl SearcherT
for SlimNeon
<$len
> {
759 #[target_feature(enable = "neon")]
766 // SAFETY: All obligations except for `target_feature` are
767 // passed to the caller. Our use of `target_feature` is
768 // safe because construction of this type requires that the
769 // requisite target features are available.
770 self.slim128
.find(start
, end
)