//! A 256-bit vector implementation of the "packed pair" SIMD algorithm.
//!
//! The "packed pair" algorithm is based on the [generic SIMD] algorithm. The
//! main difference is that it (by default) uses a background distribution of
//! byte frequencies to heuristically select the pair of bytes to search for.
//!
//! [generic SIMD]: http://0x80.pl/articles/simd-strfind.html#first-and-last
11 use core
::arch
::x86_64
::{__m128i, __m256i}
;
13 use crate::arch
::{all::packedpair::Pair, generic::packedpair}
;
15 /// A "packed pair" finder that uses 256-bit vector operations.
17 /// This finder picks two bytes that it believes have high predictive power
18 /// for indicating an overall match of a needle. Depending on whether
19 /// `Finder::find` or `Finder::find_prefilter` is used, it reports offsets
20 /// where the needle matches or could match. In the prefilter case, candidates
21 /// are reported whenever the [`Pair`] of bytes given matches.
22 #[derive(Clone, Copy, Debug)]
24 sse2
: packedpair
::Finder
<__m128i
>,
25 avx2
: packedpair
::Finder
<__m256i
>,
29 /// Create a new pair searcher. The searcher returned can either report
30 /// exact matches of `needle` or act as a prefilter and report candidate
31 /// positions of `needle`.
33 /// If AVX2 is unavailable in the current environment or if a [`Pair`]
34 /// could not be constructed from the needle given, then `None` is
37 pub fn new(needle
: &[u8]) -> Option
<Finder
> {
38 Finder
::with_pair(needle
, Pair
::new(needle
)?
)
41 /// Create a new "packed pair" finder using the pair of bytes given.
43 /// This constructor permits callers to control precisely which pair of
44 /// bytes is used as a predicate.
46 /// If AVX2 is unavailable in the current environment, then `None` is
49 pub fn with_pair(needle
: &[u8], pair
: Pair
) -> Option
<Finder
> {
50 if Finder
::is_available() {
51 // SAFETY: we check that sse2/avx2 is available above. We are also
52 // guaranteed to have needle.len() > 1 because we have a valid
54 unsafe { Some(Finder::with_pair_impl(needle, pair)) }
60 /// Create a new `Finder` specific to SSE2 vectors and routines.
64 /// Same as the safety for `packedpair::Finder::new`, and callers must also
65 /// ensure that both SSE2 and AVX2 are available.
66 #[target_feature(enable = "sse2", enable = "avx2")]
68 unsafe fn with_pair_impl(needle
: &[u8], pair
: Pair
) -> Finder
{
69 let sse2
= packedpair
::Finder
::<__m128i
>::new(needle
, pair
);
70 let avx2
= packedpair
::Finder
::<__m256i
>::new(needle
, pair
);
/// Reports whether this implementation can be used in the current
/// environment.
///
/// A `true` result guarantees that [`Finder::with_pair`] returns a `Some`
/// value, and a `false` result guarantees that it returns `None`. Neither
/// result guarantees that [`Finder::new`] returns a `Finder`: even when
/// `Finder::is_available` is true, a valid [`Pair`] may not be found from
/// the needle given.
///
/// For the lifetime of a single program, once this returns true it always
/// returns true.
pub fn is_available() -> bool {
    // Exactly one of the four blocks below survives conditional
    // compilation and becomes the value of this function.
    //
    // NOTE(review): the constant results were not visible in the reviewed
    // text; they follow the documented contract (both SSE2 and AVX2 are
    // required, and without `std` there is no runtime detection). Confirm.
    #[cfg(not(target_feature = "sse2"))]
    {
        false
    }
    #[cfg(all(target_feature = "sse2", target_feature = "avx2"))]
    {
        true
    }
    #[cfg(all(
        target_feature = "sse2",
        not(target_feature = "avx2"),
        feature = "std"
    ))]
    {
        std::is_x86_feature_detected!("avx2")
    }
    #[cfg(all(
        target_feature = "sse2",
        not(target_feature = "avx2"),
        not(feature = "std")
    ))]
    {
        false
    }
}
112 /// Execute a search using AVX2 vectors and routines.
116 /// When `haystack.len()` is less than [`Finder::min_haystack_len`].
118 pub fn find(&self, haystack
: &[u8], needle
: &[u8]) -> Option
<usize> {
119 // SAFETY: Building a `Finder` means it's safe to call 'sse2' routines.
120 unsafe { self.find_impl(haystack, needle) }
123 /// Run this finder on the given haystack as a prefilter.
125 /// If a candidate match is found, then an offset where the needle *could*
126 /// begin in the haystack is returned.
130 /// When `haystack.len()` is less than [`Finder::min_haystack_len`].
132 pub fn find_prefilter(&self, haystack
: &[u8]) -> Option
<usize> {
133 // SAFETY: Building a `Finder` means it's safe to call 'sse2' routines.
134 unsafe { self.find_prefilter_impl(haystack) }
137 /// Execute a search using AVX2 vectors and routines.
141 /// When `haystack.len()` is less than [`Finder::min_haystack_len`].
145 /// (The target feature safety obligation is automatically fulfilled by
146 /// virtue of being a method on `Finder`, which can only be constructed
147 /// when it is safe to call `sse2` and `avx2` routines.)
148 #[target_feature(enable = "sse2", enable = "avx2")]
155 if haystack
.len() < self.avx2
.min_haystack_len() {
156 self.sse2
.find(haystack
, needle
)
158 self.avx2
.find(haystack
, needle
)
162 /// Execute a prefilter search using AVX2 vectors and routines.
166 /// When `haystack.len()` is less than [`Finder::min_haystack_len`].
170 /// (The target feature safety obligation is automatically fulfilled by
171 /// virtue of being a method on `Finder`, which can only be constructed
172 /// when it is safe to call `sse2` and `avx2` routines.)
173 #[target_feature(enable = "sse2", enable = "avx2")]
175 unsafe fn find_prefilter_impl(&self, haystack
: &[u8]) -> Option
<usize> {
176 if haystack
.len() < self.avx2
.min_haystack_len() {
177 self.sse2
.find_prefilter(haystack
)
179 self.avx2
.find_prefilter(haystack
)
183 /// Returns the pair of offsets (into the needle) used to check as a
184 /// predicate before confirming whether a needle exists at a particular
187 pub fn pair(&self) -> &Pair
{
191 /// Returns the minimum haystack length that this `Finder` can search.
193 /// Using a haystack with length smaller than this in a search will result
194 /// in a panic. The reason for this restriction is that this finder is
195 /// meant to be a low-level component that is part of a larger substring
196 /// strategy. In that sense, it avoids trying to handle all cases and
197 /// instead only handles the cases that it can handle very well.
199 pub fn min_haystack_len(&self) -> usize {
200 // The caller doesn't need to care about AVX2's min_haystack_len
201 // since this implementation will automatically switch to the SSE2
202 // implementation if the haystack is too short for AVX2. Therefore, the
203 // caller only needs to care about SSE2's min_haystack_len.
205 // This does assume that SSE2's min_haystack_len is less than or
206 // equal to AVX2's min_haystack_len. In practice, this is true and
207 // there is no way it could be false based on how this Finder is
208 // implemented. Namely, both SSE2 and AVX2 use the same `Pair`. If
209 // they used different pairs, then it's possible (although perhaps
210 // pathological) for SSE2's min_haystack_len to be bigger than AVX2's.
211 self.sse2
.min_haystack_len()
219 fn find(haystack
: &[u8], needle
: &[u8]) -> Option
<Option
<usize>> {
220 let f
= Finder
::new(needle
)?
;
221 if haystack
.len() < f
.min_haystack_len() {
224 Some(f
.find(haystack
, needle
))
227 define_substring_forward_quickcheck
!(find
);
230 fn forward_substring() {
231 crate::tests
::substring
::Runner
::new().fwd(find
).run()
235 fn forward_packedpair() {
241 ) -> Option
<Option
<usize>> {
242 let pair
= Pair
::with_indices(needle
, index1
, index2
)?
;
243 let f
= Finder
::with_pair(needle
, pair
)?
;
244 if haystack
.len() < f
.min_haystack_len() {
247 Some(f
.find(haystack
, needle
))
249 crate::tests
::packedpair
::Runner
::new().fwd(find
).run()
253 fn forward_packedpair_prefilter() {
259 ) -> Option
<Option
<usize>> {
260 if !cfg
!(target_feature
= "sse2") {
263 let pair
= Pair
::with_indices(needle
, index1
, index2
)?
;
264 let f
= Finder
::with_pair(needle
, pair
)?
;
265 if haystack
.len() < f
.min_haystack_len() {
268 Some(f
.find_prefilter(haystack
))
270 crate::tests
::packedpair
::Runner
::new().fwd(find
).run()