]>
Commit | Line | Data |
---|---|---|
94b46f34 XL |
1 | // Copyright 2014-2015 The Rust Project Developers. See the COPYRIGHT |
2 | // file at the top-level directory of this distribution and at | |
3 | // http://rust-lang.org/COPYRIGHT. | |
4 | // | |
5 | // Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or | |
6 | // http://www.apache.org/licenses/LICENSE-2.0> or the MIT license | |
7 | // <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your | |
8 | // option. This file may not be copied, modified, or distributed | |
9 | // except according to those terms. | |
10 | ||
11 | use std::cmp; | |
12 | use std::mem; | |
13 | ||
14 | use aho_corasick::{Automaton, AcAutomaton, FullAcAutomaton}; | |
15 | use memchr::{memchr, memchr2, memchr3}; | |
16 | use syntax::hir::literal::{Literal, Literals}; | |
17 | ||
18 | use freqs::BYTE_FREQUENCIES; | |
19 | use self::teddy_avx2::{Teddy as TeddyAVX2}; | |
20 | use self::teddy_ssse3::{Teddy as TeddySSSE3}; | |
21 | ||
22 | mod teddy_avx2; | |
23 | mod teddy_ssse3; | |
24 | ||
/// A prefix extracted from a compiled regular expression.
///
/// A regex prefix is a set of literal strings that *must* be matched at the
/// beginning of a regex in order for the entire regex to match. Similarly
/// for a regex suffix.
#[derive(Clone, Debug)]
pub struct LiteralSearcher {
    /// True when every literal in the set is a complete match for the regex
    /// (set from `Literals::all_complete` in `new`).
    complete: bool,
    /// Searcher for the longest common prefix of all literals.
    lcp: FreqyPacked,
    /// Searcher for the longest common suffix of all literals.
    lcs: FreqyPacked,
    /// The underlying search strategy, selected by `Matcher::new`.
    matcher: Matcher,
}
37 | ||
/// The concrete strategy used to search for literals.
///
/// The variant is chosen by `Matcher::new` based on the number of literals,
/// their lengths and available CPU features.
#[derive(Clone, Debug)]
enum Matcher {
    /// No literals. (Never advances through the input.)
    Empty,
    /// A set of single byte literals (at most 25 of them; see the cutoff in
    /// `Matcher::new`). Small sets are searched with memchr/memchr2/memchr3.
    Bytes(SingleByteSet),
    /// A single substring, find using memchr and frequency analysis.
    FreqyPacked(FreqyPacked),
    /// A single substring, find using Boyer-Moore.
    BoyerMoore(BoyerMooreSearch),
    /// An Aho-Corasick automaton.
    AC(FullAcAutomaton<Literal>),
    /// A simd accelerated multiple string matcher. Used only for a small
    /// number of small literals.
    TeddySSSE3(TeddySSSE3),
    /// A simd accelerated multiple string matcher. Used only for a small
    /// number of small literals. This uses 256-bit vectors.
    TeddyAVX2(TeddyAVX2),
}
57 | ||
impl LiteralSearcher {
    /// Returns a matcher that never matches and never advances the input.
    pub fn empty() -> Self {
        Self::new(Literals::empty(), Matcher::Empty)
    }

    /// Returns a matcher for literal prefixes from the given set.
    pub fn prefixes(lits: Literals) -> Self {
        let matcher = Matcher::prefixes(&lits);
        Self::new(lits, matcher)
    }

    /// Returns a matcher for literal suffixes from the given set.
    pub fn suffixes(lits: Literals) -> Self {
        let matcher = Matcher::suffixes(&lits);
        Self::new(lits, matcher)
    }

    /// Assembles a searcher from a literal set and a pre-selected strategy,
    /// precomputing searchers for the longest common prefix/suffix.
    fn new(lits: Literals, matcher: Matcher) -> Self {
        let complete = lits.all_complete();
        LiteralSearcher {
            complete: complete,
            lcp: FreqyPacked::new(lits.longest_common_prefix().to_vec()),
            lcs: FreqyPacked::new(lits.longest_common_suffix().to_vec()),
            matcher: matcher,
        }
    }

    /// Returns true if all matches comprise the entire regular expression.
    ///
    /// This does not necessarily mean that a literal match implies a match
    /// of the regular expression. For example, the regular expression `^a`
    /// is comprised of a single complete literal `a`, but the regular
    /// expression demands that it only match at the beginning of a string.
    pub fn complete(&self) -> bool {
        self.complete && !self.is_empty()
    }

    /// Find the position of a literal in `haystack` if it exists.
    ///
    /// Returns the `(start, end)` byte offsets of the first match.
    /// Note the `Empty` matcher reports a zero-width match at offset 0.
    #[inline(always)] // reduces constant overhead
    pub fn find(&self, haystack: &[u8]) -> Option<(usize, usize)> {
        use self::Matcher::*;
        match self.matcher {
            Empty => Some((0, 0)),
            // A single-byte match always has length 1.
            Bytes(ref sset) => sset.find(haystack).map(|i| (i, i + 1)),
            FreqyPacked(ref s) => s.find(haystack).map(|i| (i, i + s.len())),
            BoyerMoore(ref s) => s.find(haystack).map(|i| (i, i + s.len())),
            AC(ref aut) => aut.find(haystack).next().map(|m| (m.start, m.end)),
            TeddySSSE3(ref t) => t.find(haystack).map(|m| (m.start, m.end)),
            TeddyAVX2(ref t) => t.find(haystack).map(|m| (m.start, m.end)),
        }
    }

    /// Like find, except matches must start at index `0`.
    pub fn find_start(&self, haystack: &[u8]) -> Option<(usize, usize)> {
        for lit in self.iter() {
            if lit.len() > haystack.len() {
                continue;
            }
            if lit == &haystack[0..lit.len()] {
                return Some((0, lit.len()));
            }
        }
        None
    }

    /// Like find, except matches must end at index `haystack.len()`.
    pub fn find_end(&self, haystack: &[u8]) -> Option<(usize, usize)> {
        for lit in self.iter() {
            if lit.len() > haystack.len() {
                continue;
            }
            if lit == &haystack[haystack.len() - lit.len()..] {
                return Some((haystack.len() - lit.len(), haystack.len()));
            }
        }
        None
    }

    /// Returns an iterator over all literals to be matched.
    pub fn iter(&self) -> LiteralIter {
        match self.matcher {
            Matcher::Empty => LiteralIter::Empty,
            Matcher::Bytes(ref sset) => LiteralIter::Bytes(&sset.dense),
            Matcher::FreqyPacked(ref s) => LiteralIter::Single(&s.pat),
            Matcher::BoyerMoore(ref s) => LiteralIter::Single(&s.pattern),
            Matcher::AC(ref ac) => LiteralIter::AC(ac.patterns()),
            Matcher::TeddySSSE3(ref ted) => {
                LiteralIter::TeddySSSE3(ted.patterns())
            }
            Matcher::TeddyAVX2(ref ted) => {
                LiteralIter::TeddyAVX2(ted.patterns())
            }
        }
    }

    /// Returns a matcher for the longest common prefix of this matcher.
    pub fn lcp(&self) -> &FreqyPacked {
        &self.lcp
    }

    /// Returns a matcher for the longest common suffix of this matcher.
    pub fn lcs(&self) -> &FreqyPacked {
        &self.lcs
    }

    /// Returns true iff this prefix is empty.
    pub fn is_empty(&self) -> bool {
        self.len() == 0
    }

    /// Returns the number of prefixes in this machine.
    pub fn len(&self) -> usize {
        use self::Matcher::*;
        match self.matcher {
            Empty => 0,
            Bytes(ref sset) => sset.dense.len(),
            FreqyPacked(_) => 1,
            BoyerMoore(_) => 1,
            AC(ref aut) => aut.len(),
            TeddySSSE3(ref ted) => ted.len(),
            TeddyAVX2(ref ted) => ted.len(),
        }
    }

    /// Return the approximate heap usage of literals in bytes.
    pub fn approximate_size(&self) -> usize {
        use self::Matcher::*;
        match self.matcher {
            Empty => 0,
            Bytes(ref sset) => sset.approximate_size(),
            FreqyPacked(ref single) => single.approximate_size(),
            BoyerMoore(ref single) => single.approximate_size(),
            AC(ref aut) => aut.heap_bytes(),
            TeddySSSE3(ref ted) => ted.approximate_size(),
            TeddyAVX2(ref ted) => ted.approximate_size(),
        }
    }
}
197 | ||
impl Matcher {
    /// Builds a matcher keyed on the *first* byte of each literal.
    fn prefixes(lits: &Literals) -> Self {
        let sset = SingleByteSet::prefixes(lits);
        Matcher::new(lits, sset)
    }

    /// Builds a matcher keyed on the *last* byte of each literal.
    fn suffixes(lits: &Literals) -> Self {
        let sset = SingleByteSet::suffixes(lits);
        Matcher::new(lits, sset)
    }

    /// Selects a search strategy for `lits`.
    ///
    /// Preference order: nothing (empty or too many distinct bytes),
    /// single-byte set, single-substring search (Boyer-Moore or
    /// FreqyPacked), Teddy (AVX2 then SSSE3), and finally Aho-Corasick.
    fn new(lits: &Literals, sset: SingleByteSet) -> Self {
        if lits.literals().is_empty() {
            return Matcher::Empty;
        }
        if sset.dense.len() >= 26 {
            // Avoid trying to match a large number of single bytes.
            // This is *very* sensitive to a frequency analysis comparison
            // between the bytes in sset and the composition of the haystack.
            // No matter the size of sset, if its members all are rare in the
            // haystack, then it'd be worth using it. How to tune this... IDK.
            // ---AG
            return Matcher::Empty;
        }
        if sset.complete {
            // Every literal is exactly one byte, so the byte set alone is a
            // complete matcher.
            return Matcher::Bytes(sset);
        }
        if lits.literals().len() == 1 {
            let lit = lits.literals()[0].to_vec();
            if BoyerMooreSearch::should_use(lit.as_slice()) {
                return Matcher::BoyerMoore(BoyerMooreSearch::new(lit));
            } else {
                return Matcher::FreqyPacked(FreqyPacked::new(lit));
            }
        }
        // If all literals start with the same ASCII byte, Aho-Corasick can
        // drive its search with memchr, which tends to beat Teddy.
        let is_aho_corasick_fast = sset.dense.len() == 1 && sset.all_ascii;
        if TeddyAVX2::available() && !is_aho_corasick_fast {
            const MAX_TEDDY_LITERALS: usize = 32;
            if lits.literals().len() <= MAX_TEDDY_LITERALS {
                if let Some(ted) = TeddyAVX2::new(lits) {
                    return Matcher::TeddyAVX2(ted);
                }
            }
        }
        if TeddySSSE3::available() && !is_aho_corasick_fast {
            // Only try Teddy if Aho-Corasick can't use memchr on an ASCII
            // byte. Also, in its current form, Teddy doesn't scale well to
            // lots of literals.
            //
            // We impose the ASCII restriction since an alternation of
            // non-ASCII string literals in the same language is likely to all
            // start with the same byte. Even worse, the corpus being searched
            // probably has a similar composition, which ends up completely
            // negating the benefit of memchr.
            const MAX_TEDDY_LITERALS: usize = 32;
            if lits.literals().len() <= MAX_TEDDY_LITERALS {
                if let Some(ted) = TeddySSSE3::new(lits) {
                    return Matcher::TeddySSSE3(ted);
                }
            }
            // Fallthrough to ol' reliable Aho-Corasick...
        }
        let pats = lits.literals().to_owned();
        Matcher::AC(AcAutomaton::new(pats).into_full())
    }
}
264 | ||
/// An iterator over all literals in a `LiteralSearcher`, yielding each
/// literal as a byte slice. Each variant mirrors a `Matcher` variant and
/// borrows that matcher's storage.
pub enum LiteralIter<'a> {
    /// No literals to yield.
    Empty,
    /// Each byte of the slice is yielded as its own one-byte literal.
    Bytes(&'a [u8]),
    /// A single literal, yielded once.
    Single(&'a [u8]),
    /// The patterns of an Aho-Corasick automaton.
    AC(&'a [Literal]),
    /// The patterns of an SSSE3 Teddy matcher.
    TeddySSSE3(&'a [Vec<u8>]),
    /// The patterns of an AVX2 Teddy matcher.
    TeddyAVX2(&'a [Vec<u8>]),
}
273 | ||
274 | impl<'a> Iterator for LiteralIter<'a> { | |
275 | type Item = &'a [u8]; | |
276 | ||
277 | fn next(&mut self) -> Option<Self::Item> { | |
278 | match *self { | |
279 | LiteralIter::Empty => None, | |
280 | LiteralIter::Bytes(ref mut many) => { | |
281 | if many.is_empty() { | |
282 | None | |
283 | } else { | |
284 | let next = &many[0..1]; | |
285 | *many = &many[1..]; | |
286 | Some(next) | |
287 | } | |
288 | } | |
289 | LiteralIter::Single(ref mut one) => { | |
290 | if one.is_empty() { | |
291 | None | |
292 | } else { | |
293 | let next = &one[..]; | |
294 | *one = &[]; | |
295 | Some(next) | |
296 | } | |
297 | } | |
298 | LiteralIter::AC(ref mut lits) => { | |
299 | if lits.is_empty() { | |
300 | None | |
301 | } else { | |
302 | let next = &lits[0]; | |
303 | *lits = &lits[1..]; | |
304 | Some(&**next) | |
305 | } | |
306 | } | |
307 | LiteralIter::TeddySSSE3(ref mut lits) => { | |
308 | if lits.is_empty() { | |
309 | None | |
310 | } else { | |
311 | let next = &lits[0]; | |
312 | *lits = &lits[1..]; | |
313 | Some(&**next) | |
314 | } | |
315 | } | |
316 | LiteralIter::TeddyAVX2(ref mut lits) => { | |
317 | if lits.is_empty() { | |
318 | None | |
319 | } else { | |
320 | let next = &lits[0]; | |
321 | *lits = &lits[1..]; | |
322 | Some(&**next) | |
323 | } | |
324 | } | |
325 | } | |
326 | } | |
327 | } | |
328 | ||
/// A set of single bytes (the first or last byte of each literal), with
/// both a membership bitmap and a dense list for memchr dispatch.
#[derive(Clone, Debug)]
struct SingleByteSet {
    /// Membership table indexed by byte value (length 256).
    sparse: Vec<bool>,
    /// The distinct bytes in the set, in insertion order.
    dense: Vec<u8>,
    /// True iff every literal added was exactly one byte long, i.e. the
    /// byte set alone is a complete matcher for the literal set.
    complete: bool,
    /// True iff every byte in the set is ASCII (<= 0x7F).
    all_ascii: bool,
}
336 | ||
337 | impl SingleByteSet { | |
338 | fn new() -> SingleByteSet { | |
339 | SingleByteSet { | |
340 | sparse: vec![false; 256], | |
341 | dense: vec![], | |
342 | complete: true, | |
343 | all_ascii: true, | |
344 | } | |
345 | } | |
346 | ||
347 | fn prefixes(lits: &Literals) -> SingleByteSet { | |
348 | let mut sset = SingleByteSet::new(); | |
349 | for lit in lits.literals() { | |
350 | sset.complete = sset.complete && lit.len() == 1; | |
351 | if let Some(&b) = lit.get(0) { | |
352 | if !sset.sparse[b as usize] { | |
353 | if b > 0x7F { | |
354 | sset.all_ascii = false; | |
355 | } | |
356 | sset.dense.push(b); | |
357 | sset.sparse[b as usize] = true; | |
358 | } | |
359 | } | |
360 | } | |
361 | sset | |
362 | } | |
363 | ||
364 | fn suffixes(lits: &Literals) -> SingleByteSet { | |
365 | let mut sset = SingleByteSet::new(); | |
366 | for lit in lits.literals() { | |
367 | sset.complete = sset.complete && lit.len() == 1; | |
368 | if let Some(&b) = lit.get(lit.len().checked_sub(1).unwrap()) { | |
369 | if !sset.sparse[b as usize] { | |
370 | if b > 0x7F { | |
371 | sset.all_ascii = false; | |
372 | } | |
373 | sset.dense.push(b); | |
374 | sset.sparse[b as usize] = true; | |
375 | } | |
376 | } | |
377 | } | |
378 | sset | |
379 | } | |
380 | ||
381 | /// Faster find that special cases certain sizes to use memchr. | |
382 | #[inline(always)] // reduces constant overhead | |
383 | fn find(&self, text: &[u8]) -> Option<usize> { | |
384 | match self.dense.len() { | |
385 | 0 => None, | |
386 | 1 => memchr(self.dense[0], text), | |
387 | 2 => memchr2(self.dense[0], self.dense[1], text), | |
388 | 3 => memchr3(self.dense[0], self.dense[1], self.dense[2], text), | |
389 | _ => self._find(text), | |
390 | } | |
391 | } | |
392 | ||
393 | /// Generic find that works on any sized set. | |
394 | fn _find(&self, haystack: &[u8]) -> Option<usize> { | |
395 | for (i, &b) in haystack.iter().enumerate() { | |
396 | if self.sparse[b as usize] { | |
397 | return Some(i); | |
398 | } | |
399 | } | |
400 | None | |
401 | } | |
402 | ||
403 | fn approximate_size(&self) -> usize { | |
404 | (self.dense.len() * mem::size_of::<u8>()) | |
405 | + (self.sparse.len() * mem::size_of::<bool>()) | |
406 | } | |
407 | } | |
408 | ||
/// Provides an implementation of fast substring search using frequency
/// analysis.
///
/// memchr is so fast that we do everything we can to keep the loop in memchr
/// for as long as possible. The easiest way to do this is to intelligently
/// pick the byte to send to memchr. The best byte is the byte that occurs
/// least frequently in the haystack. Since doing frequency analysis on the
/// haystack is far too expensive, we compute a set of fixed frequencies up
/// front and hard code them in src/freqs.rs. Frequency analysis is done via
/// scripts/frequencies.py.
#[derive(Clone, Debug)]
pub struct FreqyPacked {
    /// The pattern.
    pat: Vec<u8>,
    /// The number of Unicode characters in the pattern. This is useful for
    /// determining the effective length of a pattern when deciding which
    /// optimizations to perform. A trailing incomplete UTF-8 sequence counts
    /// as one character.
    char_len: usize,
    /// The rarest byte in the pattern, according to pre-computed frequency
    /// analysis.
    rare1: u8,
    /// The offset of the rarest byte in `pat`.
    rare1i: usize,
    /// The second rarest byte in the pattern, according to pre-computed
    /// frequency analysis. (This may be equivalent to the rarest byte.)
    ///
    /// The second rarest byte is used as a type of guard for quickly detecting
    /// a mismatch after memchr locates an instance of the rarest byte. This
    /// is a hedge against pathological cases where the pre-computed frequency
    /// analysis may be off. (But of course, does not prevent *all*
    /// pathological cases.)
    rare2: u8,
    /// The offset of the second rarest byte in `pat`.
    rare2i: usize,
}
445 | ||
impl FreqyPacked {
    /// Builds a searcher for `pat`, precomputing its two rarest bytes and
    /// the offsets of their last occurrences.
    fn new(pat: Vec<u8>) -> FreqyPacked {
        if pat.is_empty() {
            return FreqyPacked::empty();
        }

        // Find the rarest two bytes. Try to make them distinct (but it's not
        // required).
        let mut rare1 = pat[0];
        let mut rare2 = pat[0];
        for b in pat[1..].iter().cloned() {
            if freq_rank(b) < freq_rank(rare1) {
                rare1 = b;
            }
        }
        // Second pass: pick the rarest byte that differs from rare1 if one
        // exists; otherwise rare2 stays equal to rare1.
        for &b in &pat {
            if rare1 == rare2 {
                rare2 = b
            } else if b != rare1 && freq_rank(b) < freq_rank(rare2) {
                rare2 = b;
            }
        }

        // And find the offsets of their last occurrences.
        let rare1i = pat.iter().rposition(|&b| b == rare1).unwrap();
        let rare2i = pat.iter().rposition(|&b| b == rare2).unwrap();

        let char_len = char_len_lossy(&pat);
        FreqyPacked {
            pat: pat,
            char_len: char_len,
            rare1: rare1,
            rare1i: rare1i,
            rare2: rare2,
            rare2i: rare2i,
        }
    }

    /// Returns a searcher for the empty pattern; `find` on it always
    /// returns `None` (see the `pat.is_empty()` guard there).
    fn empty() -> FreqyPacked {
        FreqyPacked {
            pat: vec![],
            char_len: 0,
            rare1: 0,
            rare1i: 0,
            rare2: 0,
            rare2i: 0,
        }
    }

    /// Finds the start offset of the first occurrence of the pattern in
    /// `haystack`, if any. Uses memchr on the rarest byte, then the second
    /// rarest byte as a guard before a full comparison.
    #[inline(always)] // reduces constant overhead
    pub fn find(&self, haystack: &[u8]) -> Option<usize> {
        let pat = &*self.pat;
        if haystack.len() < pat.len() || pat.is_empty() {
            return None;
        }
        // `i` tracks a candidate position of the rarest byte; starting at
        // rare1i ensures `i - rare1i` (the candidate match start) is >= 0.
        let mut i = self.rare1i;
        while i < haystack.len() {
            i += match memchr(self.rare1, &haystack[i..]) {
                None => return None,
                Some(i) => i,
            };
            // Align the candidate window so the rare byte lands at rare1i.
            let start = i - self.rare1i;
            let end = start + pat.len();
            if end > haystack.len() {
                return None;
            }
            let aligned = &haystack[start..end];
            // Check the cheap guard byte first, then the full pattern.
            if aligned[self.rare2i] == self.rare2 && aligned == &*self.pat {
                return Some(start);
            }
            i += 1;
        }
        None
    }

    /// Returns true iff `text` ends with the pattern.
    #[inline(always)] // reduces constant overhead
    pub fn is_suffix(&self, text: &[u8]) -> bool {
        if text.len() < self.len() {
            return false;
        }
        text[text.len() - self.len()..] == *self.pat
    }

    /// The length of the pattern in bytes.
    pub fn len(&self) -> usize {
        self.pat.len()
    }

    /// The number of Unicode characters in the pattern (lossily decoded).
    pub fn char_len(&self) -> usize {
        self.char_len
    }

    /// Approximate heap usage of the pattern in bytes.
    fn approximate_size(&self) -> usize {
        self.pat.len() * mem::size_of::<u8>()
    }
}
541 | ||
/// Counts the Unicode characters in `bytes`, decoding as UTF-8 lossily.
///
/// Each maximal invalid byte sequence decodes to one replacement character
/// and therefore counts as one character.
fn char_len_lossy(bytes: &[u8]) -> usize {
    let decoded = String::from_utf8_lossy(bytes);
    decoded.chars().count()
}
545 | ||
/// An implementation of Tuned Boyer-Moore as laid out by
/// Andrew Hume and Daniel Sunday in "Fast String Searching".
/// O(n) in the size of the input.
///
/// Fast string searching algorithms come in many variations,
/// but they can generally be described in terms of three main
/// components.
///
/// The skip loop is where the string searcher wants to spend
/// as much time as possible. Exactly which character in the
/// pattern the skip loop examines varies from algorithm to
/// algorithm, but in the simplest case this loop repeatedly
/// looks at the last character in the pattern and jumps
/// forward in the input if it is not in the pattern.
/// Robert Boyer and J Moore called this the "fast" loop in
/// their original paper.
///
/// The match loop is responsible for actually examining the
/// whole potentially matching substring. In order to fail
/// faster, the match loop sometimes has a guard test attached.
/// The guard test uses frequency analysis of the different
/// characters in the pattern to choose the least frequently
/// occurring character and use it to find match failures
/// as quickly as possible.
///
/// The shift rule governs how the algorithm will shuffle its
/// test window in the event of a failure during the match loop.
/// Certain shift rules allow the worst-case run time of the
/// algorithm to be shown to be O(n) in the size of the input
/// rather than O(nm) in the size of the input and the size
/// of the pattern (as naive Boyer-Moore is).
///
/// "Fast String Searching", in addition to presenting a tuned
/// algorithm, provides a comprehensive taxonomy of the many
/// different flavors of string searchers. Under that taxonomy
/// TBM, the algorithm implemented here, uses an unrolled fast
/// skip loop with memchr fallback, a forward match loop with guard,
/// and the mini Sunday's delta shift rule. To unpack that you'll have to
/// read the paper.
#[derive(Clone, Debug)]
pub struct BoyerMooreSearch {
    /// The pattern we are going to look for in the haystack.
    pattern: Vec<u8>,

    /// The skip table for the skip loop.
    ///
    /// Maps the character at the end of the input
    /// to a shift.
    skip_table: Vec<usize>,

    /// The guard character (least frequently occurring char).
    guard: u8,
    /// The reverse-index of the guard character in the pattern.
    guard_reverse_idx: usize,

    /// Daniel Sunday's mini generalized delta2 shift table.
    ///
    /// We use a skip loop, so we only have to provide a shift
    /// for the skip char (last char). This is why it is a mini
    /// shift rule.
    md2_shift: usize,
}
608 | ||
impl BoyerMooreSearch {
    /// Create a new string searcher, performing whatever
    /// compilation steps are required.
    ///
    /// `pattern` must be non-empty (debug-asserted).
    fn new(pattern: Vec<u8>) -> Self {
        debug_assert!(pattern.len() > 0);

        let (g, gi) = Self::select_guard(pattern.as_slice());
        let skip_table = Self::compile_skip_table(pattern.as_slice());
        let md2_shift = Self::compile_md2_shift(pattern.as_slice());
        BoyerMooreSearch {
            pattern: pattern,
            skip_table: skip_table,
            guard: g,
            guard_reverse_idx: gi,
            md2_shift: md2_shift,
        }
    }

    /// Find the pattern in `haystack`, returning the offset
    /// of the start of the first occurrence of the pattern
    /// in `haystack`.
    #[inline]
    fn find(&self, haystack: &[u8]) -> Option<usize> {
        if haystack.len() < self.pattern.len() {
            return None;
        }

        // `window_end` always points at the haystack byte aligned with the
        // *last* byte of the pattern for the current candidate window.
        let mut window_end = self.pattern.len() - 1;

        // Inspired by the grep source. It is a way
        // to do correct loop unrolling without having to place
        // a crashpad of terminating characters at the end in
        // the way described in the Fast String Searching paper.
        const NUM_UNROLL: usize = 10;
        // 1 for the initial position, and 1 for the md2 shift
        let short_circut = (NUM_UNROLL + 2) * self.pattern.len();

        if haystack.len() > short_circut {
            // just 1 for the md2 shift
            let backstop = haystack.len() - ((NUM_UNROLL + 1) * self.pattern.len());
            loop {
                window_end = match self.skip_loop(haystack, window_end, backstop) {
                    Some(i) => i,
                    None => return None,
                };
                if window_end >= backstop {
                    break;
                }

                if self.check_match(haystack, window_end) {
                    return Some(window_end - (self.pattern.len() - 1));
                } else {
                    // A sentinel 0 in the skip table means "last pattern
                    // byte matched here"; apply the md2 shift instead.
                    let skip = self.skip_table[haystack[window_end] as usize];
                    window_end +=
                        if skip == 0 { self.md2_shift } else { skip };
                    continue;
                }
            }
        }

        // now process the input after the backstop
        while window_end < haystack.len() {
            let mut skip = self.skip_table[haystack[window_end] as usize];
            if skip == 0 {
                if self.check_match(haystack, window_end) {
                    return Some(window_end - (self.pattern.len() - 1));
                } else {
                    skip = self.md2_shift;
                }
            }
            window_end += skip;
        }

        None
    }

    /// The length of the pattern in bytes.
    fn len(&self) -> usize {
        return self.pattern.len()
    }

    /// The key heuristic behind which the BoyerMooreSearch lives.
    ///
    /// See `rust-lang/regex/issues/408`.
    ///
    /// Tuned Boyer-Moore is actually pretty slow! It turns out a handrolled
    /// platform-specific memchr routine with a bit of frequency
    /// analysis sprinkled on top actually wins most of the time.
    /// However, there are a few cases where Tuned Boyer-Moore still
    /// wins.
    ///
    /// If the haystack is random, frequency analysis doesn't help us,
    /// so Boyer-Moore will win for sufficiently large needles.
    /// Unfortunately, there is no obvious way to determine this
    /// ahead of time.
    ///
    /// If the pattern itself consists of very common characters,
    /// frequency analysis won't get us anywhere. The most extreme
    /// example of this is a pattern like `eeeeeeeeeeeeeeee`. Fortunately,
    /// this case is wholly determined by the pattern, so we can actually
    /// implement the heuristic.
    ///
    /// A third case is if the pattern is sufficiently long. The idea
    /// here is that once the pattern gets long enough the Tuned
    /// Boyer-Moore skip loop will start making strides long enough
    /// to beat the asm deep magic that is memchr.
    fn should_use(pattern: &[u8]) -> bool {
        // The minimum pattern length required to use TBM.
        const MIN_LEN: usize = 9;
        // The minimum frequency rank (lower is rarer) that every byte in the
        // pattern must have in order to use TBM. That is, if the pattern
        // contains _any_ byte with a lower rank, then TBM won't be used.
        const MIN_CUTOFF: usize = 150;
        // The maximum frequency rank for any byte.
        const MAX_CUTOFF: usize = 255;
        // The scaling factor used to determine the actual cutoff frequency
        // to use (keeping in mind that the minimum frequency rank is bounded
        // by MIN_CUTOFF). This scaling factor is an attempt to make TBM more
        // likely to be used as the pattern grows longer. That is, longer
        // patterns permit somewhat less frequent bytes than shorter patterns,
        // under the assumption that TBM gets better as the pattern gets
        // longer.
        const LEN_CUTOFF_PROPORTION: usize = 4;

        let scaled_rank = pattern.len().wrapping_mul(LEN_CUTOFF_PROPORTION);
        let cutoff = cmp::max(
            MIN_CUTOFF,
            MAX_CUTOFF - cmp::min(MAX_CUTOFF, scaled_rank),
        );
        // The pattern must be long enough to be worthwhile. e.g., memchr will
        // be faster on `e` because it is short even though e is quite common.
        pattern.len() > MIN_LEN
            // all the bytes must be more common than the cutoff.
            && pattern.iter().all(|c| freq_rank(*c) >= cutoff)
    }

    /// Check to see if there is a match at the given position
    #[inline]
    fn check_match(&self, haystack: &[u8], window_end: usize) -> bool {
        // guard test: compare the rarest pattern byte first so mismatches
        // fail fast before the full scan below.
        if haystack[window_end - self.guard_reverse_idx] != self.guard {
            return false;
        }

        // match loop
        let window_start = window_end - (self.pattern.len() - 1);
        for i in 0..self.pattern.len() {
            if self.pattern[i] != haystack[window_start + i] {
                return false;
            }
        }

        true
    }

    /// Skip forward according to the shift table.
    ///
    /// Returns the offset of the next occurrence
    /// of the last char in the pattern, or none
    /// if it never reappears. If `skip_loop` hits the backstop
    /// it will leave early.
    #[inline]
    fn skip_loop(&self,
        haystack: &[u8],
        mut window_end: usize,
        backstop: usize,
    ) -> Option<usize> {
        use std::mem;

        let window_end_snapshot = window_end;
        let skip_of = |we: usize| -> usize {
            // Unsafe might make this faster, but the benchmarks
            // were hard to interpret.
            self.skip_table[haystack[we] as usize]
        };

        // Ten manually unrolled skip steps; a zero skip (possible match
        // position) breaks out of the unrolled run at each checkpoint.
        loop {
            let mut skip = skip_of(window_end); window_end += skip;
            skip = skip_of(window_end); window_end += skip;
            if skip != 0 {
                skip = skip_of(window_end); window_end += skip;
                skip = skip_of(window_end); window_end += skip;
                skip = skip_of(window_end); window_end += skip;
                if skip != 0 {
                    skip = skip_of(window_end); window_end += skip;
                    skip = skip_of(window_end); window_end += skip;
                    skip = skip_of(window_end); window_end += skip;
                    if skip != 0 {
                        skip = skip_of(window_end); window_end += skip;
                        skip = skip_of(window_end); window_end += skip;

                        // If ten iterations did not make at least 16 words
                        // worth of progress, we just fall back on memchr.
                        if window_end - window_end_snapshot >
                            16 * mem::size_of::<usize>() {

                            // Returning a window_end >= backstop will immediately
                            // break us out of the inner loop in `find`.
                            if window_end >= backstop {
                                return Some(window_end);
                            }

                            continue; // we made enough progress
                        } else {
                            // In case we are already there, and so that
                            // we will catch the guard char.
                            window_end = window_end
                                .checked_sub(1 + self.guard_reverse_idx)
                                .unwrap_or(0);

                            match memchr(self.guard, &haystack[window_end..]) {
                                None => return None,
                                Some(g_idx) => {
                                    return Some(window_end + g_idx + self.guard_reverse_idx);
                                }
                            }
                        }
                    }
                }
            }

            return Some(window_end);
        }
    }

    /// Compute the ufast skip table.
    fn compile_skip_table(pattern: &[u8]) -> Vec<usize> {
        let mut tab = vec![pattern.len(); 256];

        // For every char in the pattern, we write a skip
        // that will line us up with the rightmost occurrence.
        //
        // N.B. the sentinel (0) is written by the last
        // loop iteration.
        for (i, c) in pattern.iter().enumerate() {
            tab[*c as usize] = (pattern.len() - 1) - i;
        }

        tab
    }

    /// Select the guard character based off of the precomputed
    /// frequency table. Returns the rarest byte and its reverse
    /// index (distance from the end of the pattern).
    fn select_guard(pattern: &[u8]) -> (u8, usize) {
        let mut rarest = pattern[0];
        let mut rarest_rev_idx = pattern.len() - 1;
        for (i, c) in pattern.iter().enumerate() {
            if freq_rank(*c) < freq_rank(rarest) {
                rarest = *c;
                rarest_rev_idx = (pattern.len() - 1) - i;
            }
        }

        (rarest, rarest_rev_idx)
    }

    /// If there is another occurrence of the skip
    /// char, shift to it, otherwise just shift to
    /// the next window.
    fn compile_md2_shift(pattern: &[u8]) -> usize {
        let shiftc = *pattern.last().unwrap();

        // For a pattern of length 1 we will never apply the
        // shift rule, so we use a poison value on the principle
        // that failing fast is a good thing.
        if pattern.len() == 1 {
            return 0xDEADBEAF;
        }

        // Scan right-to-left (excluding the last byte) for the
        // rightmost other occurrence of the skip char.
        let mut i = pattern.len() - 2;
        while i > 0 {
            if pattern[i] == shiftc {
                return (pattern.len() - 1) - i;
            }
            i -= 1;
        }

        // The skip char never re-occurs in the pattern, so
        // we can just shift the whole window length.
        pattern.len() - 1
    }

    /// Approximate heap usage in bytes (pattern plus skip table).
    fn approximate_size(&self) -> usize {
        (self.pattern.len() * mem::size_of::<u8>())
        + (256 * mem::size_of::<usize>()) // skip table
    }
}
895 | ||
896 | fn freq_rank(b: u8) -> usize { | |
897 | BYTE_FREQUENCIES[b as usize] as usize | |
898 | } | |
899 | ||
#[cfg(test)]
mod tests {
    //! Unit, regression, and property-based tests for the literal
    //! searchers defined in this module.

    use super::{BoyerMooreSearch, FreqyPacked};

    //
    // Unit Tests
    //

    // The "hello, world" of string searching
    #[test]
    fn bm_find_subs() {
        let searcher = BoyerMooreSearch::new(Vec::from(&b"pattern"[..]));
        let haystack = b"I keep seeing patterns in this text";
        assert_eq!(14, searcher.find(haystack).unwrap());
    }

    #[test]
    fn bm_find_no_subs() {
        let searcher = BoyerMooreSearch::new(Vec::from(&b"pattern"[..]));
        let haystack = b"I keep seeing needles in this text";
        assert_eq!(None, searcher.find(haystack));
    }

    //
    // Regression Tests
    //

    #[test]
    fn bm_skip_reset_bug() {
        let haystack = vec![0, 0, 0, 0, 0, 1, 1, 0];
        let needle = vec![0, 1, 1, 0];

        let searcher = BoyerMooreSearch::new(needle);
        let offset = searcher.find(haystack.as_slice()).unwrap();
        assert_eq!(4, offset);
    }

    #[test]
    fn bm_backstop_underflow_bug() {
        let haystack = vec![0, 0];
        let needle = vec![0, 0];

        let searcher = BoyerMooreSearch::new(needle);
        let offset = searcher.find(haystack.as_slice()).unwrap();
        assert_eq!(0, offset);
    }

    #[test]
    fn bm_naive_off_by_one_bug() {
        let haystack = vec![91];
        let needle = vec![91];

        let naive_offset = naive_find(&needle, &haystack).unwrap();
        assert_eq!(0, naive_offset);
    }

    #[test]
    fn bm_memchr_fallback_indexing_bug() {
        let mut haystack = vec![
            0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
            0, 0, 0, 0, 0, 0, 0, 87, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
            0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
            0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
            0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0];
        let needle = vec![1, 1, 1, 1, 32, 32, 87];
        let needle_start = haystack.len();
        haystack.extend(needle.clone());

        let searcher = BoyerMooreSearch::new(needle);
        assert_eq!(needle_start, searcher.find(haystack.as_slice()).unwrap());
    }

    #[test]
    fn bm_backstop_boundary() {
        let haystack = b"\
// aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa
e_data.clone_created(entity_id, entity_to_add.entity_id);
aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa
aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa
".to_vec();
        let needle = b"clone_created".to_vec();

        let searcher = BoyerMooreSearch::new(needle);
        let result = searcher.find(&haystack);
        assert_eq!(Some(43), result);
    }

    #[test]
    fn bm_win_gnu_indexing_bug() {
        let haystack_raw = vec![
            0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
            0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
            0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
            0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0];
        let needle = vec![1, 1, 1, 1, 1, 1, 1];
        let haystack = haystack_raw.as_slice();

        BoyerMooreSearch::new(needle.clone()).find(haystack);
    }

    //
    // QuickCheck Properties
    //

    use quickcheck::TestResult;

    // Straightforward O(n*m) reference implementation used to
    // cross-check the optimized searchers.
    fn naive_find(needle: &[u8], haystack: &[u8]) -> Option<usize> {
        assert!(needle.len() <= haystack.len());

        for i in 0..(haystack.len() - (needle.len() - 1)) {
            if haystack[i] == needle[0]
                && &haystack[i..(i+needle.len())] == needle {
                return Some(i)
            }
        }

        None
    }

    quickcheck! {
        fn qc_bm_equals_naive_find(pile1: Vec<u8>, pile2: Vec<u8>) -> TestResult {
            if pile1.len() == 0 || pile2.len() == 0 {
                return TestResult::discard();
            }

            // The shorter pile is the needle; the longer the haystack.
            let (needle, haystack) = if pile1.len() < pile2.len() {
                (pile1, pile2.as_slice())
            } else {
                (pile2, pile1.as_slice())
            };

            let searcher = BoyerMooreSearch::new(needle.clone());
            TestResult::from_bool(
                searcher.find(haystack) == naive_find(&needle, haystack))
        }

        fn qc_bm_equals_single(pile1: Vec<u8>, pile2: Vec<u8>) -> TestResult {
            if pile1.len() == 0 || pile2.len() == 0 {
                return TestResult::discard();
            }

            let (needle, haystack) = if pile1.len() < pile2.len() {
                (pile1, pile2.as_slice())
            } else {
                (pile2, pile1.as_slice())
            };

            let bm_searcher = BoyerMooreSearch::new(needle.clone());
            let freqy_memchr = FreqyPacked::new(needle);
            TestResult::from_bool(
                bm_searcher.find(haystack) == freqy_memchr.find(haystack))
        }

        fn qc_bm_finds_trailing_needle(
            haystack_pre: Vec<u8>,
            needle: Vec<u8>
        ) -> TestResult {
            if needle.len() == 0 {
                return TestResult::discard();
            }

            let mut haystack = haystack_pre.clone();
            let searcher = BoyerMooreSearch::new(needle.clone());

            if haystack.len() >= needle.len() &&
                searcher.find(haystack.as_slice()).is_some() {
                return TestResult::discard();
            }

            haystack.extend(needle.clone());

            // What if the tail of the haystack can start the
            // needle?
            let start = haystack_pre.len()
                .checked_sub(needle.len())
                .unwrap_or(0);
            for i in 0..(needle.len() - 1) {
                if searcher.find(&haystack[(i + start)..]).is_some() {
                    return TestResult::discard();
                }
            }

            TestResult::from_bool(
                searcher.find(haystack.as_slice())
                        .map(|x| x == haystack_pre.len())
                        .unwrap_or(false))
        }

        // qc_equals_* is only testing the negative case as @burntsushi
        // pointed out in https://github.com/rust-lang/regex/issues/446.
        // This quickcheck prop represents an effort to force testing of
        // the positive case. qc_bm_finds_first and qc_bm_finds_trailing_needle
        // already check some of the positive cases, but they don't cover
        // cases where the needle is in the middle of haystack. This prop
        // fills that hole.
        fn qc_bm_finds_subslice(
            haystack: Vec<u8>,
            needle_start: usize,
            needle_length: usize
        ) -> TestResult {
            if haystack.len() == 0 {
                return TestResult::discard();
            }

            // Clamp the random start/length so the needle is a real
            // subslice of the haystack.
            let needle_start = needle_start % haystack.len();
            let needle_length = needle_length % (haystack.len() - needle_start);

            if needle_length == 0 {
                return TestResult::discard();
            }

            let needle = &haystack[needle_start..(needle_start + needle_length)];

            let bm_searcher = BoyerMooreSearch::new(needle.to_vec());

            // The needle may occur before `needle_start`; Boyer-Moore
            // must agree with the naive search on the first match.
            let start = naive_find(&needle, &haystack);
            match start {
                None => TestResult::from_bool(false),
                Some(nf_start) =>
                    TestResult::from_bool(
                        nf_start <= needle_start
                        && bm_searcher.find(&haystack) == start
                    )
            }
        }

        fn qc_bm_finds_first(needle: Vec<u8>) -> TestResult {
            if needle.len() == 0 {
                return TestResult::discard();
            }

            // Haystack is the needle twice; the match must be at 0.
            let mut haystack = needle.clone();
            let searcher = BoyerMooreSearch::new(needle.clone());
            haystack.extend(needle);

            TestResult::from_bool(
                searcher.find(haystack.as_slice())
                        .map(|x| x == 0)
                        .unwrap_or(false))
        }
    }
}