]> git.proxmox.com Git - cargo.git/blob - vendor/bstr/src/search/twoway.rs
New upstream version 0.35.0
[cargo.git] / vendor / bstr / src / search / twoway.rs
1 use core::cmp;
2
3 use bstr::BStr;
4 use cow::CowBStr;
5 use search::prefilter::{Freqy, PrefilterState};
6
7 /// An implementation of the TwoWay substring search algorithm, with heuristics
8 /// for accelerating search based on frequency analysis.
9 ///
10 /// This searcher supports forward and reverse search, although not
11 /// simultaneously. It runs in O(n + m) time and O(1) space, where
12 /// `n ~ len(needle)` and `m ~ len(haystack)`.
13 ///
14 /// The implementation here roughly matches that which was developed by
15 /// Crochemore and Perrin in their 1991 paper "Two-way string-matching." The
16 /// only change in this implementation is the use of zero-based indices and
17 /// the addition of heuristics for a fast skip loop. That is, this will detect
18 /// bytes that are believed to be rare in the needle and use fast vectorized
19 /// instructions to find their occurrences quickly. The Two-Way algorithm is
20 /// then used to confirm whether a match at that location occurred.
21 ///
22 /// The heuristic for fast skipping is automatically shut off if it's
23 /// detected to be ineffective at search time. Generally, this only occurs in
24 /// pathological cases. But this is generally necessary in order to preserve
25 /// a `O(n + m)` time bound.
26 ///
27 /// The code below is fairly complex and not obviously correct at all. It's
28 /// likely necessary to read the Two-Way paper cited above in order to fully
29 /// grok this code.
30 #[derive(Clone, Debug)]
31 pub struct TwoWay<'b> {
32 /// The needle that we're looking for.
33 needle: CowBStr<'b>,
34 /// An implementation of a fast skip loop based on hard-coded frequency
35 /// data. This is only used when conditions are deemed favorable.
36 freqy: Freqy,
37 /// A critical position in needle. Specifically, this position corresponds
38 /// to beginning of either the minimal or maximal suffix in needle. (N.B.
39 /// See SuffixType below for why "minimal" isn't quite the correct word
40 /// here.)
41 ///
42 /// This is the position at which every search begins. Namely, search
43 /// starts by scanning text to the right of this position, and only if
44 /// there's a match does the text to the left of this position get scanned.
45 critical_pos: usize,
46 /// The amount we shift by in the Two-Way search algorithm. This
47 /// corresponds to the "small period" and "large period" cases.
48 shift: Shift,
49 }
50
51 impl<'b> TwoWay<'b> {
52 /// Create a searcher that uses the Two-Way algorithm by searching forwards
53 /// through any haystack.
54 pub fn forward(needle: &'b BStr) -> TwoWay<'b> {
55 let freqy = Freqy::forward(needle);
56 if needle.is_empty() {
57 return TwoWay {
58 needle: CowBStr::new(needle),
59 freqy,
60 critical_pos: 0,
61 shift: Shift::Large { shift: 0 },
62 };
63 }
64
65 let min_suffix = Suffix::forward(needle, SuffixKind::Minimal);
66 let max_suffix = Suffix::forward(needle, SuffixKind::Maximal);
67 let (period_lower_bound, critical_pos) =
68 if min_suffix.pos > max_suffix.pos {
69 (min_suffix.period, min_suffix.pos)
70 } else {
71 (max_suffix.period, max_suffix.pos)
72 };
73 let shift = Shift::forward(needle, period_lower_bound, critical_pos);
74 let needle = CowBStr::new(needle);
75 TwoWay { needle, freqy, critical_pos, shift }
76 }
77
78 /// Create a searcher that uses the Two-Way algorithm by searching in
79 /// reverse through any haystack.
80 pub fn reverse(needle: &'b BStr) -> TwoWay<'b> {
81 let freqy = Freqy::reverse(needle);
82 if needle.is_empty() {
83 return TwoWay {
84 needle: CowBStr::new(needle),
85 freqy,
86 critical_pos: 0,
87 shift: Shift::Large { shift: 0 },
88 };
89 }
90
91 let min_suffix = Suffix::reverse(needle, SuffixKind::Minimal);
92 let max_suffix = Suffix::reverse(needle, SuffixKind::Maximal);
93 let (period_lower_bound, critical_pos) =
94 if min_suffix.pos < max_suffix.pos {
95 (min_suffix.period, min_suffix.pos)
96 } else {
97 (max_suffix.period, max_suffix.pos)
98 };
99 let shift = Shift::reverse(needle, period_lower_bound, critical_pos);
100 let needle = CowBStr::new(needle);
101 TwoWay { needle, freqy, critical_pos, shift }
102 }
103
104 /// Return a fresh prefilter state that can be used with this searcher.
105 /// A prefilter state is used to track the effectiveness of a searcher's
106 /// prefilter for speeding up searches. Therefore, the prefilter state
107 /// should generally be reused on subsequent searches (such as in an
108 /// iterator). For searches on a different haystack, then a new prefilter
109 /// state should be used.
110 ///
111 /// This always initializes a valid prefilter state even if this searcher
112 /// does not have a prefilter enabled.
113 pub fn prefilter_state(&self) -> PrefilterState {
114 self.freqy.prefilter_state()
115 }
116
117 /// Return the needle used by this searcher.
118 pub fn needle(&self) -> &BStr {
119 self.needle.as_bstr()
120 }
121
122 /// Convert this searched into an owned version, where the needle is
123 /// copied if it isn't already owned.
124 #[cfg(feature = "std")]
125 pub fn into_owned(self) -> TwoWay<'static> {
126 TwoWay {
127 needle: self.needle.into_owned(),
128 freqy: self.freqy,
129 critical_pos: self.critical_pos,
130 shift: self.shift,
131 }
132 }
133
134 /// Find the position of the first occurrence of this searcher's needle in
135 /// the given haystack. If one does not exist, then return None.
136 ///
137 /// This will automatically initialize prefilter state. This should only
138 /// be used for one-off searches.
139 pub fn find(&self, haystack: &BStr) -> Option<usize> {
140 self.find_with(&mut self.prefilter_state(), haystack)
141 }
142
143 /// Find the position of the last occurrence of this searcher's needle
144 /// in the given haystack. If one does not exist, then return None.
145 ///
146 /// This will automatically initialize prefilter state. This should only
147 /// be used for one-off searches.
148 pub fn rfind(&self, haystack: &BStr) -> Option<usize> {
149 self.rfind_with(&mut self.prefilter_state(), haystack)
150 }
151
152 /// Find the position of the first occurrence of this searcher's needle in
153 /// the given haystack. If one does not exist, then return None.
154 ///
155 /// This accepts prefilter state that is useful when using the same
156 /// searcher multiple times, such as in an iterator.
157 pub fn find_with(
158 &self,
159 prestate: &mut PrefilterState,
160 haystack: &BStr,
161 ) -> Option<usize> {
162 if self.needle.is_empty() {
163 return Some(0);
164 } else if haystack.len() < self.needle.len() {
165 return None;
166 } else if self.needle.len() == 1 {
167 return haystack.find_byte(self.needle[0]);
168 }
169 match self.shift {
170 Shift::Small { period } => {
171 self.find_small(prestate, haystack, period)
172 }
173 Shift::Large { shift } => {
174 self.find_large(prestate, haystack, shift)
175 }
176 }
177 }
178
179 /// Find the position of the last occurrence of this searcher's needle
180 /// in the given haystack. If one does not exist, then return None.
181 ///
182 /// This accepts prefilter state that is useful when using the same
183 /// searcher multiple times, such as in an iterator.
184 pub fn rfind_with(
185 &self,
186 prestate: &mut PrefilterState,
187 haystack: &BStr,
188 ) -> Option<usize> {
189 if self.needle.is_empty() {
190 return Some(haystack.len());
191 } else if haystack.len() < self.needle.len() {
192 return None;
193 } else if self.needle.len() == 1 {
194 return haystack.rfind_byte(self.needle[0]);
195 }
196 match self.shift {
197 Shift::Small { period } => {
198 self.rfind_small(prestate, haystack, period)
199 }
200 Shift::Large { shift } => {
201 self.rfind_large(prestate, haystack, shift)
202 }
203 }
204 }
205
206 // Below is the actual implementation of TwoWay searching, including both
207 // forwards and backwards searching. Each forward and reverse search has
208 // two fairly similar implementations, each handling the small and large
209 // period cases, for a total 4 different search routines.
210 //
211 // On top of that, each search implementation can be accelerated by a
212 // Freqy prefilter, but it is not always enabled. To avoid its overhead
213 // when its disabled, we explicitly inline each search implementation based
214 // on whether Freqy will be used or not. This brings us up to a total of
215 // 8 monomorphized versions of the search code.
216
217 #[inline(never)]
218 fn find_small(
219 &self,
220 prestate: &mut PrefilterState,
221 haystack: &BStr,
222 period: usize,
223 ) -> Option<usize> {
224 if prestate.is_effective() {
225 self.find_small_imp(prestate, true, haystack, period)
226 } else {
227 self.find_small_imp(prestate, false, haystack, period)
228 }
229 }
230
231 #[inline(always)]
232 fn find_small_imp(
233 &self,
234 prestate: &mut PrefilterState,
235 prefilter: bool,
236 haystack: &BStr,
237 period: usize,
238 ) -> Option<usize> {
239 let needle = self.needle.as_bstr();
240 let mut pos = 0;
241 let mut shift = 0;
242 while pos + needle.len() <= haystack.len() {
243 let mut i = cmp::max(self.critical_pos, shift);
244 if prefilter && prestate.is_effective() {
245 match self.freqy.find_candidate(prestate, &haystack[pos..]) {
246 None => return None,
247 Some(found) => {
248 shift = 0;
249 i = self.critical_pos;
250 pos += found;
251 if pos + needle.len() > haystack.len() {
252 return None;
253 }
254 }
255 }
256 }
257 while i < needle.len() && needle[i] == haystack[pos + i] {
258 i += 1;
259 }
260 if i < needle.len() {
261 pos += i - self.critical_pos + 1;
262 shift = 0;
263 } else {
264 let mut j = self.critical_pos;
265 while j > shift && needle[j] == haystack[pos + j] {
266 j -= 1;
267 }
268 if j <= shift && needle[shift] == haystack[pos + shift] {
269 return Some(pos);
270 }
271 pos += period;
272 shift = needle.len() - period;
273 }
274 }
275 None
276 }
277
278 #[inline(never)]
279 fn find_large(
280 &self,
281 prestate: &mut PrefilterState,
282 haystack: &BStr,
283 shift: usize,
284 ) -> Option<usize> {
285 if prestate.is_effective() {
286 self.find_large_imp(prestate, true, haystack, shift)
287 } else {
288 self.find_large_imp(prestate, false, haystack, shift)
289 }
290 }
291
292 #[inline(always)]
293 fn find_large_imp(
294 &self,
295 prestate: &mut PrefilterState,
296 prefilter: bool,
297 haystack: &BStr,
298 shift: usize,
299 ) -> Option<usize> {
300 let needle = self.needle.as_bstr();
301 let mut pos = 0;
302 while pos + needle.len() <= haystack.len() {
303 let mut i = self.critical_pos;
304 if prefilter && prestate.is_effective() {
305 match self.freqy.find_candidate(prestate, &haystack[pos..]) {
306 None => return None,
307 Some(found) => {
308 pos += found;
309 if pos + needle.len() > haystack.len() {
310 return None;
311 }
312 }
313 }
314 }
315 while i < needle.len() && needle[i] == haystack[pos + i] {
316 i += 1;
317 }
318 if i < needle.len() {
319 pos += i - self.critical_pos + 1;
320 } else {
321 let mut j = self.critical_pos;
322 while j > 0 && needle[j] == haystack[pos + j] {
323 j -= 1;
324 }
325 if j == 0 && needle[0] == haystack[pos] {
326 return Some(pos);
327 }
328 pos += shift;
329 }
330 }
331 None
332 }
333
334 #[inline(never)]
335 fn rfind_small(
336 &self,
337 prestate: &mut PrefilterState,
338 haystack: &BStr,
339 period: usize,
340 ) -> Option<usize> {
341 if prestate.is_effective() {
342 self.rfind_small_imp(prestate, true, haystack, period)
343 } else {
344 self.rfind_small_imp(prestate, false, haystack, period)
345 }
346 }
347
348 #[inline(always)]
349 fn rfind_small_imp(
350 &self,
351 prestate: &mut PrefilterState,
352 prefilter: bool,
353 haystack: &BStr,
354 period: usize,
355 ) -> Option<usize> {
356 let needle = &*self.needle;
357 let nlen = needle.len();
358 let mut pos = haystack.len();
359 let mut shift = nlen;
360 while pos >= nlen {
361 let mut i = cmp::min(self.critical_pos, shift);
362 if prefilter && prestate.is_effective() {
363 match self.freqy.rfind_candidate(prestate, &haystack[..pos]) {
364 None => return None,
365 Some(found) => {
366 shift = nlen;
367 i = self.critical_pos;
368 pos = found;
369 if pos < nlen {
370 return None;
371 }
372 }
373 }
374 }
375 while i > 0 && needle[i - 1] == haystack[pos - nlen + i - 1] {
376 i -= 1;
377 }
378 if i > 0 || needle[0] != haystack[pos - nlen] {
379 pos -= self.critical_pos - i + 1;
380 shift = nlen;
381 } else {
382 let mut j = self.critical_pos;
383 while j < shift && needle[j] == haystack[pos - nlen + j] {
384 j += 1;
385 }
386 if j == shift {
387 return Some(pos - nlen);
388 }
389 pos -= period;
390 shift = period;
391 }
392 }
393 None
394 }
395
396 #[inline(never)]
397 fn rfind_large(
398 &self,
399 prestate: &mut PrefilterState,
400 haystack: &BStr,
401 shift: usize,
402 ) -> Option<usize> {
403 if prestate.is_effective() {
404 self.rfind_large_imp(prestate, true, haystack, shift)
405 } else {
406 self.rfind_large_imp(prestate, false, haystack, shift)
407 }
408 }
409
410 #[inline(always)]
411 fn rfind_large_imp(
412 &self,
413 prestate: &mut PrefilterState,
414 prefilter: bool,
415 haystack: &BStr,
416 shift: usize,
417 ) -> Option<usize> {
418 let needle = &*self.needle;
419 let nlen = needle.len();
420 let mut pos = haystack.len();
421 while pos >= nlen {
422 if prefilter && prestate.is_effective() {
423 match self.freqy.rfind_candidate(prestate, &haystack[..pos]) {
424 None => return None,
425 Some(found) => {
426 pos = found;
427 if pos < nlen {
428 return None;
429 }
430 }
431 }
432 }
433
434 let mut i = self.critical_pos;
435 while i > 0 && needle[i - 1] == haystack[pos - nlen + i - 1] {
436 i -= 1;
437 }
438 if i > 0 || needle[0] != haystack[pos - nlen] {
439 pos -= self.critical_pos - i + 1;
440 } else {
441 let mut j = self.critical_pos;
442 while j < nlen && needle[j] == haystack[pos - nlen + j] {
443 j += 1;
444 }
445 if j == nlen {
446 return Some(pos - nlen);
447 }
448 pos -= shift;
449 }
450 }
451 None
452 }
453 }
454
455 /// A representation of the amount we're allowed to shift by during Two-Way
456 /// search.
457 ///
458 /// When computing a critical factorization of the needle, we find the position
459 /// of the critical factorization by finding the needle's maximal (or minimal)
460 /// suffix, along with the period of that suffix. It turns out that the period
461 /// of that suffix is a lower bound on the period of the needle itself.
462 ///
463 /// This lower bound is equivalent to the actual period of the needle in
464 /// some cases. To describe that case, we denote the needle as `x` where
465 /// `x = uv` and `v` is the lexicographic maximal suffix of `v`. The lower
466 /// bound given here is always the period of `v`, which is `<= period(x)`. The
467 /// case where `period(v) == period(x)` occurs when `len(u) < (len(x) / 2)` and
468 /// where `u` is a suffix of `v[0..period(v)]`.
469 ///
470 /// This case is important because the search algorithm for when the
471 /// periods are equivalent is slightly different than the search algorithm
472 /// for when the periods are not equivalent. In particular, when they aren't
473 /// equivalent, we know that the period of the needle is no less than half its
474 /// length. In this case, we shift by an amount less than or equal to the
475 /// period of the needle (determined by the maximum length of the components
476 /// of the critical factorization of `x`, i.e., `max(len(u), len(v))`)..
477 ///
478 /// The above two cases are represented by the variants below. Each entails
479 /// a different instantiation of the Two-Way search algorithm.
480 ///
481 /// N.B. If we could find a way to compute the exact period in all cases,
482 /// then we could collapse this case analysis and simplify the algorithm. The
483 /// Two-Way paper suggests this is possible, but more reading is required to
484 /// grok why the authors didn't pursue that path.
485 #[derive(Clone, Debug)]
486 enum Shift {
487 Small { period: usize },
488 Large { shift: usize },
489 }
490
491 impl Shift {
492 /// Compute the shift for a given needle in the forward direction.
493 ///
494 /// This requires a lower bound on the period and a critical position.
495 /// These can be computed by extracting both the minimal and maximal
496 /// lexicographic suffixes, and choosing the right-most starting position.
497 /// The lower bound on the period is then the period of the chosen suffix.
498 fn forward(
499 needle: &BStr,
500 period_lower_bound: usize,
501 critical_pos: usize,
502 ) -> Shift {
503 let large = cmp::max(critical_pos, needle.len() - critical_pos);
504 if critical_pos * 2 >= needle.len() {
505 return Shift::Large { shift: large };
506 }
507
508 let (u, v) = needle.split_at(critical_pos);
509 if !v[..period_lower_bound].ends_with(u) {
510 return Shift::Large { shift: large };
511 }
512 Shift::Small { period: period_lower_bound }
513 }
514
515 /// Compute the shift for a given needle in the reverse direction.
516 ///
517 /// This requires a lower bound on the period and a critical position.
518 /// These can be computed by extracting both the minimal and maximal
519 /// lexicographic suffixes, and choosing the left-most starting position.
520 /// The lower bound on the period is then the period of the chosen suffix.
521 fn reverse(
522 needle: &BStr,
523 period_lower_bound: usize,
524 critical_pos: usize,
525 ) -> Shift {
526 let large = cmp::max(critical_pos, needle.len() - critical_pos);
527 if (needle.len() - critical_pos) * 2 >= needle.len() {
528 return Shift::Large { shift: large };
529 }
530
531 let (v, u) = needle.split_at(critical_pos);
532 if !v[v.len() - period_lower_bound..].starts_with(u) {
533 return Shift::Large { shift: large };
534 }
535 Shift::Small { period: period_lower_bound }
536 }
537 }
538
539 /// A suffix extracted from a needle along with its period.
540 #[derive(Debug)]
541 struct Suffix {
542 /// The starting position of this suffix.
543 ///
544 /// If this is a forward suffix, then `&bytes[pos..]` can be used. If this
545 /// is a reverse suffix, then `&bytes[..pos]` can be used. That is, for
546 /// forward suffixes, this is an inclusive starting position, where as for
547 /// reverse suffixes, this is an exclusive ending position.
548 pos: usize,
549 /// The period of this suffix.
550 ///
551 /// Note that this is NOT necessarily the period of the string from which
552 /// this suffix comes from. (It is always less than or equal to the period
553 /// of the original string.)
554 period: usize,
555 }
556
557 impl Suffix {
558 fn forward(needle: &BStr, kind: SuffixKind) -> Suffix {
559 debug_assert!(!needle.is_empty());
560
561 // suffix represents our maximal (or minimal) suffix, along with
562 // its period.
563 let mut suffix = Suffix { pos: 0, period: 1 };
564 // The start of a suffix in `needle` that we are considering as a
565 // more maximal (or minimal) suffix than what's in `suffix`.
566 let mut candidate_start = 1;
567 // The current offset of our suffixes that we're comparing.
568 //
569 // When the characters at this offset are the same, then we mush on
570 // to the next position since no decision is possible. When the
571 // candidate's character is greater (or lesser) than the corresponding
572 // character than our current maximal (or minimal) suffix, then the
573 // current suffix is changed over to the candidate and we restart our
574 // search. Otherwise, the candidate suffix is no good and we restart
575 // our search on the next candidate.
576 //
577 // The three cases above correspond to the three cases in the loop
578 // below.
579 let mut offset = 0;
580
581 while candidate_start + offset < needle.len() {
582 let current = needle[suffix.pos + offset];
583 let candidate = needle[candidate_start + offset];
584 match kind.cmp(current, candidate) {
585 SuffixOrdering::Accept => {
586 suffix = Suffix { pos: candidate_start, period: 1 };
587 candidate_start += 1;
588 offset = 0;
589 }
590 SuffixOrdering::Skip => {
591 candidate_start += offset + 1;
592 offset = 0;
593 suffix.period = candidate_start - suffix.pos;
594 }
595 SuffixOrdering::Push => {
596 if offset + 1 == suffix.period {
597 candidate_start += suffix.period;
598 offset = 0;
599 } else {
600 offset += 1;
601 }
602 }
603 }
604 }
605 suffix
606 }
607
608 fn reverse(needle: &BStr, kind: SuffixKind) -> Suffix {
609 debug_assert!(!needle.is_empty());
610
611 // See the comments in `forward` for how this works.
612 let mut suffix = Suffix { pos: needle.len(), period: 1 };
613 if needle.len() == 1 {
614 return suffix;
615 }
616 let mut candidate_start = needle.len() - 1;
617 let mut offset = 0;
618
619 while offset < candidate_start {
620 let current = needle[suffix.pos - offset - 1];
621 let candidate = needle[candidate_start - offset - 1];
622 match kind.cmp(current, candidate) {
623 SuffixOrdering::Accept => {
624 suffix = Suffix { pos: candidate_start, period: 1 };
625 candidate_start -= 1;
626 offset = 0;
627 }
628 SuffixOrdering::Skip => {
629 candidate_start -= offset + 1;
630 offset = 0;
631 suffix.period = suffix.pos - candidate_start;
632 }
633 SuffixOrdering::Push => {
634 if offset + 1 == suffix.period {
635 candidate_start -= suffix.period;
636 offset = 0;
637 } else {
638 offset += 1;
639 }
640 }
641 }
642 }
643 suffix
644 }
645 }
646
647 /// The kind of suffix to extract.
648 #[derive(Clone, Copy, Debug)]
649 enum SuffixKind {
650 /// Extract the smallest lexicographic suffix from a string.
651 ///
652 /// Technically, this doesn't actually pick the smallest lexicographic
653 /// suffix. e.g., Given the choice between `a` and `aa`, this will choose
654 /// the latter over the former, even though `a < aa`. The reasoning for
655 /// this isn't clear from the paper, but it still smells like a minimal
656 /// suffix.
657 Minimal,
658 /// Extract the largest lexicographic suffix from a string.
659 ///
660 /// Unlike `Minimal`, this really does pick the maximum suffix. e.g., Given
661 /// the choice between `z` and `zz`, this will choose the latter over the
662 /// former.
663 Maximal,
664 }
665
666 /// The result of comparing corresponding bytes between two suffixes.
667 #[derive(Clone, Copy, Debug)]
668 enum SuffixOrdering {
669 /// This occurs when the given candidate byte indicates that the candidate
670 /// suffix is better than the current maximal (or minimal) suffix. That is,
671 /// the current candidate suffix should supplant the current maximal (or
672 /// minimal) suffix.
673 Accept,
674 /// This occurs when the given candidate byte excludes the candidate suffix
675 /// from being better than the current maximal (or minimal) suffix. That
676 /// is, the current candidate suffix should be dropped and the next one
677 /// should be considered.
678 Skip,
679 /// This occurs when no decision to accept or skip the candidate suffix
680 /// can be made, e.g., when corresponding bytes are equivalent. In this
681 /// case, the next corresponding bytes should be compared.
682 Push,
683 }
684
685 impl SuffixKind {
686 /// Returns true if and only if the given candidate byte indicates that
687 /// it should replace the current suffix as the maximal (or minimal)
688 /// suffix.
689 fn cmp(self, current: u8, candidate: u8) -> SuffixOrdering {
690 use self::SuffixOrdering::*;
691
692 match self {
693 SuffixKind::Minimal if candidate < current => Accept,
694 SuffixKind::Minimal if candidate > current => Skip,
695 SuffixKind::Minimal => Push,
696 SuffixKind::Maximal if candidate > current => Accept,
697 SuffixKind::Maximal if candidate < current => Skip,
698 SuffixKind::Maximal => Push,
699 }
700 }
701 }
702
703 // N.B. There are more holistic tests in src/search/tests.rs.
704 #[cfg(test)]
705 mod tests {
706 use bstr::{B, BStr};
707 use bstring::BString;
708
709 use super::*;
710
711 /// Convenience wrapper for computing the suffix as a byte string.
712 fn get_suffix_forward(needle: &BStr, kind: SuffixKind) -> (&BStr, usize) {
713 let s = Suffix::forward(needle, kind);
714 (&needle[s.pos..], s.period)
715 }
716
717 /// Convenience wrapper for computing the reverse suffix as a byte string.
718 fn get_suffix_reverse(needle: &BStr, kind: SuffixKind) -> (&BStr, usize) {
719 let s = Suffix::reverse(needle, kind);
720 (&needle[..s.pos], s.period)
721 }
722
723 /// Return all of the non-empty suffixes in the given byte string.
724 fn suffixes(bytes: &BStr) -> Vec<&BStr> {
725 (0..bytes.len()).map(|i| &bytes[i..]).collect()
726 }
727
728 /// Return the lexicographically maximal suffix of the given byte string.
729 fn naive_maximal_suffix_forward(needle: &BStr) -> &BStr {
730 let mut sufs = suffixes(needle);
731 sufs.sort();
732 sufs.pop().unwrap()
733 }
734
735 /// Return the lexicographically maximal suffix of the reverse of the given
736 /// byte string.
737 fn naive_maximal_suffix_reverse(needle: &BStr) -> BString {
738 let mut reversed = needle.to_bstring();
739 reversed.reverse_bytes();
740 let mut got = naive_maximal_suffix_forward(&reversed).to_bstring();
741 got.reverse_bytes();
742 got
743 }
744
745 #[test]
746 fn suffix_forward() {
747 macro_rules! assert_suffix_min {
748 ($given:expr, $expected:expr, $period:expr) => {
749 let (got_suffix, got_period) = get_suffix_forward(
750 B($given),
751 SuffixKind::Minimal,
752 );
753 assert_eq!((B($expected), $period), (got_suffix, got_period));
754 };
755 }
756
757 macro_rules! assert_suffix_max {
758 ($given:expr, $expected:expr, $period:expr) => {
759 let (got_suffix, got_period) = get_suffix_forward(
760 B($given),
761 SuffixKind::Maximal,
762 );
763 assert_eq!((B($expected), $period), (got_suffix, got_period));
764 };
765 }
766
767 assert_suffix_min!("a", "a", 1);
768 assert_suffix_max!("a", "a", 1);
769
770 assert_suffix_min!("ab", "ab", 2);
771 assert_suffix_max!("ab", "b", 1);
772
773 assert_suffix_min!("ba", "a", 1);
774 assert_suffix_max!("ba", "ba", 2);
775
776 assert_suffix_min!("abc", "abc", 3);
777 assert_suffix_max!("abc", "c", 1);
778
779 assert_suffix_min!("acb", "acb", 3);
780 assert_suffix_max!("acb", "cb", 2);
781
782 assert_suffix_min!("cba", "a", 1);
783 assert_suffix_max!("cba", "cba", 3);
784
785 assert_suffix_min!("abcabc", "abcabc", 3);
786 assert_suffix_max!("abcabc", "cabc", 3);
787
788 assert_suffix_min!("abcabcabc", "abcabcabc", 3);
789 assert_suffix_max!("abcabcabc", "cabcabc", 3);
790
791 assert_suffix_min!("abczz", "abczz", 5);
792 assert_suffix_max!("abczz", "zz", 1);
793
794 assert_suffix_min!("zzabc", "abc", 3);
795 assert_suffix_max!("zzabc", "zzabc", 5);
796
797 assert_suffix_min!("aaa", "aaa", 1);
798 assert_suffix_max!("aaa", "aaa", 1);
799
800 assert_suffix_min!("foobar", "ar", 2);
801 assert_suffix_max!("foobar", "r", 1);
802 }
803
804 #[test]
805 fn suffix_reverse() {
806 macro_rules! assert_suffix_min {
807 ($given:expr, $expected:expr, $period:expr) => {
808 let (got_suffix, got_period) = get_suffix_reverse(
809 B($given),
810 SuffixKind::Minimal,
811 );
812 assert_eq!((B($expected), $period), (got_suffix, got_period));
813 };
814 }
815
816 macro_rules! assert_suffix_max {
817 ($given:expr, $expected:expr, $period:expr) => {
818 let (got_suffix, got_period) = get_suffix_reverse(
819 B($given),
820 SuffixKind::Maximal,
821 );
822 assert_eq!((B($expected), $period), (got_suffix, got_period));
823 };
824 }
825
826 assert_suffix_min!("a", "a", 1);
827 assert_suffix_max!("a", "a", 1);
828
829 assert_suffix_min!("ab", "a", 1);
830 assert_suffix_max!("ab", "ab", 2);
831
832 assert_suffix_min!("ba", "ba", 2);
833 assert_suffix_max!("ba", "b", 1);
834
835 assert_suffix_min!("abc", "a", 1);
836 assert_suffix_max!("abc", "abc", 3);
837
838 assert_suffix_min!("acb", "a", 1);
839 assert_suffix_max!("acb", "ac", 2);
840
841 assert_suffix_min!("cba", "cba", 3);
842 assert_suffix_max!("cba", "c", 1);
843
844 assert_suffix_min!("abcabc", "abca", 3);
845 assert_suffix_max!("abcabc", "abcabc", 3);
846
847 assert_suffix_min!("abcabcabc", "abcabca", 3);
848 assert_suffix_max!("abcabcabc", "abcabcabc", 3);
849
850 assert_suffix_min!("abczz", "a", 1);
851 assert_suffix_max!("abczz", "abczz", 5);
852
853 assert_suffix_min!("zzabc", "zza", 3);
854 assert_suffix_max!("zzabc", "zz", 1);
855
856 assert_suffix_min!("aaa", "aaa", 1);
857 assert_suffix_max!("aaa", "aaa", 1);
858 }
859
860 quickcheck! {
861 fn qc_suffix_forward_maximal(bytes: Vec<u8>) -> bool {
862 let bytes = BString::from(bytes);
863 if bytes.is_empty() {
864 return true;
865 }
866
867 let (got, _) = get_suffix_forward(&bytes, SuffixKind::Maximal);
868 let expected = naive_maximal_suffix_forward(&bytes);
869 got == expected
870 }
871
872 fn qc_suffix_reverse_maximal(bytes: Vec<u8>) -> bool {
873 let bytes = BString::from(bytes);
874 if bytes.is_empty() {
875 return true;
876 }
877
878 let (got, _) = get_suffix_reverse(&bytes, SuffixKind::Maximal);
879 let expected = naive_maximal_suffix_reverse(&bytes);
880 got == expected
881 }
882 }
883 }