2 Provides routines for extracting literal prefixes and suffixes from an `Hir`.
11 use crate::hir
::{self, Hir, HirKind}
;
13 /// A set of literal byte strings extracted from a regular expression.
15 /// Every member of the set is a `Literal`, which is represented by a
16 /// `Vec<u8>`. (Notably, it may contain invalid UTF-8.) Every member is
17 /// said to be either *complete* or *cut*. A complete literal means that
18 /// it extends until the beginning (or end) of the regular expression. In
19 /// some circumstances, this can be used to indicate a match in the regular
22 /// A key aspect of literal extraction is knowing when to stop. It is not
23 /// feasible to blindly extract all literals from a regular expression, even if
24 /// there are finitely many. For example, the regular expression `[0-9]{10}`
25 /// has `10^10` distinct literals. For this reason, literal extraction is
26 /// bounded to some low number by default using heuristics, but the limits can
29 /// **WARNING**: Literal extraction uses stack space proportional to the size
30 /// of the `Hir` expression. At some point, this drawback will be eliminated.
31 /// To protect yourself, set a reasonable
32 /// [`nest_limit` on your `Parser`](../../struct.ParserBuilder.html#method.nest_limit).
33 /// This is done for you by default.
34 #[derive(Clone, Eq, PartialEq)]
41 /// A single member of a set of literals extracted from a regular expression.
43 /// This type has `Deref` and `DerefMut` impls to `Vec<u8>` so that all slice
44 /// and `Vec` operations are available.
45 #[derive(Clone, Eq, Ord)]
52 /// Returns a new empty set of literals using default limits.
53 pub fn empty() -> Literals
{
54 Literals { lits: vec![], limit_size: 250, limit_class: 10 }
57 /// Returns a set of literal prefixes extracted from the given `Hir`.
58 pub fn prefixes(expr
: &Hir
) -> Literals
{
59 let mut lits
= Literals
::empty();
60 lits
.union_prefixes(expr
);
64 /// Returns a set of literal suffixes extracted from the given `Hir`.
65 pub fn suffixes(expr
: &Hir
) -> Literals
{
66 let mut lits
= Literals
::empty();
67 lits
.union_suffixes(expr
);
71 /// Get the approximate size limit (in bytes) of this set.
72 pub fn limit_size(&self) -> usize {
76 /// Set the approximate size limit (in bytes) of this set.
78 /// If extracting a literal would put the set over this limit, then
81 /// The new limits will only apply to additions to this set. Existing
82 /// members remain unchanged, even if the set exceeds the new limit.
83 pub fn set_limit_size(&mut self, size
: usize) -> &mut Literals
{
84 self.limit_size
= size
;
88 /// Get the character class size limit for this set.
89 pub fn limit_class(&self) -> usize {
93 /// Limits the size of character(or byte) classes considered.
95 /// A value of `0` prevents all character classes from being considered.
97 /// This limit also applies to case insensitive literals, since each
98 /// character in the case insensitive literal is converted to a class, and
101 /// The new limits will only apply to additions to this set. Existing
102 /// members remain unchanged, even if the set exceeds the new limit.
103 pub fn set_limit_class(&mut self, size
: usize) -> &mut Literals
{
104 self.limit_class
= size
;
108 /// Returns the set of literals as a slice. Its order is unspecified.
109 pub fn literals(&self) -> &[Literal
] {
113 /// Returns the length of the smallest literal.
115 /// Returns None is there are no literals in the set.
116 pub fn min_len(&self) -> Option
<usize> {
118 for lit
in &self.lits
{
120 None
=> min
= Some(lit
.len()),
121 Some(m
) if lit
.len() < m
=> min
= Some(lit
.len()),
128 /// Returns true if all members in this set are complete.
129 pub fn all_complete(&self) -> bool
{
130 !self.lits
.is_empty() && self.lits
.iter().all(|l
| !l
.is_cut())
133 /// Returns true if any member in this set is complete.
134 pub fn any_complete(&self) -> bool
{
135 self.lits
.iter().any(|lit
| !lit
.is_cut())
138 /// Returns true if this set contains an empty literal.
139 pub fn contains_empty(&self) -> bool
{
140 self.lits
.iter().any(|lit
| lit
.is_empty())
143 /// Returns true if this set is empty or if all of its members is empty.
144 pub fn is_empty(&self) -> bool
{
145 self.lits
.is_empty() || self.lits
.iter().all(|lit
| lit
.is_empty())
148 /// Returns a new empty set of literals using this set's limits.
149 pub fn to_empty(&self) -> Literals
{
150 let mut lits
= Literals
::empty();
151 lits
.set_limit_size(self.limit_size
).set_limit_class(self.limit_class
);
155 /// Returns the longest common prefix of all members in this set.
156 pub fn longest_common_prefix(&self) -> &[u8] {
160 let lit0
= &*self.lits
[0];
161 let mut len
= lit0
.len();
162 for lit
in &self.lits
[1..] {
165 lit
.iter().zip(lit0
).take_while(|&(a
, b
)| a
== b
).count(),
171 /// Returns the longest common suffix of all members in this set.
172 pub fn longest_common_suffix(&self) -> &[u8] {
176 let lit0
= &*self.lits
[0];
177 let mut len
= lit0
.len();
178 for lit
in &self.lits
[1..] {
183 .zip(lit0
.iter().rev())
184 .take_while(|&(a
, b
)| a
== b
)
188 &self.lits
[0][self.lits
[0].len() - len
..]
191 /// Returns a new set of literals with the given number of bytes trimmed
192 /// from the suffix of each literal.
194 /// If any literal would be cut out completely by trimming, then None is
197 /// Any duplicates that are created as a result of this transformation are
199 pub fn trim_suffix(&self, num_bytes
: usize) -> Option
<Literals
> {
200 if self.min_len().map(|len
| len
<= num_bytes
).unwrap_or(true) {
203 let mut new
= self.to_empty();
204 for mut lit
in self.lits
.iter().cloned() {
205 let new_len
= lit
.len() - num_bytes
;
206 lit
.truncate(new_len
);
215 /// Returns a new set of prefixes of this set of literals that are
216 /// guaranteed to be unambiguous.
218 /// Any substring match with a member of the set is returned is guaranteed
219 /// to never overlap with a substring match of another member of the set
220 /// at the same starting position.
222 /// Given any two members of the returned set, neither is a substring of
224 pub fn unambiguous_prefixes(&self) -> Literals
{
225 if self.lits
.is_empty() {
226 return self.to_empty();
228 let mut old
: Vec
<Literal
> = self.lits
.iter().cloned().collect();
229 let mut new
= self.to_empty();
230 'OUTER
: while let Some(mut candidate
) = old
.pop() {
231 if candidate
.is_empty() {
234 if new
.lits
.is_empty() {
235 new
.lits
.push(candidate
);
238 for lit2
in &mut new
.lits
{
242 if &candidate
== lit2
{
243 // If the literal is already in the set, then we can
244 // just drop it. But make sure that cut literals are
246 candidate
.cut
= candidate
.cut
|| lit2
.cut
;
247 lit2
.cut
= candidate
.cut
;
250 if candidate
.len() < lit2
.len() {
251 if let Some(i
) = position(&candidate
, &lit2
) {
253 let mut lit3
= lit2
.clone();
260 if let Some(i
) = position(&lit2
, &candidate
) {
262 let mut new_candidate
= candidate
.clone();
263 new_candidate
.truncate(i
);
265 old
.push(new_candidate
);
269 // Oops, the candidate is already represented in the set.
270 if candidate
.is_empty() {
274 new
.lits
.push(candidate
);
276 new
.lits
.retain(|lit
| !lit
.is_empty());
282 /// Returns a new set of suffixes of this set of literals that are
283 /// guaranteed to be unambiguous.
285 /// Any substring match with a member of the set is returned is guaranteed
286 /// to never overlap with a substring match of another member of the set
287 /// at the same ending position.
289 /// Given any two members of the returned set, neither is a substring of
291 pub fn unambiguous_suffixes(&self) -> Literals
{
292 // This is a touch wasteful...
293 let mut lits
= self.clone();
295 let mut unamb
= lits
.unambiguous_prefixes();
300 /// Unions the prefixes from the given expression to this set.
302 /// If prefixes could not be added (for example, this set would exceed its
303 /// size limits or the set of prefixes from `expr` includes the empty
304 /// string), then false is returned.
306 /// Note that prefix literals extracted from `expr` are said to be complete
307 /// if and only if the literal extends from the beginning of `expr` to the
309 pub fn union_prefixes(&mut self, expr
: &Hir
) -> bool
{
310 let mut lits
= self.to_empty();
311 prefixes(expr
, &mut lits
);
312 !lits
.is_empty() && !lits
.contains_empty() && self.union(lits
)
315 /// Unions the suffixes from the given expression to this set.
317 /// If suffixes could not be added (for example, this set would exceed its
318 /// size limits or the set of suffixes from `expr` includes the empty
319 /// string), then false is returned.
321 /// Note that prefix literals extracted from `expr` are said to be complete
322 /// if and only if the literal extends from the end of `expr` to the
323 /// beginning of `expr`.
324 pub fn union_suffixes(&mut self, expr
: &Hir
) -> bool
{
325 let mut lits
= self.to_empty();
326 suffixes(expr
, &mut lits
);
328 !lits
.is_empty() && !lits
.contains_empty() && self.union(lits
)
331 /// Unions this set with another set.
333 /// If the union would cause the set to exceed its limits, then the union
334 /// is skipped and it returns false. Otherwise, if the union succeeds, it
336 pub fn union(&mut self, lits
: Literals
) -> bool
{
337 if self.num_bytes() + lits
.num_bytes() > self.limit_size
{
341 self.lits
.push(Literal
::empty());
343 self.lits
.extend(lits
.lits
);
348 /// Extends this set with another set.
350 /// The set of literals is extended via a cross product.
352 /// If a cross product would cause this set to exceed its limits, then the
353 /// cross product is skipped and it returns false. Otherwise, if the cross
354 /// product succeeds, it returns true.
355 pub fn cross_product(&mut self, lits
: &Literals
) -> bool
{
359 // Check that we make sure we stay in our limits.
361 if self.is_empty() || !self.any_complete() {
362 size_after
= self.num_bytes();
363 for lits_lit
in lits
.literals() {
364 size_after
+= lits_lit
.len();
367 size_after
= self.lits
.iter().fold(0, |accum
, lit
| {
368 accum
+ if lit
.is_cut() { lit.len() }
else { 0 }
370 for lits_lit
in lits
.literals() {
371 for self_lit
in self.literals() {
372 if !self_lit
.is_cut() {
373 size_after
+= self_lit
.len() + lits_lit
.len();
378 if size_after
> self.limit_size
{
382 let mut base
= self.remove_complete();
384 base
= vec
![Literal
::empty()];
386 for lits_lit
in lits
.literals() {
387 for mut self_lit
in base
.clone() {
388 self_lit
.extend(&**lits_lit
);
389 self_lit
.cut
= lits_lit
.cut
;
390 self.lits
.push(self_lit
);
396 /// Extends each literal in this set with the bytes given.
398 /// If the set is empty, then the given literal is added to the set.
400 /// If adding any number of bytes to all members of this set causes a limit
401 /// to be exceeded, then no bytes are added and false is returned. If a
402 /// prefix of `bytes` can be fit into this set, then it is used and all
403 /// resulting literals are cut.
404 pub fn cross_add(&mut self, bytes
: &[u8]) -> bool
{
405 // N.B. This could be implemented by simply calling cross_product with
406 // a literal set containing just `bytes`, but we can be smarter about
407 // taking shorter prefixes of `bytes` if they'll fit.
408 if bytes
.is_empty() {
411 if self.lits
.is_empty() {
412 let i
= cmp
::min(self.limit_size
, bytes
.len());
413 self.lits
.push(Literal
::new(bytes
[..i
].to_owned()));
414 self.lits
[0].cut
= i
< bytes
.len();
415 return !self.lits
[0].is_cut();
417 let size
= self.num_bytes();
418 if size
+ self.lits
.len() >= self.limit_size
{
422 while size
+ (i
* self.lits
.len()) <= self.limit_size
427 for lit
in &mut self.lits
{
429 lit
.extend(&bytes
[..i
]);
438 /// Adds the given literal to this set.
440 /// Returns false if adding this literal would cause the class to be too
442 pub fn add(&mut self, lit
: Literal
) -> bool
{
443 if self.num_bytes() + lit
.len() > self.limit_size
{
450 /// Extends each literal in this set with the character class given.
452 /// Returns false if the character class was too big to add.
453 pub fn add_char_class(&mut self, cls
: &hir
::ClassUnicode
) -> bool
{
454 self._add_char_class(cls
, false)
457 /// Extends each literal in this set with the character class given,
458 /// writing the bytes of each character in reverse.
460 /// Returns false if the character class was too big to add.
461 fn add_char_class_reverse(&mut self, cls
: &hir
::ClassUnicode
) -> bool
{
462 self._add_char_class(cls
, true)
467 cls
: &hir
::ClassUnicode
,
472 if self.class_exceeds_limits(cls_char_count(cls
)) {
475 let mut base
= self.remove_complete();
477 base
= vec
![Literal
::empty()];
479 for r
in cls
.iter() {
480 let (s
, e
) = (r
.start
as u32, r
.end
as u32 + 1);
481 for c
in (s
..e
).filter_map(char::from_u32
) {
482 for mut lit
in base
.clone() {
483 let mut bytes
= c
.to_string().into_bytes();
495 /// Extends each literal in this set with the byte class given.
497 /// Returns false if the byte class was too big to add.
498 pub fn add_byte_class(&mut self, cls
: &hir
::ClassBytes
) -> bool
{
499 if self.class_exceeds_limits(cls_byte_count(cls
)) {
502 let mut base
= self.remove_complete();
504 base
= vec
![Literal
::empty()];
506 for r
in cls
.iter() {
507 let (s
, e
) = (r
.start
as u32, r
.end
as u32 + 1);
508 for b
in (s
..e
).map(|b
| b
as u8) {
509 for mut lit
in base
.clone() {
518 /// Cuts every member of this set. When a member is cut, it can never
520 pub fn cut(&mut self) {
521 for lit
in &mut self.lits
{
526 /// Reverses all members in place.
527 pub fn reverse(&mut self) {
528 for lit
in &mut self.lits
{
533 /// Clears this set of all members.
534 pub fn clear(&mut self) {
538 /// Pops all complete literals out of this set.
539 fn remove_complete(&mut self) -> Vec
<Literal
> {
540 let mut base
= vec
![];
541 for lit
in mem
::replace(&mut self.lits
, vec
![]) {
551 /// Returns the total number of bytes in this set.
552 fn num_bytes(&self) -> usize {
553 self.lits
.iter().fold(0, |accum
, lit
| accum
+ lit
.len())
556 /// Returns true if a character class with the given size would cause this
557 /// set to exceed its limits.
559 /// The size given should correspond to the number of items in the class.
560 fn class_exceeds_limits(&self, size
: usize) -> bool
{
561 if size
> self.limit_class
{
564 // This is an approximation since codepoints in a char class can encode
566 let new_byte_count
= if self.lits
.is_empty() {
569 self.lits
.iter().fold(0, |accum
, lit
| {
572 // If the literal is cut, then we'll never add
573 // anything to it, so don't count it.
576 (lit
.len() + 1) * size
580 new_byte_count
> self.limit_size
584 fn prefixes(expr
: &Hir
, lits
: &mut Literals
) {
586 HirKind
::Literal(hir
::Literal
::Unicode(c
)) => {
587 let mut buf
= [0; 4];
588 lits
.cross_add(c
.encode_utf8(&mut buf
).as_bytes());
590 HirKind
::Literal(hir
::Literal
::Byte(b
)) => {
591 lits
.cross_add(&[b
]);
593 HirKind
::Class(hir
::Class
::Unicode(ref cls
)) => {
594 if !lits
.add_char_class(cls
) {
598 HirKind
::Class(hir
::Class
::Bytes(ref cls
)) => {
599 if !lits
.add_byte_class(cls
) {
603 HirKind
::Group(hir
::Group { ref hir, .. }
) => {
604 prefixes(&**hir
, lits
);
606 HirKind
::Repetition(ref x
) => match x
.kind
{
607 hir
::RepetitionKind
::ZeroOrOne
=> {
608 repeat_zero_or_one_literals(&x
.hir
, lits
, prefixes
);
610 hir
::RepetitionKind
::ZeroOrMore
=> {
611 repeat_zero_or_more_literals(&x
.hir
, lits
, prefixes
);
613 hir
::RepetitionKind
::OneOrMore
=> {
614 repeat_one_or_more_literals(&x
.hir
, lits
, prefixes
);
616 hir
::RepetitionKind
::Range(ref rng
) => {
617 let (min
, max
) = match *rng
{
618 hir
::RepetitionRange
::Exactly(m
) => (m
, Some(m
)),
619 hir
::RepetitionRange
::AtLeast(m
) => (m
, None
),
620 hir
::RepetitionRange
::Bounded(m
, n
) => (m
, Some(n
)),
622 repeat_range_literals(
623 &x
.hir
, min
, max
, x
.greedy
, lits
, prefixes
,
627 HirKind
::Concat(ref es
) if es
.is_empty() => {}
628 HirKind
::Concat(ref es
) if es
.len() == 1 => prefixes(&es
[0], lits
),
629 HirKind
::Concat(ref es
) => {
631 if let HirKind
::Anchor(hir
::Anchor
::StartText
) = *e
.kind() {
632 if !lits
.is_empty() {
636 lits
.add(Literal
::empty());
639 let mut lits2
= lits
.to_empty();
640 prefixes(e
, &mut lits2
);
641 if !lits
.cross_product(&lits2
) || !lits2
.any_complete() {
642 // If this expression couldn't yield any literal that
643 // could be extended, then we need to quit. Since we're
644 // short-circuiting, we also need to freeze every member.
650 HirKind
::Alternation(ref es
) => {
651 alternate_literals(es
, lits
, prefixes
);
657 fn suffixes(expr
: &Hir
, lits
: &mut Literals
) {
659 HirKind
::Literal(hir
::Literal
::Unicode(c
)) => {
660 let mut buf
= [0u8; 4];
661 let i
= c
.encode_utf8(&mut buf
).len();
662 let buf
= &mut buf
[..i
];
666 HirKind
::Literal(hir
::Literal
::Byte(b
)) => {
667 lits
.cross_add(&[b
]);
669 HirKind
::Class(hir
::Class
::Unicode(ref cls
)) => {
670 if !lits
.add_char_class_reverse(cls
) {
674 HirKind
::Class(hir
::Class
::Bytes(ref cls
)) => {
675 if !lits
.add_byte_class(cls
) {
679 HirKind
::Group(hir
::Group { ref hir, .. }
) => {
680 suffixes(&**hir
, lits
);
682 HirKind
::Repetition(ref x
) => match x
.kind
{
683 hir
::RepetitionKind
::ZeroOrOne
=> {
684 repeat_zero_or_one_literals(&x
.hir
, lits
, suffixes
);
686 hir
::RepetitionKind
::ZeroOrMore
=> {
687 repeat_zero_or_more_literals(&x
.hir
, lits
, suffixes
);
689 hir
::RepetitionKind
::OneOrMore
=> {
690 repeat_one_or_more_literals(&x
.hir
, lits
, suffixes
);
692 hir
::RepetitionKind
::Range(ref rng
) => {
693 let (min
, max
) = match *rng
{
694 hir
::RepetitionRange
::Exactly(m
) => (m
, Some(m
)),
695 hir
::RepetitionRange
::AtLeast(m
) => (m
, None
),
696 hir
::RepetitionRange
::Bounded(m
, n
) => (m
, Some(n
)),
698 repeat_range_literals(
699 &x
.hir
, min
, max
, x
.greedy
, lits
, suffixes
,
703 HirKind
::Concat(ref es
) if es
.is_empty() => {}
704 HirKind
::Concat(ref es
) if es
.len() == 1 => suffixes(&es
[0], lits
),
705 HirKind
::Concat(ref es
) => {
706 for e
in es
.iter().rev() {
707 if let HirKind
::Anchor(hir
::Anchor
::EndText
) = *e
.kind() {
708 if !lits
.is_empty() {
712 lits
.add(Literal
::empty());
715 let mut lits2
= lits
.to_empty();
716 suffixes(e
, &mut lits2
);
717 if !lits
.cross_product(&lits2
) || !lits2
.any_complete() {
718 // If this expression couldn't yield any literal that
719 // could be extended, then we need to quit. Since we're
720 // short-circuiting, we also need to freeze every member.
726 HirKind
::Alternation(ref es
) => {
727 alternate_literals(es
, lits
, suffixes
);
733 fn repeat_zero_or_one_literals
<F
: FnMut(&Hir
, &mut Literals
)>(
739 &Hir
::repetition(hir
::Repetition
{
740 kind
: hir
::RepetitionKind
::ZeroOrMore
,
741 // FIXME: Our literal extraction doesn't care about greediness.
742 // Which is partially why we're treating 'e?' as 'e*'. Namely,
743 // 'ab??' yields [Complete(ab), Complete(a)], but it should yield
744 // [Complete(a), Complete(ab)] because of the non-greediness.
746 hir
: Box
::new(e
.clone()),
752 fn repeat_zero_or_more_literals
<F
: FnMut(&Hir
, &mut Literals
)>(
757 let (mut lits2
, mut lits3
) = (lits
.clone(), lits
.to_empty());
758 lits3
.set_limit_size(lits
.limit_size() / 2);
761 if lits3
.is_empty() || !lits2
.cross_product(&lits3
) {
766 lits2
.add(Literal
::empty());
767 if !lits
.union(lits2
) {
772 fn repeat_one_or_more_literals
<F
: FnMut(&Hir
, &mut Literals
)>(
781 fn repeat_range_literals
<F
: FnMut(&Hir
, &mut Literals
)>(
790 // This is a bit conservative. If `max` is set, then we could
791 // treat this as a finite set of alternations. For now, we
792 // just treat it as `e*`.
794 &Hir
::repetition(hir
::Repetition
{
795 kind
: hir
::RepetitionKind
::ZeroOrMore
,
797 hir
: Box
::new(e
.clone()),
803 let n
= cmp
::min(lits
.limit_size
, min
as usize);
804 let es
= iter
::repeat(e
.clone()).take(n
).collect();
805 f(&Hir
::concat(es
), lits
);
806 if n
< min
as usize || lits
.contains_empty() {
810 if max
.map_or(true, |max
| min
< max
) {
816 fn alternate_literals
<F
: FnMut(&Hir
, &mut Literals
)>(
821 let mut lits2
= lits
.to_empty();
823 let mut lits3
= lits
.to_empty();
824 lits3
.set_limit_size(lits
.limit_size() / 5);
826 if lits3
.is_empty() || !lits2
.union(lits3
) {
827 // If we couldn't find suffixes for *any* of the
828 // alternates, then the entire alternation has to be thrown
829 // away and any existing members must be frozen. Similarly,
830 // if the union couldn't complete, stop and freeze.
835 if !lits
.cross_product(&lits2
) {
840 impl fmt
::Debug
for Literals
{
841 fn fmt(&self, f
: &mut fmt
::Formatter
<'_
>) -> fmt
::Result
{
842 f
.debug_struct("Literals")
843 .field("lits", &self.lits
)
844 .field("limit_size", &self.limit_size
)
845 .field("limit_class", &self.limit_class
)
851 /// Returns a new complete literal with the bytes given.
852 pub fn new(bytes
: Vec
<u8>) -> Literal
{
853 Literal { v: bytes, cut: false }
856 /// Returns a new complete empty literal.
857 pub fn empty() -> Literal
{
858 Literal { v: vec![], cut: false }
861 /// Returns true if this literal was "cut."
862 pub fn is_cut(&self) -> bool
{
866 /// Cuts this literal.
867 pub fn cut(&mut self) {
872 impl PartialEq
for Literal
{
873 fn eq(&self, other
: &Literal
) -> bool
{
878 impl PartialOrd
for Literal
{
879 fn partial_cmp(&self, other
: &Literal
) -> Option
<cmp
::Ordering
> {
880 self.v
.partial_cmp(&other
.v
)
884 impl fmt
::Debug
for Literal
{
885 fn fmt(&self, f
: &mut fmt
::Formatter
<'_
>) -> fmt
::Result
{
887 write
!(f
, "Cut({})", escape_unicode(&self.v
))
889 write
!(f
, "Complete({})", escape_unicode(&self.v
))
894 impl AsRef
<[u8]> for Literal
{
895 fn as_ref(&self) -> &[u8] {
900 impl ops
::Deref
for Literal
{
901 type Target
= Vec
<u8>;
902 fn deref(&self) -> &Vec
<u8> {
907 impl ops
::DerefMut
for Literal
{
908 fn deref_mut(&mut self) -> &mut Vec
<u8> {
913 fn position(needle
: &[u8], mut haystack
: &[u8]) -> Option
<usize> {
915 while haystack
.len() >= needle
.len() {
916 if needle
== &haystack
[..needle
.len()] {
920 haystack
= &haystack
[1..];
925 fn escape_unicode(bytes
: &[u8]) -> String
{
926 let show
= match ::std
::str::from_utf8(bytes
) {
927 Ok(v
) => v
.to_string(),
928 Err(_
) => escape_bytes(bytes
),
930 let mut space_escaped
= String
::new();
931 for c
in show
.chars() {
932 if c
.is_whitespace() {
933 let escaped
= if c
as u32 <= 0x7F {
936 if c
as u32 <= 0xFFFF {
937 format
!(r
"\u{{{:04x}}}", c
as u32)
939 format
!(r
"\U{{{:08x}}}", c
as u32)
942 space_escaped
.push_str(&escaped
);
944 space_escaped
.push(c
);
950 fn escape_bytes(bytes
: &[u8]) -> String
{
951 let mut s
= String
::new();
953 s
.push_str(&escape_byte(b
));
958 fn escape_byte(byte
: u8) -> String
{
959 use std
::ascii
::escape_default
;
961 let escaped
: Vec
<u8> = escape_default(byte
).collect();
962 String
::from_utf8_lossy(&escaped
).into_owned()
965 fn cls_char_count(cls
: &hir
::ClassUnicode
) -> usize {
966 cls
.iter().map(|&r
| 1 + (r
.end
as u32) - (r
.start
as u32)).sum
::<u32>()
970 fn cls_byte_count(cls
: &hir
::ClassBytes
) -> usize {
971 cls
.iter().map(|&r
| 1 + (r
.end
as u32) - (r
.start
as u32)).sum
::<u32>()
979 use super::{escape_bytes, Literal, Literals}
;
981 use crate::ParserBuilder
;
983 // To make test failures easier to read.
984 #[derive(Debug, Eq, PartialEq)]
985 struct Bytes(Vec
<ULiteral
>);
986 #[derive(Debug, Eq, PartialEq)]
987 struct Unicode(Vec
<ULiteral
>);
989 fn escape_lits(blits
: &[Literal
]) -> Vec
<ULiteral
> {
990 let mut ulits
= vec
![];
993 .push(ULiteral { v: escape_bytes(&blit), cut: blit.is_cut() }
);
998 fn create_lits
<I
: IntoIterator
<Item
= Literal
>>(it
: I
) -> Literals
{
1000 lits
: it
.into_iter().collect(),
1006 // Needs to be pub for 1.3?
1007 #[derive(Clone, Eq, PartialEq)]
1008 pub struct ULiteral
{
1014 fn is_cut(&self) -> bool
{
1019 impl fmt
::Debug
for ULiteral
{
1020 fn fmt(&self, f
: &mut fmt
::Formatter
<'_
>) -> fmt
::Result
{
1022 write
!(f
, "Cut({})", self.v
)
1024 write
!(f
, "Complete({})", self.v
)
1029 impl PartialEq
<Literal
> for ULiteral
{
1030 fn eq(&self, other
: &Literal
) -> bool
{
1031 self.v
.as_bytes() == &*other
.v
&& self.is_cut() == other
.is_cut()
1035 impl PartialEq
<ULiteral
> for Literal
{
1036 fn eq(&self, other
: &ULiteral
) -> bool
{
1037 &*self.v
== other
.v
.as_bytes() && self.is_cut() == other
.is_cut()
1041 #[allow(non_snake_case)]
1042 fn C(s
: &'
static str) -> ULiteral
{
1043 ULiteral { v: s.to_owned(), cut: true }
1045 #[allow(non_snake_case)]
1046 fn M(s
: &'
static str) -> ULiteral
{
1047 ULiteral { v: s.to_owned(), cut: false }
1050 fn prefixes(lits
: &mut Literals
, expr
: &Hir
) {
1051 lits
.union_prefixes(expr
);
1054 fn suffixes(lits
: &mut Literals
, expr
: &Hir
) {
1055 lits
.union_suffixes(expr
);
1058 macro_rules
! assert_lit_eq
{
1059 ($which
:ident
, $got_lits
:expr
, $
($expected_lit
:expr
),*) => {{
1060 let expected
: Vec
<ULiteral
> = vec
![$
($expected_lit
),*];
1061 let lits
= $got_lits
;
1063 $
which(expected
.clone()),
1064 $
which(escape_lits(lits
.literals())));
1066 !expected
.is_empty() && expected
.iter().all(|l
| !l
.is_cut()),
1067 lits
.all_complete());
1069 expected
.iter().any(|l
| !l
.is_cut()),
1070 lits
.any_complete());
1074 macro_rules
! test_lit
{
1075 ($name
:ident
, $which
:ident
, $re
:expr
) => {
1076 test_lit
!($name
, $which
, $re
,);
1078 ($name
:ident
, $which
:ident
, $re
:expr
, $
($lit
:expr
),*) => {
1081 let expr
= ParserBuilder
::new()
1085 let lits
= Literals
::$
which(&expr
);
1086 assert_lit_eq
!(Unicode
, lits
, $
($lit
),*);
1088 let expr
= ParserBuilder
::new()
1089 .allow_invalid_utf8(true)
1094 let lits
= Literals
::$
which(&expr
);
1095 assert_lit_eq
!(Bytes
, lits
, $
($lit
),*);
1100 // ************************************************************************
1101 // Tests for prefix literal extraction.
1102 // ************************************************************************
1104 // Elementary tests.
1105 test_lit
!(pfx_one_lit1
, prefixes
, "a", M("a"));
1106 test_lit
!(pfx_one_lit2
, prefixes
, "abc", M("abc"));
1107 test_lit
!(pfx_one_lit3
, prefixes
, "(?u)☃", M("\\xe2\\x98\\x83"));
1108 #[cfg(feature = "unicode-case")]
1109 test_lit
!(pfx_one_lit4
, prefixes
, "(?ui)☃", M("\\xe2\\x98\\x83"));
1110 test_lit
!(pfx_class1
, prefixes
, "[1-4]", M("1"), M("2"), M("3"), M("4"));
1115 M("\\xe2\\x85\\xa0"),
1116 M("\\xe2\\x98\\x83")
1118 #[cfg(feature = "unicode-case")]
1123 M("\\xe2\\x85\\xa0"),
1124 M("\\xe2\\x85\\xb0"),
1125 M("\\xe2\\x98\\x83")
1127 test_lit
!(pfx_one_lit_casei1
, prefixes
, "(?i-u)a", M("A"), M("a"));
1141 test_lit
!(pfx_group1
, prefixes
, "(a)", M("a"));
1142 test_lit
!(pfx_rep_zero_or_one1
, prefixes
, "a?");
1143 test_lit
!(pfx_rep_zero_or_one2
, prefixes
, "(?:abc)?");
1144 test_lit
!(pfx_rep_zero_or_one_cat1
, prefixes
, "ab?", C("ab"), M("a"));
1145 // FIXME: This should return [M("a"), M("ab")] because of the non-greedy
1146 // repetition. As a work-around, we rewrite ab?? as ab*?, and thus we get
1148 test_lit
!(pfx_rep_zero_or_one_cat2
, prefixes
, "ab??", C("ab"), M("a"));
1149 test_lit
!(pfx_rep_zero_or_more1
, prefixes
, "a*");
1150 test_lit
!(pfx_rep_zero_or_more2
, prefixes
, "(?:abc)*");
1151 test_lit
!(pfx_rep_one_or_more1
, prefixes
, "a+", C("a"));
1152 test_lit
!(pfx_rep_one_or_more2
, prefixes
, "(?:abc)+", C("abc"));
1153 test_lit
!(pfx_rep_nested_one_or_more
, prefixes
, "(?:a+)+", C("a"));
1154 test_lit
!(pfx_rep_range1
, prefixes
, "a{0}");
1155 test_lit
!(pfx_rep_range2
, prefixes
, "a{0,}");
1156 test_lit
!(pfx_rep_range3
, prefixes
, "a{0,1}");
1157 test_lit
!(pfx_rep_range4
, prefixes
, "a{1}", M("a"));
1158 test_lit
!(pfx_rep_range5
, prefixes
, "a{2}", M("aa"));
1159 test_lit
!(pfx_rep_range6
, prefixes
, "a{1,2}", C("a"));
1160 test_lit
!(pfx_rep_range7
, prefixes
, "a{2,3}", C("aa"));
1162 // Test regexes with concatenations.
1163 test_lit
!(pfx_cat1
, prefixes
, "(?:a)(?:b)", M("ab"));
1164 test_lit
!(pfx_cat2
, prefixes
, "[ab]z", M("az"), M("bz"));
1187 test_lit
!(pfx_cat5
, prefixes
, "a*b", C("a"), M("b"));
1188 test_lit
!(pfx_cat6
, prefixes
, "a*b*c", C("a"), C("b"), M("c"));
1189 test_lit
!(pfx_cat7
, prefixes
, "a*b*c+", C("a"), C("b"), C("c"));
1190 test_lit
!(pfx_cat8
, prefixes
, "a*b+c", C("a"), C("b"));
1191 test_lit
!(pfx_cat9
, prefixes
, "a*b+c*", C("a"), C("b"));
1192 test_lit
!(pfx_cat10
, prefixes
, "ab*", C("ab"), M("a"));
1193 test_lit
!(pfx_cat11
, prefixes
, "ab*c", C("ab"), M("ac"));
1194 test_lit
!(pfx_cat12
, prefixes
, "ab+", C("ab"));
1195 test_lit
!(pfx_cat13
, prefixes
, "ab+c", C("ab"));
1196 test_lit
!(pfx_cat14
, prefixes
, "a^", C("a"));
1197 test_lit
!(pfx_cat15
, prefixes
, "$a");
1198 test_lit
!(pfx_cat16
, prefixes
, r
"ab*c", C("ab"), M("ac"));
1199 test_lit
!(pfx_cat17
, prefixes
, r
"ab+c", C("ab"));
1200 test_lit
!(pfx_cat18
, prefixes
, r
"z*azb", C("z"), M("azb"));
1201 test_lit
!(pfx_cat19
, prefixes
, "a.z", C("a"));
1203 // Test regexes with alternations.
1204 test_lit
!(pfx_alt1
, prefixes
, "a|b", M("a"), M("b"));
1205 test_lit
!(pfx_alt2
, prefixes
, "[1-3]|b", M("1"), M("2"), M("3"), M("b"));
1206 test_lit
!(pfx_alt3
, prefixes
, "y(?:a|b)z", M("yaz"), M("ybz"));
1207 test_lit
!(pfx_alt4
, prefixes
, "a|b*");
1208 test_lit
!(pfx_alt5
, prefixes
, "a|b+", M("a"), C("b"));
1209 test_lit
!(pfx_alt6
, prefixes
, "a|(?:b|c*)");
1221 test_lit
!(pfx_alt8
, prefixes
, "a*b|c", C("a"), M("b"), M("c"));
1223 // Test regexes with empty assertions.
1224 test_lit
!(pfx_empty1
, prefixes
, "^a", M("a"));
1225 test_lit
!(pfx_empty2
, prefixes
, "a${2}", C("a"));
1226 test_lit
!(pfx_empty3
, prefixes
, "^abc", M("abc"));
1227 test_lit
!(pfx_empty4
, prefixes
, "(?:^abc)|(?:^z)", M("abc"), M("z"));
1229 // Make sure some curious regexes have no prefixes.
1230 test_lit
!(pfx_nothing1
, prefixes
, ".");
1231 test_lit
!(pfx_nothing2
, prefixes
, "(?s).");
1232 test_lit
!(pfx_nothing3
, prefixes
, "^");
1233 test_lit
!(pfx_nothing4
, prefixes
, "$");
1234 test_lit
!(pfx_nothing6
, prefixes
, "(?m)$");
1235 test_lit
!(pfx_nothing7
, prefixes
, r
"\b");
1236 test_lit
!(pfx_nothing8
, prefixes
, r
"\B");
1238 // Test a few regexes that defeat any prefix literal detection.
1239 test_lit
!(pfx_defeated1
, prefixes
, ".a");
1240 test_lit
!(pfx_defeated2
, prefixes
, "(?s).a");
1241 test_lit
!(pfx_defeated3
, prefixes
, "a*b*c*");
1242 test_lit
!(pfx_defeated4
, prefixes
, "a|.");
1243 test_lit
!(pfx_defeated5
, prefixes
, ".|a");
1244 test_lit
!(pfx_defeated6
, prefixes
, "a|^");
1245 test_lit
!(pfx_defeated7
, prefixes
, ".(?:a(?:b)(?:c))");
1246 test_lit
!(pfx_defeated8
, prefixes
, "$a");
1247 test_lit
!(pfx_defeated9
, prefixes
, "(?m)$a");
1248 test_lit
!(pfx_defeated10
, prefixes
, r
"\ba");
1249 test_lit
!(pfx_defeated11
, prefixes
, r
"\Ba");
1250 test_lit
!(pfx_defeated12
, prefixes
, "^*a");
1251 test_lit
!(pfx_defeated13
, prefixes
, "^+a");
1256 r
"M[ou]'?am+[ae]r .*([AEae]l[- ])?[GKQ]h?[aeu]+([dtz][dhz]?)+af[iy]",
1263 // ************************************************************************
1264 // Tests for quiting prefix literal search.
1265 // ************************************************************************
1267 macro_rules
! test_exhausted
{
1268 ($name
:ident
, $which
:ident
, $re
:expr
) => {
1269 test_exhausted
!($name
, $which
, $re
,);
1271 ($name
:ident
, $which
:ident
, $re
:expr
, $
($lit
:expr
),*) => {
1274 let expr
= ParserBuilder
::new()
1278 let mut lits
= Literals
::empty();
1279 lits
.set_limit_size(20).set_limit_class(10);
1280 $
which(&mut lits
, &expr
);
1281 assert_lit_eq
!(Unicode
, lits
, $
($lit
),*);
1283 let expr
= ParserBuilder
::new()
1284 .allow_invalid_utf8(true)
1289 let mut lits
= Literals
::empty();
1290 lits
.set_limit_size(20).set_limit_class(10);
1291 $
which(&mut lits
, &expr
);
1292 assert_lit_eq
!(Bytes
, lits
, $
($lit
),*);
1297 // These test use a much lower limit than the default so that we can
1298 // write test cases of reasonable size.
1299 test_exhausted
!(pfx_exhausted1
, prefixes
, "[a-z]");
1300 test_exhausted
!(pfx_exhausted2
, prefixes
, "[a-z]*A");
1301 test_exhausted
!(pfx_exhausted3
, prefixes
, "A[a-z]Z", C("A"));
1315 C("abababababababababab")
1320 "(?:(?:ab){100})*cd",
1327 "z(?:(?:ab){100})*cd",
1334 "aaaaaaaaaaaaaaaaaaaaz",
1335 C("aaaaaaaaaaaaaaaaaaaa")
1338 // ************************************************************************
1339 // Tests for suffix literal extraction.
1340 // ************************************************************************
1342 // Elementary tests.
1343 test_lit
!(sfx_one_lit1
, suffixes
, "a", M("a"));
1344 test_lit
!(sfx_one_lit2
, suffixes
, "abc", M("abc"));
1345 test_lit
!(sfx_one_lit3
, suffixes
, "(?u)☃", M("\\xe2\\x98\\x83"));
1346 #[cfg(feature = "unicode-case")]
1347 test_lit
!(sfx_one_lit4
, suffixes
, "(?ui)☃", M("\\xe2\\x98\\x83"));
1348 test_lit
!(sfx_class1
, suffixes
, "[1-4]", M("1"), M("2"), M("3"), M("4"));
1353 M("\\xe2\\x85\\xa0"),
1354 M("\\xe2\\x98\\x83")
1356 #[cfg(feature = "unicode-case")]
1361 M("\\xe2\\x85\\xa0"),
1362 M("\\xe2\\x85\\xb0"),
1363 M("\\xe2\\x98\\x83")
1365 test_lit
!(sfx_one_lit_casei1
, suffixes
, "(?i-u)a", M("A"), M("a"));
1379 test_lit
!(sfx_group1
, suffixes
, "(a)", M("a"));
1380 test_lit
!(sfx_rep_zero_or_one1
, suffixes
, "a?");
1381 test_lit
!(sfx_rep_zero_or_one2
, suffixes
, "(?:abc)?");
1382 test_lit
!(sfx_rep_zero_or_more1
, suffixes
, "a*");
1383 test_lit
!(sfx_rep_zero_or_more2
, suffixes
, "(?:abc)*");
1384 test_lit
!(sfx_rep_one_or_more1
, suffixes
, "a+", C("a"));
1385 test_lit
!(sfx_rep_one_or_more2
, suffixes
, "(?:abc)+", C("abc"));
1386 test_lit
!(sfx_rep_nested_one_or_more
, suffixes
, "(?:a+)+", C("a"));
1387 test_lit
!(sfx_rep_range1
, suffixes
, "a{0}");
1388 test_lit
!(sfx_rep_range2
, suffixes
, "a{0,}");
1389 test_lit
!(sfx_rep_range3
, suffixes
, "a{0,1}");
1390 test_lit
!(sfx_rep_range4
, suffixes
, "a{1}", M("a"));
1391 test_lit
!(sfx_rep_range5
, suffixes
, "a{2}", M("aa"));
1392 test_lit
!(sfx_rep_range6
, suffixes
, "a{1,2}", C("a"));
1393 test_lit
!(sfx_rep_range7
, suffixes
, "a{2,3}", C("aa"));
1395 // Test regexes with concatenations.
1396 test_lit
!(sfx_cat1
, suffixes
, "(?:a)(?:b)", M("ab"));
1397 test_lit
!(sfx_cat2
, suffixes
, "[ab]z", M("az"), M("bz"));
1420 test_lit
!(sfx_cat5
, suffixes
, "a*b", C("ab"), M("b"));
1421 test_lit
!(sfx_cat6
, suffixes
, "a*b*c", C("bc"), C("ac"), M("c"));
1422 test_lit
!(sfx_cat7
, suffixes
, "a*b*c+", C("c"));
1423 test_lit
!(sfx_cat8
, suffixes
, "a*b+c", C("bc"));
1424 test_lit
!(sfx_cat9
, suffixes
, "a*b+c*", C("c"), C("b"));
1425 test_lit
!(sfx_cat10
, suffixes
, "ab*", C("b"), M("a"));
1426 test_lit
!(sfx_cat11
, suffixes
, "ab*c", C("bc"), M("ac"));
1427 test_lit
!(sfx_cat12
, suffixes
, "ab+", C("b"));
1428 test_lit
!(sfx_cat13
, suffixes
, "ab+c", C("bc"));
1429 test_lit
!(sfx_cat14
, suffixes
, "a^");
1430 test_lit
!(sfx_cat15
, suffixes
, "$a", C("a"));
1431 test_lit
!(sfx_cat16
, suffixes
, r
"ab*c", C("bc"), M("ac"));
1432 test_lit
!(sfx_cat17
, suffixes
, r
"ab+c", C("bc"));
1433 test_lit
!(sfx_cat18
, suffixes
, r
"z*azb", C("zazb"), M("azb"));
1434 test_lit
!(sfx_cat19
, suffixes
, "a.z", C("z"));
1436 // Test regexes with alternations.
1437 test_lit
!(sfx_alt1
, suffixes
, "a|b", M("a"), M("b"));
1438 test_lit
!(sfx_alt2
, suffixes
, "[1-3]|b", M("1"), M("2"), M("3"), M("b"));
1439 test_lit
!(sfx_alt3
, suffixes
, "y(?:a|b)z", M("yaz"), M("ybz"));
1440 test_lit
!(sfx_alt4
, suffixes
, "a|b*");
1441 test_lit
!(sfx_alt5
, suffixes
, "a|b+", M("a"), C("b"));
1442 test_lit
!(sfx_alt6
, suffixes
, "a|(?:b|c*)");
1454 test_lit
!(sfx_alt8
, suffixes
, "a*b|c", C("ab"), M("b"), M("c"));
1456 // Test regexes with empty assertions.
1457 test_lit
!(sfx_empty1
, suffixes
, "a$", M("a"));
1458 test_lit
!(sfx_empty2
, suffixes
, "${2}a", C("a"));
1460 // Make sure some curious regexes have no suffixes.
1461 test_lit
!(sfx_nothing1
, suffixes
, ".");
1462 test_lit
!(sfx_nothing2
, suffixes
, "(?s).");
1463 test_lit
!(sfx_nothing3
, suffixes
, "^");
1464 test_lit
!(sfx_nothing4
, suffixes
, "$");
1465 test_lit
!(sfx_nothing6
, suffixes
, "(?m)$");
1466 test_lit
!(sfx_nothing7
, suffixes
, r
"\b");
1467 test_lit
!(sfx_nothing8
, suffixes
, r
"\B");
1469 // Test a few regexes that defeat any suffix literal detection.
1470 test_lit
!(sfx_defeated1
, suffixes
, "a.");
1471 test_lit
!(sfx_defeated2
, suffixes
, "(?s)a.");
1472 test_lit
!(sfx_defeated3
, suffixes
, "a*b*c*");
1473 test_lit
!(sfx_defeated4
, suffixes
, "a|.");
1474 test_lit
!(sfx_defeated5
, suffixes
, ".|a");
1475 test_lit
!(sfx_defeated6
, suffixes
, "a|^");
1476 test_lit
!(sfx_defeated7
, suffixes
, "(?:a(?:b)(?:c)).");
1477 test_lit
!(sfx_defeated8
, suffixes
, "a^");
1478 test_lit
!(sfx_defeated9
, suffixes
, "(?m)a$");
1479 test_lit
!(sfx_defeated10
, suffixes
, r
"a\b");
1480 test_lit
!(sfx_defeated11
, suffixes
, r
"a\B");
1481 test_lit
!(sfx_defeated12
, suffixes
, "a^*");
1482 test_lit
!(sfx_defeated13
, suffixes
, "a^+");
1484 // These test use a much lower limit than the default so that we can
1485 // write test cases of reasonable size.
1486 test_exhausted
!(sfx_exhausted1
, suffixes
, "[a-z]");
1487 test_exhausted
!(sfx_exhausted2
, suffixes
, "A[a-z]*");
1488 test_exhausted
!(sfx_exhausted3
, suffixes
, "A[a-z]Z", C("Z"));
1502 C("abababababababababab")
1507 "cd(?:(?:ab){100})*",
1514 "cd(?:(?:ab){100})*z",
1521 "zaaaaaaaaaaaaaaaaaaaa",
1522 C("aaaaaaaaaaaaaaaaaaaa")
1525 // ************************************************************************
1526 // Tests for generating unambiguous literal sets.
1527 // ************************************************************************
1529 macro_rules
! test_unamb
{
1530 ($name
:ident
, $given
:expr
, $expected
:expr
) => {
1533 let given
: Vec
<Literal
> = $given
1536 let cut
= ul
.is_cut();
1537 Literal { v: ul.v.into_bytes(), cut: cut }
1540 let lits
= create_lits(given
);
1541 let got
= lits
.unambiguous_prefixes();
1542 assert_eq
!($expected
, escape_lits(got
.literals()));
1547 test_unamb
!(unambiguous1
, vec
![M("z"), M("azb")], vec
![C("a"), C("z")]);
1550 vec
![M("zaaaaaa"), M("aa")],
1551 vec
![C("aa"), C("z")]
1555 vec
![M("Sherlock"), M("Watson")],
1556 vec
![M("Sherlock"), M("Watson")]
1558 test_unamb
!(unambiguous4
, vec
![M("abc"), M("bc")], vec
![C("a"), C("bc")]);
1559 test_unamb
!(unambiguous5
, vec
![M("bc"), M("abc")], vec
![C("a"), C("bc")]);
1560 test_unamb
!(unambiguous6
, vec
![M("a"), M("aa")], vec
![C("a")]);
1561 test_unamb
!(unambiguous7
, vec
![M("aa"), M("a")], vec
![C("a")]);
1562 test_unamb
!(unambiguous8
, vec
![M("ab"), M("a")], vec
![C("a")]);
1565 vec
![M("ac"), M("bc"), M("c"), M("ac"), M("abc"), M("c")],
1566 vec
![C("a"), C("b"), C("c")]
1570 vec
![M("Mo'"), M("Mu'"), M("Mo"), M("Mu")],
1571 vec
![C("Mo"), C("Mu")]
1575 vec
![M("zazb"), M("azb")],
1576 vec
![C("a"), C("z")]
1578 test_unamb
!(unambiguous12
, vec
![M("foo"), C("foo")], vec
![C("foo")]);
1581 vec
![M("ABCX"), M("CDAX"), M("BCX")],
1582 vec
![C("A"), C("BCX"), C("CD")]
1586 vec
![M("IMGX"), M("MVIX"), M("MGX"), M("DSX")],
1587 vec
![M("DSX"), C("I"), C("MGX"), C("MV")]
1591 vec
![M("IMG_"), M("MG_"), M("CIMG")],
1592 vec
![C("C"), C("I"), C("MG_")]
1595 // ************************************************************************
1596 // Tests for suffix trimming.
1597 // ************************************************************************
1598 macro_rules
! test_trim
{
1599 ($name
:ident
, $trim
:expr
, $given
:expr
, $expected
:expr
) => {
1602 let given
: Vec
<Literal
> = $given
1605 let cut
= ul
.is_cut();
1606 Literal { v: ul.v.into_bytes(), cut: cut }
1609 let lits
= create_lits(given
);
1610 let got
= lits
.trim_suffix($trim
).unwrap();
1611 assert_eq
!($expected
, escape_lits(got
.literals()));
1616 test_trim
!(trim1
, 1, vec
![M("ab"), M("yz")], vec
![C("a"), C("y")]);
1617 test_trim
!(trim2
, 1, vec
![M("abc"), M("abd")], vec
![C("ab")]);
1618 test_trim
!(trim3
, 2, vec
![M("abc"), M("abd")], vec
![C("a")]);
1619 test_trim
!(trim4
, 2, vec
![M("abc"), M("ghij")], vec
![C("a"), C("gh")]);
1621 // ************************************************************************
1622 // Tests for longest common prefix.
1623 // ************************************************************************
1625 macro_rules
! test_lcp
{
1626 ($name
:ident
, $given
:expr
, $expected
:expr
) => {
1629 let given
: Vec
<Literal
> = $given
1631 .map(|s
: &str| Literal
{
1632 v
: s
.to_owned().into_bytes(),
1636 let lits
= create_lits(given
);
1637 let got
= lits
.longest_common_prefix();
1638 assert_eq
!($expected
, escape_bytes(got
));
1643 test_lcp
!(lcp1
, vec
!["a"], "a");
1644 test_lcp
!(lcp2
, vec
![], "");
1645 test_lcp
!(lcp3
, vec
!["a", "b"], "");
1646 test_lcp
!(lcp4
, vec
!["ab", "ab"], "ab");
1647 test_lcp
!(lcp5
, vec
!["ab", "a"], "a");
1648 test_lcp
!(lcp6
, vec
!["a", "ab"], "a");
1649 test_lcp
!(lcp7
, vec
!["ab", "b"], "");
1650 test_lcp
!(lcp8
, vec
!["b", "ab"], "");
1651 test_lcp
!(lcp9
, vec
!["foobar", "foobaz"], "fooba");
1652 test_lcp
!(lcp10
, vec
!["foobar", "foobaz", "a"], "");
1653 test_lcp
!(lcp11
, vec
!["a", "foobar", "foobaz"], "");
1654 test_lcp
!(lcp12
, vec
!["foo", "flub", "flab", "floo"], "f");
1656 // ************************************************************************
1657 // Tests for longest common suffix.
1658 // ************************************************************************
1660 macro_rules
! test_lcs
{
1661 ($name
:ident
, $given
:expr
, $expected
:expr
) => {
1664 let given
: Vec
<Literal
> = $given
1666 .map(|s
: &str| Literal
{
1667 v
: s
.to_owned().into_bytes(),
1671 let lits
= create_lits(given
);
1672 let got
= lits
.longest_common_suffix();
1673 assert_eq
!($expected
, escape_bytes(got
));
1678 test_lcs
!(lcs1
, vec
!["a"], "a");
1679 test_lcs
!(lcs2
, vec
![], "");
1680 test_lcs
!(lcs3
, vec
!["a", "b"], "");
1681 test_lcs
!(lcs4
, vec
!["ab", "ab"], "ab");
1682 test_lcs
!(lcs5
, vec
!["ab", "a"], "");
1683 test_lcs
!(lcs6
, vec
!["a", "ab"], "");
1684 test_lcs
!(lcs7
, vec
!["ab", "b"], "b");
1685 test_lcs
!(lcs8
, vec
!["b", "ab"], "b");
1686 test_lcs
!(lcs9
, vec
!["barfoo", "bazfoo"], "foo");
1687 test_lcs
!(lcs10
, vec
!["barfoo", "bazfoo", "a"], "");
1688 test_lcs
!(lcs11
, vec
!["a", "barfoo", "bazfoo"], "");
1689 test_lcs
!(lcs12
, vec
!["flub", "bub", "boob", "dub"], "b");