1 // Copyright 2018 The Rust Project Developers. See the COPYRIGHT
2 // file at the top-level directory of this distribution and at
3 // http://rust-lang.org/COPYRIGHT.
5 // Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
6 // http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
7 // <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
8 // option. This file may not be copied, modified, or distributed
9 // except according to those terms.
12 Defines a high-level intermediate representation for regular expressions.
21 use hir
::interval
::{Interval, IntervalSet, IntervalSetIter}
;
24 pub use hir
::visitor
::{Visitor, visit}
;
32 /// An error that can occur while translating an `Ast` to a `Hir`.
33 #[derive(Clone, Debug, Eq, PartialEq)]
35 /// The kind of error.
37 /// The original pattern that the translator's Ast was parsed from. Every
38 /// span in an error is a valid range into this string.
40 /// The span of this error, derived from the Ast given to the translator.
45 /// Return the type of this error.
46 pub fn kind(&self) -> &ErrorKind
{
50 /// The original pattern string in which this error occurred.
52 /// Every span reported by this error is reported in terms of this string.
53 pub fn pattern(&self) -> &str {
57 /// Return the span at which this error occurred.
58 pub fn span(&self) -> &Span
{
63 /// The type of an error that occurred while building an `Hir`.
64 #[derive(Clone, Debug, Eq, PartialEq)]
66 /// This error occurs when a Unicode feature is used when Unicode
67 /// support is disabled. For example `(?-u:\pL)` would trigger this error.
69 /// This error occurs when translating a pattern that could match a byte
70 /// sequence that isn't UTF-8 and `allow_invalid_utf8` was disabled.
72 /// This occurs when an unrecognized Unicode property name could not
74 UnicodePropertyNotFound
,
75 /// This occurs when an unrecognized Unicode property value could not
77 UnicodePropertyValueNotFound
,
78 /// This occurs when the translator attempts to construct a character class
81 /// Note that this restriction in the translator may be removed in the
84 /// Hints that destructuring should not be exhaustive.
86 /// This enum may grow additional variants, so this makes sure clients
87 /// don't count on exhaustive matching. (Otherwise, adding a new variant
88 /// could break existing code.)
94 fn description(&self) -> &str {
95 use self::ErrorKind
::*;
97 UnicodeNotAllowed
=> "Unicode not allowed here",
98 InvalidUtf8
=> "pattern can match invalid UTF-8",
99 UnicodePropertyNotFound
=> "Unicode property not found",
100 UnicodePropertyValueNotFound
=> "Unicode property value not found",
101 EmptyClassNotAllowed
=> "empty character classes are not allowed",
107 impl error
::Error
for Error
{
108 fn description(&self) -> &str {
109 self.kind
.description()
113 impl fmt
::Display
for Error
{
114 fn fmt(&self, f
: &mut fmt
::Formatter
) -> fmt
::Result
{
115 ::error
::Formatter
::from(self).fmt(f
)
119 impl fmt
::Display
for ErrorKind
{
120 fn fmt(&self, f
: &mut fmt
::Formatter
) -> fmt
::Result
{
121 f
.write_str(self.description())
125 /// A high-level intermediate representation (HIR) for a regular expression.
127 /// The HIR of a regular expression represents an intermediate step between its
128 /// abstract syntax (a structured description of the concrete syntax) and
129 /// compiled byte codes. The purpose of HIR is to make regular expressions
130 /// easier to analyze. In particular, the AST is much more complex than the
131 /// HIR. For example, while an AST supports arbitrarily nested character
132 /// classes, the HIR will flatten all nested classes into a single set. The HIR
133 /// will also "compile away" every flag present in the concrete syntax. For
134 /// example, users of HIR expressions never need to worry about case folding;
135 /// it is handled automatically by the translator (e.g., by translating `(?i)A`
138 /// If the HIR was produced by a translator that disallows invalid UTF-8, then
139 /// the HIR is guaranteed to match UTF-8 exclusively.
141 /// This type defines its own destructor that uses constant stack space and
142 /// heap space proportional to the size of the HIR.
144 /// The specific type of an HIR expression can be accessed via its `kind`
145 /// or `into_kind` methods. This extra level of indirection exists for two
148 /// 1. Construction of an HIR expression *must* use the constructor methods
149 /// on this `Hir` type instead of building the `HirKind` values directly.
150 /// This permits construction to enforce invariants like "concatenations
151 /// always consist of two or more sub-expressions."
152 /// 2. Every HIR expression contains attributes that are defined inductively,
153 /// and can be computed cheaply during the construction process. For
154 /// example, one such attribute is whether the expression must match at the
155 /// beginning of the text.
157 /// Also, an `Hir`'s `fmt::Display` implementation prints an HIR as a regular
158 /// expression pattern string, and uses constant stack space and heap space
159 /// proportional to the size of the `Hir`.
160 #[derive(Clone, Debug, Eq, PartialEq)]
162 /// The underlying HIR kind.
164 /// Analysis info about this HIR, computed during construction.
168 /// The kind of an arbitrary `Hir` expression.
169 #[derive(Clone, Debug, Eq, PartialEq)]
171 /// The empty regular expression, which matches everything, including the
174 /// A single literal character that matches exactly this character.
176 /// A single character class that matches any of the characters in the
177 /// class. A class can either consist of Unicode scalar values as
178 /// characters, or it can use bytes.
180 /// An anchor assertion. An anchor assertion match always has zero length.
182 /// A word boundary assertion, which may or may not be Unicode aware. A
183 /// word boundary assertion match always has zero length.
184 WordBoundary(WordBoundary
),
185 /// A repetition operation applied to a child expression.
186 Repetition(Repetition
),
187 /// A possibly capturing group, which contains a child expression.
189 /// A concatenation of expressions. A concatenation always has at least two
190 /// child expressions.
192 /// A concatenation matches only if each of its child expression matches
193 /// one after the other.
195 /// An alternation of expressions. An alternation always has at least two
196 /// child expressions.
198 /// An alternation matches only if at least one of its child expression
199 /// matches. If multiple expressions match, then the leftmost is preferred.
200 Alternation(Vec
<Hir
>),
204 /// Returns a reference to the underlying HIR kind.
205 pub fn kind(&self) -> &HirKind
{
209 /// Consumes ownership of this HIR expression and returns its underlying
211 pub fn into_kind(mut self) -> HirKind
{
213 mem
::replace(&mut self.kind
, HirKind
::Empty
)
216 /// Returns an empty HIR expression.
218 /// An empty HIR expression always matches, including the empty string.
219 pub fn empty() -> Hir
{
220 let mut info
= HirInfo
::new();
221 info
.set_always_utf8(true);
222 info
.set_all_assertions(true);
223 info
.set_anchored_start(false);
224 info
.set_anchored_end(false);
225 info
.set_any_anchored_start(false);
226 info
.set_any_anchored_end(false);
227 info
.set_match_empty(true);
229 kind
: HirKind
::Empty
,
234 /// Creates a literal HIR expression.
236 /// If the given literal has a `Byte` variant with an ASCII byte, then this
237 /// method panics. This enforces the invariant that `Byte` variants are
238 /// only used to express matching of invalid UTF-8.
239 pub fn literal(lit
: Literal
) -> Hir
{
240 if let Literal
::Byte(b
) = lit
{
244 let mut info
= HirInfo
::new();
245 info
.set_always_utf8(lit
.is_unicode());
246 info
.set_all_assertions(false);
247 info
.set_anchored_start(false);
248 info
.set_anchored_end(false);
249 info
.set_any_anchored_start(false);
250 info
.set_any_anchored_end(false);
251 info
.set_match_empty(false);
253 kind
: HirKind
::Literal(lit
),
258 /// Creates a class HIR expression.
259 pub fn class(class
: Class
) -> Hir
{
260 let mut info
= HirInfo
::new();
261 info
.set_always_utf8(class
.is_always_utf8());
262 info
.set_all_assertions(false);
263 info
.set_anchored_start(false);
264 info
.set_anchored_end(false);
265 info
.set_any_anchored_start(false);
266 info
.set_any_anchored_end(false);
267 info
.set_match_empty(false);
269 kind
: HirKind
::Class(class
),
274 /// Creates an anchor assertion HIR expression.
275 pub fn anchor(anchor
: Anchor
) -> Hir
{
276 let mut info
= HirInfo
::new();
277 info
.set_always_utf8(true);
278 info
.set_all_assertions(true);
279 info
.set_anchored_start(false);
280 info
.set_anchored_end(false);
281 info
.set_any_anchored_start(false);
282 info
.set_any_anchored_end(false);
283 info
.set_match_empty(true);
284 if let Anchor
::StartText
= anchor
{
285 info
.set_anchored_start(true);
286 info
.set_any_anchored_start(true);
288 if let Anchor
::EndText
= anchor
{
289 info
.set_anchored_end(true);
290 info
.set_any_anchored_end(true);
293 kind
: HirKind
::Anchor(anchor
),
298 /// Creates a word boundary assertion HIR expression.
299 pub fn word_boundary(word_boundary
: WordBoundary
) -> Hir
{
300 let mut info
= HirInfo
::new();
301 info
.set_always_utf8(true);
302 info
.set_all_assertions(true);
303 info
.set_anchored_start(false);
304 info
.set_anchored_end(false);
305 info
.set_any_anchored_start(false);
306 info
.set_any_anchored_end(false);
307 // A negated word boundary matches the empty string, but a normal
308 // word boundary does not!
309 info
.set_match_empty(word_boundary
.is_negated());
310 // Negated ASCII word boundaries can match invalid UTF-8.
311 if let WordBoundary
::AsciiNegate
= word_boundary
{
312 info
.set_always_utf8(false);
315 kind
: HirKind
::WordBoundary(word_boundary
),
320 /// Creates a repetition HIR expression.
321 pub fn repetition(rep
: Repetition
) -> Hir
{
322 let mut info
= HirInfo
::new();
323 info
.set_always_utf8(rep
.hir
.is_always_utf8());
324 info
.set_all_assertions(rep
.hir
.is_all_assertions());
325 // If this operator can match the empty string, then it can never
327 info
.set_anchored_start(
328 !rep
.is_match_empty() && rep
.hir
.is_anchored_start()
330 info
.set_anchored_end(
331 !rep
.is_match_empty() && rep
.hir
.is_anchored_end()
333 info
.set_any_anchored_start(rep
.hir
.is_any_anchored_start());
334 info
.set_any_anchored_end(rep
.hir
.is_any_anchored_end());
335 info
.set_match_empty(rep
.is_match_empty() || rep
.hir
.is_match_empty());
337 kind
: HirKind
::Repetition(rep
),
342 /// Creates a group HIR expression.
343 pub fn group(group
: Group
) -> Hir
{
344 let mut info
= HirInfo
::new();
345 info
.set_always_utf8(group
.hir
.is_always_utf8());
346 info
.set_all_assertions(group
.hir
.is_all_assertions());
347 info
.set_anchored_start(group
.hir
.is_anchored_start());
348 info
.set_anchored_end(group
.hir
.is_anchored_end());
349 info
.set_any_anchored_start(group
.hir
.is_any_anchored_start());
350 info
.set_any_anchored_end(group
.hir
.is_any_anchored_end());
351 info
.set_match_empty(group
.hir
.is_match_empty());
353 kind
: HirKind
::Group(group
),
358 /// Returns the concatenation of the given expressions.
360 /// This flattens the concatenation as appropriate.
361 pub fn concat(mut exprs
: Vec
<Hir
>) -> Hir
{
364 1 => exprs
.pop().unwrap(),
366 let mut info
= HirInfo
::new();
367 info
.set_always_utf8(true);
368 info
.set_all_assertions(true);
369 info
.set_any_anchored_start(false);
370 info
.set_any_anchored_end(false);
371 info
.set_match_empty(true);
373 // Some attributes require analyzing all sub-expressions.
375 let x
= info
.is_always_utf8() && e
.is_always_utf8();
376 info
.set_always_utf8(x
);
378 let x
= info
.is_all_assertions() && e
.is_all_assertions();
379 info
.set_all_assertions(x
);
382 info
.is_any_anchored_start()
383 || e
.is_any_anchored_start();
384 info
.set_any_anchored_start(x
);
387 info
.is_any_anchored_end()
388 || e
.is_any_anchored_end();
389 info
.set_any_anchored_end(x
);
391 let x
= info
.is_match_empty() && e
.is_match_empty();
392 info
.set_match_empty(x
);
394 // Anchored attributes require something slightly more
395 // sophisticated. Normally, WLOG, to determine whether an
396 // expression is anchored to the start, we'd only need to check
397 // the first expression of a concatenation. However,
398 // expressions like `$\b^` are still anchored to the start,
399 // but the first expression in the concatenation *isn't*
400 // anchored to the start. So the "first" expression to look at
401 // is actually one that is either not an assertion or is
402 // specifically the StartText assertion.
403 info
.set_anchored_start(
406 e
.is_anchored_start() || e
.is_all_assertions()
409 e
.is_anchored_start()
411 // Similarly for the end anchor, but in reverse.
412 info
.set_anchored_end(
416 e
.is_anchored_end() || e
.is_all_assertions()
422 kind
: HirKind
::Concat(exprs
),
429 /// Returns the alternation of the given expressions.
431 /// This flattens the alternation as appropriate.
432 pub fn alternation(mut exprs
: Vec
<Hir
>) -> Hir
{
435 1 => exprs
.pop().unwrap(),
437 let mut info
= HirInfo
::new();
438 info
.set_always_utf8(true);
439 info
.set_all_assertions(true);
440 info
.set_anchored_start(true);
441 info
.set_anchored_end(true);
442 info
.set_any_anchored_start(false);
443 info
.set_any_anchored_end(false);
444 info
.set_match_empty(false);
446 // Some attributes require analyzing all sub-expressions.
448 let x
= info
.is_always_utf8() && e
.is_always_utf8();
449 info
.set_always_utf8(x
);
451 let x
= info
.is_all_assertions() && e
.is_all_assertions();
452 info
.set_all_assertions(x
);
454 let x
= info
.is_anchored_start() && e
.is_anchored_start();
455 info
.set_anchored_start(x
);
457 let x
= info
.is_anchored_end() && e
.is_anchored_end();
458 info
.set_anchored_end(x
);
461 info
.is_any_anchored_start()
462 || e
.is_any_anchored_start();
463 info
.set_any_anchored_start(x
);
466 info
.is_any_anchored_end()
467 || e
.is_any_anchored_end();
468 info
.set_any_anchored_end(x
);
470 let x
= info
.is_match_empty() || e
.is_match_empty();
471 info
.set_match_empty(x
);
474 kind
: HirKind
::Alternation(exprs
),
481 /// Build an HIR expression for `.`.
483 /// A `.` expression matches any character except for `\n`. To build an
484 /// expression that matches any character, including `\n`, use the `any`
487 /// If `bytes` is `true`, then this assumes characters are limited to a
489 pub fn dot(bytes
: bool
) -> Hir
{
491 let mut cls
= ClassBytes
::empty();
492 cls
.push(ClassBytesRange
::new(b'
\0'
, b'
\x09'
));
493 cls
.push(ClassBytesRange
::new(b'
\x0B'
, b'
\xFF'
));
494 Hir
::class(Class
::Bytes(cls
))
496 let mut cls
= ClassUnicode
::empty();
497 cls
.push(ClassUnicodeRange
::new('
\0'
, '
\x09'
));
498 cls
.push(ClassUnicodeRange
::new('
\x0B'
, '
\u{10FFFF}'
));
499 Hir
::class(Class
::Unicode(cls
))
503 /// Build an HIR expression for `(?s).`.
505 /// A `(?s).` expression matches any character, including `\n`. To build an
506 /// expression that matches any character except for `\n`, then use the
509 /// If `bytes` is `true`, then this assumes characters are limited to a
511 pub fn any(bytes
: bool
) -> Hir
{
513 let mut cls
= ClassBytes
::empty();
514 cls
.push(ClassBytesRange
::new(b'
\0'
, b'
\xFF'
));
515 Hir
::class(Class
::Bytes(cls
))
517 let mut cls
= ClassUnicode
::empty();
518 cls
.push(ClassUnicodeRange
::new('
\0'
, '
\u{10FFFF}'
));
519 Hir
::class(Class
::Unicode(cls
))
523 /// Return true if and only if this HIR will always match valid UTF-8.
525 /// When this returns false, then it is possible for this HIR expression
526 /// to match invalid UTF-8.
527 pub fn is_always_utf8(&self) -> bool
{
528 self.info
.is_always_utf8()
531 /// Returns true if and only if this entire HIR expression is made up of
532 /// zero-width assertions.
534 /// This includes expressions like `^$\b\A\z` and even `((\b)+())*^`, but
536 pub fn is_all_assertions(&self) -> bool
{
537 self.info
.is_all_assertions()
540 /// Return true if and only if this HIR is required to match from the
541 /// beginning of text. This includes expressions like `^foo`, `^(foo|bar)`,
542 /// `^foo|^bar` but not `^foo|bar`.
543 pub fn is_anchored_start(&self) -> bool
{
544 self.info
.is_anchored_start()
547 /// Return true if and only if this HIR is required to match at the end
548 /// of text. This includes expressions like `foo$`, `(foo|bar)$`,
549 /// `foo$|bar$` but not `foo$|bar`.
550 pub fn is_anchored_end(&self) -> bool
{
551 self.info
.is_anchored_end()
554 /// Return true if and only if this HIR contains any sub-expression that
555 /// is required to match at the beginning of text. Specifically, this
556 /// returns true if the `^` symbol (when multiline mode is disabled) or the
557 /// `\A` escape appear anywhere in the regex.
558 pub fn is_any_anchored_start(&self) -> bool
{
559 self.info
.is_any_anchored_start()
562 /// Return true if and only if this HIR contains any sub-expression that is
563 /// required to match at the end of text. Specifically, this returns true
564 /// if the `$` symbol (when multiline mode is disabled) or the `\z` escape
565 /// appear anywhere in the regex.
566 pub fn is_any_anchored_end(&self) -> bool
{
567 self.info
.is_any_anchored_end()
570 /// Return true if and only if the empty string is part of the language
571 /// matched by this regular expression.
573 /// This includes `a*`, `a?b*`, `a{0}`, `()`, `()+`, `^$`, `a|b?`, `\B`,
574 /// but not `a`, `a+` or `\b`.
575 pub fn is_match_empty(&self) -> bool
{
576 self.info
.is_match_empty()
581 /// Return true if and only if this HIR is the empty regular expression.
583 /// Note that this is not defined inductively. That is, it only tests if
584 /// this kind is the `Empty` variant. To get the inductive definition,
585 /// use the `is_match_empty` method on [`Hir`](struct.Hir.html).
586 pub fn is_empty(&self) -> bool
{
588 HirKind
::Empty
=> true,
593 /// Returns true if and only if this kind has any (including possibly
594 /// empty) subexpressions.
595 pub fn has_subexprs(&self) -> bool
{
598 | HirKind
::Literal(_
)
601 | HirKind
::WordBoundary(_
) => false,
603 | HirKind
::Repetition(_
)
605 | HirKind
::Alternation(_
) => true,
610 /// Print a display representation of this Hir.
612 /// The result of this is a valid regular expression pattern string.
614 /// This implementation uses constant stack space and heap space proportional
615 /// to the size of the `Hir`.
616 impl fmt
::Display
for Hir
{
617 fn fmt(&self, f
: &mut fmt
::Formatter
) -> fmt
::Result
{
618 use hir
::print
::Printer
;
619 Printer
::new().print(self, f
)
623 /// The high-level intermediate representation of a literal.
625 /// A literal corresponds to a single character, where a character is either
626 /// defined by a Unicode scalar value or an arbitrary byte. Unicode characters
627 /// are preferred whenever possible. In particular, a `Byte` variant is only
628 /// ever produced when it could match invalid UTF-8.
629 #[derive(Clone, Debug, Eq, PartialEq)]
631 /// A single character represented by a Unicode scalar value.
633 /// A single character represented by an arbitrary byte.
638 /// Returns true if and only if this literal corresponds to a Unicode
640 pub fn is_unicode(&self) -> bool
{
642 Literal
::Unicode(_
) => true,
643 Literal
::Byte(b
) if b
<= 0x7F => true,
644 Literal
::Byte(_
) => false,
649 /// The high-level intermediate representation of a character class.
651 /// A character class corresponds to a set of characters. A character is either
652 /// defined by a Unicode scalar value or a byte. Unicode characters are used
653 /// by default, while bytes are used when Unicode mode (via the `u` flag) is
656 /// A character class, regardless of its character type, is represented by a
657 /// sequence of non-overlapping non-adjacent ranges of characters.
659 /// Note that unlike [`Literal`](enum.Literal.html), a `Bytes` variant may
660 /// be produced even when it exclusively matches valid UTF-8. This is because
661 /// a `Bytes` variant represents an intention by the author of the regular
662 /// expression to disable Unicode mode, which in turn impacts the semantics of
663 /// case insensitive matching. For example, `(?i)k` and `(?i-u)k` will not
664 /// match the same set of strings.
665 #[derive(Clone, Debug, Eq, PartialEq)]
667 /// A set of characters represented by Unicode scalar values.
668 Unicode(ClassUnicode
),
669 /// A set of characters represented by arbitrary bytes (one byte per
675 /// Apply Unicode simple case folding to this character class, in place.
676 /// The character class will be expanded to include all simple case folded
677 /// character variants.
679 /// If this is a byte oriented character class, then this will be limited
680 /// to the ASCII ranges `A-Z` and `a-z`.
681 pub fn case_fold_simple(&mut self) {
683 Class
::Unicode(ref mut x
) => x
.case_fold_simple(),
684 Class
::Bytes(ref mut x
) => x
.case_fold_simple(),
688 /// Negate this character class in place.
690 /// After completion, this character class will contain precisely the
691 /// characters that weren't previously in the class.
692 pub fn negate(&mut self) {
694 Class
::Unicode(ref mut x
) => x
.negate(),
695 Class
::Bytes(ref mut x
) => x
.negate(),
699 /// Returns true if and only if this character class will only ever match
702 /// A character class can match invalid UTF-8 only when the following
703 /// conditions are met:
705 /// 1. The translator was configured to permit generating an expression
706 /// that can match invalid UTF-8. (By default, this is disabled.)
707 /// 2. Unicode mode (via the `u` flag) was disabled either in the concrete
708 /// syntax or in the parser builder. By default, Unicode mode is
710 pub fn is_always_utf8(&self) -> bool
{
712 Class
::Unicode(_
) => true,
713 Class
::Bytes(ref x
) => x
.is_all_ascii(),
718 /// A set of characters represented by Unicode scalar values.
719 #[derive(Clone, Debug, Eq, PartialEq)]
720 pub struct ClassUnicode
{
721 set
: IntervalSet
<ClassUnicodeRange
>,
725 /// Create a new class from a sequence of ranges.
727 /// The given ranges do not need to be in any specific order, and ranges
729 pub fn new
<I
>(ranges
: I
) -> ClassUnicode
730 where I
: IntoIterator
<Item
=ClassUnicodeRange
>
732 ClassUnicode { set: IntervalSet::new(ranges) }
735 /// Create a new class with no ranges.
736 pub fn empty() -> ClassUnicode
{
737 ClassUnicode
::new(vec
![])
740 /// Add a new range to this set.
741 pub fn push(&mut self, range
: ClassUnicodeRange
) {
742 self.set
.push(range
);
745 /// Return an iterator over all ranges in this class.
747 /// The iterator yields ranges in ascending order.
748 pub fn iter(&self) -> ClassUnicodeIter
{
749 ClassUnicodeIter(self.set
.iter())
752 /// Return the underlying ranges as a slice.
753 pub fn ranges(&self) -> &[ClassUnicodeRange
] {
757 /// Expand this character class such that it contains all case folded
758 /// characters, according to Unicode's "simple" mapping. For example, if
759 /// this class consists of the range `a-z`, then applying case folding will
760 /// result in the class containing both the ranges `a-z` and `A-Z`.
761 pub fn case_fold_simple(&mut self) {
762 self.set
.case_fold_simple();
765 /// Negate this character class.
767 /// For all `c` where `c` is a Unicode scalar value, if `c` was in this
768 /// set, then it will not be in this set after negation.
769 pub fn negate(&mut self) {
773 /// Union this character class with the given character class, in place.
774 pub fn union(&mut self, other
: &ClassUnicode
) {
775 self.set
.union(&other
.set
);
778 /// Intersect this character class with the given character class, in
780 pub fn intersect(&mut self, other
: &ClassUnicode
) {
781 self.set
.intersect(&other
.set
);
784 /// Subtract the given character class from this character class, in place.
785 pub fn difference(&mut self, other
: &ClassUnicode
) {
786 self.set
.difference(&other
.set
);
789 /// Compute the symmetric difference of the given character classes, in
792 /// This computes the symmetric difference of two character classes. This
793 /// removes all elements in this class that are also in the given class,
794 /// but all adds all elements from the given class that aren't in this
795 /// class. That is, the class will contain all elements in either class,
796 /// but will not contain any elements that are in both classes.
797 pub fn symmetric_difference(&mut self, other
: &ClassUnicode
) {
798 self.set
.symmetric_difference(&other
.set
);
802 /// An iterator over all ranges in a Unicode character class.
804 /// The lifetime `'a` refers to the lifetime of the underlying class.
806 pub struct ClassUnicodeIter
<'a
>(IntervalSetIter
<'a
, ClassUnicodeRange
>);
808 impl<'a
> Iterator
for ClassUnicodeIter
<'a
> {
809 type Item
= &'a ClassUnicodeRange
;
811 fn next(&mut self) -> Option
<&'a ClassUnicodeRange
> {
816 /// A single range of characters represented by Unicode scalar values.
818 /// The range is closed. That is, the start and end of the range are included
820 #[derive(Clone, Copy, Default, Eq, PartialEq, PartialOrd, Ord)]
821 pub struct ClassUnicodeRange
{
826 impl fmt
::Debug
for ClassUnicodeRange
{
827 fn fmt(&self, f
: &mut fmt
::Formatter
) -> fmt
::Result
{
829 if !self.start
.is_whitespace() && !self.start
.is_control() {
830 self.start
.to_string()
832 format
!("0x{:X}", self.start
as u32)
835 if !self.end
.is_whitespace() && !self.end
.is_control() {
838 format
!("0x{:X}", self.end
as u32)
840 f
.debug_struct("ClassUnicodeRange")
841 .field("start", &start
)
847 impl Interval
for ClassUnicodeRange
{
850 #[inline] fn lower(&self) -> char { self.start }
851 #[inline] fn upper(&self) -> char { self.end }
852 #[inline] fn set_lower(&mut self, bound: char) { self.start = bound; }
853 #[inline] fn set_upper(&mut self, bound: char) { self.end = bound; }
855 /// Apply simple case folding to this Unicode scalar value range.
857 /// Additional ranges are appended to the given vector. Canonical ordering
858 /// is *not* maintained in the given vector.
859 fn case_fold_simple(&self, ranges
: &mut Vec
<ClassUnicodeRange
>) {
860 if !unicode
::contains_simple_case_mapping(self.start
, self.end
) {
863 let start
= self.start
as u32;
864 let end
= (self.end
as u32).saturating_add(1);
865 let mut next_simple_cp
= None
;
866 for cp
in (start
..end
).filter_map(char::from_u32
) {
867 if next_simple_cp
.map_or(false, |next
| cp
< next
) {
870 let it
= match unicode
::simple_fold(cp
) {
873 next_simple_cp
= next
;
877 for cp_folded
in it
{
878 ranges
.push(ClassUnicodeRange
::new(cp_folded
, cp_folded
));
884 impl ClassUnicodeRange
{
885 /// Create a new Unicode scalar value range for a character class.
887 /// The returned range is always in a canonical form. That is, the range
888 /// returned always satisfies the invariant that `start <= end`.
889 pub fn new(start
: char, end
: char) -> ClassUnicodeRange
{
890 ClassUnicodeRange
::create(start
, end
)
893 /// Return the start of this range.
895 /// The start of a range is always less than or equal to the end of the
897 pub fn start(&self) -> char {
901 /// Return the end of this range.
903 /// The end of a range is always greater than or equal to the start of the
905 pub fn end(&self) -> char {
910 /// A set of characters represented by arbitrary bytes (where one byte
911 /// corresponds to one character).
912 #[derive(Clone, Debug, Eq, PartialEq)]
913 pub struct ClassBytes
{
914 set
: IntervalSet
<ClassBytesRange
>,
918 /// Create a new class from a sequence of ranges.
920 /// The given ranges do not need to be in any specific order, and ranges
922 pub fn new
<I
>(ranges
: I
) -> ClassBytes
923 where I
: IntoIterator
<Item
=ClassBytesRange
>
925 ClassBytes { set: IntervalSet::new(ranges) }
928 /// Create a new class with no ranges.
929 pub fn empty() -> ClassBytes
{
930 ClassBytes
::new(vec
![])
933 /// Add a new range to this set.
934 pub fn push(&mut self, range
: ClassBytesRange
) {
935 self.set
.push(range
);
938 /// Return an iterator over all ranges in this class.
940 /// The iterator yields ranges in ascending order.
941 pub fn iter(&self) -> ClassBytesIter
{
942 ClassBytesIter(self.set
.iter())
945 /// Return the underlying ranges as a slice.
946 pub fn ranges(&self) -> &[ClassBytesRange
] {
950 /// Expand this character class such that it contains all case folded
951 /// characters. For example, if this class consists of the range `a-z`,
952 /// then applying case folding will result in the class containing both the
953 /// ranges `a-z` and `A-Z`.
955 /// Note that this only applies ASCII case folding, which is limited to the
956 /// characters `a-z` and `A-Z`.
957 pub fn case_fold_simple(&mut self) {
958 self.set
.case_fold_simple();
961 /// Negate this byte class.
963 /// For all `b` where `b` is a any byte, if `b` was in this set, then it
964 /// will not be in this set after negation.
965 pub fn negate(&mut self) {
969 /// Union this byte class with the given byte class, in place.
970 pub fn union(&mut self, other
: &ClassBytes
) {
971 self.set
.union(&other
.set
);
974 /// Intersect this byte class with the given byte class, in place.
975 pub fn intersect(&mut self, other
: &ClassBytes
) {
976 self.set
.intersect(&other
.set
);
979 /// Subtract the given byte class from this byte class, in place.
980 pub fn difference(&mut self, other
: &ClassBytes
) {
981 self.set
.difference(&other
.set
);
984 /// Compute the symmetric difference of the given byte classes, in place.
986 /// This computes the symmetric difference of two byte classes. This
987 /// removes all elements in this class that are also in the given class,
988 /// but all adds all elements from the given class that aren't in this
989 /// class. That is, the class will contain all elements in either class,
990 /// but will not contain any elements that are in both classes.
991 pub fn symmetric_difference(&mut self, other
: &ClassBytes
) {
992 self.set
.symmetric_difference(&other
.set
);
995 /// Returns true if and only if this character class will either match
996 /// nothing or only ASCII bytes. Stated differently, this returns false
997 /// if and only if this class contains a non-ASCII byte.
998 pub fn is_all_ascii(&self) -> bool
{
999 self.set
.intervals().last().map_or(true, |r
| r
.end
<= 0x7F)
1003 /// An iterator over all ranges in a byte character class.
1005 /// The lifetime `'a` refers to the lifetime of the underlying class.
1007 pub struct ClassBytesIter
<'a
>(IntervalSetIter
<'a
, ClassBytesRange
>);
1009 impl<'a
> Iterator
for ClassBytesIter
<'a
> {
1010 type Item
= &'a ClassBytesRange
;
1012 fn next(&mut self) -> Option
<&'a ClassBytesRange
> {
1017 /// A single range of characters represented by arbitrary bytes.
1019 /// The range is closed. That is, the start and end of the range are included
1021 #[derive(Clone, Copy, Default, Eq, PartialEq, PartialOrd, Ord)]
1022 pub struct ClassBytesRange
{
1027 impl Interval
for ClassBytesRange
{
1030 #[inline] fn lower(&self) -> u8 { self.start }
1031 #[inline] fn upper(&self) -> u8 { self.end }
1032 #[inline] fn set_lower(&mut self, bound: u8) { self.start = bound; }
1033 #[inline] fn set_upper(&mut self, bound: u8) { self.end = bound; }
1035 /// Apply simple case folding to this byte range. Only ASCII case mappings
1036 /// (for a-z) are applied.
1038 /// Additional ranges are appended to the given vector. Canonical ordering
1039 /// is *not* maintained in the given vector.
1040 fn case_fold_simple(&self, ranges
: &mut Vec
<ClassBytesRange
>) {
1041 if !ClassBytesRange
::new(b'a'
, b'z'
).is_intersection_empty(self) {
1042 let lower
= cmp
::max(self.start
, b'a'
);
1043 let upper
= cmp
::min(self.end
, b'z'
);
1044 ranges
.push(ClassBytesRange
::new(lower
- 32, upper
- 32));
1046 if !ClassBytesRange
::new(b'A'
, b'Z'
).is_intersection_empty(self) {
1047 let lower
= cmp
::max(self.start
, b'A'
);
1048 let upper
= cmp
::min(self.end
, b'Z'
);
1049 ranges
.push(ClassBytesRange
::new(lower
+ 32, upper
+ 32));
1054 impl ClassBytesRange
{
1055 /// Create a new byte range for a character class.
1057 /// The returned range is always in a canonical form. That is, the range
1058 /// returned always satisfies the invariant that `start <= end`.
1059 pub fn new(start
: u8, end
: u8) -> ClassBytesRange
{
1060 ClassBytesRange
::create(start
, end
)
1063 /// Return the start of this range.
1065 /// The start of a range is always less than or equal to the end of the
1067 pub fn start(&self) -> u8 {
1071 /// Return the end of this range.
1073 /// The end of a range is always greater than or equal to the start of the
1075 pub fn end(&self) -> u8 {
1080 impl fmt
::Debug
for ClassBytesRange
{
1081 fn fmt(&self, f
: &mut fmt
::Formatter
) -> fmt
::Result
{
1082 let mut debug
= f
.debug_struct("ClassBytesRange");
1083 if self.start
<= 0x7F {
1084 debug
.field("start", &(self.start
as char));
1086 debug
.field("start", &self.start
);
1088 if self.end
<= 0x7F {
1089 debug
.field("end", &(self.end
as char));
1091 debug
.field("end", &self.end
);
1097 /// The high-level intermediate representation for an anchor assertion.
1099 /// A matching anchor assertion is always zero-length.
1100 #[derive(Clone, Debug, Eq, PartialEq)]
1102 /// Match the beginning of a line or the beginning of text. Specifically,
1103 /// this matches at the starting position of the input, or at the position
1104 /// immediately following a `\n` character.
1106 /// Match the end of a line or the end of text. Specifically,
1107 /// this matches at the end position of the input, or at the position
1108 /// immediately preceding a `\n` character.
1110 /// Match the beginning of text. Specifically, this matches at the starting
1111 /// position of the input.
1113 /// Match the end of text. Specifically, this matches at the ending
1114 /// position of the input.
1118 /// The high-level intermediate representation for a word-boundary assertion.
1120 /// A matching word boundary assertion is always zero-length.
1121 #[derive(Clone, Debug, Eq, PartialEq)]
1122 pub enum WordBoundary
{
1123 /// Match a Unicode-aware word boundary. That is, this matches a position
1124 /// where the left adjacent character and right adjacent character
1125 /// correspond to a word and non-word or a non-word and word character.
1127 /// Match a Unicode-aware negation of a word boundary.
1129 /// Match an ASCII-only word boundary. That is, this matches a position
1130 /// where the left adjacent character and right adjacent character
1131 /// correspond to a word and non-word or a non-word and word character.
1133 /// Match an ASCII-only negation of a word boundary.
1138 /// Returns true if and only if this word boundary assertion is negated.
1139 pub fn is_negated(&self) -> bool
{
1141 WordBoundary
::Unicode
| WordBoundary
::Ascii
=> false,
1142 WordBoundary
::UnicodeNegate
| WordBoundary
::AsciiNegate
=> true,
1147 /// The high-level intermediate representation for a group.
1149 /// This represents one of three possible group types:
1151 /// 1. A non-capturing group (e.g., `(?:expr)`).
1152 /// 2. A capturing group (e.g., `(expr)`).
1153 /// 3. A named capturing group (e.g., `(?P<name>expr)`).
1154 #[derive(Clone, Debug, Eq, PartialEq)]
1156 /// The kind of this group. If it is a capturing group, then the kind
1157 /// contains the capture group index (and the name, if it is a named
1159 pub kind
: GroupKind
,
1160 /// The expression inside the capturing group, which may be empty.
1164 /// The kind of group.
1165 #[derive(Clone, Debug, Eq, PartialEq)]
1166 pub enum GroupKind
{
1167 /// A normal unnamed capturing group.
1169 /// The value is the capture index of the group.
1171 /// A named capturing group.
1173 /// The name of the group.
1175 /// The capture index of the group.
1178 /// A non-capturing group.
1182 /// The high-level intermediate representation of a repetition operator.
1184 /// A repetition operator permits the repetition of an arbitrary
1186 #[derive(Clone, Debug, Eq, PartialEq)]
1187 pub struct Repetition
{
1188 /// The kind of this repetition operator.
1189 pub kind
: RepetitionKind
,
1190 /// Whether this repetition operator is greedy or not. A greedy operator
1191 /// will match as much as it can. A non-greedy operator will match as
1192 /// little as it can.
1194 /// Typically, operators are greedy by default and are only non-greedy when
1195 /// a `?` suffix is used, e.g., `(expr)*` is greedy while `(expr)*?` is
1196 /// not. However, this can be inverted via the `U` "ungreedy" flag.
1198 /// The expression being repeated.
1203 /// Returns true if and only if this repetition operator makes it possible
1204 /// to match the empty string.
1206 /// Note that this is not defined inductively. For example, while `a*`
1207 /// will report `true`, `()+` will not, even though `()` matches the empty
1208 /// string and one or more occurrences of something that matches the empty
1209 /// string will always match the empty string. In order to get the
1210 /// inductive definition, see the corresponding method on
1211 /// [`Hir`](struct.Hir.html).
1212 pub fn is_match_empty(&self) -> bool
{
1214 RepetitionKind
::ZeroOrOne
=> true,
1215 RepetitionKind
::ZeroOrMore
=> true,
1216 RepetitionKind
::OneOrMore
=> false,
1217 RepetitionKind
::Range(RepetitionRange
::Exactly(m
)) => m
== 0,
1218 RepetitionKind
::Range(RepetitionRange
::AtLeast(m
)) => m
== 0,
1219 RepetitionKind
::Range(RepetitionRange
::Bounded(m
, _
)) => m
== 0,
1224 /// The kind of a repetition operator.
1225 #[derive(Clone, Debug, Eq, PartialEq)]
1226 pub enum RepetitionKind
{
1227 /// Matches a sub-expression zero or one times.
1229 /// Matches a sub-expression zero or more times.
1231 /// Matches a sub-expression one or more times.
1233 /// Matches a sub-expression within a bounded range of times.
1234 Range(RepetitionRange
),
1237 /// The kind of a counted repetition operator.
1238 #[derive(Clone, Debug, Eq, PartialEq)]
1239 pub enum RepetitionRange
{
1240 /// Matches a sub-expression exactly this many times.
1242 /// Matches a sub-expression at least this many times.
1244 /// Matches a sub-expression at least `m` times and at most `n` times.
1248 /// A custom `Drop` impl is used for `HirKind` such that it uses constant stack
1249 /// space but heap space proportional to the depth of the total `Hir`.
1251 fn drop(&mut self) {
1254 match *self.kind() {
1256 | HirKind
::Literal(_
)
1258 | HirKind
::Anchor(_
)
1259 | HirKind
::WordBoundary(_
) => return,
1260 HirKind
::Group(ref x
) if !x
.hir
.kind
.has_subexprs() => return,
1261 HirKind
::Repetition(ref x
) if !x
.hir
.kind
.has_subexprs() => return,
1262 HirKind
::Concat(ref x
) if x
.is_empty() => return,
1263 HirKind
::Alternation(ref x
) if x
.is_empty() => return,
1267 let mut stack
= vec
![mem
::replace(self, Hir
::empty())];
1268 while let Some(mut expr
) = stack
.pop() {
1271 | HirKind
::Literal(_
)
1273 | HirKind
::Anchor(_
)
1274 | HirKind
::WordBoundary(_
) => {}
1275 HirKind
::Group(ref mut x
) => {
1276 stack
.push(mem
::replace(&mut x
.hir
, Hir
::empty()));
1278 HirKind
::Repetition(ref mut x
) => {
1279 stack
.push(mem
::replace(&mut x
.hir
, Hir
::empty()));
1281 HirKind
::Concat(ref mut x
) => {
1282 stack
.extend(x
.drain(..));
1284 HirKind
::Alternation(ref mut x
) => {
1285 stack
.extend(x
.drain(..));
1292 /// A type that documents various attributes of an HIR expression.
1294 /// These attributes are typically defined inductively on the HIR.
1295 #[derive(Clone, Debug, Eq, PartialEq)]
1297 /// Represent yes/no questions by a bitfield to conserve space, since
1298 /// this is included in every HIR expression.
1300 /// If more attributes need to be added, it is OK to increase the size of
1301 /// this as appropriate.
1305 // A simple macro for defining bitfield accessors/mutators.
1306 macro_rules
! define_bool
{
1307 ($bit
:expr
, $is_fn_name
:ident
, $set_fn_name
:ident
) => {
1308 fn $
is_fn_name(&self) -> bool
{
1309 self.bools
& (0b1 << $bit
) > 0
1312 fn $
set_fn_name(&mut self, yes
: bool
) {
1314 self.bools
|= 1 << $bit
;
1316 self.bools
&= !(1 << $bit
);
1323 fn new() -> HirInfo
{
1329 define_bool
!(0, is_always_utf8
, set_always_utf8
);
1330 define_bool
!(1, is_all_assertions
, set_all_assertions
);
1331 define_bool
!(2, is_anchored_start
, set_anchored_start
);
1332 define_bool
!(3, is_anchored_end
, set_anchored_end
);
1333 define_bool
!(4, is_any_anchored_start
, set_any_anchored_start
);
1334 define_bool
!(5, is_any_anchored_end
, set_any_anchored_end
);
1335 define_bool
!(6, is_match_empty
, set_match_empty
);
1342 fn uclass(ranges
: &[(char, char)]) -> ClassUnicode
{
1343 let ranges
: Vec
<ClassUnicodeRange
> = ranges
1345 .map(|&(s
, e
)| ClassUnicodeRange
::new(s
, e
))
1347 ClassUnicode
::new(ranges
)
1350 fn bclass(ranges
: &[(u8, u8)]) -> ClassBytes
{
1351 let ranges
: Vec
<ClassBytesRange
> = ranges
1353 .map(|&(s
, e
)| ClassBytesRange
::new(s
, e
))
1355 ClassBytes
::new(ranges
)
1358 fn uranges(cls
: &ClassUnicode
) -> Vec
<(char, char)> {
1359 cls
.iter().map(|x
| (x
.start(), x
.end())).collect()
1362 fn ucasefold(cls
: &ClassUnicode
) -> ClassUnicode
{
1363 let mut cls_
= cls
.clone();
1364 cls_
.case_fold_simple();
1368 fn uunion(cls1
: &ClassUnicode
, cls2
: &ClassUnicode
) -> ClassUnicode
{
1369 let mut cls_
= cls1
.clone();
1374 fn uintersect(cls1
: &ClassUnicode
, cls2
: &ClassUnicode
) -> ClassUnicode
{
1375 let mut cls_
= cls1
.clone();
1376 cls_
.intersect(cls2
);
1380 fn udifference(cls1
: &ClassUnicode
, cls2
: &ClassUnicode
) -> ClassUnicode
{
1381 let mut cls_
= cls1
.clone();
1382 cls_
.difference(cls2
);
1386 fn usymdifference(cls1
: &ClassUnicode
, cls2
: &ClassUnicode
) -> ClassUnicode
{
1387 let mut cls_
= cls1
.clone();
1388 cls_
.symmetric_difference(cls2
);
1392 fn unegate(cls
: &ClassUnicode
) -> ClassUnicode
{
1393 let mut cls_
= cls
.clone();
1398 fn branges(cls
: &ClassBytes
) -> Vec
<(u8, u8)> {
1399 cls
.iter().map(|x
| (x
.start(), x
.end())).collect()
1402 fn bcasefold(cls
: &ClassBytes
) -> ClassBytes
{
1403 let mut cls_
= cls
.clone();
1404 cls_
.case_fold_simple();
1408 fn bunion(cls1
: &ClassBytes
, cls2
: &ClassBytes
) -> ClassBytes
{
1409 let mut cls_
= cls1
.clone();
1414 fn bintersect(cls1
: &ClassBytes
, cls2
: &ClassBytes
) -> ClassBytes
{
1415 let mut cls_
= cls1
.clone();
1416 cls_
.intersect(cls2
);
1420 fn bdifference(cls1
: &ClassBytes
, cls2
: &ClassBytes
) -> ClassBytes
{
1421 let mut cls_
= cls1
.clone();
1422 cls_
.difference(cls2
);
1426 fn bsymdifference(cls1
: &ClassBytes
, cls2
: &ClassBytes
) -> ClassBytes
{
1427 let mut cls_
= cls1
.clone();
1428 cls_
.symmetric_difference(cls2
);
1432 fn bnegate(cls
: &ClassBytes
) -> ClassBytes
{
1433 let mut cls_
= cls
.clone();
1439 fn class_range_canonical_unicode() {
1440 let range
= ClassUnicodeRange
::new('
\u{00FF}'
, '
\0'
);
1441 assert_eq
!('
\0'
, range
.start());
1442 assert_eq
!('
\u{00FF}'
, range
.end());
1446 fn class_range_canonical_bytes() {
1447 let range
= ClassBytesRange
::new(b'
\xFF'
, b'
\0'
);
1448 assert_eq
!(b'
\0'
, range
.start());
1449 assert_eq
!(b'
\xFF'
, range
.end());
1453 fn class_canonicalize_unicode() {
1454 let cls
= uclass(&[('a'
, 'c'
), ('x'
, 'z'
)]);
1455 let expected
= vec
![('a'
, 'c'
), ('x'
, 'z'
)];
1456 assert_eq
!(expected
, uranges(&cls
));
1458 let cls
= uclass(&[('x'
, 'z'
), ('a'
, 'c'
)]);
1459 let expected
= vec
![('a'
, 'c'
), ('x'
, 'z'
)];
1460 assert_eq
!(expected
, uranges(&cls
));
1462 let cls
= uclass(&[('x'
, 'z'
), ('w'
, 'y'
)]);
1463 let expected
= vec
![('w'
, 'z'
)];
1464 assert_eq
!(expected
, uranges(&cls
));
1467 ('c'
, 'f'
), ('a'
, 'g'
), ('d'
, 'j'
), ('a'
, 'c'
),
1468 ('m'
, 'p'
), ('l'
, 's'
),
1470 let expected
= vec
![('a'
, 'j'
), ('l'
, 's'
)];
1471 assert_eq
!(expected
, uranges(&cls
));
1473 let cls
= uclass(&[('x'
, 'z'
), ('u'
, 'w'
)]);
1474 let expected
= vec
![('u'
, 'z'
)];
1475 assert_eq
!(expected
, uranges(&cls
));
1477 let cls
= uclass(&[('
\x00'
, '
\u{10FFFF}'
), ('
\x00'
, '
\u{10FFFF}'
)]);
1478 let expected
= vec
![('
\x00'
, '
\u{10FFFF}'
)];
1479 assert_eq
!(expected
, uranges(&cls
));
1482 let cls
= uclass(&[('a'
, 'a'
), ('b'
, 'b'
)]);
1483 let expected
= vec
![('a'
, 'b'
)];
1484 assert_eq
!(expected
, uranges(&cls
));
1488 fn class_canonicalize_bytes() {
1489 let cls
= bclass(&[(b'a'
, b'c'
), (b'x'
, b'z'
)]);
1490 let expected
= vec
![(b'a'
, b'c'
), (b'x'
, b'z'
)];
1491 assert_eq
!(expected
, branges(&cls
));
1493 let cls
= bclass(&[(b'x'
, b'z'
), (b'a'
, b'c'
)]);
1494 let expected
= vec
![(b'a'
, b'c'
), (b'x'
, b'z'
)];
1495 assert_eq
!(expected
, branges(&cls
));
1497 let cls
= bclass(&[(b'x'
, b'z'
), (b'w'
, b'y'
)]);
1498 let expected
= vec
![(b'w'
, b'z'
)];
1499 assert_eq
!(expected
, branges(&cls
));
1502 (b'c'
, b'f'
), (b'a'
, b'g'
), (b'd'
, b'j'
), (b'a'
, b'c'
),
1503 (b'm'
, b'p'
), (b'l'
, b's'
),
1505 let expected
= vec
![(b'a'
, b'j'
), (b'l'
, b's'
)];
1506 assert_eq
!(expected
, branges(&cls
));
1508 let cls
= bclass(&[(b'x'
, b'z'
), (b'u'
, b'w'
)]);
1509 let expected
= vec
![(b'u'
, b'z'
)];
1510 assert_eq
!(expected
, branges(&cls
));
1512 let cls
= bclass(&[(b'
\x00'
, b'
\xFF'
), (b'
\x00'
, b'
\xFF'
)]);
1513 let expected
= vec
![(b'
\x00'
, b'
\xFF'
)];
1514 assert_eq
!(expected
, branges(&cls
));
1516 let cls
= bclass(&[(b'a'
, b'a'
), (b'b'
, b'b'
)]);
1517 let expected
= vec
![(b'a'
, b'b'
)];
1518 assert_eq
!(expected
, branges(&cls
));
1522 fn class_case_fold_unicode() {
1524 ('C'
, 'F'
), ('A'
, 'G'
), ('D'
, 'J'
), ('A'
, 'C'
),
1525 ('M'
, 'P'
), ('L'
, 'S'
), ('c'
, 'f'
),
1527 let expected
= uclass(&[
1528 ('A'
, 'J'
), ('L'
, 'S'
),
1529 ('a'
, 'j'
), ('l'
, 's'
),
1530 ('
\u{17F}'
, '
\u{17F}'
),
1532 assert_eq
!(expected
, ucasefold(&cls
));
1534 let cls
= uclass(&[('A'
, 'Z'
)]);
1535 let expected
= uclass(&[
1536 ('A'
, 'Z'
), ('a'
, 'z'
),
1537 ('
\u{17F}'
, '
\u{17F}'
),
1538 ('
\u{212A}'
, '
\u{212A}'
),
1540 assert_eq
!(expected
, ucasefold(&cls
));
1542 let cls
= uclass(&[('a'
, 'z'
)]);
1543 let expected
= uclass(&[
1544 ('A'
, 'Z'
), ('a'
, 'z'
),
1545 ('
\u{17F}'
, '
\u{17F}'
),
1546 ('
\u{212A}'
, '
\u{212A}'
),
1548 assert_eq
!(expected
, ucasefold(&cls
));
1550 let cls
= uclass(&[('A'
, 'A'
), ('_'
, '_'
)]);
1551 let expected
= uclass(&[('A'
, 'A'
), ('_'
, '_'
), ('a'
, 'a'
)]);
1552 assert_eq
!(expected
, ucasefold(&cls
));
1554 let cls
= uclass(&[('A'
, 'A'
), ('
='
, '
='
)]);
1555 let expected
= uclass(&[('
='
, '
='
), ('A'
, 'A'
), ('a'
, 'a'
)]);
1556 assert_eq
!(expected
, ucasefold(&cls
));
1558 let cls
= uclass(&[('
\x00'
, '
\x10'
)]);
1559 assert_eq
!(cls
, ucasefold(&cls
));
1561 let cls
= uclass(&[('k'
, 'k'
)]);
1562 let expected
= uclass(&[
1563 ('K'
, 'K'
), ('k'
, 'k'
), ('
\u{212A}'
, '
\u{212A}'
),
1565 assert_eq
!(expected
, ucasefold(&cls
));
1567 let cls
= uclass(&[('@'
, '@'
)]);
1568 assert_eq
!(cls
, ucasefold(&cls
));
1572 fn class_case_fold_bytes() {
1574 (b'C'
, b'F'
), (b'A'
, b'G'
), (b'D'
, b'J'
), (b'A'
, b'C'
),
1575 (b'M'
, b'P'
), (b'L'
, b'S'
), (b'c'
, b'f'
),
1577 let expected
= bclass(&[
1578 (b'A'
, b'J'
), (b'L'
, b'S'
),
1579 (b'a'
, b'j'
), (b'l'
, b's'
),
1581 assert_eq
!(expected
, bcasefold(&cls
));
1583 let cls
= bclass(&[(b'A'
, b'Z'
)]);
1584 let expected
= bclass(&[(b'A'
, b'Z'
), (b'a'
, b'z'
)]);
1585 assert_eq
!(expected
, bcasefold(&cls
));
1587 let cls
= bclass(&[(b'a'
, b'z'
)]);
1588 let expected
= bclass(&[(b'A'
, b'Z'
), (b'a'
, b'z'
)]);
1589 assert_eq
!(expected
, bcasefold(&cls
));
1591 let cls
= bclass(&[(b'A'
, b'A'
), (b'_'
, b'_'
)]);
1592 let expected
= bclass(&[(b'A'
, b'A'
), (b'_'
, b'_'
), (b'a'
, b'a'
)]);
1593 assert_eq
!(expected
, bcasefold(&cls
));
1595 let cls
= bclass(&[(b'A'
, b'A'
), (b'
='
, b'
='
)]);
1596 let expected
= bclass(&[(b'
='
, b'
='
), (b'A'
, b'A'
), (b'a'
, b'a'
)]);
1597 assert_eq
!(expected
, bcasefold(&cls
));
1599 let cls
= bclass(&[(b'
\x00'
, b'
\x10'
)]);
1600 assert_eq
!(cls
, bcasefold(&cls
));
1602 let cls
= bclass(&[(b'k'
, b'k'
)]);
1603 let expected
= bclass(&[(b'K'
, b'K'
), (b'k'
, b'k'
)]);
1604 assert_eq
!(expected
, bcasefold(&cls
));
1606 let cls
= bclass(&[(b'@'
, b'@'
)]);
1607 assert_eq
!(cls
, bcasefold(&cls
));
1611 fn class_negate_unicode() {
1612 let cls
= uclass(&[('a'
, 'a'
)]);
1613 let expected
= uclass(&[('
\x00'
, '
\x60'
), ('
\x62'
, '
\u{10FFFF}'
)]);
1614 assert_eq
!(expected
, unegate(&cls
));
1616 let cls
= uclass(&[('a'
, 'a'
), ('b'
, 'b'
)]);
1617 let expected
= uclass(&[('
\x00'
, '
\x60'
), ('
\x63'
, '
\u{10FFFF}'
)]);
1618 assert_eq
!(expected
, unegate(&cls
));
1620 let cls
= uclass(&[('a'
, 'c'
), ('x'
, 'z'
)]);
1621 let expected
= uclass(&[
1622 ('
\x00'
, '
\x60'
), ('
\x64'
, '
\x77'
), ('
\x7B'
, '
\u{10FFFF}'
),
1624 assert_eq
!(expected
, unegate(&cls
));
1626 let cls
= uclass(&[('
\x00'
, 'a'
)]);
1627 let expected
= uclass(&[('
\x62'
, '
\u{10FFFF}'
)]);
1628 assert_eq
!(expected
, unegate(&cls
));
1630 let cls
= uclass(&[('a'
, '
\u{10FFFF}'
)]);
1631 let expected
= uclass(&[('
\x00'
, '
\x60'
)]);
1632 assert_eq
!(expected
, unegate(&cls
));
1634 let cls
= uclass(&[('
\x00'
, '
\u{10FFFF}'
)]);
1635 let expected
= uclass(&[]);
1636 assert_eq
!(expected
, unegate(&cls
));
1638 let cls
= uclass(&[]);
1639 let expected
= uclass(&[('
\x00'
, '
\u{10FFFF}'
)]);
1640 assert_eq
!(expected
, unegate(&cls
));
1643 ('
\x00'
, '
\u{10FFFD}'
), ('
\u{10FFFF}'
, '
\u{10FFFF}'
),
1645 let expected
= uclass(&[('
\u{10FFFE}'
, '
\u{10FFFE}'
)]);
1646 assert_eq
!(expected
, unegate(&cls
));
1648 let cls
= uclass(&[('
\x00'
, '
\u{D7FF}'
)]);
1649 let expected
= uclass(&[('
\u{E000}'
, '
\u{10FFFF}'
)]);
1650 assert_eq
!(expected
, unegate(&cls
));
1652 let cls
= uclass(&[('
\x00'
, '
\u{D7FE}'
)]);
1653 let expected
= uclass(&[('
\u{D7FF}'
, '
\u{10FFFF}'
)]);
1654 assert_eq
!(expected
, unegate(&cls
));
1656 let cls
= uclass(&[('
\u{E000}'
, '
\u{10FFFF}'
)]);
1657 let expected
= uclass(&[('
\x00'
, '
\u{D7FF}'
)]);
1658 assert_eq
!(expected
, unegate(&cls
));
1660 let cls
= uclass(&[('
\u{E001}'
, '
\u{10FFFF}'
)]);
1661 let expected
= uclass(&[('
\x00'
, '
\u{E000}'
)]);
1662 assert_eq
!(expected
, unegate(&cls
));
1666 fn class_negate_bytes() {
1667 let cls
= bclass(&[(b'a'
, b'a'
)]);
1668 let expected
= bclass(&[(b'
\x00'
, b'
\x60'
), (b'
\x62'
, b'
\xFF'
)]);
1669 assert_eq
!(expected
, bnegate(&cls
));
1671 let cls
= bclass(&[(b'a'
, b'a'
), (b'b'
, b'b'
)]);
1672 let expected
= bclass(&[(b'
\x00'
, b'
\x60'
), (b'
\x63'
, b'
\xFF'
)]);
1673 assert_eq
!(expected
, bnegate(&cls
));
1675 let cls
= bclass(&[(b'a'
, b'c'
), (b'x'
, b'z'
)]);
1676 let expected
= bclass(&[
1677 (b'
\x00'
, b'
\x60'
), (b'
\x64'
, b'
\x77'
), (b'
\x7B'
, b'
\xFF'
),
1679 assert_eq
!(expected
, bnegate(&cls
));
1681 let cls
= bclass(&[(b'
\x00'
, b'a'
)]);
1682 let expected
= bclass(&[(b'
\x62'
, b'
\xFF'
)]);
1683 assert_eq
!(expected
, bnegate(&cls
));
1685 let cls
= bclass(&[(b'a'
, b'
\xFF'
)]);
1686 let expected
= bclass(&[(b'
\x00'
, b'
\x60'
)]);
1687 assert_eq
!(expected
, bnegate(&cls
));
1689 let cls
= bclass(&[(b'
\x00'
, b'
\xFF'
)]);
1690 let expected
= bclass(&[]);
1691 assert_eq
!(expected
, bnegate(&cls
));
1693 let cls
= bclass(&[]);
1694 let expected
= bclass(&[(b'
\x00'
, b'
\xFF'
)]);
1695 assert_eq
!(expected
, bnegate(&cls
));
1697 let cls
= bclass(&[(b'
\x00'
, b'
\xFD'
), (b'
\xFF'
, b'
\xFF'
)]);
1698 let expected
= bclass(&[(b'
\xFE'
, b'
\xFE'
)]);
1699 assert_eq
!(expected
, bnegate(&cls
));
1703 fn class_union_unicode() {
1704 let cls1
= uclass(&[('a'
, 'g'
), ('m'
, 't'
), ('A'
, 'C'
)]);
1705 let cls2
= uclass(&[('a'
, 'z'
)]);
1706 let expected
= uclass(&[('a'
, 'z'
), ('A'
, 'C'
)]);
1707 assert_eq
!(expected
, uunion(&cls1
, &cls2
));
1711 fn class_union_bytes() {
1712 let cls1
= bclass(&[(b'a'
, b'g'
), (b'm'
, b't'
), (b'A'
, b'C'
)]);
1713 let cls2
= bclass(&[(b'a'
, b'z'
)]);
1714 let expected
= bclass(&[(b'a'
, b'z'
), (b'A'
, b'C'
)]);
1715 assert_eq
!(expected
, bunion(&cls1
, &cls2
));
1719 fn class_intersect_unicode() {
1720 let cls1
= uclass(&[]);
1721 let cls2
= uclass(&[('a'
, 'a'
)]);
1722 let expected
= uclass(&[]);
1723 assert_eq
!(expected
, uintersect(&cls1
, &cls2
));
1725 let cls1
= uclass(&[('a'
, 'a'
)]);
1726 let cls2
= uclass(&[('a'
, 'a'
)]);
1727 let expected
= uclass(&[('a'
, 'a'
)]);
1728 assert_eq
!(expected
, uintersect(&cls1
, &cls2
));
1730 let cls1
= uclass(&[('a'
, 'a'
)]);
1731 let cls2
= uclass(&[('b'
, 'b'
)]);
1732 let expected
= uclass(&[]);
1733 assert_eq
!(expected
, uintersect(&cls1
, &cls2
));
1735 let cls1
= uclass(&[('a'
, 'a'
)]);
1736 let cls2
= uclass(&[('a'
, 'c'
)]);
1737 let expected
= uclass(&[('a'
, 'a'
)]);
1738 assert_eq
!(expected
, uintersect(&cls1
, &cls2
));
1740 let cls1
= uclass(&[('a'
, 'b'
)]);
1741 let cls2
= uclass(&[('a'
, 'c'
)]);
1742 let expected
= uclass(&[('a'
, 'b'
)]);
1743 assert_eq
!(expected
, uintersect(&cls1
, &cls2
));
1745 let cls1
= uclass(&[('a'
, 'b'
)]);
1746 let cls2
= uclass(&[('b'
, 'c'
)]);
1747 let expected
= uclass(&[('b'
, 'b'
)]);
1748 assert_eq
!(expected
, uintersect(&cls1
, &cls2
));
1750 let cls1
= uclass(&[('a'
, 'b'
)]);
1751 let cls2
= uclass(&[('c'
, 'd'
)]);
1752 let expected
= uclass(&[]);
1753 assert_eq
!(expected
, uintersect(&cls1
, &cls2
));
1755 let cls1
= uclass(&[('b'
, 'c'
)]);
1756 let cls2
= uclass(&[('a'
, 'd'
)]);
1757 let expected
= uclass(&[('b'
, 'c'
)]);
1758 assert_eq
!(expected
, uintersect(&cls1
, &cls2
));
1760 let cls1
= uclass(&[('a'
, 'b'
), ('d'
, 'e'
), ('g'
, 'h'
)]);
1761 let cls2
= uclass(&[('a'
, 'h'
)]);
1762 let expected
= uclass(&[('a'
, 'b'
), ('d'
, 'e'
), ('g'
, 'h'
)]);
1763 assert_eq
!(expected
, uintersect(&cls1
, &cls2
));
1765 let cls1
= uclass(&[('a'
, 'b'
), ('d'
, 'e'
), ('g'
, 'h'
)]);
1766 let cls2
= uclass(&[('a'
, 'b'
), ('d'
, 'e'
), ('g'
, 'h'
)]);
1767 let expected
= uclass(&[('a'
, 'b'
), ('d'
, 'e'
), ('g'
, 'h'
)]);
1768 assert_eq
!(expected
, uintersect(&cls1
, &cls2
));
1770 let cls1
= uclass(&[('a'
, 'b'
), ('g'
, 'h'
)]);
1771 let cls2
= uclass(&[('d'
, 'e'
), ('k'
, 'l'
)]);
1772 let expected
= uclass(&[]);
1773 assert_eq
!(expected
, uintersect(&cls1
, &cls2
));
1775 let cls1
= uclass(&[('a'
, 'b'
), ('d'
, 'e'
), ('g'
, 'h'
)]);
1776 let cls2
= uclass(&[('h'
, 'h'
)]);
1777 let expected
= uclass(&[('h'
, 'h'
)]);
1778 assert_eq
!(expected
, uintersect(&cls1
, &cls2
));
1780 let cls1
= uclass(&[('a'
, 'b'
), ('e'
, 'f'
), ('i'
, 'j'
)]);
1781 let cls2
= uclass(&[('c'
, 'd'
), ('g'
, 'h'
), ('k'
, 'l'
)]);
1782 let expected
= uclass(&[]);
1783 assert_eq
!(expected
, uintersect(&cls1
, &cls2
));
1785 let cls1
= uclass(&[('a'
, 'b'
), ('c'
, 'd'
), ('e'
, 'f'
)]);
1786 let cls2
= uclass(&[('b'
, 'c'
), ('d'
, 'e'
), ('f'
, 'g'
)]);
1787 let expected
= uclass(&[('b'
, 'f'
)]);
1788 assert_eq
!(expected
, uintersect(&cls1
, &cls2
));
1792 fn class_intersect_bytes() {
1793 let cls1
= bclass(&[]);
1794 let cls2
= bclass(&[(b'a'
, b'a'
)]);
1795 let expected
= bclass(&[]);
1796 assert_eq
!(expected
, bintersect(&cls1
, &cls2
));
1798 let cls1
= bclass(&[(b'a'
, b'a'
)]);
1799 let cls2
= bclass(&[(b'a'
, b'a'
)]);
1800 let expected
= bclass(&[(b'a'
, b'a'
)]);
1801 assert_eq
!(expected
, bintersect(&cls1
, &cls2
));
1803 let cls1
= bclass(&[(b'a'
, b'a'
)]);
1804 let cls2
= bclass(&[(b'b'
, b'b'
)]);
1805 let expected
= bclass(&[]);
1806 assert_eq
!(expected
, bintersect(&cls1
, &cls2
));
1808 let cls1
= bclass(&[(b'a'
, b'a'
)]);
1809 let cls2
= bclass(&[(b'a'
, b'c'
)]);
1810 let expected
= bclass(&[(b'a'
, b'a'
)]);
1811 assert_eq
!(expected
, bintersect(&cls1
, &cls2
));
1813 let cls1
= bclass(&[(b'a'
, b'b'
)]);
1814 let cls2
= bclass(&[(b'a'
, b'c'
)]);
1815 let expected
= bclass(&[(b'a'
, b'b'
)]);
1816 assert_eq
!(expected
, bintersect(&cls1
, &cls2
));
1818 let cls1
= bclass(&[(b'a'
, b'b'
)]);
1819 let cls2
= bclass(&[(b'b'
, b'c'
)]);
1820 let expected
= bclass(&[(b'b'
, b'b'
)]);
1821 assert_eq
!(expected
, bintersect(&cls1
, &cls2
));
1823 let cls1
= bclass(&[(b'a'
, b'b'
)]);
1824 let cls2
= bclass(&[(b'c'
, b'd'
)]);
1825 let expected
= bclass(&[]);
1826 assert_eq
!(expected
, bintersect(&cls1
, &cls2
));
1828 let cls1
= bclass(&[(b'b'
, b'c'
)]);
1829 let cls2
= bclass(&[(b'a'
, b'd'
)]);
1830 let expected
= bclass(&[(b'b'
, b'c'
)]);
1831 assert_eq
!(expected
, bintersect(&cls1
, &cls2
));
1833 let cls1
= bclass(&[(b'a'
, b'b'
), (b'd'
, b'e'
), (b'g'
, b'h'
)]);
1834 let cls2
= bclass(&[(b'a'
, b'h'
)]);
1835 let expected
= bclass(&[(b'a'
, b'b'
), (b'd'
, b'e'
), (b'g'
, b'h'
)]);
1836 assert_eq
!(expected
, bintersect(&cls1
, &cls2
));
1838 let cls1
= bclass(&[(b'a'
, b'b'
), (b'd'
, b'e'
), (b'g'
, b'h'
)]);
1839 let cls2
= bclass(&[(b'a'
, b'b'
), (b'd'
, b'e'
), (b'g'
, b'h'
)]);
1840 let expected
= bclass(&[(b'a'
, b'b'
), (b'd'
, b'e'
), (b'g'
, b'h'
)]);
1841 assert_eq
!(expected
, bintersect(&cls1
, &cls2
));
1843 let cls1
= bclass(&[(b'a'
, b'b'
), (b'g'
, b'h'
)]);
1844 let cls2
= bclass(&[(b'd'
, b'e'
), (b'k'
, b'l'
)]);
1845 let expected
= bclass(&[]);
1846 assert_eq
!(expected
, bintersect(&cls1
, &cls2
));
1848 let cls1
= bclass(&[(b'a'
, b'b'
), (b'd'
, b'e'
), (b'g'
, b'h'
)]);
1849 let cls2
= bclass(&[(b'h'
, b'h'
)]);
1850 let expected
= bclass(&[(b'h'
, b'h'
)]);
1851 assert_eq
!(expected
, bintersect(&cls1
, &cls2
));
1853 let cls1
= bclass(&[(b'a'
, b'b'
), (b'e'
, b'f'
), (b'i'
, b'j'
)]);
1854 let cls2
= bclass(&[(b'c'
, b'd'
), (b'g'
, b'h'
), (b'k'
, b'l'
)]);
1855 let expected
= bclass(&[]);
1856 assert_eq
!(expected
, bintersect(&cls1
, &cls2
));
1858 let cls1
= bclass(&[(b'a'
, b'b'
), (b'c'
, b'd'
), (b'e'
, b'f'
)]);
1859 let cls2
= bclass(&[(b'b'
, b'c'
), (b'd'
, b'e'
), (b'f'
, b'g'
)]);
1860 let expected
= bclass(&[(b'b'
, b'f'
)]);
1861 assert_eq
!(expected
, bintersect(&cls1
, &cls2
));
1865 fn class_difference_unicode() {
1866 let cls1
= uclass(&[('a'
, 'a'
)]);
1867 let cls2
= uclass(&[('a'
, 'a'
)]);
1868 let expected
= uclass(&[]);
1869 assert_eq
!(expected
, udifference(&cls1
, &cls2
));
1871 let cls1
= uclass(&[('a'
, 'a'
)]);
1872 let cls2
= uclass(&[]);
1873 let expected
= uclass(&[('a'
, 'a'
)]);
1874 assert_eq
!(expected
, udifference(&cls1
, &cls2
));
1876 let cls1
= uclass(&[]);
1877 let cls2
= uclass(&[('a'
, 'a'
)]);
1878 let expected
= uclass(&[]);
1879 assert_eq
!(expected
, udifference(&cls1
, &cls2
));
1881 let cls1
= uclass(&[('a'
, 'z'
)]);
1882 let cls2
= uclass(&[('a'
, 'a'
)]);
1883 let expected
= uclass(&[('b'
, 'z'
)]);
1884 assert_eq
!(expected
, udifference(&cls1
, &cls2
));
1886 let cls1
= uclass(&[('a'
, 'z'
)]);
1887 let cls2
= uclass(&[('z'
, 'z'
)]);
1888 let expected
= uclass(&[('a'
, 'y'
)]);
1889 assert_eq
!(expected
, udifference(&cls1
, &cls2
));
1891 let cls1
= uclass(&[('a'
, 'z'
)]);
1892 let cls2
= uclass(&[('m'
, 'm'
)]);
1893 let expected
= uclass(&[('a'
, 'l'
), ('n'
, 'z'
)]);
1894 assert_eq
!(expected
, udifference(&cls1
, &cls2
));
1896 let cls1
= uclass(&[('a'
, 'c'
), ('g'
, 'i'
), ('r'
, 't'
)]);
1897 let cls2
= uclass(&[('a'
, 'z'
)]);
1898 let expected
= uclass(&[]);
1899 assert_eq
!(expected
, udifference(&cls1
, &cls2
));
1901 let cls1
= uclass(&[('a'
, 'c'
), ('g'
, 'i'
), ('r'
, 't'
)]);
1902 let cls2
= uclass(&[('d'
, 'v'
)]);
1903 let expected
= uclass(&[('a'
, 'c'
)]);
1904 assert_eq
!(expected
, udifference(&cls1
, &cls2
));
1906 let cls1
= uclass(&[('a'
, 'c'
), ('g'
, 'i'
), ('r'
, 't'
)]);
1907 let cls2
= uclass(&[('b'
, 'g'
), ('s'
, 'u'
)]);
1908 let expected
= uclass(&[('a'
, 'a'
), ('h'
, 'i'
), ('r'
, 'r'
)]);
1909 assert_eq
!(expected
, udifference(&cls1
, &cls2
));
1911 let cls1
= uclass(&[('a'
, 'c'
), ('g'
, 'i'
), ('r'
, 't'
)]);
1912 let cls2
= uclass(&[('b'
, 'd'
), ('e'
, 'g'
), ('s'
, 'u'
)]);
1913 let expected
= uclass(&[('a'
, 'a'
), ('h'
, 'i'
), ('r'
, 'r'
)]);
1914 assert_eq
!(expected
, udifference(&cls1
, &cls2
));
1916 let cls1
= uclass(&[('x'
, 'z'
)]);
1917 let cls2
= uclass(&[('a'
, 'c'
), ('e'
, 'g'
), ('s'
, 'u'
)]);
1918 let expected
= uclass(&[('x'
, 'z'
)]);
1919 assert_eq
!(expected
, udifference(&cls1
, &cls2
));
1921 let cls1
= uclass(&[('a'
, 'z'
)]);
1922 let cls2
= uclass(&[('a'
, 'c'
), ('e'
, 'g'
), ('s'
, 'u'
)]);
1923 let expected
= uclass(&[('d'
, 'd'
), ('h'
, 'r'
), ('v'
, 'z'
)]);
1924 assert_eq
!(expected
, udifference(&cls1
, &cls2
));
1928 fn class_difference_bytes() {
1929 let cls1
= bclass(&[(b'a'
, b'a'
)]);
1930 let cls2
= bclass(&[(b'a'
, b'a'
)]);
1931 let expected
= bclass(&[]);
1932 assert_eq
!(expected
, bdifference(&cls1
, &cls2
));
1934 let cls1
= bclass(&[(b'a'
, b'a'
)]);
1935 let cls2
= bclass(&[]);
1936 let expected
= bclass(&[(b'a'
, b'a'
)]);
1937 assert_eq
!(expected
, bdifference(&cls1
, &cls2
));
1939 let cls1
= bclass(&[]);
1940 let cls2
= bclass(&[(b'a'
, b'a'
)]);
1941 let expected
= bclass(&[]);
1942 assert_eq
!(expected
, bdifference(&cls1
, &cls2
));
1944 let cls1
= bclass(&[(b'a'
, b'z'
)]);
1945 let cls2
= bclass(&[(b'a'
, b'a'
)]);
1946 let expected
= bclass(&[(b'b'
, b'z'
)]);
1947 assert_eq
!(expected
, bdifference(&cls1
, &cls2
));
1949 let cls1
= bclass(&[(b'a'
, b'z'
)]);
1950 let cls2
= bclass(&[(b'z'
, b'z'
)]);
1951 let expected
= bclass(&[(b'a'
, b'y'
)]);
1952 assert_eq
!(expected
, bdifference(&cls1
, &cls2
));
1954 let cls1
= bclass(&[(b'a'
, b'z'
)]);
1955 let cls2
= bclass(&[(b'm'
, b'm'
)]);
1956 let expected
= bclass(&[(b'a'
, b'l'
), (b'n'
, b'z'
)]);
1957 assert_eq
!(expected
, bdifference(&cls1
, &cls2
));
1959 let cls1
= bclass(&[(b'a'
, b'c'
), (b'g'
, b'i'
), (b'r'
, b't'
)]);
1960 let cls2
= bclass(&[(b'a'
, b'z'
)]);
1961 let expected
= bclass(&[]);
1962 assert_eq
!(expected
, bdifference(&cls1
, &cls2
));
1964 let cls1
= bclass(&[(b'a'
, b'c'
), (b'g'
, b'i'
), (b'r'
, b't'
)]);
1965 let cls2
= bclass(&[(b'd'
, b'v'
)]);
1966 let expected
= bclass(&[(b'a'
, b'c'
)]);
1967 assert_eq
!(expected
, bdifference(&cls1
, &cls2
));
1969 let cls1
= bclass(&[(b'a'
, b'c'
), (b'g'
, b'i'
), (b'r'
, b't'
)]);
1970 let cls2
= bclass(&[(b'b'
, b'g'
), (b's'
, b'u'
)]);
1971 let expected
= bclass(&[(b'a'
, b'a'
), (b'h'
, b'i'
), (b'r'
, b'r'
)]);
1972 assert_eq
!(expected
, bdifference(&cls1
, &cls2
));
1974 let cls1
= bclass(&[(b'a'
, b'c'
), (b'g'
, b'i'
), (b'r'
, b't'
)]);
1975 let cls2
= bclass(&[(b'b'
, b'd'
), (b'e'
, b'g'
), (b's'
, b'u'
)]);
1976 let expected
= bclass(&[(b'a'
, b'a'
), (b'h'
, b'i'
), (b'r'
, b'r'
)]);
1977 assert_eq
!(expected
, bdifference(&cls1
, &cls2
));
1979 let cls1
= bclass(&[(b'x'
, b'z'
)]);
1980 let cls2
= bclass(&[(b'a'
, b'c'
), (b'e'
, b'g'
), (b's'
, b'u'
)]);
1981 let expected
= bclass(&[(b'x'
, b'z'
)]);
1982 assert_eq
!(expected
, bdifference(&cls1
, &cls2
));
1984 let cls1
= bclass(&[(b'a'
, b'z'
)]);
1985 let cls2
= bclass(&[(b'a'
, b'c'
), (b'e'
, b'g'
), (b's'
, b'u'
)]);
1986 let expected
= bclass(&[(b'd'
, b'd'
), (b'h'
, b'r'
), (b'v'
, b'z'
)]);
1987 assert_eq
!(expected
, bdifference(&cls1
, &cls2
));
1991 fn class_symmetric_difference_unicode() {
1992 let cls1
= uclass(&[('a'
, 'm'
)]);
1993 let cls2
= uclass(&[('g'
, 't'
)]);
1994 let expected
= uclass(&[('a'
, 'f'
), ('n'
, 't'
)]);
1995 assert_eq
!(expected
, usymdifference(&cls1
, &cls2
));
1999 fn class_symmetric_difference_bytes() {
2000 let cls1
= bclass(&[(b'a'
, b'm'
)]);
2001 let cls2
= bclass(&[(b'g'
, b't'
)]);
2002 let expected
= bclass(&[(b'a'
, b'f'
), (b'n'
, b't'
)]);
2003 assert_eq
!(expected
, bsymdifference(&cls1
, &cls2
));
2008 fn hir_byte_literal_non_ascii() {
2009 Hir
::literal(Literal
::Byte(b'a'
));
2012 // We use a thread with an explicit stack size to test that our destructor
2013 // for Hir can handle arbitrarily sized expressions in constant stack
2014 // space. In case we run on a platform without threads (WASM?), we limit
2015 // this test to Windows/Unix.
2017 #[cfg(any(unix, windows))]
2018 fn no_stack_overflow_on_drop() {
2022 let mut expr
= Hir
::empty();
2024 expr
= Hir
::group(Group
{
2025 kind
: GroupKind
::NonCapturing
,
2026 hir
: Box
::new(expr
),
2028 expr
= Hir
::repetition(Repetition
{
2029 kind
: RepetitionKind
::ZeroOrOne
,
2031 hir
: Box
::new(expr
),
2035 kind
: HirKind
::Concat(vec
![expr
]),
2036 info
: HirInfo
::new(),
2039 kind
: HirKind
::Alternation(vec
![expr
]),
2040 info
: HirInfo
::new(),
2043 assert
!(!expr
.kind
.is_empty());
2046 // We run our test on a thread with a small stack size so we can
2047 // force the issue more easily.
2048 thread
::Builder
::new()