2 Defines a high-level intermediate representation for regular expressions.
12 use hir
::interval
::{Interval, IntervalSet, IntervalSetIter}
;
15 pub use hir
::visitor
::{visit, Visitor}
;
16 pub use unicode
::CaseFoldError
;
24 /// An error that can occur while translating an `Ast` to a `Hir`.
25 #[derive(Clone, Debug, Eq, PartialEq)]
27 /// The kind of error.
29 /// The original pattern that the translator's Ast was parsed from. Every
30 /// span in an error is a valid range into this string.
32 /// The span of this error, derived from the Ast given to the translator.
37 /// Return the type of this error.
38 pub fn kind(&self) -> &ErrorKind
{
42 /// The original pattern string in which this error occurred.
44 /// Every span reported by this error is reported in terms of this string.
45 pub fn pattern(&self) -> &str {
49 /// Return the span at which this error occurred.
50 pub fn span(&self) -> &Span
{
55 /// The type of an error that occurred while building an `Hir`.
56 #[derive(Clone, Debug, Eq, PartialEq)]
58 /// This error occurs when a Unicode feature is used when Unicode
59 /// support is disabled. For example `(?-u:\pL)` would trigger this error.
61 /// This error occurs when translating a pattern that could match a byte
62 /// sequence that isn't UTF-8 and `allow_invalid_utf8` was disabled.
64 /// This occurs when an unrecognized Unicode property name could not
66 UnicodePropertyNotFound
,
67 /// This occurs when an unrecognized Unicode property value could not
69 UnicodePropertyValueNotFound
,
70 /// This occurs when a Unicode-aware Perl character class (`\w`, `\s` or
71 /// `\d`) could not be found. This can occur when the `unicode-perl`
72 /// crate feature is not enabled.
73 UnicodePerlClassNotFound
,
74 /// This occurs when the Unicode simple case mapping tables are not
75 /// available, and the regular expression required Unicode aware case
77 UnicodeCaseUnavailable
,
78 /// This occurs when the translator attempts to construct a character class
81 /// Note that this restriction in the translator may be removed in the
84 /// Hints that destructuring should not be exhaustive.
86 /// This enum may grow additional variants, so this makes sure clients
87 /// don't count on exhaustive matching. (Otherwise, adding a new variant
88 /// could break existing code.)
94 // TODO: Remove this method entirely on the next breaking semver release.
96 fn description(&self) -> &str {
97 use self::ErrorKind
::*;
99 UnicodeNotAllowed
=> "Unicode not allowed here",
100 InvalidUtf8
=> "pattern can match invalid UTF-8",
101 UnicodePropertyNotFound
=> "Unicode property not found",
102 UnicodePropertyValueNotFound
=> "Unicode property value not found",
103 UnicodePerlClassNotFound
=> {
104 "Unicode-aware Perl class not found \
105 (make sure the unicode-perl feature is enabled)"
107 UnicodeCaseUnavailable
=> {
108 "Unicode-aware case insensitivity matching is not available \
109 (make sure the unicode-case feature is enabled)"
111 EmptyClassNotAllowed
=> "empty character classes are not allowed",
112 __Nonexhaustive
=> unreachable
!(),
117 impl error
::Error
for Error
{
118 // TODO: Remove this method entirely on the next breaking semver release.
120 fn description(&self) -> &str {
121 self.kind
.description()
125 impl fmt
::Display
for Error
{
126 fn fmt(&self, f
: &mut fmt
::Formatter
) -> fmt
::Result
{
127 ::error
::Formatter
::from(self).fmt(f
)
131 impl fmt
::Display
for ErrorKind
{
132 fn fmt(&self, f
: &mut fmt
::Formatter
) -> fmt
::Result
{
133 // TODO: Remove this on the next breaking semver release.
135 f
.write_str(self.description())
139 /// A high-level intermediate representation (HIR) for a regular expression.
141 /// The HIR of a regular expression represents an intermediate step between its
142 /// abstract syntax (a structured description of the concrete syntax) and
143 /// compiled byte codes. The purpose of HIR is to make regular expressions
144 /// easier to analyze. In particular, the AST is much more complex than the
145 /// HIR. For example, while an AST supports arbitrarily nested character
146 /// classes, the HIR will flatten all nested classes into a single set. The HIR
147 /// will also "compile away" every flag present in the concrete syntax. For
148 /// example, users of HIR expressions never need to worry about case folding;
149 /// it is handled automatically by the translator (e.g., by translating `(?i)A`
152 /// If the HIR was produced by a translator that disallows invalid UTF-8, then
153 /// the HIR is guaranteed to match UTF-8 exclusively.
155 /// This type defines its own destructor that uses constant stack space and
156 /// heap space proportional to the size of the HIR.
158 /// The specific type of an HIR expression can be accessed via its `kind`
159 /// or `into_kind` methods. This extra level of indirection exists for two
162 /// 1. Construction of an HIR expression *must* use the constructor methods
163 /// on this `Hir` type instead of building the `HirKind` values directly.
164 /// This permits construction to enforce invariants like "concatenations
165 /// always consist of two or more sub-expressions."
166 /// 2. Every HIR expression contains attributes that are defined inductively,
167 /// and can be computed cheaply during the construction process. For
168 /// example, one such attribute is whether the expression must match at the
169 /// beginning of the text.
171 /// Also, an `Hir`'s `fmt::Display` implementation prints an HIR as a regular
172 /// expression pattern string, and uses constant stack space and heap space
173 /// proportional to the size of the `Hir`.
174 #[derive(Clone, Debug, Eq, PartialEq)]
176 /// The underlying HIR kind.
178 /// Analysis info about this HIR, computed during construction.
182 /// The kind of an arbitrary `Hir` expression.
183 #[derive(Clone, Debug, Eq, PartialEq)]
185 /// The empty regular expression, which matches everything, including the
188 /// A single literal character that matches exactly this character.
190 /// A single character class that matches any of the characters in the
191 /// class. A class can either consist of Unicode scalar values as
192 /// characters, or it can use bytes.
194 /// An anchor assertion. An anchor assertion match always has zero length.
196 /// A word boundary assertion, which may or may not be Unicode aware. A
197 /// word boundary assertion match always has zero length.
198 WordBoundary(WordBoundary
),
199 /// A repetition operation applied to a child expression.
200 Repetition(Repetition
),
201 /// A possibly capturing group, which contains a child expression.
203 /// A concatenation of expressions. A concatenation always has at least two
204 /// child expressions.
206 /// A concatenation matches only if each of its child expression matches
207 /// one after the other.
209 /// An alternation of expressions. An alternation always has at least two
210 /// child expressions.
212 /// An alternation matches only if at least one of its child expression
213 /// matches. If multiple expressions match, then the leftmost is preferred.
214 Alternation(Vec
<Hir
>),
218 /// Returns a reference to the underlying HIR kind.
219 pub fn kind(&self) -> &HirKind
{
223 /// Consumes ownership of this HIR expression and returns its underlying
225 pub fn into_kind(mut self) -> HirKind
{
227 mem
::replace(&mut self.kind
, HirKind
::Empty
)
230 /// Returns an empty HIR expression.
232 /// An empty HIR expression always matches, including the empty string.
233 pub fn empty() -> Hir
{
234 let mut info
= HirInfo
::new();
235 info
.set_always_utf8(true);
236 info
.set_all_assertions(true);
237 info
.set_anchored_start(false);
238 info
.set_anchored_end(false);
239 info
.set_line_anchored_start(false);
240 info
.set_line_anchored_end(false);
241 info
.set_any_anchored_start(false);
242 info
.set_any_anchored_end(false);
243 info
.set_match_empty(true);
244 info
.set_literal(true);
245 info
.set_alternation_literal(true);
246 Hir { kind: HirKind::Empty, info: info }
249 /// Creates a literal HIR expression.
251 /// If the given literal has a `Byte` variant with an ASCII byte, then this
252 /// method panics. This enforces the invariant that `Byte` variants are
253 /// only used to express matching of invalid UTF-8.
254 pub fn literal(lit
: Literal
) -> Hir
{
255 if let Literal
::Byte(b
) = lit
{
259 let mut info
= HirInfo
::new();
260 info
.set_always_utf8(lit
.is_unicode());
261 info
.set_all_assertions(false);
262 info
.set_anchored_start(false);
263 info
.set_anchored_end(false);
264 info
.set_line_anchored_start(false);
265 info
.set_line_anchored_end(false);
266 info
.set_any_anchored_start(false);
267 info
.set_any_anchored_end(false);
268 info
.set_match_empty(false);
269 info
.set_literal(true);
270 info
.set_alternation_literal(true);
271 Hir { kind: HirKind::Literal(lit), info: info }
274 /// Creates a class HIR expression.
275 pub fn class(class
: Class
) -> Hir
{
276 let mut info
= HirInfo
::new();
277 info
.set_always_utf8(class
.is_always_utf8());
278 info
.set_all_assertions(false);
279 info
.set_anchored_start(false);
280 info
.set_anchored_end(false);
281 info
.set_line_anchored_start(false);
282 info
.set_line_anchored_end(false);
283 info
.set_any_anchored_start(false);
284 info
.set_any_anchored_end(false);
285 info
.set_match_empty(false);
286 info
.set_literal(false);
287 info
.set_alternation_literal(false);
288 Hir { kind: HirKind::Class(class), info: info }
291 /// Creates an anchor assertion HIR expression.
292 pub fn anchor(anchor
: Anchor
) -> Hir
{
293 let mut info
= HirInfo
::new();
294 info
.set_always_utf8(true);
295 info
.set_all_assertions(true);
296 info
.set_anchored_start(false);
297 info
.set_anchored_end(false);
298 info
.set_line_anchored_start(false);
299 info
.set_line_anchored_end(false);
300 info
.set_any_anchored_start(false);
301 info
.set_any_anchored_end(false);
302 info
.set_match_empty(true);
303 info
.set_literal(false);
304 info
.set_alternation_literal(false);
305 if let Anchor
::StartText
= anchor
{
306 info
.set_anchored_start(true);
307 info
.set_line_anchored_start(true);
308 info
.set_any_anchored_start(true);
310 if let Anchor
::EndText
= anchor
{
311 info
.set_anchored_end(true);
312 info
.set_line_anchored_end(true);
313 info
.set_any_anchored_end(true);
315 if let Anchor
::StartLine
= anchor
{
316 info
.set_line_anchored_start(true);
318 if let Anchor
::EndLine
= anchor
{
319 info
.set_line_anchored_end(true);
321 Hir { kind: HirKind::Anchor(anchor), info: info }
324 /// Creates a word boundary assertion HIR expression.
325 pub fn word_boundary(word_boundary
: WordBoundary
) -> Hir
{
326 let mut info
= HirInfo
::new();
327 info
.set_always_utf8(true);
328 info
.set_all_assertions(true);
329 info
.set_anchored_start(false);
330 info
.set_anchored_end(false);
331 info
.set_line_anchored_start(false);
332 info
.set_line_anchored_end(false);
333 info
.set_any_anchored_start(false);
334 info
.set_any_anchored_end(false);
335 info
.set_literal(false);
336 info
.set_alternation_literal(false);
337 // A negated word boundary matches the empty string, but a normal
338 // word boundary does not!
339 info
.set_match_empty(word_boundary
.is_negated());
340 // Negated ASCII word boundaries can match invalid UTF-8.
341 if let WordBoundary
::AsciiNegate
= word_boundary
{
342 info
.set_always_utf8(false);
344 Hir { kind: HirKind::WordBoundary(word_boundary), info: info }
347 /// Creates a repetition HIR expression.
348 pub fn repetition(rep
: Repetition
) -> Hir
{
349 let mut info
= HirInfo
::new();
350 info
.set_always_utf8(rep
.hir
.is_always_utf8());
351 info
.set_all_assertions(rep
.hir
.is_all_assertions());
352 // If this operator can match the empty string, then it can never
354 info
.set_anchored_start(
355 !rep
.is_match_empty() && rep
.hir
.is_anchored_start(),
357 info
.set_anchored_end(
358 !rep
.is_match_empty() && rep
.hir
.is_anchored_end(),
360 info
.set_line_anchored_start(
361 !rep
.is_match_empty() && rep
.hir
.is_anchored_start(),
363 info
.set_line_anchored_end(
364 !rep
.is_match_empty() && rep
.hir
.is_anchored_end(),
366 info
.set_any_anchored_start(rep
.hir
.is_any_anchored_start());
367 info
.set_any_anchored_end(rep
.hir
.is_any_anchored_end());
368 info
.set_match_empty(rep
.is_match_empty() || rep
.hir
.is_match_empty());
369 info
.set_literal(false);
370 info
.set_alternation_literal(false);
371 Hir { kind: HirKind::Repetition(rep), info: info }
374 /// Creates a group HIR expression.
375 pub fn group(group
: Group
) -> Hir
{
376 let mut info
= HirInfo
::new();
377 info
.set_always_utf8(group
.hir
.is_always_utf8());
378 info
.set_all_assertions(group
.hir
.is_all_assertions());
379 info
.set_anchored_start(group
.hir
.is_anchored_start());
380 info
.set_anchored_end(group
.hir
.is_anchored_end());
381 info
.set_line_anchored_start(group
.hir
.is_line_anchored_start());
382 info
.set_line_anchored_end(group
.hir
.is_line_anchored_end());
383 info
.set_any_anchored_start(group
.hir
.is_any_anchored_start());
384 info
.set_any_anchored_end(group
.hir
.is_any_anchored_end());
385 info
.set_match_empty(group
.hir
.is_match_empty());
386 info
.set_literal(false);
387 info
.set_alternation_literal(false);
388 Hir { kind: HirKind::Group(group), info: info }
391 /// Returns the concatenation of the given expressions.
393 /// This flattens the concatenation as appropriate.
394 pub fn concat(mut exprs
: Vec
<Hir
>) -> Hir
{
397 1 => exprs
.pop().unwrap(),
399 let mut info
= HirInfo
::new();
400 info
.set_always_utf8(true);
401 info
.set_all_assertions(true);
402 info
.set_any_anchored_start(false);
403 info
.set_any_anchored_end(false);
404 info
.set_match_empty(true);
405 info
.set_literal(true);
406 info
.set_alternation_literal(true);
408 // Some attributes require analyzing all sub-expressions.
410 let x
= info
.is_always_utf8() && e
.is_always_utf8();
411 info
.set_always_utf8(x
);
413 let x
= info
.is_all_assertions() && e
.is_all_assertions();
414 info
.set_all_assertions(x
);
416 let x
= info
.is_any_anchored_start()
417 || e
.is_any_anchored_start();
418 info
.set_any_anchored_start(x
);
421 info
.is_any_anchored_end() || e
.is_any_anchored_end();
422 info
.set_any_anchored_end(x
);
424 let x
= info
.is_match_empty() && e
.is_match_empty();
425 info
.set_match_empty(x
);
427 let x
= info
.is_literal() && e
.is_literal();
430 let x
= info
.is_alternation_literal()
431 && e
.is_alternation_literal();
432 info
.set_alternation_literal(x
);
434 // Anchored attributes require something slightly more
435 // sophisticated. Normally, WLOG, to determine whether an
436 // expression is anchored to the start, we'd only need to check
437 // the first expression of a concatenation. However,
438 // expressions like `$\b^` are still anchored to the start,
439 // but the first expression in the concatenation *isn't*
440 // anchored to the start. So the "first" expression to look at
441 // is actually one that is either not an assertion or is
442 // specifically the StartText assertion.
443 info
.set_anchored_start(
447 e
.is_anchored_start() || e
.is_all_assertions()
449 .any(|e
| e
.is_anchored_start()),
451 // Similarly for the end anchor, but in reverse.
452 info
.set_anchored_end(
457 e
.is_anchored_end() || e
.is_all_assertions()
459 .any(|e
| e
.is_anchored_end()),
461 // Repeat the process for line anchors.
462 info
.set_line_anchored_start(
466 e
.is_line_anchored_start() || e
.is_all_assertions()
468 .any(|e
| e
.is_line_anchored_start()),
470 info
.set_line_anchored_end(
475 e
.is_line_anchored_end() || e
.is_all_assertions()
477 .any(|e
| e
.is_line_anchored_end()),
479 Hir { kind: HirKind::Concat(exprs), info: info }
484 /// Returns the alternation of the given expressions.
486 /// This flattens the alternation as appropriate.
487 pub fn alternation(mut exprs
: Vec
<Hir
>) -> Hir
{
490 1 => exprs
.pop().unwrap(),
492 let mut info
= HirInfo
::new();
493 info
.set_always_utf8(true);
494 info
.set_all_assertions(true);
495 info
.set_anchored_start(true);
496 info
.set_anchored_end(true);
497 info
.set_line_anchored_start(true);
498 info
.set_line_anchored_end(true);
499 info
.set_any_anchored_start(false);
500 info
.set_any_anchored_end(false);
501 info
.set_match_empty(false);
502 info
.set_literal(false);
503 info
.set_alternation_literal(true);
505 // Some attributes require analyzing all sub-expressions.
507 let x
= info
.is_always_utf8() && e
.is_always_utf8();
508 info
.set_always_utf8(x
);
510 let x
= info
.is_all_assertions() && e
.is_all_assertions();
511 info
.set_all_assertions(x
);
513 let x
= info
.is_anchored_start() && e
.is_anchored_start();
514 info
.set_anchored_start(x
);
516 let x
= info
.is_anchored_end() && e
.is_anchored_end();
517 info
.set_anchored_end(x
);
519 let x
= info
.is_line_anchored_start()
520 && e
.is_line_anchored_start();
521 info
.set_line_anchored_start(x
);
523 let x
= info
.is_line_anchored_end()
524 && e
.is_line_anchored_end();
525 info
.set_line_anchored_end(x
);
527 let x
= info
.is_any_anchored_start()
528 || e
.is_any_anchored_start();
529 info
.set_any_anchored_start(x
);
532 info
.is_any_anchored_end() || e
.is_any_anchored_end();
533 info
.set_any_anchored_end(x
);
535 let x
= info
.is_match_empty() || e
.is_match_empty();
536 info
.set_match_empty(x
);
538 let x
= info
.is_alternation_literal() && e
.is_literal();
539 info
.set_alternation_literal(x
);
541 Hir { kind: HirKind::Alternation(exprs), info: info }
546 /// Build an HIR expression for `.`.
548 /// A `.` expression matches any character except for `\n`. To build an
549 /// expression that matches any character, including `\n`, use the `any`
552 /// If `bytes` is `true`, then this assumes characters are limited to a
554 pub fn dot(bytes
: bool
) -> Hir
{
556 let mut cls
= ClassBytes
::empty();
557 cls
.push(ClassBytesRange
::new(b'
\0'
, b'
\x09'
));
558 cls
.push(ClassBytesRange
::new(b'
\x0B'
, b'
\xFF'
));
559 Hir
::class(Class
::Bytes(cls
))
561 let mut cls
= ClassUnicode
::empty();
562 cls
.push(ClassUnicodeRange
::new('
\0'
, '
\x09'
));
563 cls
.push(ClassUnicodeRange
::new('
\x0B'
, '
\u{10FFFF}'
));
564 Hir
::class(Class
::Unicode(cls
))
568 /// Build an HIR expression for `(?s).`.
570 /// A `(?s).` expression matches any character, including `\n`. To build an
571 /// expression that matches any character except for `\n`, then use the
574 /// If `bytes` is `true`, then this assumes characters are limited to a
576 pub fn any(bytes
: bool
) -> Hir
{
578 let mut cls
= ClassBytes
::empty();
579 cls
.push(ClassBytesRange
::new(b'
\0'
, b'
\xFF'
));
580 Hir
::class(Class
::Bytes(cls
))
582 let mut cls
= ClassUnicode
::empty();
583 cls
.push(ClassUnicodeRange
::new('
\0'
, '
\u{10FFFF}'
));
584 Hir
::class(Class
::Unicode(cls
))
588 /// Return true if and only if this HIR will always match valid UTF-8.
590 /// When this returns false, then it is possible for this HIR expression
591 /// to match invalid UTF-8.
592 pub fn is_always_utf8(&self) -> bool
{
593 self.info
.is_always_utf8()
596 /// Returns true if and only if this entire HIR expression is made up of
597 /// zero-width assertions.
599 /// This includes expressions like `^$\b\A\z` and even `((\b)+())*^`, but
601 pub fn is_all_assertions(&self) -> bool
{
602 self.info
.is_all_assertions()
605 /// Return true if and only if this HIR is required to match from the
606 /// beginning of text. This includes expressions like `^foo`, `^(foo|bar)`,
607 /// `^foo|^bar` but not `^foo|bar`.
608 pub fn is_anchored_start(&self) -> bool
{
609 self.info
.is_anchored_start()
612 /// Return true if and only if this HIR is required to match at the end
613 /// of text. This includes expressions like `foo$`, `(foo|bar)$`,
614 /// `foo$|bar$` but not `foo$|bar`.
615 pub fn is_anchored_end(&self) -> bool
{
616 self.info
.is_anchored_end()
619 /// Return true if and only if this HIR is required to match from the
620 /// beginning of text or the beginning of a line. This includes expressions
621 /// like `^foo`, `(?m)^foo`, `^(foo|bar)`, `^(foo|bar)`, `(?m)^foo|^bar`
622 /// but not `^foo|bar` or `(?m)^foo|bar`.
624 /// Note that if `is_anchored_start` is `true`, then
625 /// `is_line_anchored_start` will also be `true`. The reverse implication
626 /// is not true. For example, `(?m)^foo` is line anchored, but not
627 /// `is_anchored_start`.
628 pub fn is_line_anchored_start(&self) -> bool
{
629 self.info
.is_line_anchored_start()
632 /// Return true if and only if this HIR is required to match at the
633 /// end of text or the end of a line. This includes expressions like
634 /// `foo$`, `(?m)foo$`, `(foo|bar)$`, `(?m)(foo|bar)$`, `foo$|bar$`,
635 /// `(?m)(foo|bar)$`, but not `foo$|bar` or `(?m)foo$|bar`.
637 /// Note that if `is_anchored_end` is `true`, then
638 /// `is_line_anchored_end` will also be `true`. The reverse implication
639 /// is not true. For example, `(?m)foo$` is line anchored, but not
640 /// `is_anchored_end`.
641 pub fn is_line_anchored_end(&self) -> bool
{
642 self.info
.is_line_anchored_end()
645 /// Return true if and only if this HIR contains any sub-expression that
646 /// is required to match at the beginning of text. Specifically, this
647 /// returns true if the `^` symbol (when multiline mode is disabled) or the
648 /// `\A` escape appear anywhere in the regex.
649 pub fn is_any_anchored_start(&self) -> bool
{
650 self.info
.is_any_anchored_start()
653 /// Return true if and only if this HIR contains any sub-expression that is
654 /// required to match at the end of text. Specifically, this returns true
655 /// if the `$` symbol (when multiline mode is disabled) or the `\z` escape
656 /// appear anywhere in the regex.
657 pub fn is_any_anchored_end(&self) -> bool
{
658 self.info
.is_any_anchored_end()
661 /// Return true if and only if the empty string is part of the language
662 /// matched by this regular expression.
664 /// This includes `a*`, `a?b*`, `a{0}`, `()`, `()+`, `^$`, `a|b?`, `\B`,
665 /// but not `a`, `a+` or `\b`.
666 pub fn is_match_empty(&self) -> bool
{
667 self.info
.is_match_empty()
670 /// Return true if and only if this HIR is a simple literal. This is only
671 /// true when this HIR expression is either itself a `Literal` or a
672 /// concatenation of only `Literal`s.
674 /// For example, `f` and `foo` are literals, but `f+`, `(foo)`, `foo()`
675 /// are not (even though that contain sub-expressions that are literals).
676 pub fn is_literal(&self) -> bool
{
677 self.info
.is_literal()
680 /// Return true if and only if this HIR is either a simple literal or an
681 /// alternation of simple literals. This is only
682 /// true when this HIR expression is either itself a `Literal` or a
683 /// concatenation of only `Literal`s or an alternation of only `Literal`s.
685 /// For example, `f`, `foo`, `a|b|c`, and `foo|bar|baz` are alternaiton
686 /// literals, but `f+`, `(foo)`, `foo()`
687 /// are not (even though that contain sub-expressions that are literals).
688 pub fn is_alternation_literal(&self) -> bool
{
689 self.info
.is_alternation_literal()
694 /// Return true if and only if this HIR is the empty regular expression.
696 /// Note that this is not defined inductively. That is, it only tests if
697 /// this kind is the `Empty` variant. To get the inductive definition,
698 /// use the `is_match_empty` method on [`Hir`](struct.Hir.html).
699 pub fn is_empty(&self) -> bool
{
701 HirKind
::Empty
=> true,
706 /// Returns true if and only if this kind has any (including possibly
707 /// empty) subexpressions.
708 pub fn has_subexprs(&self) -> bool
{
711 | HirKind
::Literal(_
)
714 | HirKind
::WordBoundary(_
) => false,
716 | HirKind
::Repetition(_
)
718 | HirKind
::Alternation(_
) => true,
723 /// Print a display representation of this Hir.
725 /// The result of this is a valid regular expression pattern string.
727 /// This implementation uses constant stack space and heap space proportional
728 /// to the size of the `Hir`.
729 impl fmt
::Display
for Hir
{
730 fn fmt(&self, f
: &mut fmt
::Formatter
) -> fmt
::Result
{
731 use hir
::print
::Printer
;
732 Printer
::new().print(self, f
)
736 /// The high-level intermediate representation of a literal.
738 /// A literal corresponds to a single character, where a character is either
739 /// defined by a Unicode scalar value or an arbitrary byte. Unicode characters
740 /// are preferred whenever possible. In particular, a `Byte` variant is only
741 /// ever produced when it could match invalid UTF-8.
742 #[derive(Clone, Debug, Eq, PartialEq)]
744 /// A single character represented by a Unicode scalar value.
746 /// A single character represented by an arbitrary byte.
751 /// Returns true if and only if this literal corresponds to a Unicode
753 pub fn is_unicode(&self) -> bool
{
755 Literal
::Unicode(_
) => true,
756 Literal
::Byte(b
) if b
<= 0x7F => true,
757 Literal
::Byte(_
) => false,
762 /// The high-level intermediate representation of a character class.
764 /// A character class corresponds to a set of characters. A character is either
765 /// defined by a Unicode scalar value or a byte. Unicode characters are used
766 /// by default, while bytes are used when Unicode mode (via the `u` flag) is
769 /// A character class, regardless of its character type, is represented by a
770 /// sequence of non-overlapping non-adjacent ranges of characters.
772 /// Note that unlike [`Literal`](enum.Literal.html), a `Bytes` variant may
773 /// be produced even when it exclusively matches valid UTF-8. This is because
774 /// a `Bytes` variant represents an intention by the author of the regular
775 /// expression to disable Unicode mode, which in turn impacts the semantics of
776 /// case insensitive matching. For example, `(?i)k` and `(?i-u)k` will not
777 /// match the same set of strings.
778 #[derive(Clone, Debug, Eq, PartialEq)]
780 /// A set of characters represented by Unicode scalar values.
781 Unicode(ClassUnicode
),
782 /// A set of characters represented by arbitrary bytes (one byte per
788 /// Apply Unicode simple case folding to this character class, in place.
789 /// The character class will be expanded to include all simple case folded
790 /// character variants.
792 /// If this is a byte oriented character class, then this will be limited
793 /// to the ASCII ranges `A-Z` and `a-z`.
794 pub fn case_fold_simple(&mut self) {
796 Class
::Unicode(ref mut x
) => x
.case_fold_simple(),
797 Class
::Bytes(ref mut x
) => x
.case_fold_simple(),
801 /// Negate this character class in place.
803 /// After completion, this character class will contain precisely the
804 /// characters that weren't previously in the class.
805 pub fn negate(&mut self) {
807 Class
::Unicode(ref mut x
) => x
.negate(),
808 Class
::Bytes(ref mut x
) => x
.negate(),
812 /// Returns true if and only if this character class will only ever match
815 /// A character class can match invalid UTF-8 only when the following
816 /// conditions are met:
818 /// 1. The translator was configured to permit generating an expression
819 /// that can match invalid UTF-8. (By default, this is disabled.)
820 /// 2. Unicode mode (via the `u` flag) was disabled either in the concrete
821 /// syntax or in the parser builder. By default, Unicode mode is
823 pub fn is_always_utf8(&self) -> bool
{
825 Class
::Unicode(_
) => true,
826 Class
::Bytes(ref x
) => x
.is_all_ascii(),
831 /// A set of characters represented by Unicode scalar values.
832 #[derive(Clone, Debug, Eq, PartialEq)]
833 pub struct ClassUnicode
{
834 set
: IntervalSet
<ClassUnicodeRange
>,
838 /// Create a new class from a sequence of ranges.
840 /// The given ranges do not need to be in any specific order, and ranges
842 pub fn new
<I
>(ranges
: I
) -> ClassUnicode
844 I
: IntoIterator
<Item
= ClassUnicodeRange
>,
846 ClassUnicode { set: IntervalSet::new(ranges) }
849 /// Create a new class with no ranges.
850 pub fn empty() -> ClassUnicode
{
851 ClassUnicode
::new(vec
![])
854 /// Add a new range to this set.
855 pub fn push(&mut self, range
: ClassUnicodeRange
) {
856 self.set
.push(range
);
859 /// Return an iterator over all ranges in this class.
861 /// The iterator yields ranges in ascending order.
862 pub fn iter(&self) -> ClassUnicodeIter
{
863 ClassUnicodeIter(self.set
.iter())
866 /// Return the underlying ranges as a slice.
867 pub fn ranges(&self) -> &[ClassUnicodeRange
] {
871 /// Expand this character class such that it contains all case folded
872 /// characters, according to Unicode's "simple" mapping. For example, if
873 /// this class consists of the range `a-z`, then applying case folding will
874 /// result in the class containing both the ranges `a-z` and `A-Z`.
878 /// This routine panics when the case mapping data necessary for this
879 /// routine to complete is unavailable. This occurs when the `unicode-case`
880 /// feature is not enabled.
882 /// Callers should prefer using `try_case_fold_simple` instead, which will
883 /// return an error instead of panicking.
884 pub fn case_fold_simple(&mut self) {
887 .expect("unicode-case feature must be enabled");
890 /// Expand this character class such that it contains all case folded
891 /// characters, according to Unicode's "simple" mapping. For example, if
892 /// this class consists of the range `a-z`, then applying case folding will
893 /// result in the class containing both the ranges `a-z` and `A-Z`.
897 /// This routine returns an error when the case mapping data necessary
898 /// for this routine to complete is unavailable. This occurs when the
899 /// `unicode-case` feature is not enabled.
900 pub fn try_case_fold_simple(
902 ) -> result
::Result
<(), CaseFoldError
> {
903 self.set
.case_fold_simple()
906 /// Negate this character class.
908 /// For all `c` where `c` is a Unicode scalar value, if `c` was in this
909 /// set, then it will not be in this set after negation.
910 pub fn negate(&mut self) {
914 /// Union this character class with the given character class, in place.
915 pub fn union(&mut self, other
: &ClassUnicode
) {
916 self.set
.union(&other
.set
);
919 /// Intersect this character class with the given character class, in
921 pub fn intersect(&mut self, other
: &ClassUnicode
) {
922 self.set
.intersect(&other
.set
);
925 /// Subtract the given character class from this character class, in place.
926 pub fn difference(&mut self, other
: &ClassUnicode
) {
927 self.set
.difference(&other
.set
);
930 /// Compute the symmetric difference of the given character classes, in
933 /// This computes the symmetric difference of two character classes. This
934 /// removes all elements in this class that are also in the given class,
935 /// but all adds all elements from the given class that aren't in this
936 /// class. That is, the class will contain all elements in either class,
937 /// but will not contain any elements that are in both classes.
938 pub fn symmetric_difference(&mut self, other
: &ClassUnicode
) {
939 self.set
.symmetric_difference(&other
.set
);
942 /// Returns true if and only if this character class will either match
943 /// nothing or only ASCII bytes. Stated differently, this returns false
944 /// if and only if this class contains a non-ASCII codepoint.
945 pub fn is_all_ascii(&self) -> bool
{
946 self.set
.intervals().last().map_or(true, |r
| r
.end
<= '
\x7F'
)
950 /// An iterator over all ranges in a Unicode character class.
952 /// The lifetime `'a` refers to the lifetime of the underlying class.
954 pub struct ClassUnicodeIter
<'a
>(IntervalSetIter
<'a
, ClassUnicodeRange
>);
956 impl<'a
> Iterator
for ClassUnicodeIter
<'a
> {
957 type Item
= &'a ClassUnicodeRange
;
959 fn next(&mut self) -> Option
<&'a ClassUnicodeRange
> {
964 /// A single range of characters represented by Unicode scalar values.
966 /// The range is closed. That is, the start and end of the range are included
968 #[derive(Clone, Copy, Default, Eq, PartialEq, PartialOrd, Ord)]
969 pub struct ClassUnicodeRange
{
974 impl fmt
::Debug
for ClassUnicodeRange
{
975 fn fmt(&self, f
: &mut fmt
::Formatter
) -> fmt
::Result
{
976 let start
= if !self.start
.is_whitespace() && !self.start
.is_control()
978 self.start
.to_string()
980 format
!("0x{:X}", self.start
as u32)
982 let end
= if !self.end
.is_whitespace() && !self.end
.is_control() {
985 format
!("0x{:X}", self.end
as u32)
987 f
.debug_struct("ClassUnicodeRange")
988 .field("start", &start
)
994 impl Interval
for ClassUnicodeRange
{
998 fn lower(&self) -> char {
1002 fn upper(&self) -> char {
1006 fn set_lower(&mut self, bound
: char) {
1010 fn set_upper(&mut self, bound
: char) {
1014 /// Apply simple case folding to this Unicode scalar value range.
1016 /// Additional ranges are appended to the given vector. Canonical ordering
1017 /// is *not* maintained in the given vector.
1018 fn case_fold_simple(
1020 ranges
: &mut Vec
<ClassUnicodeRange
>,
1021 ) -> Result
<(), unicode
::CaseFoldError
> {
1022 if !unicode
::contains_simple_case_mapping(self.start
, self.end
)?
{
1025 let start
= self.start
as u32;
1026 let end
= (self.end
as u32).saturating_add(1);
1027 let mut next_simple_cp
= None
;
1028 for cp
in (start
..end
).filter_map(char::from_u32
) {
1029 if next_simple_cp
.map_or(false, |next
| cp
< next
) {
1032 let it
= match unicode
::simple_fold(cp
)?
{
1035 next_simple_cp
= next
;
1039 for cp_folded
in it
{
1040 ranges
.push(ClassUnicodeRange
::new(cp_folded
, cp_folded
));
1047 impl ClassUnicodeRange
{
1048 /// Create a new Unicode scalar value range for a character class.
1050 /// The returned range is always in a canonical form. That is, the range
1051 /// returned always satisfies the invariant that `start <= end`.
1052 pub fn new(start
: char, end
: char) -> ClassUnicodeRange
{
1053 ClassUnicodeRange
::create(start
, end
)
1056 /// Return the start of this range.
1058 /// The start of a range is always less than or equal to the end of the
1060 pub fn start(&self) -> char {
1064 /// Return the end of this range.
1066 /// The end of a range is always greater than or equal to the start of the
1068 pub fn end(&self) -> char {
1073 /// A set of characters represented by arbitrary bytes (where one byte
1074 /// corresponds to one character).
1075 #[derive(Clone, Debug, Eq, PartialEq)]
1076 pub struct ClassBytes
{
1077 set
: IntervalSet
<ClassBytesRange
>,
1081 /// Create a new class from a sequence of ranges.
1083 /// The given ranges do not need to be in any specific order, and ranges
1085 pub fn new
<I
>(ranges
: I
) -> ClassBytes
1087 I
: IntoIterator
<Item
= ClassBytesRange
>,
1089 ClassBytes { set: IntervalSet::new(ranges) }
1092 /// Create a new class with no ranges.
1093 pub fn empty() -> ClassBytes
{
1094 ClassBytes
::new(vec
![])
1097 /// Add a new range to this set.
1098 pub fn push(&mut self, range
: ClassBytesRange
) {
1099 self.set
.push(range
);
1102 /// Return an iterator over all ranges in this class.
1104 /// The iterator yields ranges in ascending order.
1105 pub fn iter(&self) -> ClassBytesIter
{
1106 ClassBytesIter(self.set
.iter())
1109 /// Return the underlying ranges as a slice.
1110 pub fn ranges(&self) -> &[ClassBytesRange
] {
1111 self.set
.intervals()
1114 /// Expand this character class such that it contains all case folded
1115 /// characters. For example, if this class consists of the range `a-z`,
1116 /// then applying case folding will result in the class containing both the
1117 /// ranges `a-z` and `A-Z`.
1119 /// Note that this only applies ASCII case folding, which is limited to the
1120 /// characters `a-z` and `A-Z`.
1121 pub fn case_fold_simple(&mut self) {
1122 self.set
.case_fold_simple().expect("ASCII case folding never fails");
1125 /// Negate this byte class.
1127 /// For all `b` where `b` is a any byte, if `b` was in this set, then it
1128 /// will not be in this set after negation.
1129 pub fn negate(&mut self) {
1133 /// Union this byte class with the given byte class, in place.
1134 pub fn union(&mut self, other
: &ClassBytes
) {
1135 self.set
.union(&other
.set
);
1138 /// Intersect this byte class with the given byte class, in place.
1139 pub fn intersect(&mut self, other
: &ClassBytes
) {
1140 self.set
.intersect(&other
.set
);
1143 /// Subtract the given byte class from this byte class, in place.
1144 pub fn difference(&mut self, other
: &ClassBytes
) {
1145 self.set
.difference(&other
.set
);
1148 /// Compute the symmetric difference of the given byte classes, in place.
1150 /// This computes the symmetric difference of two byte classes. This
1151 /// removes all elements in this class that are also in the given class,
1152 /// but all adds all elements from the given class that aren't in this
1153 /// class. That is, the class will contain all elements in either class,
1154 /// but will not contain any elements that are in both classes.
1155 pub fn symmetric_difference(&mut self, other
: &ClassBytes
) {
1156 self.set
.symmetric_difference(&other
.set
);
1159 /// Returns true if and only if this character class will either match
1160 /// nothing or only ASCII bytes. Stated differently, this returns false
1161 /// if and only if this class contains a non-ASCII byte.
1162 pub fn is_all_ascii(&self) -> bool
{
1163 self.set
.intervals().last().map_or(true, |r
| r
.end
<= 0x7F)
1167 /// An iterator over all ranges in a byte character class.
1169 /// The lifetime `'a` refers to the lifetime of the underlying class.
1171 pub struct ClassBytesIter
<'a
>(IntervalSetIter
<'a
, ClassBytesRange
>);
1173 impl<'a
> Iterator
for ClassBytesIter
<'a
> {
1174 type Item
= &'a ClassBytesRange
;
1176 fn next(&mut self) -> Option
<&'a ClassBytesRange
> {
1181 /// A single range of characters represented by arbitrary bytes.
1183 /// The range is closed. That is, the start and end of the range are included
1185 #[derive(Clone, Copy, Default, Eq, PartialEq, PartialOrd, Ord)]
1186 pub struct ClassBytesRange
{
1191 impl Interval
for ClassBytesRange
{
1195 fn lower(&self) -> u8 {
1199 fn upper(&self) -> u8 {
1203 fn set_lower(&mut self, bound
: u8) {
1207 fn set_upper(&mut self, bound
: u8) {
1211 /// Apply simple case folding to this byte range. Only ASCII case mappings
1212 /// (for a-z) are applied.
1214 /// Additional ranges are appended to the given vector. Canonical ordering
1215 /// is *not* maintained in the given vector.
1216 fn case_fold_simple(
1218 ranges
: &mut Vec
<ClassBytesRange
>,
1219 ) -> Result
<(), unicode
::CaseFoldError
> {
1220 if !ClassBytesRange
::new(b'a'
, b'z'
).is_intersection_empty(self) {
1221 let lower
= cmp
::max(self.start
, b'a'
);
1222 let upper
= cmp
::min(self.end
, b'z'
);
1223 ranges
.push(ClassBytesRange
::new(lower
- 32, upper
- 32));
1225 if !ClassBytesRange
::new(b'A'
, b'Z'
).is_intersection_empty(self) {
1226 let lower
= cmp
::max(self.start
, b'A'
);
1227 let upper
= cmp
::min(self.end
, b'Z'
);
1228 ranges
.push(ClassBytesRange
::new(lower
+ 32, upper
+ 32));
1234 impl ClassBytesRange
{
1235 /// Create a new byte range for a character class.
1237 /// The returned range is always in a canonical form. That is, the range
1238 /// returned always satisfies the invariant that `start <= end`.
1239 pub fn new(start
: u8, end
: u8) -> ClassBytesRange
{
1240 ClassBytesRange
::create(start
, end
)
1243 /// Return the start of this range.
1245 /// The start of a range is always less than or equal to the end of the
1247 pub fn start(&self) -> u8 {
1251 /// Return the end of this range.
1253 /// The end of a range is always greater than or equal to the start of the
1255 pub fn end(&self) -> u8 {
1260 impl fmt
::Debug
for ClassBytesRange
{
1261 fn fmt(&self, f
: &mut fmt
::Formatter
) -> fmt
::Result
{
1262 let mut debug
= f
.debug_struct("ClassBytesRange");
1263 if self.start
<= 0x7F {
1264 debug
.field("start", &(self.start
as char));
1266 debug
.field("start", &self.start
);
1268 if self.end
<= 0x7F {
1269 debug
.field("end", &(self.end
as char));
1271 debug
.field("end", &self.end
);
1277 /// The high-level intermediate representation for an anchor assertion.
1279 /// A matching anchor assertion is always zero-length.
1280 #[derive(Clone, Debug, Eq, PartialEq)]
1282 /// Match the beginning of a line or the beginning of text. Specifically,
1283 /// this matches at the starting position of the input, or at the position
1284 /// immediately following a `\n` character.
1286 /// Match the end of a line or the end of text. Specifically,
1287 /// this matches at the end position of the input, or at the position
1288 /// immediately preceding a `\n` character.
1290 /// Match the beginning of text. Specifically, this matches at the starting
1291 /// position of the input.
1293 /// Match the end of text. Specifically, this matches at the ending
1294 /// position of the input.
1298 /// The high-level intermediate representation for a word-boundary assertion.
1300 /// A matching word boundary assertion is always zero-length.
1301 #[derive(Clone, Debug, Eq, PartialEq)]
1302 pub enum WordBoundary
{
1303 /// Match a Unicode-aware word boundary. That is, this matches a position
1304 /// where the left adjacent character and right adjacent character
1305 /// correspond to a word and non-word or a non-word and word character.
1307 /// Match a Unicode-aware negation of a word boundary.
1309 /// Match an ASCII-only word boundary. That is, this matches a position
1310 /// where the left adjacent character and right adjacent character
1311 /// correspond to a word and non-word or a non-word and word character.
1313 /// Match an ASCII-only negation of a word boundary.
1318 /// Returns true if and only if this word boundary assertion is negated.
1319 pub fn is_negated(&self) -> bool
{
1321 WordBoundary
::Unicode
| WordBoundary
::Ascii
=> false,
1322 WordBoundary
::UnicodeNegate
| WordBoundary
::AsciiNegate
=> true,
1327 /// The high-level intermediate representation for a group.
1329 /// This represents one of three possible group types:
1331 /// 1. A non-capturing group (e.g., `(?:expr)`).
1332 /// 2. A capturing group (e.g., `(expr)`).
1333 /// 3. A named capturing group (e.g., `(?P<name>expr)`).
1334 #[derive(Clone, Debug, Eq, PartialEq)]
1336 /// The kind of this group. If it is a capturing group, then the kind
1337 /// contains the capture group index (and the name, if it is a named
1339 pub kind
: GroupKind
,
1340 /// The expression inside the capturing group, which may be empty.
1344 /// The kind of group.
1345 #[derive(Clone, Debug, Eq, PartialEq)]
1346 pub enum GroupKind
{
1347 /// A normal unnamed capturing group.
1349 /// The value is the capture index of the group.
1351 /// A named capturing group.
1353 /// The name of the group.
1355 /// The capture index of the group.
1358 /// A non-capturing group.
1362 /// The high-level intermediate representation of a repetition operator.
1364 /// A repetition operator permits the repetition of an arbitrary
1366 #[derive(Clone, Debug, Eq, PartialEq)]
1367 pub struct Repetition
{
1368 /// The kind of this repetition operator.
1369 pub kind
: RepetitionKind
,
1370 /// Whether this repetition operator is greedy or not. A greedy operator
1371 /// will match as much as it can. A non-greedy operator will match as
1372 /// little as it can.
1374 /// Typically, operators are greedy by default and are only non-greedy when
1375 /// a `?` suffix is used, e.g., `(expr)*` is greedy while `(expr)*?` is
1376 /// not. However, this can be inverted via the `U` "ungreedy" flag.
1378 /// The expression being repeated.
1383 /// Returns true if and only if this repetition operator makes it possible
1384 /// to match the empty string.
1386 /// Note that this is not defined inductively. For example, while `a*`
1387 /// will report `true`, `()+` will not, even though `()` matches the empty
1388 /// string and one or more occurrences of something that matches the empty
1389 /// string will always match the empty string. In order to get the
1390 /// inductive definition, see the corresponding method on
1391 /// [`Hir`](struct.Hir.html).
1392 pub fn is_match_empty(&self) -> bool
{
1394 RepetitionKind
::ZeroOrOne
=> true,
1395 RepetitionKind
::ZeroOrMore
=> true,
1396 RepetitionKind
::OneOrMore
=> false,
1397 RepetitionKind
::Range(RepetitionRange
::Exactly(m
)) => m
== 0,
1398 RepetitionKind
::Range(RepetitionRange
::AtLeast(m
)) => m
== 0,
1399 RepetitionKind
::Range(RepetitionRange
::Bounded(m
, _
)) => m
== 0,
1404 /// The kind of a repetition operator.
1405 #[derive(Clone, Debug, Eq, PartialEq)]
1406 pub enum RepetitionKind
{
1407 /// Matches a sub-expression zero or one times.
1409 /// Matches a sub-expression zero or more times.
1411 /// Matches a sub-expression one or more times.
1413 /// Matches a sub-expression within a bounded range of times.
1414 Range(RepetitionRange
),
1417 /// The kind of a counted repetition operator.
1418 #[derive(Clone, Debug, Eq, PartialEq)]
1419 pub enum RepetitionRange
{
1420 /// Matches a sub-expression exactly this many times.
1422 /// Matches a sub-expression at least this many times.
1424 /// Matches a sub-expression at least `m` times and at most `n` times.
1428 /// A custom `Drop` impl is used for `HirKind` such that it uses constant stack
1429 /// space but heap space proportional to the depth of the total `Hir`.
1431 fn drop(&mut self) {
1434 match *self.kind() {
1436 | HirKind
::Literal(_
)
1438 | HirKind
::Anchor(_
)
1439 | HirKind
::WordBoundary(_
) => return,
1440 HirKind
::Group(ref x
) if !x
.hir
.kind
.has_subexprs() => return,
1441 HirKind
::Repetition(ref x
) if !x
.hir
.kind
.has_subexprs() => return,
1442 HirKind
::Concat(ref x
) if x
.is_empty() => return,
1443 HirKind
::Alternation(ref x
) if x
.is_empty() => return,
1447 let mut stack
= vec
![mem
::replace(self, Hir
::empty())];
1448 while let Some(mut expr
) = stack
.pop() {
1451 | HirKind
::Literal(_
)
1453 | HirKind
::Anchor(_
)
1454 | HirKind
::WordBoundary(_
) => {}
1455 HirKind
::Group(ref mut x
) => {
1456 stack
.push(mem
::replace(&mut x
.hir
, Hir
::empty()));
1458 HirKind
::Repetition(ref mut x
) => {
1459 stack
.push(mem
::replace(&mut x
.hir
, Hir
::empty()));
1461 HirKind
::Concat(ref mut x
) => {
1462 stack
.extend(x
.drain(..));
1464 HirKind
::Alternation(ref mut x
) => {
1465 stack
.extend(x
.drain(..));
1472 /// A type that documents various attributes of an HIR expression.
1474 /// These attributes are typically defined inductively on the HIR.
1475 #[derive(Clone, Debug, Eq, PartialEq)]
1477 /// Represent yes/no questions by a bitfield to conserve space, since
1478 /// this is included in every HIR expression.
1480 /// If more attributes need to be added, it is OK to increase the size of
1481 /// this as appropriate.
1485 // A simple macro for defining bitfield accessors/mutators.
1486 macro_rules
! define_bool
{
1487 ($bit
:expr
, $is_fn_name
:ident
, $set_fn_name
:ident
) => {
1488 fn $
is_fn_name(&self) -> bool
{
1489 self.bools
& (0b1 << $bit
) > 0
1492 fn $
set_fn_name(&mut self, yes
: bool
) {
1494 self.bools
|= 1 << $bit
;
1496 self.bools
&= !(1 << $bit
);
1503 fn new() -> HirInfo
{
1504 HirInfo { bools: 0 }
1507 define_bool
!(0, is_always_utf8
, set_always_utf8
);
1508 define_bool
!(1, is_all_assertions
, set_all_assertions
);
1509 define_bool
!(2, is_anchored_start
, set_anchored_start
);
1510 define_bool
!(3, is_anchored_end
, set_anchored_end
);
1511 define_bool
!(4, is_line_anchored_start
, set_line_anchored_start
);
1512 define_bool
!(5, is_line_anchored_end
, set_line_anchored_end
);
1513 define_bool
!(6, is_any_anchored_start
, set_any_anchored_start
);
1514 define_bool
!(7, is_any_anchored_end
, set_any_anchored_end
);
1515 define_bool
!(8, is_match_empty
, set_match_empty
);
1516 define_bool
!(9, is_literal
, set_literal
);
1517 define_bool
!(10, is_alternation_literal
, set_alternation_literal
);
1524 fn uclass(ranges
: &[(char, char)]) -> ClassUnicode
{
1525 let ranges
: Vec
<ClassUnicodeRange
> = ranges
1527 .map(|&(s
, e
)| ClassUnicodeRange
::new(s
, e
))
1529 ClassUnicode
::new(ranges
)
1532 fn bclass(ranges
: &[(u8, u8)]) -> ClassBytes
{
1533 let ranges
: Vec
<ClassBytesRange
> =
1534 ranges
.iter().map(|&(s
, e
)| ClassBytesRange
::new(s
, e
)).collect();
1535 ClassBytes
::new(ranges
)
1538 fn uranges(cls
: &ClassUnicode
) -> Vec
<(char, char)> {
1539 cls
.iter().map(|x
| (x
.start(), x
.end())).collect()
1542 #[cfg(feature = "unicode-case")]
1543 fn ucasefold(cls
: &ClassUnicode
) -> ClassUnicode
{
1544 let mut cls_
= cls
.clone();
1545 cls_
.case_fold_simple();
1549 fn uunion(cls1
: &ClassUnicode
, cls2
: &ClassUnicode
) -> ClassUnicode
{
1550 let mut cls_
= cls1
.clone();
1555 fn uintersect(cls1
: &ClassUnicode
, cls2
: &ClassUnicode
) -> ClassUnicode
{
1556 let mut cls_
= cls1
.clone();
1557 cls_
.intersect(cls2
);
1561 fn udifference(cls1
: &ClassUnicode
, cls2
: &ClassUnicode
) -> ClassUnicode
{
1562 let mut cls_
= cls1
.clone();
1563 cls_
.difference(cls2
);
1568 cls1
: &ClassUnicode
,
1569 cls2
: &ClassUnicode
,
1571 let mut cls_
= cls1
.clone();
1572 cls_
.symmetric_difference(cls2
);
1576 fn unegate(cls
: &ClassUnicode
) -> ClassUnicode
{
1577 let mut cls_
= cls
.clone();
1582 fn branges(cls
: &ClassBytes
) -> Vec
<(u8, u8)> {
1583 cls
.iter().map(|x
| (x
.start(), x
.end())).collect()
1586 fn bcasefold(cls
: &ClassBytes
) -> ClassBytes
{
1587 let mut cls_
= cls
.clone();
1588 cls_
.case_fold_simple();
1592 fn bunion(cls1
: &ClassBytes
, cls2
: &ClassBytes
) -> ClassBytes
{
1593 let mut cls_
= cls1
.clone();
1598 fn bintersect(cls1
: &ClassBytes
, cls2
: &ClassBytes
) -> ClassBytes
{
1599 let mut cls_
= cls1
.clone();
1600 cls_
.intersect(cls2
);
1604 fn bdifference(cls1
: &ClassBytes
, cls2
: &ClassBytes
) -> ClassBytes
{
1605 let mut cls_
= cls1
.clone();
1606 cls_
.difference(cls2
);
1610 fn bsymdifference(cls1
: &ClassBytes
, cls2
: &ClassBytes
) -> ClassBytes
{
1611 let mut cls_
= cls1
.clone();
1612 cls_
.symmetric_difference(cls2
);
1616 fn bnegate(cls
: &ClassBytes
) -> ClassBytes
{
1617 let mut cls_
= cls
.clone();
1623 fn class_range_canonical_unicode() {
1624 let range
= ClassUnicodeRange
::new('
\u{00FF}'
, '
\0'
);
1625 assert_eq
!('
\0'
, range
.start());
1626 assert_eq
!('
\u{00FF}'
, range
.end());
1630 fn class_range_canonical_bytes() {
1631 let range
= ClassBytesRange
::new(b'
\xFF'
, b'
\0'
);
1632 assert_eq
!(b'
\0'
, range
.start());
1633 assert_eq
!(b'
\xFF'
, range
.end());
1637 fn class_canonicalize_unicode() {
1638 let cls
= uclass(&[('a'
, 'c'
), ('x'
, 'z'
)]);
1639 let expected
= vec
![('a'
, 'c'
), ('x'
, 'z'
)];
1640 assert_eq
!(expected
, uranges(&cls
));
1642 let cls
= uclass(&[('x'
, 'z'
), ('a'
, 'c'
)]);
1643 let expected
= vec
![('a'
, 'c'
), ('x'
, 'z'
)];
1644 assert_eq
!(expected
, uranges(&cls
));
1646 let cls
= uclass(&[('x'
, 'z'
), ('w'
, 'y'
)]);
1647 let expected
= vec
![('w'
, 'z'
)];
1648 assert_eq
!(expected
, uranges(&cls
));
1658 let expected
= vec
![('a'
, 'j'
), ('l'
, 's'
)];
1659 assert_eq
!(expected
, uranges(&cls
));
1661 let cls
= uclass(&[('x'
, 'z'
), ('u'
, 'w'
)]);
1662 let expected
= vec
![('u'
, 'z'
)];
1663 assert_eq
!(expected
, uranges(&cls
));
1665 let cls
= uclass(&[('
\x00'
, '
\u{10FFFF}'
), ('
\x00'
, '
\u{10FFFF}'
)]);
1666 let expected
= vec
![('
\x00'
, '
\u{10FFFF}'
)];
1667 assert_eq
!(expected
, uranges(&cls
));
1669 let cls
= uclass(&[('a'
, 'a'
), ('b'
, 'b'
)]);
1670 let expected
= vec
![('a'
, 'b'
)];
1671 assert_eq
!(expected
, uranges(&cls
));
1675 fn class_canonicalize_bytes() {
1676 let cls
= bclass(&[(b'a'
, b'c'
), (b'x'
, b'z'
)]);
1677 let expected
= vec
![(b'a'
, b'c'
), (b'x'
, b'z'
)];
1678 assert_eq
!(expected
, branges(&cls
));
1680 let cls
= bclass(&[(b'x'
, b'z'
), (b'a'
, b'c'
)]);
1681 let expected
= vec
![(b'a'
, b'c'
), (b'x'
, b'z'
)];
1682 assert_eq
!(expected
, branges(&cls
));
1684 let cls
= bclass(&[(b'x'
, b'z'
), (b'w'
, b'y'
)]);
1685 let expected
= vec
![(b'w'
, b'z'
)];
1686 assert_eq
!(expected
, branges(&cls
));
1696 let expected
= vec
![(b'a'
, b'j'
), (b'l'
, b's'
)];
1697 assert_eq
!(expected
, branges(&cls
));
1699 let cls
= bclass(&[(b'x'
, b'z'
), (b'u'
, b'w'
)]);
1700 let expected
= vec
![(b'u'
, b'z'
)];
1701 assert_eq
!(expected
, branges(&cls
));
1703 let cls
= bclass(&[(b'
\x00'
, b'
\xFF'
), (b'
\x00'
, b'
\xFF'
)]);
1704 let expected
= vec
![(b'
\x00'
, b'
\xFF'
)];
1705 assert_eq
!(expected
, branges(&cls
));
1707 let cls
= bclass(&[(b'a'
, b'a'
), (b'b'
, b'b'
)]);
1708 let expected
= vec
![(b'a'
, b'b'
)];
1709 assert_eq
!(expected
, branges(&cls
));
1713 #[cfg(feature = "unicode-case")]
1714 fn class_case_fold_unicode() {
1724 let expected
= uclass(&[
1729 ('
\u{17F}'
, '
\u{17F}'
),
1731 assert_eq
!(expected
, ucasefold(&cls
));
1733 let cls
= uclass(&[('A'
, 'Z'
)]);
1734 let expected
= uclass(&[
1737 ('
\u{17F}'
, '
\u{17F}'
),
1738 ('
\u{212A}'
, '
\u{212A}'
),
1740 assert_eq
!(expected
, ucasefold(&cls
));
1742 let cls
= uclass(&[('a'
, 'z'
)]);
1743 let expected
= uclass(&[
1746 ('
\u{17F}'
, '
\u{17F}'
),
1747 ('
\u{212A}'
, '
\u{212A}'
),
1749 assert_eq
!(expected
, ucasefold(&cls
));
1751 let cls
= uclass(&[('A'
, 'A'
), ('_'
, '_'
)]);
1752 let expected
= uclass(&[('A'
, 'A'
), ('_'
, '_'
), ('a'
, 'a'
)]);
1753 assert_eq
!(expected
, ucasefold(&cls
));
1755 let cls
= uclass(&[('A'
, 'A'
), ('
='
, '
='
)]);
1756 let expected
= uclass(&[('
='
, '
='
), ('A'
, 'A'
), ('a'
, 'a'
)]);
1757 assert_eq
!(expected
, ucasefold(&cls
));
1759 let cls
= uclass(&[('
\x00'
, '
\x10'
)]);
1760 assert_eq
!(cls
, ucasefold(&cls
));
1762 let cls
= uclass(&[('k'
, 'k'
)]);
1764 uclass(&[('K'
, 'K'
), ('k'
, 'k'
), ('
\u{212A}'
, '
\u{212A}'
)]);
1765 assert_eq
!(expected
, ucasefold(&cls
));
1767 let cls
= uclass(&[('@'
, '@'
)]);
1768 assert_eq
!(cls
, ucasefold(&cls
));
1772 #[cfg(not(feature = "unicode-case"))]
1773 fn class_case_fold_unicode_disabled() {
1774 let mut cls
= uclass(&[
1783 assert
!(cls
.try_case_fold_simple().is_err());
1788 #[cfg(not(feature = "unicode-case"))]
1789 fn class_case_fold_unicode_disabled_panics() {
1790 let mut cls
= uclass(&[
1799 cls
.case_fold_simple();
1803 fn class_case_fold_bytes() {
1814 bclass(&[(b'A'
, b'J'
), (b'L'
, b'S'
), (b'a'
, b'j'
), (b'l'
, b's'
)]);
1815 assert_eq
!(expected
, bcasefold(&cls
));
1817 let cls
= bclass(&[(b'A'
, b'Z'
)]);
1818 let expected
= bclass(&[(b'A'
, b'Z'
), (b'a'
, b'z'
)]);
1819 assert_eq
!(expected
, bcasefold(&cls
));
1821 let cls
= bclass(&[(b'a'
, b'z'
)]);
1822 let expected
= bclass(&[(b'A'
, b'Z'
), (b'a'
, b'z'
)]);
1823 assert_eq
!(expected
, bcasefold(&cls
));
1825 let cls
= bclass(&[(b'A'
, b'A'
), (b'_'
, b'_'
)]);
1826 let expected
= bclass(&[(b'A'
, b'A'
), (b'_'
, b'_'
), (b'a'
, b'a'
)]);
1827 assert_eq
!(expected
, bcasefold(&cls
));
1829 let cls
= bclass(&[(b'A'
, b'A'
), (b'
='
, b'
='
)]);
1830 let expected
= bclass(&[(b'
='
, b'
='
), (b'A'
, b'A'
), (b'a'
, b'a'
)]);
1831 assert_eq
!(expected
, bcasefold(&cls
));
1833 let cls
= bclass(&[(b'
\x00'
, b'
\x10'
)]);
1834 assert_eq
!(cls
, bcasefold(&cls
));
1836 let cls
= bclass(&[(b'k'
, b'k'
)]);
1837 let expected
= bclass(&[(b'K'
, b'K'
), (b'k'
, b'k'
)]);
1838 assert_eq
!(expected
, bcasefold(&cls
));
1840 let cls
= bclass(&[(b'@'
, b'@'
)]);
1841 assert_eq
!(cls
, bcasefold(&cls
));
1845 fn class_negate_unicode() {
1846 let cls
= uclass(&[('a'
, 'a'
)]);
1847 let expected
= uclass(&[('
\x00'
, '
\x60'
), ('
\x62'
, '
\u{10FFFF}'
)]);
1848 assert_eq
!(expected
, unegate(&cls
));
1850 let cls
= uclass(&[('a'
, 'a'
), ('b'
, 'b'
)]);
1851 let expected
= uclass(&[('
\x00'
, '
\x60'
), ('
\x63'
, '
\u{10FFFF}'
)]);
1852 assert_eq
!(expected
, unegate(&cls
));
1854 let cls
= uclass(&[('a'
, 'c'
), ('x'
, 'z'
)]);
1855 let expected
= uclass(&[
1858 ('
\x7B'
, '
\u{10FFFF}'
),
1860 assert_eq
!(expected
, unegate(&cls
));
1862 let cls
= uclass(&[('
\x00'
, 'a'
)]);
1863 let expected
= uclass(&[('
\x62'
, '
\u{10FFFF}'
)]);
1864 assert_eq
!(expected
, unegate(&cls
));
1866 let cls
= uclass(&[('a'
, '
\u{10FFFF}'
)]);
1867 let expected
= uclass(&[('
\x00'
, '
\x60'
)]);
1868 assert_eq
!(expected
, unegate(&cls
));
1870 let cls
= uclass(&[('
\x00'
, '
\u{10FFFF}'
)]);
1871 let expected
= uclass(&[]);
1872 assert_eq
!(expected
, unegate(&cls
));
1874 let cls
= uclass(&[]);
1875 let expected
= uclass(&[('
\x00'
, '
\u{10FFFF}'
)]);
1876 assert_eq
!(expected
, unegate(&cls
));
1879 uclass(&[('
\x00'
, '
\u{10FFFD}'
), ('
\u{10FFFF}'
, '
\u{10FFFF}'
)]);
1880 let expected
= uclass(&[('
\u{10FFFE}'
, '
\u{10FFFE}'
)]);
1881 assert_eq
!(expected
, unegate(&cls
));
1883 let cls
= uclass(&[('
\x00'
, '
\u{D7FF}'
)]);
1884 let expected
= uclass(&[('
\u{E000}'
, '
\u{10FFFF}'
)]);
1885 assert_eq
!(expected
, unegate(&cls
));
1887 let cls
= uclass(&[('
\x00'
, '
\u{D7FE}'
)]);
1888 let expected
= uclass(&[('
\u{D7FF}'
, '
\u{10FFFF}'
)]);
1889 assert_eq
!(expected
, unegate(&cls
));
1891 let cls
= uclass(&[('
\u{E000}'
, '
\u{10FFFF}'
)]);
1892 let expected
= uclass(&[('
\x00'
, '
\u{D7FF}'
)]);
1893 assert_eq
!(expected
, unegate(&cls
));
1895 let cls
= uclass(&[('
\u{E001}'
, '
\u{10FFFF}'
)]);
1896 let expected
= uclass(&[('
\x00'
, '
\u{E000}'
)]);
1897 assert_eq
!(expected
, unegate(&cls
));
1901 fn class_negate_bytes() {
1902 let cls
= bclass(&[(b'a'
, b'a'
)]);
1903 let expected
= bclass(&[(b'
\x00'
, b'
\x60'
), (b'
\x62'
, b'
\xFF'
)]);
1904 assert_eq
!(expected
, bnegate(&cls
));
1906 let cls
= bclass(&[(b'a'
, b'a'
), (b'b'
, b'b'
)]);
1907 let expected
= bclass(&[(b'
\x00'
, b'
\x60'
), (b'
\x63'
, b'
\xFF'
)]);
1908 assert_eq
!(expected
, bnegate(&cls
));
1910 let cls
= bclass(&[(b'a'
, b'c'
), (b'x'
, b'z'
)]);
1911 let expected
= bclass(&[
1916 assert_eq
!(expected
, bnegate(&cls
));
1918 let cls
= bclass(&[(b'
\x00'
, b'a'
)]);
1919 let expected
= bclass(&[(b'
\x62'
, b'
\xFF'
)]);
1920 assert_eq
!(expected
, bnegate(&cls
));
1922 let cls
= bclass(&[(b'a'
, b'
\xFF'
)]);
1923 let expected
= bclass(&[(b'
\x00'
, b'
\x60'
)]);
1924 assert_eq
!(expected
, bnegate(&cls
));
1926 let cls
= bclass(&[(b'
\x00'
, b'
\xFF'
)]);
1927 let expected
= bclass(&[]);
1928 assert_eq
!(expected
, bnegate(&cls
));
1930 let cls
= bclass(&[]);
1931 let expected
= bclass(&[(b'
\x00'
, b'
\xFF'
)]);
1932 assert_eq
!(expected
, bnegate(&cls
));
1934 let cls
= bclass(&[(b'
\x00'
, b'
\xFD'
), (b'
\xFF'
, b'
\xFF'
)]);
1935 let expected
= bclass(&[(b'
\xFE'
, b'
\xFE'
)]);
1936 assert_eq
!(expected
, bnegate(&cls
));
1940 fn class_union_unicode() {
1941 let cls1
= uclass(&[('a'
, 'g'
), ('m'
, 't'
), ('A'
, 'C'
)]);
1942 let cls2
= uclass(&[('a'
, 'z'
)]);
1943 let expected
= uclass(&[('a'
, 'z'
), ('A'
, 'C'
)]);
1944 assert_eq
!(expected
, uunion(&cls1
, &cls2
));
1948 fn class_union_bytes() {
1949 let cls1
= bclass(&[(b'a'
, b'g'
), (b'm'
, b't'
), (b'A'
, b'C'
)]);
1950 let cls2
= bclass(&[(b'a'
, b'z'
)]);
1951 let expected
= bclass(&[(b'a'
, b'z'
), (b'A'
, b'C'
)]);
1952 assert_eq
!(expected
, bunion(&cls1
, &cls2
));
1956 fn class_intersect_unicode() {
1957 let cls1
= uclass(&[]);
1958 let cls2
= uclass(&[('a'
, 'a'
)]);
1959 let expected
= uclass(&[]);
1960 assert_eq
!(expected
, uintersect(&cls1
, &cls2
));
1962 let cls1
= uclass(&[('a'
, 'a'
)]);
1963 let cls2
= uclass(&[('a'
, 'a'
)]);
1964 let expected
= uclass(&[('a'
, 'a'
)]);
1965 assert_eq
!(expected
, uintersect(&cls1
, &cls2
));
1967 let cls1
= uclass(&[('a'
, 'a'
)]);
1968 let cls2
= uclass(&[('b'
, 'b'
)]);
1969 let expected
= uclass(&[]);
1970 assert_eq
!(expected
, uintersect(&cls1
, &cls2
));
1972 let cls1
= uclass(&[('a'
, 'a'
)]);
1973 let cls2
= uclass(&[('a'
, 'c'
)]);
1974 let expected
= uclass(&[('a'
, 'a'
)]);
1975 assert_eq
!(expected
, uintersect(&cls1
, &cls2
));
1977 let cls1
= uclass(&[('a'
, 'b'
)]);
1978 let cls2
= uclass(&[('a'
, 'c'
)]);
1979 let expected
= uclass(&[('a'
, 'b'
)]);
1980 assert_eq
!(expected
, uintersect(&cls1
, &cls2
));
1982 let cls1
= uclass(&[('a'
, 'b'
)]);
1983 let cls2
= uclass(&[('b'
, 'c'
)]);
1984 let expected
= uclass(&[('b'
, 'b'
)]);
1985 assert_eq
!(expected
, uintersect(&cls1
, &cls2
));
1987 let cls1
= uclass(&[('a'
, 'b'
)]);
1988 let cls2
= uclass(&[('c'
, 'd'
)]);
1989 let expected
= uclass(&[]);
1990 assert_eq
!(expected
, uintersect(&cls1
, &cls2
));
1992 let cls1
= uclass(&[('b'
, 'c'
)]);
1993 let cls2
= uclass(&[('a'
, 'd'
)]);
1994 let expected
= uclass(&[('b'
, 'c'
)]);
1995 assert_eq
!(expected
, uintersect(&cls1
, &cls2
));
1997 let cls1
= uclass(&[('a'
, 'b'
), ('d'
, 'e'
), ('g'
, 'h'
)]);
1998 let cls2
= uclass(&[('a'
, 'h'
)]);
1999 let expected
= uclass(&[('a'
, 'b'
), ('d'
, 'e'
), ('g'
, 'h'
)]);
2000 assert_eq
!(expected
, uintersect(&cls1
, &cls2
));
2002 let cls1
= uclass(&[('a'
, 'b'
), ('d'
, 'e'
), ('g'
, 'h'
)]);
2003 let cls2
= uclass(&[('a'
, 'b'
), ('d'
, 'e'
), ('g'
, 'h'
)]);
2004 let expected
= uclass(&[('a'
, 'b'
), ('d'
, 'e'
), ('g'
, 'h'
)]);
2005 assert_eq
!(expected
, uintersect(&cls1
, &cls2
));
2007 let cls1
= uclass(&[('a'
, 'b'
), ('g'
, 'h'
)]);
2008 let cls2
= uclass(&[('d'
, 'e'
), ('k'
, 'l'
)]);
2009 let expected
= uclass(&[]);
2010 assert_eq
!(expected
, uintersect(&cls1
, &cls2
));
2012 let cls1
= uclass(&[('a'
, 'b'
), ('d'
, 'e'
), ('g'
, 'h'
)]);
2013 let cls2
= uclass(&[('h'
, 'h'
)]);
2014 let expected
= uclass(&[('h'
, 'h'
)]);
2015 assert_eq
!(expected
, uintersect(&cls1
, &cls2
));
2017 let cls1
= uclass(&[('a'
, 'b'
), ('e'
, 'f'
), ('i'
, 'j'
)]);
2018 let cls2
= uclass(&[('c'
, 'd'
), ('g'
, 'h'
), ('k'
, 'l'
)]);
2019 let expected
= uclass(&[]);
2020 assert_eq
!(expected
, uintersect(&cls1
, &cls2
));
2022 let cls1
= uclass(&[('a'
, 'b'
), ('c'
, 'd'
), ('e'
, 'f'
)]);
2023 let cls2
= uclass(&[('b'
, 'c'
), ('d'
, 'e'
), ('f'
, 'g'
)]);
2024 let expected
= uclass(&[('b'
, 'f'
)]);
2025 assert_eq
!(expected
, uintersect(&cls1
, &cls2
));
2029 fn class_intersect_bytes() {
2030 let cls1
= bclass(&[]);
2031 let cls2
= bclass(&[(b'a'
, b'a'
)]);
2032 let expected
= bclass(&[]);
2033 assert_eq
!(expected
, bintersect(&cls1
, &cls2
));
2035 let cls1
= bclass(&[(b'a'
, b'a'
)]);
2036 let cls2
= bclass(&[(b'a'
, b'a'
)]);
2037 let expected
= bclass(&[(b'a'
, b'a'
)]);
2038 assert_eq
!(expected
, bintersect(&cls1
, &cls2
));
2040 let cls1
= bclass(&[(b'a'
, b'a'
)]);
2041 let cls2
= bclass(&[(b'b'
, b'b'
)]);
2042 let expected
= bclass(&[]);
2043 assert_eq
!(expected
, bintersect(&cls1
, &cls2
));
2045 let cls1
= bclass(&[(b'a'
, b'a'
)]);
2046 let cls2
= bclass(&[(b'a'
, b'c'
)]);
2047 let expected
= bclass(&[(b'a'
, b'a'
)]);
2048 assert_eq
!(expected
, bintersect(&cls1
, &cls2
));
2050 let cls1
= bclass(&[(b'a'
, b'b'
)]);
2051 let cls2
= bclass(&[(b'a'
, b'c'
)]);
2052 let expected
= bclass(&[(b'a'
, b'b'
)]);
2053 assert_eq
!(expected
, bintersect(&cls1
, &cls2
));
2055 let cls1
= bclass(&[(b'a'
, b'b'
)]);
2056 let cls2
= bclass(&[(b'b'
, b'c'
)]);
2057 let expected
= bclass(&[(b'b'
, b'b'
)]);
2058 assert_eq
!(expected
, bintersect(&cls1
, &cls2
));
2060 let cls1
= bclass(&[(b'a'
, b'b'
)]);
2061 let cls2
= bclass(&[(b'c'
, b'd'
)]);
2062 let expected
= bclass(&[]);
2063 assert_eq
!(expected
, bintersect(&cls1
, &cls2
));
2065 let cls1
= bclass(&[(b'b'
, b'c'
)]);
2066 let cls2
= bclass(&[(b'a'
, b'd'
)]);
2067 let expected
= bclass(&[(b'b'
, b'c'
)]);
2068 assert_eq
!(expected
, bintersect(&cls1
, &cls2
));
2070 let cls1
= bclass(&[(b'a'
, b'b'
), (b'd'
, b'e'
), (b'g'
, b'h'
)]);
2071 let cls2
= bclass(&[(b'a'
, b'h'
)]);
2072 let expected
= bclass(&[(b'a'
, b'b'
), (b'd'
, b'e'
), (b'g'
, b'h'
)]);
2073 assert_eq
!(expected
, bintersect(&cls1
, &cls2
));
2075 let cls1
= bclass(&[(b'a'
, b'b'
), (b'd'
, b'e'
), (b'g'
, b'h'
)]);
2076 let cls2
= bclass(&[(b'a'
, b'b'
), (b'd'
, b'e'
), (b'g'
, b'h'
)]);
2077 let expected
= bclass(&[(b'a'
, b'b'
), (b'd'
, b'e'
), (b'g'
, b'h'
)]);
2078 assert_eq
!(expected
, bintersect(&cls1
, &cls2
));
2080 let cls1
= bclass(&[(b'a'
, b'b'
), (b'g'
, b'h'
)]);
2081 let cls2
= bclass(&[(b'd'
, b'e'
), (b'k'
, b'l'
)]);
2082 let expected
= bclass(&[]);
2083 assert_eq
!(expected
, bintersect(&cls1
, &cls2
));
2085 let cls1
= bclass(&[(b'a'
, b'b'
), (b'd'
, b'e'
), (b'g'
, b'h'
)]);
2086 let cls2
= bclass(&[(b'h'
, b'h'
)]);
2087 let expected
= bclass(&[(b'h'
, b'h'
)]);
2088 assert_eq
!(expected
, bintersect(&cls1
, &cls2
));
2090 let cls1
= bclass(&[(b'a'
, b'b'
), (b'e'
, b'f'
), (b'i'
, b'j'
)]);
2091 let cls2
= bclass(&[(b'c'
, b'd'
), (b'g'
, b'h'
), (b'k'
, b'l'
)]);
2092 let expected
= bclass(&[]);
2093 assert_eq
!(expected
, bintersect(&cls1
, &cls2
));
2095 let cls1
= bclass(&[(b'a'
, b'b'
), (b'c'
, b'd'
), (b'e'
, b'f'
)]);
2096 let cls2
= bclass(&[(b'b'
, b'c'
), (b'd'
, b'e'
), (b'f'
, b'g'
)]);
2097 let expected
= bclass(&[(b'b'
, b'f'
)]);
2098 assert_eq
!(expected
, bintersect(&cls1
, &cls2
));
2102 fn class_difference_unicode() {
2103 let cls1
= uclass(&[('a'
, 'a'
)]);
2104 let cls2
= uclass(&[('a'
, 'a'
)]);
2105 let expected
= uclass(&[]);
2106 assert_eq
!(expected
, udifference(&cls1
, &cls2
));
2108 let cls1
= uclass(&[('a'
, 'a'
)]);
2109 let cls2
= uclass(&[]);
2110 let expected
= uclass(&[('a'
, 'a'
)]);
2111 assert_eq
!(expected
, udifference(&cls1
, &cls2
));
2113 let cls1
= uclass(&[]);
2114 let cls2
= uclass(&[('a'
, 'a'
)]);
2115 let expected
= uclass(&[]);
2116 assert_eq
!(expected
, udifference(&cls1
, &cls2
));
2118 let cls1
= uclass(&[('a'
, 'z'
)]);
2119 let cls2
= uclass(&[('a'
, 'a'
)]);
2120 let expected
= uclass(&[('b'
, 'z'
)]);
2121 assert_eq
!(expected
, udifference(&cls1
, &cls2
));
2123 let cls1
= uclass(&[('a'
, 'z'
)]);
2124 let cls2
= uclass(&[('z'
, 'z'
)]);
2125 let expected
= uclass(&[('a'
, 'y'
)]);
2126 assert_eq
!(expected
, udifference(&cls1
, &cls2
));
2128 let cls1
= uclass(&[('a'
, 'z'
)]);
2129 let cls2
= uclass(&[('m'
, 'm'
)]);
2130 let expected
= uclass(&[('a'
, 'l'
), ('n'
, 'z'
)]);
2131 assert_eq
!(expected
, udifference(&cls1
, &cls2
));
2133 let cls1
= uclass(&[('a'
, 'c'
), ('g'
, 'i'
), ('r'
, 't'
)]);
2134 let cls2
= uclass(&[('a'
, 'z'
)]);
2135 let expected
= uclass(&[]);
2136 assert_eq
!(expected
, udifference(&cls1
, &cls2
));
2138 let cls1
= uclass(&[('a'
, 'c'
), ('g'
, 'i'
), ('r'
, 't'
)]);
2139 let cls2
= uclass(&[('d'
, 'v'
)]);
2140 let expected
= uclass(&[('a'
, 'c'
)]);
2141 assert_eq
!(expected
, udifference(&cls1
, &cls2
));
2143 let cls1
= uclass(&[('a'
, 'c'
), ('g'
, 'i'
), ('r'
, 't'
)]);
2144 let cls2
= uclass(&[('b'
, 'g'
), ('s'
, 'u'
)]);
2145 let expected
= uclass(&[('a'
, 'a'
), ('h'
, 'i'
), ('r'
, 'r'
)]);
2146 assert_eq
!(expected
, udifference(&cls1
, &cls2
));
2148 let cls1
= uclass(&[('a'
, 'c'
), ('g'
, 'i'
), ('r'
, 't'
)]);
2149 let cls2
= uclass(&[('b'
, 'd'
), ('e'
, 'g'
), ('s'
, 'u'
)]);
2150 let expected
= uclass(&[('a'
, 'a'
), ('h'
, 'i'
), ('r'
, 'r'
)]);
2151 assert_eq
!(expected
, udifference(&cls1
, &cls2
));
2153 let cls1
= uclass(&[('x'
, 'z'
)]);
2154 let cls2
= uclass(&[('a'
, 'c'
), ('e'
, 'g'
), ('s'
, 'u'
)]);
2155 let expected
= uclass(&[('x'
, 'z'
)]);
2156 assert_eq
!(expected
, udifference(&cls1
, &cls2
));
2158 let cls1
= uclass(&[('a'
, 'z'
)]);
2159 let cls2
= uclass(&[('a'
, 'c'
), ('e'
, 'g'
), ('s'
, 'u'
)]);
2160 let expected
= uclass(&[('d'
, 'd'
), ('h'
, 'r'
), ('v'
, 'z'
)]);
2161 assert_eq
!(expected
, udifference(&cls1
, &cls2
));
2165 fn class_difference_bytes() {
2166 let cls1
= bclass(&[(b'a'
, b'a'
)]);
2167 let cls2
= bclass(&[(b'a'
, b'a'
)]);
2168 let expected
= bclass(&[]);
2169 assert_eq
!(expected
, bdifference(&cls1
, &cls2
));
2171 let cls1
= bclass(&[(b'a'
, b'a'
)]);
2172 let cls2
= bclass(&[]);
2173 let expected
= bclass(&[(b'a'
, b'a'
)]);
2174 assert_eq
!(expected
, bdifference(&cls1
, &cls2
));
2176 let cls1
= bclass(&[]);
2177 let cls2
= bclass(&[(b'a'
, b'a'
)]);
2178 let expected
= bclass(&[]);
2179 assert_eq
!(expected
, bdifference(&cls1
, &cls2
));
2181 let cls1
= bclass(&[(b'a'
, b'z'
)]);
2182 let cls2
= bclass(&[(b'a'
, b'a'
)]);
2183 let expected
= bclass(&[(b'b'
, b'z'
)]);
2184 assert_eq
!(expected
, bdifference(&cls1
, &cls2
));
2186 let cls1
= bclass(&[(b'a'
, b'z'
)]);
2187 let cls2
= bclass(&[(b'z'
, b'z'
)]);
2188 let expected
= bclass(&[(b'a'
, b'y'
)]);
2189 assert_eq
!(expected
, bdifference(&cls1
, &cls2
));
2191 let cls1
= bclass(&[(b'a'
, b'z'
)]);
2192 let cls2
= bclass(&[(b'm'
, b'm'
)]);
2193 let expected
= bclass(&[(b'a'
, b'l'
), (b'n'
, b'z'
)]);
2194 assert_eq
!(expected
, bdifference(&cls1
, &cls2
));
2196 let cls1
= bclass(&[(b'a'
, b'c'
), (b'g'
, b'i'
), (b'r'
, b't'
)]);
2197 let cls2
= bclass(&[(b'a'
, b'z'
)]);
2198 let expected
= bclass(&[]);
2199 assert_eq
!(expected
, bdifference(&cls1
, &cls2
));
2201 let cls1
= bclass(&[(b'a'
, b'c'
), (b'g'
, b'i'
), (b'r'
, b't'
)]);
2202 let cls2
= bclass(&[(b'd'
, b'v'
)]);
2203 let expected
= bclass(&[(b'a'
, b'c'
)]);
2204 assert_eq
!(expected
, bdifference(&cls1
, &cls2
));
2206 let cls1
= bclass(&[(b'a'
, b'c'
), (b'g'
, b'i'
), (b'r'
, b't'
)]);
2207 let cls2
= bclass(&[(b'b'
, b'g'
), (b's'
, b'u'
)]);
2208 let expected
= bclass(&[(b'a'
, b'a'
), (b'h'
, b'i'
), (b'r'
, b'r'
)]);
2209 assert_eq
!(expected
, bdifference(&cls1
, &cls2
));
2211 let cls1
= bclass(&[(b'a'
, b'c'
), (b'g'
, b'i'
), (b'r'
, b't'
)]);
2212 let cls2
= bclass(&[(b'b'
, b'd'
), (b'e'
, b'g'
), (b's'
, b'u'
)]);
2213 let expected
= bclass(&[(b'a'
, b'a'
), (b'h'
, b'i'
), (b'r'
, b'r'
)]);
2214 assert_eq
!(expected
, bdifference(&cls1
, &cls2
));
2216 let cls1
= bclass(&[(b'x'
, b'z'
)]);
2217 let cls2
= bclass(&[(b'a'
, b'c'
), (b'e'
, b'g'
), (b's'
, b'u'
)]);
2218 let expected
= bclass(&[(b'x'
, b'z'
)]);
2219 assert_eq
!(expected
, bdifference(&cls1
, &cls2
));
2221 let cls1
= bclass(&[(b'a'
, b'z'
)]);
2222 let cls2
= bclass(&[(b'a'
, b'c'
), (b'e'
, b'g'
), (b's'
, b'u'
)]);
2223 let expected
= bclass(&[(b'd'
, b'd'
), (b'h'
, b'r'
), (b'v'
, b'z'
)]);
2224 assert_eq
!(expected
, bdifference(&cls1
, &cls2
));
2228 fn class_symmetric_difference_unicode() {
2229 let cls1
= uclass(&[('a'
, 'm'
)]);
2230 let cls2
= uclass(&[('g'
, 't'
)]);
2231 let expected
= uclass(&[('a'
, 'f'
), ('n'
, 't'
)]);
2232 assert_eq
!(expected
, usymdifference(&cls1
, &cls2
));
2236 fn class_symmetric_difference_bytes() {
2237 let cls1
= bclass(&[(b'a'
, b'm'
)]);
2238 let cls2
= bclass(&[(b'g'
, b't'
)]);
2239 let expected
= bclass(&[(b'a'
, b'f'
), (b'n'
, b't'
)]);
2240 assert_eq
!(expected
, bsymdifference(&cls1
, &cls2
));
2245 fn hir_byte_literal_non_ascii() {
2246 Hir
::literal(Literal
::Byte(b'a'
));
2249 // We use a thread with an explicit stack size to test that our destructor
2250 // for Hir can handle arbitrarily sized expressions in constant stack
2251 // space. In case we run on a platform without threads (WASM?), we limit
2252 // this test to Windows/Unix.
2254 #[cfg(any(unix, windows))]
2255 fn no_stack_overflow_on_drop() {
2259 let mut expr
= Hir
::empty();
2261 expr
= Hir
::group(Group
{
2262 kind
: GroupKind
::NonCapturing
,
2263 hir
: Box
::new(expr
),
2265 expr
= Hir
::repetition(Repetition
{
2266 kind
: RepetitionKind
::ZeroOrOne
,
2268 hir
: Box
::new(expr
),
2272 kind
: HirKind
::Concat(vec
![expr
]),
2273 info
: HirInfo
::new(),
2276 kind
: HirKind
::Alternation(vec
![expr
]),
2277 info
: HirInfo
::new(),
2280 assert
!(!expr
.kind
.is_empty());
2283 // We run our test on a thread with a small stack size so we can
2284 // force the issue more easily.
2285 thread
::Builder
::new()
2286 .stack_size(1 << 10)