2 Defines a translator that converts an `Ast` to an `Hir`.
5 use std
::cell
::{Cell, RefCell}
;
8 use crate::ast
::{self, Ast, Span, Visitor}
;
9 use crate::hir
::{self, Error, ErrorKind, Hir}
;
10 use crate::unicode
::{self, ClassQuery}
;
12 type Result
<T
> = result
::Result
<T
, Error
>;
14 /// A builder for constructing an AST->HIR translator.
15 #[derive(Clone, Debug)]
16 pub struct TranslatorBuilder
{
17 allow_invalid_utf8
: bool
,
21 impl Default
for TranslatorBuilder
{
22 fn default() -> TranslatorBuilder
{
23 TranslatorBuilder
::new()
27 impl TranslatorBuilder
{
28 /// Create a new translator builder with a default configuration.
29 pub fn new() -> TranslatorBuilder
{
31 allow_invalid_utf8
: false,
32 flags
: Flags
::default(),
36 /// Build a translator using the current configuration.
37 pub fn build(&self) -> Translator
{
39 stack
: RefCell
::new(vec
![]),
40 flags
: Cell
::new(self.flags
),
41 allow_invalid_utf8
: self.allow_invalid_utf8
,
45 /// When enabled, translation will permit the construction of a regular
46 /// expression that may match invalid UTF-8.
48 /// When disabled (the default), the translator is guaranteed to produce
49 /// an expression that will only ever match valid UTF-8 (otherwise, the
50 /// translator will return an error).
52 /// Perhaps surprisingly, when invalid UTF-8 isn't allowed, a negated ASCII
53 /// word boundary (uttered as `(?-u:\B)` in the concrete syntax) will cause
54 /// the parser to return an error. Namely, a negated ASCII word boundary
55 /// can result in matching positions that aren't valid UTF-8 boundaries.
56 pub fn allow_invalid_utf8(&mut self, yes
: bool
) -> &mut TranslatorBuilder
{
57 self.allow_invalid_utf8
= yes
;
61 /// Enable or disable the case insensitive flag (`i`) by default.
62 pub fn case_insensitive(&mut self, yes
: bool
) -> &mut TranslatorBuilder
{
63 self.flags
.case_insensitive
= if yes { Some(true) }
else { None }
;
67 /// Enable or disable the multi-line matching flag (`m`) by default.
68 pub fn multi_line(&mut self, yes
: bool
) -> &mut TranslatorBuilder
{
69 self.flags
.multi_line
= if yes { Some(true) }
else { None }
;
73 /// Enable or disable the "dot matches any character" flag (`s`) by default.
75 pub fn dot_matches_new_line(
78 ) -> &mut TranslatorBuilder
{
79 self.flags
.dot_matches_new_line
= if yes { Some(true) }
else { None }
;
83 /// Enable or disable the "swap greed" flag (`U`) by default.
84 pub fn swap_greed(&mut self, yes
: bool
) -> &mut TranslatorBuilder
{
85 self.flags
.swap_greed
= if yes { Some(true) }
else { None }
;
89 /// Enable or disable the Unicode flag (`u`) by default.
90 pub fn unicode(&mut self, yes
: bool
) -> &mut TranslatorBuilder
{
91 self.flags
.unicode
= if yes { None }
else { Some(false) }
;
96 /// A translator maps abstract syntax to a high level intermediate representation.
99 /// A translator may benefit from reuse. That is, a translator can translate
100 /// many abstract syntax trees.
102 /// A `Translator` can be configured in more detail via a
103 /// [`TranslatorBuilder`](struct.TranslatorBuilder.html).
104 #[derive(Clone, Debug)]
105 pub struct Translator
{
106 /// Our call stack, but on the heap.
107 stack
: RefCell
<Vec
<HirFrame
>>,
108 /// The current flag settings.
110 /// Whether we're allowed to produce HIR that can match arbitrary bytes.
111 allow_invalid_utf8
: bool
,
115 /// Create a new translator using the default configuration.
116 pub fn new() -> Translator
{
117 TranslatorBuilder
::new().build()
120 /// Translate the given abstract syntax tree (AST) into a high level
121 /// intermediate representation (HIR).
123 /// If there was a problem doing the translation, then an HIR-specific
124 /// error is returned.
126 /// The original pattern string used to produce the `Ast` *must* also be
127 /// provided. The translator does not use the pattern string during any
128 /// correct translation, but is used for error reporting.
129 pub fn translate(&mut self, pattern
: &str, ast
: &Ast
) -> Result
<Hir
> {
130 ast
::visit(ast
, TranslatorI
::new(self, pattern
))
134 /// An HirFrame is a single stack frame, represented explicitly, which is
135 /// created for each item in the Ast that we traverse.
137 /// Note that technically, this type doesn't represent our entire stack
138 /// frame. In particular, the Ast visitor represents any state associated with
139 /// traversing the Ast itself.
140 #[derive(Clone, Debug)]
142 /// An arbitrary HIR expression. These get pushed whenever we hit a base
143 /// case in the Ast. They get popped after an inductive (i.e., recursive)
144 /// step is complete.
146 /// A Unicode character class. This frame is mutated as we descend into
147 /// the Ast of a character class (which is itself its own mini recursive
149 ClassUnicode(hir
::ClassUnicode
),
150 /// A byte-oriented character class. This frame is mutated as we descend
151 /// into the Ast of a character class (which is itself its own mini
152 /// recursive structure).
154 /// Byte character classes are created when Unicode mode (`u`) is disabled.
155 /// If `allow_invalid_utf8` is disabled (the default), then a byte
156 /// character is only permitted to match ASCII text.
157 ClassBytes(hir
::ClassBytes
),
158 /// This is pushed on to the stack upon first seeing any kind of group,
159 /// indicated by parentheses (including non-capturing groups). It is popped
160 /// upon leaving a group.
162 /// The old active flags when this group was opened.
164 /// If this group sets flags, then the new active flags are set to the
165 /// result of merging the old flags with the flags introduced by this
166 /// group. If the group doesn't set any flags, then this is simply
167 /// equivalent to whatever flags were set when the group was opened.
169 /// When this group is popped, the active flags should be restored to
170 /// the flags set here.
172 /// The "active" flags correspond to whatever flags are set in the
176 /// This is pushed whenever a concatenation is observed. After visiting
177 /// every sub-expression in the concatenation, the translator's stack is
178 /// popped until it sees a Concat frame.
180 /// This is pushed whenever an alternation is observed. After visiting
181 /// every sub-expression in the alternation, the translator's stack is
182 /// popped until it sees an Alternation frame.
187 /// Assert that the current stack frame is an Hir expression and return it.
188 fn unwrap_expr(self) -> Hir
{
190 HirFrame
::Expr(expr
) => expr
,
191 _
=> panic
!("tried to unwrap expr from HirFrame, got: {:?}", self),
195 /// Assert that the current stack frame is a Unicode class expression and
197 fn unwrap_class_unicode(self) -> hir
::ClassUnicode
{
199 HirFrame
::ClassUnicode(cls
) => cls
,
201 "tried to unwrap Unicode class \
202 from HirFrame, got: {:?}",
208 /// Assert that the current stack frame is a byte class expression and
210 fn unwrap_class_bytes(self) -> hir
::ClassBytes
{
212 HirFrame
::ClassBytes(cls
) => cls
,
214 "tried to unwrap byte class \
215 from HirFrame, got: {:?}",
221 /// Assert that the current stack frame is a group indicator and return
222 /// its corresponding flags (the flags that were active at the time the
223 /// group was entered).
224 fn unwrap_group(self) -> Flags
{
226 HirFrame
::Group { old_flags }
=> old_flags
,
228 panic
!("tried to unwrap group from HirFrame, got: {:?}", self)
234 impl<'t
, 'p
> Visitor
for TranslatorI
<'t
, 'p
> {
238 fn finish(self) -> Result
<Hir
> {
239 // ... otherwise, we should have exactly one HIR on the stack.
240 assert_eq
!(self.trans().stack
.borrow().len(), 1);
241 Ok(self.pop().unwrap().unwrap_expr())
244 fn visit_pre(&mut self, ast
: &Ast
) -> Result
<()> {
246 Ast
::Class(ast
::Class
::Bracketed(_
)) => {
247 if self.flags().unicode() {
248 let cls
= hir
::ClassUnicode
::empty();
249 self.push(HirFrame
::ClassUnicode(cls
));
251 let cls
= hir
::ClassBytes
::empty();
252 self.push(HirFrame
::ClassBytes(cls
));
255 Ast
::Group(ref x
) => {
258 .map(|ast
| self.set_flags(ast
))
259 .unwrap_or_else(|| self.flags());
260 self.push(HirFrame
::Group { old_flags }
);
262 Ast
::Concat(ref x
) if x
.asts
.is_empty() => {}
264 self.push(HirFrame
::Concat
);
266 Ast
::Alternation(ref x
) if x
.asts
.is_empty() => {}
267 Ast
::Alternation(_
) => {
268 self.push(HirFrame
::Alternation
);
275 fn visit_post(&mut self, ast
: &Ast
) -> Result
<()> {
278 self.push(HirFrame
::Expr(Hir
::empty()));
280 Ast
::Flags(ref x
) => {
281 self.set_flags(&x
.flags
);
282 // Flags in the AST are generally considered directives and
283 // not actual sub-expressions. However, they can be used in
284 // the concrete syntax like `((?i))`, and we need some kind of
285 // indication of an expression there, and Empty is the correct
288 // There can also be things like `(?i)+`, but we rule those out
289 // in the parser. In the future, we might allow them for
291 self.push(HirFrame
::Expr(Hir
::empty()));
293 Ast
::Literal(ref x
) => {
294 self.push(HirFrame
::Expr(self.hir_literal(x
)?
));
297 self.push(HirFrame
::Expr(self.hir_dot(span
)?
));
299 Ast
::Assertion(ref x
) => {
300 self.push(HirFrame
::Expr(self.hir_assertion(x
)?
));
302 Ast
::Class(ast
::Class
::Perl(ref x
)) => {
303 if self.flags().unicode() {
304 let cls
= self.hir_perl_unicode_class(x
)?
;
305 let hcls
= hir
::Class
::Unicode(cls
);
306 self.push(HirFrame
::Expr(Hir
::class(hcls
)));
308 let cls
= self.hir_perl_byte_class(x
);
309 let hcls
= hir
::Class
::Bytes(cls
);
310 self.push(HirFrame
::Expr(Hir
::class(hcls
)));
313 Ast
::Class(ast
::Class
::Unicode(ref x
)) => {
314 let cls
= hir
::Class
::Unicode(self.hir_unicode_class(x
)?
);
315 self.push(HirFrame
::Expr(Hir
::class(cls
)));
317 Ast
::Class(ast
::Class
::Bracketed(ref ast
)) => {
318 if self.flags().unicode() {
319 let mut cls
= self.pop().unwrap().unwrap_class_unicode();
320 self.unicode_fold_and_negate(
325 if cls
.ranges().is_empty() {
326 return Err(self.error(
328 ErrorKind
::EmptyClassNotAllowed
,
331 let expr
= Hir
::class(hir
::Class
::Unicode(cls
));
332 self.push(HirFrame
::Expr(expr
));
334 let mut cls
= self.pop().unwrap().unwrap_class_bytes();
335 self.bytes_fold_and_negate(
340 if cls
.ranges().is_empty() {
341 return Err(self.error(
343 ErrorKind
::EmptyClassNotAllowed
,
347 let expr
= Hir
::class(hir
::Class
::Bytes(cls
));
348 self.push(HirFrame
::Expr(expr
));
351 Ast
::Repetition(ref x
) => {
352 let expr
= self.pop().unwrap().unwrap_expr();
353 self.push(HirFrame
::Expr(self.hir_repetition(x
, expr
)));
355 Ast
::Group(ref x
) => {
356 let expr
= self.pop().unwrap().unwrap_expr();
357 let old_flags
= self.pop().unwrap().unwrap_group();
358 self.trans().flags
.set(old_flags
);
359 self.push(HirFrame
::Expr(self.hir_group(x
, expr
)));
362 let mut exprs
= vec
![];
363 while let Some(HirFrame
::Expr(expr
)) = self.pop() {
364 if !expr
.kind().is_empty() {
369 self.push(HirFrame
::Expr(Hir
::concat(exprs
)));
371 Ast
::Alternation(_
) => {
372 let mut exprs
= vec
![];
373 while let Some(HirFrame
::Expr(expr
)) = self.pop() {
377 self.push(HirFrame
::Expr(Hir
::alternation(exprs
)));
383 fn visit_class_set_item_pre(
385 ast
: &ast
::ClassSetItem
,
388 ast
::ClassSetItem
::Bracketed(_
) => {
389 if self.flags().unicode() {
390 let cls
= hir
::ClassUnicode
::empty();
391 self.push(HirFrame
::ClassUnicode(cls
));
393 let cls
= hir
::ClassBytes
::empty();
394 self.push(HirFrame
::ClassBytes(cls
));
397 // We needn't handle the Union case here since the visitor will
404 fn visit_class_set_item_post(
406 ast
: &ast
::ClassSetItem
,
409 ast
::ClassSetItem
::Empty(_
) => {}
410 ast
::ClassSetItem
::Literal(ref x
) => {
411 if self.flags().unicode() {
412 let mut cls
= self.pop().unwrap().unwrap_class_unicode();
413 cls
.push(hir
::ClassUnicodeRange
::new(x
.c
, x
.c
));
414 self.push(HirFrame
::ClassUnicode(cls
));
416 let mut cls
= self.pop().unwrap().unwrap_class_bytes();
417 let byte
= self.class_literal_byte(x
)?
;
418 cls
.push(hir
::ClassBytesRange
::new(byte
, byte
));
419 self.push(HirFrame
::ClassBytes(cls
));
422 ast
::ClassSetItem
::Range(ref x
) => {
423 if self.flags().unicode() {
424 let mut cls
= self.pop().unwrap().unwrap_class_unicode();
425 cls
.push(hir
::ClassUnicodeRange
::new(x
.start
.c
, x
.end
.c
));
426 self.push(HirFrame
::ClassUnicode(cls
));
428 let mut cls
= self.pop().unwrap().unwrap_class_bytes();
429 let start
= self.class_literal_byte(&x
.start
)?
;
430 let end
= self.class_literal_byte(&x
.end
)?
;
431 cls
.push(hir
::ClassBytesRange
::new(start
, end
));
432 self.push(HirFrame
::ClassBytes(cls
));
435 ast
::ClassSetItem
::Ascii(ref x
) => {
436 if self.flags().unicode() {
437 let xcls
= self.hir_ascii_unicode_class(x
)?
;
438 let mut cls
= self.pop().unwrap().unwrap_class_unicode();
440 self.push(HirFrame
::ClassUnicode(cls
));
442 let xcls
= self.hir_ascii_byte_class(x
)?
;
443 let mut cls
= self.pop().unwrap().unwrap_class_bytes();
445 self.push(HirFrame
::ClassBytes(cls
));
448 ast
::ClassSetItem
::Unicode(ref x
) => {
449 let xcls
= self.hir_unicode_class(x
)?
;
450 let mut cls
= self.pop().unwrap().unwrap_class_unicode();
452 self.push(HirFrame
::ClassUnicode(cls
));
454 ast
::ClassSetItem
::Perl(ref x
) => {
455 if self.flags().unicode() {
456 let xcls
= self.hir_perl_unicode_class(x
)?
;
457 let mut cls
= self.pop().unwrap().unwrap_class_unicode();
459 self.push(HirFrame
::ClassUnicode(cls
));
461 let xcls
= self.hir_perl_byte_class(x
);
462 let mut cls
= self.pop().unwrap().unwrap_class_bytes();
464 self.push(HirFrame
::ClassBytes(cls
));
467 ast
::ClassSetItem
::Bracketed(ref ast
) => {
468 if self.flags().unicode() {
469 let mut cls1
= self.pop().unwrap().unwrap_class_unicode();
470 self.unicode_fold_and_negate(
476 let mut cls2
= self.pop().unwrap().unwrap_class_unicode();
478 self.push(HirFrame
::ClassUnicode(cls2
));
480 let mut cls1
= self.pop().unwrap().unwrap_class_bytes();
481 self.bytes_fold_and_negate(
487 let mut cls2
= self.pop().unwrap().unwrap_class_bytes();
489 self.push(HirFrame
::ClassBytes(cls2
));
492 // This is handled automatically by the visitor.
493 ast
::ClassSetItem
::Union(_
) => {}
498 fn visit_class_set_binary_op_pre(
500 _op
: &ast
::ClassSetBinaryOp
,
502 if self.flags().unicode() {
503 let cls
= hir
::ClassUnicode
::empty();
504 self.push(HirFrame
::ClassUnicode(cls
));
506 let cls
= hir
::ClassBytes
::empty();
507 self.push(HirFrame
::ClassBytes(cls
));
512 fn visit_class_set_binary_op_in(
514 _op
: &ast
::ClassSetBinaryOp
,
516 if self.flags().unicode() {
517 let cls
= hir
::ClassUnicode
::empty();
518 self.push(HirFrame
::ClassUnicode(cls
));
520 let cls
= hir
::ClassBytes
::empty();
521 self.push(HirFrame
::ClassBytes(cls
));
526 fn visit_class_set_binary_op_post(
528 op
: &ast
::ClassSetBinaryOp
,
530 use crate::ast
::ClassSetBinaryOpKind
::*;
532 if self.flags().unicode() {
533 let mut rhs
= self.pop().unwrap().unwrap_class_unicode();
534 let mut lhs
= self.pop().unwrap().unwrap_class_unicode();
535 let mut cls
= self.pop().unwrap().unwrap_class_unicode();
536 if self.flags().case_insensitive() {
537 rhs
.try_case_fold_simple().map_err(|_
| {
539 op
.rhs
.span().clone(),
540 ErrorKind
::UnicodeCaseUnavailable
,
543 lhs
.try_case_fold_simple().map_err(|_
| {
545 op
.lhs
.span().clone(),
546 ErrorKind
::UnicodeCaseUnavailable
,
551 Intersection
=> lhs
.intersect(&rhs
),
552 Difference
=> lhs
.difference(&rhs
),
553 SymmetricDifference
=> lhs
.symmetric_difference(&rhs
),
556 self.push(HirFrame
::ClassUnicode(cls
));
558 let mut rhs
= self.pop().unwrap().unwrap_class_bytes();
559 let mut lhs
= self.pop().unwrap().unwrap_class_bytes();
560 let mut cls
= self.pop().unwrap().unwrap_class_bytes();
561 if self.flags().case_insensitive() {
562 rhs
.case_fold_simple();
563 lhs
.case_fold_simple();
566 Intersection
=> lhs
.intersect(&rhs
),
567 Difference
=> lhs
.difference(&rhs
),
568 SymmetricDifference
=> lhs
.symmetric_difference(&rhs
),
571 self.push(HirFrame
::ClassBytes(cls
));
577 /// The internal implementation of a translator.
579 /// This type is responsible for carrying around the original pattern string,
580 /// which is not tied to the internal state of a translator.
582 /// A TranslatorI exists for the time it takes to translate a single Ast.
583 #[derive(Clone, Debug)]
584 struct TranslatorI
<'t
, 'p
> {
585 trans
: &'t Translator
,
589 impl<'t
, 'p
> TranslatorI
<'t
, 'p
> {
590 /// Build a new internal translator.
591 fn new(trans
: &'t Translator
, pattern
: &'p
str) -> TranslatorI
<'t
, 'p
> {
592 TranslatorI { trans: trans, pattern: pattern }
595 /// Return a reference to the underlying translator.
596 fn trans(&self) -> &Translator
{
600 /// Push the given frame on to the call stack.
601 fn push(&self, frame
: HirFrame
) {
602 self.trans().stack
.borrow_mut().push(frame
);
605 /// Pop the top of the call stack. If the call stack is empty, return None.
606 fn pop(&self) -> Option
<HirFrame
> {
607 self.trans().stack
.borrow_mut().pop()
610 /// Create a new error with the given span and error type.
611 fn error(&self, span
: Span
, kind
: ErrorKind
) -> Error
{
612 Error { kind: kind, pattern: self.pattern.to_string(), span: span }
615 /// Return a copy of the active flags.
616 fn flags(&self) -> Flags
{
617 self.trans().flags
.get()
620 /// Set the flags of this translator from the flags set in the given AST.
621 /// Then, return the old flags.
622 fn set_flags(&self, ast_flags
: &ast
::Flags
) -> Flags
{
623 let old_flags
= self.flags();
624 let mut new_flags
= Flags
::from_ast(ast_flags
);
625 new_flags
.merge(&old_flags
);
626 self.trans().flags
.set(new_flags
);
630 fn hir_literal(&self, lit
: &ast
::Literal
) -> Result
<Hir
> {
631 let ch
= match self.literal_to_char(lit
)?
{
632 byte @ hir
::Literal
::Byte(_
) => return Ok(Hir
::literal(byte
)),
633 hir
::Literal
::Unicode(ch
) => ch
,
635 if self.flags().case_insensitive() {
636 self.hir_from_char_case_insensitive(lit
.span
, ch
)
638 self.hir_from_char(lit
.span
, ch
)
642 /// Convert an Ast literal to its scalar representation.
644 /// When Unicode mode is enabled, then this always succeeds and returns a
645 /// `char` (Unicode scalar value).
647 /// When Unicode mode is disabled, then a raw byte is returned. If that
648 /// byte is not ASCII and invalid UTF-8 is not allowed, then this returns
650 fn literal_to_char(&self, lit
: &ast
::Literal
) -> Result
<hir
::Literal
> {
651 if self.flags().unicode() {
652 return Ok(hir
::Literal
::Unicode(lit
.c
));
654 let byte
= match lit
.byte() {
655 None
=> return Ok(hir
::Literal
::Unicode(lit
.c
)),
659 return Ok(hir
::Literal
::Unicode(byte
as char));
661 if !self.trans().allow_invalid_utf8
{
662 return Err(self.error(lit
.span
, ErrorKind
::InvalidUtf8
));
664 Ok(hir
::Literal
::Byte(byte
))
667 fn hir_from_char(&self, span
: Span
, c
: char) -> Result
<Hir
> {
668 if !self.flags().unicode() && c
.len_utf8() > 1 {
669 return Err(self.error(span
, ErrorKind
::UnicodeNotAllowed
));
671 Ok(Hir
::literal(hir
::Literal
::Unicode(c
)))
674 fn hir_from_char_case_insensitive(
679 if self.flags().unicode() {
680 // If case folding won't do anything, then don't bother trying.
682 unicode
::contains_simple_case_mapping(c
, c
).map_err(|_
| {
683 self.error(span
, ErrorKind
::UnicodeCaseUnavailable
)
686 return self.hir_from_char(span
, c
);
689 hir
::ClassUnicode
::new(vec
![hir
::ClassUnicodeRange
::new(
692 cls
.try_case_fold_simple().map_err(|_
| {
693 self.error(span
, ErrorKind
::UnicodeCaseUnavailable
)
695 Ok(Hir
::class(hir
::Class
::Unicode(cls
)))
697 if c
.len_utf8() > 1 {
698 return Err(self.error(span
, ErrorKind
::UnicodeNotAllowed
));
700 // If case folding won't do anything, then don't bother trying.
702 'A'
..='Z'
| 'a'
..='z'
=> {}
703 _
=> return self.hir_from_char(span
, c
),
706 hir
::ClassBytes
::new(vec
![hir
::ClassBytesRange
::new(
709 cls
.case_fold_simple();
710 Ok(Hir
::class(hir
::Class
::Bytes(cls
)))
714 fn hir_dot(&self, span
: Span
) -> Result
<Hir
> {
715 let unicode
= self.flags().unicode();
716 if !unicode
&& !self.trans().allow_invalid_utf8
{
717 return Err(self.error(span
, ErrorKind
::InvalidUtf8
));
719 Ok(if self.flags().dot_matches_new_line() {
726 fn hir_assertion(&self, asst
: &ast
::Assertion
) -> Result
<Hir
> {
727 let unicode
= self.flags().unicode();
728 let multi_line
= self.flags().multi_line();
730 ast
::AssertionKind
::StartLine
=> Hir
::anchor(if multi_line
{
731 hir
::Anchor
::StartLine
733 hir
::Anchor
::StartText
735 ast
::AssertionKind
::EndLine
=> Hir
::anchor(if multi_line
{
740 ast
::AssertionKind
::StartText
=> {
741 Hir
::anchor(hir
::Anchor
::StartText
)
743 ast
::AssertionKind
::EndText
=> Hir
::anchor(hir
::Anchor
::EndText
),
744 ast
::AssertionKind
::WordBoundary
=> {
745 Hir
::word_boundary(if unicode
{
746 hir
::WordBoundary
::Unicode
748 hir
::WordBoundary
::Ascii
751 ast
::AssertionKind
::NotWordBoundary
=> {
752 Hir
::word_boundary(if unicode
{
753 hir
::WordBoundary
::UnicodeNegate
755 // It is possible for negated ASCII word boundaries to
756 // match at invalid UTF-8 boundaries, even when searching
758 if !self.trans().allow_invalid_utf8
{
760 self.error(asst
.span
, ErrorKind
::InvalidUtf8
)
763 hir
::WordBoundary
::AsciiNegate
769 fn hir_group(&self, group
: &ast
::Group
, expr
: Hir
) -> Hir
{
770 let kind
= match group
.kind
{
771 ast
::GroupKind
::CaptureIndex(idx
) => {
772 hir
::GroupKind
::CaptureIndex(idx
)
774 ast
::GroupKind
::CaptureName(ref capname
) => {
775 hir
::GroupKind
::CaptureName
{
776 name
: capname
.name
.clone(),
777 index
: capname
.index
,
780 ast
::GroupKind
::NonCapturing(_
) => hir
::GroupKind
::NonCapturing
,
782 Hir
::group(hir
::Group { kind: kind, hir: Box::new(expr) }
)
785 fn hir_repetition(&self, rep
: &ast
::Repetition
, expr
: Hir
) -> Hir
{
786 let kind
= match rep
.op
.kind
{
787 ast
::RepetitionKind
::ZeroOrOne
=> hir
::RepetitionKind
::ZeroOrOne
,
788 ast
::RepetitionKind
::ZeroOrMore
=> hir
::RepetitionKind
::ZeroOrMore
,
789 ast
::RepetitionKind
::OneOrMore
=> hir
::RepetitionKind
::OneOrMore
,
790 ast
::RepetitionKind
::Range(ast
::RepetitionRange
::Exactly(m
)) => {
791 hir
::RepetitionKind
::Range(hir
::RepetitionRange
::Exactly(m
))
793 ast
::RepetitionKind
::Range(ast
::RepetitionRange
::AtLeast(m
)) => {
794 hir
::RepetitionKind
::Range(hir
::RepetitionRange
::AtLeast(m
))
796 ast
::RepetitionKind
::Range(ast
::RepetitionRange
::Bounded(
800 hir
::RepetitionKind
::Range(hir
::RepetitionRange
::Bounded(m
, n
))
804 if self.flags().swap_greed() { !rep.greedy }
else { rep.greedy }
;
805 Hir
::repetition(hir
::Repetition
{
812 fn hir_unicode_class(
814 ast_class
: &ast
::ClassUnicode
,
815 ) -> Result
<hir
::ClassUnicode
> {
816 use crate::ast
::ClassUnicodeKind
::*;
818 if !self.flags().unicode() {
820 self.error(ast_class
.span
, ErrorKind
::UnicodeNotAllowed
)
823 let query
= match ast_class
.kind
{
824 OneLetter(name
) => ClassQuery
::OneLetter(name
),
825 Named(ref name
) => ClassQuery
::Binary(name
),
826 NamedValue { ref name, ref value, .. }
=> ClassQuery
::ByValue
{
828 property_value
: value
,
831 let mut result
= self.convert_unicode_class_error(
833 unicode
::class(query
),
835 if let Ok(ref mut class
) = result
{
836 self.unicode_fold_and_negate(
841 if class
.ranges().is_empty() {
843 .error(ast_class
.span
, ErrorKind
::EmptyClassNotAllowed
);
850 fn hir_ascii_unicode_class(
852 ast
: &ast
::ClassAscii
,
853 ) -> Result
<hir
::ClassUnicode
> {
854 let mut cls
= hir
::ClassUnicode
::new(
855 ascii_class(&ast
.kind
)
857 .map(|&(s
, e
)| hir
::ClassUnicodeRange
::new(s
, e
)),
859 self.unicode_fold_and_negate(&ast
.span
, ast
.negated
, &mut cls
)?
;
863 fn hir_ascii_byte_class(
865 ast
: &ast
::ClassAscii
,
866 ) -> Result
<hir
::ClassBytes
> {
867 let mut cls
= hir
::ClassBytes
::new(
868 ascii_class(&ast
.kind
)
870 .map(|&(s
, e
)| hir
::ClassBytesRange
::new(s
as u8, e
as u8)),
872 self.bytes_fold_and_negate(&ast
.span
, ast
.negated
, &mut cls
)?
;
876 fn hir_perl_unicode_class(
878 ast_class
: &ast
::ClassPerl
,
879 ) -> Result
<hir
::ClassUnicode
> {
880 use crate::ast
::ClassPerlKind
::*;
882 assert
!(self.flags().unicode());
883 let result
= match ast_class
.kind
{
884 Digit
=> unicode
::perl_digit(),
885 Space
=> unicode
::perl_space(),
886 Word
=> unicode
::perl_word(),
889 self.convert_unicode_class_error(&ast_class
.span
, result
)?
;
890 // We needn't apply case folding here because the Perl Unicode classes
891 // are already closed under Unicode simple case folding.
892 if ast_class
.negated
{
898 fn hir_perl_byte_class(
900 ast_class
: &ast
::ClassPerl
,
901 ) -> hir
::ClassBytes
{
902 use crate::ast
::ClassPerlKind
::*;
904 assert
!(!self.flags().unicode());
905 let mut class
= match ast_class
.kind
{
906 Digit
=> hir_ascii_class_bytes(&ast
::ClassAsciiKind
::Digit
),
907 Space
=> hir_ascii_class_bytes(&ast
::ClassAsciiKind
::Space
),
908 Word
=> hir_ascii_class_bytes(&ast
::ClassAsciiKind
::Word
),
910 // We needn't apply case folding here because the Perl ASCII classes
911 // are already closed (under ASCII case folding).
912 if ast_class
.negated
{
918 /// Converts the given Unicode specific error to an HIR translation error.
920 /// The span given should approximate the position at which an error would
922 fn convert_unicode_class_error(
925 result
: unicode
::Result
<hir
::ClassUnicode
>,
926 ) -> Result
<hir
::ClassUnicode
> {
927 result
.map_err(|err
| {
928 let sp
= span
.clone();
930 unicode
::Error
::PropertyNotFound
=> {
931 self.error(sp
, ErrorKind
::UnicodePropertyNotFound
)
933 unicode
::Error
::PropertyValueNotFound
=> {
934 self.error(sp
, ErrorKind
::UnicodePropertyValueNotFound
)
936 unicode
::Error
::PerlClassNotFound
=> {
937 self.error(sp
, ErrorKind
::UnicodePerlClassNotFound
)
943 fn unicode_fold_and_negate(
947 class
: &mut hir
::ClassUnicode
,
949 // Note that we must apply case folding before negation!
950 // Consider `(?i)[^x]`. If we applied negation first, then
951 // the result would be the character class that matched any
952 // Unicode scalar value.
953 if self.flags().case_insensitive() {
954 class
.try_case_fold_simple().map_err(|_
| {
955 self.error(span
.clone(), ErrorKind
::UnicodeCaseUnavailable
)
964 fn bytes_fold_and_negate(
968 class
: &mut hir
::ClassBytes
,
970 // Note that we must apply case folding before negation!
971 // Consider `(?i)[^x]`. If we applied negation first, then
972 // the result would be the character class that matched any
973 // byte.
974 if self.flags().case_insensitive() {
975 class
.case_fold_simple();
980 if !self.trans().allow_invalid_utf8
&& !class
.is_all_ascii() {
981 return Err(self.error(span
.clone(), ErrorKind
::InvalidUtf8
));
986 /// Return a scalar byte value suitable for use as a literal in a byte
988 fn class_literal_byte(&self, ast
: &ast
::Literal
) -> Result
<u8> {
989 match self.literal_to_char(ast
)?
{
990 hir
::Literal
::Byte(byte
) => Ok(byte
),
991 hir
::Literal
::Unicode(ch
) => {
992 if ch
<= 0x7F as char {
995 // We can't feasibly support Unicode in
996 // byte oriented classes. Byte classes don't
997 // do Unicode case folding.
998 Err(self.error(ast
.span
, ErrorKind
::UnicodeNotAllowed
))
1005 /// A translator's representation of a regular expression's flags at any given
1008 /// Each flag can be in one of three states: absent, present but disabled or
1009 /// present but enabled.
1010 #[derive(Clone, Copy, Debug, Default)]
1012 case_insensitive
: Option
<bool
>,
1013 multi_line
: Option
<bool
>,
1014 dot_matches_new_line
: Option
<bool
>,
1015 swap_greed
: Option
<bool
>,
1016 unicode
: Option
<bool
>,
1017 // Note that `ignore_whitespace` is omitted here because it is handled
1018 // entirely in the parser.
1022 fn from_ast(ast
: &ast
::Flags
) -> Flags
{
1023 let mut flags
= Flags
::default();
1024 let mut enable
= true;
1025 for item
in &ast
.items
{
1027 ast
::FlagsItemKind
::Negation
=> {
1030 ast
::FlagsItemKind
::Flag(ast
::Flag
::CaseInsensitive
) => {
1031 flags
.case_insensitive
= Some(enable
);
1033 ast
::FlagsItemKind
::Flag(ast
::Flag
::MultiLine
) => {
1034 flags
.multi_line
= Some(enable
);
1036 ast
::FlagsItemKind
::Flag(ast
::Flag
::DotMatchesNewLine
) => {
1037 flags
.dot_matches_new_line
= Some(enable
);
1039 ast
::FlagsItemKind
::Flag(ast
::Flag
::SwapGreed
) => {
1040 flags
.swap_greed
= Some(enable
);
1042 ast
::FlagsItemKind
::Flag(ast
::Flag
::Unicode
) => {
1043 flags
.unicode
= Some(enable
);
1045 ast
::FlagsItemKind
::Flag(ast
::Flag
::IgnoreWhitespace
) => {}
1051 fn merge(&mut self, previous
: &Flags
) {
1052 if self.case_insensitive
.is_none() {
1053 self.case_insensitive
= previous
.case_insensitive
;
1055 if self.multi_line
.is_none() {
1056 self.multi_line
= previous
.multi_line
;
1058 if self.dot_matches_new_line
.is_none() {
1059 self.dot_matches_new_line
= previous
.dot_matches_new_line
;
1061 if self.swap_greed
.is_none() {
1062 self.swap_greed
= previous
.swap_greed
;
1064 if self.unicode
.is_none() {
1065 self.unicode
= previous
.unicode
;
1069 fn case_insensitive(&self) -> bool
{
1070 self.case_insensitive
.unwrap_or(false)
1073 fn multi_line(&self) -> bool
{
1074 self.multi_line
.unwrap_or(false)
1077 fn dot_matches_new_line(&self) -> bool
{
1078 self.dot_matches_new_line
.unwrap_or(false)
1081 fn swap_greed(&self) -> bool
{
1082 self.swap_greed
.unwrap_or(false)
1085 fn unicode(&self) -> bool
{
1086 self.unicode
.unwrap_or(true)
1090 fn hir_ascii_class_bytes(kind
: &ast
::ClassAsciiKind
) -> hir
::ClassBytes
{
1091 let ranges
: Vec
<_
> = ascii_class(kind
)
1094 .map(|(s
, e
)| hir
::ClassBytesRange
::new(s
as u8, e
as u8))
1096 hir
::ClassBytes
::new(ranges
)
/// Returns the static table of `(start, end)` character ranges for an ASCII
/// class kind.
///
/// For example, `Alnum` maps to `0-9`, `A-Z` and `a-z`, and `Word` is the
/// same set plus `_`. Every range visible here lies within `\x00`-`\x7F`.
///
/// NOTE(review): the arm for `Space` is not visible in this view (tests
/// elsewhere in this file call `ascii_class` with
/// `ClassAsciiKind::Space`) -- confirm its ranges against the full file.
1099 fn ascii_class(kind
: &ast
::ClassAsciiKind
) -> &'
static [(char, char)] {
1100 use crate::ast
::ClassAsciiKind
::*;
1102 Alnum
=> &[('
0'
, '
9'
), ('A'
, 'Z'
), ('a'
, 'z'
)],
1103 Alpha
=> &[('A'
, 'Z'
), ('a'
, 'z'
)],
1104 Ascii
=> &[('
\x00'
, '
\x7F'
)],
1105 Blank
=> &[('
\t'
, '
\t'
), (' '
, ' '
)],
1106 Cntrl
=> &[('
\x00'
, '
\x1F'
), ('
\x7F'
, '
\x7F'
)],
1107 Digit
=> &[('
0'
, '
9'
)],
1108 Graph
=> &[('
!'
, '
~'
)],
1109 Lower
=> &[('a'
, 'z'
)],
1110 Print
=> &[(' '
, '
~'
)],
1111 Punct
=> &[('
!'
, '
/'
), ('
:'
, '@'
), ('
['
, '`'
), ('
{'
, '
~'
)],
1120 Upper
=> &[('A'
, 'Z'
)],
1121 Word
=> &[('
0'
, '
9'
), ('A'
, 'Z'
), ('_'
, '_'
), ('a'
, 'z'
)],
1122 Xdigit
=> &[('
0'
, '
9'
), ('A'
, 'F'
), ('a'
, 'f'
)],
1128 use crate::ast
::parse
::ParserBuilder
;
1129 use crate::ast
::{self, Ast, Position, Span}
;
1130 use crate::hir
::{self, Hir, HirKind}
;
1131 use crate::unicode
::{self, ClassQuery}
;
1133 use super::{ascii_class, TranslatorBuilder}
;
1135 // We create these errors to compare with real hir::Errors in the tests.
1136 // We define equality between TestError and hir::Error to disregard the
1137 // pattern string in hir::Error, which is annoying to provide in tests.
1138 #[derive(Clone, Debug)]
1141 kind
: hir
::ErrorKind
,
1144 impl PartialEq
<hir
::Error
> for TestError
{
1145 fn eq(&self, other
: &hir
::Error
) -> bool
{
1146 self.span
== other
.span
&& self.kind
== other
.kind
1150 impl PartialEq
<TestError
> for hir
::Error
{
1151 fn eq(&self, other
: &TestError
) -> bool
{
1152 self.span
== other
.span
&& self.kind
== other
.kind
1156 fn parse(pattern
: &str) -> Ast
{
1157 ParserBuilder
::new().octal(true).build().parse(pattern
).unwrap()
// Test helper: translates `pattern` to an `Hir` using a `TranslatorBuilder`
// configured with `allow_invalid_utf8(false)`. The pattern is parsed with
// `parse(pattern)` and passed to `translate` together with the pattern text.
// NOTE(review): the calls between the builder configuration and `translate`,
// and whatever turns the translation result into an `Hir`, are not visible
// in this view -- confirm against the full file.
1160 fn t(pattern
: &str) -> Hir
{
1161 TranslatorBuilder
::new()
1162 .allow_invalid_utf8(false)
1164 .translate(pattern
, &parse(pattern
))
// Test helper: runs the same translation as a UTF-8-only translator
// (`allow_invalid_utf8(false)`) but returns an `hir::Error` instead of an
// `Hir`, for tests that expect translation to fail.
// NOTE(review): the step that extracts the error from the translation
// result is not visible in this view -- confirm against the full file.
1168 fn t_err(pattern
: &str) -> hir
::Error
{
1169 TranslatorBuilder
::new()
1170 .allow_invalid_utf8(false)
1172 .translate(pattern
, &parse(pattern
))
// Test helper: translates `pattern` to an `Hir` using a `TranslatorBuilder`
// configured with `allow_invalid_utf8(true)`, i.e. with expressions that may
// match invalid UTF-8 permitted.
// NOTE(review): the calls between the builder configuration and `translate`,
// and whatever turns the translation result into an `Hir`, are not visible
// in this view -- confirm against the full file.
1176 fn t_bytes(pattern
: &str) -> Hir
{
1177 TranslatorBuilder
::new()
1178 .allow_invalid_utf8(true)
1180 .translate(pattern
, &parse(pattern
))
// Test helper producing the expected `Hir` for the string `s`.
// The only step visible here maps items through `hir::Literal::Unicode`.
// NOTE(review): the rest of the body (what is iterated and how the literals
// are combined into one `Hir`) is not visible in this view -- confirm
// against the full file.
1184 fn hir_lit(s
: &str) -> Hir
{
1190 .map(hir
::Literal
::Unicode
)
// Test helper producing the expected `Hir` for the byte string `s`.
// One visible arm (keyed on `1`) returns a single
// `Hir::literal(hir::Literal::Byte(s[0]))`; another visible step maps items
// through `hir::Literal::Byte`.
// NOTE(review): the expression being matched on and the remaining arms are
// not visible in this view -- confirm against the full file.
1198 fn hir_blit(s
: &[u8]) -> Hir
{
1201 1 => Hir
::literal(hir
::Literal
::Byte(s
[0])),
1206 .map(hir
::Literal
::Byte
)
1214 fn hir_group(i
: u32, expr
: Hir
) -> Hir
{
1215 Hir
::group(hir
::Group
{
1216 kind
: hir
::GroupKind
::CaptureIndex(i
),
1217 hir
: Box
::new(expr
),
// Test helper: wraps `expr` in a named capturing group
// (`GroupKind::CaptureName`) whose `name` field is `name.to_string()`.
// NOTE(review): the field that would carry `i` is not visible in this view;
// presumably `i` is the capture index -- confirm against the full file.
1221 fn hir_group_name(i
: u32, name
: &str, expr
: Hir
) -> Hir
{
1222 Hir
::group(hir
::Group
{
1223 kind
: hir
::GroupKind
::CaptureName
{
1224 name
: name
.to_string(),
1227 hir
: Box
::new(expr
),
1231 fn hir_group_nocap(expr
: Hir
) -> Hir
{
1232 Hir
::group(hir
::Group
{
1233 kind
: hir
::GroupKind
::NonCapturing
,
1234 hir
: Box
::new(expr
),
// Test helper: builds a repetition of `expr` with kind
// `RepetitionKind::ZeroOrOne` (the expected translation of `?`).
// NOTE(review): the line that uses the `greedy` parameter is not visible in
// this view; presumably it sets the repetition's greediness -- confirm
// against the full file.
1238 fn hir_quest(greedy
: bool
, expr
: Hir
) -> Hir
{
1239 Hir
::repetition(hir
::Repetition
{
1240 kind
: hir
::RepetitionKind
::ZeroOrOne
,
1242 hir
: Box
::new(expr
),
// Test helper: builds a repetition of `expr` with kind
// `RepetitionKind::ZeroOrMore` (the expected translation of `*`).
// NOTE(review): the line that uses the `greedy` parameter is not visible in
// this view; presumably it sets the repetition's greediness -- confirm
// against the full file.
1246 fn hir_star(greedy
: bool
, expr
: Hir
) -> Hir
{
1247 Hir
::repetition(hir
::Repetition
{
1248 kind
: hir
::RepetitionKind
::ZeroOrMore
,
1250 hir
: Box
::new(expr
),
// Test helper: builds a repetition of `expr` with kind
// `RepetitionKind::OneOrMore` (the expected translation of `+`).
// NOTE(review): the line that uses the `greedy` parameter is not visible in
// this view; presumably it sets the repetition's greediness -- confirm
// against the full file.
1254 fn hir_plus(greedy
: bool
, expr
: Hir
) -> Hir
{
1255 Hir
::repetition(hir
::Repetition
{
1256 kind
: hir
::RepetitionKind
::OneOrMore
,
1258 hir
: Box
::new(expr
),
// Test helper: builds a repetition of `expr` with kind
// `RepetitionKind::Range(range)` (the expected translation of a counted
// repetition).
// NOTE(review): the line that uses the `greedy` parameter is not visible in
// this view; presumably it sets the repetition's greediness -- confirm
// against the full file.
1262 fn hir_range(greedy
: bool
, range
: hir
::RepetitionRange
, expr
: Hir
) -> Hir
{
1263 Hir
::repetition(hir
::Repetition
{
1264 kind
: hir
::RepetitionKind
::Range(range
),
1266 hir
: Box
::new(expr
),
1270 fn hir_alt(alts
: Vec
<Hir
>) -> Hir
{
1271 Hir
::alternation(alts
)
// Test helper taking a list of sub-expressions and returning one `Hir`.
// NOTE(review): the body is not visible in this view; judging by the name
// and its use in the tests it presumably concatenates `exprs` -- confirm
// against the full file.
1274 fn hir_cat(exprs
: Vec
<Hir
>) -> Hir
{
1279 fn hir_uclass_query(query
: ClassQuery
<'_
>) -> Hir
{
1280 Hir
::class(hir
::Class
::Unicode(unicode
::class(query
).unwrap()))
1284 fn hir_uclass_perl_word() -> Hir
{
1285 Hir
::class(hir
::Class
::Unicode(unicode
::perl_word().unwrap()))
// Test helper: builds a Unicode class `Hir` from `(start, end)` char pairs.
// Each pair becomes an `hir::ClassUnicodeRange`, and the collected ranges
// are passed to `hir::ClassUnicode::new`.
// NOTE(review): the iteration and collection calls around `.map(..)` are
// not visible in this view -- confirm against the full file.
1288 fn hir_uclass(ranges
: &[(char, char)]) -> Hir
{
1289 let ranges
: Vec
<hir
::ClassUnicodeRange
> = ranges
1291 .map(|&(s
, e
)| hir
::ClassUnicodeRange
::new(s
, e
))
1293 Hir
::class(hir
::Class
::Unicode(hir
::ClassUnicode
::new(ranges
)))
// Test helper: builds a byte class `Hir` from `(start, end)` byte pairs.
// Each pair becomes an `hir::ClassBytesRange`, and the collected ranges are
// passed to `hir::ClassBytes::new`.
// NOTE(review): the iteration and collection calls around `.map(..)` are
// not visible in this view -- confirm against the full file.
1296 fn hir_bclass(ranges
: &[(u8, u8)]) -> Hir
{
1297 let ranges
: Vec
<hir
::ClassBytesRange
> = ranges
1299 .map(|&(s
, e
)| hir
::ClassBytesRange
::new(s
, e
))
1301 Hir
::class(hir
::Class
::Bytes(hir
::ClassBytes
::new(ranges
)))
// Test helper: builds a byte class `Hir` from `(start, end)` char pairs.
// Both ends of every pair are asserted to be at most 0x7F before being cast
// to `u8`, so the helper panics on any non-ASCII range rather than
// truncating it.
// NOTE(review): the iteration, closure header and collection calls are not
// visible in this view -- confirm against the full file.
1304 fn hir_bclass_from_char(ranges
: &[(char, char)]) -> Hir
{
1305 let ranges
: Vec
<hir
::ClassBytesRange
> = ranges
1308 assert
!(s
as u32 <= 0x7F);
1309 assert
!(e
as u32 <= 0x7F);
1310 hir
::ClassBytesRange
::new(s
as u8, e
as u8)
1313 Hir
::class(hir
::Class
::Bytes(hir
::ClassBytes
::new(ranges
)))
// Test helper: applies simple case folding to a class expression.
// `expr` must be an `HirKind::Class`; its class has `case_fold_simple()`
// called on it. Any other kind panics with
// "cannot case fold non-class Hir expr".
// NOTE(review): the line that rebuilds the returned `Hir` from the folded
// class is not visible in this view -- confirm against the full file.
1316 fn hir_case_fold(expr
: Hir
) -> Hir
{
1317 match expr
.into_kind() {
1318 HirKind
::Class(mut cls
) => {
1319 cls
.case_fold_simple();
1322 _
=> panic
!("cannot case fold non-class Hir expr"),
// Test helper operating on a class expression. `expr` must be an
// `HirKind::Class`; any other kind panics with
// "cannot negate non-class Hir expr".
// NOTE(review): the body of the class arm is not visible in this view;
// presumably it negates the class and wraps it back into an `Hir` --
// confirm against the full file.
1326 fn hir_negate(expr
: Hir
) -> Hir
{
1327 match expr
.into_kind() {
1328 HirKind
::Class(mut cls
) => {
1332 _
=> panic
!("cannot negate non-class Hir expr"),
// Test helper combining two class expressions of the same flavour.
// Two Unicode classes yield a Unicode class `Hir` built from `c1`; two byte
// classes yield a byte class `Hir` built from `c1`. Any other pairing
// (mixed flavours or non-class expressions) panics with
// "cannot union non-class Hir exprs".
// NOTE(review): the statement that folds `c2` into `c1` is not visible in
// this view; presumably it is a union -- confirm against the full file.
1337 fn hir_union(expr1
: Hir
, expr2
: Hir
) -> Hir
{
1338 use crate::hir
::Class
::{Bytes, Unicode}
;
1340 match (expr1
.into_kind(), expr2
.into_kind()) {
1341 (HirKind
::Class(Unicode(mut c1
)), HirKind
::Class(Unicode(c2
))) => {
1343 Hir
::class(hir
::Class
::Unicode(c1
))
1345 (HirKind
::Class(Bytes(mut c1
)), HirKind
::Class(Bytes(c2
))) => {
1347 Hir
::class(hir
::Class
::Bytes(c1
))
1349 _
=> panic
!("cannot union non-class Hir exprs"),
// Test helper combining two class expressions of the same flavour.
// Two Unicode classes yield a Unicode class `Hir` built from `c1`; two byte
// classes yield a byte class `Hir` built from `c1`. Any other pairing
// (mixed flavours or non-class expressions) panics with
// "cannot difference non-class Hir exprs".
// NOTE(review): the statement that removes `c2` from `c1` is not visible in
// this view; presumably it is a set difference -- confirm against the full
// file.
1354 fn hir_difference(expr1
: Hir
, expr2
: Hir
) -> Hir
{
1355 use crate::hir
::Class
::{Bytes, Unicode}
;
1357 match (expr1
.into_kind(), expr2
.into_kind()) {
1358 (HirKind
::Class(Unicode(mut c1
)), HirKind
::Class(Unicode(c2
))) => {
1360 Hir
::class(hir
::Class
::Unicode(c1
))
1362 (HirKind
::Class(Bytes(mut c1
)), HirKind
::Class(Bytes(c2
))) => {
1364 Hir
::class(hir
::Class
::Bytes(c1
))
1366 _
=> panic
!("cannot difference non-class Hir exprs"),
// Test helper taking an `hir::Anchor` and returning an `Hir`.
// NOTE(review): the body is not visible in this view; presumably it wraps
// `anchor` in the corresponding `Hir` constructor -- confirm against the
// full file.
1370 fn hir_anchor(anchor
: hir
::Anchor
) -> Hir
{
1374 fn hir_word(wb
: hir
::WordBoundary
) -> Hir
{
1375 Hir
::word_boundary(wb
)
1380 assert_eq
!(t(""), Hir
::empty());
1381 assert_eq
!(t("(?i)"), Hir
::empty());
1382 assert_eq
!(t("()"), hir_group(1, Hir
::empty()));
1383 assert_eq
!(t("(?:)"), hir_group_nocap(Hir
::empty()));
1384 assert_eq
!(t("(?P<wat>)"), hir_group_name(1, "wat", Hir
::empty()));
1385 assert_eq
!(t("|"), hir_alt(vec
![Hir
::empty(), Hir
::empty()]));
1389 hir_group(1, Hir
::empty()),
1390 hir_group(2, Hir
::empty()),
1395 hir_group(1, hir_alt(vec
![Hir
::empty(), hir_lit("b"),]))
1399 hir_group(1, hir_alt(vec
![hir_lit("a"), Hir
::empty(),]))
1405 hir_alt(vec
![hir_lit("a"), Hir
::empty(), hir_lit("c"),])
1412 hir_alt(vec
![Hir
::empty(), Hir
::empty(), Hir
::empty(),])
1419 assert_eq
!(t("a"), hir_lit("a"));
1420 assert_eq
!(t("(?-u)a"), hir_lit("a"));
1421 assert_eq
!(t("☃"), hir_lit("☃"));
1422 assert_eq
!(t("abcd"), hir_lit("abcd"));
1424 assert_eq
!(t_bytes("(?-u)a"), hir_lit("a"));
1425 assert_eq
!(t_bytes("(?-u)\x61"), hir_lit("a"));
1426 assert_eq
!(t_bytes(r
"(?-u)\x61"), hir_lit("a"));
1427 assert_eq
!(t_bytes(r
"(?-u)\xFF"), hir_blit(b
"\xFF"));
1432 kind
: hir
::ErrorKind
::UnicodeNotAllowed
,
1434 Position
::new(5, 1, 6),
1435 Position
::new(8, 1, 7)
1440 t_err(r
"(?-u)\xFF"),
1442 kind
: hir
::ErrorKind
::InvalidUtf8
,
1444 Position
::new(5, 1, 6),
1445 Position
::new(9, 1, 10)
// Checks how literals translate under the case insensitive flag.
// With Unicode enabled (assertions gated on `unicode-case`) a letter becomes
// a Unicode class holding its case variants, e.g. `(?i)a` -> {A, a}.
// With `(?i-u)` the result is a byte class instead, while `\xFF` stays a
// plain byte literal. The final visible expectation is a
// `UnicodeNotAllowed` error.
// NOTE(review): several assertion wrappers and patterns are not visible in
// this view -- confirm details against the full file.
1452 fn literal_case_insensitive() {
1453 #[cfg(feature = "unicode-case")]
1454 assert_eq
!(t("(?i)a"), hir_uclass(&[('A'
, 'A'
), ('a'
, 'a'
),]));
1455 #[cfg(feature = "unicode-case")]
1458 hir_group_nocap(hir_uclass(&[('A'
, 'A'
), ('a'
, 'a'
)],))
1460 #[cfg(feature = "unicode-case")]
1465 hir_uclass(&[('A'
, 'A'
), ('a'
, 'a'
)]),
1469 #[cfg(feature = "unicode-case")]
1473 hir_uclass(&[('A'
, 'A'
), ('a'
, 'a'
)]),
1474 hir_uclass(&[('B'
, 'B'
), ('b'
, 'b'
)]),
1476 hir_uclass(&[('C'
, 'C'
), ('c'
, 'c'
)]),
1479 #[cfg(feature = "unicode-case")]
1482 hir_uclass(&[('Β'
, 'Β'
), ('β'
, 'β'
), ('ϐ'
, 'ϐ'
),])
1485 assert_eq
!(t("(?i-u)a"), hir_bclass(&[(b'A'
, b'A'
), (b'a'
, b'a'
),]));
1486 #[cfg(feature = "unicode-case")]
1488 t("(?-u)a(?i)a(?-i)a"),
1491 hir_bclass(&[(b'A'
, b'A'
), (b'a'
, b'a'
)]),
1498 hir_bclass(&[(b'A'
, b'A'
), (b'a'
, b'a'
)]),
1499 hir_bclass(&[(b'B'
, b'B'
), (b'b'
, b'b'
)]),
1501 hir_bclass(&[(b'C'
, b'C'
), (b'c'
, b'c'
)]),
1507 hir_bclass(&[(b'A'
, b'A'
), (b'a'
, b'a'
),])
1510 t_bytes("(?i-u)\x61"),
1511 hir_bclass(&[(b'A'
, b'A'
), (b'a'
, b'a'
),])
1514 t_bytes(r
"(?i-u)\x61"),
1515 hir_bclass(&[(b'A'
, b'A'
), (b'a'
, b'a'
),])
1517 assert_eq
!(t_bytes(r
"(?i-u)\xFF"), hir_blit(b
"\xFF"));
1522 kind
: hir
::ErrorKind
::UnicodeNotAllowed
,
1524 Position
::new(6, 1, 7),
1525 Position
::new(8, 1, 8),
1535 hir_uclass(&[('
\0'
, '
\t'
), ('
\x0B'
, '
\u{10FFFF}'
),])
1537 assert_eq
!(t("(?s)."), hir_uclass(&[('
\0'
, '
\u{10FFFF}'
),]));
1540 hir_bclass(&[(b'
\0'
, b'
\t'
), (b'
\x0B'
, b'
\xFF'
),])
1542 assert_eq
!(t_bytes("(?s-u)."), hir_bclass(&[(b'
\0'
, b'
\xFF'
),]));
1544 // If invalid UTF-8 isn't allowed, then non-Unicode `.` isn't allowed.
1548 kind
: hir
::ErrorKind
::InvalidUtf8
,
1550 Position
::new(5, 1, 6),
1551 Position
::new(6, 1, 7)
1558 kind
: hir
::ErrorKind
::InvalidUtf8
,
1560 Position
::new(6, 1, 7),
1561 Position
::new(7, 1, 8)
1569 assert_eq
!(t("^"), hir_anchor(hir
::Anchor
::StartText
));
1570 assert_eq
!(t("$"), hir_anchor(hir
::Anchor
::EndText
));
1571 assert_eq
!(t(r
"\A"), hir_anchor(hir
::Anchor
::StartText
));
1572 assert_eq
!(t(r
"\z"), hir_anchor(hir
::Anchor
::EndText
));
1573 assert_eq
!(t("(?m)^"), hir_anchor(hir
::Anchor
::StartLine
));
1574 assert_eq
!(t("(?m)$"), hir_anchor(hir
::Anchor
::EndLine
));
1575 assert_eq
!(t(r
"(?m)\A"), hir_anchor(hir
::Anchor
::StartText
));
1576 assert_eq
!(t(r
"(?m)\z"), hir_anchor(hir
::Anchor
::EndText
));
1578 assert_eq
!(t(r
"\b"), hir_word(hir
::WordBoundary
::Unicode
));
1579 assert_eq
!(t(r
"\B"), hir_word(hir
::WordBoundary
::UnicodeNegate
));
1580 assert_eq
!(t(r
"(?-u)\b"), hir_word(hir
::WordBoundary
::Ascii
));
1582 t_bytes(r
"(?-u)\B"),
1583 hir_word(hir
::WordBoundary
::AsciiNegate
)
1589 kind
: hir
::ErrorKind
::InvalidUtf8
,
1591 Position
::new(5, 1, 6),
1592 Position
::new(7, 1, 8)
1600 assert_eq
!(t("(a)"), hir_group(1, hir_lit("a")));
1604 hir_group(1, hir_lit("a")),
1605 hir_group(2, hir_lit("b")),
1611 hir_group(1, hir_lit("a")),
1612 hir_group(2, hir_lit("b")),
1615 assert_eq
!(t("(?P<foo>)"), hir_group_name(1, "foo", Hir
::empty()));
1616 assert_eq
!(t("(?P<foo>a)"), hir_group_name(1, "foo", hir_lit("a")));
1618 t("(?P<foo>a)(?P<bar>b)"),
1620 hir_group_name(1, "foo", hir_lit("a")),
1621 hir_group_name(2, "bar", hir_lit("b")),
1624 assert_eq
!(t("(?:)"), hir_group_nocap(Hir
::empty()));
1625 assert_eq
!(t("(?:a)"), hir_group_nocap(hir_lit("a")));
1629 hir_group_nocap(hir_lit("a")),
1630 hir_group(1, hir_lit("b")),
1636 hir_group(1, hir_lit("a")),
1637 hir_group_nocap(hir_lit("b")),
1638 hir_group(2, hir_lit("c")),
1642 t("(a)(?P<foo>b)(c)"),
1644 hir_group(1, hir_lit("a")),
1645 hir_group_name(2, "foo", hir_lit("b")),
1646 hir_group(3, hir_lit("c")),
1649 assert_eq
!(t("()"), hir_group(1, Hir
::empty()));
1650 assert_eq
!(t("((?i))"), hir_group(1, Hir
::empty()));
1651 assert_eq
!(t("((?x))"), hir_group(1, Hir
::empty()));
1652 assert_eq
!(t("(((?x)))"), hir_group(1, hir_group(2, Hir
::empty())));
1657 #[cfg(feature = "unicode-case")]
1661 hir_group_nocap(hir_uclass(&[('A'
, 'A'
), ('a'
, 'a'
)])),
1668 hir_group_nocap(hir_bclass(&[(b'A'
, b'A'
), (b'a'
, b'a'
)])),
1675 hir_group_nocap(hir_bclass(&[(b'A'
, b'A'
), (b'a'
, b'a'
)])),
1682 hir_group(1, hir_bclass(&[(b'A'
, b'A'
), (b'a'
, b'a'
)])),
1686 #[cfg(feature = "unicode-case")]
1690 hir_group_nocap(hir_lit("a")),
1691 hir_uclass(&[('A'
, 'A'
), ('a'
, 'a'
)]),
1694 #[cfg(feature = "unicode-case")]
1698 hir_uclass(&[('A'
, 'A'
), ('a'
, 'a'
)]),
1699 hir_anchor(hir
::Anchor
::StartLine
),
1702 #[cfg(feature = "unicode-case")]
1704 t("(?im)a^(?i-m)a^"),
1706 hir_uclass(&[('A'
, 'A'
), ('a'
, 'a'
)]),
1707 hir_anchor(hir
::Anchor
::StartLine
),
1708 hir_uclass(&[('A'
, 'A'
), ('a'
, 'a'
)]),
1709 hir_anchor(hir
::Anchor
::StartText
),
1713 t("(?U)a*a*?(?-U)a*a*?"),
1715 hir_star(false, hir_lit("a")),
1716 hir_star(true, hir_lit("a")),
1717 hir_star(true, hir_lit("a")),
1718 hir_star(false, hir_lit("a")),
1721 #[cfg(feature = "unicode-case")]
1725 hir_group_nocap(hir_cat(vec
![
1727 hir_uclass(&[('A'
, 'A'
), ('a'
, 'a'
)]),
1732 #[cfg(feature = "unicode-case")]
1734 t("(?i)(?:a(?-i)a)a"),
1736 hir_group_nocap(hir_cat(vec
![
1737 hir_uclass(&[('A'
, 'A'
), ('a'
, 'a'
)]),
1740 hir_uclass(&[('A'
, 'A'
), ('a'
, 'a'
)]),
1748 t(r
"\\\.\+\*\?\(\)\|\[\]\{\}\^\$\#"),
1749 hir_lit(r
"\.+*?()|[]{}^$#")
1755 assert_eq
!(t("a?"), hir_quest(true, hir_lit("a")));
1756 assert_eq
!(t("a*"), hir_star(true, hir_lit("a")));
1757 assert_eq
!(t("a+"), hir_plus(true, hir_lit("a")));
1758 assert_eq
!(t("a??"), hir_quest(false, hir_lit("a")));
1759 assert_eq
!(t("a*?"), hir_star(false, hir_lit("a")));
1760 assert_eq
!(t("a+?"), hir_plus(false, hir_lit("a")));
1764 hir_range(true, hir
::RepetitionRange
::Exactly(1), hir_lit("a"),)
1768 hir_range(true, hir
::RepetitionRange
::AtLeast(1), hir_lit("a"),)
1772 hir_range(true, hir
::RepetitionRange
::Bounded(1, 2), hir_lit("a"),)
1776 hir_range(false, hir
::RepetitionRange
::Exactly(1), hir_lit("a"),)
1780 hir_range(false, hir
::RepetitionRange
::AtLeast(1), hir_lit("a"),)
1786 hir
::RepetitionRange
::Bounded(1, 2),
1793 hir_cat(vec
![hir_lit("a"), hir_quest(true, hir_lit("b")),])
1799 hir_group(1, hir_cat(vec
![hir_lit("a"), hir_lit("b"),]))
1804 hir_alt(vec
![hir_lit("a"), hir_quest(true, hir_lit("b")),])
1812 hir_group(1, hir_cat(vec
![hir_lit("a"), hir_lit("b"),]))
1814 assert_eq
!(t("a|b"), hir_alt(vec
![hir_lit("a"), hir_lit("b"),]));
1817 hir_alt(vec
![hir_lit("a"), hir_lit("b"), hir_lit("c"),])
1821 hir_alt(vec
![hir_lit("ab"), hir_lit("bc"), hir_lit("cd"),])
1825 hir_group(1, hir_alt(vec
![hir_lit("a"), hir_lit("b"),]))
1831 hir_alt(vec
![hir_lit("a"), hir_lit("b"), hir_lit("c"),])
1838 hir_alt(vec
![hir_lit("ab"), hir_lit("bc"), hir_lit("cd"),])
1842 t("(ab|(bc|(cd)))"),
1851 hir_group(3, hir_lit("cd")),
1863 hir_uclass(ascii_class(&ast
::ClassAsciiKind
::Alnum
))
1867 hir_uclass(ascii_class(&ast
::ClassAsciiKind
::Alpha
))
1871 hir_uclass(ascii_class(&ast
::ClassAsciiKind
::Ascii
))
1875 hir_uclass(ascii_class(&ast
::ClassAsciiKind
::Blank
))
1879 hir_uclass(ascii_class(&ast
::ClassAsciiKind
::Cntrl
))
1883 hir_uclass(ascii_class(&ast
::ClassAsciiKind
::Digit
))
1887 hir_uclass(ascii_class(&ast
::ClassAsciiKind
::Graph
))
1891 hir_uclass(ascii_class(&ast
::ClassAsciiKind
::Lower
))
1895 hir_uclass(ascii_class(&ast
::ClassAsciiKind
::Print
))
1899 hir_uclass(ascii_class(&ast
::ClassAsciiKind
::Punct
))
1903 hir_uclass(ascii_class(&ast
::ClassAsciiKind
::Space
))
1907 hir_uclass(ascii_class(&ast
::ClassAsciiKind
::Upper
))
1911 hir_uclass(ascii_class(&ast
::ClassAsciiKind
::Word
))
1915 hir_uclass(ascii_class(&ast
::ClassAsciiKind
::Xdigit
))
1920 hir_negate(hir_uclass(ascii_class(&ast
::ClassAsciiKind
::Lower
)))
1922 #[cfg(feature = "unicode-case")]
1924 t("(?i)[[:lower:]]"),
1928 ('
\u{17F}'
, '
\u{17F}'
),
1929 ('
\u{212A}'
, '
\u{212A}'
),
1934 t("(?-u)[[:lower:]]"),
1935 hir_bclass_from_char(ascii_class(&ast
::ClassAsciiKind
::Lower
))
1938 t("(?i-u)[[:lower:]]"),
1939 hir_case_fold(hir_bclass_from_char(ascii_class(
1940 &ast
::ClassAsciiKind
::Lower
1945 t_err("(?-u)[[:^lower:]]"),
1947 kind
: hir
::ErrorKind
::InvalidUtf8
,
1949 Position
::new(6, 1, 7),
1950 Position
::new(16, 1, 17)
1955 t_err("(?i-u)[[:^lower:]]"),
1957 kind
: hir
::ErrorKind
::InvalidUtf8
,
1959 Position
::new(7, 1, 8),
1960 Position
::new(17, 1, 18)
// Regression test for the linked issue: a bracketed class that mixes
// `[:alnum:]` with the negated `[:^ascii:]`. The Unicode variant is expected
// to involve the Alnum ranges plus U+0080..=U+10FFFF; the `(?-u)` byte
// variant the Alnum ranges plus 0x80..=0xFF.
// NOTE(review): the call combining the two expected classes is not visible
// in this view -- confirm against the full file.
1967 fn class_ascii_multiple() {
1968 // See: https://github.com/rust-lang/regex/issues/680
1970 t("[[:alnum:][:^ascii:]]"),
1972 hir_uclass(ascii_class(&ast
::ClassAsciiKind
::Alnum
)),
1973 hir_uclass(&[('
\u{80}'
, '
\u{10FFFF}'
)]),
1977 t_bytes("(?-u)[[:alnum:][:^ascii:]]"),
1979 hir_bclass_from_char(ascii_class(&ast
::ClassAsciiKind
::Alnum
)),
1980 hir_bclass(&[(0x80, 0xFF)]),
1986 #[cfg(feature = "unicode-perl")]
1989 assert_eq
!(t(r
"\d"), hir_uclass_query(ClassQuery
::Binary("digit")));
1990 assert_eq
!(t(r
"\s"), hir_uclass_query(ClassQuery
::Binary("space")));
1991 assert_eq
!(t(r
"\w"), hir_uclass_perl_word());
1992 #[cfg(feature = "unicode-case")]
1995 hir_uclass_query(ClassQuery
::Binary("digit"))
1997 #[cfg(feature = "unicode-case")]
2000 hir_uclass_query(ClassQuery
::Binary("space"))
2002 #[cfg(feature = "unicode-case")]
2003 assert_eq
!(t(r
"(?i)\w"), hir_uclass_perl_word());
2008 hir_negate(hir_uclass_query(ClassQuery
::Binary("digit")))
2012 hir_negate(hir_uclass_query(ClassQuery
::Binary("space")))
2014 assert_eq
!(t(r
"\W"), hir_negate(hir_uclass_perl_word()));
2015 #[cfg(feature = "unicode-case")]
2018 hir_negate(hir_uclass_query(ClassQuery
::Binary("digit")))
2020 #[cfg(feature = "unicode-case")]
2023 hir_negate(hir_uclass_query(ClassQuery
::Binary("space")))
2025 #[cfg(feature = "unicode-case")]
2026 assert_eq
!(t(r
"(?i)\W"), hir_negate(hir_uclass_perl_word()));
2031 hir_bclass_from_char(ascii_class(&ast
::ClassAsciiKind
::Digit
))
2035 hir_bclass_from_char(ascii_class(&ast
::ClassAsciiKind
::Space
))
2039 hir_bclass_from_char(ascii_class(&ast
::ClassAsciiKind
::Word
))
2043 hir_bclass_from_char(ascii_class(&ast
::ClassAsciiKind
::Digit
))
2047 hir_bclass_from_char(ascii_class(&ast
::ClassAsciiKind
::Space
))
2051 hir_bclass_from_char(ascii_class(&ast
::ClassAsciiKind
::Word
))
2054 // ASCII only, negated
2057 hir_negate(hir_bclass_from_char(ascii_class(
2058 &ast
::ClassAsciiKind
::Digit
2063 hir_negate(hir_bclass_from_char(ascii_class(
2064 &ast
::ClassAsciiKind
::Space
2069 hir_negate(hir_bclass_from_char(ascii_class(
2070 &ast
::ClassAsciiKind
::Word
2075 hir_negate(hir_bclass_from_char(ascii_class(
2076 &ast
::ClassAsciiKind
::Digit
2081 hir_negate(hir_bclass_from_char(ascii_class(
2082 &ast
::ClassAsciiKind
::Space
2087 hir_negate(hir_bclass_from_char(ascii_class(
2088 &ast
::ClassAsciiKind
::Word
// Compiled only when the `unicode-perl` feature is off. Expects a
// `UnicodePerlClassNotFound` error whose span runs from
// `Position::new(0, 1, 1)` to `Position::new(2, 1, 3)`.
// NOTE(review): the pattern under test is not visible in this view;
// presumably it is the Perl word class -- confirm against the full file.
2094 #[cfg(not(feature = "unicode-perl"))]
2095 fn class_perl_word_disabled() {
2099 kind
: hir
::ErrorKind
::UnicodePerlClassNotFound
,
2101 Position
::new(0, 1, 1),
2102 Position
::new(2, 1, 3)
// Compiled only when both `unicode-perl` and `unicode-bool` are off.
// Expects a `UnicodePerlClassNotFound` error whose span runs from
// `Position::new(0, 1, 1)` to `Position::new(2, 1, 3)`.
// NOTE(review): the pattern under test is not visible in this view;
// presumably it is the Perl space class -- confirm against the full file.
2109 #[cfg(all(not(feature = "unicode-perl"), not(feature = "unicode-bool")))]
2110 fn class_perl_space_disabled() {
2114 kind
: hir
::ErrorKind
::UnicodePerlClassNotFound
,
2116 Position
::new(0, 1, 1),
2117 Position
::new(2, 1, 3)
2125 not(feature
= "unicode-perl"),
2126 not(feature
= "unicode-gencat")
// Expects a `UnicodePerlClassNotFound` error whose span runs from
// `Position::new(0, 1, 1)` to `Position::new(2, 1, 3)`. The attribute above
// gates this test on `unicode-perl` and `unicode-gencat` both being off.
// NOTE(review): the pattern under test is not visible in this view;
// presumably it is the Perl digit class -- confirm against the full file.
2128 fn class_perl_digit_disabled() {
2132 kind
: hir
::ErrorKind
::UnicodePerlClassNotFound
,
2134 Position
::new(0, 1, 1),
2135 Position
::new(2, 1, 3)
// Exercises Unicode general-category classes (needs `unicode-gencat`).
// Covers: one-letter forms (`\pZ`, `\pz`), the long name, loose name
// matching (`\p{se PaRa ToR}`), the `gc:` / `gc=` prefixes, negation via
// `\P{..}` and `gc!=`, and the special names `any`, `Assigned` and `ASCII`.
// Error cases visible here: `UnicodeNotAllowed` for `(?-u)\p{Separator}`,
// `UnicodePropertyNotFound`, and `UnicodePropertyValueNotFound` for
// `\p{gc:Foo}`.
// NOTE(review): several patterns and assertion wrappers are not visible in
// this view -- confirm details against the full file.
2142 #[cfg(feature = "unicode-gencat")]
2143 fn class_unicode_gencat() {
2144 assert_eq
!(t(r
"\pZ"), hir_uclass_query(ClassQuery
::Binary("Z")));
2145 assert_eq
!(t(r
"\pz"), hir_uclass_query(ClassQuery
::Binary("Z")));
2147 t(r
"\p{Separator}"),
2148 hir_uclass_query(ClassQuery
::Binary("Z"))
2151 t(r
"\p{se PaRa ToR}"),
2152 hir_uclass_query(ClassQuery
::Binary("Z"))
2155 t(r
"\p{gc:Separator}"),
2156 hir_uclass_query(ClassQuery
::Binary("Z"))
2159 t(r
"\p{gc=Separator}"),
2160 hir_uclass_query(ClassQuery
::Binary("Z"))
2164 hir_uclass_query(ClassQuery
::Binary("Other"))
2166 assert_eq
!(t(r
"\pC"), hir_uclass_query(ClassQuery
::Binary("Other")));
2170 hir_negate(hir_uclass_query(ClassQuery
::Binary("Z")))
2173 t(r
"\P{separator}"),
2174 hir_negate(hir_uclass_query(ClassQuery
::Binary("Z")))
2177 t(r
"\P{gc!=separator}"),
2178 hir_negate(hir_uclass_query(ClassQuery
::Binary("Z")))
2181 assert_eq
!(t(r
"\p{any}"), hir_uclass_query(ClassQuery
::Binary("Any")));
2184 hir_uclass_query(ClassQuery
::Binary("Assigned"))
2188 hir_uclass_query(ClassQuery
::Binary("ASCII"))
2192 hir_uclass_query(ClassQuery
::Binary("Any"))
2195 t(r
"\p{gc:assigned}"),
2196 hir_uclass_query(ClassQuery
::Binary("Assigned"))
2200 hir_uclass_query(ClassQuery
::Binary("ASCII"))
2206 kind
: hir
::ErrorKind
::UnicodeNotAllowed
,
2208 Position
::new(5, 1, 6),
2209 Position
::new(8, 1, 9)
2214 t_err(r
"(?-u)\p{Separator}"),
2216 kind
: hir
::ErrorKind
::UnicodeNotAllowed
,
2218 Position
::new(5, 1, 6),
2219 Position
::new(18, 1, 19)
2226 kind
: hir
::ErrorKind
::UnicodePropertyNotFound
,
2228 Position
::new(0, 1, 1),
2229 Position
::new(3, 1, 4)
2236 kind
: hir
::ErrorKind
::UnicodePropertyNotFound
,
2238 Position
::new(0, 1, 1),
2239 Position
::new(7, 1, 8)
2244 t_err(r
"\p{gc:Foo}"),
2246 kind
: hir
::ErrorKind
::UnicodePropertyValueNotFound
,
2248 Position
::new(0, 1, 1),
2249 Position
::new(10, 1, 11)
// Compiled only when `unicode-gencat` is off. `\p{Separator}` is expected
// to fail with `UnicodePropertyNotFound` over the whole pattern, and a
// second (not visible) pattern is expected to fail the same way with a
// span ending at `Position::new(7, 1, 8)`.
2256 #[cfg(not(feature = "unicode-gencat"))]
2257 fn class_unicode_gencat_disabled() {
2259 t_err(r
"\p{Separator}"),
2261 kind
: hir
::ErrorKind
::UnicodePropertyNotFound
,
2263 Position
::new(0, 1, 1),
2264 Position
::new(13, 1, 14)
2272 kind
: hir
::ErrorKind
::UnicodePropertyNotFound
,
2274 Position
::new(0, 1, 1),
2275 Position
::new(7, 1, 8)
// Exercises Unicode script classes (needs `unicode-script`).
// A script name resolves to the `Greek` class. With `unicode-case`,
// `(?i)\p{Greek}` is the case-folded class, and the expectation for
// `(?i)\P{Greek}` is built with `hir_negate(hir_case_fold(..))`.
// Unknown values for `sc:` and `scx:` are expected to fail with
// `UnicodePropertyValueNotFound`.
2282 #[cfg(feature = "unicode-script")]
2283 fn class_unicode_script() {
2286 hir_uclass_query(ClassQuery
::Binary("Greek"))
2288 #[cfg(feature = "unicode-case")]
2290 t(r
"(?i)\p{Greek}"),
2291 hir_case_fold(hir_uclass_query(ClassQuery
::Binary("Greek")))
2293 #[cfg(feature = "unicode-case")]
2295 t(r
"(?i)\P{Greek}"),
2296 hir_negate(hir_case_fold(hir_uclass_query(ClassQuery
::Binary(
2302 t_err(r
"\p{sc:Foo}"),
2304 kind
: hir
::ErrorKind
::UnicodePropertyValueNotFound
,
2306 Position
::new(0, 1, 1),
2307 Position
::new(10, 1, 11)
2312 t_err(r
"\p{scx:Foo}"),
2314 kind
: hir
::ErrorKind
::UnicodePropertyValueNotFound
,
2316 Position
::new(0, 1, 1),
2317 Position
::new(11, 1, 12)
// Compiled only when `unicode-script` is off. Both `\p{Greek}` and
// `\p{scx:Greek}` are expected to fail with `UnicodePropertyNotFound`, with
// the error span covering the whole pattern.
2324 #[cfg(not(feature = "unicode-script"))]
2325 fn class_unicode_script_disabled() {
2327 t_err(r
"\p{Greek}"),
2329 kind
: hir
::ErrorKind
::UnicodePropertyNotFound
,
2331 Position
::new(0, 1, 1),
2332 Position
::new(9, 1, 10)
2338 t_err(r
"\p{scx:Greek}"),
2340 kind
: hir
::ErrorKind
::UnicodePropertyNotFound
,
2342 Position
::new(0, 1, 1),
2343 Position
::new(13, 1, 14)
// With `unicode-age` enabled, an unknown age value (`\p{age:Foo}`) is
// expected to fail with `UnicodePropertyValueNotFound`, with the error span
// covering the whole pattern.
2350 #[cfg(feature = "unicode-age")]
2351 fn class_unicode_age() {
2353 t_err(r
"\p{age:Foo}"),
2355 kind
: hir
::ErrorKind
::UnicodePropertyValueNotFound
,
2357 Position
::new(0, 1, 1),
2358 Position
::new(11, 1, 12)
// With `unicode-gencat` enabled, expects an `EmptyClassNotAllowed` error
// whose span runs from `Position::new(0, 1, 1)` to `Position::new(7, 1, 8)`.
// NOTE(review): the pattern under test is not visible in this view; judging
// by the test name it is presumably a negated "any" class -- confirm
// against the full file.
2365 #[cfg(feature = "unicode-gencat")]
2366 fn class_unicode_any_empty() {
2370 kind
: hir
::ErrorKind
::EmptyClassNotAllowed
,
2372 Position
::new(0, 1, 1),
2373 Position
::new(7, 1, 8)
// Compiled only when `unicode-age` is off. `\p{age:3.0}` is expected to
// fail with `UnicodePropertyNotFound`, with the error span covering the
// whole pattern.
2380 #[cfg(not(feature = "unicode-age"))]
2381 fn class_unicode_age_disabled() {
2383 t_err(r
"\p{age:3.0}"),
2385 kind
: hir
::ErrorKind
::UnicodePropertyNotFound
,
2387 Position
::new(0, 1, 1),
2388 Position
::new(11, 1, 12)
// Exercises translation of bracketed character classes.
// Covers: single items and ranges (overlapping and adjacent ranges merge,
// e.g. `[a-fd-h]` -> a-h), escapes inside brackets, Perl and Unicode
// property classes nested in brackets (feature-gated), double negation
// (`[^\D]`), byte classes under `(?-u)`, case folding under `(?i)`,
// negated classes, and literal `[`, `&`, `~` and `-` inside a class.
// Error cases visible here: `InvalidUtf8`, and `EmptyClassNotAllowed` for
// classes that match nothing such as `(?-u)[^\s\S]`.
// NOTE(review): several patterns and assertion wrappers are not visible in
// this view -- confirm details against the full file.
2395 fn class_bracketed() {
2396 assert_eq
!(t("[a]"), hir_uclass(&[('a'
, 'a'
)]));
2397 assert_eq
!(t("[^[a]]"), hir_negate(hir_uclass(&[('a'
, 'a'
)])));
2398 assert_eq
!(t("[a-z]"), hir_uclass(&[('a'
, 'z'
)]));
2399 assert_eq
!(t("[a-fd-h]"), hir_uclass(&[('a'
, 'h'
)]));
2400 assert_eq
!(t("[a-fg-m]"), hir_uclass(&[('a'
, 'm'
)]));
2401 assert_eq
!(t(r
"[\x00]"), hir_uclass(&[('
\0'
, '
\0'
)]));
2402 assert_eq
!(t(r
"[\n]"), hir_uclass(&[('
\n'
, '
\n'
)]));
2403 assert_eq
!(t("[\n]"), hir_uclass(&[('
\n'
, '
\n'
)]));
2404 #[cfg(any(feature = "unicode-perl", feature = "unicode-gencat"))]
2405 assert_eq
!(t(r
"[\d]"), hir_uclass_query(ClassQuery
::Binary("digit")));
2406 #[cfg(feature = "unicode-gencat")]
2409 hir_uclass_query(ClassQuery
::Binary("separator"))
2411 #[cfg(feature = "unicode-gencat")]
2413 t(r
"[\p{separator}]"),
2414 hir_uclass_query(ClassQuery
::Binary("separator"))
2416 #[cfg(any(feature = "unicode-perl", feature = "unicode-gencat"))]
2417 assert_eq
!(t(r
"[^\D]"), hir_uclass_query(ClassQuery
::Binary("digit")));
2418 #[cfg(feature = "unicode-gencat")]
2421 hir_uclass_query(ClassQuery
::Binary("separator"))
2423 #[cfg(feature = "unicode-gencat")]
2425 t(r
"[^\P{separator}]"),
2426 hir_uclass_query(ClassQuery
::Binary("separator"))
2429 feature
= "unicode-case",
2430 any(feature
= "unicode-perl", feature
= "unicode-gencat")
2434 hir_uclass_query(ClassQuery
::Binary("digit"))
2436 #[cfg(all(feature = "unicode-case", feature = "unicode-script"))]
2438 t(r
"(?i)[^\P{greek}]"),
2439 hir_case_fold(hir_uclass_query(ClassQuery
::Binary("greek")))
2442 assert_eq
!(t("(?-u)[a]"), hir_bclass(&[(b'a'
, b'a'
)]));
2443 assert_eq
!(t(r
"(?-u)[\x00]"), hir_bclass(&[(b'
\0'
, b'
\0'
)]));
2444 assert_eq
!(t_bytes(r
"(?-u)[\xFF]"), hir_bclass(&[(b'
\xFF'
, b'
\xFF'
)]));
2446 #[cfg(feature = "unicode-case")]
2447 assert_eq
!(t("(?i)[a]"), hir_uclass(&[('A'
, 'A'
), ('a'
, 'a'
)]));
2448 #[cfg(feature = "unicode-case")]
2451 hir_uclass(&[('K'
, 'K'
), ('k'
, 'k'
), ('
\u{212A}'
, '
\u{212A}'
),])
2453 #[cfg(feature = "unicode-case")]
2456 hir_uclass(&[('Β'
, 'Β'
), ('β'
, 'β'
), ('ϐ'
, 'ϐ'
),])
2458 assert_eq
!(t("(?i-u)[k]"), hir_bclass(&[(b'K'
, b'K'
), (b'k'
, b'k'
),]));
2460 assert_eq
!(t("[^a]"), hir_negate(hir_uclass(&[('a'
, 'a'
)])));
2461 assert_eq
!(t(r
"[^\x00]"), hir_negate(hir_uclass(&[('
\0'
, '
\0'
)])));
2463 t_bytes("(?-u)[^a]"),
2464 hir_negate(hir_bclass(&[(b'a'
, b'a'
)]))
2466 #[cfg(any(feature = "unicode-perl", feature = "unicode-gencat"))]
2469 hir_negate(hir_uclass_query(ClassQuery
::Binary("digit")))
2471 #[cfg(feature = "unicode-gencat")]
2474 hir_negate(hir_uclass_query(ClassQuery
::Binary("separator")))
2476 #[cfg(feature = "unicode-gencat")]
2478 t(r
"[^\p{separator}]"),
2479 hir_negate(hir_uclass_query(ClassQuery
::Binary("separator")))
2481 #[cfg(all(feature = "unicode-case", feature = "unicode-script"))]
2483 t(r
"(?i)[^\p{greek}]"),
2484 hir_negate(hir_case_fold(hir_uclass_query(ClassQuery
::Binary(
2488 #[cfg(all(feature = "unicode-case", feature = "unicode-script"))]
2490 t(r
"(?i)[\P{greek}]"),
2491 hir_negate(hir_case_fold(hir_uclass_query(ClassQuery
::Binary(
2496 // Test some weird cases.
2497 assert_eq
!(t(r
"[\[]"), hir_uclass(&[('
['
, '
['
)]));
2499 assert_eq
!(t(r
"[&]"), hir_uclass(&[('
&'
, '
&'
)]));
2500 assert_eq
!(t(r
"[\&]"), hir_uclass(&[('
&'
, '
&'
)]));
2501 assert_eq
!(t(r
"[\&\&]"), hir_uclass(&[('
&'
, '
&'
)]));
2502 assert_eq
!(t(r
"[\x00-&]"), hir_uclass(&[('
\0'
, '
&'
)]));
2503 assert_eq
!(t(r
"[&-\xFF]"), hir_uclass(&[('
&'
, '
\u{FF}'
)]));
2505 assert_eq
!(t(r
"[~]"), hir_uclass(&[('
~'
, '
~'
)]));
2506 assert_eq
!(t(r
"[\~]"), hir_uclass(&[('
~'
, '
~'
)]));
2507 assert_eq
!(t(r
"[\~\~]"), hir_uclass(&[('
~'
, '
~'
)]));
2508 assert_eq
!(t(r
"[\x00-~]"), hir_uclass(&[('
\0'
, '
~'
)]));
2509 assert_eq
!(t(r
"[~-\xFF]"), hir_uclass(&[('
~'
, '
\u{FF}'
)]));
2511 assert_eq
!(t(r
"[-]"), hir_uclass(&[('
-'
, '
-'
)]));
2512 assert_eq
!(t(r
"[\-]"), hir_uclass(&[('
-'
, '
-'
)]));
2513 assert_eq
!(t(r
"[\-\-]"), hir_uclass(&[('
-'
, '
-'
)]));
2514 assert_eq
!(t(r
"[\x00-\-]"), hir_uclass(&[('
\0'
, '
-'
)]));
2515 assert_eq
!(t(r
"[\--\xFF]"), hir_uclass(&[('
-'
, '
\u{FF}'
)]));
2520 kind
: hir
::ErrorKind
::InvalidUtf8
,
2522 Position
::new(5, 1, 6),
2523 Position
::new(9, 1, 10)
2527 #[cfg(any(feature = "unicode-perl", feature = "unicode-bool"))]
2531 kind
: hir
::ErrorKind
::EmptyClassNotAllowed
,
2533 Position
::new(0, 1, 1),
2534 Position
::new(7, 1, 8)
2538 #[cfg(any(feature = "unicode-perl", feature = "unicode-bool"))]
2540 t_err(r
"(?-u)[^\s\S]"),
2542 kind
: hir
::ErrorKind
::EmptyClassNotAllowed
,
2544 Position
::new(5, 1, 6),
2545 Position
::new(12, 1, 13)
// Exercises unions inside a bracketed class.
// `[a-zA-Z]` yields both letter ranges. The feature-gated cases combine
// literal ranges with Unicode property classes (`separator`, `greek`,
// `cyrillic`, and `age` = 3.0), including a nested-bracket form, a
// case-folded form under `(?i)`, a negated form, and a form that is both
// negated and case-folded.
// NOTE(review): several patterns and the calls that combine the expected
// classes are not visible in this view -- confirm against the full file.
2552 fn class_bracketed_union() {
2553 assert_eq
!(t("[a-zA-Z]"), hir_uclass(&[('A'
, 'Z'
), ('a'
, 'z'
)]));
2554 #[cfg(feature = "unicode-gencat")]
2558 hir_uclass(&[('a'
, 'b'
)]),
2559 hir_uclass_query(ClassQuery
::Binary("separator"))
2562 #[cfg(all(feature = "unicode-gencat", feature = "unicode-script"))]
2564 t(r
"[\pZ\p{Greek}]"),
2566 hir_uclass_query(ClassQuery
::Binary("greek")),
2567 hir_uclass_query(ClassQuery
::Binary("separator"))
2571 feature
= "unicode-age",
2572 feature
= "unicode-gencat",
2573 feature
= "unicode-script"
2576 t(r
"[\p{age:3.0}\pZ\p{Greek}]"),
2578 hir_uclass_query(ClassQuery
::ByValue
{
2579 property_name
: "age",
2580 property_value
: "3.0",
2583 hir_uclass_query(ClassQuery
::Binary("greek")),
2584 hir_uclass_query(ClassQuery
::Binary("separator"))
2589 feature
= "unicode-age",
2590 feature
= "unicode-gencat",
2591 feature
= "unicode-script"
2594 t(r
"[[[\p{age:3.0}\pZ]\p{Greek}][\p{Cyrillic}]]"),
2596 hir_uclass_query(ClassQuery
::ByValue
{
2597 property_name
: "age",
2598 property_value
: "3.0",
2601 hir_uclass_query(ClassQuery
::Binary("cyrillic")),
2603 hir_uclass_query(ClassQuery
::Binary("greek")),
2604 hir_uclass_query(ClassQuery
::Binary("separator"))
2611 feature
= "unicode-age",
2612 feature
= "unicode-case",
2613 feature
= "unicode-gencat",
2614 feature
= "unicode-script"
2617 t(r
"(?i)[\p{age:3.0}\pZ\p{Greek}]"),
2618 hir_case_fold(hir_union(
2619 hir_uclass_query(ClassQuery
::ByValue
{
2620 property_name
: "age",
2621 property_value
: "3.0",
2624 hir_uclass_query(ClassQuery
::Binary("greek")),
2625 hir_uclass_query(ClassQuery
::Binary("separator"))
2630 feature
= "unicode-age",
2631 feature
= "unicode-gencat",
2632 feature
= "unicode-script"
2635 t(r
"[^\p{age:3.0}\pZ\p{Greek}]"),
2636 hir_negate(hir_union(
2637 hir_uclass_query(ClassQuery
::ByValue
{
2638 property_name
: "age",
2639 property_value
: "3.0",
2642 hir_uclass_query(ClassQuery
::Binary("greek")),
2643 hir_uclass_query(ClassQuery
::Binary("separator"))
2648 feature
= "unicode-age",
2649 feature
= "unicode-case",
2650 feature
= "unicode-gencat",
2651 feature
= "unicode-script"
2654 t(r
"(?i)[^\p{age:3.0}\pZ\p{Greek}]"),
2655 hir_negate(hir_case_fold(hir_union(
2656 hir_uclass_query(ClassQuery
::ByValue
{
2657 property_name
: "age",
2658 property_value
: "3.0",
2661 hir_uclass_query(ClassQuery
::Binary("greek")),
2662 hir_uclass_query(ClassQuery
::Binary("separator"))
// Exercises a negated class nested inside another bracketed class.
// When the outer items are already covered by the inner negation
// (`[a[^c]]`), the result is just "not c"; `[a-c[^c]]` covers everything,
// written here as the negation of an empty class. Negating the outer class
// flips this (`[^a[^c]]` -> c). The `unicode-case` variants repeat the
// checks under `(?i)`. `[^a-c[^c]]` matches nothing and is expected to fail
// with `EmptyClassNotAllowed`.
// NOTE(review): a few patterns and assertion wrappers are not visible in
// this view -- confirm details against the full file.
2669 fn class_bracketed_nested() {
2670 assert_eq
!(t(r
"[a[^c]]"), hir_negate(hir_uclass(&[('c'
, 'c'
)])));
2671 assert_eq
!(t(r
"[a-b[^c]]"), hir_negate(hir_uclass(&[('c'
, 'c'
)])));
2672 assert_eq
!(t(r
"[a-c[^c]]"), hir_negate(hir_uclass(&[])));
2674 assert_eq
!(t(r
"[^a[^c]]"), hir_uclass(&[('c'
, 'c'
)]));
2675 assert_eq
!(t(r
"[^a-b[^c]]"), hir_uclass(&[('c'
, 'c'
)]));
2677 #[cfg(feature = "unicode-case")]
2680 hir_negate(hir_case_fold(hir_uclass(&[('c'
, 'c'
)])))
2682 #[cfg(feature = "unicode-case")]
2684 t(r
"(?i)[a-b[^c]]"),
2685 hir_negate(hir_case_fold(hir_uclass(&[('c'
, 'c'
)])))
2688 #[cfg(feature = "unicode-case")]
2689 assert_eq
!(t(r
"(?i)[^a[^c]]"), hir_uclass(&[('C'
, 'C'
), ('c'
, 'c'
)]));
2690 #[cfg(feature = "unicode-case")]
2692 t(r
"(?i)[^a-b[^c]]"),
2693 hir_uclass(&[('C'
, 'C'
), ('c'
, 'c'
)])
2697 t_err(r
"[^a-c[^c]]"),
2699 kind
: hir
::ErrorKind
::EmptyClassNotAllowed
,
2701 Position
::new(0, 1, 1),
2702 Position
::new(10, 1, 11)
2706 #[cfg(feature = "unicode-case")]
2708 t_err(r
"(?i)[^a-c[^c]]"),
2710 kind
: hir
::ErrorKind
::EmptyClassNotAllowed
,
2712 Position
::new(4, 1, 5),
2713 Position
::new(14, 1, 15)
// Exercises the `&&` intersection operator inside bracketed classes.
// The same six shapes (plain items, nested operands, chained `&&`, and
// operands listed in either order) are checked four ways: Unicode, bytes
// under `(?-u)`, case-folded Unicode under `(?i)` (gated on
// `unicode-case`), and case-folded bytes under `(?i-u)`.
// The tail checks punctuation next to `&&`: unescaped `^`, escaped `]`,
// `-`, and `&` itself, plus an intersection with a nested negated class
// (`[a-w&&[^c-g]z]` -> a-b and h-w).
// NOTE(review): some assertion wrappers are not visible in this view --
// confirm details against the full file.
2720 fn class_bracketed_intersect() {
2721 assert_eq
!(t("[abc&&b-c]"), hir_uclass(&[('b'
, 'c'
)]));
2722 assert_eq
!(t("[abc&&[b-c]]"), hir_uclass(&[('b'
, 'c'
)]));
2723 assert_eq
!(t("[[abc]&&[b-c]]"), hir_uclass(&[('b'
, 'c'
)]));
2724 assert_eq
!(t("[a-z&&b-y&&c-x]"), hir_uclass(&[('c'
, 'x'
)]));
2725 assert_eq
!(t("[c-da-b&&a-d]"), hir_uclass(&[('a'
, 'd'
)]));
2726 assert_eq
!(t("[a-d&&c-da-b]"), hir_uclass(&[('a'
, 'd'
)]));
2727 assert_eq
!(t(r
"[a-z&&a-c]"), hir_uclass(&[('a'
, 'c'
)]));
2728 assert_eq
!(t(r
"[[a-z&&a-c]]"), hir_uclass(&[('a'
, 'c'
)]));
2729 assert_eq
!(t(r
"[^[a-z&&a-c]]"), hir_negate(hir_uclass(&[('a'
, 'c'
)])));
2731 assert_eq
!(t("(?-u)[abc&&b-c]"), hir_bclass(&[(b'b'
, b'c'
)]));
2732 assert_eq
!(t("(?-u)[abc&&[b-c]]"), hir_bclass(&[(b'b'
, b'c'
)]));
2733 assert_eq
!(t("(?-u)[[abc]&&[b-c]]"), hir_bclass(&[(b'b'
, b'c'
)]));
2734 assert_eq
!(t("(?-u)[a-z&&b-y&&c-x]"), hir_bclass(&[(b'c'
, b'x'
)]));
2735 assert_eq
!(t("(?-u)[c-da-b&&a-d]"), hir_bclass(&[(b'a'
, b'd'
)]));
2736 assert_eq
!(t("(?-u)[a-d&&c-da-b]"), hir_bclass(&[(b'a'
, b'd'
)]));
2738 #[cfg(feature = "unicode-case")]
2740 t("(?i)[abc&&b-c]"),
2741 hir_case_fold(hir_uclass(&[('b'
, 'c'
)]))
2743 #[cfg(feature = "unicode-case")]
2745 t("(?i)[abc&&[b-c]]"),
2746 hir_case_fold(hir_uclass(&[('b'
, 'c'
)]))
2748 #[cfg(feature = "unicode-case")]
2750 t("(?i)[[abc]&&[b-c]]"),
2751 hir_case_fold(hir_uclass(&[('b'
, 'c'
)]))
2753 #[cfg(feature = "unicode-case")]
2755 t("(?i)[a-z&&b-y&&c-x]"),
2756 hir_case_fold(hir_uclass(&[('c'
, 'x'
)]))
2758 #[cfg(feature = "unicode-case")]
2760 t("(?i)[c-da-b&&a-d]"),
2761 hir_case_fold(hir_uclass(&[('a'
, 'd'
)]))
2763 #[cfg(feature = "unicode-case")]
2765 t("(?i)[a-d&&c-da-b]"),
2766 hir_case_fold(hir_uclass(&[('a'
, 'd'
)]))
2770 t("(?i-u)[abc&&b-c]"),
2771 hir_case_fold(hir_bclass(&[(b'b'
, b'c'
)]))
2774 t("(?i-u)[abc&&[b-c]]"),
2775 hir_case_fold(hir_bclass(&[(b'b'
, b'c'
)]))
2778 t("(?i-u)[[abc]&&[b-c]]"),
2779 hir_case_fold(hir_bclass(&[(b'b'
, b'c'
)]))
2782 t("(?i-u)[a-z&&b-y&&c-x]"),
2783 hir_case_fold(hir_bclass(&[(b'c'
, b'x'
)]))
2786 t("(?i-u)[c-da-b&&a-d]"),
2787 hir_case_fold(hir_bclass(&[(b'a'
, b'd'
)]))
2790 t("(?i-u)[a-d&&c-da-b]"),
2791 hir_case_fold(hir_bclass(&[(b'a'
, b'd'
)]))
2794 // In `[a^]`, `^` does not need to be escaped, so it makes sense that
2795 // `^` is also allowed to be unescaped after `&&`.
2796 assert_eq
!(t(r
"[\^&&^]"), hir_uclass(&[('
^', '
^')]));
2797 // `]` needs to be escaped after `&&` since it's not at start of class.
2798 assert_eq
!(t(r
"[]&&\]]"), hir_uclass(&[('
]'
, '
]'
)]));
2799 assert_eq
!(t(r
"[-&&-]"), hir_uclass(&[('
-'
, '
-'
)]));
2800 assert_eq
!(t(r
"[\&&&&]"), hir_uclass(&[('
&'
, '
&'
)]));
2801 assert_eq
!(t(r
"[\&&&\&]"), hir_uclass(&[('
&'
, '
&'
)]));
2804 t(r
"[a-w&&[^c-g]z]"),
2805 hir_uclass(&[('a'
, 'b'
), ('h'
, 'w'
)])
2810 fn class_bracketed_intersect_negate() {
2811 #[cfg(feature = "unicode-perl")]
2814 hir_negate(hir_uclass_query(ClassQuery
::Binary("digit")))
2816 assert_eq
!(t(r
"[^[a-z&&a-c]]"), hir_negate(hir_uclass(&[('a'
, 'c'
)])));
2817 #[cfg(feature = "unicode-perl")]
2820 hir_negate(hir_uclass_query(ClassQuery
::Binary("digit")))
2822 #[cfg(feature = "unicode-perl")]
2825 hir_uclass_query(ClassQuery
::Binary("digit"))
2827 #[cfg(feature = "unicode-perl")]
2828 assert_eq
!(t(r
"[[[^\w]&&[^\d]]]"), hir_negate(hir_uclass_perl_word()));
2830 #[cfg(feature = "unicode-perl")]
2832 t_bytes(r
"(?-u)[^\w&&\d]"),
2833 hir_negate(hir_bclass_from_char(ascii_class(
2834 &ast
::ClassAsciiKind
::Digit
2838 t_bytes(r
"(?-u)[^[a-z&&a-c]]"),
2839 hir_negate(hir_bclass(&[(b'a'
, b'c'
)]))
2842 t_bytes(r
"(?-u)[^[\w&&\d]]"),
2843 hir_negate(hir_bclass_from_char(ascii_class(
2844 &ast
::ClassAsciiKind
::Digit
2848 t_bytes(r
"(?-u)[^[^\w&&\d]]"),
2849 hir_bclass_from_char(ascii_class(&ast
::ClassAsciiKind
::Digit
))
2852 t_bytes(r
"(?-u)[[[^\w]&&[^\d]]]"),
2853 hir_negate(hir_bclass_from_char(ascii_class(
2854 &ast
::ClassAsciiKind
::Word
2860 fn class_bracketed_difference() {
2861 #[cfg(feature = "unicode-gencat")]
2863 t(r
"[\pL--[:ascii:]]"),
2865 hir_uclass_query(ClassQuery
::Binary("letter")),
2866 hir_uclass(&[('
\0'
, '
\x7F'
)])
2871 t(r
"(?-u)[[:alpha:]--[:lower:]]"),
2872 hir_bclass(&[(b'A'
, b'Z'
)])
2877 fn class_bracketed_symmetric_difference() {
2878 #[cfg(feature = "unicode-script")]
2880 t(r
"[\p{sc:Greek}~~\p{scx:Greek}]"),
2882 ('
\u{0342}'
, '
\u{0342}'
),
2883 ('
\u{0345}'
, '
\u{0345}'
),
2884 ('
\u{1DC0}'
, '
\u{1DC1}'
),
2887 assert_eq
!(t(r
"[a-g~~c-j]"), hir_uclass(&[('a'
, 'b'
), ('h'
, 'j'
)]));
2890 t(r
"(?-u)[a-g~~c-j]"),
2891 hir_bclass(&[(b'a'
, b'b'
), (b'h'
, b'j'
)])
2896 fn ignore_whitespace() {
2897 assert_eq
!(t(r
"(?x)\12 3"), hir_lit("\n3"));
2898 assert_eq
!(t(r
"(?x)\x { 53 }"), hir_lit("S"));
2900 t(r
"(?x)\x # comment
2907 assert_eq
!(t(r
"(?x)\x 53"), hir_lit("S"));
2909 t(r
"(?x)\x # comment
2913 assert_eq
!(t(r
"(?x)\x5 3"), hir_lit("S"));
2915 #[cfg(feature = "unicode-gencat")]
2917 t(r
"(?x)\p # comment
2921 hir_uclass_query(ClassQuery
::Binary("separator"))
2933 hir
::RepetitionRange
::Bounded(5, 10),
2938 assert_eq
!(t(r
"(?x)a\ # hi there"), hir_lit("a "));
2942 fn analysis_is_always_utf8() {
2943 // Positive examples.
2944 assert
!(t_bytes(r
"a").is_always_utf8());
2945 assert
!(t_bytes(r
"ab").is_always_utf8());
2946 assert
!(t_bytes(r
"(?-u)a").is_always_utf8());
2947 assert
!(t_bytes(r
"(?-u)ab").is_always_utf8());
2948 assert
!(t_bytes(r
"\xFF").is_always_utf8());
2949 assert
!(t_bytes(r
"\xFF\xFF").is_always_utf8());
2950 assert
!(t_bytes(r
"[^a]").is_always_utf8());
2951 assert
!(t_bytes(r
"[^a][^a]").is_always_utf8());
2952 assert
!(t_bytes(r
"\b").is_always_utf8());
2953 assert
!(t_bytes(r
"\B").is_always_utf8());
2954 assert
!(t_bytes(r
"(?-u)\b").is_always_utf8());
2956 // Negative examples.
2957 assert
!(!t_bytes(r
"(?-u)\xFF").is_always_utf8());
2958 assert
!(!t_bytes(r
"(?-u)\xFF\xFF").is_always_utf8());
2959 assert
!(!t_bytes(r
"(?-u)[^a]").is_always_utf8());
2960 assert
!(!t_bytes(r
"(?-u)[^a][^a]").is_always_utf8());
2961 assert
!(!t_bytes(r
"(?-u)\B").is_always_utf8());
2965 fn analysis_is_all_assertions() {
2966 // Positive examples.
2967 assert
!(t(r
"\b").is_all_assertions());
2968 assert
!(t(r
"\B").is_all_assertions());
2969 assert
!(t(r
"^").is_all_assertions());
2970 assert
!(t(r
"$").is_all_assertions());
2971 assert
!(t(r
"\A").is_all_assertions());
2972 assert
!(t(r
"\z").is_all_assertions());
2973 assert
!(t(r
"$^\z\A\b\B").is_all_assertions());
2974 assert
!(t(r
"$|^|\z|\A|\b|\B").is_all_assertions());
2975 assert
!(t(r
"^$|$^").is_all_assertions());
2976 assert
!(t(r
"((\b)+())*^").is_all_assertions());
2978 // Negative examples.
2979 assert
!(!t(r
"^a").is_all_assertions());
2983 fn analysis_is_anchored() {
2984 // Positive examples.
2985 assert
!(t(r
"^").is_anchored_start());
2986 assert
!(t(r
"$").is_anchored_end());
2987 assert
!(t(r
"^").is_line_anchored_start());
2988 assert
!(t(r
"$").is_line_anchored_end());
2990 assert
!(t(r
"^^").is_anchored_start());
2991 assert
!(t(r
"$$").is_anchored_end());
2992 assert
!(t(r
"^^").is_line_anchored_start());
2993 assert
!(t(r
"$$").is_line_anchored_end());
2995 assert
!(t(r
"^$").is_anchored_start());
2996 assert
!(t(r
"^$").is_anchored_end());
2997 assert
!(t(r
"^$").is_line_anchored_start());
2998 assert
!(t(r
"^$").is_line_anchored_end());
3000 assert
!(t(r
"^foo").is_anchored_start());
3001 assert
!(t(r
"foo$").is_anchored_end());
3002 assert
!(t(r
"^foo").is_line_anchored_start());
3003 assert
!(t(r
"foo$").is_line_anchored_end());
3005 assert
!(t(r
"^foo|^bar").is_anchored_start());
3006 assert
!(t(r
"foo$|bar$").is_anchored_end());
3007 assert
!(t(r
"^foo|^bar").is_line_anchored_start());
3008 assert
!(t(r
"foo$|bar$").is_line_anchored_end());
3010 assert
!(t(r
"^(foo|bar)").is_anchored_start());
3011 assert
!(t(r
"(foo|bar)$").is_anchored_end());
3012 assert
!(t(r
"^(foo|bar)").is_line_anchored_start());
3013 assert
!(t(r
"(foo|bar)$").is_line_anchored_end());
3015 assert
!(t(r
"^+").is_anchored_start());
3016 assert
!(t(r
"$+").is_anchored_end());
3017 assert
!(t(r
"^+").is_line_anchored_start());
3018 assert
!(t(r
"$+").is_line_anchored_end());
3019 assert
!(t(r
"^++").is_anchored_start());
3020 assert
!(t(r
"$++").is_anchored_end());
3021 assert
!(t(r
"^++").is_line_anchored_start());
3022 assert
!(t(r
"$++").is_line_anchored_end());
3023 assert
!(t(r
"(^)+").is_anchored_start());
3024 assert
!(t(r
"($)+").is_anchored_end());
3025 assert
!(t(r
"(^)+").is_line_anchored_start());
3026 assert
!(t(r
"($)+").is_line_anchored_end());
3028 assert
!(t(r
"$^").is_anchored_start());
3029 assert
!(t(r
"$^").is_anchored_start());
3030 assert
!(t(r
"$^").is_line_anchored_end());
3031 assert
!(t(r
"$^").is_line_anchored_end());
3032 assert
!(t(r
"$^|^$").is_anchored_start());
3033 assert
!(t(r
"$^|^$").is_anchored_end());
3034 assert
!(t(r
"$^|^$").is_line_anchored_start());
3035 assert
!(t(r
"$^|^$").is_line_anchored_end());
3037 assert
!(t(r
"\b^").is_anchored_start());
3038 assert
!(t(r
"$\b").is_anchored_end());
3039 assert
!(t(r
"\b^").is_line_anchored_start());
3040 assert
!(t(r
"$\b").is_line_anchored_end());
3041 assert
!(t(r
"^(?m:^)").is_anchored_start());
3042 assert
!(t(r
"(?m:$)$").is_anchored_end());
3043 assert
!(t(r
"^(?m:^)").is_line_anchored_start());
3044 assert
!(t(r
"(?m:$)$").is_line_anchored_end());
3045 assert
!(t(r
"(?m:^)^").is_anchored_start());
3046 assert
!(t(r
"$(?m:$)").is_anchored_end());
3047 assert
!(t(r
"(?m:^)^").is_line_anchored_start());
3048 assert
!(t(r
"$(?m:$)").is_line_anchored_end());
3050 // Negative examples.
3051 assert
!(!t(r
"(?m)^").is_anchored_start());
3052 assert
!(!t(r
"(?m)$").is_anchored_end());
3053 assert
!(!t(r
"(?m:^$)|$^").is_anchored_start());
3054 assert
!(!t(r
"(?m:^$)|$^").is_anchored_end());
3055 assert
!(!t(r
"$^|(?m:^$)").is_anchored_start());
3056 assert
!(!t(r
"$^|(?m:^$)").is_anchored_end());
3058 assert
!(!t(r
"a^").is_anchored_start());
3059 assert
!(!t(r
"$a").is_anchored_start());
3060 assert
!(!t(r
"a^").is_line_anchored_start());
3061 assert
!(!t(r
"$a").is_line_anchored_start());
3063 assert
!(!t(r
"a^").is_anchored_end());
3064 assert
!(!t(r
"$a").is_anchored_end());
3065 assert
!(!t(r
"a^").is_line_anchored_end());
3066 assert
!(!t(r
"$a").is_line_anchored_end());
3068 assert
!(!t(r
"^foo|bar").is_anchored_start());
3069 assert
!(!t(r
"foo|bar$").is_anchored_end());
3070 assert
!(!t(r
"^foo|bar").is_line_anchored_start());
3071 assert
!(!t(r
"foo|bar$").is_line_anchored_end());
3073 assert
!(!t(r
"^*").is_anchored_start());
3074 assert
!(!t(r
"$*").is_anchored_end());
3075 assert
!(!t(r
"^*").is_line_anchored_start());
3076 assert
!(!t(r
"$*").is_line_anchored_end());
3077 assert
!(!t(r
"^*+").is_anchored_start());
3078 assert
!(!t(r
"$*+").is_anchored_end());
3079 assert
!(!t(r
"^*+").is_line_anchored_start());
3080 assert
!(!t(r
"$*+").is_line_anchored_end());
3081 assert
!(!t(r
"^+*").is_anchored_start());
3082 assert
!(!t(r
"$+*").is_anchored_end());
3083 assert
!(!t(r
"^+*").is_line_anchored_start());
3084 assert
!(!t(r
"$+*").is_line_anchored_end());
3085 assert
!(!t(r
"(^)*").is_anchored_start());
3086 assert
!(!t(r
"($)*").is_anchored_end());
3087 assert
!(!t(r
"(^)*").is_line_anchored_start());
3088 assert
!(!t(r
"($)*").is_line_anchored_end());
3092 fn analysis_is_line_anchored() {
3093 assert
!(t(r
"(?m)^(foo|bar)").is_line_anchored_start());
3094 assert
!(t(r
"(?m)(foo|bar)$").is_line_anchored_end());
3096 assert
!(t(r
"(?m)^foo|^bar").is_line_anchored_start());
3097 assert
!(t(r
"(?m)foo$|bar$").is_line_anchored_end());
3099 assert
!(t(r
"(?m)^").is_line_anchored_start());
3100 assert
!(t(r
"(?m)$").is_line_anchored_end());
3102 assert
!(t(r
"(?m:^$)|$^").is_line_anchored_start());
3103 assert
!(t(r
"(?m:^$)|$^").is_line_anchored_end());
3105 assert
!(t(r
"$^|(?m:^$)").is_line_anchored_start());
3106 assert
!(t(r
"$^|(?m:^$)").is_line_anchored_end());
3110 fn analysis_is_any_anchored() {
3111 // Positive examples.
3112 assert
!(t(r
"^").is_any_anchored_start());
3113 assert
!(t(r
"$").is_any_anchored_end());
3114 assert
!(t(r
"\A").is_any_anchored_start());
3115 assert
!(t(r
"\z").is_any_anchored_end());
3117 // Negative examples.
3118 assert
!(!t(r
"(?m)^").is_any_anchored_start());
3119 assert
!(!t(r
"(?m)$").is_any_anchored_end());
3120 assert
!(!t(r
"$").is_any_anchored_start());
3121 assert
!(!t(r
"^").is_any_anchored_end());
3125 fn analysis_is_match_empty() {
3126 // Positive examples.
3127 assert
!(t(r
"").is_match_empty());
3128 assert
!(t(r
"()").is_match_empty());
3129 assert
!(t(r
"()*").is_match_empty());
3130 assert
!(t(r
"()+").is_match_empty());
3131 assert
!(t(r
"()?").is_match_empty());
3132 assert
!(t(r
"a*").is_match_empty());
3133 assert
!(t(r
"a?").is_match_empty());
3134 assert
!(t(r
"a{0}").is_match_empty());
3135 assert
!(t(r
"a{0,}").is_match_empty());
3136 assert
!(t(r
"a{0,1}").is_match_empty());
3137 assert
!(t(r
"a{0,10}").is_match_empty());
3138 #[cfg(feature = "unicode-gencat")]
3139 assert
!(t(r
"\pL*").is_match_empty());
3140 assert
!(t(r
"a*|b").is_match_empty());
3141 assert
!(t(r
"b|a*").is_match_empty());
3142 assert
!(t(r
"a|").is_match_empty());
3143 assert
!(t(r
"|a").is_match_empty());
3144 assert
!(t(r
"a||b").is_match_empty());
3145 assert
!(t(r
"a*a?(abcd)*").is_match_empty());
3146 assert
!(t(r
"^").is_match_empty());
3147 assert
!(t(r
"$").is_match_empty());
3148 assert
!(t(r
"(?m)^").is_match_empty());
3149 assert
!(t(r
"(?m)$").is_match_empty());
3150 assert
!(t(r
"\A").is_match_empty());
3151 assert
!(t(r
"\z").is_match_empty());
3152 assert
!(t(r
"\B").is_match_empty());
3153 assert
!(t_bytes(r
"(?-u)\B").is_match_empty());
3154 assert
!(t(r
"\b").is_match_empty());
3155 assert
!(t(r
"(?-u)\b").is_match_empty());
3157 // Negative examples.
3158 assert
!(!t(r
"a+").is_match_empty());
3159 assert
!(!t(r
"a{1}").is_match_empty());
3160 assert
!(!t(r
"a{1,}").is_match_empty());
3161 assert
!(!t(r
"a{1,2}").is_match_empty());
3162 assert
!(!t(r
"a{1,10}").is_match_empty());
3163 assert
!(!t(r
"b|a").is_match_empty());
3164 assert
!(!t(r
"a*a+(abcd)*").is_match_empty());
3168 fn analysis_is_literal() {
3169 // Positive examples.
3170 assert
!(t(r
"a").is_literal());
3171 assert
!(t(r
"ab").is_literal());
3172 assert
!(t(r
"abc").is_literal());
3173 assert
!(t(r
"(?m)abc").is_literal());
3175 // Negative examples.
3176 assert
!(!t(r
"").is_literal());
3177 assert
!(!t(r
"^").is_literal());
3178 assert
!(!t(r
"a|b").is_literal());
3179 assert
!(!t(r
"(a)").is_literal());
3180 assert
!(!t(r
"a+").is_literal());
3181 assert
!(!t(r
"foo(a)").is_literal());
3182 assert
!(!t(r
"(a)foo").is_literal());
3183 assert
!(!t(r
"[a]").is_literal());
3187 fn analysis_is_alternation_literal() {
3188 // Positive examples.
3189 assert
!(t(r
"a").is_alternation_literal());
3190 assert
!(t(r
"ab").is_alternation_literal());
3191 assert
!(t(r
"abc").is_alternation_literal());
3192 assert
!(t(r
"(?m)abc").is_alternation_literal());
3193 assert
!(t(r
"a|b").is_alternation_literal());
3194 assert
!(t(r
"a|b|c").is_alternation_literal());
3195 assert
!(t(r
"foo|bar").is_alternation_literal());
3196 assert
!(t(r
"foo|bar|baz").is_alternation_literal());
3198 // Negative examples.
3199 assert
!(!t(r
"").is_alternation_literal());
3200 assert
!(!t(r
"^").is_alternation_literal());
3201 assert
!(!t(r
"(a)").is_alternation_literal());
3202 assert
!(!t(r
"a+").is_alternation_literal());
3203 assert
!(!t(r
"foo(a)").is_alternation_literal());
3204 assert
!(!t(r
"(a)foo").is_alternation_literal());
3205 assert
!(!t(r
"[a]").is_alternation_literal());
3206 assert
!(!t(r
"[a]|b").is_alternation_literal());
3207 assert
!(!t(r
"a|[b]").is_alternation_literal());
3208 assert
!(!t(r
"(a)|b").is_alternation_literal());
3209 assert
!(!t(r
"a|(b)").is_alternation_literal());