2 Defines a translator that converts an `Ast` to an `Hir`.
5 use std
::cell
::{Cell, RefCell}
;
8 use ast
::{self, Ast, Span, Visitor}
;
9 use hir
::{self, Error, ErrorKind, Hir}
;
10 use unicode
::{self, ClassQuery}
;
12 type Result
<T
> = result
::Result
<T
, Error
>;
14 /// A builder for constructing an AST->HIR translator.
15 #[derive(Clone, Debug)]
16 pub struct TranslatorBuilder
{
17 allow_invalid_utf8
: bool
,
21 impl Default
for TranslatorBuilder
{
22 fn default() -> TranslatorBuilder
{
23 TranslatorBuilder
::new()
27 impl TranslatorBuilder
{
28 /// Create a new translator builder with a default configuration.
29 pub fn new() -> TranslatorBuilder
{
31 allow_invalid_utf8
: false,
32 flags
: Flags
::default(),
36 /// Build a translator using the current configuration.
37 pub fn build(&self) -> Translator
{
39 stack
: RefCell
::new(vec
![]),
40 flags
: Cell
::new(self.flags
),
41 allow_invalid_utf8
: self.allow_invalid_utf8
,
45 /// When enabled, translation will permit the construction of a regular
46 /// expression that may match invalid UTF-8.
48 /// When disabled (the default), the translator is guaranteed to produce
49 /// an expression that will only ever match valid UTF-8 (otherwise, the
50 /// translator will return an error).
52 /// Perhaps surprisingly, when invalid UTF-8 isn't allowed, a negated ASCII
53 /// word boundary (uttered as `(?-u:\B)` in the concrete syntax) will cause
54 /// the translator to return an error. Namely, a negated ASCII word boundary
55 /// can result in matching positions that aren't valid UTF-8 boundaries.
56 pub fn allow_invalid_utf8(&mut self, yes
: bool
) -> &mut TranslatorBuilder
{
57 self.allow_invalid_utf8
= yes
;
61 /// Enable or disable the case insensitive flag (`i`) by default.
62 pub fn case_insensitive(&mut self, yes
: bool
) -> &mut TranslatorBuilder
{
63 self.flags
.case_insensitive
= if yes { Some(true) }
else { None }
;
67 /// Enable or disable the multi-line matching flag (`m`) by default.
68 pub fn multi_line(&mut self, yes
: bool
) -> &mut TranslatorBuilder
{
69 self.flags
.multi_line
= if yes { Some(true) }
else { None }
;
73 /// Enable or disable the "dot matches any character" flag (`s`) by
75 pub fn dot_matches_new_line(
78 ) -> &mut TranslatorBuilder
{
79 self.flags
.dot_matches_new_line
= if yes { Some(true) }
else { None }
;
83 /// Enable or disable the "swap greed" flag (`U`) by default.
84 pub fn swap_greed(&mut self, yes
: bool
) -> &mut TranslatorBuilder
{
85 self.flags
.swap_greed
= if yes { Some(true) }
else { None }
;
89 /// Enable or disable the Unicode flag (`u`) by default.
90 pub fn unicode(&mut self, yes
: bool
) -> &mut TranslatorBuilder
{
91 self.flags
.unicode
= if yes { None }
else { Some(false) }
;
96 /// A translator maps abstract syntax to a high level intermediate
99 /// A translator may benefit from reuse. That is, a translator can translate
100 /// many abstract syntax trees.
102 /// A `Translator` can be configured in more detail via a
103 /// [`TranslatorBuilder`](struct.TranslatorBuilder.html).
104 #[derive(Clone, Debug)]
105 pub struct Translator
{
106 /// Our call stack, but on the heap.
107 stack
: RefCell
<Vec
<HirFrame
>>,
108 /// The current flag settings.
110 /// Whether we're allowed to produce HIR that can match arbitrary bytes.
111 allow_invalid_utf8
: bool
,
115 /// Create a new translator using the default configuration.
116 pub fn new() -> Translator
{
117 TranslatorBuilder
::new().build()
120 /// Translate the given abstract syntax tree (AST) into a high level
121 /// intermediate representation (HIR).
123 /// If there was a problem doing the translation, then an HIR-specific
124 /// error is returned.
126 /// The original pattern string used to produce the `Ast` *must* also be
127 /// provided. The translator does not use the pattern string during any
128 /// correct translation, but is used for error reporting.
129 pub fn translate(&mut self, pattern
: &str, ast
: &Ast
) -> Result
<Hir
> {
130 ast
::visit(ast
, TranslatorI
::new(self, pattern
))
134 /// An HirFrame is a single stack frame, represented explicitly, which is
135 /// created for each item in the Ast that we traverse.
137 /// Note that technically, this type doesn't represent our entire stack
138 /// frame. In particular, the Ast visitor represents any state associated with
139 /// traversing the Ast itself.
140 #[derive(Clone, Debug)]
142 /// An arbitrary HIR expression. These get pushed whenever we hit a base
143 /// case in the Ast. They get popped after an inductive (i.e., recursive)
144 /// step is complete.
146 /// A Unicode character class. This frame is mutated as we descend into
147 /// the Ast of a character class (which is itself its own mini recursive
149 ClassUnicode(hir
::ClassUnicode
),
150 /// A byte-oriented character class. This frame is mutated as we descend
151 /// into the Ast of a character class (which is itself its own mini
152 /// recursive structure).
154 /// Byte character classes are created when Unicode mode (`u`) is disabled.
155 /// If `allow_invalid_utf8` is disabled (the default), then a byte
156 /// character is only permitted to match ASCII text.
157 ClassBytes(hir
::ClassBytes
),
158 /// This is pushed on to the stack upon first seeing any kind of group,
159 /// indicated by parentheses (including non-capturing groups). It is popped
160 /// upon leaving a group.
162 /// The old active flags when this group was opened.
164 /// If this group sets flags, then the new active flags are set to the
165 /// result of merging the old flags with the flags introduced by this
166 /// group. If the group doesn't set any flags, then this is simply
167 /// equivalent to whatever flags were set when the group was opened.
169 /// When this group is popped, the active flags should be restored to
170 /// the flags set here.
172 /// The "active" flags correspond to whatever flags are set in the
176 /// This is pushed whenever a concatenation is observed. After visiting
177 /// every sub-expression in the concatenation, the translator's stack is
178 /// popped until it sees a Concat frame.
180 /// This is pushed whenever an alternation is observed. After visiting
181 /// every sub-expression in the alternation, the translator's stack is
182 /// popped until it sees an Alternation frame.
187 /// Assert that the current stack frame is an Hir expression and return it.
188 fn unwrap_expr(self) -> Hir
{
190 HirFrame
::Expr(expr
) => expr
,
191 _
=> panic
!("tried to unwrap expr from HirFrame, got: {:?}", self),
195 /// Assert that the current stack frame is a Unicode class expression and
197 fn unwrap_class_unicode(self) -> hir
::ClassUnicode
{
199 HirFrame
::ClassUnicode(cls
) => cls
,
201 "tried to unwrap Unicode class \
202 from HirFrame, got: {:?}",
208 /// Assert that the current stack frame is a byte class expression and
210 fn unwrap_class_bytes(self) -> hir
::ClassBytes
{
212 HirFrame
::ClassBytes(cls
) => cls
,
214 "tried to unwrap byte class \
215 from HirFrame, got: {:?}",
221 /// Assert that the current stack frame is a group indicator and return
222 /// its corresponding flags (the flags that were active at the time the
223 /// group was entered).
224 fn unwrap_group(self) -> Flags
{
226 HirFrame
::Group { old_flags }
=> old_flags
,
228 panic
!("tried to unwrap group from HirFrame, got: {:?}", self)
234 impl<'t
, 'p
> Visitor
for TranslatorI
<'t
, 'p
> {
238 fn finish(self) -> Result
<Hir
> {
239 // ... otherwise, we should have exactly one HIR on the stack.
240 assert_eq
!(self.trans().stack
.borrow().len(), 1);
241 Ok(self.pop().unwrap().unwrap_expr())
244 fn visit_pre(&mut self, ast
: &Ast
) -> Result
<()> {
246 Ast
::Class(ast
::Class
::Bracketed(_
)) => {
247 if self.flags().unicode() {
248 let cls
= hir
::ClassUnicode
::empty();
249 self.push(HirFrame
::ClassUnicode(cls
));
251 let cls
= hir
::ClassBytes
::empty();
252 self.push(HirFrame
::ClassBytes(cls
));
255 Ast
::Group(ref x
) => {
258 .map(|ast
| self.set_flags(ast
))
259 .unwrap_or_else(|| self.flags());
260 self.push(HirFrame
::Group { old_flags }
);
262 Ast
::Concat(ref x
) if x
.asts
.is_empty() => {}
264 self.push(HirFrame
::Concat
);
266 Ast
::Alternation(ref x
) if x
.asts
.is_empty() => {}
267 Ast
::Alternation(_
) => {
268 self.push(HirFrame
::Alternation
);
275 fn visit_post(&mut self, ast
: &Ast
) -> Result
<()> {
278 self.push(HirFrame
::Expr(Hir
::empty()));
280 Ast
::Flags(ref x
) => {
281 self.set_flags(&x
.flags
);
282 // Flags in the AST are generally considered directives and
283 // not actual sub-expressions. However, they can be used in
284 // the concrete syntax like `((?i))`, and we need some kind of
285 // indication of an expression there, and Empty is the correct
288 // There can also be things like `(?i)+`, but we rule those out
289 // in the parser. In the future, we might allow them for
291 self.push(HirFrame
::Expr(Hir
::empty()));
293 Ast
::Literal(ref x
) => {
294 self.push(HirFrame
::Expr(self.hir_literal(x
)?
));
297 self.push(HirFrame
::Expr(self.hir_dot(span
)?
));
299 Ast
::Assertion(ref x
) => {
300 self.push(HirFrame
::Expr(self.hir_assertion(x
)?
));
302 Ast
::Class(ast
::Class
::Perl(ref x
)) => {
303 if self.flags().unicode() {
304 let cls
= self.hir_perl_unicode_class(x
)?
;
305 let hcls
= hir
::Class
::Unicode(cls
);
306 self.push(HirFrame
::Expr(Hir
::class(hcls
)));
308 let cls
= self.hir_perl_byte_class(x
);
309 let hcls
= hir
::Class
::Bytes(cls
);
310 self.push(HirFrame
::Expr(Hir
::class(hcls
)));
313 Ast
::Class(ast
::Class
::Unicode(ref x
)) => {
314 let cls
= hir
::Class
::Unicode(self.hir_unicode_class(x
)?
);
315 self.push(HirFrame
::Expr(Hir
::class(cls
)));
317 Ast
::Class(ast
::Class
::Bracketed(ref ast
)) => {
318 if self.flags().unicode() {
319 let mut cls
= self.pop().unwrap().unwrap_class_unicode();
320 self.unicode_fold_and_negate(
325 if cls
.iter().next().is_none() {
326 return Err(self.error(
328 ErrorKind
::EmptyClassNotAllowed
,
331 let expr
= Hir
::class(hir
::Class
::Unicode(cls
));
332 self.push(HirFrame
::Expr(expr
));
334 let mut cls
= self.pop().unwrap().unwrap_class_bytes();
335 self.bytes_fold_and_negate(
340 if cls
.iter().next().is_none() {
341 return Err(self.error(
343 ErrorKind
::EmptyClassNotAllowed
,
347 let expr
= Hir
::class(hir
::Class
::Bytes(cls
));
348 self.push(HirFrame
::Expr(expr
));
351 Ast
::Repetition(ref x
) => {
352 let expr
= self.pop().unwrap().unwrap_expr();
353 self.push(HirFrame
::Expr(self.hir_repetition(x
, expr
)));
355 Ast
::Group(ref x
) => {
356 let expr
= self.pop().unwrap().unwrap_expr();
357 let old_flags
= self.pop().unwrap().unwrap_group();
358 self.trans().flags
.set(old_flags
);
359 self.push(HirFrame
::Expr(self.hir_group(x
, expr
)));
362 let mut exprs
= vec
![];
363 while let Some(HirFrame
::Expr(expr
)) = self.pop() {
364 if !expr
.kind().is_empty() {
369 self.push(HirFrame
::Expr(Hir
::concat(exprs
)));
371 Ast
::Alternation(_
) => {
372 let mut exprs
= vec
![];
373 while let Some(HirFrame
::Expr(expr
)) = self.pop() {
377 self.push(HirFrame
::Expr(Hir
::alternation(exprs
)));
383 fn visit_class_set_item_pre(
385 ast
: &ast
::ClassSetItem
,
388 ast
::ClassSetItem
::Bracketed(_
) => {
389 if self.flags().unicode() {
390 let cls
= hir
::ClassUnicode
::empty();
391 self.push(HirFrame
::ClassUnicode(cls
));
393 let cls
= hir
::ClassBytes
::empty();
394 self.push(HirFrame
::ClassBytes(cls
));
397 // We needn't handle the Union case here since the visitor will
404 fn visit_class_set_item_post(
406 ast
: &ast
::ClassSetItem
,
409 ast
::ClassSetItem
::Empty(_
) => {}
410 ast
::ClassSetItem
::Literal(ref x
) => {
411 if self.flags().unicode() {
412 let mut cls
= self.pop().unwrap().unwrap_class_unicode();
413 cls
.push(hir
::ClassUnicodeRange
::new(x
.c
, x
.c
));
414 self.push(HirFrame
::ClassUnicode(cls
));
416 let mut cls
= self.pop().unwrap().unwrap_class_bytes();
417 let byte
= self.class_literal_byte(x
)?
;
418 cls
.push(hir
::ClassBytesRange
::new(byte
, byte
));
419 self.push(HirFrame
::ClassBytes(cls
));
422 ast
::ClassSetItem
::Range(ref x
) => {
423 if self.flags().unicode() {
424 let mut cls
= self.pop().unwrap().unwrap_class_unicode();
425 cls
.push(hir
::ClassUnicodeRange
::new(x
.start
.c
, x
.end
.c
));
426 self.push(HirFrame
::ClassUnicode(cls
));
428 let mut cls
= self.pop().unwrap().unwrap_class_bytes();
429 let start
= self.class_literal_byte(&x
.start
)?
;
430 let end
= self.class_literal_byte(&x
.end
)?
;
431 cls
.push(hir
::ClassBytesRange
::new(start
, end
));
432 self.push(HirFrame
::ClassBytes(cls
));
435 ast
::ClassSetItem
::Ascii(ref x
) => {
436 if self.flags().unicode() {
437 let mut cls
= self.pop().unwrap().unwrap_class_unicode();
438 for &(s
, e
) in ascii_class(&x
.kind
) {
439 cls
.push(hir
::ClassUnicodeRange
::new(s
, e
));
441 self.unicode_fold_and_negate(
442 &x
.span
, x
.negated
, &mut cls
,
444 self.push(HirFrame
::ClassUnicode(cls
));
446 let mut cls
= self.pop().unwrap().unwrap_class_bytes();
447 for &(s
, e
) in ascii_class(&x
.kind
) {
448 cls
.push(hir
::ClassBytesRange
::new(s
as u8, e
as u8));
450 self.bytes_fold_and_negate(&x
.span
, x
.negated
, &mut cls
)?
;
451 self.push(HirFrame
::ClassBytes(cls
));
454 ast
::ClassSetItem
::Unicode(ref x
) => {
455 let xcls
= self.hir_unicode_class(x
)?
;
456 let mut cls
= self.pop().unwrap().unwrap_class_unicode();
458 self.push(HirFrame
::ClassUnicode(cls
));
460 ast
::ClassSetItem
::Perl(ref x
) => {
461 if self.flags().unicode() {
462 let xcls
= self.hir_perl_unicode_class(x
)?
;
463 let mut cls
= self.pop().unwrap().unwrap_class_unicode();
465 self.push(HirFrame
::ClassUnicode(cls
));
467 let xcls
= self.hir_perl_byte_class(x
);
468 let mut cls
= self.pop().unwrap().unwrap_class_bytes();
470 self.push(HirFrame
::ClassBytes(cls
));
473 ast
::ClassSetItem
::Bracketed(ref ast
) => {
474 if self.flags().unicode() {
475 let mut cls1
= self.pop().unwrap().unwrap_class_unicode();
476 self.unicode_fold_and_negate(
482 let mut cls2
= self.pop().unwrap().unwrap_class_unicode();
484 self.push(HirFrame
::ClassUnicode(cls2
));
486 let mut cls1
= self.pop().unwrap().unwrap_class_bytes();
487 self.bytes_fold_and_negate(
493 let mut cls2
= self.pop().unwrap().unwrap_class_bytes();
495 self.push(HirFrame
::ClassBytes(cls2
));
498 // This is handled automatically by the visitor.
499 ast
::ClassSetItem
::Union(_
) => {}
504 fn visit_class_set_binary_op_pre(
506 _op
: &ast
::ClassSetBinaryOp
,
508 if self.flags().unicode() {
509 let cls
= hir
::ClassUnicode
::empty();
510 self.push(HirFrame
::ClassUnicode(cls
));
512 let cls
= hir
::ClassBytes
::empty();
513 self.push(HirFrame
::ClassBytes(cls
));
518 fn visit_class_set_binary_op_in(
520 _op
: &ast
::ClassSetBinaryOp
,
522 if self.flags().unicode() {
523 let cls
= hir
::ClassUnicode
::empty();
524 self.push(HirFrame
::ClassUnicode(cls
));
526 let cls
= hir
::ClassBytes
::empty();
527 self.push(HirFrame
::ClassBytes(cls
));
532 fn visit_class_set_binary_op_post(
534 op
: &ast
::ClassSetBinaryOp
,
536 use ast
::ClassSetBinaryOpKind
::*;
538 if self.flags().unicode() {
539 let mut rhs
= self.pop().unwrap().unwrap_class_unicode();
540 let mut lhs
= self.pop().unwrap().unwrap_class_unicode();
541 let mut cls
= self.pop().unwrap().unwrap_class_unicode();
542 if self.flags().case_insensitive() {
543 rhs
.try_case_fold_simple().map_err(|_
| {
545 op
.rhs
.span().clone(),
546 ErrorKind
::UnicodeCaseUnavailable
,
549 lhs
.try_case_fold_simple().map_err(|_
| {
551 op
.lhs
.span().clone(),
552 ErrorKind
::UnicodeCaseUnavailable
,
557 Intersection
=> lhs
.intersect(&rhs
),
558 Difference
=> lhs
.difference(&rhs
),
559 SymmetricDifference
=> lhs
.symmetric_difference(&rhs
),
562 self.push(HirFrame
::ClassUnicode(cls
));
564 let mut rhs
= self.pop().unwrap().unwrap_class_bytes();
565 let mut lhs
= self.pop().unwrap().unwrap_class_bytes();
566 let mut cls
= self.pop().unwrap().unwrap_class_bytes();
567 if self.flags().case_insensitive() {
568 rhs
.case_fold_simple();
569 lhs
.case_fold_simple();
572 Intersection
=> lhs
.intersect(&rhs
),
573 Difference
=> lhs
.difference(&rhs
),
574 SymmetricDifference
=> lhs
.symmetric_difference(&rhs
),
577 self.push(HirFrame
::ClassBytes(cls
));
583 /// The internal implementation of a translator.
585 /// This type is responsible for carrying around the original pattern string,
586 /// which is not tied to the internal state of a translator.
588 /// A TranslatorI exists for the time it takes to translate a single Ast.
589 #[derive(Clone, Debug)]
590 struct TranslatorI
<'t
, 'p
> {
591 trans
: &'t Translator
,
595 impl<'t
, 'p
> TranslatorI
<'t
, 'p
> {
596 /// Build a new internal translator.
597 fn new(trans
: &'t Translator
, pattern
: &'p
str) -> TranslatorI
<'t
, 'p
> {
598 TranslatorI { trans: trans, pattern: pattern }
601 /// Return a reference to the underlying translator.
602 fn trans(&self) -> &Translator
{
606 /// Push the given frame on to the call stack.
607 fn push(&self, frame
: HirFrame
) {
608 self.trans().stack
.borrow_mut().push(frame
);
611 /// Pop the top of the call stack. If the call stack is empty, return None.
612 fn pop(&self) -> Option
<HirFrame
> {
613 self.trans().stack
.borrow_mut().pop()
616 /// Create a new error with the given span and error type.
617 fn error(&self, span
: Span
, kind
: ErrorKind
) -> Error
{
618 Error { kind: kind, pattern: self.pattern.to_string(), span: span }
621 /// Return a copy of the active flags.
622 fn flags(&self) -> Flags
{
623 self.trans().flags
.get()
626 /// Set the flags of this translator from the flags set in the given AST.
627 /// Then, return the old flags.
628 fn set_flags(&self, ast_flags
: &ast
::Flags
) -> Flags
{
629 let old_flags
= self.flags();
630 let mut new_flags
= Flags
::from_ast(ast_flags
);
631 new_flags
.merge(&old_flags
);
632 self.trans().flags
.set(new_flags
);
636 fn hir_literal(&self, lit
: &ast
::Literal
) -> Result
<Hir
> {
637 let ch
= match self.literal_to_char(lit
)?
{
638 byte @ hir
::Literal
::Byte(_
) => return Ok(Hir
::literal(byte
)),
639 hir
::Literal
::Unicode(ch
) => ch
,
641 if self.flags().case_insensitive() {
642 self.hir_from_char_case_insensitive(lit
.span
, ch
)
644 self.hir_from_char(lit
.span
, ch
)
648 /// Convert an Ast literal to its scalar representation.
650 /// When Unicode mode is enabled, then this always succeeds and returns a
651 /// `char` (Unicode scalar value).
653 /// When Unicode mode is disabled, then a raw byte is returned. If that
654 /// byte is not ASCII and invalid UTF-8 is not allowed, then this returns
656 fn literal_to_char(&self, lit
: &ast
::Literal
) -> Result
<hir
::Literal
> {
657 if self.flags().unicode() {
658 return Ok(hir
::Literal
::Unicode(lit
.c
));
660 let byte
= match lit
.byte() {
661 None
=> return Ok(hir
::Literal
::Unicode(lit
.c
)),
665 return Ok(hir
::Literal
::Unicode(byte
as char));
667 if !self.trans().allow_invalid_utf8
{
668 return Err(self.error(lit
.span
, ErrorKind
::InvalidUtf8
));
670 Ok(hir
::Literal
::Byte(byte
))
673 fn hir_from_char(&self, span
: Span
, c
: char) -> Result
<Hir
> {
674 if !self.flags().unicode() && c
.len_utf8() > 1 {
675 return Err(self.error(span
, ErrorKind
::UnicodeNotAllowed
));
677 Ok(Hir
::literal(hir
::Literal
::Unicode(c
)))
680 fn hir_from_char_case_insensitive(
685 if self.flags().unicode() {
686 // If case folding won't do anything, then don't bother trying.
688 unicode
::contains_simple_case_mapping(c
, c
).map_err(|_
| {
689 self.error(span
, ErrorKind
::UnicodeCaseUnavailable
)
692 return self.hir_from_char(span
, c
);
695 hir
::ClassUnicode
::new(vec
![hir
::ClassUnicodeRange
::new(
698 cls
.try_case_fold_simple().map_err(|_
| {
699 self.error(span
, ErrorKind
::UnicodeCaseUnavailable
)
701 Ok(Hir
::class(hir
::Class
::Unicode(cls
)))
703 if c
.len_utf8() > 1 {
704 return Err(self.error(span
, ErrorKind
::UnicodeNotAllowed
));
706 // If case folding won't do anything, then don't bother trying.
708 'A'
..='Z'
| 'a'
..='z'
=> {}
709 _
=> return self.hir_from_char(span
, c
),
712 hir
::ClassBytes
::new(vec
![hir
::ClassBytesRange
::new(
715 cls
.case_fold_simple();
716 Ok(Hir
::class(hir
::Class
::Bytes(cls
)))
720 fn hir_dot(&self, span
: Span
) -> Result
<Hir
> {
721 let unicode
= self.flags().unicode();
722 if !unicode
&& !self.trans().allow_invalid_utf8
{
723 return Err(self.error(span
, ErrorKind
::InvalidUtf8
));
725 Ok(if self.flags().dot_matches_new_line() {
732 fn hir_assertion(&self, asst
: &ast
::Assertion
) -> Result
<Hir
> {
733 let unicode
= self.flags().unicode();
734 let multi_line
= self.flags().multi_line();
736 ast
::AssertionKind
::StartLine
=> Hir
::anchor(if multi_line
{
737 hir
::Anchor
::StartLine
739 hir
::Anchor
::StartText
741 ast
::AssertionKind
::EndLine
=> Hir
::anchor(if multi_line
{
746 ast
::AssertionKind
::StartText
=> {
747 Hir
::anchor(hir
::Anchor
::StartText
)
749 ast
::AssertionKind
::EndText
=> Hir
::anchor(hir
::Anchor
::EndText
),
750 ast
::AssertionKind
::WordBoundary
=> {
751 Hir
::word_boundary(if unicode
{
752 hir
::WordBoundary
::Unicode
754 hir
::WordBoundary
::Ascii
757 ast
::AssertionKind
::NotWordBoundary
=> {
758 Hir
::word_boundary(if unicode
{
759 hir
::WordBoundary
::UnicodeNegate
761 // It is possible for negated ASCII word boundaries to
762 // match at invalid UTF-8 boundaries, even when searching
764 if !self.trans().allow_invalid_utf8
{
766 self.error(asst
.span
, ErrorKind
::InvalidUtf8
)
769 hir
::WordBoundary
::AsciiNegate
775 fn hir_group(&self, group
: &ast
::Group
, expr
: Hir
) -> Hir
{
776 let kind
= match group
.kind
{
777 ast
::GroupKind
::CaptureIndex(idx
) => {
778 hir
::GroupKind
::CaptureIndex(idx
)
780 ast
::GroupKind
::CaptureName(ref capname
) => {
781 hir
::GroupKind
::CaptureName
{
782 name
: capname
.name
.clone(),
783 index
: capname
.index
,
786 ast
::GroupKind
::NonCapturing(_
) => hir
::GroupKind
::NonCapturing
,
788 Hir
::group(hir
::Group { kind: kind, hir: Box::new(expr) }
)
791 fn hir_repetition(&self, rep
: &ast
::Repetition
, expr
: Hir
) -> Hir
{
792 let kind
= match rep
.op
.kind
{
793 ast
::RepetitionKind
::ZeroOrOne
=> hir
::RepetitionKind
::ZeroOrOne
,
794 ast
::RepetitionKind
::ZeroOrMore
=> hir
::RepetitionKind
::ZeroOrMore
,
795 ast
::RepetitionKind
::OneOrMore
=> hir
::RepetitionKind
::OneOrMore
,
796 ast
::RepetitionKind
::Range(ast
::RepetitionRange
::Exactly(m
)) => {
797 hir
::RepetitionKind
::Range(hir
::RepetitionRange
::Exactly(m
))
799 ast
::RepetitionKind
::Range(ast
::RepetitionRange
::AtLeast(m
)) => {
800 hir
::RepetitionKind
::Range(hir
::RepetitionRange
::AtLeast(m
))
802 ast
::RepetitionKind
::Range(ast
::RepetitionRange
::Bounded(
806 hir
::RepetitionKind
::Range(hir
::RepetitionRange
::Bounded(m
, n
))
810 if self.flags().swap_greed() { !rep.greedy }
else { rep.greedy }
;
811 Hir
::repetition(hir
::Repetition
{
818 fn hir_unicode_class(
820 ast_class
: &ast
::ClassUnicode
,
821 ) -> Result
<hir
::ClassUnicode
> {
822 use ast
::ClassUnicodeKind
::*;
824 if !self.flags().unicode() {
826 self.error(ast_class
.span
, ErrorKind
::UnicodeNotAllowed
)
829 let query
= match ast_class
.kind
{
830 OneLetter(name
) => ClassQuery
::OneLetter(name
),
831 Named(ref name
) => ClassQuery
::Binary(name
),
832 NamedValue { ref name, ref value, .. }
=> ClassQuery
::ByValue
{
834 property_value
: value
,
837 let mut result
= self.convert_unicode_class_error(
839 unicode
::class(query
),
841 if let Ok(ref mut class
) = result
{
842 self.unicode_fold_and_negate(
851 fn hir_perl_unicode_class(
853 ast_class
: &ast
::ClassPerl
,
854 ) -> Result
<hir
::ClassUnicode
> {
855 use ast
::ClassPerlKind
::*;
857 assert
!(self.flags().unicode());
858 let result
= match ast_class
.kind
{
859 Digit
=> unicode
::perl_digit(),
860 Space
=> unicode
::perl_space(),
861 Word
=> unicode
::perl_word(),
864 self.convert_unicode_class_error(&ast_class
.span
, result
)?
;
865 // We needn't apply case folding here because the Perl Unicode classes
866 // are already closed under Unicode simple case folding.
867 if ast_class
.negated
{
873 fn hir_perl_byte_class(
875 ast_class
: &ast
::ClassPerl
,
876 ) -> hir
::ClassBytes
{
877 use ast
::ClassPerlKind
::*;
879 assert
!(!self.flags().unicode());
880 let mut class
= match ast_class
.kind
{
881 Digit
=> hir_ascii_class_bytes(&ast
::ClassAsciiKind
::Digit
),
882 Space
=> hir_ascii_class_bytes(&ast
::ClassAsciiKind
::Space
),
883 Word
=> hir_ascii_class_bytes(&ast
::ClassAsciiKind
::Word
),
885 // We needn't apply case folding here because the Perl ASCII classes
886 // are already closed (under ASCII case folding).
887 if ast_class
.negated
{
893 /// Converts the given Unicode specific error to an HIR translation error.
895 /// The span given should approximate the position at which an error would
897 fn convert_unicode_class_error(
900 result
: unicode
::Result
<hir
::ClassUnicode
>,
901 ) -> Result
<hir
::ClassUnicode
> {
902 result
.map_err(|err
| {
903 let sp
= span
.clone();
905 unicode
::Error
::PropertyNotFound
=> {
906 self.error(sp
, ErrorKind
::UnicodePropertyNotFound
)
908 unicode
::Error
::PropertyValueNotFound
=> {
909 self.error(sp
, ErrorKind
::UnicodePropertyValueNotFound
)
911 unicode
::Error
::PerlClassNotFound
=> {
912 self.error(sp
, ErrorKind
::UnicodePerlClassNotFound
)
918 fn unicode_fold_and_negate(
922 class
: &mut hir
::ClassUnicode
,
924 // Note that we must apply case folding before negation!
925 // Consider `(?i)[^x]`. If we applied negation first, then
926 // the result would be the character class that matched any
927 // Unicode scalar value.
928 if self.flags().case_insensitive() {
929 class
.try_case_fold_simple().map_err(|_
| {
930 self.error(span
.clone(), ErrorKind
::UnicodeCaseUnavailable
)
939 fn bytes_fold_and_negate(
943 class
: &mut hir
::ClassBytes
,
945 // Note that we must apply case folding before negation!
946 // Consider `(?i)[^x]`. If we applied negation first, then
947 // the result would be the character class that matched any
948 // Unicode scalar value.
949 if self.flags().case_insensitive() {
950 class
.case_fold_simple();
955 if !self.trans().allow_invalid_utf8
&& !class
.is_all_ascii() {
956 return Err(self.error(span
.clone(), ErrorKind
::InvalidUtf8
));
961 /// Return a scalar byte value suitable for use as a literal in a byte
963 fn class_literal_byte(&self, ast
: &ast
::Literal
) -> Result
<u8> {
964 match self.literal_to_char(ast
)?
{
965 hir
::Literal
::Byte(byte
) => Ok(byte
),
966 hir
::Literal
::Unicode(ch
) => {
967 if ch
<= 0x7F as char {
970 // We can't feasibly support Unicode in
971 // byte oriented classes. Byte classes don't
972 // do Unicode case folding.
973 Err(self.error(ast
.span
, ErrorKind
::UnicodeNotAllowed
))
980 /// A translator's representation of a regular expression's flags at any given
983 /// Each flag can be in one of three states: absent, present but disabled or
984 /// present but enabled.
985 #[derive(Clone, Copy, Debug, Default)]
987 case_insensitive
: Option
<bool
>,
988 multi_line
: Option
<bool
>,
989 dot_matches_new_line
: Option
<bool
>,
990 swap_greed
: Option
<bool
>,
991 unicode
: Option
<bool
>,
992 // Note that `ignore_whitespace` is omitted here because it is handled
993 // entirely in the parser.
997 fn from_ast(ast
: &ast
::Flags
) -> Flags
{
998 let mut flags
= Flags
::default();
999 let mut enable
= true;
1000 for item
in &ast
.items
{
1002 ast
::FlagsItemKind
::Negation
=> {
1005 ast
::FlagsItemKind
::Flag(ast
::Flag
::CaseInsensitive
) => {
1006 flags
.case_insensitive
= Some(enable
);
1008 ast
::FlagsItemKind
::Flag(ast
::Flag
::MultiLine
) => {
1009 flags
.multi_line
= Some(enable
);
1011 ast
::FlagsItemKind
::Flag(ast
::Flag
::DotMatchesNewLine
) => {
1012 flags
.dot_matches_new_line
= Some(enable
);
1014 ast
::FlagsItemKind
::Flag(ast
::Flag
::SwapGreed
) => {
1015 flags
.swap_greed
= Some(enable
);
1017 ast
::FlagsItemKind
::Flag(ast
::Flag
::Unicode
) => {
1018 flags
.unicode
= Some(enable
);
1020 ast
::FlagsItemKind
::Flag(ast
::Flag
::IgnoreWhitespace
) => {}
1026 fn merge(&mut self, previous
: &Flags
) {
1027 if self.case_insensitive
.is_none() {
1028 self.case_insensitive
= previous
.case_insensitive
;
1030 if self.multi_line
.is_none() {
1031 self.multi_line
= previous
.multi_line
;
1033 if self.dot_matches_new_line
.is_none() {
1034 self.dot_matches_new_line
= previous
.dot_matches_new_line
;
1036 if self.swap_greed
.is_none() {
1037 self.swap_greed
= previous
.swap_greed
;
1039 if self.unicode
.is_none() {
1040 self.unicode
= previous
.unicode
;
1044 fn case_insensitive(&self) -> bool
{
1045 self.case_insensitive
.unwrap_or(false)
1048 fn multi_line(&self) -> bool
{
1049 self.multi_line
.unwrap_or(false)
1052 fn dot_matches_new_line(&self) -> bool
{
1053 self.dot_matches_new_line
.unwrap_or(false)
1056 fn swap_greed(&self) -> bool
{
1057 self.swap_greed
.unwrap_or(false)
1060 fn unicode(&self) -> bool
{
1061 self.unicode
.unwrap_or(true)
1065 fn hir_ascii_class_bytes(kind
: &ast
::ClassAsciiKind
) -> hir
::ClassBytes
{
1066 let ranges
: Vec
<_
> = ascii_class(kind
)
1069 .map(|(s
, e
)| hir
::ClassBytesRange
::new(s
as u8, e
as u8))
1071 hir
::ClassBytes
::new(ranges
)
1074 fn ascii_class(kind
: &ast
::ClassAsciiKind
) -> &'
static [(char, char)] {
1075 use ast
::ClassAsciiKind
::*;
1077 Alnum
=> &[('
0'
, '
9'
), ('A'
, 'Z'
), ('a'
, 'z'
)],
1078 Alpha
=> &[('A'
, 'Z'
), ('a'
, 'z'
)],
1079 Ascii
=> &[('
\x00'
, '
\x7F'
)],
1080 Blank
=> &[('
\t'
, '
\t'
), (' '
, ' '
)],
1081 Cntrl
=> &[('
\x00'
, '
\x1F'
), ('
\x7F'
, '
\x7F'
)],
1082 Digit
=> &[('
0'
, '
9'
)],
1083 Graph
=> &[('
!'
, '
~'
)],
1084 Lower
=> &[('a'
, 'z'
)],
1085 Print
=> &[(' '
, '
~'
)],
1086 Punct
=> &[('
!'
, '
/'
), ('
:'
, '@'
), ('
['
, '`'
), ('
{'
, '
~'
)],
1095 Upper
=> &[('A'
, 'Z'
)],
1096 Word
=> &[('
0'
, '
9'
), ('A'
, 'Z'
), ('_'
, '_'
), ('a'
, 'z'
)],
1097 Xdigit
=> &[('
0'
, '
9'
), ('A'
, 'F'
), ('a'
, 'f'
)],
1103 use ast
::parse
::ParserBuilder
;
1104 use ast
::{self, Ast, Position, Span}
;
1105 use hir
::{self, Hir, HirKind}
;
1106 use unicode
::{self, ClassQuery}
;
1108 use super::{ascii_class, TranslatorBuilder}
;
1110 // We create these errors to compare with real hir::Errors in the tests.
1111 // We define equality between TestError and hir::Error to disregard the
1112 // pattern string in hir::Error, which is annoying to provide in tests.
1113 #[derive(Clone, Debug)]
1116 kind
: hir
::ErrorKind
,
1119 impl PartialEq
<hir
::Error
> for TestError
{
1120 fn eq(&self, other
: &hir
::Error
) -> bool
{
1121 self.span
== other
.span
&& self.kind
== other
.kind
1125 impl PartialEq
<TestError
> for hir
::Error
{
1126 fn eq(&self, other
: &TestError
) -> bool
{
1127 self.span
== other
.span
&& self.kind
== other
.kind
1131 fn parse(pattern
: &str) -> Ast
{
1132 ParserBuilder
::new().octal(true).build().parse(pattern
).unwrap()
1135 fn t(pattern
: &str) -> Hir
{
1136 TranslatorBuilder
::new()
1137 .allow_invalid_utf8(false)
1139 .translate(pattern
, &parse(pattern
))
1143 fn t_err(pattern
: &str) -> hir
::Error
{
1144 TranslatorBuilder
::new()
1145 .allow_invalid_utf8(false)
1147 .translate(pattern
, &parse(pattern
))
1151 fn t_bytes(pattern
: &str) -> Hir
{
1152 TranslatorBuilder
::new()
1153 .allow_invalid_utf8(true)
1155 .translate(pattern
, &parse(pattern
))
1159 fn hir_lit(s
: &str) -> Hir
{
1165 .map(hir
::Literal
::Unicode
)
1173 fn hir_blit(s
: &[u8]) -> Hir
{
1176 1 => Hir
::literal(hir
::Literal
::Byte(s
[0])),
1181 .map(hir
::Literal
::Byte
)
1189 fn hir_group(i
: u32, expr
: Hir
) -> Hir
{
1190 Hir
::group(hir
::Group
{
1191 kind
: hir
::GroupKind
::CaptureIndex(i
),
1192 hir
: Box
::new(expr
),
1196 fn hir_group_name(i
: u32, name
: &str, expr
: Hir
) -> Hir
{
1197 Hir
::group(hir
::Group
{
1198 kind
: hir
::GroupKind
::CaptureName
{
1199 name
: name
.to_string(),
1202 hir
: Box
::new(expr
),
1206 fn hir_group_nocap(expr
: Hir
) -> Hir
{
1207 Hir
::group(hir
::Group
{
1208 kind
: hir
::GroupKind
::NonCapturing
,
1209 hir
: Box
::new(expr
),
1213 fn hir_quest(greedy
: bool
, expr
: Hir
) -> Hir
{
1214 Hir
::repetition(hir
::Repetition
{
1215 kind
: hir
::RepetitionKind
::ZeroOrOne
,
1217 hir
: Box
::new(expr
),
1221 fn hir_star(greedy
: bool
, expr
: Hir
) -> Hir
{
1222 Hir
::repetition(hir
::Repetition
{
1223 kind
: hir
::RepetitionKind
::ZeroOrMore
,
1225 hir
: Box
::new(expr
),
1229 fn hir_plus(greedy
: bool
, expr
: Hir
) -> Hir
{
1230 Hir
::repetition(hir
::Repetition
{
1231 kind
: hir
::RepetitionKind
::OneOrMore
,
1233 hir
: Box
::new(expr
),
1237 fn hir_range(greedy
: bool
, range
: hir
::RepetitionRange
, expr
: Hir
) -> Hir
{
1238 Hir
::repetition(hir
::Repetition
{
1239 kind
: hir
::RepetitionKind
::Range(range
),
1241 hir
: Box
::new(expr
),
1245 fn hir_alt(alts
: Vec
<Hir
>) -> Hir
{
1246 Hir
::alternation(alts
)
1249 fn hir_cat(exprs
: Vec
<Hir
>) -> Hir
{
1254 fn hir_uclass_query(query
: ClassQuery
) -> Hir
{
1255 Hir
::class(hir
::Class
::Unicode(unicode
::class(query
).unwrap()))
1259 fn hir_uclass_perl_word() -> Hir
{
1260 Hir
::class(hir
::Class
::Unicode(unicode
::perl_word().unwrap()))
1263 fn hir_uclass(ranges
: &[(char, char)]) -> Hir
{
1264 let ranges
: Vec
<hir
::ClassUnicodeRange
> = ranges
1266 .map(|&(s
, e
)| hir
::ClassUnicodeRange
::new(s
, e
))
1268 Hir
::class(hir
::Class
::Unicode(hir
::ClassUnicode
::new(ranges
)))
1271 fn hir_bclass(ranges
: &[(u8, u8)]) -> Hir
{
1272 let ranges
: Vec
<hir
::ClassBytesRange
> = ranges
1274 .map(|&(s
, e
)| hir
::ClassBytesRange
::new(s
, e
))
1276 Hir
::class(hir
::Class
::Bytes(hir
::ClassBytes
::new(ranges
)))
1279 fn hir_bclass_from_char(ranges
: &[(char, char)]) -> Hir
{
1280 let ranges
: Vec
<hir
::ClassBytesRange
> = ranges
1283 assert
!(s
as u32 <= 0x7F);
1284 assert
!(e
as u32 <= 0x7F);
1285 hir
::ClassBytesRange
::new(s
as u8, e
as u8)
1288 Hir
::class(hir
::Class
::Bytes(hir
::ClassBytes
::new(ranges
)))
1291 fn hir_case_fold(expr
: Hir
) -> Hir
{
1292 match expr
.into_kind() {
1293 HirKind
::Class(mut cls
) => {
1294 cls
.case_fold_simple();
1297 _
=> panic
!("cannot case fold non-class Hir expr"),
1301 fn hir_negate(expr
: Hir
) -> Hir
{
1302 match expr
.into_kind() {
1303 HirKind
::Class(mut cls
) => {
1307 _
=> panic
!("cannot negate non-class Hir expr"),
1312 fn hir_union(expr1
: Hir
, expr2
: Hir
) -> Hir
{
1313 use hir
::Class
::{Bytes, Unicode}
;
1315 match (expr1
.into_kind(), expr2
.into_kind()) {
1316 (HirKind
::Class(Unicode(mut c1
)), HirKind
::Class(Unicode(c2
))) => {
1318 Hir
::class(hir
::Class
::Unicode(c1
))
1320 (HirKind
::Class(Bytes(mut c1
)), HirKind
::Class(Bytes(c2
))) => {
1322 Hir
::class(hir
::Class
::Bytes(c1
))
1324 _
=> panic
!("cannot union non-class Hir exprs"),
1329 fn hir_difference(expr1
: Hir
, expr2
: Hir
) -> Hir
{
1330 use hir
::Class
::{Bytes, Unicode}
;
1332 match (expr1
.into_kind(), expr2
.into_kind()) {
1333 (HirKind
::Class(Unicode(mut c1
)), HirKind
::Class(Unicode(c2
))) => {
1335 Hir
::class(hir
::Class
::Unicode(c1
))
1337 (HirKind
::Class(Bytes(mut c1
)), HirKind
::Class(Bytes(c2
))) => {
1339 Hir
::class(hir
::Class
::Bytes(c1
))
1341 _
=> panic
!("cannot difference non-class Hir exprs"),
1345 fn hir_anchor(anchor
: hir
::Anchor
) -> Hir
{
1349 fn hir_word(wb
: hir
::WordBoundary
) -> Hir
{
1350 Hir
::word_boundary(wb
)
1355 assert_eq
!(t(""), Hir
::empty());
1356 assert_eq
!(t("(?i)"), Hir
::empty());
1357 assert_eq
!(t("()"), hir_group(1, Hir
::empty()));
1358 assert_eq
!(t("(?:)"), hir_group_nocap(Hir
::empty()));
1359 assert_eq
!(t("(?P<wat>)"), hir_group_name(1, "wat", Hir
::empty()));
1360 assert_eq
!(t("|"), hir_alt(vec
![Hir
::empty(), Hir
::empty()]));
1364 hir_group(1, Hir
::empty()),
1365 hir_group(2, Hir
::empty()),
1370 hir_group(1, hir_alt(vec
![Hir
::empty(), hir_lit("b"),]))
1374 hir_group(1, hir_alt(vec
![hir_lit("a"), Hir
::empty(),]))
1380 hir_alt(vec
![hir_lit("a"), Hir
::empty(), hir_lit("c"),])
1387 hir_alt(vec
![Hir
::empty(), Hir
::empty(), Hir
::empty(),])
1394 assert_eq
!(t("a"), hir_lit("a"));
1395 assert_eq
!(t("(?-u)a"), hir_lit("a"));
1396 assert_eq
!(t("☃"), hir_lit("☃"));
1397 assert_eq
!(t("abcd"), hir_lit("abcd"));
1399 assert_eq
!(t_bytes("(?-u)a"), hir_lit("a"));
1400 assert_eq
!(t_bytes("(?-u)\x61"), hir_lit("a"));
1401 assert_eq
!(t_bytes(r
"(?-u)\x61"), hir_lit("a"));
1402 assert_eq
!(t_bytes(r
"(?-u)\xFF"), hir_blit(b
"\xFF"));
1407 kind
: hir
::ErrorKind
::UnicodeNotAllowed
,
1409 Position
::new(5, 1, 6),
1410 Position
::new(8, 1, 7)
1415 t_err(r
"(?-u)\xFF"),
1417 kind
: hir
::ErrorKind
::InvalidUtf8
,
1419 Position
::new(5, 1, 6),
1420 Position
::new(9, 1, 10)
1427 fn literal_case_insensitive() {
1428 #[cfg(feature = "unicode-case")]
1429 assert_eq
!(t("(?i)a"), hir_uclass(&[('A'
, 'A'
), ('a'
, 'a'
),]));
1430 #[cfg(feature = "unicode-case")]
1433 hir_group_nocap(hir_uclass(&[('A'
, 'A'
), ('a'
, 'a'
)],))
1435 #[cfg(feature = "unicode-case")]
1440 hir_uclass(&[('A'
, 'A'
), ('a'
, 'a'
)]),
1444 #[cfg(feature = "unicode-case")]
1448 hir_uclass(&[('A'
, 'A'
), ('a'
, 'a'
)]),
1449 hir_uclass(&[('B'
, 'B'
), ('b'
, 'b'
)]),
1451 hir_uclass(&[('C'
, 'C'
), ('c'
, 'c'
)]),
1454 #[cfg(feature = "unicode-case")]
1457 hir_uclass(&[('Β'
, 'Β'
), ('β'
, 'β'
), ('ϐ'
, 'ϐ'
),])
1460 assert_eq
!(t("(?i-u)a"), hir_bclass(&[(b'A'
, b'A'
), (b'a'
, b'a'
),]));
1461 #[cfg(feature = "unicode-case")]
1463 t("(?-u)a(?i)a(?-i)a"),
1466 hir_bclass(&[(b'A'
, b'A'
), (b'a'
, b'a'
)]),
1473 hir_bclass(&[(b'A'
, b'A'
), (b'a'
, b'a'
)]),
1474 hir_bclass(&[(b'B'
, b'B'
), (b'b'
, b'b'
)]),
1476 hir_bclass(&[(b'C'
, b'C'
), (b'c'
, b'c'
)]),
1482 hir_bclass(&[(b'A'
, b'A'
), (b'a'
, b'a'
),])
1485 t_bytes("(?i-u)\x61"),
1486 hir_bclass(&[(b'A'
, b'A'
), (b'a'
, b'a'
),])
1489 t_bytes(r
"(?i-u)\x61"),
1490 hir_bclass(&[(b'A'
, b'A'
), (b'a'
, b'a'
),])
1492 assert_eq
!(t_bytes(r
"(?i-u)\xFF"), hir_blit(b
"\xFF"));
1497 kind
: hir
::ErrorKind
::UnicodeNotAllowed
,
1499 Position
::new(6, 1, 7),
1500 Position
::new(8, 1, 8),
1510 hir_uclass(&[('
\0'
, '
\t'
), ('
\x0B'
, '
\u{10FFFF}'
),])
1512 assert_eq
!(t("(?s)."), hir_uclass(&[('
\0'
, '
\u{10FFFF}'
),]));
1515 hir_bclass(&[(b'
\0'
, b'
\t'
), (b'
\x0B'
, b'
\xFF'
),])
1517 assert_eq
!(t_bytes("(?s-u)."), hir_bclass(&[(b'
\0'
, b'
\xFF'
),]));
1519 // If invalid UTF-8 isn't allowed, then non-Unicode `.` isn't allowed.
1523 kind
: hir
::ErrorKind
::InvalidUtf8
,
1525 Position
::new(5, 1, 6),
1526 Position
::new(6, 1, 7)
1533 kind
: hir
::ErrorKind
::InvalidUtf8
,
1535 Position
::new(6, 1, 7),
1536 Position
::new(7, 1, 8)
1544 assert_eq
!(t("^"), hir_anchor(hir
::Anchor
::StartText
));
1545 assert_eq
!(t("$"), hir_anchor(hir
::Anchor
::EndText
));
1546 assert_eq
!(t(r
"\A"), hir_anchor(hir
::Anchor
::StartText
));
1547 assert_eq
!(t(r
"\z"), hir_anchor(hir
::Anchor
::EndText
));
1548 assert_eq
!(t("(?m)^"), hir_anchor(hir
::Anchor
::StartLine
));
1549 assert_eq
!(t("(?m)$"), hir_anchor(hir
::Anchor
::EndLine
));
1550 assert_eq
!(t(r
"(?m)\A"), hir_anchor(hir
::Anchor
::StartText
));
1551 assert_eq
!(t(r
"(?m)\z"), hir_anchor(hir
::Anchor
::EndText
));
1553 assert_eq
!(t(r
"\b"), hir_word(hir
::WordBoundary
::Unicode
));
1554 assert_eq
!(t(r
"\B"), hir_word(hir
::WordBoundary
::UnicodeNegate
));
1555 assert_eq
!(t(r
"(?-u)\b"), hir_word(hir
::WordBoundary
::Ascii
));
1557 t_bytes(r
"(?-u)\B"),
1558 hir_word(hir
::WordBoundary
::AsciiNegate
)
1564 kind
: hir
::ErrorKind
::InvalidUtf8
,
1566 Position
::new(5, 1, 6),
1567 Position
::new(7, 1, 8)
1575 assert_eq
!(t("(a)"), hir_group(1, hir_lit("a")));
1579 hir_group(1, hir_lit("a")),
1580 hir_group(2, hir_lit("b")),
1586 hir_group(1, hir_lit("a")),
1587 hir_group(2, hir_lit("b")),
1590 assert_eq
!(t("(?P<foo>)"), hir_group_name(1, "foo", Hir
::empty()));
1591 assert_eq
!(t("(?P<foo>a)"), hir_group_name(1, "foo", hir_lit("a")));
1593 t("(?P<foo>a)(?P<bar>b)"),
1595 hir_group_name(1, "foo", hir_lit("a")),
1596 hir_group_name(2, "bar", hir_lit("b")),
1599 assert_eq
!(t("(?:)"), hir_group_nocap(Hir
::empty()));
1600 assert_eq
!(t("(?:a)"), hir_group_nocap(hir_lit("a")));
1604 hir_group_nocap(hir_lit("a")),
1605 hir_group(1, hir_lit("b")),
1611 hir_group(1, hir_lit("a")),
1612 hir_group_nocap(hir_lit("b")),
1613 hir_group(2, hir_lit("c")),
1617 t("(a)(?P<foo>b)(c)"),
1619 hir_group(1, hir_lit("a")),
1620 hir_group_name(2, "foo", hir_lit("b")),
1621 hir_group(3, hir_lit("c")),
1624 assert_eq
!(t("()"), hir_group(1, Hir
::empty()));
1625 assert_eq
!(t("((?i))"), hir_group(1, Hir
::empty()));
1626 assert_eq
!(t("((?x))"), hir_group(1, Hir
::empty()));
1627 assert_eq
!(t("(((?x)))"), hir_group(1, hir_group(2, Hir
::empty())));
1632 #[cfg(feature = "unicode-case")]
1636 hir_group_nocap(hir_uclass(&[('A'
, 'A'
), ('a'
, 'a'
)])),
1643 hir_group_nocap(hir_bclass(&[(b'A'
, b'A'
), (b'a'
, b'a'
)])),
1650 hir_group_nocap(hir_bclass(&[(b'A'
, b'A'
), (b'a'
, b'a'
)])),
1657 hir_group(1, hir_bclass(&[(b'A'
, b'A'
), (b'a'
, b'a'
)])),
1661 #[cfg(feature = "unicode-case")]
1665 hir_group_nocap(hir_lit("a")),
1666 hir_uclass(&[('A'
, 'A'
), ('a'
, 'a'
)]),
1669 #[cfg(feature = "unicode-case")]
1673 hir_uclass(&[('A'
, 'A'
), ('a'
, 'a'
)]),
1674 hir_anchor(hir
::Anchor
::StartLine
),
1677 #[cfg(feature = "unicode-case")]
1679 t("(?im)a^(?i-m)a^"),
1681 hir_uclass(&[('A'
, 'A'
), ('a'
, 'a'
)]),
1682 hir_anchor(hir
::Anchor
::StartLine
),
1683 hir_uclass(&[('A'
, 'A'
), ('a'
, 'a'
)]),
1684 hir_anchor(hir
::Anchor
::StartText
),
1688 t("(?U)a*a*?(?-U)a*a*?"),
1690 hir_star(false, hir_lit("a")),
1691 hir_star(true, hir_lit("a")),
1692 hir_star(true, hir_lit("a")),
1693 hir_star(false, hir_lit("a")),
1696 #[cfg(feature = "unicode-case")]
1700 hir_group_nocap(hir_cat(vec
![
1702 hir_uclass(&[('A'
, 'A'
), ('a'
, 'a'
)]),
1707 #[cfg(feature = "unicode-case")]
1709 t("(?i)(?:a(?-i)a)a"),
1711 hir_group_nocap(hir_cat(vec
![
1712 hir_uclass(&[('A'
, 'A'
), ('a'
, 'a'
)]),
1715 hir_uclass(&[('A'
, 'A'
), ('a'
, 'a'
)]),
1723 t(r
"\\\.\+\*\?\(\)\|\[\]\{\}\^\$\#"),
1724 hir_lit(r
"\.+*?()|[]{}^$#")
1730 assert_eq
!(t("a?"), hir_quest(true, hir_lit("a")));
1731 assert_eq
!(t("a*"), hir_star(true, hir_lit("a")));
1732 assert_eq
!(t("a+"), hir_plus(true, hir_lit("a")));
1733 assert_eq
!(t("a??"), hir_quest(false, hir_lit("a")));
1734 assert_eq
!(t("a*?"), hir_star(false, hir_lit("a")));
1735 assert_eq
!(t("a+?"), hir_plus(false, hir_lit("a")));
1739 hir_range(true, hir
::RepetitionRange
::Exactly(1), hir_lit("a"),)
1743 hir_range(true, hir
::RepetitionRange
::AtLeast(1), hir_lit("a"),)
1747 hir_range(true, hir
::RepetitionRange
::Bounded(1, 2), hir_lit("a"),)
1751 hir_range(false, hir
::RepetitionRange
::Exactly(1), hir_lit("a"),)
1755 hir_range(false, hir
::RepetitionRange
::AtLeast(1), hir_lit("a"),)
1761 hir
::RepetitionRange
::Bounded(1, 2),
1768 hir_cat(vec
![hir_lit("a"), hir_quest(true, hir_lit("b")),])
1774 hir_group(1, hir_cat(vec
![hir_lit("a"), hir_lit("b"),]))
1779 hir_alt(vec
![hir_lit("a"), hir_quest(true, hir_lit("b")),])
1787 hir_group(1, hir_cat(vec
![hir_lit("a"), hir_lit("b"),]))
1789 assert_eq
!(t("a|b"), hir_alt(vec
![hir_lit("a"), hir_lit("b"),]));
1792 hir_alt(vec
![hir_lit("a"), hir_lit("b"), hir_lit("c"),])
1796 hir_alt(vec
![hir_lit("ab"), hir_lit("bc"), hir_lit("cd"),])
1800 hir_group(1, hir_alt(vec
![hir_lit("a"), hir_lit("b"),]))
1806 hir_alt(vec
![hir_lit("a"), hir_lit("b"), hir_lit("c"),])
1813 hir_alt(vec
![hir_lit("ab"), hir_lit("bc"), hir_lit("cd"),])
1817 t("(ab|(bc|(cd)))"),
1826 hir_group(3, hir_lit("cd")),
1838 hir_uclass(ascii_class(&ast
::ClassAsciiKind
::Alnum
))
1842 hir_uclass(ascii_class(&ast
::ClassAsciiKind
::Alpha
))
1846 hir_uclass(ascii_class(&ast
::ClassAsciiKind
::Ascii
))
1850 hir_uclass(ascii_class(&ast
::ClassAsciiKind
::Blank
))
1854 hir_uclass(ascii_class(&ast
::ClassAsciiKind
::Cntrl
))
1858 hir_uclass(ascii_class(&ast
::ClassAsciiKind
::Digit
))
1862 hir_uclass(ascii_class(&ast
::ClassAsciiKind
::Graph
))
1866 hir_uclass(ascii_class(&ast
::ClassAsciiKind
::Lower
))
1870 hir_uclass(ascii_class(&ast
::ClassAsciiKind
::Print
))
1874 hir_uclass(ascii_class(&ast
::ClassAsciiKind
::Punct
))
1878 hir_uclass(ascii_class(&ast
::ClassAsciiKind
::Space
))
1882 hir_uclass(ascii_class(&ast
::ClassAsciiKind
::Upper
))
1886 hir_uclass(ascii_class(&ast
::ClassAsciiKind
::Word
))
1890 hir_uclass(ascii_class(&ast
::ClassAsciiKind
::Xdigit
))
1895 hir_negate(hir_uclass(ascii_class(&ast
::ClassAsciiKind
::Lower
)))
1897 #[cfg(feature = "unicode-case")]
1899 t("(?i)[[:lower:]]"),
1903 ('
\u{17F}'
, '
\u{17F}'
),
1904 ('
\u{212A}'
, '
\u{212A}'
),
1909 t("(?-u)[[:lower:]]"),
1910 hir_bclass_from_char(ascii_class(&ast
::ClassAsciiKind
::Lower
))
1913 t("(?i-u)[[:lower:]]"),
1914 hir_case_fold(hir_bclass_from_char(ascii_class(
1915 &ast
::ClassAsciiKind
::Lower
1920 t_err("(?-u)[[:^lower:]]"),
1922 kind
: hir
::ErrorKind
::InvalidUtf8
,
1924 Position
::new(6, 1, 7),
1925 Position
::new(16, 1, 17)
1930 t_err("(?i-u)[[:^lower:]]"),
1932 kind
: hir
::ErrorKind
::InvalidUtf8
,
1934 Position
::new(7, 1, 8),
1935 Position
::new(17, 1, 18)
1942 #[cfg(feature = "unicode-perl")]
1945 assert_eq
!(t(r
"\d"), hir_uclass_query(ClassQuery
::Binary("digit")));
1946 assert_eq
!(t(r
"\s"), hir_uclass_query(ClassQuery
::Binary("space")));
1947 assert_eq
!(t(r
"\w"), hir_uclass_perl_word());
1948 #[cfg(feature = "unicode-case")]
1951 hir_uclass_query(ClassQuery
::Binary("digit"))
1953 #[cfg(feature = "unicode-case")]
1956 hir_uclass_query(ClassQuery
::Binary("space"))
1958 #[cfg(feature = "unicode-case")]
1959 assert_eq
!(t(r
"(?i)\w"), hir_uclass_perl_word());
1964 hir_negate(hir_uclass_query(ClassQuery
::Binary("digit")))
1968 hir_negate(hir_uclass_query(ClassQuery
::Binary("space")))
1970 assert_eq
!(t(r
"\W"), hir_negate(hir_uclass_perl_word()));
1971 #[cfg(feature = "unicode-case")]
1974 hir_negate(hir_uclass_query(ClassQuery
::Binary("digit")))
1976 #[cfg(feature = "unicode-case")]
1979 hir_negate(hir_uclass_query(ClassQuery
::Binary("space")))
1981 #[cfg(feature = "unicode-case")]
1982 assert_eq
!(t(r
"(?i)\W"), hir_negate(hir_uclass_perl_word()));
1987 hir_bclass_from_char(ascii_class(&ast
::ClassAsciiKind
::Digit
))
1991 hir_bclass_from_char(ascii_class(&ast
::ClassAsciiKind
::Space
))
1995 hir_bclass_from_char(ascii_class(&ast
::ClassAsciiKind
::Word
))
1999 hir_bclass_from_char(ascii_class(&ast
::ClassAsciiKind
::Digit
))
2003 hir_bclass_from_char(ascii_class(&ast
::ClassAsciiKind
::Space
))
2007 hir_bclass_from_char(ascii_class(&ast
::ClassAsciiKind
::Word
))
2010 // ASCII only, negated
2013 hir_negate(hir_bclass_from_char(ascii_class(
2014 &ast
::ClassAsciiKind
::Digit
2019 hir_negate(hir_bclass_from_char(ascii_class(
2020 &ast
::ClassAsciiKind
::Space
2025 hir_negate(hir_bclass_from_char(ascii_class(
2026 &ast
::ClassAsciiKind
::Word
2031 hir_negate(hir_bclass_from_char(ascii_class(
2032 &ast
::ClassAsciiKind
::Digit
2037 hir_negate(hir_bclass_from_char(ascii_class(
2038 &ast
::ClassAsciiKind
::Space
2043 hir_negate(hir_bclass_from_char(ascii_class(
2044 &ast
::ClassAsciiKind
::Word
2050 #[cfg(not(feature = "unicode-perl"))]
2051 fn class_perl_word_disabled() {
2055 kind
: hir
::ErrorKind
::UnicodePerlClassNotFound
,
2057 Position
::new(0, 1, 1),
2058 Position
::new(2, 1, 3)
2065 #[cfg(all(not(feature = "unicode-perl"), not(feature = "unicode-bool")))]
2066 fn class_perl_space_disabled() {
2070 kind
: hir
::ErrorKind
::UnicodePerlClassNotFound
,
2072 Position
::new(0, 1, 1),
2073 Position
::new(2, 1, 3)
2081 not(feature
= "unicode-perl"),
2082 not(feature
= "unicode-gencat")
2084 fn class_perl_digit_disabled() {
2088 kind
: hir
::ErrorKind
::UnicodePerlClassNotFound
,
2090 Position
::new(0, 1, 1),
2091 Position
::new(2, 1, 3)
2098 #[cfg(feature = "unicode-gencat")]
2099 fn class_unicode_gencat() {
2100 assert_eq
!(t(r
"\pZ"), hir_uclass_query(ClassQuery
::Binary("Z")));
2101 assert_eq
!(t(r
"\pz"), hir_uclass_query(ClassQuery
::Binary("Z")));
2103 t(r
"\p{Separator}"),
2104 hir_uclass_query(ClassQuery
::Binary("Z"))
2107 t(r
"\p{se PaRa ToR}"),
2108 hir_uclass_query(ClassQuery
::Binary("Z"))
2111 t(r
"\p{gc:Separator}"),
2112 hir_uclass_query(ClassQuery
::Binary("Z"))
2115 t(r
"\p{gc=Separator}"),
2116 hir_uclass_query(ClassQuery
::Binary("Z"))
2120 hir_uclass_query(ClassQuery
::Binary("Other"))
2122 assert_eq
!(t(r
"\pC"), hir_uclass_query(ClassQuery
::Binary("Other")));
2126 hir_negate(hir_uclass_query(ClassQuery
::Binary("Z")))
2129 t(r
"\P{separator}"),
2130 hir_negate(hir_uclass_query(ClassQuery
::Binary("Z")))
2133 t(r
"\P{gc!=separator}"),
2134 hir_negate(hir_uclass_query(ClassQuery
::Binary("Z")))
2137 assert_eq
!(t(r
"\p{any}"), hir_uclass_query(ClassQuery
::Binary("Any")));
2140 hir_uclass_query(ClassQuery
::Binary("Assigned"))
2144 hir_uclass_query(ClassQuery
::Binary("ASCII"))
2148 hir_uclass_query(ClassQuery
::Binary("Any"))
2151 t(r
"\p{gc:assigned}"),
2152 hir_uclass_query(ClassQuery
::Binary("Assigned"))
2156 hir_uclass_query(ClassQuery
::Binary("ASCII"))
2162 kind
: hir
::ErrorKind
::UnicodeNotAllowed
,
2164 Position
::new(5, 1, 6),
2165 Position
::new(8, 1, 9)
2170 t_err(r
"(?-u)\p{Separator}"),
2172 kind
: hir
::ErrorKind
::UnicodeNotAllowed
,
2174 Position
::new(5, 1, 6),
2175 Position
::new(18, 1, 19)
2182 kind
: hir
::ErrorKind
::UnicodePropertyNotFound
,
2184 Position
::new(0, 1, 1),
2185 Position
::new(3, 1, 4)
2192 kind
: hir
::ErrorKind
::UnicodePropertyNotFound
,
2194 Position
::new(0, 1, 1),
2195 Position
::new(7, 1, 8)
2200 t_err(r
"\p{gc:Foo}"),
2202 kind
: hir
::ErrorKind
::UnicodePropertyValueNotFound
,
2204 Position
::new(0, 1, 1),
2205 Position
::new(10, 1, 11)
2212 #[cfg(not(feature = "unicode-gencat"))]
2213 fn class_unicode_gencat_disabled() {
2215 t_err(r
"\p{Separator}"),
2217 kind
: hir
::ErrorKind
::UnicodePropertyNotFound
,
2219 Position
::new(0, 1, 1),
2220 Position
::new(13, 1, 14)
2228 kind
: hir
::ErrorKind
::UnicodePropertyNotFound
,
2230 Position
::new(0, 1, 1),
2231 Position
::new(7, 1, 8)
2238 #[cfg(feature = "unicode-script")]
2239 fn class_unicode_script() {
2242 hir_uclass_query(ClassQuery
::Binary("Greek"))
2244 #[cfg(feature = "unicode-case")]
2246 t(r
"(?i)\p{Greek}"),
2247 hir_case_fold(hir_uclass_query(ClassQuery
::Binary("Greek")))
2249 #[cfg(feature = "unicode-case")]
2251 t(r
"(?i)\P{Greek}"),
2252 hir_negate(hir_case_fold(hir_uclass_query(ClassQuery
::Binary(
2258 t_err(r
"\p{sc:Foo}"),
2260 kind
: hir
::ErrorKind
::UnicodePropertyValueNotFound
,
2262 Position
::new(0, 1, 1),
2263 Position
::new(10, 1, 11)
2268 t_err(r
"\p{scx:Foo}"),
2270 kind
: hir
::ErrorKind
::UnicodePropertyValueNotFound
,
2272 Position
::new(0, 1, 1),
2273 Position
::new(11, 1, 12)
2280 #[cfg(not(feature = "unicode-script"))]
2281 fn class_unicode_script_disabled() {
2283 t_err(r
"\p{Greek}"),
2285 kind
: hir
::ErrorKind
::UnicodePropertyNotFound
,
2287 Position
::new(0, 1, 1),
2288 Position
::new(9, 1, 10)
2294 t_err(r
"\p{scx:Greek}"),
2296 kind
: hir
::ErrorKind
::UnicodePropertyNotFound
,
2298 Position
::new(0, 1, 1),
2299 Position
::new(13, 1, 14)
2306 #[cfg(feature = "unicode-age")]
2307 fn class_unicode_age() {
2309 t_err(r
"\p{age:Foo}"),
2311 kind
: hir
::ErrorKind
::UnicodePropertyValueNotFound
,
2313 Position
::new(0, 1, 1),
2314 Position
::new(11, 1, 12)
2321 #[cfg(not(feature = "unicode-age"))]
2322 fn class_unicode_age_disabled() {
2324 t_err(r
"\p{age:3.0}"),
2326 kind
: hir
::ErrorKind
::UnicodePropertyNotFound
,
2328 Position
::new(0, 1, 1),
2329 Position
::new(11, 1, 12)
2336 fn class_bracketed() {
2337 assert_eq
!(t("[a]"), hir_uclass(&[('a'
, 'a'
)]));
2338 assert_eq
!(t("[^[a]]"), hir_negate(hir_uclass(&[('a'
, 'a'
)])));
2339 assert_eq
!(t("[a-z]"), hir_uclass(&[('a'
, 'z'
)]));
2340 assert_eq
!(t("[a-fd-h]"), hir_uclass(&[('a'
, 'h'
)]));
2341 assert_eq
!(t("[a-fg-m]"), hir_uclass(&[('a'
, 'm'
)]));
2342 assert_eq
!(t(r
"[\x00]"), hir_uclass(&[('
\0'
, '
\0'
)]));
2343 assert_eq
!(t(r
"[\n]"), hir_uclass(&[('
\n'
, '
\n'
)]));
2344 assert_eq
!(t("[\n]"), hir_uclass(&[('
\n'
, '
\n'
)]));
2345 #[cfg(any(feature = "unicode-perl", feature = "unicode-gencat"))]
2346 assert_eq
!(t(r
"[\d]"), hir_uclass_query(ClassQuery
::Binary("digit")));
2347 #[cfg(feature = "unicode-gencat")]
2350 hir_uclass_query(ClassQuery
::Binary("separator"))
2352 #[cfg(feature = "unicode-gencat")]
2354 t(r
"[\p{separator}]"),
2355 hir_uclass_query(ClassQuery
::Binary("separator"))
2357 #[cfg(any(feature = "unicode-perl", feature = "unicode-gencat"))]
2358 assert_eq
!(t(r
"[^\D]"), hir_uclass_query(ClassQuery
::Binary("digit")));
2359 #[cfg(feature = "unicode-gencat")]
2362 hir_uclass_query(ClassQuery
::Binary("separator"))
2364 #[cfg(feature = "unicode-gencat")]
2366 t(r
"[^\P{separator}]"),
2367 hir_uclass_query(ClassQuery
::Binary("separator"))
2370 feature
= "unicode-case",
2371 any(feature
= "unicode-perl", feature
= "unicode-gencat")
2375 hir_uclass_query(ClassQuery
::Binary("digit"))
2377 #[cfg(all(feature = "unicode-case", feature = "unicode-script"))]
2379 t(r
"(?i)[^\P{greek}]"),
2380 hir_case_fold(hir_uclass_query(ClassQuery
::Binary("greek")))
2383 assert_eq
!(t("(?-u)[a]"), hir_bclass(&[(b'a'
, b'a'
)]));
2384 assert_eq
!(t(r
"(?-u)[\x00]"), hir_bclass(&[(b'
\0'
, b'
\0'
)]));
2385 assert_eq
!(t_bytes(r
"(?-u)[\xFF]"), hir_bclass(&[(b'
\xFF'
, b'
\xFF'
)]));
2387 #[cfg(feature = "unicode-case")]
2388 assert_eq
!(t("(?i)[a]"), hir_uclass(&[('A'
, 'A'
), ('a'
, 'a'
)]));
2389 #[cfg(feature = "unicode-case")]
2392 hir_uclass(&[('K'
, 'K'
), ('k'
, 'k'
), ('
\u{212A}'
, '
\u{212A}'
),])
2394 #[cfg(feature = "unicode-case")]
2397 hir_uclass(&[('Β'
, 'Β'
), ('β'
, 'β'
), ('ϐ'
, 'ϐ'
),])
2399 assert_eq
!(t("(?i-u)[k]"), hir_bclass(&[(b'K'
, b'K'
), (b'k'
, b'k'
),]));
2401 assert_eq
!(t("[^a]"), hir_negate(hir_uclass(&[('a'
, 'a'
)])));
2402 assert_eq
!(t(r
"[^\x00]"), hir_negate(hir_uclass(&[('
\0'
, '
\0'
)])));
2404 t_bytes("(?-u)[^a]"),
2405 hir_negate(hir_bclass(&[(b'a'
, b'a'
)]))
2407 #[cfg(any(feature = "unicode-perl", feature = "unicode-gencat"))]
2410 hir_negate(hir_uclass_query(ClassQuery
::Binary("digit")))
2412 #[cfg(feature = "unicode-gencat")]
2415 hir_negate(hir_uclass_query(ClassQuery
::Binary("separator")))
2417 #[cfg(feature = "unicode-gencat")]
2419 t(r
"[^\p{separator}]"),
2420 hir_negate(hir_uclass_query(ClassQuery
::Binary("separator")))
2422 #[cfg(all(feature = "unicode-case", feature = "unicode-script"))]
2424 t(r
"(?i)[^\p{greek}]"),
2425 hir_negate(hir_case_fold(hir_uclass_query(ClassQuery
::Binary(
2429 #[cfg(all(feature = "unicode-case", feature = "unicode-script"))]
2431 t(r
"(?i)[\P{greek}]"),
2432 hir_negate(hir_case_fold(hir_uclass_query(ClassQuery
::Binary(
2437 // Test some weird cases.
2438 assert_eq
!(t(r
"[\[]"), hir_uclass(&[('
['
, '
['
)]));
2440 assert_eq
!(t(r
"[&]"), hir_uclass(&[('
&'
, '
&'
)]));
2441 assert_eq
!(t(r
"[\&]"), hir_uclass(&[('
&'
, '
&'
)]));
2442 assert_eq
!(t(r
"[\&\&]"), hir_uclass(&[('
&'
, '
&'
)]));
2443 assert_eq
!(t(r
"[\x00-&]"), hir_uclass(&[('
\0'
, '
&'
)]));
2444 assert_eq
!(t(r
"[&-\xFF]"), hir_uclass(&[('
&'
, '
\u{FF}'
)]));
2446 assert_eq
!(t(r
"[~]"), hir_uclass(&[('
~'
, '
~'
)]));
2447 assert_eq
!(t(r
"[\~]"), hir_uclass(&[('
~'
, '
~'
)]));
2448 assert_eq
!(t(r
"[\~\~]"), hir_uclass(&[('
~'
, '
~'
)]));
2449 assert_eq
!(t(r
"[\x00-~]"), hir_uclass(&[('
\0'
, '
~'
)]));
2450 assert_eq
!(t(r
"[~-\xFF]"), hir_uclass(&[('
~'
, '
\u{FF}'
)]));
2452 assert_eq
!(t(r
"[-]"), hir_uclass(&[('
-'
, '
-'
)]));
2453 assert_eq
!(t(r
"[\-]"), hir_uclass(&[('
-'
, '
-'
)]));
2454 assert_eq
!(t(r
"[\-\-]"), hir_uclass(&[('
-'
, '
-'
)]));
2455 assert_eq
!(t(r
"[\x00-\-]"), hir_uclass(&[('
\0'
, '
-'
)]));
2456 assert_eq
!(t(r
"[\--\xFF]"), hir_uclass(&[('
-'
, '
\u{FF}'
)]));
2461 kind
: hir
::ErrorKind
::InvalidUtf8
,
2463 Position
::new(5, 1, 6),
2464 Position
::new(9, 1, 10)
2468 #[cfg(any(feature = "unicode-perl", feature = "unicode-bool"))]
2472 kind
: hir
::ErrorKind
::EmptyClassNotAllowed
,
2474 Position
::new(0, 1, 1),
2475 Position
::new(7, 1, 8)
2479 #[cfg(any(feature = "unicode-perl", feature = "unicode-bool"))]
2481 t_err(r
"(?-u)[^\s\S]"),
2483 kind
: hir
::ErrorKind
::EmptyClassNotAllowed
,
2485 Position
::new(5, 1, 6),
2486 Position
::new(12, 1, 13)
2493 fn class_bracketed_union() {
2494 assert_eq
!(t("[a-zA-Z]"), hir_uclass(&[('A'
, 'Z'
), ('a'
, 'z'
)]));
2495 #[cfg(feature = "unicode-gencat")]
2499 hir_uclass(&[('a'
, 'b'
)]),
2500 hir_uclass_query(ClassQuery
::Binary("separator"))
2503 #[cfg(all(feature = "unicode-gencat", feature = "unicode-script"))]
2505 t(r
"[\pZ\p{Greek}]"),
2507 hir_uclass_query(ClassQuery
::Binary("greek")),
2508 hir_uclass_query(ClassQuery
::Binary("separator"))
2512 feature
= "unicode-age",
2513 feature
= "unicode-gencat",
2514 feature
= "unicode-script"
2517 t(r
"[\p{age:3.0}\pZ\p{Greek}]"),
2519 hir_uclass_query(ClassQuery
::ByValue
{
2520 property_name
: "age",
2521 property_value
: "3.0",
2524 hir_uclass_query(ClassQuery
::Binary("greek")),
2525 hir_uclass_query(ClassQuery
::Binary("separator"))
2530 feature
= "unicode-age",
2531 feature
= "unicode-gencat",
2532 feature
= "unicode-script"
2535 t(r
"[[[\p{age:3.0}\pZ]\p{Greek}][\p{Cyrillic}]]"),
2537 hir_uclass_query(ClassQuery
::ByValue
{
2538 property_name
: "age",
2539 property_value
: "3.0",
2542 hir_uclass_query(ClassQuery
::Binary("cyrillic")),
2544 hir_uclass_query(ClassQuery
::Binary("greek")),
2545 hir_uclass_query(ClassQuery
::Binary("separator"))
2552 feature
= "unicode-age",
2553 feature
= "unicode-case",
2554 feature
= "unicode-gencat",
2555 feature
= "unicode-script"
2558 t(r
"(?i)[\p{age:3.0}\pZ\p{Greek}]"),
2559 hir_case_fold(hir_union(
2560 hir_uclass_query(ClassQuery
::ByValue
{
2561 property_name
: "age",
2562 property_value
: "3.0",
2565 hir_uclass_query(ClassQuery
::Binary("greek")),
2566 hir_uclass_query(ClassQuery
::Binary("separator"))
2571 feature
= "unicode-age",
2572 feature
= "unicode-gencat",
2573 feature
= "unicode-script"
2576 t(r
"[^\p{age:3.0}\pZ\p{Greek}]"),
2577 hir_negate(hir_union(
2578 hir_uclass_query(ClassQuery
::ByValue
{
2579 property_name
: "age",
2580 property_value
: "3.0",
2583 hir_uclass_query(ClassQuery
::Binary("greek")),
2584 hir_uclass_query(ClassQuery
::Binary("separator"))
2589 feature
= "unicode-age",
2590 feature
= "unicode-case",
2591 feature
= "unicode-gencat",
2592 feature
= "unicode-script"
2595 t(r
"(?i)[^\p{age:3.0}\pZ\p{Greek}]"),
2596 hir_negate(hir_case_fold(hir_union(
2597 hir_uclass_query(ClassQuery
::ByValue
{
2598 property_name
: "age",
2599 property_value
: "3.0",
2602 hir_uclass_query(ClassQuery
::Binary("greek")),
2603 hir_uclass_query(ClassQuery
::Binary("separator"))
2610 fn class_bracketed_nested() {
2611 assert_eq
!(t(r
"[a[^c]]"), hir_negate(hir_uclass(&[('c'
, 'c'
)])));
2612 assert_eq
!(t(r
"[a-b[^c]]"), hir_negate(hir_uclass(&[('c'
, 'c'
)])));
2613 assert_eq
!(t(r
"[a-c[^c]]"), hir_negate(hir_uclass(&[])));
2615 assert_eq
!(t(r
"[^a[^c]]"), hir_uclass(&[('c'
, 'c'
)]));
2616 assert_eq
!(t(r
"[^a-b[^c]]"), hir_uclass(&[('c'
, 'c'
)]));
2618 #[cfg(feature = "unicode-case")]
2621 hir_negate(hir_case_fold(hir_uclass(&[('c'
, 'c'
)])))
2623 #[cfg(feature = "unicode-case")]
2625 t(r
"(?i)[a-b[^c]]"),
2626 hir_negate(hir_case_fold(hir_uclass(&[('c'
, 'c'
)])))
2629 #[cfg(feature = "unicode-case")]
2630 assert_eq
!(t(r
"(?i)[^a[^c]]"), hir_uclass(&[('C'
, 'C'
), ('c'
, 'c'
)]));
2631 #[cfg(feature = "unicode-case")]
2633 t(r
"(?i)[^a-b[^c]]"),
2634 hir_uclass(&[('C'
, 'C'
), ('c'
, 'c'
)])
2638 t_err(r
"[^a-c[^c]]"),
2640 kind
: hir
::ErrorKind
::EmptyClassNotAllowed
,
2642 Position
::new(0, 1, 1),
2643 Position
::new(10, 1, 11)
2647 #[cfg(feature = "unicode-case")]
2649 t_err(r
"(?i)[^a-c[^c]]"),
2651 kind
: hir
::ErrorKind
::EmptyClassNotAllowed
,
2653 Position
::new(4, 1, 5),
2654 Position
::new(14, 1, 15)
#[test]
fn class_bracketed_intersect() {
    // Class bodies that are checked under every flag combination, each
    // paired with the single inclusive (ASCII) range that the intersection
    // is expected to produce.
    let shared: &[(&str, (u8, u8))] = &[
        ("[abc&&b-c]", (b'b', b'c')),
        ("[abc&&[b-c]]", (b'b', b'c')),
        ("[[abc]&&[b-c]]", (b'b', b'c')),
        ("[a-z&&b-y&&c-x]", (b'c', b'x')),
        ("[c-da-b&&a-d]", (b'a', b'd')),
        ("[a-d&&c-da-b]", (b'a', b'd')),
    ];
    for &(class, (lo, hi)) in shared {
        let bytes = [(lo, hi)];
        let unicode = [(char::from(lo), char::from(hi))];

        // Unicode mode, then byte mode.
        assert_eq!(t(class), hir_uclass(&unicode));
        assert_eq!(t(&format!("(?-u){}", class)), hir_bclass(&bytes));

        // Case insensitive, in Unicode mode and in byte mode.
        #[cfg(feature = "unicode-case")]
        assert_eq!(
            t(&format!("(?i){}", class)),
            hir_case_fold(hir_uclass(&unicode))
        );
        assert_eq!(
            t(&format!("(?i-u){}", class)),
            hir_case_fold(hir_bclass(&bytes))
        );
    }

    // The same intersection on its own, nested in a class, and nested in a
    // negated class.
    let a_to_c = [('a', 'c')];
    assert_eq!(t(r"[a-z&&a-c]"), hir_uclass(&a_to_c));
    assert_eq!(t(r"[[a-z&&a-c]]"), hir_uclass(&a_to_c));
    assert_eq!(t(r"[^[a-z&&a-c]]"), hir_negate(hir_uclass(&a_to_c)));

    // Punctuation around `&&`; each class is expected to contain exactly
    // the one listed character.
    let single: &[(&str, char)] = &[
        // In `[a^]`, `^` does not need to be escaped, so it makes sense that
        // `^` is also allowed to be unescaped after `&&`.
        (r"[\^&&^]", '^'),
        // `]` needs to be escaped after `&&` since it's not at start of class.
        (r"[]&&\]]", ']'),
        (r"[-&&-]", '-'),
        (r"[\&&&&]", '&'),
        (r"[\&&&\&]", '&'),
    ];
    for &(class, ch) in single {
        assert_eq!(t(class), hir_uclass(&[(ch, ch)]));
    }

    // The expected result contains no `z`, i.e. `[^c-g]z` is unioned first
    // and only then intersected with `a-w`.
    assert_eq!(
        t(r"[a-w&&[^c-g]z]"),
        hir_uclass(&[('a', 'b'), ('h', 'w')])
    );
}
// Exercises bracketed classes that combine `&&` intersection with `^`
// negation (e.g. `[^[a-z&&a-c]]`, `[[[^\w]&&[^\d]]]`). Unicode-mode patterns
// go through `t`; `(?-u)` byte-mode patterns go through `t_bytes` and are
// compared against classes built from `ascii_class`. Cases that rely on the
// Perl classes are gated on the `unicode-perl` feature.
2751 fn class_bracketed_intersect_negate() {
2752 #[cfg(feature = "unicode-perl")]
2755 hir_negate(hir_uclass_query(ClassQuery
::Binary("digit")))
2757 assert_eq
!(t(r
"[^[a-z&&a-c]]"), hir_negate(hir_uclass(&[('a'
, 'c'
)])));
2758 #[cfg(feature = "unicode-perl")]
2761 hir_negate(hir_uclass_query(ClassQuery
::Binary("digit")))
2763 #[cfg(feature = "unicode-perl")]
2766 hir_uclass_query(ClassQuery
::Binary("digit"))
2768 #[cfg(feature = "unicode-perl")]
2769 assert_eq
!(t(r
"[[[^\w]&&[^\d]]]"), hir_negate(hir_uclass_perl_word()));
2771 #[cfg(feature = "unicode-perl")]
2773 t_bytes(r
"(?-u)[^\w&&\d]"),
2774 hir_negate(hir_bclass_from_char(ascii_class(
2775 &ast
::ClassAsciiKind
::Digit
2779 t_bytes(r
"(?-u)[^[a-z&&a-c]]"),
2780 hir_negate(hir_bclass(&[(b'a'
, b'c'
)]))
2783 t_bytes(r
"(?-u)[^[\w&&\d]]"),
2784 hir_negate(hir_bclass_from_char(ascii_class(
2785 &ast
::ClassAsciiKind
::Digit
2789 t_bytes(r
"(?-u)[^[^\w&&\d]]"),
2790 hir_bclass_from_char(ascii_class(&ast
::ClassAsciiKind
::Digit
))
2793 t_bytes(r
"(?-u)[[[^\w]&&[^\d]]]"),
2794 hir_negate(hir_bclass_from_char(ascii_class(
2795 &ast
::ClassAsciiKind
::Word
// Exercises the `--` (difference) class operator. The Unicode case
// (`[\pL--[:ascii:]]`) is gated on `unicode-gencat` and is built from the
// "letter" class query and the range `\0`-`\x7F`. The byte-mode case pairs
// `(?-u)[[:alpha:]--[:lower:]]` with the byte class `A`-`Z`.
2801 fn class_bracketed_difference() {
2802 #[cfg(feature = "unicode-gencat")]
2804 t(r
"[\pL--[:ascii:]]"),
2806 hir_uclass_query(ClassQuery
::Binary("letter")),
2807 hir_uclass(&[('
\0'
, '
\x7F'
)])
2812 t(r
"(?-u)[[:alpha:]--[:lower:]]"),
2813 hir_bclass(&[(b'A'
, b'Z'
)])
// Exercises the `~~` (symmetric difference) class operator. The script
// based case (`sc:Greek` vs `scx:Greek`) is gated on `unicode-script`.
// `[a-g~~c-j]` is expected to yield `a`-`b` and `h`-`j`, both in Unicode
// mode and in `(?-u)` byte mode.
2818 fn class_bracketed_symmetric_difference() {
2819 #[cfg(feature = "unicode-script")]
2821 t(r
"[\p{sc:Greek}~~\p{scx:Greek}]"),
2823 ('
\u{0342}'
, '
\u{0342}'
),
2824 ('
\u{0345}'
, '
\u{0345}'
),
2825 ('
\u{1DC0}'
, '
\u{1DC1}'
),
2828 assert_eq
!(t(r
"[a-g~~c-j]"), hir_uclass(&[('a'
, 'b'
), ('h'
, 'j'
)]));
2831 t(r
"(?-u)[a-g~~c-j]"),
2832 hir_bclass(&[(b'a'
, b'b'
), (b'h'
, b'j'
)])
// Exercises translation with the `x` flag enabled: whitespace and
// `# comment` text placed inside or after escapes such as `\x` and `\p`.
// For example `(?x)\x { 53 }` is expected to produce the literal "S", and
// `(?x)a\ # hi there` the literal "a " (the escaped space is kept).
// The `\p` case is gated on the `unicode-gencat` feature.
2837 fn ignore_whitespace() {
2838 assert_eq
!(t(r
"(?x)\12 3"), hir_lit("\n3"));
2839 assert_eq
!(t(r
"(?x)\x { 53 }"), hir_lit("S"));
2841 t(r
"(?x)\x # comment
2848 assert_eq
!(t(r
"(?x)\x 53"), hir_lit("S"));
2850 t(r
"(?x)\x # comment
2854 assert_eq
!(t(r
"(?x)\x5 3"), hir_lit("S"));
2856 #[cfg(feature = "unicode-gencat")]
2858 t(r
"(?x)\p # comment
2862 hir_uclass_query(ClassQuery
::Binary("separator"))
2874 hir
::RepetitionRange
::Bounded(5, 10),
2879 assert_eq
!(t(r
"(?x)a\ # hi there"), hir_lit("a "));
#[test]
fn analysis_is_always_utf8() {
    // Positive examples: translated with `t_bytes`, each of these must still
    // report that it only ever matches valid UTF-8.
    let always_utf8 = [
        r"a",
        r"ab",
        r"(?-u)a",
        r"(?-u)ab",
        r"\xFF",
        r"\xFF\xFF",
        r"[^a]",
        r"[^a][^a]",
        r"\b",
        r"\B",
        r"(?-u)\b",
    ];
    for &pattern in always_utf8.iter() {
        assert!(t_bytes(pattern).is_always_utf8(), "pattern: {:?}", pattern);
    }

    // Negative examples: all of them disable Unicode mode with `(?-u)`.
    let not_always_utf8 = [
        r"(?-u)\xFF",
        r"(?-u)\xFF\xFF",
        r"(?-u)[^a]",
        r"(?-u)[^a][^a]",
        r"(?-u)\B",
    ];
    for &pattern in not_always_utf8.iter() {
        assert!(!t_bytes(pattern).is_always_utf8(), "pattern: {:?}", pattern);
    }
}
#[test]
fn analysis_is_all_assertions() {
    // Positive examples: single assertions first, then concatenations,
    // alternations and grouped/repeated forms of them.
    let all_assertions = [
        r"\b",
        r"\B",
        r"^",
        r"$",
        r"\A",
        r"\z",
        r"$^\z\A\b\B",
        r"$|^|\z|\A|\b|\B",
        r"^$|$^",
        r"((\b)+())*^",
    ];
    for &pattern in all_assertions.iter() {
        assert!(t(pattern).is_all_assertions(), "pattern: {:?}", pattern);
    }

    // Negative examples: an assertion followed by a literal.
    assert!(!t(r"^a").is_all_assertions());
}
#[test]
fn analysis_is_anchored() {
    // Positive examples.
    //
    // Each entry is `(start, end)`: `start` must be reported as anchored and
    // line anchored at the start, `end` as anchored and line anchored at the
    // end. When both sides are the same pattern, all four predicates are
    // checked on it.
    let anchored: &[(&str, &str)] = &[
        (r"^", r"$"),
        (r"^^", r"$$"),
        (r"^$", r"^$"),
        (r"^foo", r"foo$"),
        (r"^foo|^bar", r"foo$|bar$"),
        (r"^(foo|bar)", r"(foo|bar)$"),
        (r"^+", r"$+"),
        (r"^++", r"$++"),
        (r"(^)+", r"($)+"),
        // Previously this group asserted `is_anchored_start` and
        // `is_line_anchored_end` twice each, so the end / line-start
        // predicates were never exercised for `$^`.
        (r"$^", r"$^"),
        (r"$^|^$", r"$^|^$"),
        (r"\b^", r"$\b"),
        (r"^(?m:^)", r"(?m:$)$"),
        (r"(?m:^)^", r"$(?m:$)"),
    ];
    for &(start, end) in anchored {
        assert!(t(start).is_anchored_start(), "pattern: {:?}", start);
        assert!(t(end).is_anchored_end(), "pattern: {:?}", end);
        assert!(t(start).is_line_anchored_start(), "pattern: {:?}", start);
        assert!(t(end).is_line_anchored_end(), "pattern: {:?}", end);
    }

    // Negative examples.
    //
    // Multi-line `^`/`$` must not count as text anchors, neither alone nor
    // as one branch of an alternation. Only the non-line predicates are
    // checked for these patterns.
    assert!(!t(r"(?m)^").is_anchored_start());
    assert!(!t(r"(?m)$").is_anchored_end());
    for &pattern in [r"(?m:^$)|$^", r"$^|(?m:^$)"].iter() {
        assert!(!t(pattern).is_anchored_start(), "pattern: {:?}", pattern);
        assert!(!t(pattern).is_anchored_end(), "pattern: {:?}", pattern);
    }

    // A literal before `^` or after `$`: none of the four predicates hold.
    for &pattern in [r"a^", r"$a"].iter() {
        assert!(!t(pattern).is_anchored_start(), "pattern: {:?}", pattern);
        assert!(!t(pattern).is_line_anchored_start(), "pattern: {:?}", pattern);
        assert!(!t(pattern).is_anchored_end(), "pattern: {:?}", pattern);
        assert!(!t(pattern).is_line_anchored_end(), "pattern: {:?}", pattern);
    }

    // `(start, end)` pairs as above, but here `start` must NOT be anchored
    // at the start and `end` must NOT be anchored at the end: an unanchored
    // alternation branch, or an anchor under a `*` repetition.
    let unanchored: &[(&str, &str)] = &[
        (r"^foo|bar", r"foo|bar$"),
        (r"^*", r"$*"),
        (r"^*+", r"$*+"),
        (r"^+*", r"$+*"),
        (r"(^)*", r"($)*"),
    ];
    for &(start, end) in unanchored {
        assert!(!t(start).is_anchored_start(), "pattern: {:?}", start);
        assert!(!t(end).is_anchored_end(), "pattern: {:?}", end);
        assert!(!t(start).is_line_anchored_start(), "pattern: {:?}", start);
        assert!(!t(end).is_line_anchored_end(), "pattern: {:?}", end);
    }
}
#[test]
fn analysis_is_line_anchored() {
    // Each entry is `(start, end)`: the first pattern must be line anchored
    // at the start, the second line anchored at the end. The last two
    // entries check both predicates on the same pattern.
    let line_anchored: &[(&str, &str)] = &[
        (r"(?m)^(foo|bar)", r"(?m)(foo|bar)$"),
        (r"(?m)^foo|^bar", r"(?m)foo$|bar$"),
        (r"(?m)^", r"(?m)$"),
        (r"(?m:^$)|$^", r"(?m:^$)|$^"),
        (r"$^|(?m:^$)", r"$^|(?m:^$)"),
    ];
    for &(start, end) in line_anchored {
        assert!(t(start).is_line_anchored_start(), "pattern: {:?}", start);
        assert!(t(end).is_line_anchored_end(), "pattern: {:?}", end);
    }
}
#[test]
fn analysis_is_any_anchored() {
    // Positive examples, as `(start, end)` pairs.
    for &(start, end) in [(r"^", r"$"), (r"\A", r"\z")].iter() {
        assert!(t(start).is_any_anchored_start(), "pattern: {:?}", start);
        assert!(t(end).is_any_anchored_end(), "pattern: {:?}", end);
    }

    // Negative examples: the multi-line anchors, and each plain anchor
    // queried for the opposite end.
    for &(start, end) in [(r"(?m)^", r"(?m)$"), (r"$", r"^")].iter() {
        assert!(!t(start).is_any_anchored_start(), "pattern: {:?}", start);
        assert!(!t(end).is_any_anchored_end(), "pattern: {:?}", end);
    }
}
#[test]
fn analysis_is_match_empty() {
    // Positive examples: empty patterns and groups, repetitions with a
    // minimum of zero, alternations/concatenations of those, and anchors.
    let can_match_empty = [
        r"",
        r"()",
        r"()*",
        r"()+",
        r"()?",
        r"a*",
        r"a?",
        r"a{0}",
        r"a{0,}",
        r"a{0,1}",
        r"a{0,10}",
        r"a*|b",
        r"b|a*",
        r"a*a?(abcd)*",
        r"^",
        r"$",
        r"(?m)^",
        r"(?m)$",
        r"\A",
        r"\z",
        r"\B",
    ];
    for &pattern in can_match_empty.iter() {
        assert!(t(pattern).is_match_empty(), "pattern: {:?}", pattern);
    }
    // Needs Unicode general category data, so it is gated separately.
    #[cfg(feature = "unicode-gencat")]
    assert!(t(r"\pL*").is_match_empty());
    // This one is translated with `t_bytes` rather than `t`.
    assert!(t_bytes(r"(?-u)\B").is_match_empty());

    // Negative examples: at least one character is required, or the
    // pattern is a (non-negated) word boundary.
    let never_match_empty = [
        r"a+",
        r"a{1}",
        r"a{1,}",
        r"a{1,2}",
        r"a{1,10}",
        r"b|a",
        r"a*a+(abcd)*",
        r"\b",
        r"(?-u)\b",
    ];
    for &pattern in never_match_empty.iter() {
        assert!(!t(pattern).is_match_empty(), "pattern: {:?}", pattern);
    }
}
#[test]
fn analysis_is_literal() {
    // Positive examples: plain literals, optionally preceded by a flag.
    for &pattern in [r"a", r"ab", r"abc", r"(?m)abc"].iter() {
        assert!(t(pattern).is_literal(), "pattern: {:?}", pattern);
    }

    // Negative examples: the empty pattern, an anchor, an alternation, and
    // anything containing a group, repetition or bracketed class.
    let not_literal = [
        r"",
        r"^",
        r"a|b",
        r"(a)",
        r"a+",
        r"foo(a)",
        r"(a)foo",
        r"[a]",
    ];
    for &pattern in not_literal.iter() {
        assert!(!t(pattern).is_literal(), "pattern: {:?}", pattern);
    }
}
// Checks `is_alternation_literal()` on translated patterns. It is expected
// to hold for plain literals and for alternations whose branches are all
// literals, and to fail for the empty pattern, an anchor, and anything that
// contains a group, repetition or bracketed class (alone or as a branch).
3125 fn analysis_is_alternation_literal() {
3126 // Positive examples.
3127 assert
!(t(r
"a").is_alternation_literal());
3128 assert
!(t(r
"ab").is_alternation_literal());
3129 assert
!(t(r
"abc").is_alternation_literal());
3130 assert
!(t(r
"(?m)abc").is_alternation_literal());
3131 assert
!(t(r
"a|b").is_alternation_literal());
3132 assert
!(t(r
"a|b|c").is_alternation_literal());
3133 assert
!(t(r
"foo|bar").is_alternation_literal());
3134 assert
!(t(r
"foo|bar|baz").is_alternation_literal());
3136 // Negative examples.
3137 assert
!(!t(r
"").is_alternation_literal());
3138 assert
!(!t(r
"^").is_alternation_literal());
3139 assert
!(!t(r
"(a)").is_alternation_literal());
3140 assert
!(!t(r
"a+").is_alternation_literal());
3141 assert
!(!t(r
"foo(a)").is_alternation_literal());
3142 assert
!(!t(r
"(a)foo").is_alternation_literal());
3143 assert
!(!t(r
"[a]").is_alternation_literal());
3144 assert
!(!t(r
"[a]|b").is_alternation_literal());
3145 assert
!(!t(r
"a|[b]").is_alternation_literal());
3146 assert
!(!t(r
"(a)|b").is_alternation_literal());
3147 assert
!(!t(r
"a|(b)").is_alternation_literal());