1 // Copyright 2018 The Rust Project Developers. See the COPYRIGHT
2 // file at the top-level directory of this distribution and at
3 // http://rust-lang.org/COPYRIGHT.
5 // Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
6 // http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
7 // <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
8 // option. This file may not be copied, modified, or distributed
9 // except according to those terms.
12 This module provides a regular expression parser.
15 use std
::borrow
::Borrow
;
16 use std
::cell
::{Cell, RefCell}
;
20 use ast
::{self, Ast, Position, Span}
;
23 use is_meta_character
;
25 type Result
<T
> = result
::Result
<T
, ast
::Error
>;
27 /// A primitive is an expression with no sub-expressions. This includes
28 /// literals, assertions and non-set character classes. This representation
29 /// is used as intermediate state in the parser.
31 /// This does not include ASCII character classes, since they can only appear
32 /// within a set character class.
33 #[derive(Clone, Debug, Eq, PartialEq)]
35 Literal(ast
::Literal
),
36 Assertion(ast
::Assertion
),
39 Unicode(ast
::ClassUnicode
),
43 /// Return the span of this primitive.
44 fn span(&self) -> &Span
{
46 Primitive
::Literal(ref x
) => &x
.span
,
47 Primitive
::Assertion(ref x
) => &x
.span
,
48 Primitive
::Dot(ref span
) => span
,
49 Primitive
::Perl(ref x
) => &x
.span
,
50 Primitive
::Unicode(ref x
) => &x
.span
,
54 /// Convert this primitive into a proper AST.
55 fn into_ast(self) -> Ast
{
57 Primitive
::Literal(lit
) => Ast
::Literal(lit
),
58 Primitive
::Assertion(assert
) => Ast
::Assertion(assert
),
59 Primitive
::Dot(span
) => Ast
::Dot(span
),
60 Primitive
::Perl(cls
) => Ast
::Class(ast
::Class
::Perl(cls
)),
61 Primitive
::Unicode(cls
) => Ast
::Class(ast
::Class
::Unicode(cls
)),
65 /// Convert this primitive into an item in a character class.
67 /// If this primitive is not a legal item (i.e., an assertion or a dot),
68 /// then return an error.
69 fn into_class_set_item
<P
: Borrow
<Parser
>>(
72 ) -> Result
<ast
::ClassSetItem
> {
73 use ast
::ClassSetItem
;
74 use self::Primitive
::*;
77 Literal(lit
) => Ok(ClassSetItem
::Literal(lit
)),
78 Perl(cls
) => Ok(ClassSetItem
::Perl(cls
)),
79 Unicode(cls
) => Ok(ClassSetItem
::Unicode(cls
)),
80 x
=> Err(p
.error(*x
.span(), ast
::ErrorKind
::ClassEscapeInvalid
)),
84 /// Convert this primitive into a literal in a character class. In
85 /// particular, literals are the only valid items that can appear in
88 /// If this primitive is not a legal item (i.e., a class, assertion or a
89 /// dot), then return an error.
90 fn into_class_literal
<P
: Borrow
<Parser
>>(
93 ) -> Result
<ast
::Literal
> {
94 use self::Primitive
::*;
97 Literal(lit
) => Ok(lit
),
98 x
=> Err(p
.error(*x
.span(), ast
::ErrorKind
::ClassRangeLiteral
)),
/// Returns true if the given character is a hexadecimal digit.
///
/// Only the ASCII digits `0-9` and the ASCII letters `a-f` and `A-F` are
/// accepted; every other character (including non-ASCII digits) is rejected.
fn is_hex(c: char) -> bool {
    // The standard library predicate accepts exactly `0-9`, `a-f` and `A-F`,
    // which is the same set the explicit range comparisons covered.
    c.is_ascii_hexdigit()
}
/// Returns true if the given character is valid in a capture group name.
///
/// A capture group name may contain `_`, ASCII letters and ASCII digits.
///
/// If `first` is true, then `c` is treated as the first character in the
/// group name (which is not allowed to be a digit).
fn is_capture_char(c: char, first: bool) -> bool {
    // Digits are only permitted after the first character of the name.
    c == '_' || (!first && c.is_ascii_digit()) || c.is_ascii_alphabetic()
}
117 /// A builder for a regular expression parser.
119 /// This builder permits modifying configuration options for the parser.
120 #[derive(Clone, Debug)]
121 pub struct ParserBuilder
{
122 ignore_whitespace
: bool
,
127 impl Default
for ParserBuilder
{
128 fn default() -> ParserBuilder
{
134 /// Create a new parser builder with a default configuration.
135 pub fn new() -> ParserBuilder
{
137 ignore_whitespace
: false,
143 /// Build a parser from this configuration with the given pattern.
144 pub fn build(&self) -> Parser
{
146 pos
: Cell
::new(Position { offset: 0, line: 1, column: 1 }
),
147 capture_index
: Cell
::new(0),
148 nest_limit
: self.nest_limit
,
150 initial_ignore_whitespace
: self.ignore_whitespace
,
151 ignore_whitespace
: Cell
::new(self.ignore_whitespace
),
152 comments
: RefCell
::new(vec
![]),
153 stack_group
: RefCell
::new(vec
![]),
154 stack_class
: RefCell
::new(vec
![]),
155 capture_names
: RefCell
::new(vec
![]),
156 scratch
: RefCell
::new(String
::new()),
160 /// Set the nesting limit for this parser.
162 /// The nesting limit controls how deep the abstract syntax tree is allowed
163 /// to be. If the AST exceeds the given limit (e.g., with too many nested
164 /// groups), then an error is returned by the parser.
166 /// The purpose of this limit is to act as a heuristic to prevent stack
167 /// overflow for consumers that do structural induction on an `Ast` using
168 /// explicit recursion. While this crate never does this (instead using
169 /// constant stack space and moving the call stack to the heap), other
172 /// This limit is not checked until the entire Ast is parsed. Therefore,
173 /// if callers want to put a limit on the amount of heap space used, then
174 /// they should impose a limit on the length, in bytes, of the concrete
175 /// pattern string. In particular, this is viable since this parser
176 /// implementation will limit itself to heap space proportional to the
177 /// length of the pattern string.
179 /// Note that a nest limit of `0` will return a nest limit error for most
180 /// patterns but not all. For example, a nest limit of `0` permits `a` but
181 /// not `ab`, since `ab` requires a concatenation, which results in a nest
182 /// depth of `1`. In general, a nest limit is not something that manifests
183 /// in an obvious way in the concrete syntax, therefore, it should not be
184 /// used in a granular way.
185 pub fn nest_limit(&mut self, limit
: u32) -> &mut ParserBuilder
{
186 self.nest_limit
= limit
;
190 /// Whether to support octal syntax or not.
192 /// Octal syntax is a little-known way of uttering Unicode codepoints in
193 /// a regular expression. For example, `a`, `\x61`, `\u0061` and
194 /// `\141` are all equivalent regular expressions, where the last example
195 /// shows octal syntax.
197 /// While supporting octal syntax isn't in and of itself a problem, it does
198 /// make good error messages harder. That is, in PCRE based regex engines,
199 /// syntax like `\0` invokes a backreference, which is explicitly
200 /// unsupported in Rust's regex engine. However, many users expect it to
201 /// be supported. Therefore, when octal support is disabled, the error
202 /// message will explicitly mention that backreferences aren't supported.
204 /// Octal syntax is disabled by default.
205 pub fn octal(&mut self, yes
: bool
) -> &mut ParserBuilder
{
210 /// Enable verbose mode in the regular expression.
212 /// When enabled, verbose mode permits insignificant whitespace in many
213 /// places in the regular expression, as well as comments. Comments are
214 /// started using `#` and continue until the end of the line.
216 /// By default, this is disabled. It may be selectively enabled in the
217 /// regular expression by using the `x` flag regardless of this setting.
218 pub fn ignore_whitespace(&mut self, yes
: bool
) -> &mut ParserBuilder
{
219 self.ignore_whitespace
= yes
;
224 /// A regular expression parser.
226 /// This parses a string representation of a regular expression into an
227 /// abstract syntax tree. The size of the tree is proportional to the length
228 /// of the regular expression pattern.
230 /// A `Parser` can be configured in more detail via a
231 /// [`ParserBuilder`](struct.ParserBuilder.html).
232 #[derive(Clone, Debug)]
234 /// The current position of the parser.
236 /// The current capture index.
237 capture_index
: Cell
<u32>,
238 /// The maximum number of open parens/brackets allowed. If the parser
239 /// exceeds this number, then an error is returned.
241 /// Whether to support octal syntax or not. When `false`, the parser will
242 /// return an error helpfully pointing out that backreferences are not
245 /// The initial setting for `ignore_whitespace` as provided by
246 /// `ParserBuilder`. This is used when resetting the parser's state.
247 initial_ignore_whitespace
: bool
,
248 /// Whether whitespace should be ignored. When enabled, comments are
250 ignore_whitespace
: Cell
<bool
>,
251 /// A list of comments, in order of appearance.
252 comments
: RefCell
<Vec
<ast
::Comment
>>,
253 /// A stack of grouped sub-expressions, including alternations.
254 stack_group
: RefCell
<Vec
<GroupState
>>,
255 /// A stack of nested character classes. This is only non-empty when
257 stack_class
: RefCell
<Vec
<ClassState
>>,
258 /// A sorted sequence of capture names. This is used to detect duplicate
259 /// capture names and report an error if one is detected.
260 capture_names
: RefCell
<Vec
<ast
::CaptureName
>>,
261 /// A scratch buffer used in various places. Mostly this is used to
262 /// accumulate relevant characters from parts of a pattern.
263 scratch
: RefCell
<String
>,
266 /// ParserI is the internal parser implementation.
268 /// We use this separate type so that we can carry the provided pattern string
269 /// along with us. In particular, a `Parser` internal state is not tied to any
270 /// one pattern, but `ParserI` is.
272 /// This type also lets us use `ParserI<&Parser>` in production code while
273 /// retaining the convenience of `ParserI<Parser>` for tests, which sometimes
274 /// work against the internal interface of the parser.
275 #[derive(Clone, Debug)]
276 struct ParserI
<'s
, P
> {
277 /// The parser state/configuration.
279 /// The full regular expression provided by the user.
283 /// GroupState represents a single stack frame while parsing nested groups
284 /// and alternations. Each frame records the state up to an opening parenthesis
285 /// or an alternation operator `|`.
286 #[derive(Clone, Debug)]
288 /// This state is pushed whenever an opening group is found.
290 /// The concatenation immediately preceding the opening group.
292 /// The group that has been opened. Its sub-AST is always empty.
294 /// Whether this group has the `x` flag enabled or not.
295 ignore_whitespace
: bool
,
297 /// This state is pushed whenever a new alternation branch is found. If
298 /// an alternation branch is found and this state is at the top of the
299 /// stack, then this state should be modified to include the new
301 Alternation(ast
::Alternation
),
304 /// ClassState represents a single stack frame while parsing character classes.
305 /// Each frame records the state up to an intersection, difference, symmetric
306 /// difference or nested class.
308 /// Note that a parser's character class stack is only non-empty when parsing
309 /// a character class. In all other cases, it is empty.
310 #[derive(Clone, Debug)]
312 /// This state is pushed whenever an opening bracket is found.
314 /// The union of class items immediately preceding this class.
315 union: ast
::ClassSetUnion
,
316 /// The class that has been opened. Typically this just corresponds
317 /// to the `[`, but it can also include `[^` since `^` indicates
318 /// negation of the class.
319 set
: ast
::ClassBracketed
,
321 /// This state is pushed when an operator is seen. When popped, the stored
322 /// set becomes the left hand side of the operator.
324 /// The type of the operation, i.e., &&, -- or ~~.
325 kind
: ast
::ClassSetBinaryOpKind
,
326 /// The left-hand side of the operator.
332 /// Create a new parser with a default configuration.
334 /// The parser can be run with either the `parse` or `parse_with_comments`
335 /// methods. The parse methods return an abstract syntax tree.
337 /// To set configuration options on the parser, use
338 /// [`ParserBuilder`](struct.ParserBuilder.html).
339 pub fn new() -> Parser
{
340 ParserBuilder
::new().build()
343 /// Parse the regular expression into an abstract syntax tree.
344 pub fn parse(&mut self, pattern
: &str) -> Result
<Ast
> {
345 ParserI
::new(self, pattern
).parse()
348 /// Parse the regular expression and return an abstract syntax tree with
349 /// all of the comments found in the pattern.
350 pub fn parse_with_comments(
353 ) -> Result
<ast
::WithComments
> {
354 ParserI
::new(self, pattern
).parse_with_comments()
357 /// Reset the internal state of a parser.
359 /// This is called at the beginning of every parse. This prevents the
360 /// parser from running with inconsistent state (say, if a previous
361 /// invocation returned an error and the parser is reused).
363 // These settings should be in line with the construction
364 // in `ParserBuilder::build`.
365 self.pos
.set(Position { offset: 0, line: 1, column: 1}
);
366 self.ignore_whitespace
.set(self.initial_ignore_whitespace
);
367 self.comments
.borrow_mut().clear();
368 self.stack_group
.borrow_mut().clear();
369 self.stack_class
.borrow_mut().clear();
373 impl<'s
, P
: Borrow
<Parser
>> ParserI
<'s
, P
> {
374 /// Build an internal parser from a parser configuration and a pattern.
375 fn new(parser
: P
, pattern
: &'s
str) -> ParserI
<'s
, P
> {
376 ParserI { parser: parser, pattern: pattern }
379 /// Return a reference to the parser state.
380 fn parser(&self) -> &Parser
{
384 /// Return a reference to the pattern being parsed.
385 fn pattern(&self) -> &str {
386 self.pattern
.borrow()
389 /// Create a new error with the given span and error type.
390 fn error(&self, span
: Span
, kind
: ast
::ErrorKind
) -> ast
::Error
{
393 pattern
: self.pattern().to_string(),
398 /// Return the current offset of the parser.
400 /// The offset starts at `0` from the beginning of the regular expression
402 fn offset(&self) -> usize {
403 self.parser().pos
.get().offset
406 /// Return the current line number of the parser.
408 /// The line number starts at `1`.
409 fn line(&self) -> usize {
410 self.parser().pos
.get().line
413 /// Return the current column of the parser.
415 /// The column number starts at `1` and is reset whenever a `\n` is seen.
416 fn column(&self) -> usize {
417 self.parser().pos
.get().column
420 /// Return the next capturing index. Each subsequent call increments the
423 /// The span given should correspond to the location of the opening
426 /// If the capture limit is exceeded, then an error is returned.
427 fn next_capture_index(&self, span
: Span
) -> Result
<u32> {
428 let current
= self.parser().capture_index
.get();
429 let i
= current
.checked_add(1).ok_or_else(|| {
430 self.error(span
, ast
::ErrorKind
::CaptureLimitExceeded
)
432 self.parser().capture_index
.set(i
);
436 /// Adds the given capture name to this parser. If this capture name has
437 /// already been used, then an error is returned.
438 fn add_capture_name(&self, cap
: &ast
::CaptureName
) -> Result
<()> {
439 let mut names
= self.parser().capture_names
.borrow_mut();
440 match names
.binary_search_by_key(
445 names
.insert(i
, cap
.clone());
449 Err(self.error(cap
.span
, ast
::ErrorKind
::GroupNameDuplicate
{
450 original
: names
[i
].span
,
456 /// Return whether the parser should ignore whitespace or not.
457 fn ignore_whitespace(&self) -> bool
{
458 self.parser().ignore_whitespace
.get()
461 /// Return the character at the current position of the parser.
463 /// This panics if the current position does not point to a valid char.
464 fn char(&self) -> char {
465 self.char_at(self.offset())
468 /// Return the character at the given position.
470 /// This panics if the given position does not point to a valid char.
471 fn char_at(&self, i
: usize) -> char {
472 self.pattern()[i
..].chars().next()
474 panic
!("expected char at offset {}", i
)
478 /// Bump the parser to the next Unicode scalar value.
480 /// If the end of the input has been reached, then `false` is returned.
481 fn bump(&self) -> bool
{
485 let Position { mut offset, mut line, mut column }
= self.pos();
486 if self.char() == '
\n'
{
487 line
= line
.checked_add(1).unwrap();
490 column
= column
.checked_add(1).unwrap();
492 offset
+= self.char().len_utf8();
493 self.parser().pos
.set(Position
{
498 self.pattern()[self.offset()..].chars().next().is_some()
501 /// If the substring starting at the current position of the parser has
502 /// the given prefix, then bump the parser to the character immediately
503 /// following the prefix and return true. Otherwise, don't bump the parser
504 /// and return false.
505 fn bump_if(&self, prefix
: &str) -> bool
{
506 if self.pattern()[self.offset()..].starts_with(prefix
) {
507 for _
in 0..prefix
.chars().count() {
516 /// Returns true if and only if the parser is positioned at a look-around
517 /// prefix. The conditions under which this returns true must always
518 /// correspond to a regular expression that would otherwise be considered
521 /// This should only be called immediately after parsing the opening of
522 /// a group or a set of flags.
523 fn is_lookaround_prefix(&self) -> bool
{
525 || self.bump_if("?!")
526 || self.bump_if("?<=")
527 || self.bump_if("?<!")
530 /// Bump the parser, and if the `x` flag is enabled, bump through any
531 /// subsequent spaces. Return true if and only if the parser is not at
533 fn bump_and_bump_space(&self) -> bool
{
541 /// If the `x` flag is enabled (i.e., whitespace insensitivity with
542 /// comments), then this will advance the parser through all whitespace
543 /// and comments to the next non-whitespace non-comment byte.
545 /// If the `x` flag is disabled, then this is a no-op.
547 /// This should be used selectively throughout the parser where
548 /// arbitrary whitespace is permitted when the `x` flag is enabled. For
549 /// example, `{ 5 , 6}` is equivalent to `{5,6}`.
550 fn bump_space(&self) {
551 if !self.ignore_whitespace() {
554 while !self.is_eof() {
555 if self.char().is_whitespace() {
557 } else if self.char() == '
#' {
558 let start
= self.pos();
559 let mut comment_text
= String
::new();
561 while !self.is_eof() {
567 comment_text
.push(c
);
569 let comment
= ast
::Comment
{
570 span
: Span
::new(start
, self.pos()),
571 comment
: comment_text
,
573 self.parser().comments
.borrow_mut().push(comment
);
580 /// Peek at the next character in the input without advancing the parser.
582 /// If the input has been exhausted, then this returns `None`.
583 fn peek(&self) -> Option
<char> {
587 self.pattern()[self.offset() + self.char().len_utf8()..].chars().next()
590 /// Like peek, but will ignore spaces when the parser is in whitespace
591 /// insensitive mode.
592 fn peek_space(&self) -> Option
<char> {
593 if !self.ignore_whitespace() {
599 let mut start
= self.offset() + self.char().len_utf8();
600 let mut in_comment
= false;
601 for (i
, c
) in self.pattern()[start
..].char_indices() {
602 if c
.is_whitespace() {
604 } else if !in_comment
&& c
== '
#' {
606 } else if in_comment
&& c
== '
\n'
{
613 self.pattern()[start
..].chars().next()
616 /// Returns true if the next call to `bump` would return false.
617 fn is_eof(&self) -> bool
{
618 self.offset() == self.pattern().len()
621 /// Return the current position of the parser, which includes the offset,
623 fn pos(&self) -> Position
{
624 self.parser().pos
.get()
627 /// Create a span at the current position of the parser. Both the start
628 /// and end of the span are set.
629 fn span(&self) -> Span
{
630 Span
::splat(self.pos())
633 /// Create a span that covers the current character.
634 fn span_char(&self) -> Span
{
635 let mut next
= Position
{
636 offset
: self.offset().checked_add(self.char().len_utf8()).unwrap(),
638 column
: self.column().checked_add(1).unwrap(),
640 if self.char() == '
\n'
{
644 Span
::new(self.pos(), next
)
647 /// Parse and push a single alternation on to the parser's internal stack.
648 /// If the top of the stack already has an alternation, then add to that
649 /// instead of pushing a new one.
651 /// The concatenation given corresponds to a single alternation branch.
652 /// The concatenation returned starts the next branch and is empty.
654 /// This assumes the parser is currently positioned at `|` and will advance
655 /// the parser to the character following `|`.
656 fn push_alternate(&self, mut concat
: ast
::Concat
) -> Result
<ast
::Concat
> {
657 assert_eq
!(self.char(), '
|'
);
658 concat
.span
.end
= self.pos();
659 self.push_or_add_alternation(concat
);
667 /// Pushes or adds the given branch of an alternation to the parser's
668 /// internal stack of state.
669 fn push_or_add_alternation(&self, concat
: ast
::Concat
) {
670 use self::GroupState
::*;
672 let mut stack
= self.parser().stack_group
.borrow_mut();
673 if let Some(&mut Alternation(ref mut alts
)) = stack
.last_mut() {
674 alts
.asts
.push(concat
.into_ast());
677 stack
.push(Alternation(ast
::Alternation
{
678 span
: Span
::new(concat
.span
.start
, self.pos()),
679 asts
: vec
![concat
.into_ast()],
683 /// Parse and push a group AST (and its parent concatenation) on to the
684 /// parser's internal stack. Return a fresh concatenation corresponding
685 /// to the group's sub-AST.
687 /// If a set of flags was found (with no group), then the concatenation
688 /// is returned with that set of flags added.
690 /// This assumes that the parser is currently positioned on the opening
691 /// parenthesis. It advances the parser to the character at the start
692 /// of the sub-expression (or adjoining expression).
694 /// If there was a problem parsing the start of the group, then an error
696 fn push_group(&self, mut concat
: ast
::Concat
) -> Result
<ast
::Concat
> {
697 assert_eq
!(self.char(), '
('
);
698 match self.parse_group()?
{
699 Either
::Left(set
) => {
700 let ignore
= set
.flags
.flag_state(ast
::Flag
::IgnoreWhitespace
);
701 if let Some(v
) = ignore
{
702 self.parser().ignore_whitespace
.set(v
);
705 concat
.asts
.push(Ast
::Flags(set
));
708 Either
::Right(group
) => {
709 let old_ignore_whitespace
= self.ignore_whitespace();
710 let new_ignore_whitespace
= group
712 .and_then(|f
| f
.flag_state(ast
::Flag
::IgnoreWhitespace
))
713 .unwrap_or(old_ignore_whitespace
);
714 self.parser().stack_group
.borrow_mut().push(GroupState
::Group
{
717 ignore_whitespace
: old_ignore_whitespace
,
719 self.parser().ignore_whitespace
.set(new_ignore_whitespace
);
728 /// Pop a group AST from the parser's internal stack and set the group's
729 /// AST to the given concatenation. Return the concatenation containing
732 /// This assumes that the parser is currently positioned on the closing
733 /// parenthesis and advances the parser to the character following the `)`.
735 /// If no such group could be popped, then an unopened group error is
737 fn pop_group(&self, mut group_concat
: ast
::Concat
) -> Result
<ast
::Concat
> {
738 use self::GroupState
::*;
740 assert_eq
!(self.char(), '
)'
);
741 let mut stack
= self.parser().stack_group
.borrow_mut();
742 let (mut prior_concat
, mut group
, ignore_whitespace
, alt
) =
744 Some(Group { concat, group, ignore_whitespace }
) => {
745 (concat
, group
, ignore_whitespace
, None
)
747 Some(Alternation(alt
)) => {
749 Some(Group { concat, group, ignore_whitespace }
) => {
750 (concat
, group
, ignore_whitespace
, Some(alt
))
752 None
| Some(Alternation(_
)) => {
753 return Err(self.error(
755 ast
::ErrorKind
::GroupUnopened
,
761 return Err(self.error(
763 ast
::ErrorKind
::GroupUnopened
,
767 self.parser().ignore_whitespace
.set(ignore_whitespace
);
768 group_concat
.span
.end
= self.pos();
770 group
.span
.end
= self.pos();
773 alt
.span
.end
= group_concat
.span
.end
;
774 alt
.asts
.push(group_concat
.into_ast());
775 group
.ast
= Box
::new(alt
.into_ast());
778 group
.ast
= Box
::new(group_concat
.into_ast());
781 prior_concat
.asts
.push(Ast
::Group(group
));
785 /// Pop the last state from the parser's internal stack, if it exists, and
786 /// add the given concatenation to it. There either must be no state or a
787 /// single alternation item on the stack. Any other scenario produces an
790 /// This assumes that the parser has advanced to the end.
791 fn pop_group_end(&self, mut concat
: ast
::Concat
) -> Result
<Ast
> {
792 concat
.span
.end
= self.pos();
793 let mut stack
= self.parser().stack_group
.borrow_mut();
794 let ast
= match stack
.pop() {
795 None
=> Ok(concat
.into_ast()),
796 Some(GroupState
::Alternation(mut alt
)) => {
797 alt
.span
.end
= self.pos();
798 alt
.asts
.push(concat
.into_ast());
799 Ok(Ast
::Alternation(alt
))
801 Some(GroupState
::Group { group, .. }
) => {
802 return Err(self.error(
804 ast
::ErrorKind
::GroupUnclosed
,
808 // If we try to pop again, there should be nothing.
811 Some(GroupState
::Alternation(_
)) => {
812 // This unreachable is unfortunate. This case can't happen
813 // because the only way we can be here is if there were two
814 // `GroupState::Alternation`s adjacent in the parser's stack,
815 // which we guarantee to never happen because we never push a
816 // `GroupState::Alternation` if one is already at the top of
820 Some(GroupState
::Group { group, .. }
) => {
821 Err(self.error(group
.span
, ast
::ErrorKind
::GroupUnclosed
))
826 /// Parse the opening of a character class and push the current class
827 /// parsing context onto the parser's stack. This assumes that the parser
828 /// is positioned at an opening `[`. The given union should correspond to
829 /// the union of set items built up before seeing the `[`.
831 /// If there was a problem parsing the opening of the class, then an error
832 /// is returned. Otherwise, a new union of set items for the class is
833 /// returned (which may be populated with either a `]` or a `-`).
836 parent_union
: ast
::ClassSetUnion
,
837 ) -> Result
<ast
::ClassSetUnion
> {
838 assert_eq
!(self.char(), '
['
);
840 let (nested_set
, nested_union
) = self.parse_set_class_open()?
;
841 self.parser().stack_class
.borrow_mut().push(ClassState
::Open
{
848 /// Parse the end of a character class set and pop the character class
849 /// parser stack. The union given corresponds to the last union built
850 /// before seeing the closing `]`. The union returned corresponds to the
851 /// parent character class set with the nested class added to it.
853 /// This assumes that the parser is positioned at a `]` and will advance
854 /// the parser to the byte immediately following the `]`.
856 /// If the stack is empty after popping, then this returns the final
857 /// "top-level" character class AST (where a "top-level" character class
858 /// is one that is not nested inside any other character class).
860 /// If there is no corresponding opening bracket on the parser's stack,
861 /// then an error is returned.
864 nested_union
: ast
::ClassSetUnion
,
865 ) -> Result
<Either
<ast
::ClassSetUnion
, ast
::Class
>> {
866 assert_eq
!(self.char(), '
]'
);
868 let item
= ast
::ClassSet
::Item(nested_union
.into_item());
869 let prevset
= self.pop_class_op(item
);
870 let mut stack
= self.parser().stack_class
.borrow_mut();
873 // We can never observe an empty stack:
875 // 1) We are guaranteed to start with a non-empty stack since
876 // the character class parser is only initiated when it sees
878 // 2) If we ever observe an empty stack while popping after
879 // seeing a `]`, then we signal the character class parser
881 panic
!("unexpected empty character class stack")
883 Some(ClassState
::Op { .. }
) => {
884 // This panic is unfortunate, but this case is impossible
885 // since we already popped the Op state if one exists above.
886 // Namely, every push to the class parser stack is guarded by
887 // whether an existing Op is already on the top of the stack.
888 // If it is, the existing Op is modified. That is, the stack
889 // can never have consecutive Op states.
890 panic
!("unexpected ClassState::Op")
892 Some(ClassState
::Open { mut union, mut set }
) => {
894 set
.span
.end
= self.pos();
896 if stack
.is_empty() {
897 Ok(Either
::Right(ast
::Class
::Bracketed(set
)))
899 union.push(ast
::ClassSetItem
::Bracketed(Box
::new(set
)));
900 Ok(Either
::Left(union))
906 /// Return an "unclosed class" error whose span points to the most
907 /// recently opened class.
909 /// This should only be called while parsing a character class.
910 fn unclosed_class_error(&self) -> ast
::Error
{
911 for state
in self.parser().stack_class
.borrow().iter().rev() {
913 ClassState
::Open { ref set, .. }
=> {
914 return self.error(set
.span
, ast
::ErrorKind
::ClassUnclosed
);
919 // We are guaranteed to have a non-empty stack with at least
920 // one open bracket, so we should never get here.
921 panic
!("no open character class found")
924 /// Push the current set of class items on to the class parser's stack as
925 /// the left hand side of the given operator.
927 /// A fresh set union is returned, which should be used to build the right
928 /// hand side of this operator.
931 next_kind
: ast
::ClassSetBinaryOpKind
,
932 next_union
: ast
::ClassSetUnion
,
933 ) -> ast
::ClassSetUnion
{
935 let item
= ast
::ClassSet
::Item(next_union
.into_item());
936 let new_lhs
= self.pop_class_op(item
);
937 self.parser().stack_class
.borrow_mut().push(ClassState
::Op
{
941 ast
::ClassSetUnion { span: self.span(), items: vec![] }
944 /// Pop a character class set from the character class parser stack. If the
945 /// top of the stack is just an item (not an operation), then return the
946 /// given set unchanged. If the top of the stack is an operation, then the
947 /// given set will be used as the rhs of the operation on the top of the
948 /// stack. In that case, the binary operation is returned as a set.
949 fn pop_class_op(&self, rhs
: ast
::ClassSet
) -> ast
::ClassSet
{
950 let mut stack
= self.parser().stack_class
.borrow_mut();
951 let (kind
, lhs
) = match stack
.pop() {
952 Some(ClassState
::Op { kind, lhs }
) => (kind
, lhs
),
953 Some(state @ ClassState
::Open { .. }
) => {
957 None
=> unreachable
!(),
959 let span
= Span
::new(lhs
.span().start
, rhs
.span().end
);
960 ast
::ClassSet
::BinaryOp(ast
::ClassSetBinaryOp
{
969 impl<'s
, P
: Borrow
<Parser
>> ParserI
<'s
, P
> {
970 /// Parse the regular expression into an abstract syntax tree.
971 fn parse(&self) -> Result
<Ast
> {
972 self.parse_with_comments().map(|astc
| astc
.ast
)
975 /// Parse the regular expression and return an abstract syntax tree with
976 /// all of the comments found in the pattern.
977 fn parse_with_comments(&self) -> Result
<ast
::WithComments
> {
978 assert_eq
!(self.offset(), 0, "parser can only be used once");
979 self.parser().reset();
980 let mut concat
= ast
::Concat
{
990 '
('
=> concat
= self.push_group(concat
)?
,
991 '
)'
=> concat
= self.pop_group(concat
)?
,
992 '
|'
=> concat
= self.push_alternate(concat
)?
,
994 let class
= self.parse_set_class()?
;
995 concat
.asts
.push(Ast
::Class(class
));
998 concat
= self.parse_uncounted_repetition(
999 concat
, ast
::RepetitionKind
::ZeroOrOne
)?
;
1002 concat
= self.parse_uncounted_repetition(
1003 concat
, ast
::RepetitionKind
::ZeroOrMore
)?
;
1006 concat
= self.parse_uncounted_repetition(
1007 concat
, ast
::RepetitionKind
::OneOrMore
)?
;
1010 concat
= self.parse_counted_repetition(concat
)?
;
1012 _
=> concat
.asts
.push(self.parse_primitive()?
.into_ast()),
1015 let ast
= self.pop_group_end(concat
)?
;
1016 NestLimiter
::new(self).check(&ast
)?
;
1017 Ok(ast
::WithComments
{
1019 comments
: mem
::replace(
1020 &mut *self.parser().comments
.borrow_mut(),
1026 /// Parses an uncounted repetition operation. An uncounted repetition
1027 /// operator includes ?, * and +, but does not include the {m,n} syntax.
1028 /// The given `kind` should correspond to the operator observed by the
1031 /// This assumes that the parser is currently positioned at the repetition
1032 /// operator and advances the parser to the first character after the
1033 /// operator. (Note that the operator may include a single additional `?`,
1034 /// which makes the operator ungreedy.)
1036 /// The caller should include the concatenation that is being built. The
1037 /// concatenation returned includes the repetition operator applied to the
1038 /// last expression in the given concatenation.
1039 fn parse_uncounted_repetition(
1041 mut concat
: ast
::Concat
,
1042 kind
: ast
::RepetitionKind
,
1043 ) -> Result
<ast
::Concat
> {
1045 self.char() == '?'
|| self.char() == '
*'
|| self.char() == '
+'
);
1046 let op_start
= self.pos();
1047 let ast
= match concat
.asts
.pop() {
1049 None
=> return Err(self.error(
1051 ast
::ErrorKind
::RepetitionMissing
,
1055 Ast
::Empty(_
) | Ast
::Flags(_
) => return Err(self.error(
1057 ast
::ErrorKind
::RepetitionMissing
,
1061 let mut greedy
= true;
1062 if self.bump() && self.char() == '?'
{
1066 concat
.asts
.push(Ast
::Repetition(ast
::Repetition
{
1067 span
: ast
.span().with_end(self.pos()),
1068 op
: ast
::RepetitionOp
{
1069 span
: Span
::new(op_start
, self.pos()),
1078 /// Parses a counted repetition operation. A counted repetition operator
1079 /// corresponds to the {m,n} syntax, and does not include the ?, * or + operators.
1082 /// This assumes that the parser is currently positioned at the opening `{`
1083 /// and advances the parser to the first character after the operator.
1084 /// (Note that the operator may include a single additional `?`, which
1085 /// makes the operator ungreedy.)
1087 /// The caller should include the concatenation that is being built. The
1088 /// concatenation returned includes the repetition operator applied to the
1089 /// last expression in the given concatenation.
1090 fn parse_counted_repetition(
1092 mut concat
: ast
::Concat
,
1093 ) -> Result
<ast
::Concat
> {
1094 assert
!(self.char() == '
{'
);
1095 let start
= self.pos();
1096 let ast
= match concat
.asts
.pop() {
1098 None
=> return Err(self.error(
1100 ast
::ErrorKind
::RepetitionMissing
,
1103 if !self.bump_and_bump_space() {
1104 return Err(self.error(
1105 Span
::new(start
, self.pos()),
1106 ast
::ErrorKind
::RepetitionCountUnclosed
,
1109 let count_start
= self.parse_decimal()?
;
1110 let mut range
= ast
::RepetitionRange
::Exactly(count_start
);
1112 return Err(self.error(
1113 Span
::new(start
, self.pos()),
1114 ast
::ErrorKind
::RepetitionCountUnclosed
,
1117 if self.char() == '
,'
{
1118 if !self.bump_and_bump_space() {
1119 return Err(self.error(
1120 Span
::new(start
, self.pos()),
1121 ast
::ErrorKind
::RepetitionCountUnclosed
,
1124 if self.char() != '
}'
{
1125 let count_end
= self.parse_decimal()?
;
1126 range
= ast
::RepetitionRange
::Bounded(count_start
, count_end
);
1128 range
= ast
::RepetitionRange
::AtLeast(count_start
);
1131 if self.is_eof() || self.char() != '
}'
{
1132 return Err(self.error(
1133 Span
::new(start
, self.pos()),
1134 ast
::ErrorKind
::RepetitionCountUnclosed
,
1138 let mut greedy
= true;
1139 if self.bump_and_bump_space() && self.char() == '?'
{
1144 let op_span
= Span
::new(start
, self.pos());
1145 if !range
.is_valid() {
1146 return Err(self.error(
1148 ast
::ErrorKind
::RepetitionCountInvalid
,
1151 concat
.asts
.push(Ast
::Repetition(ast
::Repetition
{
1152 span
: ast
.span().with_end(self.pos()),
1153 op
: ast
::RepetitionOp
{
1155 kind
: ast
::RepetitionKind
::Range(range
),
1163 /// Parse a group (which contains a sub-expression) or a set of flags.
1165 /// If a group was found, then it is returned with an empty AST. If a set
1166 /// of flags is found, then that set is returned.
1168 /// The parser should be positioned at the opening parenthesis.
1170 /// This advances the parser to the character before the start of the
1171 /// sub-expression (in the case of a group) or to the closing parenthesis
1172 /// immediately following the set of flags.
1176 /// If flags are given and incorrectly specified, then a corresponding
1177 /// error is returned.
1179 /// If a capture name is given and it is incorrectly specified, then a
1180 /// corresponding error is returned.
1181 fn parse_group(&self) -> Result
<Either
<ast
::SetFlags
, ast
::Group
>> {
1182 assert_eq
!(self.char(), '
('
);
1183 let open_span
= self.span_char();
1186 if self.is_lookaround_prefix() {
1187 return Err(self.error(
1188 Span
::new(open_span
.start
, self.span().end
),
1189 ast
::ErrorKind
::UnsupportedLookAround
,
1192 let inner_span
= self.span();
1193 if self.bump_if("?P<") {
1194 let capture_index
= self.next_capture_index(open_span
)?
;
1195 let cap
= self.parse_capture_name(capture_index
)?
;
1196 Ok(Either
::Right(ast
::Group
{
1198 kind
: ast
::GroupKind
::CaptureName(cap
),
1199 ast
: Box
::new(Ast
::Empty(self.span())),
1201 } else if self.bump_if("?") {
1203 return Err(self.error(
1205 ast
::ErrorKind
::GroupUnclosed
,
1208 let flags
= self.parse_flags()?
;
1209 let char_end
= self.char();
1211 if char_end
== '
)'
{
1212 // We don't allow empty flags, e.g., `(?)`. We instead
1213 // interpret it as a repetition operator missing its argument.
1214 if flags
.items
.is_empty() {
1215 return Err(self.error(
1217 ast
::ErrorKind
::RepetitionMissing
,
1220 Ok(Either
::Left(ast
::SetFlags
{
1221 span
: Span { end: self.pos(), ..open_span }
,
1225 assert_eq
!(char_end
, '
:'
);
1226 Ok(Either
::Right(ast
::Group
{
1228 kind
: ast
::GroupKind
::NonCapturing(flags
),
1229 ast
: Box
::new(Ast
::Empty(self.span())),
1233 let capture_index
= self.next_capture_index(open_span
)?
;
1234 Ok(Either
::Right(ast
::Group
{
1236 kind
: ast
::GroupKind
::CaptureIndex(capture_index
),
1237 ast
: Box
::new(Ast
::Empty(self.span())),
1242 /// Parses a capture group name. Assumes that the parser is positioned at
1243 /// the first character in the name following the opening `<` (and may
1244 /// possibly be EOF). This advances the parser to the first character
1245 /// following the closing `>`.
1247 /// The caller must provide the capture index of the group for this name.
1248 fn parse_capture_name(
1251 ) -> Result
<ast
::CaptureName
> {
1253 return Err(self.error(
1255 ast
::ErrorKind
::GroupNameUnexpectedEof
,
1258 let start
= self.pos();
1260 if self.char() == '
>'
{
1263 if !is_capture_char(self.char(), self.pos() == start
) {
1264 return Err(self.error(
1266 ast
::ErrorKind
::GroupNameInvalid
,
1273 let end
= self.pos();
1275 return Err(self.error(
1277 ast
::ErrorKind
::GroupNameUnexpectedEof
,
1280 assert_eq
!(self.char(), '
>'
);
1282 let name
= &self.pattern()[start
.offset
..end
.offset
];
1283 if name
.is_empty() {
1284 return Err(self.error(
1285 Span
::new(start
, start
),
1286 ast
::ErrorKind
::GroupNameEmpty
,
1289 let capname
= ast
::CaptureName
{
1290 span
: Span
::new(start
, end
),
1291 name
: name
.to_string(),
1292 index
: capture_index
,
1294 self.add_capture_name(&capname
)?
;
1298 /// Parse a sequence of flags starting at the current character.
1300 /// This advances the parser to the character immediately following the
1301 /// flags, which is guaranteed to be either `:` or `)`.
1305 /// If any flags are duplicated, then an error is returned.
1307 /// If the negation operator is used more than once, then an error is returned.
1310 /// If no flags could be found or if the negation operation is not followed
1311 /// by any flags, then an error is returned.
1312 fn parse_flags(&self) -> Result
<ast
::Flags
> {
1313 let mut flags
= ast
::Flags
{
1317 let mut last_was_negation
= None
;
1318 while self.char() != '
:'
&& self.char() != '
)'
{
1319 if self.char() == '
-'
{
1320 last_was_negation
= Some(self.span_char());
1321 let item
= ast
::FlagsItem
{
1322 span
: self.span_char(),
1323 kind
: ast
::FlagsItemKind
::Negation
,
1325 if let Some(i
) = flags
.add_item(item
) {
1326 return Err(self.error(
1328 ast
::ErrorKind
::FlagRepeatedNegation
{
1329 original
: flags
.items
[i
].span
,
1334 last_was_negation
= None
;
1335 let item
= ast
::FlagsItem
{
1336 span
: self.span_char(),
1337 kind
: ast
::FlagsItemKind
::Flag(self.parse_flag()?
),
1339 if let Some(i
) = flags
.add_item(item
) {
1340 return Err(self.error(
1342 ast
::ErrorKind
::FlagDuplicate
{
1343 original
: flags
.items
[i
].span
,
1349 return Err(self.error(
1351 ast
::ErrorKind
::FlagUnexpectedEof
,
1355 if let Some(span
) = last_was_negation
{
1356 return Err(self.error(span
, ast
::ErrorKind
::FlagDanglingNegation
));
1358 flags
.span
.end
= self.pos();
1362 /// Parse the current character as a flag. Do not advance the parser.
1366 /// If the flag is not recognized, then an error is returned.
1367 fn parse_flag(&self) -> Result
<ast
::Flag
> {
1369 'i'
=> Ok(ast
::Flag
::CaseInsensitive
),
1370 'm'
=> Ok(ast
::Flag
::MultiLine
),
1371 's'
=> Ok(ast
::Flag
::DotMatchesNewLine
),
1372 'U'
=> Ok(ast
::Flag
::SwapGreed
),
1373 'u'
=> Ok(ast
::Flag
::Unicode
),
1374 'x'
=> Ok(ast
::Flag
::IgnoreWhitespace
),
1375 _
=> Err(self.error(
1377 ast
::ErrorKind
::FlagUnrecognized
,
1382 /// Parse a primitive AST, e.g., a literal, non-set character class or assertion.
1385 /// This assumes that the parser expects a primitive at the current
1386 /// location. i.e., All other non-primitive cases have been handled.
1387 /// For example, if the parser's position is at `|`, then `|` will be
1388 /// treated as a literal (e.g., inside a character class).
1390 /// This advances the parser to the first character immediately following the primitive.
1392 fn parse_primitive(&self) -> Result
<Primitive
> {
1394 '
\\'
=> self.parse_escape(),
1396 let ast
= Primitive
::Dot(self.span_char());
1401 let ast
= Primitive
::Assertion(ast
::Assertion
{
1402 span
: self.span_char(),
1403 kind
: ast
::AssertionKind
::StartLine
,
1409 let ast
= Primitive
::Assertion(ast
::Assertion
{
1410 span
: self.span_char(),
1411 kind
: ast
::AssertionKind
::EndLine
,
1417 let ast
= Primitive
::Literal(ast
::Literal
{
1418 span
: self.span_char(),
1419 kind
: ast
::LiteralKind
::Verbatim
,
1428 /// Parse an escape sequence as a primitive AST.
1430 /// This assumes the parser is positioned at the start of the escape
1431 /// sequence, i.e., `\`. It advances the parser to the first position
1432 /// immediately following the escape sequence.
1433 fn parse_escape(&self) -> Result
<Primitive
> {
1434 assert_eq
!(self.char(), '
\\'
);
1435 let start
= self.pos();
1437 return Err(self.error(
1438 Span
::new(start
, self.pos()),
1439 ast
::ErrorKind
::EscapeUnexpectedEof
,
1442 let c
= self.char();
1443 // Put some of the more complicated routines into helpers.
1446 if !self.parser().octal
{
1447 return Err(self.error(
1448 Span
::new(start
, self.span_char().end
),
1449 ast
::ErrorKind
::UnsupportedBackreference
,
1452 let mut lit
= self.parse_octal();
1453 lit
.span
.start
= start
;
1454 return Ok(Primitive
::Literal(lit
));
1456 '
8'
...'
9'
if !self.parser().octal
=> {
1457 return Err(self.error(
1458 Span
::new(start
, self.span_char().end
),
1459 ast
::ErrorKind
::UnsupportedBackreference
,
1462 'x'
| 'u'
| 'U'
=> {
1463 let mut lit
= self.parse_hex()?
;
1464 lit
.span
.start
= start
;
1465 return Ok(Primitive
::Literal(lit
));
1468 let mut cls
= self.parse_unicode_class()?
;
1469 cls
.span
.start
= start
;
1470 return Ok(Primitive
::Unicode(cls
));
1472 'd'
| 's'
| 'w'
| 'D'
| 'S'
| 'W'
=> {
1473 let mut cls
= self.parse_perl_class();
1474 cls
.span
.start
= start
;
1475 return Ok(Primitive
::Perl(cls
));
1480 // Handle all of the one letter sequences inline.
1482 let span
= Span
::new(start
, self.pos());
1483 if is_meta_character(c
) {
1484 return Ok(Primitive
::Literal(ast
::Literal
{
1486 kind
: ast
::LiteralKind
::Punctuation
,
1490 let special
= |kind
, c
| Ok(Primitive
::Literal(ast
::Literal
{
1492 kind
: ast
::LiteralKind
::Special(kind
),
1496 'a'
=> special(ast
::SpecialLiteralKind
::Bell
, '
\x07'
),
1497 'f'
=> special(ast
::SpecialLiteralKind
::FormFeed
, '
\x0C'
),
1498 't'
=> special(ast
::SpecialLiteralKind
::Tab
, '
\t'
),
1499 'n'
=> special(ast
::SpecialLiteralKind
::LineFeed
, '
\n'
),
1500 'r'
=> special(ast
::SpecialLiteralKind
::CarriageReturn
, '
\r'
),
1501 'v'
=> special(ast
::SpecialLiteralKind
::VerticalTab
, '
\x0B'
),
1502 ' '
if self.ignore_whitespace() => {
1503 special(ast
::SpecialLiteralKind
::Space
, ' '
)
1505 'A'
=> Ok(Primitive
::Assertion(ast
::Assertion
{
1507 kind
: ast
::AssertionKind
::StartText
,
1509 'z'
=> Ok(Primitive
::Assertion(ast
::Assertion
{
1511 kind
: ast
::AssertionKind
::EndText
,
1513 'b'
=> Ok(Primitive
::Assertion(ast
::Assertion
{
1515 kind
: ast
::AssertionKind
::WordBoundary
,
1517 'B'
=> Ok(Primitive
::Assertion(ast
::Assertion
{
1519 kind
: ast
::AssertionKind
::NotWordBoundary
,
1521 _
=> Err(self.error(span
, ast
::ErrorKind
::EscapeUnrecognized
)),
1525 /// Parse an octal representation of a Unicode codepoint up to 3 digits
1526 /// long. This expects the parser to be positioned at the first octal
1527 /// digit and advances the parser to the first character immediately
1528 /// following the octal number. This also assumes that parsing octal
1529 /// escapes is enabled.
1531 /// Assuming the preconditions are met, this routine can never fail.
1532 fn parse_octal(&self) -> ast
::Literal
{
1536 assert
!(self.parser().octal
);
1537 assert
!('
0'
<= self.char() && self.char() <= '
7'
);
1538 let start
= self.pos();
1539 // Parse up to two more digits.
1542 '
0'
<= self.char() && self.char() <= '
7'
&&
1543 self.pos().offset
- start
.offset
<= 2
1545 let end
= self.pos();
1546 let octal
= &self.pattern()[start
.offset
..end
.offset
];
1547 // Parsing the octal should never fail since the above guarantees a valid number.
1550 u32::from_str_radix(octal
, 8).expect("valid octal number");
1551 // The max value for 3 digit octal is 0777 = 511 and [0, 511] has no
1552 // invalid Unicode scalar values.
1553 let c
= char::from_u32(codepoint
).expect("Unicode scalar value");
1555 span
: Span
::new(start
, end
),
1556 kind
: ast
::LiteralKind
::Octal
,
1561 /// Parse a hex representation of a Unicode codepoint. This handles both
1562 /// hex notations, i.e., `\xFF` and `\x{FFFF}`. This expects the parser to
1563 /// be positioned at the `x`, `u` or `U` prefix. The parser is advanced to
1564 /// the first character immediately following the hexadecimal literal.
1565 fn parse_hex(&self) -> Result
<ast
::Literal
> {
1566 assert
!(self.char() == 'x'
1567 || self.char() == 'u'
1568 || self.char() == 'U'
);
1570 let hex_kind
= match self.char() {
1571 'x'
=> ast
::HexLiteralKind
::X
,
1572 'u'
=> ast
::HexLiteralKind
::UnicodeShort
,
1573 _
=> ast
::HexLiteralKind
::UnicodeLong
,
1575 if !self.bump_and_bump_space() {
1576 return Err(self.error(
1578 ast
::ErrorKind
::EscapeUnexpectedEof
,
1581 if self.char() == '
{'
{
1582 self.parse_hex_brace(hex_kind
)
1584 self.parse_hex_digits(hex_kind
)
1588 /// Parse an N-digit hex representation of a Unicode codepoint. This
1589 /// expects the parser to be positioned at the first digit and will advance
1590 /// the parser to the first character immediately following the escape sequence.
1593 /// The number of digits given must be 2 (for `\xNN`), 4 (for `\uNNNN`)
1594 /// or 8 (for `\UNNNNNNNN`).
1595 fn parse_hex_digits(
1597 kind
: ast
::HexLiteralKind
,
1598 ) -> Result
<ast
::Literal
> {
1602 let mut scratch
= self.parser().scratch
.borrow_mut();
1605 let start
= self.pos();
1606 for i
in 0..kind
.digits() {
1607 if i
> 0 && !self.bump_and_bump_space() {
1608 return Err(self.error(
1610 ast
::ErrorKind
::EscapeUnexpectedEof
,
1613 if !is_hex(self.char()) {
1614 return Err(self.error(
1616 ast
::ErrorKind
::EscapeHexInvalidDigit
,
1619 scratch
.push(self.char());
1621 // The final bump just moves the parser past the literal, which may be EOF.
1623 self.bump_and_bump_space();
1624 let end
= self.pos();
1625 let hex
= scratch
.as_str();
1626 match u32::from_str_radix(hex
, 16).ok().and_then(char::from_u32
) {
1627 None
=> Err(self.error(
1628 Span
::new(start
, end
),
1629 ast
::ErrorKind
::EscapeHexInvalid
,
1631 Some(c
) => Ok(ast
::Literal
{
1632 span
: Span
::new(start
, end
),
1633 kind
: ast
::LiteralKind
::HexFixed(kind
),
1639 /// Parse a hex representation of any Unicode scalar value. This expects
1640 /// the parser to be positioned at the opening brace `{` and will advance
1641 /// the parser to the first character following the closing brace `}`.
1644 kind
: ast
::HexLiteralKind
,
1645 ) -> Result
<ast
::Literal
> {
1649 let mut scratch
= self.parser().scratch
.borrow_mut();
1652 let brace_pos
= self.pos();
1653 let start
= self.span_char().end
;
1654 while self.bump_and_bump_space() && self.char() != '
}'
{
1655 if !is_hex(self.char()) {
1656 return Err(self.error(
1658 ast
::ErrorKind
::EscapeHexInvalidDigit
,
1661 scratch
.push(self.char());
1664 return Err(self.error(
1665 Span
::new(brace_pos
, self.pos()),
1666 ast
::ErrorKind
::EscapeUnexpectedEof
,
1669 let end
= self.pos();
1670 let hex
= scratch
.as_str();
1671 assert_eq
!(self.char(), '
}'
);
1672 self.bump_and_bump_space();
1675 return Err(self.error(
1676 Span
::new(brace_pos
, self.pos()),
1677 ast
::ErrorKind
::EscapeHexEmpty
,
1680 match u32::from_str_radix(hex
, 16).ok().and_then(char::from_u32
) {
1681 None
=> Err(self.error(
1682 Span
::new(start
, end
),
1683 ast
::ErrorKind
::EscapeHexInvalid
,
1685 Some(c
) => Ok(ast
::Literal
{
1686 span
: Span
::new(start
, self.pos()),
1687 kind
: ast
::LiteralKind
::HexBrace(kind
),
1693 /// Parse a decimal number into a u32 while trimming leading and trailing whitespace.
1696 /// This expects the parser to be positioned at the first position where
1697 /// a decimal digit could occur. This will advance the parser to the byte
1698 /// immediately following the last contiguous decimal digit.
1700 /// If no decimal digit could be found or if there was a problem parsing
1701 /// the complete set of digits into a u32, then an error is returned.
1702 fn parse_decimal(&self) -> Result
<u32> {
1703 let mut scratch
= self.parser().scratch
.borrow_mut();
1706 while !self.is_eof() && self.char().is_whitespace() {
1709 let start
= self.pos();
1710 while !self.is_eof() && '
0'
<= self.char() && self.char() <= '
9'
{
1711 scratch
.push(self.char());
1712 self.bump_and_bump_space();
1714 let span
= Span
::new(start
, self.pos());
1715 while !self.is_eof() && self.char().is_whitespace() {
1716 self.bump_and_bump_space();
1718 let digits
= scratch
.as_str();
1719 if digits
.is_empty() {
1720 return Err(self.error(span
, ast
::ErrorKind
::DecimalEmpty
));
1722 match u32::from_str_radix(digits
, 10).ok() {
1724 None
=> Err(self.error(span
, ast
::ErrorKind
::DecimalInvalid
)),
1728 /// Parse a standard character class consisting primarily of characters or
1729 /// character ranges, but can also contain nested character classes of
1730 /// any type (sans `.`).
1732 /// This assumes the parser is positioned at the opening `[`. If parsing
1733 /// is successful, then the parser is advanced to the position immediately
1734 /// following the closing `]`.
1735 fn parse_set_class(&self) -> Result
<ast
::Class
> {
1736 assert_eq
!(self.char(), '
['
);
1738 let mut union = ast
::ClassSetUnion
{
1745 return Err(self.unclosed_class_error());
1749 // If we've already parsed the opening bracket, then
1750 // attempt to treat this as the beginning of an ASCII
1751 // class. If ASCII class parsing fails, then the parser backs up to the `[`.
1753 if !self.parser().stack_class
.borrow().is_empty() {
1754 if let Some(cls
) = self.maybe_parse_ascii_class() {
1755 union.push(ast
::ClassSetItem
::Ascii(cls
));
1759 union = self.push_class_open(union)?
;
1762 match self.pop_class(union)?
{
1763 Either
::Left(nested_union
) => { union = nested_union; }
1764 Either
::Right(class
) => return Ok(class
),
1767 '
&'
if self.peek() == Some('
&'
) => {
1768 assert
!(self.bump_if("&&"));
1769 union = self.push_class_op(
1770 ast
::ClassSetBinaryOpKind
::Intersection
, union);
1772 '
-'
if self.peek() == Some('
-'
) => {
1773 assert
!(self.bump_if("--"));
1774 union = self.push_class_op(
1775 ast
::ClassSetBinaryOpKind
::Difference
, union);
1777 '
~'
if self.peek() == Some('
~'
) => {
1778 assert
!(self.bump_if("~~"));
1779 union = self.push_class_op(
1780 ast
::ClassSetBinaryOpKind
::SymmetricDifference
, union);
1783 union.push(self.parse_set_class_range()?
);
1789 /// Parse a single primitive item in a character class set. The item to
1790 /// be parsed can either be one of a simple literal character, a range
1791 /// between two simple literal characters or a "primitive" character
1792 /// class like \w or \p{Greek}.
1794 /// If an invalid escape is found, or if a character class is found where
1795 /// a simple literal is expected (e.g., in a range), then an error is returned.
1797 fn parse_set_class_range(&self) -> Result
<ast
::ClassSetItem
> {
1798 let prim1
= self.parse_set_class_item()?
;
1801 return Err(self.unclosed_class_error());
1803 // If the next char isn't a `-`, then we don't have a range.
1804 // There are two exceptions. If the char after a `-` is a `]`, then
1805 // `-` is interpreted as a literal `-`. Alternatively, if the char
1806 // after a `-` is a `-`, then `--` corresponds to a "difference" operation.
1808 if self.char() != '
-'
1809 || self.peek_space() == Some('
]'
)
1810 || self.peek_space() == Some('
-'
)
1812 return prim1
.into_class_set_item(self);
1814 // OK, now we're parsing a range, so bump past the `-` and parse the
1815 // second half of the range.
1816 if !self.bump_and_bump_space() {
1817 return Err(self.unclosed_class_error());
1819 let prim2
= self.parse_set_class_item()?
;
1820 let range
= ast
::ClassSetRange
{
1821 span
: Span
::new(prim1
.span().start
, prim2
.span().end
),
1822 start
: prim1
.into_class_literal(self)?
,
1823 end
: prim2
.into_class_literal(self)?
,
1825 if !range
.is_valid() {
1826 return Err(self.error(
1828 ast
::ErrorKind
::ClassRangeInvalid
,
1831 Ok(ast
::ClassSetItem
::Range(range
))
1834 /// Parse a single item in a character class as a primitive, where the
1835 /// primitive either consists of a verbatim literal or a single escape sequence.
1838 /// This assumes the parser is positioned at the beginning of a primitive,
1839 /// and advances the parser to the first position after the primitive if successful.
1842 /// Note that it is the caller's responsibility to report an error if an
1843 /// illegal primitive was parsed.
1844 fn parse_set_class_item(&self) -> Result
<Primitive
> {
1845 if self.char() == '
\\'
{
1848 let x
= Primitive
::Literal(ast
::Literal
{
1849 span
: self.span_char(),
1850 kind
: ast
::LiteralKind
::Verbatim
,
1858 /// Parses the opening of a character class set. This includes the opening
1859 /// bracket along with `^` if present to indicate negation. This also
1860 /// starts parsing the opening set of unioned items if applicable, since
1861 /// there are special rules applied to certain characters in the opening
1862 /// of a character class. For example, `[^]]` is the class of all
1863 /// characters not equal to `]`. (`]` would need to be escaped in any other
1864 /// position.) Similarly for `-`.
1866 /// In all cases, the op inside the returned `ast::ClassBracketed` is an
1867 /// empty union. This empty union should be replaced with the actual item
1868 /// when it is popped from the parser's stack.
1870 /// This assumes the parser is positioned at the opening `[` and advances
1871 /// the parser to the first non-special byte of the character class.
1873 /// An error is returned if EOF is found.
1874 fn parse_set_class_open(
1876 ) -> Result
<(ast
::ClassBracketed
, ast
::ClassSetUnion
)> {
1877 assert_eq
!(self.char(), '
['
);
1878 let start
= self.pos();
1879 if !self.bump_and_bump_space() {
1880 return Err(self.error(
1881 Span
::new(start
, self.pos()),
1882 ast
::ErrorKind
::ClassUnclosed
,
1887 if self.char() != '
^' {
1890 if !self.bump_and_bump_space() {
1891 return Err(self.error(
1892 Span
::new(start
, self.pos()),
1893 ast
::ErrorKind
::ClassUnclosed
,
1898 // Accept any number of `-` as literal `-`.
1899 let mut union = ast
::ClassSetUnion
{
1903 while self.char() == '
-'
{
1904 union.push(ast
::ClassSetItem
::Literal(ast
::Literal
{
1905 span
: self.span_char(),
1906 kind
: ast
::LiteralKind
::Verbatim
,
1909 if !self.bump_and_bump_space() {
1910 return Err(self.error(
1911 Span
::new(start
, self.pos()),
1912 ast
::ErrorKind
::ClassUnclosed
,
1916 // If `]` is the *first* char in a set, then interpret it as a literal
1917 // `]`. That is, an empty class is impossible to write.
1918 if union.items
.is_empty() && self.char() == '
]'
{
1919 union.push(ast
::ClassSetItem
::Literal(ast
::Literal
{
1920 span
: self.span_char(),
1921 kind
: ast
::LiteralKind
::Verbatim
,
1924 if !self.bump_and_bump_space() {
1925 return Err(self.error(
1926 Span
::new(start
, self.pos()),
1927 ast
::ErrorKind
::ClassUnclosed
,
1931 let set
= ast
::ClassBracketed
{
1932 span
: Span
::new(start
, self.pos()),
1934 kind
: ast
::ClassSet
::union(ast
::ClassSetUnion
{
1935 span
: Span
::new(union.span
.start
, union.span
.start
),
1942 /// Attempt to parse an ASCII character class, e.g., `[:alnum:]`.
1944 /// This assumes the parser is positioned at the opening `[`.
1946 /// If no valid ASCII character class could be found, then this does not
1947 /// advance the parser and `None` is returned. Otherwise, the parser is
1948 /// advanced to the first byte following the closing `]` and the
1949 /// corresponding ASCII class is returned.
1950 fn maybe_parse_ascii_class(&self) -> Option
<ast
::ClassAscii
> {
1951 // ASCII character classes are interesting from a parsing perspective
1952 // because parsing cannot fail with any interesting error. For example,
1953 // in order to use an ASCII character class, it must be enclosed in
1954 // double brackets, e.g., `[[:alnum:]]`. Alternatively, you might think
1955 // of it as "ASCII character classes have the syntax `[:NAME:]`
1956 // which can only appear within character brackets." This means that
1957 // things like `[[:lower:]A]` are legal constructs.
1959 // However, if one types an incorrect ASCII character class, e.g.,
1960 // `[[:loower:]]`, then we treat that as a normal nested character
1961 // class containing the characters `:elorw`. One might argue that we
1962 // should return an error instead since the repeated colons give away
1963 // the intent to write an ASCII class. But what if the user typed
1964 // `[[:lower]]` instead? How can we tell that was intended to be an
1965 // ASCII class and not just a normal nested class?
1967 // Reasonable people can probably disagree over this, but for better
1968 // or worse, we implement semantics that never fails at the expense
1969 // of better failure modes.
1970 assert_eq
!(self.char(), '
['
);
1971 // If parsing fails, then we back up the parser to this starting point.
1972 let start
= self.pos();
1973 let mut negated
= false;
1974 if !self.bump() || self.char() != '
:'
{
1975 self.parser().pos
.set(start
);
1979 self.parser().pos
.set(start
);
1982 if self.char() == '
^' {
1985 self.parser().pos
.set(start
);
1989 let name_start
= self.offset();
1990 while self.char() != '
:'
&& self.bump() {}
1992 self.parser().pos
.set(start
);
1995 let name
= &self.pattern()[name_start
..self.offset()];
1996 if !self.bump_if(":]") {
1997 self.parser().pos
.set(start
);
2000 let kind
= match ast
::ClassAsciiKind
::from_name(name
) {
2003 self.parser().pos
.set(start
);
2007 Some(ast
::ClassAscii
{
2008 span
: Span
::new(start
, self.pos()),
2014 /// Parse a Unicode class in either the single character notation, `\pN`
2015 /// or the multi-character bracketed notation, `\p{Greek}`. This assumes
2016 /// the parser is positioned at the `p` (or `P` for negation) and will
2017 /// advance the parser to the character immediately following the class.
2019 /// Note that this does not check whether the class name is valid or not.
2020 fn parse_unicode_class(&self) -> Result
<ast
::ClassUnicode
> {
2021 assert
!(self.char() == 'p'
|| self.char() == 'P'
);
2023 let mut scratch
= self.parser().scratch
.borrow_mut();
2026 let negated
= self.char() == 'P'
;
2027 if !self.bump_and_bump_space() {
2028 return Err(self.error(
2030 ast
::ErrorKind
::EscapeUnexpectedEof
,
2034 if self.char() == '
{'
{
2035 let start
= self.span_char().end
;
2036 while self.bump_and_bump_space() && self.char() != '
}'
{
2037 scratch
.push(self.char());
2040 return Err(self.error(
2042 ast
::ErrorKind
::EscapeUnexpectedEof
,
2045 assert_eq
!(self.char(), '
}'
);
2048 let name
= scratch
.as_str();
2049 if let Some(i
) = name
.find("!=") {
2050 (start
, ast
::ClassUnicodeKind
::NamedValue
{
2051 op
: ast
::ClassUnicodeOpKind
::NotEqual
,
2052 name
: name
[..i
].to_string(),
2053 value
: name
[i
+2..].to_string(),
2055 } else if let Some(i
) = name
.find('
:'
) {
2056 (start
, ast
::ClassUnicodeKind
::NamedValue
{
2057 op
: ast
::ClassUnicodeOpKind
::Colon
,
2058 name
: name
[..i
].to_string(),
2059 value
: name
[i
+1..].to_string(),
2061 } else if let Some(i
) = name
.find('
='
) {
2062 (start
, ast
::ClassUnicodeKind
::NamedValue
{
2063 op
: ast
::ClassUnicodeOpKind
::Equal
,
2064 name
: name
[..i
].to_string(),
2065 value
: name
[i
+1..].to_string(),
2068 (start
, ast
::ClassUnicodeKind
::Named(name
.to_string()))
2071 let start
= self.pos();
2072 let c
= self.char();
2073 self.bump_and_bump_space();
2074 let kind
= ast
::ClassUnicodeKind
::OneLetter(c
);
2077 Ok(ast
::ClassUnicode
{
2078 span
: Span
::new(start
, self.pos()),
2084 /// Parse a Perl character class, e.g., `\d` or `\W`. This assumes the
2085 /// parser is currently at a valid character class name and will be
2086 /// advanced to the character immediately following the class.
2087 fn parse_perl_class(&self) -> ast
::ClassPerl
{
2088 let c
= self.char();
2089 let span
= self.span_char();
2091 let (negated
, kind
) = match c
{
2092 'd'
=> (false, ast
::ClassPerlKind
::Digit
),
2093 'D'
=> (true, ast
::ClassPerlKind
::Digit
),
2094 's'
=> (false, ast
::ClassPerlKind
::Space
),
2095 'S'
=> (true, ast
::ClassPerlKind
::Space
),
2096 'w'
=> (false, ast
::ClassPerlKind
::Word
),
2097 'W'
=> (true, ast
::ClassPerlKind
::Word
),
2098 c
=> panic
!("expected valid Perl class but got '{}'", c
),
2100 ast
::ClassPerl { span: span, kind: kind, negated: negated }
2104 /// A type that traverses a fully parsed Ast and checks whether its depth
2105 /// exceeds the specified nesting limit. If it does, then an error is returned.
2107 struct NestLimiter
<'p
, 's
: 'p
, P
: 'p
+ 's
> {
2108 /// The parser that is checking the nest limit.
2109 p
: &'p ParserI
<'s
, P
>,
2110 /// The current depth while walking an Ast.
2114 impl<'p
, 's
, P
: Borrow
<Parser
>> NestLimiter
<'p
, 's
, P
> {
2115 fn new(p
: &'p ParserI
<'s
, P
>) -> NestLimiter
<'p
, 's
, P
> {
2116 NestLimiter { p: p, depth: 0 }
2119 fn check(self, ast
: &Ast
) -> Result
<()> {
2120 ast
::visit(ast
, self)
2123 fn increment_depth(&mut self, span
: &Span
) -> Result
<()> {
2124 let new
= self.depth
.checked_add(1).ok_or_else(|| self.p
.error(
2126 ast
::ErrorKind
::NestLimitExceeded(::std
::u32::MAX
),
2128 let limit
= self.p
.parser().nest_limit
;
2130 return Err(self.p
.error(
2132 ast
::ErrorKind
::NestLimitExceeded(limit
),
2139 fn decrement_depth(&mut self) {
2140 // Assuming the correctness of the visitor, this should never drop below 0.
2142 self.depth
= self.depth
.checked_sub(1).unwrap();
2146 impl<'p
, 's
, P
: Borrow
<Parser
>> ast
::Visitor
for NestLimiter
<'p
, 's
, P
> {
2148 type Err
= ast
::Error
;
2150 fn finish(self) -> Result
<()> {
2154 fn visit_pre(&mut self, ast
: &Ast
) -> Result
<()> {
2155 let span
= match *ast
{
2161 | Ast
::Class(ast
::Class
::Unicode(_
))
2162 | Ast
::Class(ast
::Class
::Perl(_
)) => {
2163 // These are all base cases, so we don't increment depth.
2166 Ast
::Class(ast
::Class
::Bracketed(ref x
)) => &x
.span
,
2167 Ast
::Repetition(ref x
) => &x
.span
,
2168 Ast
::Group(ref x
) => &x
.span
,
2169 Ast
::Alternation(ref x
) => &x
.span
,
2170 Ast
::Concat(ref x
) => &x
.span
,
2172 self.increment_depth(span
)
2175 fn visit_post(&mut self, ast
: &Ast
) -> Result
<()> {
2182 | Ast
::Class(ast
::Class
::Unicode(_
))
2183 | Ast
::Class(ast
::Class
::Perl(_
)) => {
2184 // These are all base cases, so we don't decrement depth.
2187 Ast
::Class(ast
::Class
::Bracketed(_
))
2188 | Ast
::Repetition(_
)
2190 | Ast
::Alternation(_
)
2191 | Ast
::Concat(_
) => {
2192 self.decrement_depth();
2198 fn visit_class_set_item_pre(
2200 ast
: &ast
::ClassSetItem
,
2202 let span
= match *ast
{
2203 ast
::ClassSetItem
::Empty(_
)
2204 | ast
::ClassSetItem
::Literal(_
)
2205 | ast
::ClassSetItem
::Range(_
)
2206 | ast
::ClassSetItem
::Ascii(_
)
2207 | ast
::ClassSetItem
::Unicode(_
)
2208 | ast
::ClassSetItem
::Perl(_
) => {
2209 // These are all base cases, so we don't increment depth.
2212 ast
::ClassSetItem
::Bracketed(ref x
) => &x
.span
,
2213 ast
::ClassSetItem
::Union(ref x
) => &x
.span
,
2215 self.increment_depth(span
)
2218 fn visit_class_set_item_post(
2220 ast
: &ast
::ClassSetItem
,
2223 ast
::ClassSetItem
::Empty(_
)
2224 | ast
::ClassSetItem
::Literal(_
)
2225 | ast
::ClassSetItem
::Range(_
)
2226 | ast
::ClassSetItem
::Ascii(_
)
2227 | ast
::ClassSetItem
::Unicode(_
)
2228 | ast
::ClassSetItem
::Perl(_
) => {
2229 // These are all base cases, so we don't decrement depth.
2232 ast
::ClassSetItem
::Bracketed(_
)
2233 | ast
::ClassSetItem
::Union(_
) => {
2234 self.decrement_depth();
2240 fn visit_class_set_binary_op_pre(
2242 ast
: &ast
::ClassSetBinaryOp
,
2244 self.increment_depth(&ast
.span
)
2247 fn visit_class_set_binary_op_post(
2249 _ast
: &ast
::ClassSetBinaryOp
,
2251 self.decrement_depth();
2258 use std
::ops
::Range
;
2260 use ast
::{self, Ast, Position, Span}
;
2261 use super::{Parser, ParserI, ParserBuilder, Primitive}
;
// Our own assert_eq, which has slightly better formatting (but honestly
// still kind of crappy).
//
// Both operands are evaluated exactly once and compared by reference with
// `==`. On mismatch this panics with the `Debug` rendering of each side on
// its own line.
macro_rules! assert_eq {
    ($left:expr, $right:expr) => ({
        // Binding through `match` keeps temporaries created by the operand
        // expressions alive for the duration of the comparison.
        match (&$left, &$right) {
            (left_val, right_val) => {
                if !(*left_val == *right_val) {
                    panic!("assertion failed: `(left == right)`\n\n\
                            left: `{:?}`\nright: `{:?}`\n\n",
                           left_val, right_val)
                }
            }
        }
    });
}
2279 // We create these errors to compare with real ast::Errors in the tests.
2280 // We define equality between TestError and ast::Error to disregard the
2281 // pattern string in ast::Error, which is annoying to provide in tests.
2282 #[derive(Clone, Debug)]
2285 kind
: ast
::ErrorKind
,
2288 impl PartialEq
<ast
::Error
> for TestError
{
2289 fn eq(&self, other
: &ast
::Error
) -> bool
{
2290 self.span
== other
.span
&& self.kind
== other
.kind
2294 impl PartialEq
<TestError
> for ast
::Error
{
2295 fn eq(&self, other
: &TestError
) -> bool
{
2296 self.span
== other
.span
&& self.kind
== other
.kind
/// Short alias for converting a string slice into an owned `String`.
fn s(str: &str) -> String {
    str.to_string()
}
2304 fn parser(pattern
: &str) -> ParserI
<Parser
> {
2305 ParserI
::new(Parser
::new(), pattern
)
2308 fn parser_octal(pattern
: &str) -> ParserI
<Parser
> {
2309 let parser
= ParserBuilder
::new().octal(true).build();
2310 ParserI
::new(parser
, pattern
)
2313 fn parser_nest_limit(pattern
: &str, nest_limit
: u32) -> ParserI
<Parser
> {
2314 let p
= ParserBuilder
::new().nest_limit(nest_limit
).build();
2315 ParserI
::new(p
, pattern
)
2318 fn parser_ignore_whitespace(pattern
: &str) -> ParserI
<Parser
> {
2319 let p
= ParserBuilder
::new().ignore_whitespace(true).build();
2320 ParserI
::new(p
, pattern
)
2323 /// Short alias for creating a new span.
2324 fn nspan(start
: Position
, end
: Position
) -> Span
{
2325 Span
::new(start
, end
)
2328 /// Short alias for creating a new position.
2329 fn npos(offset
: usize, line
: usize, column
: usize) -> Position
{
2330 Position
::new(offset
, line
, column
)
2333 /// Create a new span from the given offset range. This assumes a single
2334 /// line and sets the columns based on the offsets. i.e., This only works
2335 /// out of the box for ASCII, which is fine for most tests.
2336 fn span(range
: Range
<usize>) -> Span
{
2337 let start
= Position
::new(range
.start
, 1, range
.start
+ 1);
2338 let end
= Position
::new(range
.end
, 1, range
.end
+ 1);
2339 Span
::new(start
, end
)
2342 /// Create a new span for the corresponding byte range in the given string.
2343 fn span_range(subject
: &str, range
: Range
<usize>) -> Span
{
2344 let start
= Position
{
2345 offset
: range
.start
,
2346 line
: 1 + subject
[..range
.start
].matches('
\n'
).count(),
2347 column
: 1 + subject
[..range
.start
]
2350 .position(|c
| c
== '
\n'
)
2351 .unwrap_or(subject
[..range
.start
].chars().count()),
2353 let end
= Position
{
2355 line
: 1 + subject
[..range
.end
].matches('
\n'
).count(),
2356 column
: 1 + subject
[..range
.end
]
2359 .position(|c
| c
== '
\n'
)
2360 .unwrap_or(subject
[..range
.end
].chars().count()),
2362 Span
::new(start
, end
)
2365 /// Create a verbatim literal starting at the given position.
2366 fn lit(c
: char, start
: usize) -> Ast
{
2367 lit_with(c
, span(start
..start
+ c
.len_utf8()))
2370 /// Create a punctuation literal starting at the given position.
2371 fn punct_lit(c
: char, span
: Span
) -> Ast
{
2372 Ast
::Literal(ast
::Literal
{
2374 kind
: ast
::LiteralKind
::Punctuation
,
2379 /// Create a verbatim literal with the given span.
2380 fn lit_with(c
: char, span
: Span
) -> Ast
{
2381 Ast
::Literal(ast
::Literal
{
2383 kind
: ast
::LiteralKind
::Verbatim
,
2388 /// Create a concatenation with the given range.
2389 fn concat(range
: Range
<usize>, asts
: Vec
<Ast
>) -> Ast
{
2390 concat_with(span(range
), asts
)
2393 /// Create a concatenation with the given span.
2394 fn concat_with(span
: Span
, asts
: Vec
<Ast
>) -> Ast
{
2395 Ast
::Concat(ast
::Concat { span: span, asts: asts }
)
2398 /// Create an alternation with the given span.
2399 fn alt(range
: Range
<usize>, asts
: Vec
<Ast
>) -> Ast
{
2400 Ast
::Alternation(ast
::Alternation { span: span(range), asts: asts }
)
2403 /// Create a capturing group with the given span.
2404 fn group(range
: Range
<usize>, index
: u32, ast
: Ast
) -> Ast
{
2405 Ast
::Group(ast
::Group
{
2407 kind
: ast
::GroupKind
::CaptureIndex(index
),
2412 /// Create an ast::SetFlags.
2414 /// The given pattern should be the full pattern string. The range given
2415 /// should correspond to the byte offsets where the flag set occurs.
2417 /// If negated is true, then the set is interpreted as beginning with a
2421 range
: Range
<usize>,
2425 let mut items
= vec
![
2427 span
: span_range(pat
, (range
.end
- 2)..(range
.end
- 1)),
2428 kind
: ast
::FlagsItemKind
::Flag(flag
),
2432 items
.insert(0, ast
::FlagsItem
{
2433 span
: span_range(pat
, (range
.start
+ 2)..(range
.end
- 2)),
2434 kind
: ast
::FlagsItemKind
::Negation
,
2437 Ast
::Flags(ast
::SetFlags
{
2438 span
: span_range(pat
, range
.clone()),
2440 span
: span_range(pat
, (range
.start
+ 2)..(range
.end
- 1)),
2447 fn parse_nest_limit() {
2448 // A nest limit of 0 still allows some types of regexes.
2450 parser_nest_limit("", 0).parse(),
2451 Ok(Ast
::Empty(span(0..0))));
2453 parser_nest_limit("a", 0).parse(),
2456 // Test repetition operations, which require one level of nesting.
2458 parser_nest_limit("a+", 0).parse().unwrap_err(),
2461 kind
: ast
::ErrorKind
::NestLimitExceeded(0),
2464 parser_nest_limit("a+", 1).parse(),
2465 Ok(Ast
::Repetition(ast
::Repetition
{
2467 op
: ast
::RepetitionOp
{
2469 kind
: ast
::RepetitionKind
::OneOrMore
,
2472 ast
: Box
::new(lit('a'
, 0)),
2475 parser_nest_limit("(a)+", 1).parse().unwrap_err(),
2478 kind
: ast
::ErrorKind
::NestLimitExceeded(1),
2481 parser_nest_limit("a+*", 1).parse().unwrap_err(),
2484 kind
: ast
::ErrorKind
::NestLimitExceeded(1),
2487 parser_nest_limit("a+*", 2).parse(),
2488 Ok(Ast
::Repetition(ast
::Repetition
{
2490 op
: ast
::RepetitionOp
{
2492 kind
: ast
::RepetitionKind
::ZeroOrMore
,
2495 ast
: Box
::new(Ast
::Repetition(ast
::Repetition
{
2497 op
: ast
::RepetitionOp
{
2499 kind
: ast
::RepetitionKind
::OneOrMore
,
2502 ast
: Box
::new(lit('a'
, 0)),
2506 // Test concatenations. A concatenation requires one level of nesting.
2508 parser_nest_limit("ab", 0).parse().unwrap_err(),
2511 kind
: ast
::ErrorKind
::NestLimitExceeded(0),
2514 parser_nest_limit("ab", 1).parse(),
2515 Ok(concat(0..2, vec
![lit('a'
, 0), lit('b'
, 1)])));
2517 parser_nest_limit("abc", 1).parse(),
2518 Ok(concat(0..3, vec
![lit('a'
, 0), lit('b'
, 1), lit('c'
, 2)])));
2520 // Test alternations. An alternation requires one level of nesting.
2522 parser_nest_limit("a|b", 0).parse().unwrap_err(),
2525 kind
: ast
::ErrorKind
::NestLimitExceeded(0),
2528 parser_nest_limit("a|b", 1).parse(),
2529 Ok(alt(0..3, vec
![lit('a'
, 0), lit('b'
, 2)])));
2531 parser_nest_limit("a|b|c", 1).parse(),
2532 Ok(alt(0..5, vec
![lit('a'
, 0), lit('b'
, 2), lit('c'
, 4)])));
2534 // Test character classes. Classes form their own mini-recursive
2537 parser_nest_limit("[a]", 0).parse().unwrap_err(),
2540 kind
: ast
::ErrorKind
::NestLimitExceeded(0),
2543 parser_nest_limit("[a]", 1).parse(),
2544 Ok(Ast
::Class(ast
::Class
::Bracketed(ast
::ClassBracketed
{
2547 kind
: ast
::ClassSet
::Item(
2548 ast
::ClassSetItem
::Literal(ast
::Literal
{
2550 kind
: ast
::LiteralKind
::Verbatim
,
2556 parser_nest_limit("[ab]", 1).parse().unwrap_err(),
2559 kind
: ast
::ErrorKind
::NestLimitExceeded(1),
2562 parser_nest_limit("[ab[cd]]", 2).parse().unwrap_err(),
2565 kind
: ast
::ErrorKind
::NestLimitExceeded(2),
2568 parser_nest_limit("[ab[cd]]", 3).parse().unwrap_err(),
2571 kind
: ast
::ErrorKind
::NestLimitExceeded(3),
2574 parser_nest_limit("[a--b]", 1).parse().unwrap_err(),
2577 kind
: ast
::ErrorKind
::NestLimitExceeded(1),
2580 parser_nest_limit("[a--bc]", 2).parse().unwrap_err(),
2583 kind
: ast
::ErrorKind
::NestLimitExceeded(2),
2588 fn parse_comments() {
2590 # This is comment 1.
2591 foo # This is comment 2.
2592 # This is comment 3.
2594 # This is comment 4.";
2595 let astc
= parser(pat
).parse_with_comments().unwrap();
2598 concat_with(span_range(pat
, 0..pat
.len()), vec
![
2599 flag_set(pat
, 0..4, ast
::Flag
::IgnoreWhitespace
, false),
2600 lit_with('f'
, span_range(pat
, 26..27)),
2601 lit_with('o'
, span_range(pat
, 27..28)),
2602 lit_with('o'
, span_range(pat
, 28..29)),
2603 lit_with('b'
, span_range(pat
, 74..75)),
2604 lit_with('a'
, span_range(pat
, 75..76)),
2605 lit_with('r'
, span_range(pat
, 76..77)),
2607 assert_eq
!(astc
.comments
, vec
![
2609 span
: span_range(pat
, 5..26),
2610 comment
: s(" This is comment 1."),
2613 span
: span_range(pat
, 30..51),
2614 comment
: s(" This is comment 2."),
2617 span
: span_range(pat
, 53..74),
2618 comment
: s(" This is comment 3."),
2621 span
: span_range(pat
, 78..98),
2622 comment
: s(" This is comment 4."),
2628 fn parse_holistic() {
2630 parser("]").parse(),
2633 parser(r
"\\\.\+\*\?\(\)\|\[\]\{\}\^\$\#\&\-\~").parse(),
2634 Ok(concat(0..36, vec
![
2635 punct_lit('
\\'
, span(0..2)),
2636 punct_lit('
.'
, span(2..4)),
2637 punct_lit('
+'
, span(4..6)),
2638 punct_lit('
*'
, span(6..8)),
2639 punct_lit('?'
, span(8..10)),
2640 punct_lit('
('
, span(10..12)),
2641 punct_lit('
)'
, span(12..14)),
2642 punct_lit('
|'
, span(14..16)),
2643 punct_lit('
['
, span(16..18)),
2644 punct_lit('
]'
, span(18..20)),
2645 punct_lit('
{'
, span(20..22)),
2646 punct_lit('
}'
, span(22..24)),
2647 punct_lit('
^', span(24..26)),
2648 punct_lit('$'
, span(26..28)),
2649 punct_lit('
#', span(28..30)),
2650 punct_lit('
&'
, span(30..32)),
2651 punct_lit('
-'
, span(32..34)),
2652 punct_lit('
~'
, span(34..36)),
2657 fn parse_ignore_whitespace() {
2658 // Test that basic whitespace insensitivity works.
2659 let pat
= "(?x)a b";
2661 parser(pat
).parse(),
2662 Ok(concat_with(nspan(npos(0, 1, 1), npos(7, 1, 8)), vec
![
2663 flag_set(pat
, 0..4, ast
::Flag
::IgnoreWhitespace
, false),
2664 lit_with('a'
, nspan(npos(4, 1, 5), npos(5, 1, 6))),
2665 lit_with('b'
, nspan(npos(6, 1, 7), npos(7, 1, 8))),
2668 // Test that we can toggle whitespace insensitivity.
2669 let pat
= "(?x)a b(?-x)a b";
2671 parser(pat
).parse(),
2672 Ok(concat_with(nspan(npos(0, 1, 1), npos(15, 1, 16)), vec
![
2673 flag_set(pat
, 0..4, ast
::Flag
::IgnoreWhitespace
, false),
2674 lit_with('a'
, nspan(npos(4, 1, 5), npos(5, 1, 6))),
2675 lit_with('b'
, nspan(npos(6, 1, 7), npos(7, 1, 8))),
2676 flag_set(pat
, 7..12, ast
::Flag
::IgnoreWhitespace
, true),
2677 lit_with('a'
, nspan(npos(12, 1, 13), npos(13, 1, 14))),
2678 lit_with(' '
, nspan(npos(13, 1, 14), npos(14, 1, 15))),
2679 lit_with('b'
, nspan(npos(14, 1, 15), npos(15, 1, 16))),
2682 // Test that nesting whitespace insensitive flags works.
2683 let pat
= "a (?x:a )a ";
2685 parser(pat
).parse(),
2686 Ok(concat_with(span_range(pat
, 0..11), vec
![
2687 lit_with('a'
, span_range(pat
, 0..1)),
2688 lit_with(' '
, span_range(pat
, 1..2)),
2689 Ast
::Group(ast
::Group
{
2690 span
: span_range(pat
, 2..9),
2691 kind
: ast
::GroupKind
::NonCapturing(ast
::Flags
{
2692 span
: span_range(pat
, 4..5),
2695 span
: span_range(pat
, 4..5),
2696 kind
: ast
::FlagsItemKind
::Flag(
2697 ast
::Flag
::IgnoreWhitespace
),
2701 ast
: Box
::new(lit_with('a'
, span_range(pat
, 6..7))),
2703 lit_with('a'
, span_range(pat
, 9..10)),
2704 lit_with(' '
, span_range(pat
, 10..11)),
2707 // Test that whitespace after an opening paren is insignificant.
2708 let pat
= "(?x)( ?P<foo> a )";
2710 parser(pat
).parse(),
2711 Ok(concat_with(span_range(pat
, 0..pat
.len()), vec
![
2712 flag_set(pat
, 0..4, ast
::Flag
::IgnoreWhitespace
, false),
2713 Ast
::Group(ast
::Group
{
2714 span
: span_range(pat
, 4..pat
.len()),
2715 kind
: ast
::GroupKind
::CaptureName(ast
::CaptureName
{
2716 span
: span_range(pat
, 9..12),
2720 ast
: Box
::new(lit_with('a'
, span_range(pat
, 14..15))),
2723 let pat
= "(?x)( a )";
2725 parser(pat
).parse(),
2726 Ok(concat_with(span_range(pat
, 0..pat
.len()), vec
![
2727 flag_set(pat
, 0..4, ast
::Flag
::IgnoreWhitespace
, false),
2728 Ast
::Group(ast
::Group
{
2729 span
: span_range(pat
, 4..pat
.len()),
2730 kind
: ast
::GroupKind
::CaptureIndex(1),
2731 ast
: Box
::new(lit_with('a'
, span_range(pat
, 7..8))),
2734 let pat
= "(?x)( ?: a )";
2736 parser(pat
).parse(),
2737 Ok(concat_with(span_range(pat
, 0..pat
.len()), vec
![
2738 flag_set(pat
, 0..4, ast
::Flag
::IgnoreWhitespace
, false),
2739 Ast
::Group(ast
::Group
{
2740 span
: span_range(pat
, 4..pat
.len()),
2741 kind
: ast
::GroupKind
::NonCapturing(ast
::Flags
{
2742 span
: span_range(pat
, 8..8),
2745 ast
: Box
::new(lit_with('a'
, span_range(pat
, 11..12))),
2748 let pat
= r
"(?x)\x { 53 }";
2750 parser(pat
).parse(),
2751 Ok(concat_with(span_range(pat
, 0..pat
.len()), vec
![
2752 flag_set(pat
, 0..4, ast
::Flag
::IgnoreWhitespace
, false),
2753 Ast
::Literal(ast
::Literal
{
2755 kind
: ast
::LiteralKind
::HexBrace(ast
::HexLiteralKind
::X
),
2760 // Test that whitespace after an escape is OK.
2761 let pat
= r
"(?x)\ ";
2763 parser(pat
).parse(),
2764 Ok(concat_with(span_range(pat
, 0..pat
.len()), vec
![
2765 flag_set(pat
, 0..4, ast
::Flag
::IgnoreWhitespace
, false),
2766 Ast
::Literal(ast
::Literal
{
2767 span
: span_range(pat
, 4..6),
2768 kind
: ast
::LiteralKind
::Special(
2769 ast
::SpecialLiteralKind
::Space
),
2773 // ... but only when `x` mode is enabled.
2776 parser(pat
).parse().unwrap_err(),
2778 span
: span_range(pat
, 0..2),
2779 kind
: ast
::ErrorKind
::EscapeUnrecognized
,
2784 fn parse_newlines() {
2787 parser(pat
).parse(),
2788 Ok(concat_with(span_range(pat
, 0..3), vec
![
2789 Ast
::Dot(span_range(pat
, 0..1)),
2790 lit_with('
\n'
, span_range(pat
, 1..2)),
2791 Ast
::Dot(span_range(pat
, 2..3)),
2794 let pat
= "foobar\nbaz\nquux\n";
2796 parser(pat
).parse(),
2797 Ok(concat_with(span_range(pat
, 0..pat
.len()), vec
![
2798 lit_with('f'
, nspan(npos(0, 1, 1), npos(1, 1, 2))),
2799 lit_with('o'
, nspan(npos(1, 1, 2), npos(2, 1, 3))),
2800 lit_with('o'
, nspan(npos(2, 1, 3), npos(3, 1, 4))),
2801 lit_with('b'
, nspan(npos(3, 1, 4), npos(4, 1, 5))),
2802 lit_with('a'
, nspan(npos(4, 1, 5), npos(5, 1, 6))),
2803 lit_with('r'
, nspan(npos(5, 1, 6), npos(6, 1, 7))),
2804 lit_with('
\n'
, nspan(npos(6, 1, 7), npos(7, 2, 1))),
2805 lit_with('b'
, nspan(npos(7, 2, 1), npos(8, 2, 2))),
2806 lit_with('a'
, nspan(npos(8, 2, 2), npos(9, 2, 3))),
2807 lit_with('z'
, nspan(npos(9, 2, 3), npos(10, 2, 4))),
2808 lit_with('
\n'
, nspan(npos(10, 2, 4), npos(11, 3, 1))),
2809 lit_with('q'
, nspan(npos(11, 3, 1), npos(12, 3, 2))),
2810 lit_with('u'
, nspan(npos(12, 3, 2), npos(13, 3, 3))),
2811 lit_with('u'
, nspan(npos(13, 3, 3), npos(14, 3, 4))),
2812 lit_with('x'
, nspan(npos(14, 3, 4), npos(15, 3, 5))),
2813 lit_with('
\n'
, nspan(npos(15, 3, 5), npos(16, 4, 1))),
2818 fn parse_uncounted_repetition() {
2820 parser(r
"a*").parse(),
2821 Ok(Ast
::Repetition(ast
::Repetition
{
2823 op
: ast
::RepetitionOp
{
2825 kind
: ast
::RepetitionKind
::ZeroOrMore
,
2828 ast
: Box
::new(lit('a'
, 0)),
2831 parser(r
"a+").parse(),
2832 Ok(Ast
::Repetition(ast
::Repetition
{
2834 op
: ast
::RepetitionOp
{
2836 kind
: ast
::RepetitionKind
::OneOrMore
,
2839 ast
: Box
::new(lit('a'
, 0)),
2843 parser(r
"a?").parse(),
2844 Ok(Ast
::Repetition(ast
::Repetition
{
2846 op
: ast
::RepetitionOp
{
2848 kind
: ast
::RepetitionKind
::ZeroOrOne
,
2851 ast
: Box
::new(lit('a'
, 0)),
2854 parser(r
"a??").parse(),
2855 Ok(Ast
::Repetition(ast
::Repetition
{
2857 op
: ast
::RepetitionOp
{
2859 kind
: ast
::RepetitionKind
::ZeroOrOne
,
2862 ast
: Box
::new(lit('a'
, 0)),
2865 parser(r
"a?").parse(),
2866 Ok(Ast
::Repetition(ast
::Repetition
{
2868 op
: ast
::RepetitionOp
{
2870 kind
: ast
::RepetitionKind
::ZeroOrOne
,
2873 ast
: Box
::new(lit('a'
, 0)),
2876 parser(r
"a?b").parse(),
2877 Ok(concat(0..3, vec
![
2878 Ast
::Repetition(ast
::Repetition
{
2880 op
: ast
::RepetitionOp
{
2882 kind
: ast
::RepetitionKind
::ZeroOrOne
,
2885 ast
: Box
::new(lit('a'
, 0)),
2890 parser(r
"a??b").parse(),
2891 Ok(concat(0..4, vec
![
2892 Ast
::Repetition(ast
::Repetition
{
2894 op
: ast
::RepetitionOp
{
2896 kind
: ast
::RepetitionKind
::ZeroOrOne
,
2899 ast
: Box
::new(lit('a'
, 0)),
2904 parser(r
"ab?").parse(),
2905 Ok(concat(0..3, vec
![
2907 Ast
::Repetition(ast
::Repetition
{
2909 op
: ast
::RepetitionOp
{
2911 kind
: ast
::RepetitionKind
::ZeroOrOne
,
2914 ast
: Box
::new(lit('b'
, 1)),
2918 parser(r
"(ab)?").parse(),
2919 Ok(Ast
::Repetition(ast
::Repetition
{
2921 op
: ast
::RepetitionOp
{
2923 kind
: ast
::RepetitionKind
::ZeroOrOne
,
2926 ast
: Box
::new(group(0..4, 1, concat(1..3, vec
![
2932 parser(r
"|a?").parse(),
2934 Ast
::Empty(span(0..0)),
2935 Ast
::Repetition(ast
::Repetition
{
2937 op
: ast
::RepetitionOp
{
2939 kind
: ast
::RepetitionKind
::ZeroOrOne
,
2942 ast
: Box
::new(lit('a'
, 1)),
2947 parser(r
"*").parse().unwrap_err(),
2950 kind
: ast
::ErrorKind
::RepetitionMissing
,
2953 parser(r
"(?i)*").parse().unwrap_err(),
2956 kind
: ast
::ErrorKind
::RepetitionMissing
,
2959 parser(r
"(*)").parse().unwrap_err(),
2962 kind
: ast
::ErrorKind
::RepetitionMissing
,
2965 parser(r
"(?:?)").parse().unwrap_err(),
2968 kind
: ast
::ErrorKind
::RepetitionMissing
,
2971 parser(r
"+").parse().unwrap_err(),
2974 kind
: ast
::ErrorKind
::RepetitionMissing
,
2977 parser(r
"?").parse().unwrap_err(),
2980 kind
: ast
::ErrorKind
::RepetitionMissing
,
2983 parser(r
"(?)").parse().unwrap_err(),
2986 kind
: ast
::ErrorKind
::RepetitionMissing
,
2989 parser(r
"|*").parse().unwrap_err(),
2992 kind
: ast
::ErrorKind
::RepetitionMissing
,
2995 parser(r
"|+").parse().unwrap_err(),
2998 kind
: ast
::ErrorKind
::RepetitionMissing
,
3001 parser(r
"|?").parse().unwrap_err(),
3004 kind
: ast
::ErrorKind
::RepetitionMissing
,
3009 fn parse_counted_repetition() {
3011 parser(r
"a{5}").parse(),
3012 Ok(Ast
::Repetition(ast
::Repetition
{
3014 op
: ast
::RepetitionOp
{
3016 kind
: ast
::RepetitionKind
::Range(
3017 ast
::RepetitionRange
::Exactly(5)),
3020 ast
: Box
::new(lit('a'
, 0)),
3023 parser(r
"a{5,}").parse(),
3024 Ok(Ast
::Repetition(ast
::Repetition
{
3026 op
: ast
::RepetitionOp
{
3028 kind
: ast
::RepetitionKind
::Range(
3029 ast
::RepetitionRange
::AtLeast(5)),
3032 ast
: Box
::new(lit('a'
, 0)),
3035 parser(r
"a{5,9}").parse(),
3036 Ok(Ast
::Repetition(ast
::Repetition
{
3038 op
: ast
::RepetitionOp
{
3040 kind
: ast
::RepetitionKind
::Range(
3041 ast
::RepetitionRange
::Bounded(5, 9)),
3044 ast
: Box
::new(lit('a'
, 0)),
3047 parser(r
"a{5}?").parse(),
3048 Ok(Ast
::Repetition(ast
::Repetition
{
3050 op
: ast
::RepetitionOp
{
3052 kind
: ast
::RepetitionKind
::Range(
3053 ast
::RepetitionRange
::Exactly(5)),
3056 ast
: Box
::new(lit('a'
, 0)),
3059 parser(r
"ab{5}").parse(),
3060 Ok(concat(0..5, vec
![
3062 Ast
::Repetition(ast
::Repetition
{
3064 op
: ast
::RepetitionOp
{
3066 kind
: ast
::RepetitionKind
::Range(
3067 ast
::RepetitionRange
::Exactly(5)),
3070 ast
: Box
::new(lit('b'
, 1)),
3074 parser(r
"ab{5}c").parse(),
3075 Ok(concat(0..6, vec
![
3077 Ast
::Repetition(ast
::Repetition
{
3079 op
: ast
::RepetitionOp
{
3081 kind
: ast
::RepetitionKind
::Range(
3082 ast
::RepetitionRange
::Exactly(5)),
3085 ast
: Box
::new(lit('b'
, 1)),
3091 parser(r
"a{ 5 }").parse(),
3092 Ok(Ast
::Repetition(ast
::Repetition
{
3094 op
: ast
::RepetitionOp
{
3096 kind
: ast
::RepetitionKind
::Range(
3097 ast
::RepetitionRange
::Exactly(5)),
3100 ast
: Box
::new(lit('a'
, 0)),
3103 parser(r
"a{ 5 , 9 }").parse(),
3104 Ok(Ast
::Repetition(ast
::Repetition
{
3106 op
: ast
::RepetitionOp
{
3108 kind
: ast
::RepetitionKind
::Range(
3109 ast
::RepetitionRange
::Bounded(5, 9)),
3112 ast
: Box
::new(lit('a'
, 0)),
3115 parser_ignore_whitespace(r
"a{5,9} ?").parse(),
3116 Ok(Ast
::Repetition(ast
::Repetition
{
3118 op
: ast
::RepetitionOp
{
3120 kind
: ast
::RepetitionKind
::Range(
3121 ast
::RepetitionRange
::Bounded(5, 9)),
3124 ast
: Box
::new(lit('a'
, 0)),
3128 parser(r
"a{").parse().unwrap_err(),
3131 kind
: ast
::ErrorKind
::RepetitionCountUnclosed
,
3134 parser(r
"a{}").parse().unwrap_err(),
3137 kind
: ast
::ErrorKind
::DecimalEmpty
,
3140 parser(r
"a{a").parse().unwrap_err(),
3143 kind
: ast
::ErrorKind
::DecimalEmpty
,
3146 parser(r
"a{9999999999}").parse().unwrap_err(),
3149 kind
: ast
::ErrorKind
::DecimalInvalid
,
3152 parser(r
"a{9").parse().unwrap_err(),
3155 kind
: ast
::ErrorKind
::RepetitionCountUnclosed
,
3158 parser(r
"a{9,a").parse().unwrap_err(),
3161 kind
: ast
::ErrorKind
::DecimalEmpty
,
3164 parser(r
"a{9,9999999999}").parse().unwrap_err(),
3167 kind
: ast
::ErrorKind
::DecimalInvalid
,
3170 parser(r
"a{9,").parse().unwrap_err(),
3173 kind
: ast
::ErrorKind
::RepetitionCountUnclosed
,
3176 parser(r
"a{9,11").parse().unwrap_err(),
3179 kind
: ast
::ErrorKind
::RepetitionCountUnclosed
,
3182 parser(r
"a{2,1}").parse().unwrap_err(),
3185 kind
: ast
::ErrorKind
::RepetitionCountInvalid
,
3188 parser(r
"{5}").parse().unwrap_err(),
3191 kind
: ast
::ErrorKind
::RepetitionMissing
,
3194 parser(r
"|{5}").parse().unwrap_err(),
3197 kind
: ast
::ErrorKind
::RepetitionMissing
,
3202 fn parse_alternate() {
3204 parser(r
"a|b").parse(),
3205 Ok(Ast
::Alternation(ast
::Alternation
{
3207 asts
: vec
![lit('a'
, 0), lit('b'
, 2)],
3210 parser(r
"(a|b)").parse(),
3211 Ok(group(0..5, 1, Ast
::Alternation(ast
::Alternation
{
3213 asts
: vec
![lit('a'
, 1), lit('b'
, 3)],
3217 parser(r
"a|b|c").parse(),
3218 Ok(Ast
::Alternation(ast
::Alternation
{
3220 asts
: vec
![lit('a'
, 0), lit('b'
, 2), lit('c'
, 4)],
3223 parser(r
"ax|by|cz").parse(),
3224 Ok(Ast
::Alternation(ast
::Alternation
{
3227 concat(0..2, vec
![lit('a'
, 0), lit('x'
, 1)]),
3228 concat(3..5, vec
![lit('b'
, 3), lit('y'
, 4)]),
3229 concat(6..8, vec
![lit('c'
, 6), lit('z'
, 7)]),
3233 parser(r
"(ax|by|cz)").parse(),
3234 Ok(group(0..10, 1, Ast
::Alternation(ast
::Alternation
{
3237 concat(1..3, vec
![lit('a'
, 1), lit('x'
, 2)]),
3238 concat(4..6, vec
![lit('b'
, 4), lit('y'
, 5)]),
3239 concat(7..9, vec
![lit('c'
, 7), lit('z'
, 8)]),
3243 parser(r
"(ax|(by|(cz)))").parse(),
3244 Ok(group(0..14, 1, alt(1..13, vec
![
3245 concat(1..3, vec
![lit('a'
, 1), lit('x'
, 2)]),
3246 group(4..13, 2, alt(5..12, vec
![
3247 concat(5..7, vec
![lit('b'
, 5), lit('y'
, 6)]),
3248 group(8..12, 3, concat(9..11, vec
![
3256 parser(r
"|").parse(), Ok(alt(0..1, vec
![
3257 Ast
::Empty(span(0..0)), Ast
::Empty(span(1..1)),
3260 parser(r
"||").parse(), Ok(alt(0..2, vec
![
3261 Ast
::Empty(span(0..0)),
3262 Ast
::Empty(span(1..1)),
3263 Ast
::Empty(span(2..2)),
3266 parser(r
"a|").parse(), Ok(alt(0..2, vec
![
3267 lit('a'
, 0), Ast
::Empty(span(2..2)),
3270 parser(r
"|a").parse(), Ok(alt(0..2, vec
![
3271 Ast
::Empty(span(0..0)), lit('a'
, 1),
3275 parser(r
"(|)").parse(), Ok(group(0..3, 1, alt(1..2, vec
![
3276 Ast
::Empty(span(1..1)), Ast
::Empty(span(2..2)),
3279 parser(r
"(a|)").parse(), Ok(group(0..4, 1, alt(1..3, vec
![
3280 lit('a'
, 1), Ast
::Empty(span(3..3)),
3283 parser(r
"(|a)").parse(), Ok(group(0..4, 1, alt(1..3, vec
![
3284 Ast
::Empty(span(1..1)), lit('a'
, 2),
3288 parser(r
"a|b)").parse().unwrap_err(),
3291 kind
: ast
::ErrorKind
::GroupUnopened
,
3294 parser(r
"(a|b").parse().unwrap_err(),
3297 kind
: ast
::ErrorKind
::GroupUnclosed
,
3302 fn parse_unsupported_lookaround() {
3304 parser(r
"(?=a)").parse().unwrap_err(),
3307 kind
: ast
::ErrorKind
::UnsupportedLookAround
,
3310 parser(r
"(?!a)").parse().unwrap_err(),
3313 kind
: ast
::ErrorKind
::UnsupportedLookAround
,
3316 parser(r
"(?<=a)").parse().unwrap_err(),
3319 kind
: ast
::ErrorKind
::UnsupportedLookAround
,
3322 parser(r
"(?<!a)").parse().unwrap_err(),
3325 kind
: ast
::ErrorKind
::UnsupportedLookAround
,
3331 assert_eq
!(parser("(?i)").parse(), Ok(Ast
::Flags(ast
::SetFlags
{
3335 items
: vec
![ast
::FlagsItem
{
3337 kind
: ast
::FlagsItemKind
::Flag(ast
::Flag
::CaseInsensitive
),
3341 assert_eq
!(parser("(?iU)").parse(), Ok(Ast
::Flags(ast
::SetFlags
{
3348 kind
: ast
::FlagsItemKind
::Flag(
3349 ast
::Flag
::CaseInsensitive
),
3353 kind
: ast
::FlagsItemKind
::Flag(ast
::Flag
::SwapGreed
),
3358 assert_eq
!(parser("(?i-U)").parse(), Ok(Ast
::Flags(ast
::SetFlags
{
3365 kind
: ast
::FlagsItemKind
::Flag(
3366 ast
::Flag
::CaseInsensitive
),
3370 kind
: ast
::FlagsItemKind
::Negation
,
3374 kind
: ast
::FlagsItemKind
::Flag(ast
::Flag
::SwapGreed
),
3380 assert_eq
!(parser("()").parse(), Ok(Ast
::Group(ast
::Group
{
3382 kind
: ast
::GroupKind
::CaptureIndex(1),
3383 ast
: Box
::new(Ast
::Empty(span(1..1))),
3385 assert_eq
!(parser("(a)").parse(), Ok(Ast
::Group(ast
::Group
{
3387 kind
: ast
::GroupKind
::CaptureIndex(1),
3388 ast
: Box
::new(lit('a'
, 1)),
3390 assert_eq
!(parser("(())").parse(), Ok(Ast
::Group(ast
::Group
{
3392 kind
: ast
::GroupKind
::CaptureIndex(1),
3393 ast
: Box
::new(Ast
::Group(ast
::Group
{
3395 kind
: ast
::GroupKind
::CaptureIndex(2),
3396 ast
: Box
::new(Ast
::Empty(span(2..2))),
3400 assert_eq
!(parser("(?:a)").parse(), Ok(Ast
::Group(ast
::Group
{
3402 kind
: ast
::GroupKind
::NonCapturing(ast
::Flags
{
3406 ast
: Box
::new(lit('a'
, 3)),
3409 assert_eq
!(parser("(?i:a)").parse(), Ok(Ast
::Group(ast
::Group
{
3411 kind
: ast
::GroupKind
::NonCapturing(ast
::Flags
{
3416 kind
: ast
::FlagsItemKind
::Flag(
3417 ast
::Flag
::CaseInsensitive
),
3421 ast
: Box
::new(lit('a'
, 4)),
3423 assert_eq
!(parser("(?i-U:a)").parse(), Ok(Ast
::Group(ast
::Group
{
3425 kind
: ast
::GroupKind
::NonCapturing(ast
::Flags
{
3430 kind
: ast
::FlagsItemKind
::Flag(
3431 ast
::Flag
::CaseInsensitive
),
3435 kind
: ast
::FlagsItemKind
::Negation
,
3439 kind
: ast
::FlagsItemKind
::Flag(ast
::Flag
::SwapGreed
),
3443 ast
: Box
::new(lit('a'
, 6)),
3447 parser("(").parse().unwrap_err(),
3450 kind
: ast
::ErrorKind
::GroupUnclosed
,
3453 parser("(?").parse().unwrap_err(),
3456 kind
: ast
::ErrorKind
::GroupUnclosed
,
3459 parser("(?P").parse().unwrap_err(),
3462 kind
: ast
::ErrorKind
::FlagUnrecognized
,
3465 parser("(?P<").parse().unwrap_err(),
3468 kind
: ast
::ErrorKind
::GroupNameUnexpectedEof
,
3471 parser("(a").parse().unwrap_err(),
3474 kind
: ast
::ErrorKind
::GroupUnclosed
,
3477 parser("(()").parse().unwrap_err(),
3480 kind
: ast
::ErrorKind
::GroupUnclosed
,
3483 parser(")").parse().unwrap_err(),
3486 kind
: ast
::ErrorKind
::GroupUnopened
,
3489 parser("a)").parse().unwrap_err(),
3492 kind
: ast
::ErrorKind
::GroupUnopened
,
3497 fn parse_capture_name() {
3498 assert_eq
!(parser("(?P<a>z)").parse(), Ok(Ast
::Group(ast
::Group
{
3500 kind
: ast
::GroupKind
::CaptureName(ast
::CaptureName
{
3505 ast
: Box
::new(lit('z'
, 6)),
3507 assert_eq
!(parser("(?P<abc>z)").parse(), Ok(Ast
::Group(ast
::Group
{
3509 kind
: ast
::GroupKind
::CaptureName(ast
::CaptureName
{
3514 ast
: Box
::new(lit('z'
, 8)),
3518 parser("(?P<").parse().unwrap_err(),
3521 kind
: ast
::ErrorKind
::GroupNameUnexpectedEof
,
3524 parser("(?P<>z)").parse().unwrap_err(),
3527 kind
: ast
::ErrorKind
::GroupNameEmpty
,
3530 parser("(?P<a").parse().unwrap_err(),
3533 kind
: ast
::ErrorKind
::GroupNameUnexpectedEof
,
3536 parser("(?P<ab").parse().unwrap_err(),
3539 kind
: ast
::ErrorKind
::GroupNameUnexpectedEof
,
3542 parser("(?P<0a").parse().unwrap_err(),
3545 kind
: ast
::ErrorKind
::GroupNameInvalid
,
3548 parser("(?P<~").parse().unwrap_err(),
3551 kind
: ast
::ErrorKind
::GroupNameInvalid
,
3554 parser("(?P<abc~").parse().unwrap_err(),
3557 kind
: ast
::ErrorKind
::GroupNameInvalid
,
3560 parser("(?P<a>y)(?P<a>z)").parse().unwrap_err(),
3563 kind
: ast
::ErrorKind
::GroupNameDuplicate
{
3564 original
: span(4..5),
3571 assert_eq
!(parser("i:").parse_flags(), Ok(ast
::Flags
{
3573 items
: vec
![ast
::FlagsItem
{
3575 kind
: ast
::FlagsItemKind
::Flag(ast
::Flag
::CaseInsensitive
),
3578 assert_eq
!(parser("i)").parse_flags(), Ok(ast
::Flags
{
3580 items
: vec
![ast
::FlagsItem
{
3582 kind
: ast
::FlagsItemKind
::Flag(ast
::Flag
::CaseInsensitive
),
3586 assert_eq
!(parser("isU:").parse_flags(), Ok(ast
::Flags
{
3591 kind
: ast
::FlagsItemKind
::Flag(ast
::Flag
::CaseInsensitive
),
3595 kind
: ast
::FlagsItemKind
::Flag(
3596 ast
::Flag
::DotMatchesNewLine
),
3600 kind
: ast
::FlagsItemKind
::Flag(ast
::Flag
::SwapGreed
),
3605 assert_eq
!(parser("-isU:").parse_flags(), Ok(ast
::Flags
{
3610 kind
: ast
::FlagsItemKind
::Negation
,
3614 kind
: ast
::FlagsItemKind
::Flag(ast
::Flag
::CaseInsensitive
),
3618 kind
: ast
::FlagsItemKind
::Flag(
3619 ast
::Flag
::DotMatchesNewLine
),
3623 kind
: ast
::FlagsItemKind
::Flag(ast
::Flag
::SwapGreed
),
3627 assert_eq
!(parser("i-sU:").parse_flags(), Ok(ast
::Flags
{
3632 kind
: ast
::FlagsItemKind
::Flag(ast
::Flag
::CaseInsensitive
),
3636 kind
: ast
::FlagsItemKind
::Negation
,
3640 kind
: ast
::FlagsItemKind
::Flag(
3641 ast
::Flag
::DotMatchesNewLine
),
3645 kind
: ast
::FlagsItemKind
::Flag(ast
::Flag
::SwapGreed
),
3651 parser("isU").parse_flags().unwrap_err(),
3654 kind
: ast
::ErrorKind
::FlagUnexpectedEof
,
3657 parser("isUa:").parse_flags().unwrap_err(),
3660 kind
: ast
::ErrorKind
::FlagUnrecognized
,
3663 parser("isUi:").parse_flags().unwrap_err(),
3666 kind
: ast
::ErrorKind
::FlagDuplicate
{
3667 original
: span(0..1),
3671 parser("i-sU-i:").parse_flags().unwrap_err(),
3674 kind
: ast
::ErrorKind
::FlagRepeatedNegation
{
3675 original
: span(1..2),
3679 parser("-)").parse_flags().unwrap_err(),
3682 kind
: ast
::ErrorKind
::FlagDanglingNegation
,
3685 parser("i-)").parse_flags().unwrap_err(),
3688 kind
: ast
::ErrorKind
::FlagDanglingNegation
,
3691 parser("iU-)").parse_flags().unwrap_err(),
3694 kind
: ast
::ErrorKind
::FlagDanglingNegation
,
3700 assert_eq
!(parser("i").parse_flag(), Ok(ast
::Flag
::CaseInsensitive
));
3701 assert_eq
!(parser("m").parse_flag(), Ok(ast
::Flag
::MultiLine
));
3702 assert_eq
!(parser("s").parse_flag(), Ok(ast
::Flag
::DotMatchesNewLine
));
3703 assert_eq
!(parser("U").parse_flag(), Ok(ast
::Flag
::SwapGreed
));
3704 assert_eq
!(parser("u").parse_flag(), Ok(ast
::Flag
::Unicode
));
3705 assert_eq
!(parser("x").parse_flag(), Ok(ast
::Flag
::IgnoreWhitespace
));
3708 parser("a").parse_flag().unwrap_err(),
3711 kind
: ast
::ErrorKind
::FlagUnrecognized
,
3714 parser("☃").parse_flag().unwrap_err(),
3716 span
: span_range("☃", 0..3),
3717 kind
: ast
::ErrorKind
::FlagUnrecognized
,
3722 fn parse_primitive_non_escape() {
3724 parser(r
".").parse_primitive(),
3725 Ok(Primitive
::Dot(span(0..1))));
3727 parser(r
"^").parse_primitive(),
3728 Ok(Primitive
::Assertion(ast
::Assertion
{
3730 kind
: ast
::AssertionKind
::StartLine
,
3733 parser(r
"$").parse_primitive(),
3734 Ok(Primitive
::Assertion(ast
::Assertion
{
3736 kind
: ast
::AssertionKind
::EndLine
,
3740 parser(r
"a").parse_primitive(),
3741 Ok(Primitive
::Literal(ast
::Literal
{
3743 kind
: ast
::LiteralKind
::Verbatim
,
3747 parser(r
"|").parse_primitive(),
3748 Ok(Primitive
::Literal(ast
::Literal
{
3750 kind
: ast
::LiteralKind
::Verbatim
,
3754 parser(r
"☃").parse_primitive(),
3755 Ok(Primitive
::Literal(ast
::Literal
{
3756 span
: span_range("☃", 0..3),
3757 kind
: ast
::LiteralKind
::Verbatim
,
3765 parser(r
"\|").parse_primitive(),
3766 Ok(Primitive
::Literal(ast
::Literal
{
3768 kind
: ast
::LiteralKind
::Punctuation
,
3772 (r
"\a", '
\x07'
, ast
::SpecialLiteralKind
::Bell
),
3773 (r
"\f", '
\x0C'
, ast
::SpecialLiteralKind
::FormFeed
),
3774 (r
"\t", '
\t'
, ast
::SpecialLiteralKind
::Tab
),
3775 (r
"\n", '
\n'
, ast
::SpecialLiteralKind
::LineFeed
),
3776 (r
"\r", '
\r'
, ast
::SpecialLiteralKind
::CarriageReturn
),
3777 (r
"\v", '
\x0B'
, ast
::SpecialLiteralKind
::VerticalTab
),
3779 for &(pat
, c
, ref kind
) in specials
{
3781 parser(pat
).parse_primitive(),
3782 Ok(Primitive
::Literal(ast
::Literal
{
3784 kind
: ast
::LiteralKind
::Special(kind
.clone()),
3789 parser(r
"\A").parse_primitive(),
3790 Ok(Primitive
::Assertion(ast
::Assertion
{
3792 kind
: ast
::AssertionKind
::StartText
,
3795 parser(r
"\z").parse_primitive(),
3796 Ok(Primitive
::Assertion(ast
::Assertion
{
3798 kind
: ast
::AssertionKind
::EndText
,
3801 parser(r
"\b").parse_primitive(),
3802 Ok(Primitive
::Assertion(ast
::Assertion
{
3804 kind
: ast
::AssertionKind
::WordBoundary
,
3807 parser(r
"\B").parse_primitive(),
3808 Ok(Primitive
::Assertion(ast
::Assertion
{
3810 kind
: ast
::AssertionKind
::NotWordBoundary
,
3814 parser(r
"\").parse_escape().unwrap_err(),
3817 kind
: ast
::ErrorKind
::EscapeUnexpectedEof
,
3820 parser(r
"\y").parse_escape().unwrap_err(),
3823 kind
: ast
::ErrorKind
::EscapeUnrecognized
,
3828 fn parse_unsupported_backreference() {
3830 parser(r
"\0").parse_escape().unwrap_err(),
3833 kind
: ast
::ErrorKind
::UnsupportedBackreference
,
3836 parser(r
"\9").parse_escape().unwrap_err(),
3839 kind
: ast
::ErrorKind
::UnsupportedBackreference
,
3846 let pat
= format
!(r
"\{:o}", i
);
3848 parser_octal(&pat
).parse_escape(),
3849 Ok(Primitive
::Literal(ast
::Literal
{
3850 span
: span(0..pat
.len()),
3851 kind
: ast
::LiteralKind
::Octal
,
3852 c
: ::std
::char::from_u32(i
).unwrap(),
3856 parser_octal(r
"\778").parse_escape(),
3857 Ok(Primitive
::Literal(ast
::Literal
{
3859 kind
: ast
::LiteralKind
::Octal
,
3863 parser_octal(r
"\7777").parse_escape(),
3864 Ok(Primitive
::Literal(ast
::Literal
{
3866 kind
: ast
::LiteralKind
::Octal
,
3870 parser_octal(r
"\778").parse(),
3871 Ok(Ast
::Concat(ast
::Concat
{
3874 Ast
::Literal(ast
::Literal
{
3876 kind
: ast
::LiteralKind
::Octal
,
3879 Ast
::Literal(ast
::Literal
{
3881 kind
: ast
::LiteralKind
::Verbatim
,
3887 parser_octal(r
"\7777").parse(),
3888 Ok(Ast
::Concat(ast
::Concat
{
3891 Ast
::Literal(ast
::Literal
{
3893 kind
: ast
::LiteralKind
::Octal
,
3896 Ast
::Literal(ast
::Literal
{
3898 kind
: ast
::LiteralKind
::Verbatim
,
3905 parser_octal(r
"\8").parse_escape().unwrap_err(),
3908 kind
: ast
::ErrorKind
::EscapeUnrecognized
,
3913 fn parse_hex_two() {
3915 let pat
= format
!(r
"\x{:02x}", i
);
3917 parser(&pat
).parse_escape(),
3918 Ok(Primitive
::Literal(ast
::Literal
{
3919 span
: span(0..pat
.len()),
3920 kind
: ast
::LiteralKind
::HexFixed(ast
::HexLiteralKind
::X
),
3921 c
: ::std
::char::from_u32(i
).unwrap(),
3926 parser(r
"\xF").parse_escape().unwrap_err(),
3929 kind
: ast
::ErrorKind
::EscapeUnexpectedEof
,
3932 parser(r
"\xG").parse_escape().unwrap_err(),
3935 kind
: ast
::ErrorKind
::EscapeHexInvalidDigit
,
3938 parser(r
"\xFG").parse_escape().unwrap_err(),
3941 kind
: ast
::ErrorKind
::EscapeHexInvalidDigit
,
3946 fn parse_hex_four() {
3948 let c
= match ::std
::char::from_u32(i
) {
3952 let pat
= format
!(r
"\u{:04x}", i
);
3954 parser(&pat
).parse_escape(),
3955 Ok(Primitive
::Literal(ast
::Literal
{
3956 span
: span(0..pat
.len()),
3957 kind
: ast
::LiteralKind
::HexFixed(
3958 ast
::HexLiteralKind
::UnicodeShort
),
3964 parser(r
"\uF").parse_escape().unwrap_err(),
3967 kind
: ast
::ErrorKind
::EscapeUnexpectedEof
,
3970 parser(r
"\uG").parse_escape().unwrap_err(),
3973 kind
: ast
::ErrorKind
::EscapeHexInvalidDigit
,
3976 parser(r
"\uFG").parse_escape().unwrap_err(),
3979 kind
: ast
::ErrorKind
::EscapeHexInvalidDigit
,
3982 parser(r
"\uFFG").parse_escape().unwrap_err(),
3985 kind
: ast
::ErrorKind
::EscapeHexInvalidDigit
,
3988 parser(r
"\uFFFG").parse_escape().unwrap_err(),
3991 kind
: ast
::ErrorKind
::EscapeHexInvalidDigit
,
3994 parser(r
"\uD800").parse_escape().unwrap_err(),
3997 kind
: ast
::ErrorKind
::EscapeHexInvalid
,
4002 fn parse_hex_eight() {
4004 let c
= match ::std
::char::from_u32(i
) {
4008 let pat
= format
!(r
"\U{:08x}", i
);
4010 parser(&pat
).parse_escape(),
4011 Ok(Primitive
::Literal(ast
::Literal
{
4012 span
: span(0..pat
.len()),
4013 kind
: ast
::LiteralKind
::HexFixed(
4014 ast
::HexLiteralKind
::UnicodeLong
),
4020 parser(r
"\UF").parse_escape().unwrap_err(),
4023 kind
: ast
::ErrorKind
::EscapeUnexpectedEof
,
4026 parser(r
"\UG").parse_escape().unwrap_err(),
4029 kind
: ast
::ErrorKind
::EscapeHexInvalidDigit
,
4032 parser(r
"\UFG").parse_escape().unwrap_err(),
4035 kind
: ast
::ErrorKind
::EscapeHexInvalidDigit
,
4038 parser(r
"\UFFG").parse_escape().unwrap_err(),
4041 kind
: ast
::ErrorKind
::EscapeHexInvalidDigit
,
4044 parser(r
"\UFFFG").parse_escape().unwrap_err(),
4047 kind
: ast
::ErrorKind
::EscapeHexInvalidDigit
,
4050 parser(r
"\UFFFFG").parse_escape().unwrap_err(),
4053 kind
: ast
::ErrorKind
::EscapeHexInvalidDigit
,
4056 parser(r
"\UFFFFFG").parse_escape().unwrap_err(),
4059 kind
: ast
::ErrorKind
::EscapeHexInvalidDigit
,
4062 parser(r
"\UFFFFFFG").parse_escape().unwrap_err(),
4065 kind
: ast
::ErrorKind
::EscapeHexInvalidDigit
,
4068 parser(r
"\UFFFFFFFG").parse_escape().unwrap_err(),
4071 kind
: ast
::ErrorKind
::EscapeHexInvalidDigit
,
4076 fn parse_hex_brace() {
4078 parser(r
"\u{26c4}").parse_escape(),
4079 Ok(Primitive
::Literal(ast
::Literal
{
4081 kind
: ast
::LiteralKind
::HexBrace(
4082 ast
::HexLiteralKind
::UnicodeShort
),
4086 parser(r
"\U{26c4}").parse_escape(),
4087 Ok(Primitive
::Literal(ast
::Literal
{
4089 kind
: ast
::LiteralKind
::HexBrace(
4090 ast
::HexLiteralKind
::UnicodeLong
),
4094 parser(r
"\x{26c4}").parse_escape(),
4095 Ok(Primitive
::Literal(ast
::Literal
{
4097 kind
: ast
::LiteralKind
::HexBrace(ast
::HexLiteralKind
::X
),
4101 parser(r
"\x{26C4}").parse_escape(),
4102 Ok(Primitive
::Literal(ast
::Literal
{
4104 kind
: ast
::LiteralKind
::HexBrace(ast
::HexLiteralKind
::X
),
4108 parser(r
"\x{10fFfF}").parse_escape(),
4109 Ok(Primitive
::Literal(ast
::Literal
{
4111 kind
: ast
::LiteralKind
::HexBrace(ast
::HexLiteralKind
::X
),
4116 parser(r
"\x").parse_escape().unwrap_err(),
4119 kind
: ast
::ErrorKind
::EscapeUnexpectedEof
,
4122 parser(r
"\x{").parse_escape().unwrap_err(),
4125 kind
: ast
::ErrorKind
::EscapeUnexpectedEof
,
4128 parser(r
"\x{FF").parse_escape().unwrap_err(),
4131 kind
: ast
::ErrorKind
::EscapeUnexpectedEof
,
4134 parser(r
"\x{}").parse_escape().unwrap_err(),
4137 kind
: ast
::ErrorKind
::EscapeHexEmpty
,
4140 parser(r
"\x{FGF}").parse_escape().unwrap_err(),
4143 kind
: ast
::ErrorKind
::EscapeHexInvalidDigit
,
4146 parser(r
"\x{FFFFFF}").parse_escape().unwrap_err(),
4149 kind
: ast
::ErrorKind
::EscapeHexInvalid
,
4152 parser(r
"\x{D800}").parse_escape().unwrap_err(),
4155 kind
: ast
::ErrorKind
::EscapeHexInvalid
,
4158 parser(r
"\x{FFFFFFFFF}").parse_escape().unwrap_err(),
4161 kind
: ast
::ErrorKind
::EscapeHexInvalid
,
4166 fn parse_decimal() {
4167 assert_eq
!(parser("123").parse_decimal(), Ok(123));
4168 assert_eq
!(parser("0").parse_decimal(), Ok(0));
4169 assert_eq
!(parser("01").parse_decimal(), Ok(1));
4172 parser("-1").parse_decimal().unwrap_err(),
4175 kind
: ast
::ErrorKind
::DecimalEmpty
,
4178 parser("").parse_decimal().unwrap_err(),
4181 kind
: ast
::ErrorKind
::DecimalEmpty
,
4184 parser("9999999999").parse_decimal().unwrap_err(),
4187 kind
: ast
::ErrorKind
::DecimalInvalid
,
4192 fn parse_set_class() {
4193 fn union(span
: Span
, items
: Vec
<ast
::ClassSetItem
>) -> ast
::ClassSet
{
4194 ast
::ClassSet
::union(ast
::ClassSetUnion
{
4204 ) -> ast
::ClassSet
{
4205 ast
::ClassSet
::BinaryOp(ast
::ClassSetBinaryOp
{
4207 kind
: ast
::ClassSetBinaryOpKind
::Intersection
,
4217 ) -> ast
::ClassSet
{
4218 ast
::ClassSet
::BinaryOp(ast
::ClassSetBinaryOp
{
4220 kind
: ast
::ClassSetBinaryOpKind
::Difference
,
4230 ) -> ast
::ClassSet
{
4231 ast
::ClassSet
::BinaryOp(ast
::ClassSetBinaryOp
{
4233 kind
: ast
::ClassSetBinaryOpKind
::SymmetricDifference
,
4239 fn itemset(item
: ast
::ClassSetItem
) -> ast
::ClassSet
{
4240 ast
::ClassSet
::Item(item
)
4243 fn item_ascii(cls
: ast
::ClassAscii
) -> ast
::ClassSetItem
{
4244 ast
::ClassSetItem
::Ascii(cls
)
4247 fn item_unicode(cls
: ast
::ClassUnicode
) -> ast
::ClassSetItem
{
4248 ast
::ClassSetItem
::Unicode(cls
)
4251 fn item_perl(cls
: ast
::ClassPerl
) -> ast
::ClassSetItem
{
4252 ast
::ClassSetItem
::Perl(cls
)
4255 fn item_bracket(cls
: ast
::ClassBracketed
) -> ast
::ClassSetItem
{
4256 ast
::ClassSetItem
::Bracketed(Box
::new(cls
))
4259 fn lit(span
: Span
, c
: char) -> ast
::ClassSetItem
{
4260 ast
::ClassSetItem
::Literal(ast
::Literal
{
4262 kind
: ast
::LiteralKind
::Verbatim
,
4267 fn empty(span
: Span
) -> ast
::ClassSetItem
{
4268 ast
::ClassSetItem
::Empty(span
)
4271 fn range(span
: Span
, start
: char, end
: char) -> ast
::ClassSetItem
{
4272 let pos1
= Position
{
4273 offset
: span
.start
.offset
+ start
.len_utf8(),
4274 column
: span
.start
.column
+ 1,
4277 let pos2
= Position
{
4278 offset
: span
.end
.offset
- end
.len_utf8(),
4279 column
: span
.end
.column
- 1,
4282 ast
::ClassSetItem
::Range(ast
::ClassSetRange
{
4284 start
: ast
::Literal
{
4285 span
: Span { end: pos1, ..span }
,
4286 kind
: ast
::LiteralKind
::Verbatim
,
4290 span
: Span { start: pos2, ..span }
,
4291 kind
: ast
::LiteralKind
::Verbatim
,
4297 fn alnum(span
: Span
, negated
: bool
) -> ast
::ClassAscii
{
4300 kind
: ast
::ClassAsciiKind
::Alnum
,
4305 fn lower(span
: Span
, negated
: bool
) -> ast
::ClassAscii
{
4308 kind
: ast
::ClassAsciiKind
::Lower
,
4314 parser("[[:alnum:]]").parse(),
4315 Ok(Ast
::Class(ast
::Class
::Bracketed(ast
::ClassBracketed
{
4318 kind
: itemset(item_ascii(alnum(span(1..10), false))),
4321 parser("[[[:alnum:]]]").parse(),
4322 Ok(Ast
::Class(ast
::Class
::Bracketed(ast
::ClassBracketed
{
4325 kind
: itemset(item_bracket(ast
::ClassBracketed
{
4328 kind
: itemset(item_ascii(alnum(span(2..11), false))),
4332 parser("[[:alnum:]&&[:lower:]]").parse(),
4333 Ok(Ast
::Class(ast
::Class
::Bracketed(ast
::ClassBracketed
{
4338 itemset(item_ascii(alnum(span(1..10), false))),
4339 itemset(item_ascii(lower(span(12..21), false))),
4343 parser("[[:alnum:]--[:lower:]]").parse(),
4344 Ok(Ast
::Class(ast
::Class
::Bracketed(ast
::ClassBracketed
{
4349 itemset(item_ascii(alnum(span(1..10), false))),
4350 itemset(item_ascii(lower(span(12..21), false))),
4354 parser("[[:alnum:]~~[:lower:]]").parse(),
4355 Ok(Ast
::Class(ast
::Class
::Bracketed(ast
::ClassBracketed
{
4358 kind
: symdifference(
4360 itemset(item_ascii(alnum(span(1..10), false))),
4361 itemset(item_ascii(lower(span(12..21), false))),
4366 parser("[a]").parse(),
4367 Ok(Ast
::Class(ast
::Class
::Bracketed(ast
::ClassBracketed
{
4370 kind
: itemset(lit(span(1..2), 'a'
)),
4373 parser(r
"[a\]]").parse(),
4374 Ok(Ast
::Class(ast
::Class
::Bracketed(ast
::ClassBracketed
{
4377 kind
: union(span(1..4), vec
![
4378 lit(span(1..2), 'a'
),
4379 ast
::ClassSetItem
::Literal(ast
::Literal
{
4381 kind
: ast
::LiteralKind
::Punctuation
,
4387 parser(r
"[a\-z]").parse(),
4388 Ok(Ast
::Class(ast
::Class
::Bracketed(ast
::ClassBracketed
{
4391 kind
: union(span(1..5), vec
![
4392 lit(span(1..2), 'a'
),
4393 ast
::ClassSetItem
::Literal(ast
::Literal
{
4395 kind
: ast
::LiteralKind
::Punctuation
,
4398 lit(span(4..5), 'z'
),
4402 parser("[ab]").parse(),
4403 Ok(Ast
::Class(ast
::Class
::Bracketed(ast
::ClassBracketed
{
4406 kind
: union(span(1..3), vec
![
4407 lit(span(1..2), 'a'
),
4408 lit(span(2..3), 'b'
),
4412 parser("[a-]").parse(),
4413 Ok(Ast
::Class(ast
::Class
::Bracketed(ast
::ClassBracketed
{
4416 kind
: union(span(1..3), vec
![
4417 lit(span(1..2), 'a'
),
4418 lit(span(2..3), '
-'
),
4422 parser("[-a]").parse(),
4423 Ok(Ast
::Class(ast
::Class
::Bracketed(ast
::ClassBracketed
{
4426 kind
: union(span(1..3), vec
![
4427 lit(span(1..2), '
-'
),
4428 lit(span(2..3), 'a'
),
4432 parser(r
"[\pL]").parse(),
4433 Ok(Ast
::Class(ast
::Class
::Bracketed(ast
::ClassBracketed
{
4436 kind
: itemset(item_unicode(ast
::ClassUnicode
{
4439 kind
: ast
::ClassUnicodeKind
::OneLetter('L'
),
4443 parser(r
"[\w]").parse(),
4444 Ok(Ast
::Class(ast
::Class
::Bracketed(ast
::ClassBracketed
{
4447 kind
: itemset(item_perl(ast
::ClassPerl
{
4449 kind
: ast
::ClassPerlKind
::Word
,
4454 parser(r
"[a\wz]").parse(),
4455 Ok(Ast
::Class(ast
::Class
::Bracketed(ast
::ClassBracketed
{
4458 kind
: union(span(1..5), vec
![
4459 lit(span(1..2), 'a'
),
4460 item_perl(ast
::ClassPerl
{
4462 kind
: ast
::ClassPerlKind
::Word
,
4465 lit(span(4..5), 'z'
),
4470 parser("[a-z]").parse(),
4471 Ok(Ast
::Class(ast
::Class
::Bracketed(ast
::ClassBracketed
{
4474 kind
: itemset(range(span(1..4), 'a'
, 'z'
)),
4477 parser("[a-cx-z]").parse(),
4478 Ok(Ast
::Class(ast
::Class
::Bracketed(ast
::ClassBracketed
{
4481 kind
: union(span(1..7), vec
![
4482 range(span(1..4), 'a'
, 'c'
),
4483 range(span(4..7), 'x'
, 'z'
),
4487 parser(r
"[\w&&a-cx-z]").parse(),
4488 Ok(Ast
::Class(ast
::Class
::Bracketed(ast
::ClassBracketed
{
4493 itemset(item_perl(ast
::ClassPerl
{
4495 kind
: ast
::ClassPerlKind
::Word
,
4498 union(span(5..11), vec
![
4499 range(span(5..8), 'a'
, 'c'
),
4500 range(span(8..11), 'x'
, 'z'
),
4505 parser(r
"[a-cx-z&&\w]").parse(),
4506 Ok(Ast
::Class(ast
::Class
::Bracketed(ast
::ClassBracketed
{
4511 union(span(1..7), vec
![
4512 range(span(1..4), 'a'
, 'c'
),
4513 range(span(4..7), 'x'
, 'z'
),
4515 itemset(item_perl(ast
::ClassPerl
{
4517 kind
: ast
::ClassPerlKind
::Word
,
4523 parser(r
"[a--b--c]").parse(),
4524 Ok(Ast
::Class(ast
::Class
::Bracketed(ast
::ClassBracketed
{
4531 itemset(lit(span(1..2), 'a'
)),
4532 itemset(lit(span(4..5), 'b'
)),
4534 itemset(lit(span(7..8), 'c'
)),
4538 parser(r
"[a~~b~~c]").parse(),
4539 Ok(Ast
::Class(ast
::Class
::Bracketed(ast
::ClassBracketed
{
4542 kind
: symdifference(
4546 itemset(lit(span(1..2), 'a'
)),
4547 itemset(lit(span(4..5), 'b'
)),
4549 itemset(lit(span(7..8), 'c'
)),
4553 parser(r
"[\^&&^]").parse(),
4554 Ok(Ast
::Class(ast
::Class
::Bracketed(ast
::ClassBracketed
{
4559 itemset(ast
::ClassSetItem
::Literal(ast
::Literal
{
4561 kind
: ast
::LiteralKind
::Punctuation
,
4564 itemset(lit(span(5..6), '
^')),
4568 parser(r
"[\&&&&]").parse(),
4569 Ok(Ast
::Class(ast
::Class
::Bracketed(ast
::ClassBracketed
{
4574 itemset(ast
::ClassSetItem
::Literal(ast
::Literal
{
4576 kind
: ast
::LiteralKind
::Punctuation
,
4579 itemset(lit(span(5..6), '
&'
)),
4583 parser(r
"[&&&&]").parse(),
4584 Ok(Ast
::Class(ast
::Class
::Bracketed(ast
::ClassBracketed
{
4591 itemset(empty(span(1..1))),
4592 itemset(empty(span(3..3))),
4594 itemset(empty(span(5..5))),
4598 let pat
= "[☃-⛄]";
4600 parser(pat
).parse(),
4601 Ok(Ast
::Class(ast
::Class
::Bracketed(ast
::ClassBracketed
{
4602 span
: span_range(pat
, 0..9),
4604 kind
: itemset(ast
::ClassSetItem
::Range(ast
::ClassSetRange
{
4605 span
: span_range(pat
, 1..8),
4606 start
: ast
::Literal
{
4607 span
: span_range(pat
, 1..4),
4608 kind
: ast
::LiteralKind
::Verbatim
,
4612 span
: span_range(pat
, 5..8),
4613 kind
: ast
::LiteralKind
::Verbatim
,
4620 parser(r
"[]]").parse(),
4621 Ok(Ast
::Class(ast
::Class
::Bracketed(ast
::ClassBracketed
{
4624 kind
: itemset(lit(span(1..2), '
]'
)),
4627 parser(r
"[]\[]").parse(),
4628 Ok(Ast
::Class(ast
::Class
::Bracketed(ast
::ClassBracketed
{
4631 kind
: union(span(1..4), vec
![
4632 lit(span(1..2), '
]'
),
4633 ast
::ClassSetItem
::Literal(ast
::Literal
{
4635 kind
: ast
::LiteralKind
::Punctuation
,
4641 parser(r
"[\[]]").parse(),
4642 Ok(concat(0..5, vec
![
4643 Ast
::Class(ast
::Class
::Bracketed(ast
::ClassBracketed
{
4646 kind
: itemset(ast
::ClassSetItem
::Literal(ast
::Literal
{
4648 kind
: ast
::LiteralKind
::Punctuation
,
4652 Ast
::Literal(ast
::Literal
{
4654 kind
: ast
::LiteralKind
::Verbatim
,
4660 parser("[").parse().unwrap_err(),
4663 kind
: ast
::ErrorKind
::ClassUnclosed
,
4666 parser("[[").parse().unwrap_err(),
4669 kind
: ast
::ErrorKind
::ClassUnclosed
,
4672 parser("[[-]").parse().unwrap_err(),
4675 kind
: ast
::ErrorKind
::ClassUnclosed
,
4678 parser("[[[:alnum:]").parse().unwrap_err(),
4681 kind
: ast
::ErrorKind
::ClassUnclosed
,
4684 parser(r
"[\b]").parse().unwrap_err(),
4687 kind
: ast
::ErrorKind
::ClassEscapeInvalid
,
4690 parser(r
"[\w-a]").parse().unwrap_err(),
4693 kind
: ast
::ErrorKind
::ClassRangeLiteral
,
4696 parser(r
"[a-\w]").parse().unwrap_err(),
4699 kind
: ast
::ErrorKind
::ClassRangeLiteral
,
4702 parser(r
"[z-a]").parse().unwrap_err(),
4705 kind
: ast
::ErrorKind
::ClassRangeInvalid
,
4709 parser_ignore_whitespace("[a ").parse().unwrap_err(),
4712 kind
: ast
::ErrorKind
::ClassUnclosed
,
4715 parser_ignore_whitespace("[a- ").parse().unwrap_err(),
4718 kind
: ast
::ErrorKind
::ClassUnclosed
,
4723 fn parse_set_class_open() {
4725 parser("[a]").parse_set_class_open(), {
4726 let set
= ast
::ClassBracketed
{
4729 kind
: ast
::ClassSet
::union(ast
::ClassSetUnion
{
4734 let union = ast
::ClassSetUnion
{
4741 parser_ignore_whitespace("[ a]").parse_set_class_open(), {
4742 let set
= ast
::ClassBracketed
{
4745 kind
: ast
::ClassSet
::union(ast
::ClassSetUnion
{
4750 let union = ast
::ClassSetUnion
{
4757 parser("[^a]").parse_set_class_open(), {
4758 let set
= ast
::ClassBracketed
{
4761 kind
: ast
::ClassSet
::union(ast
::ClassSetUnion
{
4766 let union = ast
::ClassSetUnion
{
4773 parser_ignore_whitespace("[ ^ a]").parse_set_class_open(), {
4774 let set
= ast
::ClassBracketed
{
4777 kind
: ast
::ClassSet
::union(ast
::ClassSetUnion
{
4782 let union = ast
::ClassSetUnion
{
4789 parser("[-a]").parse_set_class_open(), {
4790 let set
= ast
::ClassBracketed
{
4793 kind
: ast
::ClassSet
::union(ast
::ClassSetUnion
{
4798 let union = ast
::ClassSetUnion
{
4801 ast
::ClassSetItem
::Literal(ast
::Literal
{
4803 kind
: ast
::LiteralKind
::Verbatim
,
4811 parser_ignore_whitespace("[ - a]").parse_set_class_open(), {
4812 let set
= ast
::ClassBracketed
{
4815 kind
: ast
::ClassSet
::union(ast
::ClassSetUnion
{
4820 let union = ast
::ClassSetUnion
{
4823 ast
::ClassSetItem
::Literal(ast
::Literal
{
4825 kind
: ast
::LiteralKind
::Verbatim
,
4833 parser("[^-a]").parse_set_class_open(), {
4834 let set
= ast
::ClassBracketed
{
4837 kind
: ast
::ClassSet
::union(ast
::ClassSetUnion
{
4842 let union = ast
::ClassSetUnion
{
4845 ast
::ClassSetItem
::Literal(ast
::Literal
{
4847 kind
: ast
::LiteralKind
::Verbatim
,
4855 parser("[--a]").parse_set_class_open(), {
4856 let set
= ast
::ClassBracketed
{
4859 kind
: ast
::ClassSet
::union(ast
::ClassSetUnion
{
4864 let union = ast
::ClassSetUnion
{
4867 ast
::ClassSetItem
::Literal(ast
::Literal
{
4869 kind
: ast
::LiteralKind
::Verbatim
,
4872 ast
::ClassSetItem
::Literal(ast
::Literal
{
4874 kind
: ast
::LiteralKind
::Verbatim
,
4882 parser("[]a]").parse_set_class_open(), {
4883 let set
= ast
::ClassBracketed
{
4886 kind
: ast
::ClassSet
::union(ast
::ClassSetUnion
{
4891 let union = ast
::ClassSetUnion
{
4894 ast
::ClassSetItem
::Literal(ast
::Literal
{
4896 kind
: ast
::LiteralKind
::Verbatim
,
4904 parser_ignore_whitespace("[ ] a]").parse_set_class_open(), {
4905 let set
= ast
::ClassBracketed
{
4908 kind
: ast
::ClassSet
::union(ast
::ClassSetUnion
{
4913 let union = ast
::ClassSetUnion
{
4916 ast
::ClassSetItem
::Literal(ast
::Literal
{
4918 kind
: ast
::LiteralKind
::Verbatim
,
4926 parser("[^]a]").parse_set_class_open(), {
4927 let set
= ast
::ClassBracketed
{
4930 kind
: ast
::ClassSet
::union(ast
::ClassSetUnion
{
4935 let union = ast
::ClassSetUnion
{
4938 ast
::ClassSetItem
::Literal(ast
::Literal
{
4940 kind
: ast
::LiteralKind
::Verbatim
,
4948 parser("[-]a]").parse_set_class_open(), {
4949 let set
= ast
::ClassBracketed
{
4952 kind
: ast
::ClassSet
::union(ast
::ClassSetUnion
{
4957 let union = ast
::ClassSetUnion
{
4960 ast
::ClassSetItem
::Literal(ast
::Literal
{
4962 kind
: ast
::LiteralKind
::Verbatim
,
4971 parser("[").parse_set_class_open().unwrap_err(),
4974 kind
: ast
::ErrorKind
::ClassUnclosed
,
4977 parser_ignore_whitespace("[ ")
4978 .parse_set_class_open()
4982 kind
: ast
::ErrorKind
::ClassUnclosed
,
4985 parser("[^").parse_set_class_open().unwrap_err(),
4988 kind
: ast
::ErrorKind
::ClassUnclosed
,
4991 parser("[]").parse_set_class_open().unwrap_err(),
4994 kind
: ast
::ErrorKind
::ClassUnclosed
,
4997 parser("[-").parse_set_class_open().unwrap_err(),
5000 kind
: ast
::ErrorKind
::ClassUnclosed
,
5003 parser("[--").parse_set_class_open().unwrap_err(),
5006 kind
: ast
::ErrorKind
::ClassUnclosed
,
5011 fn maybe_parse_ascii_class() {
5013 parser(r
"[:alnum:]").maybe_parse_ascii_class(),
5014 Some(ast
::ClassAscii
{
5016 kind
: ast
::ClassAsciiKind
::Alnum
,
5020 parser(r
"[:alnum:]A").maybe_parse_ascii_class(),
5021 Some(ast
::ClassAscii
{
5023 kind
: ast
::ClassAsciiKind
::Alnum
,
5027 parser(r
"[:^alnum:]").maybe_parse_ascii_class(),
5028 Some(ast
::ClassAscii
{
5030 kind
: ast
::ClassAsciiKind
::Alnum
,
5034 let p
= parser(r
"[:");
5035 assert_eq
!(p
.maybe_parse_ascii_class(), None
);
5036 assert_eq
!(p
.offset(), 0);
5038 let p
= parser(r
"[:^");
5039 assert_eq
!(p
.maybe_parse_ascii_class(), None
);
5040 assert_eq
!(p
.offset(), 0);
5042 let p
= parser(r
"[^:alnum:]");
5043 assert_eq
!(p
.maybe_parse_ascii_class(), None
);
5044 assert_eq
!(p
.offset(), 0);
5046 let p
= parser(r
"[:alnnum:]");
5047 assert_eq
!(p
.maybe_parse_ascii_class(), None
);
5048 assert_eq
!(p
.offset(), 0);
5050 let p
= parser(r
"[:alnum]");
5051 assert_eq
!(p
.maybe_parse_ascii_class(), None
);
5052 assert_eq
!(p
.offset(), 0);
5054 let p
= parser(r
"[:alnum:");
5055 assert_eq
!(p
.maybe_parse_ascii_class(), None
);
5056 assert_eq
!(p
.offset(), 0);
5060 fn parse_unicode_class() {
5062 parser(r
"\pN").parse_escape(),
5063 Ok(Primitive
::Unicode(ast
::ClassUnicode
{
5066 kind
: ast
::ClassUnicodeKind
::OneLetter('N'
),
5069 parser(r
"\PN").parse_escape(),
5070 Ok(Primitive
::Unicode(ast
::ClassUnicode
{
5073 kind
: ast
::ClassUnicodeKind
::OneLetter('N'
),
5076 parser(r
"\p{N}").parse_escape(),
5077 Ok(Primitive
::Unicode(ast
::ClassUnicode
{
5080 kind
: ast
::ClassUnicodeKind
::Named(s("N")),
5083 parser(r
"\P{N}").parse_escape(),
5084 Ok(Primitive
::Unicode(ast
::ClassUnicode
{
5087 kind
: ast
::ClassUnicodeKind
::Named(s("N")),
5090 parser(r
"\p{Greek}").parse_escape(),
5091 Ok(Primitive
::Unicode(ast
::ClassUnicode
{
5094 kind
: ast
::ClassUnicodeKind
::Named(s("Greek")),
5098 parser(r
"\p{scx:Katakana}").parse_escape(),
5099 Ok(Primitive
::Unicode(ast
::ClassUnicode
{
5102 kind
: ast
::ClassUnicodeKind
::NamedValue
{
5103 op
: ast
::ClassUnicodeOpKind
::Colon
,
5105 value
: s("Katakana"),
5109 parser(r
"\p{scx=Katakana}").parse_escape(),
5110 Ok(Primitive
::Unicode(ast
::ClassUnicode
{
5113 kind
: ast
::ClassUnicodeKind
::NamedValue
{
5114 op
: ast
::ClassUnicodeOpKind
::Equal
,
5116 value
: s("Katakana"),
5120 parser(r
"\p{scx!=Katakana}").parse_escape(),
5121 Ok(Primitive
::Unicode(ast
::ClassUnicode
{
5124 kind
: ast
::ClassUnicodeKind
::NamedValue
{
5125 op
: ast
::ClassUnicodeOpKind
::NotEqual
,
5127 value
: s("Katakana"),
5132 parser(r
"\p{:}").parse_escape(),
5133 Ok(Primitive
::Unicode(ast
::ClassUnicode
{
5136 kind
: ast
::ClassUnicodeKind
::NamedValue
{
5137 op
: ast
::ClassUnicodeOpKind
::Colon
,
5143 parser(r
"\p{=}").parse_escape(),
5144 Ok(Primitive
::Unicode(ast
::ClassUnicode
{
5147 kind
: ast
::ClassUnicodeKind
::NamedValue
{
5148 op
: ast
::ClassUnicodeOpKind
::Equal
,
5154 parser(r
"\p{!=}").parse_escape(),
5155 Ok(Primitive
::Unicode(ast
::ClassUnicode
{
5158 kind
: ast
::ClassUnicodeKind
::NamedValue
{
5159 op
: ast
::ClassUnicodeOpKind
::NotEqual
,
5166 parser(r
"\p").parse_escape().unwrap_err(),
5169 kind
: ast
::ErrorKind
::EscapeUnexpectedEof
,
5172 parser(r
"\p{").parse_escape().unwrap_err(),
5175 kind
: ast
::ErrorKind
::EscapeUnexpectedEof
,
5178 parser(r
"\p{N").parse_escape().unwrap_err(),
5181 kind
: ast
::ErrorKind
::EscapeUnexpectedEof
,
5184 parser(r
"\p{Greek").parse_escape().unwrap_err(),
5187 kind
: ast
::ErrorKind
::EscapeUnexpectedEof
,
5191 parser(r
"\pNz").parse(),
5192 Ok(Ast
::Concat(ast
::Concat
{
5195 Ast
::Class(ast
::Class
::Unicode(ast
::ClassUnicode
{
5198 kind
: ast
::ClassUnicodeKind
::OneLetter('N'
),
5200 Ast
::Literal(ast
::Literal
{
5202 kind
: ast
::LiteralKind
::Verbatim
,
5208 parser(r
"\p{Greek}z").parse(),
5209 Ok(Ast
::Concat(ast
::Concat
{
5212 Ast
::Class(ast
::Class
::Unicode(ast
::ClassUnicode
{
5215 kind
: ast
::ClassUnicodeKind
::Named(s("Greek")),
5217 Ast
::Literal(ast
::Literal
{
5219 kind
: ast
::LiteralKind
::Verbatim
,
5227 fn parse_perl_class() {
5229 parser(r
"\d").parse_escape(),
5230 Ok(Primitive
::Perl(ast
::ClassPerl
{
5232 kind
: ast
::ClassPerlKind
::Digit
,
5236 parser(r
"\D").parse_escape(),
5237 Ok(Primitive
::Perl(ast
::ClassPerl
{
5239 kind
: ast
::ClassPerlKind
::Digit
,
5243 parser(r
"\s").parse_escape(),
5244 Ok(Primitive
::Perl(ast
::ClassPerl
{
5246 kind
: ast
::ClassPerlKind
::Space
,
5250 parser(r
"\S").parse_escape(),
5251 Ok(Primitive
::Perl(ast
::ClassPerl
{
5253 kind
: ast
::ClassPerlKind
::Space
,
5257 parser(r
"\w").parse_escape(),
5258 Ok(Primitive
::Perl(ast
::ClassPerl
{
5260 kind
: ast
::ClassPerlKind
::Word
,
5264 parser(r
"\W").parse_escape(),
5265 Ok(Primitive
::Perl(ast
::ClassPerl
{
5267 kind
: ast
::ClassPerlKind
::Word
,
5272 parser(r
"\d").parse(),
5273 Ok(Ast
::Class(ast
::Class
::Perl(ast
::ClassPerl
{
5275 kind
: ast
::ClassPerlKind
::Digit
,
5279 parser(r
"\dz").parse(),
5280 Ok(Ast
::Concat(ast
::Concat
{
5283 Ast
::Class(ast
::Class
::Perl(ast
::ClassPerl
{
5285 kind
: ast
::ClassPerlKind
::Digit
,
5288 Ast
::Literal(ast
::Literal
{
5290 kind
: ast
::LiteralKind
::Verbatim
,
5297 // This tests a bug fix where the nest limit checker wasn't decrementing
5298 // its depth during post-traversal, which causes long regexes to trip
5299 // the default limit too aggressively.
5301 fn regression_454_nest_too_big() {
5337 assert
!(parser_nest_limit(pattern
, 50).parse().is_ok());
5340 // This tests that we treat a trailing `-` in a character class as a
5341 // literal `-` even when whitespace mode is enabled and there is whitespace
5342 // after the trailing `-`.
5344 fn regression_455_trailing_dash_ignore_whitespace() {
5345 assert
!(parser("(?x)[ / - ]").parse().is_ok());
5346 assert
!(parser("(?x)[ a - ]").parse().is_ok());
5347 assert
!(parser("(?x)[
5350 ").parse().is_ok());
5351 assert
!(parser("(?x)[
5354 ").parse().is_ok());
5356 assert
!(parser("(?x)[ / -").parse().is_err());
5357 assert
!(parser("(?x)[ / - ").parse().is_err());
5358 assert
!(parser("(?x)[
5360 ").parse().is_err());
5361 assert
!(parser("(?x)[
5363 ").parse().is_err());