2 This module provides a regular expression parser.
5 use std
::borrow
::Borrow
;
6 use std
::cell
::{Cell, RefCell}
;
10 use crate::ast
::{self, Ast, Position, Span}
;
11 use crate::either
::Either
;
13 use crate::is_meta_character
;
15 type Result
<T
> = result
::Result
<T
, ast
::Error
>;
17 /// A primitive is an expression with no sub-expressions. This includes
18 /// literals, assertions and non-set character classes. This representation
19 /// is used as intermediate state in the parser.
21 /// This does not include ASCII character classes, since they can only appear
22 /// within a set character class.
23 #[derive(Clone, Debug, Eq, PartialEq)]
25 Literal(ast
::Literal
),
26 Assertion(ast
::Assertion
),
29 Unicode(ast
::ClassUnicode
),
33 /// Return the span of this primitive.
34 fn span(&self) -> &Span
{
36 Primitive
::Literal(ref x
) => &x
.span
,
37 Primitive
::Assertion(ref x
) => &x
.span
,
38 Primitive
::Dot(ref span
) => span
,
39 Primitive
::Perl(ref x
) => &x
.span
,
40 Primitive
::Unicode(ref x
) => &x
.span
,
44 /// Convert this primitive into a proper AST.
45 fn into_ast(self) -> Ast
{
47 Primitive
::Literal(lit
) => Ast
::Literal(lit
),
48 Primitive
::Assertion(assert
) => Ast
::Assertion(assert
),
49 Primitive
::Dot(span
) => Ast
::Dot(span
),
50 Primitive
::Perl(cls
) => Ast
::Class(ast
::Class
::Perl(cls
)),
51 Primitive
::Unicode(cls
) => Ast
::Class(ast
::Class
::Unicode(cls
)),
55 /// Convert this primitive into an item in a character class.
57 /// If this primitive is not a legal item (i.e., an assertion or a dot),
58 /// then return an error.
59 fn into_class_set_item
<P
: Borrow
<Parser
>>(
62 ) -> Result
<ast
::ClassSetItem
> {
63 use self::Primitive
::*;
64 use crate::ast
::ClassSetItem
;
67 Literal(lit
) => Ok(ClassSetItem
::Literal(lit
)),
68 Perl(cls
) => Ok(ClassSetItem
::Perl(cls
)),
69 Unicode(cls
) => Ok(ClassSetItem
::Unicode(cls
)),
70 x
=> Err(p
.error(*x
.span(), ast
::ErrorKind
::ClassEscapeInvalid
)),
74 /// Convert this primitive into a literal in a character class. In
75 /// particular, literals are the only valid items that can appear in
78 /// If this primitive is not a legal item (i.e., a class, assertion or a
79 /// dot), then return an error.
80 fn into_class_literal
<P
: Borrow
<Parser
>>(
83 ) -> Result
<ast
::Literal
> {
84 use self::Primitive
::*;
87 Literal(lit
) => Ok(lit
),
88 x
=> Err(p
.error(*x
.span(), ast
::ErrorKind
::ClassRangeLiteral
)),
/// Returns true if the given character is a hexadecimal digit.
///
/// Only the ASCII characters `0-9`, `a-f` and `A-F` are accepted; digits
/// from other scripts are rejected.
fn is_hex(c: char) -> bool {
    // The standard library check covers exactly the three ASCII ranges
    // that were previously compared by hand.
    c.is_ascii_hexdigit()
}
/// Returns true if the given character is valid in a capture group name.
///
/// If `first` is true, then `c` is treated as the first character in the
/// group name (which must be an ASCII letter or underscore). In any other
/// position, ASCII digits, `.`, `[` and `]` are accepted as well.
fn is_capture_char(c: char, first: bool) -> bool {
    match c {
        // Always permitted, regardless of position.
        '_' | 'A'..='Z' | 'a'..='z' => true,
        // Permitted everywhere except as the leading character.
        '0'..='9' | '.' | '[' | ']' => !first,
        _ => false,
    }
}
110 /// A builder for a regular expression parser.
112 /// This builder permits modifying configuration options for the parser.
113 #[derive(Clone, Debug)]
114 pub struct ParserBuilder
{
115 ignore_whitespace
: bool
,
120 impl Default
for ParserBuilder
{
121 fn default() -> ParserBuilder
{
127 /// Create a new parser builder with a default configuration.
128 pub fn new() -> ParserBuilder
{
130 ignore_whitespace
: false,
136 /// Build a parser from this configuration with the given pattern.
137 pub fn build(&self) -> Parser
{
139 pos
: Cell
::new(Position { offset: 0, line: 1, column: 1 }
),
140 capture_index
: Cell
::new(0),
141 nest_limit
: self.nest_limit
,
143 initial_ignore_whitespace
: self.ignore_whitespace
,
144 ignore_whitespace
: Cell
::new(self.ignore_whitespace
),
145 comments
: RefCell
::new(vec
![]),
146 stack_group
: RefCell
::new(vec
![]),
147 stack_class
: RefCell
::new(vec
![]),
148 capture_names
: RefCell
::new(vec
![]),
149 scratch
: RefCell
::new(String
::new()),
153 /// Set the nesting limit for this parser.
155 /// The nesting limit controls how deep the abstract syntax tree is allowed
156 /// to be. If the AST exceeds the given limit (e.g., with too many nested
157 /// groups), then an error is returned by the parser.
159 /// The purpose of this limit is to act as a heuristic to prevent stack
160 /// overflow for consumers that do structural induction on an `Ast` using
161 /// explicit recursion. While this crate never does this (instead using
162 /// constant stack space and moving the call stack to the heap), other
165 /// This limit is not checked until the entire Ast is parsed. Therefore,
166 /// if callers want to put a limit on the amount of heap space used, then
167 /// they should impose a limit on the length, in bytes, of the concrete
168 /// pattern string. In particular, this is viable since this parser
169 /// implementation will limit itself to heap space proportional to the
170 /// length of the pattern string.
172 /// Note that a nest limit of `0` will return a nest limit error for most
173 /// patterns but not all. For example, a nest limit of `0` permits `a` but
174 /// not `ab`, since `ab` requires a concatenation, which results in a nest
175 /// depth of `1`. In general, a nest limit is not something that manifests
176 /// in an obvious way in the concrete syntax, therefore, it should not be
177 /// used in a granular way.
178 pub fn nest_limit(&mut self, limit
: u32) -> &mut ParserBuilder
{
179 self.nest_limit
= limit
;
183 /// Whether to support octal syntax or not.
185 /// Octal syntax is a little-known way of uttering Unicode codepoints in
186 /// a regular expression. For example, `a`, `\x61`, `\u0061` and
187 /// `\141` are all equivalent regular expressions, where the last example
188 /// shows octal syntax.
190 /// While supporting octal syntax isn't in and of itself a problem, it does
191 /// make good error messages harder. That is, in PCRE based regex engines,
192 /// syntax like `\0` invokes a backreference, which is explicitly
193 /// unsupported in Rust's regex engine. However, many users expect it to
194 /// be supported. Therefore, when octal support is disabled, the error
195 /// message will explicitly mention that backreferences aren't supported.
197 /// Octal syntax is disabled by default.
198 pub fn octal(&mut self, yes
: bool
) -> &mut ParserBuilder
{
203 /// Enable verbose mode in the regular expression.
205 /// When enabled, verbose mode permits insignificant whitespace in many
206 /// places in the regular expression, as well as comments. Comments are
207 /// started using `#` and continue until the end of the line.
209 /// By default, this is disabled. It may be selectively enabled in the
210 /// regular expression by using the `x` flag regardless of this setting.
211 pub fn ignore_whitespace(&mut self, yes
: bool
) -> &mut ParserBuilder
{
212 self.ignore_whitespace
= yes
;
217 /// A regular expression parser.
219 /// This parses a string representation of a regular expression into an
220 /// abstract syntax tree. The size of the tree is proportional to the length
221 /// of the regular expression pattern.
223 /// A `Parser` can be configured in more detail via a
224 /// [`ParserBuilder`](struct.ParserBuilder.html).
225 #[derive(Clone, Debug)]
227 /// The current position of the parser.
229 /// The current capture index.
230 capture_index
: Cell
<u32>,
231 /// The maximum number of open parens/brackets allowed. If the parser
232 /// exceeds this number, then an error is returned.
234 /// Whether to support octal syntax or not. When `false`, the parser will
235 /// return an error helpfully pointing out that backreferences are not
238 /// The initial setting for `ignore_whitespace` as provided by
239 /// `ParserBuilder`. It is used when resetting the parser's state.
240 initial_ignore_whitespace
: bool
,
241 /// Whether whitespace should be ignored. When enabled, comments are
243 ignore_whitespace
: Cell
<bool
>,
244 /// A list of comments, in order of appearance.
245 comments
: RefCell
<Vec
<ast
::Comment
>>,
246 /// A stack of grouped sub-expressions, including alternations.
247 stack_group
: RefCell
<Vec
<GroupState
>>,
248 /// A stack of nested character classes. This is only non-empty when
250 stack_class
: RefCell
<Vec
<ClassState
>>,
251 /// A sorted sequence of capture names. This is used to detect duplicate
252 /// capture names and report an error if one is detected.
253 capture_names
: RefCell
<Vec
<ast
::CaptureName
>>,
254 /// A scratch buffer used in various places. Mostly this is used to
255 /// accumulate relevant characters from parts of a pattern.
256 scratch
: RefCell
<String
>,
259 /// ParserI is the internal parser implementation.
261 /// We use this separate type so that we can carry the provided pattern string
262 /// along with us. In particular, a `Parser` internal state is not tied to any
263 /// one pattern, but `ParserI` is.
265 /// This type also lets us use `ParserI<&Parser>` in production code while
266 /// retaining the convenience of `ParserI<Parser>` for tests, which sometimes
267 /// work against the internal interface of the parser.
268 #[derive(Clone, Debug)]
269 struct ParserI
<'s
, P
> {
270 /// The parser state/configuration.
272 /// The full regular expression provided by the user.
276 /// GroupState represents a single stack frame while parsing nested groups
277 /// and alternations. Each frame records the state up to an opening parenthesis
/// or an alternating bracket `|`.
279 #[derive(Clone, Debug)]
281 /// This state is pushed whenever an opening group is found.
283 /// The concatenation immediately preceding the opening group.
285 /// The group that has been opened. Its sub-AST is always empty.
287 /// Whether this group has the `x` flag enabled or not.
288 ignore_whitespace
: bool
,
290 /// This state is pushed whenever a new alternation branch is found. If
291 /// an alternation branch is found and this state is at the top of the
292 /// stack, then this state should be modified to include the new
294 Alternation(ast
::Alternation
),
297 /// ClassState represents a single stack frame while parsing character classes.
298 /// Each frame records the state up to an intersection, difference, symmetric
299 /// difference or nested class.
301 /// Note that a parser's character class stack is only non-empty when parsing
302 /// a character class. In all other cases, it is empty.
303 #[derive(Clone, Debug)]
305 /// This state is pushed whenever an opening bracket is found.
307 /// The union of class items immediately preceding this class.
308 union: ast
::ClassSetUnion
,
309 /// The class that has been opened. Typically this just corresponds
310 /// to the `[`, but it can also include `[^` since `^` indicates
311 /// negation of the class.
312 set
: ast
::ClassBracketed
,
/// This state is pushed when an operator is seen. When popped, the stored
315 /// set becomes the left hand side of the operator.
317 /// The type of the operation, i.e., &&, -- or ~~.
318 kind
: ast
::ClassSetBinaryOpKind
,
319 /// The left-hand side of the operator.
325 /// Create a new parser with a default configuration.
327 /// The parser can be run with either the `parse` or `parse_with_comments`
328 /// methods. The parse methods return an abstract syntax tree.
330 /// To set configuration options on the parser, use
331 /// [`ParserBuilder`](struct.ParserBuilder.html).
332 pub fn new() -> Parser
{
333 ParserBuilder
::new().build()
336 /// Parse the regular expression into an abstract syntax tree.
337 pub fn parse(&mut self, pattern
: &str) -> Result
<Ast
> {
338 ParserI
::new(self, pattern
).parse()
341 /// Parse the regular expression and return an abstract syntax tree with
342 /// all of the comments found in the pattern.
343 pub fn parse_with_comments(
346 ) -> Result
<ast
::WithComments
> {
347 ParserI
::new(self, pattern
).parse_with_comments()
350 /// Reset the internal state of a parser.
352 /// This is called at the beginning of every parse. This prevents the
353 /// parser from running with inconsistent state (say, if a previous
354 /// invocation returned an error and the parser is reused).
356 // These settings should be in line with the construction
357 // in `ParserBuilder::build`.
358 self.pos
.set(Position { offset: 0, line: 1, column: 1 }
);
359 self.ignore_whitespace
.set(self.initial_ignore_whitespace
);
360 self.comments
.borrow_mut().clear();
361 self.stack_group
.borrow_mut().clear();
362 self.stack_class
.borrow_mut().clear();
366 impl<'s
, P
: Borrow
<Parser
>> ParserI
<'s
, P
> {
367 /// Build an internal parser from a parser configuration and a pattern.
368 fn new(parser
: P
, pattern
: &'s
str) -> ParserI
<'s
, P
> {
369 ParserI { parser, pattern }
372 /// Return a reference to the parser state.
373 fn parser(&self) -> &Parser
{
377 /// Return a reference to the pattern being parsed.
378 fn pattern(&self) -> &str {
379 self.pattern
.borrow()
382 /// Create a new error with the given span and error type.
383 fn error(&self, span
: Span
, kind
: ast
::ErrorKind
) -> ast
::Error
{
384 ast
::Error { kind, pattern: self.pattern().to_string(), span }
387 /// Return the current offset of the parser.
389 /// The offset starts at `0` from the beginning of the regular expression
391 fn offset(&self) -> usize {
392 self.parser().pos
.get().offset
395 /// Return the current line number of the parser.
397 /// The line number starts at `1`.
398 fn line(&self) -> usize {
399 self.parser().pos
.get().line
402 /// Return the current column of the parser.
404 /// The column number starts at `1` and is reset whenever a `\n` is seen.
405 fn column(&self) -> usize {
406 self.parser().pos
.get().column
409 /// Return the next capturing index. Each subsequent call increments the
412 /// The span given should correspond to the location of the opening
415 /// If the capture limit is exceeded, then an error is returned.
416 fn next_capture_index(&self, span
: Span
) -> Result
<u32> {
417 let current
= self.parser().capture_index
.get();
418 let i
= current
.checked_add(1).ok_or_else(|| {
419 self.error(span
, ast
::ErrorKind
::CaptureLimitExceeded
)
421 self.parser().capture_index
.set(i
);
425 /// Adds the given capture name to this parser. If this capture name has
426 /// already been used, then an error is returned.
427 fn add_capture_name(&self, cap
: &ast
::CaptureName
) -> Result
<()> {
428 let mut names
= self.parser().capture_names
.borrow_mut();
430 .binary_search_by_key(&cap
.name
.as_str(), |c
| c
.name
.as_str())
433 names
.insert(i
, cap
.clone());
436 Ok(i
) => Err(self.error(
438 ast
::ErrorKind
::GroupNameDuplicate { original: names[i].span }
,
443 /// Return whether the parser should ignore whitespace or not.
444 fn ignore_whitespace(&self) -> bool
{
445 self.parser().ignore_whitespace
.get()
448 /// Return the character at the current position of the parser.
450 /// This panics if the current position does not point to a valid char.
451 fn char(&self) -> char {
452 self.char_at(self.offset())
455 /// Return the character at the given position.
457 /// This panics if the given position does not point to a valid char.
458 fn char_at(&self, i
: usize) -> char {
462 .unwrap_or_else(|| panic
!("expected char at offset {}", i
))
465 /// Bump the parser to the next Unicode scalar value.
467 /// If the end of the input has been reached, then `false` is returned.
468 fn bump(&self) -> bool
{
472 let Position { mut offset, mut line, mut column }
= self.pos();
473 if self.char() == '
\n'
{
474 line
= line
.checked_add(1).unwrap();
477 column
= column
.checked_add(1).unwrap();
479 offset
+= self.char().len_utf8();
480 self.parser().pos
.set(Position { offset, line, column }
);
481 self.pattern()[self.offset()..].chars().next().is_some()
484 /// If the substring starting at the current position of the parser has
485 /// the given prefix, then bump the parser to the character immediately
486 /// following the prefix and return true. Otherwise, don't bump the parser
487 /// and return false.
488 fn bump_if(&self, prefix
: &str) -> bool
{
489 if self.pattern()[self.offset()..].starts_with(prefix
) {
490 for _
in 0..prefix
.chars().count() {
499 /// Returns true if and only if the parser is positioned at a look-around
500 /// prefix. The conditions under which this returns true must always
/// correspond to a regular expression that would otherwise be considered
504 /// This should only be called immediately after parsing the opening of
505 /// a group or a set of flags.
506 fn is_lookaround_prefix(&self) -> bool
{
508 || self.bump_if("?!")
509 || self.bump_if("?<=")
510 || self.bump_if("?<!")
513 /// Bump the parser, and if the `x` flag is enabled, bump through any
514 /// subsequent spaces. Return true if and only if the parser is not at
516 fn bump_and_bump_space(&self) -> bool
{
524 /// If the `x` flag is enabled (i.e., whitespace insensitivity with
525 /// comments), then this will advance the parser through all whitespace
526 /// and comments to the next non-whitespace non-comment byte.
528 /// If the `x` flag is disabled, then this is a no-op.
530 /// This should be used selectively throughout the parser where
531 /// arbitrary whitespace is permitted when the `x` flag is enabled. For
532 /// example, `{ 5 , 6}` is equivalent to `{5,6}`.
533 fn bump_space(&self) {
534 if !self.ignore_whitespace() {
537 while !self.is_eof() {
538 if self.char().is_whitespace() {
540 } else if self.char() == '
#' {
541 let start
= self.pos();
542 let mut comment_text
= String
::new();
544 while !self.is_eof() {
550 comment_text
.push(c
);
552 let comment
= ast
::Comment
{
553 span
: Span
::new(start
, self.pos()),
554 comment
: comment_text
,
556 self.parser().comments
.borrow_mut().push(comment
);
563 /// Peek at the next character in the input without advancing the parser.
565 /// If the input has been exhausted, then this returns `None`.
566 fn peek(&self) -> Option
<char> {
570 self.pattern()[self.offset() + self.char().len_utf8()..].chars().next()
573 /// Like peek, but will ignore spaces when the parser is in whitespace
574 /// insensitive mode.
575 fn peek_space(&self) -> Option
<char> {
576 if !self.ignore_whitespace() {
582 let mut start
= self.offset() + self.char().len_utf8();
583 let mut in_comment
= false;
584 for (i
, c
) in self.pattern()[start
..].char_indices() {
585 if c
.is_whitespace() {
587 } else if !in_comment
&& c
== '
#' {
589 } else if in_comment
&& c
== '
\n'
{
596 self.pattern()[start
..].chars().next()
599 /// Returns true if the next call to `bump` would return false.
600 fn is_eof(&self) -> bool
{
601 self.offset() == self.pattern().len()
604 /// Return the current position of the parser, which includes the offset,
606 fn pos(&self) -> Position
{
607 self.parser().pos
.get()
610 /// Create a span at the current position of the parser. Both the start
611 /// and end of the span are set.
612 fn span(&self) -> Span
{
613 Span
::splat(self.pos())
616 /// Create a span that covers the current character.
617 fn span_char(&self) -> Span
{
618 let mut next
= Position
{
619 offset
: self.offset().checked_add(self.char().len_utf8()).unwrap(),
621 column
: self.column().checked_add(1).unwrap(),
623 if self.char() == '
\n'
{
627 Span
::new(self.pos(), next
)
630 /// Parse and push a single alternation on to the parser's internal stack.
631 /// If the top of the stack already has an alternation, then add to that
632 /// instead of pushing a new one.
634 /// The concatenation given corresponds to a single alternation branch.
635 /// The concatenation returned starts the next branch and is empty.
637 /// This assumes the parser is currently positioned at `|` and will advance
638 /// the parser to the character following `|`.
640 fn push_alternate(&self, mut concat
: ast
::Concat
) -> Result
<ast
::Concat
> {
641 assert_eq
!(self.char(), '
|'
);
642 concat
.span
.end
= self.pos();
643 self.push_or_add_alternation(concat
);
645 Ok(ast
::Concat { span: self.span(), asts: vec![] }
)
648 /// Pushes or adds the given branch of an alternation to the parser's
649 /// internal stack of state.
650 fn push_or_add_alternation(&self, concat
: ast
::Concat
) {
651 use self::GroupState
::*;
653 let mut stack
= self.parser().stack_group
.borrow_mut();
654 if let Some(&mut Alternation(ref mut alts
)) = stack
.last_mut() {
655 alts
.asts
.push(concat
.into_ast());
658 stack
.push(Alternation(ast
::Alternation
{
659 span
: Span
::new(concat
.span
.start
, self.pos()),
660 asts
: vec
![concat
.into_ast()],
664 /// Parse and push a group AST (and its parent concatenation) on to the
665 /// parser's internal stack. Return a fresh concatenation corresponding
666 /// to the group's sub-AST.
668 /// If a set of flags was found (with no group), then the concatenation
669 /// is returned with that set of flags added.
671 /// This assumes that the parser is currently positioned on the opening
672 /// parenthesis. It advances the parser to the character at the start
673 /// of the sub-expression (or adjoining expression).
675 /// If there was a problem parsing the start of the group, then an error
678 fn push_group(&self, mut concat
: ast
::Concat
) -> Result
<ast
::Concat
> {
679 assert_eq
!(self.char(), '
('
);
680 match self.parse_group()?
{
681 Either
::Left(set
) => {
682 let ignore
= set
.flags
.flag_state(ast
::Flag
::IgnoreWhitespace
);
683 if let Some(v
) = ignore
{
684 self.parser().ignore_whitespace
.set(v
);
687 concat
.asts
.push(Ast
::Flags(set
));
690 Either
::Right(group
) => {
691 let old_ignore_whitespace
= self.ignore_whitespace();
692 let new_ignore_whitespace
= group
694 .and_then(|f
| f
.flag_state(ast
::Flag
::IgnoreWhitespace
))
695 .unwrap_or(old_ignore_whitespace
);
696 self.parser().stack_group
.borrow_mut().push(
700 ignore_whitespace
: old_ignore_whitespace
,
703 self.parser().ignore_whitespace
.set(new_ignore_whitespace
);
704 Ok(ast
::Concat { span: self.span(), asts: vec![] }
)
709 /// Pop a group AST from the parser's internal stack and set the group's
710 /// AST to the given concatenation. Return the concatenation containing
713 /// This assumes that the parser is currently positioned on the closing
714 /// parenthesis and advances the parser to the character following the `)`.
716 /// If no such group could be popped, then an unopened group error is
719 fn pop_group(&self, mut group_concat
: ast
::Concat
) -> Result
<ast
::Concat
> {
720 use self::GroupState
::*;
722 assert_eq
!(self.char(), '
)'
);
723 let mut stack
= self.parser().stack_group
.borrow_mut();
724 let (mut prior_concat
, mut group
, ignore_whitespace
, alt
) = match stack
727 Some(Group { concat, group, ignore_whitespace }
) => {
728 (concat
, group
, ignore_whitespace
, None
)
730 Some(Alternation(alt
)) => match stack
.pop() {
731 Some(Group { concat, group, ignore_whitespace }
) => {
732 (concat
, group
, ignore_whitespace
, Some(alt
))
734 None
| Some(Alternation(_
)) => {
735 return Err(self.error(
737 ast
::ErrorKind
::GroupUnopened
,
743 .error(self.span_char(), ast
::ErrorKind
::GroupUnopened
));
746 self.parser().ignore_whitespace
.set(ignore_whitespace
);
747 group_concat
.span
.end
= self.pos();
749 group
.span
.end
= self.pos();
752 alt
.span
.end
= group_concat
.span
.end
;
753 alt
.asts
.push(group_concat
.into_ast());
754 group
.ast
= Box
::new(alt
.into_ast());
757 group
.ast
= Box
::new(group_concat
.into_ast());
760 prior_concat
.asts
.push(Ast
::Group(group
));
764 /// Pop the last state from the parser's internal stack, if it exists, and
765 /// add the given concatenation to it. There either must be no state or a
766 /// single alternation item on the stack. Any other scenario produces an
769 /// This assumes that the parser has advanced to the end.
771 fn pop_group_end(&self, mut concat
: ast
::Concat
) -> Result
<Ast
> {
772 concat
.span
.end
= self.pos();
773 let mut stack
= self.parser().stack_group
.borrow_mut();
774 let ast
= match stack
.pop() {
775 None
=> Ok(concat
.into_ast()),
776 Some(GroupState
::Alternation(mut alt
)) => {
777 alt
.span
.end
= self.pos();
778 alt
.asts
.push(concat
.into_ast());
779 Ok(Ast
::Alternation(alt
))
781 Some(GroupState
::Group { group, .. }
) => {
783 self.error(group
.span
, ast
::ErrorKind
::GroupUnclosed
)
787 // If we try to pop again, there should be nothing.
790 Some(GroupState
::Alternation(_
)) => {
791 // This unreachable is unfortunate. This case can't happen
792 // because the only way we can be here is if there were two
793 // `GroupState::Alternation`s adjacent in the parser's stack,
794 // which we guarantee to never happen because we never push a
795 // `GroupState::Alternation` if one is already at the top of
799 Some(GroupState
::Group { group, .. }
) => {
800 Err(self.error(group
.span
, ast
::ErrorKind
::GroupUnclosed
))
805 /// Parse the opening of a character class and push the current class
806 /// parsing context onto the parser's stack. This assumes that the parser
807 /// is positioned at an opening `[`. The given union should correspond to
808 /// the union of set items built up before seeing the `[`.
810 /// If there was a problem parsing the opening of the class, then an error
811 /// is returned. Otherwise, a new union of set items for the class is
812 /// returned (which may be populated with either a `]` or a `-`).
816 parent_union
: ast
::ClassSetUnion
,
817 ) -> Result
<ast
::ClassSetUnion
> {
818 assert_eq
!(self.char(), '
['
);
820 let (nested_set
, nested_union
) = self.parse_set_class_open()?
;
824 .push(ClassState
::Open { union: parent_union, set: nested_set }
);
828 /// Parse the end of a character class set and pop the character class
829 /// parser stack. The union given corresponds to the last union built
830 /// before seeing the closing `]`. The union returned corresponds to the
831 /// parent character class set with the nested class added to it.
833 /// This assumes that the parser is positioned at a `]` and will advance
834 /// the parser to the byte immediately following the `]`.
836 /// If the stack is empty after popping, then this returns the final
837 /// "top-level" character class AST (where a "top-level" character class
838 /// is one that is not nested inside any other character class).
840 /// If there is no corresponding opening bracket on the parser's stack,
841 /// then an error is returned.
845 nested_union
: ast
::ClassSetUnion
,
846 ) -> Result
<Either
<ast
::ClassSetUnion
, ast
::Class
>> {
847 assert_eq
!(self.char(), '
]'
);
849 let item
= ast
::ClassSet
::Item(nested_union
.into_item());
850 let prevset
= self.pop_class_op(item
);
851 let mut stack
= self.parser().stack_class
.borrow_mut();
854 // We can never observe an empty stack:
856 // 1) We are guaranteed to start with a non-empty stack since
857 // the character class parser is only initiated when it sees
859 // 2) If we ever observe an empty stack while popping after
860 // seeing a `]`, then we signal the character class parser
862 panic
!("unexpected empty character class stack")
864 Some(ClassState
::Op { .. }
) => {
865 // This panic is unfortunate, but this case is impossible
866 // since we already popped the Op state if one exists above.
867 // Namely, every push to the class parser stack is guarded by
868 // whether an existing Op is already on the top of the stack.
869 // If it is, the existing Op is modified. That is, the stack
870 // can never have consecutive Op states.
871 panic
!("unexpected ClassState::Op")
873 Some(ClassState
::Open { mut union, mut set }
) => {
875 set
.span
.end
= self.pos();
877 if stack
.is_empty() {
878 Ok(Either
::Right(ast
::Class
::Bracketed(set
)))
880 union.push(ast
::ClassSetItem
::Bracketed(Box
::new(set
)));
881 Ok(Either
::Left(union))
887 /// Return an "unclosed class" error whose span points to the most
888 /// recently opened class.
890 /// This should only be called while parsing a character class.
892 fn unclosed_class_error(&self) -> ast
::Error
{
893 for state
in self.parser().stack_class
.borrow().iter().rev() {
894 if let ClassState
::Open { ref set, .. }
= *state
{
895 return self.error(set
.span
, ast
::ErrorKind
::ClassUnclosed
);
898 // We are guaranteed to have a non-empty stack with at least
899 // one open bracket, so we should never get here.
900 panic
!("no open character class found")
903 /// Push the current set of class items on to the class parser's stack as
904 /// the left hand side of the given operator.
906 /// A fresh set union is returned, which should be used to build the right
907 /// hand side of this operator.
911 next_kind
: ast
::ClassSetBinaryOpKind
,
912 next_union
: ast
::ClassSetUnion
,
913 ) -> ast
::ClassSetUnion
{
914 let item
= ast
::ClassSet
::Item(next_union
.into_item());
915 let new_lhs
= self.pop_class_op(item
);
919 .push(ClassState
::Op { kind: next_kind, lhs: new_lhs }
);
920 ast
::ClassSetUnion { span: self.span(), items: vec![] }
923 /// Pop a character class set from the character class parser stack. If the
924 /// top of the stack is just an item (not an operation), then return the
925 /// given set unchanged. If the top of the stack is an operation, then the
926 /// given set will be used as the rhs of the operation on the top of the
927 /// stack. In that case, the binary operation is returned as a set.
929 fn pop_class_op(&self, rhs
: ast
::ClassSet
) -> ast
::ClassSet
{
930 let mut stack
= self.parser().stack_class
.borrow_mut();
931 let (kind
, lhs
) = match stack
.pop() {
932 Some(ClassState
::Op { kind, lhs }
) => (kind
, lhs
),
933 Some(state @ ClassState
::Open { .. }
) => {
937 None
=> unreachable
!(),
939 let span
= Span
::new(lhs
.span().start
, rhs
.span().end
);
940 ast
::ClassSet
::BinaryOp(ast
::ClassSetBinaryOp
{
949 impl<'s
, P
: Borrow
<Parser
>> ParserI
<'s
, P
> {
950 /// Parse the regular expression into an abstract syntax tree.
951 fn parse(&self) -> Result
<Ast
> {
952 self.parse_with_comments().map(|astc
| astc
.ast
)
955 /// Parse the regular expression and return an abstract syntax tree with
956 /// all of the comments found in the pattern.
957 fn parse_with_comments(&self) -> Result
<ast
::WithComments
> {
958 assert_eq
!(self.offset(), 0, "parser can only be used once");
959 self.parser().reset();
960 let mut concat
= ast
::Concat { span: self.span(), asts: vec![] }
;
967 '
('
=> concat
= self.push_group(concat
)?
,
968 '
)'
=> concat
= self.pop_group(concat
)?
,
969 '
|'
=> concat
= self.push_alternate(concat
)?
,
971 let class
= self.parse_set_class()?
;
972 concat
.asts
.push(Ast
::Class(class
));
975 concat
= self.parse_uncounted_repetition(
977 ast
::RepetitionKind
::ZeroOrOne
,
981 concat
= self.parse_uncounted_repetition(
983 ast
::RepetitionKind
::ZeroOrMore
,
987 concat
= self.parse_uncounted_repetition(
989 ast
::RepetitionKind
::OneOrMore
,
993 concat
= self.parse_counted_repetition(concat
)?
;
995 _
=> concat
.asts
.push(self.parse_primitive()?
.into_ast()),
998 let ast
= self.pop_group_end(concat
)?
;
999 NestLimiter
::new(self).check(&ast
)?
;
1000 Ok(ast
::WithComments
{
1002 comments
: mem
::replace(
1003 &mut *self.parser().comments
.borrow_mut(),
1009 /// Parses an uncounted repetition operation. An uncounted repetition
1010 /// operator includes ?, * and +, but does not include the {m,n} syntax.
1011 /// The given `kind` should correspond to the operator observed by the
1014 /// This assumes that the parser is currently positioned at the repetition
1015 /// operator and advances the parser to the first character after the
1016 /// operator. (Note that the operator may include a single additional `?`,
1017 /// which makes the operator ungreedy.)
1019 /// The caller should include the concatenation that is being built. The
1020 /// concatenation returned includes the repetition operator applied to the
1021 /// last expression in the given concatenation.
1023 fn parse_uncounted_repetition(
1025 mut concat
: ast
::Concat
,
1026 kind
: ast
::RepetitionKind
,
1027 ) -> Result
<ast
::Concat
> {
1029 self.char() == '?'
|| self.char() == '
*'
|| self.char() == '
+'
1031 let op_start
= self.pos();
1032 let ast
= match concat
.asts
.pop() {
1036 self.error(self.span(), ast
::ErrorKind
::RepetitionMissing
)
1041 Ast
::Empty(_
) | Ast
::Flags(_
) => {
1043 self.error(self.span(), ast
::ErrorKind
::RepetitionMissing
)
1048 let mut greedy
= true;
1049 if self.bump() && self.char() == '?'
{
1053 concat
.asts
.push(Ast
::Repetition(ast
::Repetition
{
1054 span
: ast
.span().with_end(self.pos()),
1055 op
: ast
::RepetitionOp
{
1056 span
: Span
::new(op_start
, self.pos()),
1065 /// Parses a counted repetition operation. A counted repetition operator
1066 /// corresponds to the {m,n} syntax, and does not include the ?, * or +
1069 /// This assumes that the parser is currently positioned at the opening `{`
1070 /// and advances the parser to the first character after the operator.
1071 /// (Note that the operator may include a single additional `?`, which
1072 /// makes the operator ungreedy.)
1074 /// The caller should include the concatenation that is being built. The
1075 /// concatenation returned includes the repetition operator applied to the
1076 /// last expression in the given concatenation.
1078 fn parse_counted_repetition(
1080 mut concat
: ast
::Concat
,
1081 ) -> Result
<ast
::Concat
> {
1082 assert
!(self.char() == '
{'
);
1083 let start
= self.pos();
1084 let ast
= match concat
.asts
.pop() {
1088 self.error(self.span(), ast
::ErrorKind
::RepetitionMissing
)
1093 Ast
::Empty(_
) | Ast
::Flags(_
) => {
1095 self.error(self.span(), ast
::ErrorKind
::RepetitionMissing
)
1100 if !self.bump_and_bump_space() {
1101 return Err(self.error(
1102 Span
::new(start
, self.pos()),
1103 ast
::ErrorKind
::RepetitionCountUnclosed
,
1106 let count_start
= specialize_err(
1107 self.parse_decimal(),
1108 ast
::ErrorKind
::DecimalEmpty
,
1109 ast
::ErrorKind
::RepetitionCountDecimalEmpty
,
1111 let mut range
= ast
::RepetitionRange
::Exactly(count_start
);
1113 return Err(self.error(
1114 Span
::new(start
, self.pos()),
1115 ast
::ErrorKind
::RepetitionCountUnclosed
,
1118 if self.char() == '
,'
{
1119 if !self.bump_and_bump_space() {
1120 return Err(self.error(
1121 Span
::new(start
, self.pos()),
1122 ast
::ErrorKind
::RepetitionCountUnclosed
,
1125 if self.char() != '
}'
{
1126 let count_end
= specialize_err(
1127 self.parse_decimal(),
1128 ast
::ErrorKind
::DecimalEmpty
,
1129 ast
::ErrorKind
::RepetitionCountDecimalEmpty
,
1131 range
= ast
::RepetitionRange
::Bounded(count_start
, count_end
);
1133 range
= ast
::RepetitionRange
::AtLeast(count_start
);
1136 if self.is_eof() || self.char() != '
}'
{
1137 return Err(self.error(
1138 Span
::new(start
, self.pos()),
1139 ast
::ErrorKind
::RepetitionCountUnclosed
,
1143 let mut greedy
= true;
1144 if self.bump_and_bump_space() && self.char() == '?'
{
1149 let op_span
= Span
::new(start
, self.pos());
1150 if !range
.is_valid() {
1152 self.error(op_span
, ast
::ErrorKind
::RepetitionCountInvalid
)
1155 concat
.asts
.push(Ast
::Repetition(ast
::Repetition
{
1156 span
: ast
.span().with_end(self.pos()),
1157 op
: ast
::RepetitionOp
{
1159 kind
: ast
::RepetitionKind
::Range(range
),
1167 /// Parse a group (which contains a sub-expression) or a set of flags.
1169 /// If a group was found, then it is returned with an empty AST. If a set
1170 /// of flags is found, then that set is returned.
1172 /// The parser should be positioned at the opening parenthesis.
1174 /// This advances the parser to the character before the start of the
1175 /// sub-expression (in the case of a group) or to the closing parenthesis
1176 /// immediately following the set of flags.
1180 /// If flags are given and incorrectly specified, then a corresponding
1181 /// error is returned.
1183 /// If a capture name is given and it is incorrectly specified, then a
1184 /// corresponding error is returned.
1186 fn parse_group(&self) -> Result
<Either
<ast
::SetFlags
, ast
::Group
>> {
1187 assert_eq
!(self.char(), '
('
);
1188 let open_span
= self.span_char();
1191 if self.is_lookaround_prefix() {
1192 return Err(self.error(
1193 Span
::new(open_span
.start
, self.span().end
),
1194 ast
::ErrorKind
::UnsupportedLookAround
,
1197 let inner_span
= self.span();
1198 if self.bump_if("?P<") {
1199 let capture_index
= self.next_capture_index(open_span
)?
;
1200 let cap
= self.parse_capture_name(capture_index
)?
;
1201 Ok(Either
::Right(ast
::Group
{
1203 kind
: ast
::GroupKind
::CaptureName(cap
),
1204 ast
: Box
::new(Ast
::Empty(self.span())),
1206 } else if self.bump_if("?") {
1209 self.error(open_span
, ast
::ErrorKind
::GroupUnclosed
)
1212 let flags
= self.parse_flags()?
;
1213 let char_end
= self.char();
1215 if char_end
== '
)'
{
1216 // We don't allow empty flags, e.g., `(?)`. We instead
1217 // interpret it as a repetition operator missing its argument.
1218 if flags
.items
.is_empty() {
1219 return Err(self.error(
1221 ast
::ErrorKind
::RepetitionMissing
,
1224 Ok(Either
::Left(ast
::SetFlags
{
1225 span
: Span { end: self.pos(), ..open_span }
,
1229 assert_eq
!(char_end
, '
:'
);
1230 Ok(Either
::Right(ast
::Group
{
1232 kind
: ast
::GroupKind
::NonCapturing(flags
),
1233 ast
: Box
::new(Ast
::Empty(self.span())),
1237 let capture_index
= self.next_capture_index(open_span
)?
;
1238 Ok(Either
::Right(ast
::Group
{
1240 kind
: ast
::GroupKind
::CaptureIndex(capture_index
),
1241 ast
: Box
::new(Ast
::Empty(self.span())),
1246 /// Parses a capture group name. Assumes that the parser is positioned at
1247 /// the first character in the name following the opening `<` (and may
1248 /// possibly be EOF). This advances the parser to the first character
1249 /// following the closing `>`.
1251 /// The caller must provide the capture index of the group for this name.
1253 fn parse_capture_name(
1256 ) -> Result
<ast
::CaptureName
> {
1259 .error(self.span(), ast
::ErrorKind
::GroupNameUnexpectedEof
));
1261 let start
= self.pos();
1263 if self.char() == '
>'
{
1266 if !is_capture_char(self.char(), self.pos() == start
) {
1267 return Err(self.error(
1269 ast
::ErrorKind
::GroupNameInvalid
,
1276 let end
= self.pos();
1279 .error(self.span(), ast
::ErrorKind
::GroupNameUnexpectedEof
));
1281 assert_eq
!(self.char(), '
>'
);
1283 let name
= &self.pattern()[start
.offset
..end
.offset
];
1284 if name
.is_empty() {
1285 return Err(self.error(
1286 Span
::new(start
, start
),
1287 ast
::ErrorKind
::GroupNameEmpty
,
1290 let capname
= ast
::CaptureName
{
1291 span
: Span
::new(start
, end
),
1292 name
: name
.to_string(),
1293 index
: capture_index
,
1295 self.add_capture_name(&capname
)?
;
1299 /// Parse a sequence of flags starting at the current character.
1301 /// This advances the parser to the character immediately following the
1302 /// flags, which is guaranteed to be either `:` or `)`.
1306 /// If any flags are duplicated, then an error is returned.
1308 /// If the negation operator is used more than once, then an error is
1311 /// If no flags could be found or if the negation operator is not followed
1312 /// by any flags, then an error is returned.
1314 fn parse_flags(&self) -> Result
<ast
::Flags
> {
1315 let mut flags
= ast
::Flags { span: self.span(), items: vec![] }
;
1316 let mut last_was_negation
= None
;
1317 while self.char() != '
:'
&& self.char() != '
)'
{
1318 if self.char() == '
-'
{
1319 last_was_negation
= Some(self.span_char());
1320 let item
= ast
::FlagsItem
{
1321 span
: self.span_char(),
1322 kind
: ast
::FlagsItemKind
::Negation
,
1324 if let Some(i
) = flags
.add_item(item
) {
1325 return Err(self.error(
1327 ast
::ErrorKind
::FlagRepeatedNegation
{
1328 original
: flags
.items
[i
].span
,
1333 last_was_negation
= None
;
1334 let item
= ast
::FlagsItem
{
1335 span
: self.span_char(),
1336 kind
: ast
::FlagsItemKind
::Flag(self.parse_flag()?
),
1338 if let Some(i
) = flags
.add_item(item
) {
1339 return Err(self.error(
1341 ast
::ErrorKind
::FlagDuplicate
{
1342 original
: flags
.items
[i
].span
,
1349 self.error(self.span(), ast
::ErrorKind
::FlagUnexpectedEof
)
1353 if let Some(span
) = last_was_negation
{
1354 return Err(self.error(span
, ast
::ErrorKind
::FlagDanglingNegation
));
1356 flags
.span
.end
= self.pos();
1360 /// Parse the current character as a flag. Do not advance the parser.
1364 /// If the flag is not recognized, then an error is returned.
1366 fn parse_flag(&self) -> Result
<ast
::Flag
> {
1368 'i'
=> Ok(ast
::Flag
::CaseInsensitive
),
1369 'm'
=> Ok(ast
::Flag
::MultiLine
),
1370 's'
=> Ok(ast
::Flag
::DotMatchesNewLine
),
1371 'U'
=> Ok(ast
::Flag
::SwapGreed
),
1372 'u'
=> Ok(ast
::Flag
::Unicode
),
1373 'x'
=> Ok(ast
::Flag
::IgnoreWhitespace
),
1376 .error(self.span_char(), ast
::ErrorKind
::FlagUnrecognized
))
1381 /// Parse a primitive AST. e.g., A literal, non-set character class or
1384 /// This assumes that the parser expects a primitive at the current
1385 /// location. i.e., All other non-primitive cases have been handled.
1386 /// For example, if the parser's position is at `|`, then `|` will be
1387 /// treated as a literal (e.g., inside a character class).
1389 /// This advances the parser to the first character immediately following
1391 fn parse_primitive(&self) -> Result
<Primitive
> {
1393 '
\\'
=> self.parse_escape(),
1395 let ast
= Primitive
::Dot(self.span_char());
1400 let ast
= Primitive
::Assertion(ast
::Assertion
{
1401 span
: self.span_char(),
1402 kind
: ast
::AssertionKind
::StartLine
,
1408 let ast
= Primitive
::Assertion(ast
::Assertion
{
1409 span
: self.span_char(),
1410 kind
: ast
::AssertionKind
::EndLine
,
1416 let ast
= Primitive
::Literal(ast
::Literal
{
1417 span
: self.span_char(),
1418 kind
: ast
::LiteralKind
::Verbatim
,
1427 /// Parse an escape sequence as a primitive AST.
1429 /// This assumes the parser is positioned at the start of the escape
1430 /// sequence, i.e., `\`. It advances the parser to the first position
1431 /// immediately following the escape sequence.
1433 fn parse_escape(&self) -> Result
<Primitive
> {
1434 assert_eq
!(self.char(), '
\\'
);
1435 let start
= self.pos();
1437 return Err(self.error(
1438 Span
::new(start
, self.pos()),
1439 ast
::ErrorKind
::EscapeUnexpectedEof
,
1442 let c
= self.char();
1443 // Put some of the more complicated routines into helpers.
1446 if !self.parser().octal
{
1447 return Err(self.error(
1448 Span
::new(start
, self.span_char().end
),
1449 ast
::ErrorKind
::UnsupportedBackreference
,
1452 let mut lit
= self.parse_octal();
1453 lit
.span
.start
= start
;
1454 return Ok(Primitive
::Literal(lit
));
1456 '
8'
..='
9'
if !self.parser().octal
=> {
1457 return Err(self.error(
1458 Span
::new(start
, self.span_char().end
),
1459 ast
::ErrorKind
::UnsupportedBackreference
,
1462 'x'
| 'u'
| 'U'
=> {
1463 let mut lit
= self.parse_hex()?
;
1464 lit
.span
.start
= start
;
1465 return Ok(Primitive
::Literal(lit
));
1468 let mut cls
= self.parse_unicode_class()?
;
1469 cls
.span
.start
= start
;
1470 return Ok(Primitive
::Unicode(cls
));
1472 'd'
| 's'
| 'w'
| 'D'
| 'S'
| 'W'
=> {
1473 let mut cls
= self.parse_perl_class();
1474 cls
.span
.start
= start
;
1475 return Ok(Primitive
::Perl(cls
));
1480 // Handle all of the one letter sequences inline.
1482 let span
= Span
::new(start
, self.pos());
1483 if is_meta_character(c
) {
1484 return Ok(Primitive
::Literal(ast
::Literal
{
1486 kind
: ast
::LiteralKind
::Punctuation
,
1490 let special
= |kind
, c
| {
1491 Ok(Primitive
::Literal(ast
::Literal
{
1493 kind
: ast
::LiteralKind
::Special(kind
),
1498 'a'
=> special(ast
::SpecialLiteralKind
::Bell
, '
\x07'
),
1499 'f'
=> special(ast
::SpecialLiteralKind
::FormFeed
, '
\x0C'
),
1500 't'
=> special(ast
::SpecialLiteralKind
::Tab
, '
\t'
),
1501 'n'
=> special(ast
::SpecialLiteralKind
::LineFeed
, '
\n'
),
1502 'r'
=> special(ast
::SpecialLiteralKind
::CarriageReturn
, '
\r'
),
1503 'v'
=> special(ast
::SpecialLiteralKind
::VerticalTab
, '
\x0B'
),
1504 ' '
if self.ignore_whitespace() => {
1505 special(ast
::SpecialLiteralKind
::Space
, ' '
)
1507 'A'
=> Ok(Primitive
::Assertion(ast
::Assertion
{
1509 kind
: ast
::AssertionKind
::StartText
,
1511 'z'
=> Ok(Primitive
::Assertion(ast
::Assertion
{
1513 kind
: ast
::AssertionKind
::EndText
,
1515 'b'
=> Ok(Primitive
::Assertion(ast
::Assertion
{
1517 kind
: ast
::AssertionKind
::WordBoundary
,
1519 'B'
=> Ok(Primitive
::Assertion(ast
::Assertion
{
1521 kind
: ast
::AssertionKind
::NotWordBoundary
,
1523 _
=> Err(self.error(span
, ast
::ErrorKind
::EscapeUnrecognized
)),
1527 /// Parse an octal representation of a Unicode codepoint up to 3 digits
1528 /// long. This expects the parser to be positioned at the first octal
1529 /// digit and advances the parser to the first character immediately
1530 /// following the octal number. This also assumes that parsing octal
1531 /// escapes is enabled.
1533 /// Assuming the preconditions are met, this routine can never fail.
1535 fn parse_octal(&self) -> ast
::Literal
{
1539 assert
!(self.parser().octal
);
1540 assert
!('
0'
<= self.char() && self.char() <= '
7'
);
1541 let start
= self.pos();
1542 // Parse up to two more digits.
1544 && '
0'
<= self.char()
1545 && self.char() <= '
7'
1546 && self.pos().offset
- start
.offset
<= 2
1548 let end
= self.pos();
1549 let octal
= &self.pattern()[start
.offset
..end
.offset
];
1550 // Parsing the octal should never fail since the above guarantees a
1553 u32::from_str_radix(octal
, 8).expect("valid octal number");
1554 // The max value for 3 digit octal is 0777 = 511 and [0, 511] has no
1555 // invalid Unicode scalar values.
1556 let c
= char::from_u32(codepoint
).expect("Unicode scalar value");
1558 span
: Span
::new(start
, end
),
1559 kind
: ast
::LiteralKind
::Octal
,
1564 /// Parse a hex representation of a Unicode codepoint. This handles both
1565 /// hex notations, i.e., `\xFF` and `\x{FFFF}`. This expects the parser to
1566 /// be positioned at the `x`, `u` or `U` prefix. The parser is advanced to
1567 /// the first character immediately following the hexadecimal literal.
1569 fn parse_hex(&self) -> Result
<ast
::Literal
> {
1571 self.char() == 'x'
|| self.char() == 'u'
|| self.char() == 'U'
1574 let hex_kind
= match self.char() {
1575 'x'
=> ast
::HexLiteralKind
::X
,
1576 'u'
=> ast
::HexLiteralKind
::UnicodeShort
,
1577 _
=> ast
::HexLiteralKind
::UnicodeLong
,
1579 if !self.bump_and_bump_space() {
1581 self.error(self.span(), ast
::ErrorKind
::EscapeUnexpectedEof
)
1584 if self.char() == '
{'
{
1585 self.parse_hex_brace(hex_kind
)
1587 self.parse_hex_digits(hex_kind
)
1591 /// Parse an N-digit hex representation of a Unicode codepoint. This
1592 /// expects the parser to be positioned at the first digit and will advance
1593 /// the parser to the first character immediately following the escape
1596 /// The number of digits given must be 2 (for `\xNN`), 4 (for `\uNNNN`)
1597 /// or 8 (for `\UNNNNNNNN`).
1599 fn parse_hex_digits(
1601 kind
: ast
::HexLiteralKind
,
1602 ) -> Result
<ast
::Literal
> {
1606 let mut scratch
= self.parser().scratch
.borrow_mut();
1609 let start
= self.pos();
1610 for i
in 0..kind
.digits() {
1611 if i
> 0 && !self.bump_and_bump_space() {
1613 .error(self.span(), ast
::ErrorKind
::EscapeUnexpectedEof
));
1615 if !is_hex(self.char()) {
1616 return Err(self.error(
1618 ast
::ErrorKind
::EscapeHexInvalidDigit
,
1621 scratch
.push(self.char());
1623 // The final bump just moves the parser past the literal, which may
1625 self.bump_and_bump_space();
1626 let end
= self.pos();
1627 let hex
= scratch
.as_str();
1628 match u32::from_str_radix(hex
, 16).ok().and_then(char::from_u32
) {
1629 None
=> Err(self.error(
1630 Span
::new(start
, end
),
1631 ast
::ErrorKind
::EscapeHexInvalid
,
1633 Some(c
) => Ok(ast
::Literal
{
1634 span
: Span
::new(start
, end
),
1635 kind
: ast
::LiteralKind
::HexFixed(kind
),
1641 /// Parse a hex representation of any Unicode scalar value. This expects
1642 /// the parser to be positioned at the opening brace `{` and will advance
1643 /// the parser to the first character following the closing brace `}`.
1647 kind
: ast
::HexLiteralKind
,
1648 ) -> Result
<ast
::Literal
> {
1652 let mut scratch
= self.parser().scratch
.borrow_mut();
1655 let brace_pos
= self.pos();
1656 let start
= self.span_char().end
;
1657 while self.bump_and_bump_space() && self.char() != '
}'
{
1658 if !is_hex(self.char()) {
1659 return Err(self.error(
1661 ast
::ErrorKind
::EscapeHexInvalidDigit
,
1664 scratch
.push(self.char());
1667 return Err(self.error(
1668 Span
::new(brace_pos
, self.pos()),
1669 ast
::ErrorKind
::EscapeUnexpectedEof
,
1672 let end
= self.pos();
1673 let hex
= scratch
.as_str();
1674 assert_eq
!(self.char(), '
}'
);
1675 self.bump_and_bump_space();
1678 return Err(self.error(
1679 Span
::new(brace_pos
, self.pos()),
1680 ast
::ErrorKind
::EscapeHexEmpty
,
1683 match u32::from_str_radix(hex
, 16).ok().and_then(char::from_u32
) {
1684 None
=> Err(self.error(
1685 Span
::new(start
, end
),
1686 ast
::ErrorKind
::EscapeHexInvalid
,
1688 Some(c
) => Ok(ast
::Literal
{
1689 span
: Span
::new(start
, self.pos()),
1690 kind
: ast
::LiteralKind
::HexBrace(kind
),
1696 /// Parse a decimal number into a u32 while trimming leading and trailing
1699 /// This expects the parser to be positioned at the first position where
1700 /// a decimal digit could occur. This will advance the parser to the byte
1701 /// immediately following the last contiguous decimal digit.
1703 /// If no decimal digit could be found or if there was a problem parsing
1704 /// the complete set of digits into a u32, then an error is returned.
1705 fn parse_decimal(&self) -> Result
<u32> {
1706 let mut scratch
= self.parser().scratch
.borrow_mut();
1709 while !self.is_eof() && self.char().is_whitespace() {
1712 let start
= self.pos();
1713 while !self.is_eof() && '
0'
<= self.char() && self.char() <= '
9'
{
1714 scratch
.push(self.char());
1715 self.bump_and_bump_space();
1717 let span
= Span
::new(start
, self.pos());
1718 while !self.is_eof() && self.char().is_whitespace() {
1719 self.bump_and_bump_space();
1721 let digits
= scratch
.as_str();
1722 if digits
.is_empty() {
1723 return Err(self.error(span
, ast
::ErrorKind
::DecimalEmpty
));
1725 match u32::from_str_radix(digits
, 10).ok() {
1727 None
=> Err(self.error(span
, ast
::ErrorKind
::DecimalInvalid
)),
1731 /// Parse a standard character class consisting primarily of characters or
1732 /// character ranges, but can also contain nested character classes of
1733 /// any type (sans `.`).
1735 /// This assumes the parser is positioned at the opening `[`. If parsing
1736 /// is successful, then the parser is advanced to the position immediately
1737 /// following the closing `]`.
1739 fn parse_set_class(&self) -> Result
<ast
::Class
> {
1740 assert_eq
!(self.char(), '
['
);
1743 ast
::ClassSetUnion { span: self.span(), items: vec![] }
;
1747 return Err(self.unclosed_class_error());
1751 // If we've already parsed the opening bracket, then
1752 // attempt to treat this as the beginning of an ASCII
1753 // class. If ASCII class parsing fails, then the parser
1755 if !self.parser().stack_class
.borrow().is_empty() {
1756 if let Some(cls
) = self.maybe_parse_ascii_class() {
1757 union.push(ast
::ClassSetItem
::Ascii(cls
));
1761 union = self.push_class_open(union)?
;
1763 '
]'
=> match self.pop_class(union)?
{
1764 Either
::Left(nested_union
) => {
1765 union = nested_union
;
1767 Either
::Right(class
) => return Ok(class
),
1769 '
&'
if self.peek() == Some('
&'
) => {
1770 assert
!(self.bump_if("&&"));
1771 union = self.push_class_op(
1772 ast
::ClassSetBinaryOpKind
::Intersection
,
1776 '
-'
if self.peek() == Some('
-'
) => {
1777 assert
!(self.bump_if("--"));
1778 union = self.push_class_op(
1779 ast
::ClassSetBinaryOpKind
::Difference
,
1783 '
~'
if self.peek() == Some('
~'
) => {
1784 assert
!(self.bump_if("~~"));
1785 union = self.push_class_op(
1786 ast
::ClassSetBinaryOpKind
::SymmetricDifference
,
1791 union.push(self.parse_set_class_range()?
);
1797 /// Parse a single primitive item in a character class set. The item to
1798 /// be parsed can either be one of a simple literal character, a range
1799 /// between two simple literal characters or a "primitive" character
1800 /// class like \w or \p{Greek}.
1802 /// If an invalid escape is found, or if a character class is found where
1803 /// a simple literal is expected (e.g., in a range), then an error is
1806 fn parse_set_class_range(&self) -> Result
<ast
::ClassSetItem
> {
1807 let prim1
= self.parse_set_class_item()?
;
1810 return Err(self.unclosed_class_error());
1812 // If the next char isn't a `-`, then we don't have a range.
1813 // There are two exceptions. If the char after a `-` is a `]`, then
1814 // `-` is interpreted as a literal `-`. Alternatively, if the char
1815 // after a `-` is a `-`, then `--` corresponds to a "difference"
1817 if self.char() != '
-'
1818 || self.peek_space() == Some('
]'
)
1819 || self.peek_space() == Some('
-'
)
1821 return prim1
.into_class_set_item(self);
1823 // OK, now we're parsing a range, so bump past the `-` and parse the
1824 // second half of the range.
1825 if !self.bump_and_bump_space() {
1826 return Err(self.unclosed_class_error());
1828 let prim2
= self.parse_set_class_item()?
;
1829 let range
= ast
::ClassSetRange
{
1830 span
: Span
::new(prim1
.span().start
, prim2
.span().end
),
1831 start
: prim1
.into_class_literal(self)?
,
1832 end
: prim2
.into_class_literal(self)?
,
1834 if !range
.is_valid() {
1836 self.error(range
.span
, ast
::ErrorKind
::ClassRangeInvalid
)
1839 Ok(ast
::ClassSetItem
::Range(range
))
1842 /// Parse a single item in a character class as a primitive, where the
1843 /// primitive either consists of a verbatim literal or a single escape
1846 /// This assumes the parser is positioned at the beginning of a primitive,
1847 /// and advances the parser to the first position after the primitive if
1850 /// Note that it is the caller's responsibility to report an error if an
1851 /// illegal primitive was parsed.
1853 fn parse_set_class_item(&self) -> Result
<Primitive
> {
1854 if self.char() == '
\\'
{
1857 let x
= Primitive
::Literal(ast
::Literal
{
1858 span
: self.span_char(),
1859 kind
: ast
::LiteralKind
::Verbatim
,
1867 /// Parses the opening of a character class set. This includes the opening
1868 /// bracket along with `^` if present to indicate negation. This also
1869 /// starts parsing the opening set of unioned items if applicable, since
1870 /// there are special rules applied to certain characters in the opening
1871 /// of a character class. For example, `[^]]` is the class of all
1872 /// characters not equal to `]`. (`]` would need to be escaped in any other
1873 /// position.) Similarly for `-`.
1875 /// In all cases, the op inside the returned `ast::ClassBracketed` is an
1876 /// empty union. This empty union should be replaced with the actual item
1877 /// when it is popped from the parser's stack.
1879 /// This assumes the parser is positioned at the opening `[` and advances
1880 /// the parser to the first non-special byte of the character class.
1882 /// An error is returned if EOF is found.
1884 fn parse_set_class_open(
1886 ) -> Result
<(ast
::ClassBracketed
, ast
::ClassSetUnion
)> {
1887 assert_eq
!(self.char(), '
['
);
1888 let start
= self.pos();
1889 if !self.bump_and_bump_space() {
1890 return Err(self.error(
1891 Span
::new(start
, self.pos()),
1892 ast
::ErrorKind
::ClassUnclosed
,
1896 let negated
= if self.char() != '
^' {
1899 if !self.bump_and_bump_space() {
1900 return Err(self.error(
1901 Span
::new(start
, self.pos()),
1902 ast
::ErrorKind
::ClassUnclosed
,
1907 // Accept any number of `-` as literal `-`.
1909 ast
::ClassSetUnion { span: self.span(), items: vec![] }
;
1910 while self.char() == '
-'
{
1911 union.push(ast
::ClassSetItem
::Literal(ast
::Literal
{
1912 span
: self.span_char(),
1913 kind
: ast
::LiteralKind
::Verbatim
,
1916 if !self.bump_and_bump_space() {
1917 return Err(self.error(
1918 Span
::new(start
, start
),
1919 ast
::ErrorKind
::ClassUnclosed
,
1923 // If `]` is the *first* char in a set, then interpret it as a literal
1924 // `]`. That is, an empty class is impossible to write.
1925 if union.items
.is_empty() && self.char() == '
]'
{
1926 union.push(ast
::ClassSetItem
::Literal(ast
::Literal
{
1927 span
: self.span_char(),
1928 kind
: ast
::LiteralKind
::Verbatim
,
1931 if !self.bump_and_bump_space() {
1932 return Err(self.error(
1933 Span
::new(start
, self.pos()),
1934 ast
::ErrorKind
::ClassUnclosed
,
1938 let set
= ast
::ClassBracketed
{
1939 span
: Span
::new(start
, self.pos()),
1941 kind
: ast
::ClassSet
::union(ast
::ClassSetUnion
{
1942 span
: Span
::new(union.span
.start
, union.span
.start
),
1949 /// Attempt to parse an ASCII character class, e.g., `[:alnum:]`.
1951 /// This assumes the parser is positioned at the opening `[`.
1953 /// If no valid ASCII character class could be found, then this does not
1954 /// advance the parser and `None` is returned. Otherwise, the parser is
1955 /// advanced to the first byte following the closing `]` and the
1956 /// corresponding ASCII class is returned.
1958 fn maybe_parse_ascii_class(&self) -> Option
<ast
::ClassAscii
> {
1959 // ASCII character classes are interesting from a parsing perspective
1960 // because parsing cannot fail with any interesting error. For example,
1961 // in order to use an ASCII character class, it must be enclosed in
1962 // double brackets, e.g., `[[:alnum:]]`. Alternatively, you might think
1963 // of it as "ASCII character classes have the syntax `[:NAME:]`
1964 // which can only appear within character brackets." This means that
1965 // things like `[[:lower:]A]` are legal constructs.
1967 // However, if one types an incorrect ASCII character class, e.g.,
1968 // `[[:loower:]]`, then we treat that as a normal nested character
1969 // class containing the characters `:elorw`. One might argue that we
1970 // should return an error instead since the repeated colons give away
1971 // the intent to write an ASCII class. But what if the user typed
1972 // `[[:lower]]` instead? How can we tell that was intended to be an
1973 // ASCII class and not just a normal nested class?
1975 // Reasonable people can probably disagree over this, but for better
1976 // or worse, we implement semantics that never fails at the expense
1977 // of better failure modes.
1978 assert_eq
!(self.char(), '
['
);
1979 // If parsing fails, then we back up the parser to this starting point.
1980 let start
= self.pos();
1981 let mut negated
= false;
1982 if !self.bump() || self.char() != '
:'
{
1983 self.parser().pos
.set(start
);
1987 self.parser().pos
.set(start
);
1990 if self.char() == '
^' {
1993 self.parser().pos
.set(start
);
1997 let name_start
= self.offset();
1998 while self.char() != '
:'
&& self.bump() {}
2000 self.parser().pos
.set(start
);
2003 let name
= &self.pattern()[name_start
..self.offset()];
2004 if !self.bump_if(":]") {
2005 self.parser().pos
.set(start
);
2008 let kind
= match ast
::ClassAsciiKind
::from_name(name
) {
2011 self.parser().pos
.set(start
);
2015 Some(ast
::ClassAscii
{
2016 span
: Span
::new(start
, self.pos()),
2022 /// Parse a Unicode class in either the single character notation, `\pN`
2023 /// or the multi-character bracketed notation, `\p{Greek}`. This assumes
2024 /// the parser is positioned at the `p` (or `P` for negation) and will
2025 /// advance the parser to the character immediately following the class.
2027 /// Note that this does not check whether the class name is valid or not.
2029 fn parse_unicode_class(&self) -> Result
<ast
::ClassUnicode
> {
2030 assert
!(self.char() == 'p'
|| self.char() == 'P'
);
2032 let mut scratch
= self.parser().scratch
.borrow_mut();
2035 let negated
= self.char() == 'P'
;
2036 if !self.bump_and_bump_space() {
2038 self.error(self.span(), ast
::ErrorKind
::EscapeUnexpectedEof
)
2041 let (start
, kind
) = if self.char() == '
{'
{
2042 let start
= self.span_char().end
;
2043 while self.bump_and_bump_space() && self.char() != '
}'
{
2044 scratch
.push(self.char());
2048 .error(self.span(), ast
::ErrorKind
::EscapeUnexpectedEof
));
2050 assert_eq
!(self.char(), '
}'
);
2053 let name
= scratch
.as_str();
2054 if let Some(i
) = name
.find("!=") {
2057 ast
::ClassUnicodeKind
::NamedValue
{
2058 op
: ast
::ClassUnicodeOpKind
::NotEqual
,
2059 name
: name
[..i
].to_string(),
2060 value
: name
[i
+ 2..].to_string(),
2063 } else if let Some(i
) = name
.find('
:'
) {
2066 ast
::ClassUnicodeKind
::NamedValue
{
2067 op
: ast
::ClassUnicodeOpKind
::Colon
,
2068 name
: name
[..i
].to_string(),
2069 value
: name
[i
+ 1..].to_string(),
2072 } else if let Some(i
) = name
.find('
='
) {
2075 ast
::ClassUnicodeKind
::NamedValue
{
2076 op
: ast
::ClassUnicodeOpKind
::Equal
,
2077 name
: name
[..i
].to_string(),
2078 value
: name
[i
+ 1..].to_string(),
2082 (start
, ast
::ClassUnicodeKind
::Named(name
.to_string()))
2085 let start
= self.pos();
2086 let c
= self.char();
2088 return Err(self.error(
2090 ast
::ErrorKind
::UnicodeClassInvalid
,
2093 self.bump_and_bump_space();
2094 let kind
= ast
::ClassUnicodeKind
::OneLetter(c
);
2097 Ok(ast
::ClassUnicode
{
2098 span
: Span
::new(start
, self.pos()),
2104 /// Parse a Perl character class, e.g., `\d` or `\W`. This assumes the
2105 /// parser is currently at a valid character class name and will be
2106 /// advanced to the character immediately following the class.
2108 fn parse_perl_class(&self) -> ast
::ClassPerl
{
2109 let c
= self.char();
2110 let span
= self.span_char();
2112 let (negated
, kind
) = match c
{
2113 'd'
=> (false, ast
::ClassPerlKind
::Digit
),
2114 'D'
=> (true, ast
::ClassPerlKind
::Digit
),
2115 's'
=> (false, ast
::ClassPerlKind
::Space
),
2116 'S'
=> (true, ast
::ClassPerlKind
::Space
),
2117 'w'
=> (false, ast
::ClassPerlKind
::Word
),
2118 'W'
=> (true, ast
::ClassPerlKind
::Word
),
2119 c
=> panic
!("expected valid Perl class but got '{}'", c
),
2121 ast
::ClassPerl { span, kind, negated }
2125 /// A type that traverses a fully parsed Ast and checks whether its depth
2126 /// exceeds the specified nesting limit. If it does, then an error is returned.
2128 struct NestLimiter
<'p
, 's
, P
> {
2129 /// The parser that is checking the nest limit.
2130 p
: &'p ParserI
<'s
, P
>,
2131 /// The current depth while walking an Ast.
2135 impl<'p
, 's
, P
: Borrow
<Parser
>> NestLimiter
<'p
, 's
, P
> {
2136 fn new(p
: &'p ParserI
<'s
, P
>) -> NestLimiter
<'p
, 's
, P
> {
2137 NestLimiter { p, depth: 0 }
2141 fn check(self, ast
: &Ast
) -> Result
<()> {
2142 ast
::visit(ast
, self)
2145 fn increment_depth(&mut self, span
: &Span
) -> Result
<()> {
2146 let new
= self.depth
.checked_add(1).ok_or_else(|| {
2149 ast
::ErrorKind
::NestLimitExceeded(::std
::u32::MAX
),
2152 let limit
= self.p
.parser().nest_limit
;
2154 return Err(self.p
.error(
2156 ast
::ErrorKind
::NestLimitExceeded(limit
),
2163 fn decrement_depth(&mut self) {
2164 // Assuming the correctness of the visitor, this should never drop
2166 self.depth
= self.depth
.checked_sub(1).unwrap();
2170 impl<'p
, 's
, P
: Borrow
<Parser
>> ast
::Visitor
for NestLimiter
<'p
, 's
, P
> {
2172 type Err
= ast
::Error
;
2174 fn finish(self) -> Result
<()> {
2178 fn visit_pre(&mut self, ast
: &Ast
) -> Result
<()> {
2179 let span
= match *ast
{
2185 | Ast
::Class(ast
::Class
::Unicode(_
))
2186 | Ast
::Class(ast
::Class
::Perl(_
)) => {
2187 // These are all base cases, so we don't increment depth.
2190 Ast
::Class(ast
::Class
::Bracketed(ref x
)) => &x
.span
,
2191 Ast
::Repetition(ref x
) => &x
.span
,
2192 Ast
::Group(ref x
) => &x
.span
,
2193 Ast
::Alternation(ref x
) => &x
.span
,
2194 Ast
::Concat(ref x
) => &x
.span
,
2196 self.increment_depth(span
)
2199 fn visit_post(&mut self, ast
: &Ast
) -> Result
<()> {
2206 | Ast
::Class(ast
::Class
::Unicode(_
))
2207 | Ast
::Class(ast
::Class
::Perl(_
)) => {
2208 // These are all base cases, so we don't decrement depth.
2211 Ast
::Class(ast
::Class
::Bracketed(_
))
2212 | Ast
::Repetition(_
)
2214 | Ast
::Alternation(_
)
2215 | Ast
::Concat(_
) => {
2216 self.decrement_depth();
2222 fn visit_class_set_item_pre(
2224 ast
: &ast
::ClassSetItem
,
2226 let span
= match *ast
{
2227 ast
::ClassSetItem
::Empty(_
)
2228 | ast
::ClassSetItem
::Literal(_
)
2229 | ast
::ClassSetItem
::Range(_
)
2230 | ast
::ClassSetItem
::Ascii(_
)
2231 | ast
::ClassSetItem
::Unicode(_
)
2232 | ast
::ClassSetItem
::Perl(_
) => {
2233 // These are all base cases, so we don't increment depth.
2236 ast
::ClassSetItem
::Bracketed(ref x
) => &x
.span
,
2237 ast
::ClassSetItem
::Union(ref x
) => &x
.span
,
2239 self.increment_depth(span
)
2242 fn visit_class_set_item_post(
2244 ast
: &ast
::ClassSetItem
,
2247 ast
::ClassSetItem
::Empty(_
)
2248 | ast
::ClassSetItem
::Literal(_
)
2249 | ast
::ClassSetItem
::Range(_
)
2250 | ast
::ClassSetItem
::Ascii(_
)
2251 | ast
::ClassSetItem
::Unicode(_
)
2252 | ast
::ClassSetItem
::Perl(_
) => {
2253 // These are all base cases, so we don't decrement depth.
2256 ast
::ClassSetItem
::Bracketed(_
) | ast
::ClassSetItem
::Union(_
) => {
2257 self.decrement_depth();
2263 fn visit_class_set_binary_op_pre(
2265 ast
: &ast
::ClassSetBinaryOp
,
2267 self.increment_depth(&ast
.span
)
2270 fn visit_class_set_binary_op_post(
2272 _ast
: &ast
::ClassSetBinaryOp
,
2274 self.decrement_depth();
2279 /// When the result is an error, transforms the ast::ErrorKind from the source
2280 /// Result into another one. This function is used to return clearer error
2281 /// messages when possible.
2282 fn specialize_err
<T
>(
2284 from
: ast
::ErrorKind
,
2287 if let Err(e
) = result
{
2289 Err(ast
::Error { kind: to, pattern: e.pattern, span: e.span }
)
2300 use std
::ops
::Range
;
2302 use super::{Parser, ParserBuilder, ParserI, Primitive}
;
2303 use crate::ast
::{self, Ast, Position, Span}
;
2305 // Our own assert_eq, which has slightly better formatting (but honestly
2306 // still kind of crappy).
2307 macro_rules
! assert_eq
{
2308 ($left
:expr
, $right
:expr
) => {{
2309 match (&$left
, &$right
) {
2310 (left_val
, right_val
) => {
2311 if !(*left_val
== *right_val
) {
2313 "assertion failed: `(left == right)`\n\n\
2314 left: `{:?}`\nright: `{:?}`\n\n",
2323 // We create these errors to compare with real ast::Errors in the tests.
2324 // We define equality between TestError and ast::Error to disregard the
2325 // pattern string in ast::Error, which is annoying to provide in tests.
2326 #[derive(Clone, Debug)]
2329 kind
: ast
::ErrorKind
,
2332 impl PartialEq
<ast
::Error
> for TestError
{
2333 fn eq(&self, other
: &ast
::Error
) -> bool
{
2334 self.span
== other
.span
&& self.kind
== other
.kind
2338 impl PartialEq
<TestError
> for ast
::Error
{
2339 fn eq(&self, other
: &TestError
) -> bool
{
2340 self.span
== other
.span
&& self.kind
== other
.kind
2344 fn s(str: &str) -> String
{
2348 fn parser(pattern
: &str) -> ParserI
<'_
, Parser
> {
2349 ParserI
::new(Parser
::new(), pattern
)
2352 fn parser_octal(pattern
: &str) -> ParserI
<'_
, Parser
> {
2353 let parser
= ParserBuilder
::new().octal(true).build();
2354 ParserI
::new(parser
, pattern
)
2357 fn parser_nest_limit(
2360 ) -> ParserI
<'_
, Parser
> {
2361 let p
= ParserBuilder
::new().nest_limit(nest_limit
).build();
2362 ParserI
::new(p
, pattern
)
2365 fn parser_ignore_whitespace(pattern
: &str) -> ParserI
<'_
, Parser
> {
2366 let p
= ParserBuilder
::new().ignore_whitespace(true).build();
2367 ParserI
::new(p
, pattern
)
2370 /// Short alias for creating a new span.
2371 fn nspan(start
: Position
, end
: Position
) -> Span
{
2372 Span
::new(start
, end
)
2375 /// Short alias for creating a new position.
2376 fn npos(offset
: usize, line
: usize, column
: usize) -> Position
{
2377 Position
::new(offset
, line
, column
)
2380 /// Create a new span from the given offset range. This assumes a single
2381 /// line and sets the columns based on the offsets, i.e., this only works
2382 /// out of the box for ASCII, which is fine for most tests.
2383 fn span(range
: Range
<usize>) -> Span
{
2384 let start
= Position
::new(range
.start
, 1, range
.start
+ 1);
2385 let end
= Position
::new(range
.end
, 1, range
.end
+ 1);
2386 Span
::new(start
, end
)
2389 /// Create a new span for the corresponding byte range in the given string.
2390 fn span_range(subject
: &str, range
: Range
<usize>) -> Span
{
2391 let start
= Position
{
2392 offset
: range
.start
,
2393 line
: 1 + subject
[..range
.start
].matches('
\n'
).count(),
2394 column
: 1 + subject
[..range
.start
]
2397 .position(|c
| c
== '
\n'
)
2398 .unwrap_or(subject
[..range
.start
].chars().count()),
2400 let end
= Position
{
2402 line
: 1 + subject
[..range
.end
].matches('
\n'
).count(),
2403 column
: 1 + subject
[..range
.end
]
2406 .position(|c
| c
== '
\n'
)
2407 .unwrap_or(subject
[..range
.end
].chars().count()),
2409 Span
::new(start
, end
)
2412 /// Create a verbatim literal starting at the given position.
2413 fn lit(c
: char, start
: usize) -> Ast
{
2414 lit_with(c
, span(start
..start
+ c
.len_utf8()))
2417 /// Create a punctuation literal with the given span.
2418 fn punct_lit(c
: char, span
: Span
) -> Ast
{
2419 Ast
::Literal(ast
::Literal
{
2421 kind
: ast
::LiteralKind
::Punctuation
,
2426 /// Create a verbatim literal with the given span.
2427 fn lit_with(c
: char, span
: Span
) -> Ast
{
2428 Ast
::Literal(ast
::Literal
{
2430 kind
: ast
::LiteralKind
::Verbatim
,
2435 /// Create a concatenation with the given range.
2436 fn concat(range
: Range
<usize>, asts
: Vec
<Ast
>) -> Ast
{
2437 concat_with(span(range
), asts
)
2440 /// Create a concatenation with the given span.
2441 fn concat_with(span
: Span
, asts
: Vec
<Ast
>) -> Ast
{
2442 Ast
::Concat(ast
::Concat { span, asts }
)
2445 /// Create an alternation with the given range.
2446 fn alt(range
: Range
<usize>, asts
: Vec
<Ast
>) -> Ast
{
2447 Ast
::Alternation(ast
::Alternation { span: span(range), asts }
)
2450 /// Create a capturing group with the given range and capture index.
2451 fn group(range
: Range
<usize>, index
: u32, ast
: Ast
) -> Ast
{
2452 Ast
::Group(ast
::Group
{
2454 kind
: ast
::GroupKind
::CaptureIndex(index
),
2459 /// Create an ast::SetFlags.
2461 /// The given pattern should be the full pattern string. The range given
2462 /// should correspond to the byte offsets where the flag set occurs.
2464 /// If negated is true, then the set is interpreted as beginning with a
2468 range
: Range
<usize>,
2472 let mut items
= vec
![ast
::FlagsItem
{
2473 span
: span_range(pat
, (range
.end
- 2)..(range
.end
- 1)),
2474 kind
: ast
::FlagsItemKind
::Flag(flag
),
2480 span
: span_range(pat
, (range
.start
+ 2)..(range
.end
- 2)),
2481 kind
: ast
::FlagsItemKind
::Negation
,
2485 Ast
::Flags(ast
::SetFlags
{
2486 span
: span_range(pat
, range
.clone()),
2488 span
: span_range(pat
, (range
.start
+ 2)..(range
.end
- 1)),
2495 fn parse_nest_limit() {
2496 // A nest limit of 0 still allows some types of regexes.
2498 parser_nest_limit("", 0).parse(),
2499 Ok(Ast
::Empty(span(0..0)))
2501 assert_eq
!(parser_nest_limit("a", 0).parse(), Ok(lit('a'
, 0)));
2503 // Test repetition operations, which require one level of nesting.
2505 parser_nest_limit("a+", 0).parse().unwrap_err(),
2508 kind
: ast
::ErrorKind
::NestLimitExceeded(0),
2512 parser_nest_limit("a+", 1).parse(),
2513 Ok(Ast
::Repetition(ast
::Repetition
{
2515 op
: ast
::RepetitionOp
{
2517 kind
: ast
::RepetitionKind
::OneOrMore
,
2520 ast
: Box
::new(lit('a'
, 0)),
2524 parser_nest_limit("(a)+", 1).parse().unwrap_err(),
2527 kind
: ast
::ErrorKind
::NestLimitExceeded(1),
2531 parser_nest_limit("a+*", 1).parse().unwrap_err(),
2534 kind
: ast
::ErrorKind
::NestLimitExceeded(1),
2538 parser_nest_limit("a+*", 2).parse(),
2539 Ok(Ast
::Repetition(ast
::Repetition
{
2541 op
: ast
::RepetitionOp
{
2543 kind
: ast
::RepetitionKind
::ZeroOrMore
,
2546 ast
: Box
::new(Ast
::Repetition(ast
::Repetition
{
2548 op
: ast
::RepetitionOp
{
2550 kind
: ast
::RepetitionKind
::OneOrMore
,
2553 ast
: Box
::new(lit('a'
, 0)),
2558 // Test concatenations. A concatenation requires one level of nesting.
2560 parser_nest_limit("ab", 0).parse().unwrap_err(),
2563 kind
: ast
::ErrorKind
::NestLimitExceeded(0),
2567 parser_nest_limit("ab", 1).parse(),
2568 Ok(concat(0..2, vec
![lit('a'
, 0), lit('b'
, 1)]))
2571 parser_nest_limit("abc", 1).parse(),
2572 Ok(concat(0..3, vec
![lit('a'
, 0), lit('b'
, 1), lit('c'
, 2)]))
2575 // Test alternations. An alternation requires one level of nesting.
2577 parser_nest_limit("a|b", 0).parse().unwrap_err(),
2580 kind
: ast
::ErrorKind
::NestLimitExceeded(0),
2584 parser_nest_limit("a|b", 1).parse(),
2585 Ok(alt(0..3, vec
![lit('a'
, 0), lit('b'
, 2)]))
2588 parser_nest_limit("a|b|c", 1).parse(),
2589 Ok(alt(0..5, vec
![lit('a'
, 0), lit('b'
, 2), lit('c'
, 4)]))
2592 // Test character classes. Classes form their own mini-recursive
2595 parser_nest_limit("[a]", 0).parse().unwrap_err(),
2598 kind
: ast
::ErrorKind
::NestLimitExceeded(0),
2602 parser_nest_limit("[a]", 1).parse(),
2603 Ok(Ast
::Class(ast
::Class
::Bracketed(ast
::ClassBracketed
{
2606 kind
: ast
::ClassSet
::Item(ast
::ClassSetItem
::Literal(
2609 kind
: ast
::LiteralKind
::Verbatim
,
2616 parser_nest_limit("[ab]", 1).parse().unwrap_err(),
2619 kind
: ast
::ErrorKind
::NestLimitExceeded(1),
2623 parser_nest_limit("[ab[cd]]", 2).parse().unwrap_err(),
2626 kind
: ast
::ErrorKind
::NestLimitExceeded(2),
2630 parser_nest_limit("[ab[cd]]", 3).parse().unwrap_err(),
2633 kind
: ast
::ErrorKind
::NestLimitExceeded(3),
2637 parser_nest_limit("[a--b]", 1).parse().unwrap_err(),
2640 kind
: ast
::ErrorKind
::NestLimitExceeded(1),
2644 parser_nest_limit("[a--bc]", 2).parse().unwrap_err(),
2647 kind
: ast
::ErrorKind
::NestLimitExceeded(2),
2653 fn parse_comments() {
2655 # This is comment 1.
2656 foo # This is comment 2.
2657 # This is comment 3.
2659 # This is comment 4.";
2660 let astc
= parser(pat
).parse_with_comments().unwrap();
2664 span_range(pat
, 0..pat
.len()),
2666 flag_set(pat
, 0..4, ast
::Flag
::IgnoreWhitespace
, false),
2667 lit_with('f'
, span_range(pat
, 26..27)),
2668 lit_with('o'
, span_range(pat
, 27..28)),
2669 lit_with('o'
, span_range(pat
, 28..29)),
2670 lit_with('b'
, span_range(pat
, 74..75)),
2671 lit_with('a'
, span_range(pat
, 75..76)),
2672 lit_with('r'
, span_range(pat
, 76..77)),
2680 span
: span_range(pat
, 5..26),
2681 comment
: s(" This is comment 1."),
2684 span
: span_range(pat
, 30..51),
2685 comment
: s(" This is comment 2."),
2688 span
: span_range(pat
, 53..74),
2689 comment
: s(" This is comment 3."),
2692 span
: span_range(pat
, 78..98),
2693 comment
: s(" This is comment 4."),
2700 fn parse_holistic() {
2701 assert_eq
!(parser("]").parse(), Ok(lit('
]'
, 0)));
2703 parser(r
"\\\.\+\*\?\(\)\|\[\]\{\}\^\$\#\&\-\~").parse(),
2707 punct_lit('
\\'
, span(0..2)),
2708 punct_lit('
.'
, span(2..4)),
2709 punct_lit('
+'
, span(4..6)),
2710 punct_lit('
*'
, span(6..8)),
2711 punct_lit('?'
, span(8..10)),
2712 punct_lit('
('
, span(10..12)),
2713 punct_lit('
)'
, span(12..14)),
2714 punct_lit('
|'
, span(14..16)),
2715 punct_lit('
['
, span(16..18)),
2716 punct_lit('
]'
, span(18..20)),
2717 punct_lit('
{'
, span(20..22)),
2718 punct_lit('
}'
, span(22..24)),
2719 punct_lit('
^', span(24..26)),
2720 punct_lit('$'
, span(26..28)),
2721 punct_lit('
#', span(28..30)),
2722 punct_lit('
&'
, span(30..32)),
2723 punct_lit('
-'
, span(32..34)),
2724 punct_lit('
~'
, span(34..36)),
2731 fn parse_ignore_whitespace() {
2732 // Test that basic whitespace insensitivity works.
2733 let pat
= "(?x)a b";
2735 parser(pat
).parse(),
2737 nspan(npos(0, 1, 1), npos(7, 1, 8)),
2739 flag_set(pat
, 0..4, ast
::Flag
::IgnoreWhitespace
, false),
2740 lit_with('a'
, nspan(npos(4, 1, 5), npos(5, 1, 6))),
2741 lit_with('b'
, nspan(npos(6, 1, 7), npos(7, 1, 8))),
2746 // Test that we can toggle whitespace insensitivity.
2747 let pat
= "(?x)a b(?-x)a b";
2749 parser(pat
).parse(),
2751 nspan(npos(0, 1, 1), npos(15, 1, 16)),
2753 flag_set(pat
, 0..4, ast
::Flag
::IgnoreWhitespace
, false),
2754 lit_with('a'
, nspan(npos(4, 1, 5), npos(5, 1, 6))),
2755 lit_with('b'
, nspan(npos(6, 1, 7), npos(7, 1, 8))),
2756 flag_set(pat
, 7..12, ast
::Flag
::IgnoreWhitespace
, true),
2757 lit_with('a'
, nspan(npos(12, 1, 13), npos(13, 1, 14))),
2758 lit_with(' '
, nspan(npos(13, 1, 14), npos(14, 1, 15))),
2759 lit_with('b'
, nspan(npos(14, 1, 15), npos(15, 1, 16))),
2764 // Test that nesting whitespace insensitive flags works.
2765 let pat
= "a (?x:a )a ";
2767 parser(pat
).parse(),
2769 span_range(pat
, 0..11),
2771 lit_with('a'
, span_range(pat
, 0..1)),
2772 lit_with(' '
, span_range(pat
, 1..2)),
2773 Ast
::Group(ast
::Group
{
2774 span
: span_range(pat
, 2..9),
2775 kind
: ast
::GroupKind
::NonCapturing(ast
::Flags
{
2776 span
: span_range(pat
, 4..5),
2777 items
: vec
![ast
::FlagsItem
{
2778 span
: span_range(pat
, 4..5),
2779 kind
: ast
::FlagsItemKind
::Flag(
2780 ast
::Flag
::IgnoreWhitespace
2784 ast
: Box
::new(lit_with('a'
, span_range(pat
, 6..7))),
2786 lit_with('a'
, span_range(pat
, 9..10)),
2787 lit_with(' '
, span_range(pat
, 10..11)),
2792 // Test that whitespace after an opening paren is insignificant.
2793 let pat
= "(?x)( ?P<foo> a )";
2795 parser(pat
).parse(),
2797 span_range(pat
, 0..pat
.len()),
2799 flag_set(pat
, 0..4, ast
::Flag
::IgnoreWhitespace
, false),
2800 Ast
::Group(ast
::Group
{
2801 span
: span_range(pat
, 4..pat
.len()),
2802 kind
: ast
::GroupKind
::CaptureName(ast
::CaptureName
{
2803 span
: span_range(pat
, 9..12),
2807 ast
: Box
::new(lit_with('a'
, span_range(pat
, 14..15))),
2812 let pat
= "(?x)( a )";
2814 parser(pat
).parse(),
2816 span_range(pat
, 0..pat
.len()),
2818 flag_set(pat
, 0..4, ast
::Flag
::IgnoreWhitespace
, false),
2819 Ast
::Group(ast
::Group
{
2820 span
: span_range(pat
, 4..pat
.len()),
2821 kind
: ast
::GroupKind
::CaptureIndex(1),
2822 ast
: Box
::new(lit_with('a'
, span_range(pat
, 7..8))),
2827 let pat
= "(?x)( ?: a )";
2829 parser(pat
).parse(),
2831 span_range(pat
, 0..pat
.len()),
2833 flag_set(pat
, 0..4, ast
::Flag
::IgnoreWhitespace
, false),
2834 Ast
::Group(ast
::Group
{
2835 span
: span_range(pat
, 4..pat
.len()),
2836 kind
: ast
::GroupKind
::NonCapturing(ast
::Flags
{
2837 span
: span_range(pat
, 8..8),
2840 ast
: Box
::new(lit_with('a'
, span_range(pat
, 11..12))),
2845 let pat
= r
"(?x)\x { 53 }";
2847 parser(pat
).parse(),
2849 span_range(pat
, 0..pat
.len()),
2851 flag_set(pat
, 0..4, ast
::Flag
::IgnoreWhitespace
, false),
2852 Ast
::Literal(ast
::Literal
{
2854 kind
: ast
::LiteralKind
::HexBrace(
2855 ast
::HexLiteralKind
::X
2863 // Test that whitespace after an escape is OK.
2864 let pat
= r
"(?x)\ ";
2866 parser(pat
).parse(),
2868 span_range(pat
, 0..pat
.len()),
2870 flag_set(pat
, 0..4, ast
::Flag
::IgnoreWhitespace
, false),
2871 Ast
::Literal(ast
::Literal
{
2872 span
: span_range(pat
, 4..6),
2873 kind
: ast
::LiteralKind
::Special(
2874 ast
::SpecialLiteralKind
::Space
2881 // ... but only when `x` mode is enabled.
2884 parser(pat
).parse().unwrap_err(),
2886 span
: span_range(pat
, 0..2),
2887 kind
: ast
::ErrorKind
::EscapeUnrecognized
,
2893 fn parse_newlines() {
2896 parser(pat
).parse(),
2898 span_range(pat
, 0..3),
2900 Ast
::Dot(span_range(pat
, 0..1)),
2901 lit_with('
\n'
, span_range(pat
, 1..2)),
2902 Ast
::Dot(span_range(pat
, 2..3)),
2907 let pat
= "foobar\nbaz\nquux\n";
2909 parser(pat
).parse(),
2911 span_range(pat
, 0..pat
.len()),
2913 lit_with('f'
, nspan(npos(0, 1, 1), npos(1, 1, 2))),
2914 lit_with('o'
, nspan(npos(1, 1, 2), npos(2, 1, 3))),
2915 lit_with('o'
, nspan(npos(2, 1, 3), npos(3, 1, 4))),
2916 lit_with('b'
, nspan(npos(3, 1, 4), npos(4, 1, 5))),
2917 lit_with('a'
, nspan(npos(4, 1, 5), npos(5, 1, 6))),
2918 lit_with('r'
, nspan(npos(5, 1, 6), npos(6, 1, 7))),
2919 lit_with('
\n'
, nspan(npos(6, 1, 7), npos(7, 2, 1))),
2920 lit_with('b'
, nspan(npos(7, 2, 1), npos(8, 2, 2))),
2921 lit_with('a'
, nspan(npos(8, 2, 2), npos(9, 2, 3))),
2922 lit_with('z'
, nspan(npos(9, 2, 3), npos(10, 2, 4))),
2923 lit_with('
\n'
, nspan(npos(10, 2, 4), npos(11, 3, 1))),
2924 lit_with('q'
, nspan(npos(11, 3, 1), npos(12, 3, 2))),
2925 lit_with('u'
, nspan(npos(12, 3, 2), npos(13, 3, 3))),
2926 lit_with('u'
, nspan(npos(13, 3, 3), npos(14, 3, 4))),
2927 lit_with('x'
, nspan(npos(14, 3, 4), npos(15, 3, 5))),
2928 lit_with('
\n'
, nspan(npos(15, 3, 5), npos(16, 4, 1))),
2935 fn parse_uncounted_repetition() {
2937 parser(r
"a*").parse(),
2938 Ok(Ast
::Repetition(ast
::Repetition
{
2940 op
: ast
::RepetitionOp
{
2942 kind
: ast
::RepetitionKind
::ZeroOrMore
,
2945 ast
: Box
::new(lit('a'
, 0)),
2949 parser(r
"a+").parse(),
2950 Ok(Ast
::Repetition(ast
::Repetition
{
2952 op
: ast
::RepetitionOp
{
2954 kind
: ast
::RepetitionKind
::OneOrMore
,
2957 ast
: Box
::new(lit('a'
, 0)),
2962 parser(r
"a?").parse(),
2963 Ok(Ast
::Repetition(ast
::Repetition
{
2965 op
: ast
::RepetitionOp
{
2967 kind
: ast
::RepetitionKind
::ZeroOrOne
,
2970 ast
: Box
::new(lit('a'
, 0)),
2974 parser(r
"a??").parse(),
2975 Ok(Ast
::Repetition(ast
::Repetition
{
2977 op
: ast
::RepetitionOp
{
2979 kind
: ast
::RepetitionKind
::ZeroOrOne
,
2982 ast
: Box
::new(lit('a'
, 0)),
2986 parser(r
"a?").parse(),
2987 Ok(Ast
::Repetition(ast
::Repetition
{
2989 op
: ast
::RepetitionOp
{
2991 kind
: ast
::RepetitionKind
::ZeroOrOne
,
2994 ast
: Box
::new(lit('a'
, 0)),
2998 parser(r
"a?b").parse(),
3002 Ast
::Repetition(ast
::Repetition
{
3004 op
: ast
::RepetitionOp
{
3006 kind
: ast
::RepetitionKind
::ZeroOrOne
,
3009 ast
: Box
::new(lit('a'
, 0)),
3016 parser(r
"a??b").parse(),
3020 Ast
::Repetition(ast
::Repetition
{
3022 op
: ast
::RepetitionOp
{
3024 kind
: ast
::RepetitionKind
::ZeroOrOne
,
3027 ast
: Box
::new(lit('a'
, 0)),
3034 parser(r
"ab?").parse(),
3039 Ast
::Repetition(ast
::Repetition
{
3041 op
: ast
::RepetitionOp
{
3043 kind
: ast
::RepetitionKind
::ZeroOrOne
,
3046 ast
: Box
::new(lit('b'
, 1)),
3052 parser(r
"(ab)?").parse(),
3053 Ok(Ast
::Repetition(ast
::Repetition
{
3055 op
: ast
::RepetitionOp
{
3057 kind
: ast
::RepetitionKind
::ZeroOrOne
,
3060 ast
: Box
::new(group(
3063 concat(1..3, vec
![lit('a'
, 1), lit('b'
, 2),])
3068 parser(r
"|a?").parse(),
3072 Ast
::Empty(span(0..0)),
3073 Ast
::Repetition(ast
::Repetition
{
3075 op
: ast
::RepetitionOp
{
3077 kind
: ast
::RepetitionKind
::ZeroOrOne
,
3080 ast
: Box
::new(lit('a'
, 1)),
3087 parser(r
"*").parse().unwrap_err(),
3090 kind
: ast
::ErrorKind
::RepetitionMissing
,
3094 parser(r
"(?i)*").parse().unwrap_err(),
3097 kind
: ast
::ErrorKind
::RepetitionMissing
,
3101 parser(r
"(*)").parse().unwrap_err(),
3104 kind
: ast
::ErrorKind
::RepetitionMissing
,
3108 parser(r
"(?:?)").parse().unwrap_err(),
3111 kind
: ast
::ErrorKind
::RepetitionMissing
,
3115 parser(r
"+").parse().unwrap_err(),
3118 kind
: ast
::ErrorKind
::RepetitionMissing
,
3122 parser(r
"?").parse().unwrap_err(),
3125 kind
: ast
::ErrorKind
::RepetitionMissing
,
3129 parser(r
"(?)").parse().unwrap_err(),
3132 kind
: ast
::ErrorKind
::RepetitionMissing
,
3136 parser(r
"|*").parse().unwrap_err(),
3139 kind
: ast
::ErrorKind
::RepetitionMissing
,
3143 parser(r
"|+").parse().unwrap_err(),
3146 kind
: ast
::ErrorKind
::RepetitionMissing
,
3150 parser(r
"|?").parse().unwrap_err(),
3153 kind
: ast
::ErrorKind
::RepetitionMissing
,
3159 fn parse_counted_repetition() {
3161 parser(r
"a{5}").parse(),
3162 Ok(Ast
::Repetition(ast
::Repetition
{
3164 op
: ast
::RepetitionOp
{
3166 kind
: ast
::RepetitionKind
::Range(
3167 ast
::RepetitionRange
::Exactly(5)
3171 ast
: Box
::new(lit('a'
, 0)),
3175 parser(r
"a{5,}").parse(),
3176 Ok(Ast
::Repetition(ast
::Repetition
{
3178 op
: ast
::RepetitionOp
{
3180 kind
: ast
::RepetitionKind
::Range(
3181 ast
::RepetitionRange
::AtLeast(5)
3185 ast
: Box
::new(lit('a'
, 0)),
3189 parser(r
"a{5,9}").parse(),
3190 Ok(Ast
::Repetition(ast
::Repetition
{
3192 op
: ast
::RepetitionOp
{
3194 kind
: ast
::RepetitionKind
::Range(
3195 ast
::RepetitionRange
::Bounded(5, 9)
3199 ast
: Box
::new(lit('a'
, 0)),
3203 parser(r
"a{5}?").parse(),
3204 Ok(Ast
::Repetition(ast
::Repetition
{
3206 op
: ast
::RepetitionOp
{
3208 kind
: ast
::RepetitionKind
::Range(
3209 ast
::RepetitionRange
::Exactly(5)
3213 ast
: Box
::new(lit('a'
, 0)),
3217 parser(r
"ab{5}").parse(),
3222 Ast
::Repetition(ast
::Repetition
{
3224 op
: ast
::RepetitionOp
{
3226 kind
: ast
::RepetitionKind
::Range(
3227 ast
::RepetitionRange
::Exactly(5)
3231 ast
: Box
::new(lit('b'
, 1)),
3237 parser(r
"ab{5}c").parse(),
3242 Ast
::Repetition(ast
::Repetition
{
3244 op
: ast
::RepetitionOp
{
3246 kind
: ast
::RepetitionKind
::Range(
3247 ast
::RepetitionRange
::Exactly(5)
3251 ast
: Box
::new(lit('b'
, 1)),
3259 parser(r
"a{ 5 }").parse(),
3260 Ok(Ast
::Repetition(ast
::Repetition
{
3262 op
: ast
::RepetitionOp
{
3264 kind
: ast
::RepetitionKind
::Range(
3265 ast
::RepetitionRange
::Exactly(5)
3269 ast
: Box
::new(lit('a'
, 0)),
3273 parser(r
"a{ 5 , 9 }").parse(),
3274 Ok(Ast
::Repetition(ast
::Repetition
{
3276 op
: ast
::RepetitionOp
{
3278 kind
: ast
::RepetitionKind
::Range(
3279 ast
::RepetitionRange
::Bounded(5, 9)
3283 ast
: Box
::new(lit('a'
, 0)),
3287 parser_ignore_whitespace(r
"a{5,9} ?").parse(),
3288 Ok(Ast
::Repetition(ast
::Repetition
{
3290 op
: ast
::RepetitionOp
{
3292 kind
: ast
::RepetitionKind
::Range(
3293 ast
::RepetitionRange
::Bounded(5, 9)
3297 ast
: Box
::new(lit('a'
, 0)),
3302 parser(r
"(?i){0}").parse().unwrap_err(),
3305 kind
: ast
::ErrorKind
::RepetitionMissing
,
3309 parser(r
"(?m){1,1}").parse().unwrap_err(),
3312 kind
: ast
::ErrorKind
::RepetitionMissing
,
3316 parser(r
"a{]}").parse().unwrap_err(),
3319 kind
: ast
::ErrorKind
::RepetitionCountDecimalEmpty
,
3323 parser(r
"a{1,]}").parse().unwrap_err(),
3326 kind
: ast
::ErrorKind
::RepetitionCountDecimalEmpty
,
3330 parser(r
"a{").parse().unwrap_err(),
3333 kind
: ast
::ErrorKind
::RepetitionCountUnclosed
,
3337 parser(r
"a{}").parse().unwrap_err(),
3340 kind
: ast
::ErrorKind
::RepetitionCountDecimalEmpty
,
3344 parser(r
"a{a").parse().unwrap_err(),
3347 kind
: ast
::ErrorKind
::RepetitionCountDecimalEmpty
,
3351 parser(r
"a{9999999999}").parse().unwrap_err(),
3354 kind
: ast
::ErrorKind
::DecimalInvalid
,
3358 parser(r
"a{9").parse().unwrap_err(),
3361 kind
: ast
::ErrorKind
::RepetitionCountUnclosed
,
3365 parser(r
"a{9,a").parse().unwrap_err(),
3368 kind
: ast
::ErrorKind
::RepetitionCountDecimalEmpty
,
3372 parser(r
"a{9,9999999999}").parse().unwrap_err(),
3375 kind
: ast
::ErrorKind
::DecimalInvalid
,
3379 parser(r
"a{9,").parse().unwrap_err(),
3382 kind
: ast
::ErrorKind
::RepetitionCountUnclosed
,
3386 parser(r
"a{9,11").parse().unwrap_err(),
3389 kind
: ast
::ErrorKind
::RepetitionCountUnclosed
,
3393 parser(r
"a{2,1}").parse().unwrap_err(),
3396 kind
: ast
::ErrorKind
::RepetitionCountInvalid
,
3400 parser(r
"{5}").parse().unwrap_err(),
3403 kind
: ast
::ErrorKind
::RepetitionMissing
,
3407 parser(r
"|{5}").parse().unwrap_err(),
3410 kind
: ast
::ErrorKind
::RepetitionMissing
,
3416 fn parse_alternate() {
3418 parser(r
"a|b").parse(),
3419 Ok(Ast
::Alternation(ast
::Alternation
{
3421 asts
: vec
![lit('a'
, 0), lit('b'
, 2)],
3425 parser(r
"(a|b)").parse(),
3429 Ast
::Alternation(ast
::Alternation
{
3431 asts
: vec
![lit('a'
, 1), lit('b'
, 3)],
3437 parser(r
"a|b|c").parse(),
3438 Ok(Ast
::Alternation(ast
::Alternation
{
3440 asts
: vec
![lit('a'
, 0), lit('b'
, 2), lit('c'
, 4)],
3444 parser(r
"ax|by|cz").parse(),
3445 Ok(Ast
::Alternation(ast
::Alternation
{
3448 concat(0..2, vec
![lit('a'
, 0), lit('x'
, 1)]),
3449 concat(3..5, vec
![lit('b'
, 3), lit('y'
, 4)]),
3450 concat(6..8, vec
![lit('c'
, 6), lit('z'
, 7)]),
3455 parser(r
"(ax|by|cz)").parse(),
3459 Ast
::Alternation(ast
::Alternation
{
3462 concat(1..3, vec
![lit('a'
, 1), lit('x'
, 2)]),
3463 concat(4..6, vec
![lit('b'
, 4), lit('y'
, 5)]),
3464 concat(7..9, vec
![lit('c'
, 7), lit('z'
, 8)]),
3470 parser(r
"(ax|(by|(cz)))").parse(),
3477 concat(1..3, vec
![lit('a'
, 1), lit('x'
, 2)]),
3486 vec
![lit('b'
, 5), lit('y'
, 6)]
3493 vec
![lit('c'
, 9), lit('z'
, 10),]
3505 parser(r
"|").parse(),
3508 vec
![Ast
::Empty(span(0..0)), Ast
::Empty(span(1..1)),]
3512 parser(r
"||").parse(),
3516 Ast
::Empty(span(0..0)),
3517 Ast
::Empty(span(1..1)),
3518 Ast
::Empty(span(2..2)),
3523 parser(r
"a|").parse(),
3524 Ok(alt(0..2, vec
![lit('a'
, 0), Ast
::Empty(span(2..2)),]))
3527 parser(r
"|a").parse(),
3528 Ok(alt(0..2, vec
![Ast
::Empty(span(0..0)), lit('a'
, 1),]))
3532 parser(r
"(|)").parse(),
3538 vec
![Ast
::Empty(span(1..1)), Ast
::Empty(span(2..2)),]
3543 parser(r
"(a|)").parse(),
3547 alt(1..3, vec
![lit('a'
, 1), Ast
::Empty(span(3..3)),])
3551 parser(r
"(|a)").parse(),
3555 alt(1..3, vec
![Ast
::Empty(span(1..1)), lit('a'
, 2),])
3560 parser(r
"a|b)").parse().unwrap_err(),
3563 kind
: ast
::ErrorKind
::GroupUnopened
,
3567 parser(r
"(a|b").parse().unwrap_err(),
3570 kind
: ast
::ErrorKind
::GroupUnclosed
,
3576 fn parse_unsupported_lookaround() {
3578 parser(r
"(?=a)").parse().unwrap_err(),
3581 kind
: ast
::ErrorKind
::UnsupportedLookAround
,
3585 parser(r
"(?!a)").parse().unwrap_err(),
3588 kind
: ast
::ErrorKind
::UnsupportedLookAround
,
3592 parser(r
"(?<=a)").parse().unwrap_err(),
3595 kind
: ast
::ErrorKind
::UnsupportedLookAround
,
3599 parser(r
"(?<!a)").parse().unwrap_err(),
3602 kind
: ast
::ErrorKind
::UnsupportedLookAround
,
3610 parser("(?i)").parse(),
3611 Ok(Ast
::Flags(ast
::SetFlags
{
3615 items
: vec
![ast
::FlagsItem
{
3617 kind
: ast
::FlagsItemKind
::Flag(
3618 ast
::Flag
::CaseInsensitive
3625 parser("(?iU)").parse(),
3626 Ok(Ast
::Flags(ast
::SetFlags
{
3633 kind
: ast
::FlagsItemKind
::Flag(
3634 ast
::Flag
::CaseInsensitive
3639 kind
: ast
::FlagsItemKind
::Flag(
3640 ast
::Flag
::SwapGreed
3648 parser("(?i-U)").parse(),
3649 Ok(Ast
::Flags(ast
::SetFlags
{
3656 kind
: ast
::FlagsItemKind
::Flag(
3657 ast
::Flag
::CaseInsensitive
3662 kind
: ast
::FlagsItemKind
::Negation
,
3666 kind
: ast
::FlagsItemKind
::Flag(
3667 ast
::Flag
::SwapGreed
3676 parser("()").parse(),
3677 Ok(Ast
::Group(ast
::Group
{
3679 kind
: ast
::GroupKind
::CaptureIndex(1),
3680 ast
: Box
::new(Ast
::Empty(span(1..1))),
3684 parser("(a)").parse(),
3685 Ok(Ast
::Group(ast
::Group
{
3687 kind
: ast
::GroupKind
::CaptureIndex(1),
3688 ast
: Box
::new(lit('a'
, 1)),
3692 parser("(())").parse(),
3693 Ok(Ast
::Group(ast
::Group
{
3695 kind
: ast
::GroupKind
::CaptureIndex(1),
3696 ast
: Box
::new(Ast
::Group(ast
::Group
{
3698 kind
: ast
::GroupKind
::CaptureIndex(2),
3699 ast
: Box
::new(Ast
::Empty(span(2..2))),
3705 parser("(?:a)").parse(),
3706 Ok(Ast
::Group(ast
::Group
{
3708 kind
: ast
::GroupKind
::NonCapturing(ast
::Flags
{
3712 ast
: Box
::new(lit('a'
, 3)),
3717 parser("(?i:a)").parse(),
3718 Ok(Ast
::Group(ast
::Group
{
3720 kind
: ast
::GroupKind
::NonCapturing(ast
::Flags
{
3722 items
: vec
![ast
::FlagsItem
{
3724 kind
: ast
::FlagsItemKind
::Flag(
3725 ast
::Flag
::CaseInsensitive
3729 ast
: Box
::new(lit('a'
, 4)),
3733 parser("(?i-U:a)").parse(),
3734 Ok(Ast
::Group(ast
::Group
{
3736 kind
: ast
::GroupKind
::NonCapturing(ast
::Flags
{
3741 kind
: ast
::FlagsItemKind
::Flag(
3742 ast
::Flag
::CaseInsensitive
3747 kind
: ast
::FlagsItemKind
::Negation
,
3751 kind
: ast
::FlagsItemKind
::Flag(
3752 ast
::Flag
::SwapGreed
3757 ast
: Box
::new(lit('a'
, 6)),
3762 parser("(").parse().unwrap_err(),
3765 kind
: ast
::ErrorKind
::GroupUnclosed
,
3769 parser("(?").parse().unwrap_err(),
3772 kind
: ast
::ErrorKind
::GroupUnclosed
,
3776 parser("(?P").parse().unwrap_err(),
3779 kind
: ast
::ErrorKind
::FlagUnrecognized
,
3783 parser("(?P<").parse().unwrap_err(),
3786 kind
: ast
::ErrorKind
::GroupNameUnexpectedEof
,
3790 parser("(a").parse().unwrap_err(),
3793 kind
: ast
::ErrorKind
::GroupUnclosed
,
3797 parser("(()").parse().unwrap_err(),
3800 kind
: ast
::ErrorKind
::GroupUnclosed
,
3804 parser(")").parse().unwrap_err(),
3807 kind
: ast
::ErrorKind
::GroupUnopened
,
3811 parser("a)").parse().unwrap_err(),
3814 kind
: ast
::ErrorKind
::GroupUnopened
,
3820 fn parse_capture_name() {
3822 parser("(?P<a>z)").parse(),
3823 Ok(Ast
::Group(ast
::Group
{
3825 kind
: ast
::GroupKind
::CaptureName(ast
::CaptureName
{
3830 ast
: Box
::new(lit('z'
, 6)),
3834 parser("(?P<abc>z)").parse(),
3835 Ok(Ast
::Group(ast
::Group
{
3837 kind
: ast
::GroupKind
::CaptureName(ast
::CaptureName
{
3842 ast
: Box
::new(lit('z'
, 8)),
3847 parser("(?P<a_1>z)").parse(),
3848 Ok(Ast
::Group(ast
::Group
{
3850 kind
: ast
::GroupKind
::CaptureName(ast
::CaptureName
{
3855 ast
: Box
::new(lit('z'
, 8)),
3860 parser("(?P<a.1>z)").parse(),
3861 Ok(Ast
::Group(ast
::Group
{
3863 kind
: ast
::GroupKind
::CaptureName(ast
::CaptureName
{
3868 ast
: Box
::new(lit('z'
, 8)),
3873 parser("(?P<a[1]>z)").parse(),
3874 Ok(Ast
::Group(ast
::Group
{
3876 kind
: ast
::GroupKind
::CaptureName(ast
::CaptureName
{
3881 ast
: Box
::new(lit('z'
, 9)),
3886 parser("(?P<").parse().unwrap_err(),
3889 kind
: ast
::ErrorKind
::GroupNameUnexpectedEof
,
3893 parser("(?P<>z)").parse().unwrap_err(),
3896 kind
: ast
::ErrorKind
::GroupNameEmpty
,
3900 parser("(?P<a").parse().unwrap_err(),
3903 kind
: ast
::ErrorKind
::GroupNameUnexpectedEof
,
3907 parser("(?P<ab").parse().unwrap_err(),
3910 kind
: ast
::ErrorKind
::GroupNameUnexpectedEof
,
3914 parser("(?P<0a").parse().unwrap_err(),
3917 kind
: ast
::ErrorKind
::GroupNameInvalid
,
3921 parser("(?P<~").parse().unwrap_err(),
3924 kind
: ast
::ErrorKind
::GroupNameInvalid
,
3928 parser("(?P<abc~").parse().unwrap_err(),
3931 kind
: ast
::ErrorKind
::GroupNameInvalid
,
3935 parser("(?P<a>y)(?P<a>z)").parse().unwrap_err(),
3938 kind
: ast
::ErrorKind
::GroupNameDuplicate
{
3939 original
: span(4..5),
3948 parser("i:").parse_flags(),
3951 items
: vec
![ast
::FlagsItem
{
3953 kind
: ast
::FlagsItemKind
::Flag(ast
::Flag
::CaseInsensitive
),
3958 parser("i)").parse_flags(),
3961 items
: vec
![ast
::FlagsItem
{
3963 kind
: ast
::FlagsItemKind
::Flag(ast
::Flag
::CaseInsensitive
),
3969 parser("isU:").parse_flags(),
3975 kind
: ast
::FlagsItemKind
::Flag(
3976 ast
::Flag
::CaseInsensitive
3981 kind
: ast
::FlagsItemKind
::Flag(
3982 ast
::Flag
::DotMatchesNewLine
3987 kind
: ast
::FlagsItemKind
::Flag(ast
::Flag
::SwapGreed
),
3994 parser("-isU:").parse_flags(),
4000 kind
: ast
::FlagsItemKind
::Negation
,
4004 kind
: ast
::FlagsItemKind
::Flag(
4005 ast
::Flag
::CaseInsensitive
4010 kind
: ast
::FlagsItemKind
::Flag(
4011 ast
::Flag
::DotMatchesNewLine
4016 kind
: ast
::FlagsItemKind
::Flag(ast
::Flag
::SwapGreed
),
4022 parser("i-sU:").parse_flags(),
4028 kind
: ast
::FlagsItemKind
::Flag(
4029 ast
::Flag
::CaseInsensitive
4034 kind
: ast
::FlagsItemKind
::Negation
,
4038 kind
: ast
::FlagsItemKind
::Flag(
4039 ast
::Flag
::DotMatchesNewLine
4044 kind
: ast
::FlagsItemKind
::Flag(ast
::Flag
::SwapGreed
),
4051 parser("isU").parse_flags().unwrap_err(),
4054 kind
: ast
::ErrorKind
::FlagUnexpectedEof
,
4058 parser("isUa:").parse_flags().unwrap_err(),
4061 kind
: ast
::ErrorKind
::FlagUnrecognized
,
4065 parser("isUi:").parse_flags().unwrap_err(),
4068 kind
: ast
::ErrorKind
::FlagDuplicate { original: span(0..1) }
,
4072 parser("i-sU-i:").parse_flags().unwrap_err(),
4075 kind
: ast
::ErrorKind
::FlagRepeatedNegation
{
4076 original
: span(1..2),
4081 parser("-)").parse_flags().unwrap_err(),
4084 kind
: ast
::ErrorKind
::FlagDanglingNegation
,
4088 parser("i-)").parse_flags().unwrap_err(),
4091 kind
: ast
::ErrorKind
::FlagDanglingNegation
,
4095 parser("iU-)").parse_flags().unwrap_err(),
4098 kind
: ast
::ErrorKind
::FlagDanglingNegation
,
4105 assert_eq
!(parser("i").parse_flag(), Ok(ast
::Flag
::CaseInsensitive
));
4106 assert_eq
!(parser("m").parse_flag(), Ok(ast
::Flag
::MultiLine
));
4107 assert_eq
!(parser("s").parse_flag(), Ok(ast
::Flag
::DotMatchesNewLine
));
4108 assert_eq
!(parser("U").parse_flag(), Ok(ast
::Flag
::SwapGreed
));
4109 assert_eq
!(parser("u").parse_flag(), Ok(ast
::Flag
::Unicode
));
4110 assert_eq
!(parser("x").parse_flag(), Ok(ast
::Flag
::IgnoreWhitespace
));
4113 parser("a").parse_flag().unwrap_err(),
4116 kind
: ast
::ErrorKind
::FlagUnrecognized
,
4120 parser("☃").parse_flag().unwrap_err(),
4122 span
: span_range("☃", 0..3),
4123 kind
: ast
::ErrorKind
::FlagUnrecognized
,
4129 fn parse_primitive_non_escape() {
4131 parser(r
".").parse_primitive(),
4132 Ok(Primitive
::Dot(span(0..1)))
4135 parser(r
"^").parse_primitive(),
4136 Ok(Primitive
::Assertion(ast
::Assertion
{
4138 kind
: ast
::AssertionKind
::StartLine
,
4142 parser(r
"$").parse_primitive(),
4143 Ok(Primitive
::Assertion(ast
::Assertion
{
4145 kind
: ast
::AssertionKind
::EndLine
,
4150 parser(r
"a").parse_primitive(),
4151 Ok(Primitive
::Literal(ast
::Literal
{
4153 kind
: ast
::LiteralKind
::Verbatim
,
4158 parser(r
"|").parse_primitive(),
4159 Ok(Primitive
::Literal(ast
::Literal
{
4161 kind
: ast
::LiteralKind
::Verbatim
,
4166 parser(r
"☃").parse_primitive(),
4167 Ok(Primitive
::Literal(ast
::Literal
{
4168 span
: span_range("☃", 0..3),
4169 kind
: ast
::LiteralKind
::Verbatim
,
4178 parser(r
"\|").parse_primitive(),
4179 Ok(Primitive
::Literal(ast
::Literal
{
4181 kind
: ast
::LiteralKind
::Punctuation
,
4186 (r
"\a", '
\x07'
, ast
::SpecialLiteralKind
::Bell
),
4187 (r
"\f", '
\x0C'
, ast
::SpecialLiteralKind
::FormFeed
),
4188 (r
"\t", '
\t'
, ast
::SpecialLiteralKind
::Tab
),
4189 (r
"\n", '
\n'
, ast
::SpecialLiteralKind
::LineFeed
),
4190 (r
"\r", '
\r'
, ast
::SpecialLiteralKind
::CarriageReturn
),
4191 (r
"\v", '
\x0B'
, ast
::SpecialLiteralKind
::VerticalTab
),
4193 for &(pat
, c
, ref kind
) in specials
{
4195 parser(pat
).parse_primitive(),
4196 Ok(Primitive
::Literal(ast
::Literal
{
4198 kind
: ast
::LiteralKind
::Special(kind
.clone()),
4204 parser(r
"\A").parse_primitive(),
4205 Ok(Primitive
::Assertion(ast
::Assertion
{
4207 kind
: ast
::AssertionKind
::StartText
,
4211 parser(r
"\z").parse_primitive(),
4212 Ok(Primitive
::Assertion(ast
::Assertion
{
4214 kind
: ast
::AssertionKind
::EndText
,
4218 parser(r
"\b").parse_primitive(),
4219 Ok(Primitive
::Assertion(ast
::Assertion
{
4221 kind
: ast
::AssertionKind
::WordBoundary
,
4225 parser(r
"\B").parse_primitive(),
4226 Ok(Primitive
::Assertion(ast
::Assertion
{
4228 kind
: ast
::AssertionKind
::NotWordBoundary
,
4233 parser(r
"\").parse_escape().unwrap_err(),
4236 kind
: ast
::ErrorKind
::EscapeUnexpectedEof
,
4240 parser(r
"\y").parse_escape().unwrap_err(),
4243 kind
: ast
::ErrorKind
::EscapeUnrecognized
,
4249 fn parse_unsupported_backreference() {
4251 parser(r
"\0").parse_escape().unwrap_err(),
4254 kind
: ast
::ErrorKind
::UnsupportedBackreference
,
4258 parser(r
"\9").parse_escape().unwrap_err(),
4261 kind
: ast
::ErrorKind
::UnsupportedBackreference
,
4269 let pat
= format
!(r
"\{:o}", i
);
4271 parser_octal(&pat
).parse_escape(),
4272 Ok(Primitive
::Literal(ast
::Literal
{
4273 span
: span(0..pat
.len()),
4274 kind
: ast
::LiteralKind
::Octal
,
4275 c
: ::std
::char::from_u32(i
).unwrap(),
4280 parser_octal(r
"\778").parse_escape(),
4281 Ok(Primitive
::Literal(ast
::Literal
{
4283 kind
: ast
::LiteralKind
::Octal
,
4288 parser_octal(r
"\7777").parse_escape(),
4289 Ok(Primitive
::Literal(ast
::Literal
{
4291 kind
: ast
::LiteralKind
::Octal
,
4296 parser_octal(r
"\778").parse(),
4297 Ok(Ast
::Concat(ast
::Concat
{
4300 Ast
::Literal(ast
::Literal
{
4302 kind
: ast
::LiteralKind
::Octal
,
4305 Ast
::Literal(ast
::Literal
{
4307 kind
: ast
::LiteralKind
::Verbatim
,
4314 parser_octal(r
"\7777").parse(),
4315 Ok(Ast
::Concat(ast
::Concat
{
4318 Ast
::Literal(ast
::Literal
{
4320 kind
: ast
::LiteralKind
::Octal
,
4323 Ast
::Literal(ast
::Literal
{
4325 kind
: ast
::LiteralKind
::Verbatim
,
4333 parser_octal(r
"\8").parse_escape().unwrap_err(),
4336 kind
: ast
::ErrorKind
::EscapeUnrecognized
,
4342 fn parse_hex_two() {
4344 let pat
= format
!(r
"\x{:02x}", i
);
4346 parser(&pat
).parse_escape(),
4347 Ok(Primitive
::Literal(ast
::Literal
{
4348 span
: span(0..pat
.len()),
4349 kind
: ast
::LiteralKind
::HexFixed(ast
::HexLiteralKind
::X
),
4350 c
: ::std
::char::from_u32(i
).unwrap(),
4356 parser(r
"\xF").parse_escape().unwrap_err(),
4359 kind
: ast
::ErrorKind
::EscapeUnexpectedEof
,
4363 parser(r
"\xG").parse_escape().unwrap_err(),
4366 kind
: ast
::ErrorKind
::EscapeHexInvalidDigit
,
4370 parser(r
"\xFG").parse_escape().unwrap_err(),
4373 kind
: ast
::ErrorKind
::EscapeHexInvalidDigit
,
4379 fn parse_hex_four() {
4381 let c
= match ::std
::char::from_u32(i
) {
4385 let pat
= format
!(r
"\u{:04x}", i
);
4387 parser(&pat
).parse_escape(),
4388 Ok(Primitive
::Literal(ast
::Literal
{
4389 span
: span(0..pat
.len()),
4390 kind
: ast
::LiteralKind
::HexFixed(
4391 ast
::HexLiteralKind
::UnicodeShort
4399 parser(r
"\uF").parse_escape().unwrap_err(),
4402 kind
: ast
::ErrorKind
::EscapeUnexpectedEof
,
4406 parser(r
"\uG").parse_escape().unwrap_err(),
4409 kind
: ast
::ErrorKind
::EscapeHexInvalidDigit
,
4413 parser(r
"\uFG").parse_escape().unwrap_err(),
4416 kind
: ast
::ErrorKind
::EscapeHexInvalidDigit
,
4420 parser(r
"\uFFG").parse_escape().unwrap_err(),
4423 kind
: ast
::ErrorKind
::EscapeHexInvalidDigit
,
4427 parser(r
"\uFFFG").parse_escape().unwrap_err(),
4430 kind
: ast
::ErrorKind
::EscapeHexInvalidDigit
,
4434 parser(r
"\uD800").parse_escape().unwrap_err(),
4437 kind
: ast
::ErrorKind
::EscapeHexInvalid
,
4443 fn parse_hex_eight() {
4445 let c
= match ::std
::char::from_u32(i
) {
4449 let pat
= format
!(r
"\U{:08x}", i
);
4451 parser(&pat
).parse_escape(),
4452 Ok(Primitive
::Literal(ast
::Literal
{
4453 span
: span(0..pat
.len()),
4454 kind
: ast
::LiteralKind
::HexFixed(
4455 ast
::HexLiteralKind
::UnicodeLong
4463 parser(r
"\UF").parse_escape().unwrap_err(),
4466 kind
: ast
::ErrorKind
::EscapeUnexpectedEof
,
4470 parser(r
"\UG").parse_escape().unwrap_err(),
4473 kind
: ast
::ErrorKind
::EscapeHexInvalidDigit
,
4477 parser(r
"\UFG").parse_escape().unwrap_err(),
4480 kind
: ast
::ErrorKind
::EscapeHexInvalidDigit
,
4484 parser(r
"\UFFG").parse_escape().unwrap_err(),
4487 kind
: ast
::ErrorKind
::EscapeHexInvalidDigit
,
4491 parser(r
"\UFFFG").parse_escape().unwrap_err(),
4494 kind
: ast
::ErrorKind
::EscapeHexInvalidDigit
,
4498 parser(r
"\UFFFFG").parse_escape().unwrap_err(),
4501 kind
: ast
::ErrorKind
::EscapeHexInvalidDigit
,
4505 parser(r
"\UFFFFFG").parse_escape().unwrap_err(),
4508 kind
: ast
::ErrorKind
::EscapeHexInvalidDigit
,
4512 parser(r
"\UFFFFFFG").parse_escape().unwrap_err(),
4515 kind
: ast
::ErrorKind
::EscapeHexInvalidDigit
,
4519 parser(r
"\UFFFFFFFG").parse_escape().unwrap_err(),
4522 kind
: ast
::ErrorKind
::EscapeHexInvalidDigit
,
4528 fn parse_hex_brace() {
4530 parser(r
"\u{26c4}").parse_escape(),
4531 Ok(Primitive
::Literal(ast
::Literal
{
4533 kind
: ast
::LiteralKind
::HexBrace(
4534 ast
::HexLiteralKind
::UnicodeShort
4540 parser(r
"\U{26c4}").parse_escape(),
4541 Ok(Primitive
::Literal(ast
::Literal
{
4543 kind
: ast
::LiteralKind
::HexBrace(
4544 ast
::HexLiteralKind
::UnicodeLong
4550 parser(r
"\x{26c4}").parse_escape(),
4551 Ok(Primitive
::Literal(ast
::Literal
{
4553 kind
: ast
::LiteralKind
::HexBrace(ast
::HexLiteralKind
::X
),
4558 parser(r
"\x{26C4}").parse_escape(),
4559 Ok(Primitive
::Literal(ast
::Literal
{
4561 kind
: ast
::LiteralKind
::HexBrace(ast
::HexLiteralKind
::X
),
4566 parser(r
"\x{10fFfF}").parse_escape(),
4567 Ok(Primitive
::Literal(ast
::Literal
{
4569 kind
: ast
::LiteralKind
::HexBrace(ast
::HexLiteralKind
::X
),
4575 parser(r
"\x").parse_escape().unwrap_err(),
4578 kind
: ast
::ErrorKind
::EscapeUnexpectedEof
,
4582 parser(r
"\x{").parse_escape().unwrap_err(),
4585 kind
: ast
::ErrorKind
::EscapeUnexpectedEof
,
4589 parser(r
"\x{FF").parse_escape().unwrap_err(),
4592 kind
: ast
::ErrorKind
::EscapeUnexpectedEof
,
4596 parser(r
"\x{}").parse_escape().unwrap_err(),
4599 kind
: ast
::ErrorKind
::EscapeHexEmpty
,
4603 parser(r
"\x{FGF}").parse_escape().unwrap_err(),
4606 kind
: ast
::ErrorKind
::EscapeHexInvalidDigit
,
4610 parser(r
"\x{FFFFFF}").parse_escape().unwrap_err(),
4613 kind
: ast
::ErrorKind
::EscapeHexInvalid
,
4617 parser(r
"\x{D800}").parse_escape().unwrap_err(),
4620 kind
: ast
::ErrorKind
::EscapeHexInvalid
,
4624 parser(r
"\x{FFFFFFFFF}").parse_escape().unwrap_err(),
4627 kind
: ast
::ErrorKind
::EscapeHexInvalid
,
4633 fn parse_decimal() {
4634 assert_eq
!(parser("123").parse_decimal(), Ok(123));
4635 assert_eq
!(parser("0").parse_decimal(), Ok(0));
4636 assert_eq
!(parser("01").parse_decimal(), Ok(1));
4639 parser("-1").parse_decimal().unwrap_err(),
4640 TestError { span: span(0..0), kind: ast::ErrorKind::DecimalEmpty }
4643 parser("").parse_decimal().unwrap_err(),
4644 TestError { span: span(0..0), kind: ast::ErrorKind::DecimalEmpty }
4647 parser("9999999999").parse_decimal().unwrap_err(),
4650 kind
: ast
::ErrorKind
::DecimalInvalid
,
4656 fn parse_set_class() {
4657 fn union(span
: Span
, items
: Vec
<ast
::ClassSetItem
>) -> ast
::ClassSet
{
4658 ast
::ClassSet
::union(ast
::ClassSetUnion { span, items }
)
4665 ) -> ast
::ClassSet
{
4666 ast
::ClassSet
::BinaryOp(ast
::ClassSetBinaryOp
{
4668 kind
: ast
::ClassSetBinaryOpKind
::Intersection
,
4678 ) -> ast
::ClassSet
{
4679 ast
::ClassSet
::BinaryOp(ast
::ClassSetBinaryOp
{
4681 kind
: ast
::ClassSetBinaryOpKind
::Difference
,
4691 ) -> ast
::ClassSet
{
4692 ast
::ClassSet
::BinaryOp(ast
::ClassSetBinaryOp
{
4694 kind
: ast
::ClassSetBinaryOpKind
::SymmetricDifference
,
4700 fn itemset(item
: ast
::ClassSetItem
) -> ast
::ClassSet
{
4701 ast
::ClassSet
::Item(item
)
4704 fn item_ascii(cls
: ast
::ClassAscii
) -> ast
::ClassSetItem
{
4705 ast
::ClassSetItem
::Ascii(cls
)
4708 fn item_unicode(cls
: ast
::ClassUnicode
) -> ast
::ClassSetItem
{
4709 ast
::ClassSetItem
::Unicode(cls
)
4712 fn item_perl(cls
: ast
::ClassPerl
) -> ast
::ClassSetItem
{
4713 ast
::ClassSetItem
::Perl(cls
)
4716 fn item_bracket(cls
: ast
::ClassBracketed
) -> ast
::ClassSetItem
{
4717 ast
::ClassSetItem
::Bracketed(Box
::new(cls
))
4720 fn lit(span
: Span
, c
: char) -> ast
::ClassSetItem
{
4721 ast
::ClassSetItem
::Literal(ast
::Literal
{
4723 kind
: ast
::LiteralKind
::Verbatim
,
4728 fn empty(span
: Span
) -> ast
::ClassSetItem
{
4729 ast
::ClassSetItem
::Empty(span
)
4732 fn range(span
: Span
, start
: char, end
: char) -> ast
::ClassSetItem
{
4733 let pos1
= Position
{
4734 offset
: span
.start
.offset
+ start
.len_utf8(),
4735 column
: span
.start
.column
+ 1,
4738 let pos2
= Position
{
4739 offset
: span
.end
.offset
- end
.len_utf8(),
4740 column
: span
.end
.column
- 1,
4743 ast
::ClassSetItem
::Range(ast
::ClassSetRange
{
4745 start
: ast
::Literal
{
4746 span
: Span { end: pos1, ..span }
,
4747 kind
: ast
::LiteralKind
::Verbatim
,
4751 span
: Span { start: pos2, ..span }
,
4752 kind
: ast
::LiteralKind
::Verbatim
,
4758 fn alnum(span
: Span
, negated
: bool
) -> ast
::ClassAscii
{
4759 ast
::ClassAscii { span, kind: ast::ClassAsciiKind::Alnum, negated }
4762 fn lower(span
: Span
, negated
: bool
) -> ast
::ClassAscii
{
4763 ast
::ClassAscii { span, kind: ast::ClassAsciiKind::Lower, negated }
4767 parser("[[:alnum:]]").parse(),
4768 Ok(Ast
::Class(ast
::Class
::Bracketed(ast
::ClassBracketed
{
4771 kind
: itemset(item_ascii(alnum(span(1..10), false))),
4775 parser("[[[:alnum:]]]").parse(),
4776 Ok(Ast
::Class(ast
::Class
::Bracketed(ast
::ClassBracketed
{
4779 kind
: itemset(item_bracket(ast
::ClassBracketed
{
4782 kind
: itemset(item_ascii(alnum(span(2..11), false))),
4787 parser("[[:alnum:]&&[:lower:]]").parse(),
4788 Ok(Ast
::Class(ast
::Class
::Bracketed(ast
::ClassBracketed
{
4793 itemset(item_ascii(alnum(span(1..10), false))),
4794 itemset(item_ascii(lower(span(12..21), false))),
4799 parser("[[:alnum:]--[:lower:]]").parse(),
4800 Ok(Ast
::Class(ast
::Class
::Bracketed(ast
::ClassBracketed
{
4805 itemset(item_ascii(alnum(span(1..10), false))),
4806 itemset(item_ascii(lower(span(12..21), false))),
4811 parser("[[:alnum:]~~[:lower:]]").parse(),
4812 Ok(Ast
::Class(ast
::Class
::Bracketed(ast
::ClassBracketed
{
4815 kind
: symdifference(
4817 itemset(item_ascii(alnum(span(1..10), false))),
4818 itemset(item_ascii(lower(span(12..21), false))),
4824 parser("[a]").parse(),
4825 Ok(Ast
::Class(ast
::Class
::Bracketed(ast
::ClassBracketed
{
4828 kind
: itemset(lit(span(1..2), 'a'
)),
4832 parser(r
"[a\]]").parse(),
4833 Ok(Ast
::Class(ast
::Class
::Bracketed(ast
::ClassBracketed
{
4839 lit(span(1..2), 'a'
),
4840 ast
::ClassSetItem
::Literal(ast
::Literal
{
4842 kind
: ast
::LiteralKind
::Punctuation
,
4850 parser(r
"[a\-z]").parse(),
4851 Ok(Ast
::Class(ast
::Class
::Bracketed(ast
::ClassBracketed
{
4857 lit(span(1..2), 'a'
),
4858 ast
::ClassSetItem
::Literal(ast
::Literal
{
4860 kind
: ast
::LiteralKind
::Punctuation
,
4863 lit(span(4..5), 'z'
),
4869 parser("[ab]").parse(),
4870 Ok(Ast
::Class(ast
::Class
::Bracketed(ast
::ClassBracketed
{
4875 vec
![lit(span(1..2), 'a'
), lit(span(2..3), 'b'
),]
4880 parser("[a-]").parse(),
4881 Ok(Ast
::Class(ast
::Class
::Bracketed(ast
::ClassBracketed
{
4886 vec
![lit(span(1..2), 'a'
), lit(span(2..3), '
-'
),]
4891 parser("[-a]").parse(),
4892 Ok(Ast
::Class(ast
::Class
::Bracketed(ast
::ClassBracketed
{
4897 vec
![lit(span(1..2), '
-'
), lit(span(2..3), 'a'
),]
4902 parser(r
"[\pL]").parse(),
4903 Ok(Ast
::Class(ast
::Class
::Bracketed(ast
::ClassBracketed
{
4906 kind
: itemset(item_unicode(ast
::ClassUnicode
{
4909 kind
: ast
::ClassUnicodeKind
::OneLetter('L'
),
4914 parser(r
"[\w]").parse(),
4915 Ok(Ast
::Class(ast
::Class
::Bracketed(ast
::ClassBracketed
{
4918 kind
: itemset(item_perl(ast
::ClassPerl
{
4920 kind
: ast
::ClassPerlKind
::Word
,
4926 parser(r
"[a\wz]").parse(),
4927 Ok(Ast
::Class(ast
::Class
::Bracketed(ast
::ClassBracketed
{
4933 lit(span(1..2), 'a'
),
4934 item_perl(ast
::ClassPerl
{
4936 kind
: ast
::ClassPerlKind
::Word
,
4939 lit(span(4..5), 'z'
),
4946 parser("[a-z]").parse(),
4947 Ok(Ast
::Class(ast
::Class
::Bracketed(ast
::ClassBracketed
{
4950 kind
: itemset(range(span(1..4), 'a'
, 'z'
)),
4954 parser("[a-cx-z]").parse(),
4955 Ok(Ast
::Class(ast
::Class
::Bracketed(ast
::ClassBracketed
{
4961 range(span(1..4), 'a'
, 'c'
),
4962 range(span(4..7), 'x'
, 'z'
),
4968 parser(r
"[\w&&a-cx-z]").parse(),
4969 Ok(Ast
::Class(ast
::Class
::Bracketed(ast
::ClassBracketed
{
4974 itemset(item_perl(ast
::ClassPerl
{
4976 kind
: ast
::ClassPerlKind
::Word
,
4982 range(span(5..8), 'a'
, 'c'
),
4983 range(span(8..11), 'x'
, 'z'
),
4990 parser(r
"[a-cx-z&&\w]").parse(),
4991 Ok(Ast
::Class(ast
::Class
::Bracketed(ast
::ClassBracketed
{
4999 range(span(1..4), 'a'
, 'c'
),
5000 range(span(4..7), 'x'
, 'z'
),
5003 itemset(item_perl(ast
::ClassPerl
{
5005 kind
: ast
::ClassPerlKind
::Word
,
5012 parser(r
"[a--b--c]").parse(),
5013 Ok(Ast
::Class(ast
::Class
::Bracketed(ast
::ClassBracketed
{
5020 itemset(lit(span(1..2), 'a'
)),
5021 itemset(lit(span(4..5), 'b'
)),
5023 itemset(lit(span(7..8), 'c'
)),
5028 parser(r
"[a~~b~~c]").parse(),
5029 Ok(Ast
::Class(ast
::Class
::Bracketed(ast
::ClassBracketed
{
5032 kind
: symdifference(
5036 itemset(lit(span(1..2), 'a'
)),
5037 itemset(lit(span(4..5), 'b'
)),
5039 itemset(lit(span(7..8), 'c'
)),
5044 parser(r
"[\^&&^]").parse(),
5045 Ok(Ast
::Class(ast
::Class
::Bracketed(ast
::ClassBracketed
{
5050 itemset(ast
::ClassSetItem
::Literal(ast
::Literal
{
5052 kind
: ast
::LiteralKind
::Punctuation
,
5055 itemset(lit(span(5..6), '
^')),
5060 parser(r
"[\&&&&]").parse(),
5061 Ok(Ast
::Class(ast
::Class
::Bracketed(ast
::ClassBracketed
{
5066 itemset(ast
::ClassSetItem
::Literal(ast
::Literal
{
5068 kind
: ast
::LiteralKind
::Punctuation
,
5071 itemset(lit(span(5..6), '
&'
)),
5076 parser(r
"[&&&&]").parse(),
5077 Ok(Ast
::Class(ast
::Class
::Bracketed(ast
::ClassBracketed
{
5084 itemset(empty(span(1..1))),
5085 itemset(empty(span(3..3))),
5087 itemset(empty(span(5..5))),
5092 let pat
= "[☃-⛄]";
5094 parser(pat
).parse(),
5095 Ok(Ast
::Class(ast
::Class
::Bracketed(ast
::ClassBracketed
{
5096 span
: span_range(pat
, 0..9),
5098 kind
: itemset(ast
::ClassSetItem
::Range(ast
::ClassSetRange
{
5099 span
: span_range(pat
, 1..8),
5100 start
: ast
::Literal
{
5101 span
: span_range(pat
, 1..4),
5102 kind
: ast
::LiteralKind
::Verbatim
,
5106 span
: span_range(pat
, 5..8),
5107 kind
: ast
::LiteralKind
::Verbatim
,
5115 parser(r
"[]]").parse(),
5116 Ok(Ast
::Class(ast
::Class
::Bracketed(ast
::ClassBracketed
{
5119 kind
: itemset(lit(span(1..2), '
]'
)),
5123 parser(r
"[]\[]").parse(),
5124 Ok(Ast
::Class(ast
::Class
::Bracketed(ast
::ClassBracketed
{
5130 lit(span(1..2), '
]'
),
5131 ast
::ClassSetItem
::Literal(ast
::Literal
{
5133 kind
: ast
::LiteralKind
::Punctuation
,
5141 parser(r
"[\[]]").parse(),
5145 Ast
::Class(ast
::Class
::Bracketed(ast
::ClassBracketed
{
5148 kind
: itemset(ast
::ClassSetItem
::Literal(
5151 kind
: ast
::LiteralKind
::Punctuation
,
5156 Ast
::Literal(ast
::Literal
{
5158 kind
: ast
::LiteralKind
::Verbatim
,
5166 parser("[").parse().unwrap_err(),
5169 kind
: ast
::ErrorKind
::ClassUnclosed
,
5173 parser("[[").parse().unwrap_err(),
5176 kind
: ast
::ErrorKind
::ClassUnclosed
,
5180 parser("[[-]").parse().unwrap_err(),
5183 kind
: ast
::ErrorKind
::ClassUnclosed
,
5187 parser("[[[:alnum:]").parse().unwrap_err(),
5190 kind
: ast
::ErrorKind
::ClassUnclosed
,
5194 parser(r
"[\b]").parse().unwrap_err(),
5197 kind
: ast
::ErrorKind
::ClassEscapeInvalid
,
5201 parser(r
"[\w-a]").parse().unwrap_err(),
5204 kind
: ast
::ErrorKind
::ClassRangeLiteral
,
5208 parser(r
"[a-\w]").parse().unwrap_err(),
5211 kind
: ast
::ErrorKind
::ClassRangeLiteral
,
5215 parser(r
"[z-a]").parse().unwrap_err(),
5218 kind
: ast
::ErrorKind
::ClassRangeInvalid
,
5223 parser_ignore_whitespace("[a ").parse().unwrap_err(),
5226 kind
: ast
::ErrorKind
::ClassUnclosed
,
5230 parser_ignore_whitespace("[a- ").parse().unwrap_err(),
5233 kind
: ast
::ErrorKind
::ClassUnclosed
,
5239 fn parse_set_class_open() {
5240 assert_eq
!(parser("[a]").parse_set_class_open(), {
5241 let set
= ast
::ClassBracketed
{
5244 kind
: ast
::ClassSet
::union(ast
::ClassSetUnion
{
5249 let union = ast
::ClassSetUnion { span: span(1..1), items: vec![] }
;
5253 parser_ignore_whitespace("[ a]").parse_set_class_open(),
5255 let set
= ast
::ClassBracketed
{
5258 kind
: ast
::ClassSet
::union(ast
::ClassSetUnion
{
5264 ast
::ClassSetUnion { span: span(4..4), items: vec![] }
;
5268 assert_eq
!(parser("[^a]").parse_set_class_open(), {
5269 let set
= ast
::ClassBracketed
{
5272 kind
: ast
::ClassSet
::union(ast
::ClassSetUnion
{
5277 let union = ast
::ClassSetUnion { span: span(2..2), items: vec![] }
;
5281 parser_ignore_whitespace("[ ^ a]").parse_set_class_open(),
5283 let set
= ast
::ClassBracketed
{
5286 kind
: ast
::ClassSet
::union(ast
::ClassSetUnion
{
5292 ast
::ClassSetUnion { span: span(4..4), items: vec![] }
;
5296 assert_eq
!(parser("[-a]").parse_set_class_open(), {
5297 let set
= ast
::ClassBracketed
{
5300 kind
: ast
::ClassSet
::union(ast
::ClassSetUnion
{
5305 let union = ast
::ClassSetUnion
{
5307 items
: vec
![ast
::ClassSetItem
::Literal(ast
::Literal
{
5309 kind
: ast
::LiteralKind
::Verbatim
,
5316 parser_ignore_whitespace("[ - a]").parse_set_class_open(),
5318 let set
= ast
::ClassBracketed
{
5321 kind
: ast
::ClassSet
::union(ast
::ClassSetUnion
{
5326 let union = ast
::ClassSetUnion
{
5328 items
: vec
![ast
::ClassSetItem
::Literal(ast
::Literal
{
5330 kind
: ast
::LiteralKind
::Verbatim
,
5337 assert_eq
!(parser("[^-a]").parse_set_class_open(), {
5338 let set
= ast
::ClassBracketed
{
5341 kind
: ast
::ClassSet
::union(ast
::ClassSetUnion
{
5346 let union = ast
::ClassSetUnion
{
5348 items
: vec
![ast
::ClassSetItem
::Literal(ast
::Literal
{
5350 kind
: ast
::LiteralKind
::Verbatim
,
5356 assert_eq
!(parser("[--a]").parse_set_class_open(), {
5357 let set
= ast
::ClassBracketed
{
5360 kind
: ast
::ClassSet
::union(ast
::ClassSetUnion
{
5365 let union = ast
::ClassSetUnion
{
5368 ast
::ClassSetItem
::Literal(ast
::Literal
{
5370 kind
: ast
::LiteralKind
::Verbatim
,
5373 ast
::ClassSetItem
::Literal(ast
::Literal
{
5375 kind
: ast
::LiteralKind
::Verbatim
,
5382 assert_eq
!(parser("[]a]").parse_set_class_open(), {
5383 let set
= ast
::ClassBracketed
{
5386 kind
: ast
::ClassSet
::union(ast
::ClassSetUnion
{
5391 let union = ast
::ClassSetUnion
{
5393 items
: vec
![ast
::ClassSetItem
::Literal(ast
::Literal
{
5395 kind
: ast
::LiteralKind
::Verbatim
,
5402 parser_ignore_whitespace("[ ] a]").parse_set_class_open(),
5404 let set
= ast
::ClassBracketed
{
5407 kind
: ast
::ClassSet
::union(ast
::ClassSetUnion
{
5412 let union = ast
::ClassSetUnion
{
5414 items
: vec
![ast
::ClassSetItem
::Literal(ast
::Literal
{
5416 kind
: ast
::LiteralKind
::Verbatim
,
5423 assert_eq
!(parser("[^]a]").parse_set_class_open(), {
5424 let set
= ast
::ClassBracketed
{
5427 kind
: ast
::ClassSet
::union(ast
::ClassSetUnion
{
5432 let union = ast
::ClassSetUnion
{
5434 items
: vec
![ast
::ClassSetItem
::Literal(ast
::Literal
{
5436 kind
: ast
::LiteralKind
::Verbatim
,
5442 assert_eq
!(parser("[-]a]").parse_set_class_open(), {
5443 let set
= ast
::ClassBracketed
{
5446 kind
: ast
::ClassSet
::union(ast
::ClassSetUnion
{
5451 let union = ast
::ClassSetUnion
{
5453 items
: vec
![ast
::ClassSetItem
::Literal(ast
::Literal
{
5455 kind
: ast
::LiteralKind
::Verbatim
,
5463 parser("[").parse_set_class_open().unwrap_err(),
5466 kind
: ast
::ErrorKind
::ClassUnclosed
,
5470 parser_ignore_whitespace("[ ")
5471 .parse_set_class_open()
5475 kind
: ast
::ErrorKind
::ClassUnclosed
,
5479 parser("[^").parse_set_class_open().unwrap_err(),
5482 kind
: ast
::ErrorKind
::ClassUnclosed
,
5486 parser("[]").parse_set_class_open().unwrap_err(),
5489 kind
: ast
::ErrorKind
::ClassUnclosed
,
5493 parser("[-").parse_set_class_open().unwrap_err(),
5496 kind
: ast
::ErrorKind
::ClassUnclosed
,
5500 parser("[--").parse_set_class_open().unwrap_err(),
5503 kind
: ast
::ErrorKind
::ClassUnclosed
,
5507 // See: https://github.com/rust-lang/regex/issues/792
5509 parser("(?x)[-#]").parse_with_comments().unwrap_err(),
5512 kind
: ast
::ErrorKind
::ClassUnclosed
,
5518 fn maybe_parse_ascii_class() {
5520 parser(r
"[:alnum:]").maybe_parse_ascii_class(),
5521 Some(ast
::ClassAscii
{
5523 kind
: ast
::ClassAsciiKind
::Alnum
,
5528 parser(r
"[:alnum:]A").maybe_parse_ascii_class(),
5529 Some(ast
::ClassAscii
{
5531 kind
: ast
::ClassAsciiKind
::Alnum
,
5536 parser(r
"[:^alnum:]").maybe_parse_ascii_class(),
5537 Some(ast
::ClassAscii
{
5539 kind
: ast
::ClassAsciiKind
::Alnum
,
5544 let p
= parser(r
"[:");
5545 assert_eq
!(p
.maybe_parse_ascii_class(), None
);
5546 assert_eq
!(p
.offset(), 0);
5548 let p
= parser(r
"[:^");
5549 assert_eq
!(p
.maybe_parse_ascii_class(), None
);
5550 assert_eq
!(p
.offset(), 0);
5552 let p
= parser(r
"[^:alnum:]");
5553 assert_eq
!(p
.maybe_parse_ascii_class(), None
);
5554 assert_eq
!(p
.offset(), 0);
5556 let p
= parser(r
"[:alnnum:]");
5557 assert_eq
!(p
.maybe_parse_ascii_class(), None
);
5558 assert_eq
!(p
.offset(), 0);
5560 let p
= parser(r
"[:alnum]");
5561 assert_eq
!(p
.maybe_parse_ascii_class(), None
);
5562 assert_eq
!(p
.offset(), 0);
5564 let p
= parser(r
"[:alnum:");
5565 assert_eq
!(p
.maybe_parse_ascii_class(), None
);
5566 assert_eq
!(p
.offset(), 0);
5570 fn parse_unicode_class() {
5572 parser(r
"\pN").parse_escape(),
5573 Ok(Primitive
::Unicode(ast
::ClassUnicode
{
5576 kind
: ast
::ClassUnicodeKind
::OneLetter('N'
),
5580 parser(r
"\PN").parse_escape(),
5581 Ok(Primitive
::Unicode(ast
::ClassUnicode
{
5584 kind
: ast
::ClassUnicodeKind
::OneLetter('N'
),
5588 parser(r
"\p{N}").parse_escape(),
5589 Ok(Primitive
::Unicode(ast
::ClassUnicode
{
5592 kind
: ast
::ClassUnicodeKind
::Named(s("N")),
5596 parser(r
"\P{N}").parse_escape(),
5597 Ok(Primitive
::Unicode(ast
::ClassUnicode
{
5600 kind
: ast
::ClassUnicodeKind
::Named(s("N")),
5604 parser(r
"\p{Greek}").parse_escape(),
5605 Ok(Primitive
::Unicode(ast
::ClassUnicode
{
5608 kind
: ast
::ClassUnicodeKind
::Named(s("Greek")),
5613 parser(r
"\p{scx:Katakana}").parse_escape(),
5614 Ok(Primitive
::Unicode(ast
::ClassUnicode
{
5617 kind
: ast
::ClassUnicodeKind
::NamedValue
{
5618 op
: ast
::ClassUnicodeOpKind
::Colon
,
5620 value
: s("Katakana"),
5625 parser(r
"\p{scx=Katakana}").parse_escape(),
5626 Ok(Primitive
::Unicode(ast
::ClassUnicode
{
5629 kind
: ast
::ClassUnicodeKind
::NamedValue
{
5630 op
: ast
::ClassUnicodeOpKind
::Equal
,
5632 value
: s("Katakana"),
5637 parser(r
"\p{scx!=Katakana}").parse_escape(),
5638 Ok(Primitive
::Unicode(ast
::ClassUnicode
{
5641 kind
: ast
::ClassUnicodeKind
::NamedValue
{
5642 op
: ast
::ClassUnicodeOpKind
::NotEqual
,
5644 value
: s("Katakana"),
5650 parser(r
"\p{:}").parse_escape(),
5651 Ok(Primitive
::Unicode(ast
::ClassUnicode
{
5654 kind
: ast
::ClassUnicodeKind
::NamedValue
{
5655 op
: ast
::ClassUnicodeOpKind
::Colon
,
5662 parser(r
"\p{=}").parse_escape(),
5663 Ok(Primitive
::Unicode(ast
::ClassUnicode
{
5666 kind
: ast
::ClassUnicodeKind
::NamedValue
{
5667 op
: ast
::ClassUnicodeOpKind
::Equal
,
5674 parser(r
"\p{!=}").parse_escape(),
5675 Ok(Primitive
::Unicode(ast
::ClassUnicode
{
5678 kind
: ast
::ClassUnicodeKind
::NamedValue
{
5679 op
: ast
::ClassUnicodeOpKind
::NotEqual
,
5687 parser(r
"\p").parse_escape().unwrap_err(),
5690 kind
: ast
::ErrorKind
::EscapeUnexpectedEof
,
5694 parser(r
"\p{").parse_escape().unwrap_err(),
5697 kind
: ast
::ErrorKind
::EscapeUnexpectedEof
,
5701 parser(r
"\p{N").parse_escape().unwrap_err(),
5704 kind
: ast
::ErrorKind
::EscapeUnexpectedEof
,
5708 parser(r
"\p{Greek").parse_escape().unwrap_err(),
5711 kind
: ast
::ErrorKind
::EscapeUnexpectedEof
,
5716 parser(r
"\pNz").parse(),
5717 Ok(Ast
::Concat(ast
::Concat
{
5720 Ast
::Class(ast
::Class
::Unicode(ast
::ClassUnicode
{
5723 kind
: ast
::ClassUnicodeKind
::OneLetter('N'
),
5725 Ast
::Literal(ast
::Literal
{
5727 kind
: ast
::LiteralKind
::Verbatim
,
5734 parser(r
"\p{Greek}z").parse(),
5735 Ok(Ast
::Concat(ast
::Concat
{
5738 Ast
::Class(ast
::Class
::Unicode(ast
::ClassUnicode
{
5741 kind
: ast
::ClassUnicodeKind
::Named(s("Greek")),
5743 Ast
::Literal(ast
::Literal
{
5745 kind
: ast
::LiteralKind
::Verbatim
,
5752 parser(r
"\p\{").parse().unwrap_err(),
5755 kind
: ast
::ErrorKind
::UnicodeClassInvalid
,
5759 parser(r
"\P\{").parse().unwrap_err(),
5762 kind
: ast
::ErrorKind
::UnicodeClassInvalid
,
5768 fn parse_perl_class() {
5770 parser(r
"\d").parse_escape(),
5771 Ok(Primitive
::Perl(ast
::ClassPerl
{
5773 kind
: ast
::ClassPerlKind
::Digit
,
5778 parser(r
"\D").parse_escape(),
5779 Ok(Primitive
::Perl(ast
::ClassPerl
{
5781 kind
: ast
::ClassPerlKind
::Digit
,
5786 parser(r
"\s").parse_escape(),
5787 Ok(Primitive
::Perl(ast
::ClassPerl
{
5789 kind
: ast
::ClassPerlKind
::Space
,
5794 parser(r
"\S").parse_escape(),
5795 Ok(Primitive
::Perl(ast
::ClassPerl
{
5797 kind
: ast
::ClassPerlKind
::Space
,
5802 parser(r
"\w").parse_escape(),
5803 Ok(Primitive
::Perl(ast
::ClassPerl
{
5805 kind
: ast
::ClassPerlKind
::Word
,
5810 parser(r
"\W").parse_escape(),
5811 Ok(Primitive
::Perl(ast
::ClassPerl
{
5813 kind
: ast
::ClassPerlKind
::Word
,
5819 parser(r
"\d").parse(),
5820 Ok(Ast
::Class(ast
::Class
::Perl(ast
::ClassPerl
{
5822 kind
: ast
::ClassPerlKind
::Digit
,
5827 parser(r
"\dz").parse(),
5828 Ok(Ast
::Concat(ast
::Concat
{
5831 Ast
::Class(ast
::Class
::Perl(ast
::ClassPerl
{
5833 kind
: ast
::ClassPerlKind
::Digit
,
5836 Ast
::Literal(ast
::Literal
{
5838 kind
: ast
::LiteralKind
::Verbatim
,
5846 // This tests a bug fix where the nest limit checker wasn't decrementing
5847 // its depth during post-traversal, which caused long regexes to trip
5848 // the default limit too aggressively.
5850 fn regression_454_nest_too_big() {
5886 assert
!(parser_nest_limit(pattern
, 50).parse().is_ok());
5889 // This tests that we treat a trailing `-` in a character class as a
5890 // literal `-` even when whitespace mode is enabled and there is whitespace
5891 // after the trailing `-`.
5893 fn regression_455_trailing_dash_ignore_whitespace() {
5894 assert
!(parser("(?x)[ / - ]").parse().is_ok());
5895 assert
!(parser("(?x)[ a - ]").parse().is_ok());
5913 assert
!(parser("(?x)[ / -").parse().is_err());
5914 assert
!(parser("(?x)[ / - ").parse().is_err());