]> git.proxmox.com Git - rustc.git/blob - src/vendor/regex-syntax-0.5.5/src/ast/mod.rs
New upstream version 1.28.0~beta.14+dfsg1
[rustc.git] / src / vendor / regex-syntax-0.5.5 / src / ast / mod.rs
1 // Copyright 2017 The Rust Project Developers. See the COPYRIGHT
2 // file at the top-level directory of this distribution and at
3 // http://rust-lang.org/COPYRIGHT.
4 //
5 // Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
6 // http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
7 // <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
8 // option. This file may not be copied, modified, or distributed
9 // except according to those terms.
10
11 /*!
12 Defines an abstract syntax for regular expressions.
13 */
14
15 use std::cmp::Ordering;
16 use std::error;
17 use std::fmt;
18
19 pub use ast::visitor::{Visitor, visit};
20
21 pub mod parse;
22 pub mod print;
23 mod visitor;
24
25 /// An error that occurred while parsing a regular expression into an abstract
26 /// syntax tree.
27 ///
28 /// Note that note all ASTs represents a valid regular expression. For example,
29 /// an AST is constructed without error for `\p{Quux}`, but `Quux` is not a
30 /// valid Unicode property name. That particular error is reported when
31 /// translating an AST to the high-level intermediate representation (`HIR`).
32 #[derive(Clone, Debug, Eq, PartialEq)]
33 pub struct Error {
34 /// The kind of error.
35 kind: ErrorKind,
36 /// The original pattern that the parser generated the error from. Every
37 /// span in an error is a valid range into this string.
38 pattern: String,
39 /// The span of this error.
40 span: Span,
41 }
42
43 impl Error {
44 /// Return the type of this error.
45 pub fn kind(&self) -> &ErrorKind {
46 &self.kind
47 }
48
49 /// The original pattern string in which this error occurred.
50 ///
51 /// Every span reported by this error is reported in terms of this string.
52 pub fn pattern(&self) -> &str {
53 &self.pattern
54 }
55
56 /// Return the span at which this error occurred.
57 pub fn span(&self) -> &Span {
58 &self.span
59 }
60
61 /// Return an auxiliary span. This span exists only for some errors that
62 /// benefit from being able to point to two locations in the original
63 /// regular expression. For example, "duplicate" errors will have the
64 /// main error position set to the duplicate occurrence while its
65 /// auxiliary span will be set to the initial occurrence.
66 pub fn auxiliary_span(&self) -> Option<&Span> {
67 use self::ErrorKind::*;
68 match self.kind {
69 FlagDuplicate { ref original } => Some(original),
70 FlagRepeatedNegation { ref original, .. } => Some(original),
71 GroupNameDuplicate { ref original, .. } => Some(original),
72 _ => None,
73 }
74 }
75 }
76
77 /// The type of an error that occurred while building an AST.
78 #[derive(Clone, Debug, Eq, PartialEq)]
79 pub enum ErrorKind {
80 /// The capturing group limit was exceeded.
81 ///
82 /// Note that this represents a limit on the total number of capturing
83 /// groups in a regex and not necessarily the number of nested capturing
84 /// groups. That is, the nest limit can be low and it is still possible for
85 /// this error to occur.
86 CaptureLimitExceeded,
87 /// An invalid escape sequence was found in a character class set.
88 ClassEscapeInvalid,
89 /// An invalid character class range was found. An invalid range is any
90 /// range where the start is greater than the end.
91 ClassRangeInvalid,
92 /// An opening `[` was found with no corresponding closing `]`.
93 ClassUnclosed,
94 /// An empty decimal number was given where one was expected.
95 DecimalEmpty,
96 /// An invalid decimal number was given where one was expected.
97 DecimalInvalid,
98 /// A bracketed hex literal was empty.
99 EscapeHexEmpty,
100 /// A bracketed hex literal did not correspond to a Unicode scalar value.
101 EscapeHexInvalid,
102 /// An invalid hexadecimal digit was found.
103 EscapeHexInvalidDigit,
104 /// EOF was found before an escape sequence was completed.
105 EscapeUnexpectedEof,
106 /// An unrecognized escape sequence.
107 EscapeUnrecognized,
108 /// A dangling negation was used when setting flags, e.g., `i-`.
109 FlagDanglingNegation,
110 /// A flag was used twice, e.g., `i-i`.
111 FlagDuplicate {
112 /// The position of the original flag. The error position
113 /// points to the duplicate flag.
114 original: Span,
115 },
116 /// The negation operator was used twice, e.g., `-i-s`.
117 FlagRepeatedNegation {
118 /// The position of the original negation operator. The error position
119 /// points to the duplicate negation operator.
120 original: Span,
121 },
122 /// Expected a flag but got EOF, e.g., `(?`.
123 FlagUnexpectedEof,
124 /// Unrecognized flag, e.g., `a`.
125 FlagUnrecognized,
126 /// A duplicate capture name was found.
127 GroupNameDuplicate {
128 /// The position of the initial occurrence of the capture name. The
129 /// error position itself points to the duplicate occurrence.
130 original: Span,
131 },
132 /// A capture group name is empty, e.g., `(?P<>abc)`.
133 GroupNameEmpty,
134 /// An invalid character was seen for a capture group name. This includes
135 /// errors where the first character is a digit (even though subsequent
136 /// characters are allowed to be digits).
137 GroupNameInvalid,
138 /// A closing `>` could not be found for a capture group name.
139 GroupNameUnexpectedEof,
140 /// An unclosed group, e.g., `(ab`.
141 ///
142 /// The span of this error corresponds to the unclosed parenthesis.
143 GroupUnclosed,
144 /// An unopened group, e.g., `ab)`.
145 GroupUnopened,
146 /// The nest limit was exceeded. The limit stored here is the limit
147 /// configured in the parser.
148 NestLimitExceeded(u32),
149 /// The range provided in a counted repetition operator is invalid. The
150 /// range is invalid if the start is greater than the end.
151 RepetitionCountInvalid,
152 /// An opening `{` was found with no corresponding closing `}`.
153 RepetitionCountUnclosed,
154 /// A repetition operator was applied to a missing sub-expression. This
155 /// occurs, for example, in the regex consisting of just a `*`. It is,
156 /// however, possible to create a repetition operating on an empty
157 /// sub-expression. For example, `()*` is still considered valid.
158 RepetitionMissing,
159 /// When octal support is disabled, this error is produced when an octal
160 /// escape is used. The octal escape is assumed to be an invocation of
161 /// a backreference, which is the common case.
162 UnsupportedBackreference,
163 /// When syntax similar to PCRE's look-around is used, this error is
164 /// returned. Some example syntaxes that are rejected include, but are
165 /// not necessarily limited to, `(?=re)`, `(?!re)`, `(?<=re)` and
166 /// `(?<!re)`. Note that all of these syntaxes are otherwise invalid; this
167 /// error is used to improve the user experience.
168 UnsupportedLookAround,
169 /// Hints that destructuring should not be exhaustive.
170 ///
171 /// This enum may grow additional variants, so this makes sure clients
172 /// don't count on exhaustive matching. (Otherwise, adding a new variant
173 /// could break existing code.)
174 #[doc(hidden)]
175 __Nonexhaustive,
176 }
177
178 impl error::Error for Error {
179 fn description(&self) -> &str {
180 use self::ErrorKind::*;
181 match self.kind {
182 CaptureLimitExceeded => "capture group limit exceeded",
183 ClassEscapeInvalid => "invalid escape sequence in character class",
184 ClassRangeInvalid => "invalid character class range",
185 ClassUnclosed => "unclosed character class",
186 DecimalEmpty => "empty decimal literal",
187 DecimalInvalid => "invalid decimal literal",
188 EscapeHexEmpty => "empty hexadecimal literal",
189 EscapeHexInvalid => "invalid hexadecimal literal",
190 EscapeHexInvalidDigit => "invalid hexadecimal digit",
191 EscapeUnexpectedEof => "unexpected eof (escape sequence)",
192 EscapeUnrecognized => "unrecognized escape sequence",
193 FlagDanglingNegation => "dangling flag negation operator",
194 FlagDuplicate{..} => "duplicate flag",
195 FlagRepeatedNegation{..} => "repeated negation",
196 FlagUnexpectedEof => "unexpected eof (flag)",
197 FlagUnrecognized => "unrecognized flag",
198 GroupNameDuplicate{..} => "duplicate capture group name",
199 GroupNameEmpty => "empty capture group name",
200 GroupNameInvalid => "invalid capture group name",
201 GroupNameUnexpectedEof => "unclosed capture group name",
202 GroupUnclosed => "unclosed group",
203 GroupUnopened => "unopened group",
204 NestLimitExceeded(_) => "nest limit exceeded",
205 RepetitionCountInvalid => "invalid repetition count range",
206 RepetitionCountUnclosed => "unclosed counted repetition",
207 RepetitionMissing => "repetition operator missing expression",
208 UnsupportedBackreference => "backreferences are not supported",
209 UnsupportedLookAround => "look-around is not supported",
210 _ => unreachable!(),
211 }
212 }
213 }
214
215 impl fmt::Display for Error {
216 fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
217 ::error::Formatter::from(self).fmt(f)
218 }
219 }
220
221 impl fmt::Display for ErrorKind {
222 fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
223 use self::ErrorKind::*;
224 match *self {
225 CaptureLimitExceeded => {
226 write!(f, "exceeded the maximum number of \
227 capturing groups ({})", ::std::u32::MAX)
228 }
229 ClassEscapeInvalid => {
230 write!(f, "invalid escape sequence found in character class")
231 }
232 ClassRangeInvalid => {
233 write!(f, "invalid character class range, \
234 the start must be <= the end")
235 }
236 ClassUnclosed => {
237 write!(f, "unclosed character class")
238 }
239 DecimalEmpty => {
240 write!(f, "decimal literal empty")
241 }
242 DecimalInvalid => {
243 write!(f, "decimal literal invalid")
244 }
245 EscapeHexEmpty => {
246 write!(f, "hexadecimal literal empty")
247 }
248 EscapeHexInvalid => {
249 write!(f, "hexadecimal literal is not a Unicode scalar value")
250 }
251 EscapeHexInvalidDigit => {
252 write!(f, "invalid hexadecimal digit")
253 }
254 EscapeUnexpectedEof => {
255 write!(f, "incomplete escape sequence, \
256 reached end of pattern prematurely")
257 }
258 EscapeUnrecognized => {
259 write!(f, "unrecognized escape sequence")
260 }
261 FlagDanglingNegation => {
262 write!(f, "dangling flag negation operator")
263 }
264 FlagDuplicate{..} => {
265 write!(f, "duplicate flag")
266 }
267 FlagRepeatedNegation{..} => {
268 write!(f, "flag negation operator repeated")
269 }
270 FlagUnexpectedEof => {
271 write!(f, "expected flag but got end of regex")
272 }
273 FlagUnrecognized => {
274 write!(f, "unrecognized flag")
275 }
276 GroupNameDuplicate{..} => {
277 write!(f, "duplicate capture group name")
278 }
279 GroupNameEmpty => {
280 write!(f, "empty capture group name")
281 }
282 GroupNameInvalid => {
283 write!(f, "invalid capture group character")
284 }
285 GroupNameUnexpectedEof => {
286 write!(f, "unclosed capture group name")
287 }
288 GroupUnclosed => {
289 write!(f, "unclosed group")
290 }
291 GroupUnopened => {
292 write!(f, "unopened group")
293 }
294 NestLimitExceeded(limit) => {
295 write!(f, "exceed the maximum number of \
296 nested parentheses/brackets ({})", limit)
297 }
298 RepetitionCountInvalid => {
299 write!(f, "invalid repetition count range, \
300 the start must be <= the end")
301 }
302 RepetitionCountUnclosed => {
303 write!(f, "unclosed counted repetition")
304 }
305 RepetitionMissing => {
306 write!(f, "repetition operator missing expression")
307 }
308 UnsupportedBackreference => {
309 write!(f, "backreferences are not supported")
310 }
311 UnsupportedLookAround => {
312 write!(f, "look-around, including look-ahead and look-behind, \
313 is not supported")
314 }
315 _ => unreachable!(),
316 }
317 }
318 }
319
320 /// Span represents the position information of a single AST item.
321 ///
322 /// All span positions are absolute byte offsets that can be used on the
323 /// original regular expression that was parsed.
324 #[derive(Clone, Copy, Eq, PartialEq)]
325 pub struct Span {
326 /// The start byte offset.
327 pub start: Position,
328 /// The end byte offset.
329 pub end: Position,
330 }
331
332 impl fmt::Debug for Span {
333 fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
334 write!(f, "Span({:?}, {:?})", self.start, self.end)
335 }
336 }
337
338 impl Ord for Span {
339 fn cmp(&self, other: &Span) -> Ordering {
340 (&self.start, &self.end).cmp(&(&other.start, &other.end))
341 }
342 }
343
344 impl PartialOrd for Span {
345 fn partial_cmp(&self, other: &Span) -> Option<Ordering> {
346 Some(self.cmp(other))
347 }
348 }
349
350 /// A single position in a regular expression.
351 ///
352 /// A position encodes one half of a span, and include the byte offset, line
353 /// number and column number.
354 #[derive(Clone, Copy, Eq, PartialEq)]
355 pub struct Position {
356 /// The absolute offset of this position, starting at `0` from the
357 /// beginning of the regular expression pattern string.
358 pub offset: usize,
359 /// The line number, starting at `1`.
360 pub line: usize,
361 /// The approximate column number, starting at `1`.
362 pub column: usize,
363 }
364
365 impl fmt::Debug for Position {
366 fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
367 write!(
368 f,
369 "Position(o: {:?}, l: {:?}, c: {:?})",
370 self.offset, self.line, self.column)
371 }
372 }
373
374 impl Ord for Position {
375 fn cmp(&self, other: &Position) -> Ordering {
376 self.offset.cmp(&other.offset)
377 }
378 }
379
380 impl PartialOrd for Position {
381 fn partial_cmp(&self, other: &Position) -> Option<Ordering> {
382 Some(self.cmp(other))
383 }
384 }
385
386 impl Span {
387 /// Create a new span with the given positions.
388 pub fn new(start: Position, end: Position) -> Span {
389 Span { start: start, end: end }
390 }
391
392 /// Create a new span using the given position as the start and end.
393 pub fn splat(pos: Position) -> Span {
394 Span::new(pos, pos)
395 }
396
397 /// Create a new span by replacing the starting the position with the one
398 /// given.
399 pub fn with_start(self, pos: Position) -> Span {
400 Span { start: pos, ..self }
401 }
402
403 /// Create a new span by replacing the ending the position with the one
404 /// given.
405 pub fn with_end(self, pos: Position) -> Span {
406 Span { end: pos, ..self }
407 }
408
409 /// Returns true if and only if this span occurs on a single line.
410 pub fn is_one_line(&self) -> bool {
411 self.start.line == self.end.line
412 }
413
414 /// Returns true if and only if this span is empty. That is, it points to
415 /// a single position in the concrete syntax of a regular expression.
416 pub fn is_empty(&self) -> bool {
417 self.start.offset == self.end.offset
418 }
419 }
420
421 impl Position {
422 /// Create a new position with the given information.
423 ///
424 /// `offset` is the absolute offset of the position, starting at `0` from
425 /// the beginning of the regular expression pattern string.
426 ///
427 /// `line` is the line number, starting at `1`.
428 ///
429 /// `column` is the approximate column number, starting at `1`.
430 pub fn new(offset: usize, line: usize, column: usize) -> Position {
431 Position { offset: offset, line: line, column: column }
432 }
433 }
434
435 /// An abstract syntax tree for a singular expression along with comments
436 /// found.
437 ///
438 /// Comments are not stored in the tree itself to avoid complexity. Each
439 /// comment contains a span of precisely where it occurred in the original
440 /// regular expression.
441 #[derive(Clone, Debug, Eq, PartialEq)]
442 pub struct WithComments {
443 /// The actual ast.
444 pub ast: Ast,
445 /// All comments found in the original regular expression.
446 pub comments: Vec<Comment>,
447 }
448
449 /// A comment from a regular expression with an associated span.
450 ///
451 /// A regular expression can only contain comments when the `x` flag is
452 /// enabled.
453 #[derive(Clone, Debug, Eq, PartialEq)]
454 pub struct Comment {
455 /// The span of this comment, including the beginning `#` and ending `\n`.
456 pub span: Span,
457 /// The comment text, starting with the first character following the `#`
458 /// and ending with the last character preceding the `\n`.
459 pub comment: String,
460 }
461
462 /// An abstract syntax tree for a single regular expression.
463 ///
464 /// An `Ast`'s `fmt::Display` implementation uses constant stack space and heap
465 /// space proportional to the size of the `Ast`.
466 ///
467 /// This type defines its own destructor that uses constant stack space and
468 /// heap space proportional to the size of the `Ast`.
469 #[derive(Clone, Debug, Eq, PartialEq)]
470 pub enum Ast {
471 /// An empty regex that matches everything.
472 Empty(Span),
473 /// A set of flags, e.g., `(?is)`.
474 Flags(SetFlags),
475 /// A single character literal, which includes escape sequences.
476 Literal(Literal),
477 /// The "any character" class.
478 Dot(Span),
479 /// A single zero-width assertion.
480 Assertion(Assertion),
481 /// A single character class. This includes all forms of character classes
482 /// except for `.`. e.g., `\d`, `\pN`, `[a-z]` and `[[:alpha:]]`.
483 Class(Class),
484 /// A repetition operator applied to an arbitrary regular expression.
485 Repetition(Repetition),
486 /// A grouped regular expression.
487 Group(Group),
488 /// An alternation of regular expressions.
489 Alternation(Alternation),
490 /// A concatenation of regular expressions.
491 Concat(Concat),
492 }
493
494 impl Ast {
495 /// Return the span of this abstract syntax tree.
496 pub fn span(&self) -> &Span {
497 match *self {
498 Ast::Empty(ref span) => span,
499 Ast::Flags(ref x) => &x.span,
500 Ast::Literal(ref x) => &x.span,
501 Ast::Dot(ref span) => span,
502 Ast::Assertion(ref x) => &x.span,
503 Ast::Class(ref x) => x.span(),
504 Ast::Repetition(ref x) => &x.span,
505 Ast::Group(ref x) => &x.span,
506 Ast::Alternation(ref x) => &x.span,
507 Ast::Concat(ref x) => &x.span,
508 }
509 }
510
511 /// Return true if and only if this Ast is empty.
512 pub fn is_empty(&self) -> bool {
513 match *self {
514 Ast::Empty(_) => true,
515 _ => false,
516 }
517 }
518
519 /// Returns true if and only if this AST has any (including possibly empty)
520 /// subexpressions.
521 fn has_subexprs(&self) -> bool {
522 match *self {
523 Ast::Empty(_)
524 | Ast::Flags(_)
525 | Ast::Literal(_)
526 | Ast::Dot(_)
527 | Ast::Assertion(_) => false,
528 Ast::Class(_)
529 | Ast::Repetition(_)
530 | Ast::Group(_)
531 | Ast::Alternation(_)
532 | Ast::Concat(_) => true,
533 }
534 }
535 }
536
537 /// Print a display representation of this Ast.
538 ///
539 /// This does not preserve any of the original whitespace formatting that may
540 /// have originally been present in the concrete syntax from which this Ast
541 /// was generated.
542 ///
543 /// This implementation uses constant stack space and heap space proportional
544 /// to the size of the `Ast`.
545 impl fmt::Display for Ast {
546 fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
547 use ast::print::Printer;
548 Printer::new().print(self, f)
549 }
550 }
551
552 /// An alternation of regular expressions.
553 #[derive(Clone, Debug, Eq, PartialEq)]
554 pub struct Alternation {
555 /// The span of this alternation.
556 pub span: Span,
557 /// The alternate regular expressions.
558 pub asts: Vec<Ast>,
559 }
560
561 impl Alternation {
562 /// Return this alternation as an AST.
563 ///
564 /// If this alternation contains zero ASTs, then Ast::Empty is
565 /// returned. If this alternation contains exactly 1 AST, then the
566 /// corresponding AST is returned. Otherwise, Ast::Alternation is returned.
567 pub fn into_ast(mut self) -> Ast {
568 match self.asts.len() {
569 0 => Ast::Empty(self.span),
570 1 => self.asts.pop().unwrap(),
571 _ => Ast::Alternation(self),
572 }
573 }
574 }
575
576 /// A concatenation of regular expressions.
577 #[derive(Clone, Debug, Eq, PartialEq)]
578 pub struct Concat {
579 /// The span of this concatenation.
580 pub span: Span,
581 /// The concatenation regular expressions.
582 pub asts: Vec<Ast>,
583 }
584
585 impl Concat {
586 /// Return this concatenation as an AST.
587 ///
588 /// If this concatenation contains zero ASTs, then Ast::Empty is
589 /// returned. If this concatenation contains exactly 1 AST, then the
590 /// corresponding AST is returned. Otherwise, Ast::Concat is returned.
591 pub fn into_ast(mut self) -> Ast {
592 match self.asts.len() {
593 0 => Ast::Empty(self.span),
594 1 => self.asts.pop().unwrap(),
595 _ => Ast::Concat(self),
596 }
597 }
598 }
599
600 /// A single literal expression.
601 ///
602 /// A literal corresponds to a single Unicode scalar value. Literals may be
603 /// represented in their literal form, e.g., `a` or in their escaped form,
604 /// e.g., `\x61`.
605 #[derive(Clone, Debug, Eq, PartialEq)]
606 pub struct Literal {
607 /// The span of this literal.
608 pub span: Span,
609 /// The kind of this literal.
610 pub kind: LiteralKind,
611 /// The Unicode scalar value corresponding to this literal.
612 pub c: char,
613 }
614
615 impl Literal {
616 /// If this literal was written as a `\x` hex escape, then this returns
617 /// the corresponding byte value. Otherwise, this returns `None`.
618 pub fn byte(&self) -> Option<u8> {
619 let short_hex = LiteralKind::HexFixed(HexLiteralKind::X);
620 if self.c as u32 <= 255 && self.kind == short_hex {
621 Some(self.c as u8)
622 } else {
623 None
624 }
625 }
626 }
627
628 /// The kind of a single literal expression.
629 #[derive(Clone, Debug, Eq, PartialEq)]
630 pub enum LiteralKind {
631 /// The literal is written verbatim, e.g., `a` or `☃`.
632 Verbatim,
633 /// The literal is written as an escape because it is punctuation, e.g.,
634 /// `\*` or `\[`.
635 Punctuation,
636 /// The literal is written as an octal escape, e.g., `\141`.
637 Octal,
638 /// The literal is written as a hex code with a fixed number of digits
639 /// depending on the type of the escape, e.g., `\x61` or or `\u0061` or
640 /// `\U00000061`.
641 HexFixed(HexLiteralKind),
642 /// The literal is written as a hex code with a bracketed number of
643 /// digits. The only restriction is that the bracketed hex code must refer
644 /// to a valid Unicode scalar value.
645 HexBrace(HexLiteralKind),
646 /// The literal is written as a specially recognized escape, e.g., `\f`
647 /// or `\n`.
648 Special(SpecialLiteralKind),
649 }
650
651 /// The type of a special literal.
652 ///
653 /// A special literal is a special escape sequence recognized by the regex
654 /// parser, e.g., `\f` or `\n`.
655 #[derive(Clone, Debug, Eq, PartialEq)]
656 pub enum SpecialLiteralKind {
657 /// Bell, spelled `\a` (`\x07`).
658 Bell,
659 /// Form feed, spelled `\f` (`\x0C`).
660 FormFeed,
661 /// Tab, spelled `\t` (`\x09`).
662 Tab,
663 /// Line feed, spelled `\n` (`\x0A`).
664 LineFeed,
665 /// Carriage return, spelled `\r` (`\x0D`).
666 CarriageReturn,
667 /// Vertical tab, spelled `\v` (`\x0B`).
668 VerticalTab,
669 /// Space, spelled `\ ` (`\x20`). Note that this can only appear when
670 /// parsing in verbose mode.
671 Space,
672 }
673
674 /// The type of a Unicode hex literal.
675 ///
676 /// Note that all variants behave the same when used with brackets. They only
677 /// differ when used without brackets in the number of hex digits that must
678 /// follow.
679 #[derive(Clone, Debug, Eq, PartialEq)]
680 pub enum HexLiteralKind {
681 /// A `\x` prefix. When used without brackets, this form is limited to
682 /// two digits.
683 X,
684 /// A `\u` prefix. When used without brackets, this form is limited to
685 /// four digits.
686 UnicodeShort,
687 /// A `\U` prefix. When used without brackets, this form is limited to
688 /// eight digits.
689 UnicodeLong,
690 }
691
692 impl HexLiteralKind {
693 /// The number of digits that must be used with this literal form when
694 /// used without brackets. When used with brackets, there is no
695 /// restriction on the number of digits.
696 pub fn digits(&self) -> u32 {
697 match *self {
698 HexLiteralKind::X => 2,
699 HexLiteralKind::UnicodeShort => 4,
700 HexLiteralKind::UnicodeLong => 8,
701 }
702 }
703 }
704
705 /// A single character class expression.
706 #[derive(Clone, Debug, Eq, PartialEq)]
707 pub enum Class {
708 /// A Unicode character class, e.g., `\pL` or `\p{Greek}`.
709 Unicode(ClassUnicode),
710 /// A perl character class, e.g., `\d` or `\W`.
711 Perl(ClassPerl),
712 /// A bracketed character class set, which may contain zero or more
713 /// character ranges and/or zero or more nested classes. e.g.,
714 /// `[a-zA-Z\pL]`.
715 Bracketed(ClassBracketed),
716 }
717
718 impl Class {
719 /// Return the span of this character class.
720 pub fn span(&self) -> &Span {
721 match *self {
722 Class::Perl(ref x) => &x.span,
723 Class::Unicode(ref x) => &x.span,
724 Class::Bracketed(ref x) => &x.span,
725 }
726 }
727 }
728
729 /// A Perl character class.
730 #[derive(Clone, Debug, Eq, PartialEq)]
731 pub struct ClassPerl {
732 /// The span of this class.
733 pub span: Span,
734 /// The kind of Perl class.
735 pub kind: ClassPerlKind,
736 /// Whether the class is negated or not. e.g., `\d` is not negated but
737 /// `\D` is.
738 pub negated: bool,
739 }
740
741 /// The available Perl character classes.
742 #[derive(Clone, Debug, Eq, PartialEq)]
743 pub enum ClassPerlKind {
744 /// Decimal numbers.
745 Digit,
746 /// Whitespace.
747 Space,
748 /// Word characters.
749 Word,
750 }
751
752 /// An ASCII character class.
753 #[derive(Clone, Debug, Eq, PartialEq)]
754 pub struct ClassAscii {
755 /// The span of this class.
756 pub span: Span,
757 /// The kind of ASCII class.
758 pub kind: ClassAsciiKind,
759 /// Whether the class is negated or not. e.g., `[[:alpha:]]` is not negated
760 /// but `[[:^alpha:]]` is.
761 pub negated: bool,
762 }
763
764 /// The available ASCII character classes.
765 #[derive(Clone, Debug, Eq, PartialEq)]
766 pub enum ClassAsciiKind {
767 /// `[0-9A-Za-z]`
768 Alnum,
769 /// `[A-Za-z]`
770 Alpha,
771 /// `[\x00-\x7F]`
772 Ascii,
773 /// `[ \t]`
774 Blank,
775 /// `[\x00-\x1F\x7F]`
776 Cntrl,
777 /// `[0-9]`
778 Digit,
779 /// `[!-~]`
780 Graph,
781 /// `[a-z]`
782 Lower,
783 /// `[ -~]`
784 Print,
785 /// `[!-/:-@\[-`{-~]`
786 Punct,
787 /// `[\t\n\v\f\r ]`
788 Space,
789 /// `[A-Z]`
790 Upper,
791 /// `[0-9A-Za-z_]`
792 Word,
793 /// `[0-9A-Fa-f]`
794 Xdigit,
795 }
796
797 impl ClassAsciiKind {
798 /// Return the corresponding ClassAsciiKind variant for the given name.
799 ///
800 /// The name given should correspond to the lowercase version of the
801 /// variant name. e.g., `cntrl` is the name for `ClassAsciiKind::Cntrl`.
802 ///
803 /// If no variant with the corresponding name exists, then `None` is
804 /// returned.
805 pub fn from_name(name: &str) -> Option<ClassAsciiKind> {
806 use self::ClassAsciiKind::*;
807 match name {
808 "alnum" => Some(Alnum),
809 "alpha" => Some(Alpha),
810 "ascii" => Some(Ascii),
811 "blank" => Some(Blank),
812 "cntrl" => Some(Cntrl),
813 "digit" => Some(Digit),
814 "graph" => Some(Graph),
815 "lower" => Some(Lower),
816 "print" => Some(Print),
817 "punct" => Some(Punct),
818 "space" => Some(Space),
819 "upper" => Some(Upper),
820 "word" => Some(Word),
821 "xdigit" => Some(Xdigit),
822 _ => None,
823 }
824 }
825 }
826
827 /// A Unicode character class.
828 #[derive(Clone, Debug, Eq, PartialEq)]
829 pub struct ClassUnicode {
830 /// The span of this class.
831 pub span: Span,
832 /// Whether this class is negated or not.
833 ///
834 /// Note: be careful when using this attribute. This specifically refers
835 /// to whether the class is written as `\p` or `\P`, where the latter
836 /// is `negated = true`. However, it also possible to write something like
837 /// `\P{scx!=Katakana}` which is actually equivalent to
838 /// `\p{scx=Katakana}` and is therefore not actually negated even though
839 /// `negated = true` here. To test whether this class is truly negated
840 /// or not, use the `is_negated` method.
841 pub negated: bool,
842 /// The kind of Unicode class.
843 pub kind: ClassUnicodeKind,
844 }
845
846 impl ClassUnicode {
847 /// Returns true if this class has been negated.
848 ///
849 /// Note that this takes the Unicode op into account, if it's present.
850 /// e.g., `is_negated` for `\P{scx!=Katakana}` will return `false`.
851 pub fn is_negated(&self) -> bool {
852 match self.kind {
853 ClassUnicodeKind::NamedValue {
854 op: ClassUnicodeOpKind::NotEqual, ..
855 } => !self.negated,
856 _ => self.negated,
857 }
858 }
859 }
860
861 /// The available forms of Unicode character classes.
862 #[derive(Clone, Debug, Eq, PartialEq)]
863 pub enum ClassUnicodeKind {
864 /// A one letter abbreviated class, e.g., `\pN`.
865 OneLetter(char),
866 /// A binary property, general category or script. The string may be
867 /// empty.
868 Named(String),
869 /// A property name and an associated value.
870 NamedValue {
871 /// The type of Unicode op used to associate `name` with `value`.
872 op: ClassUnicodeOpKind,
873 /// The property name (which may be empty).
874 name: String,
875 /// The property value (which may be empty).
876 value: String,
877 },
878 }
879
880 /// The type of op used in a Unicode character class.
881 #[derive(Clone, Debug, Eq, PartialEq)]
882 pub enum ClassUnicodeOpKind {
883 /// A property set to a specific value, e.g., `\p{scx=Katakana}`.
884 Equal,
885 /// A property set to a specific value using a colon, e.g.,
886 /// `\p{scx:Katakana}`.
887 Colon,
888 /// A property that isn't a particular value, e.g., `\p{scx!=Katakana}`.
889 NotEqual,
890 }
891
892 impl ClassUnicodeOpKind {
893 /// Whether the op is an equality op or not.
894 pub fn is_equal(&self) -> bool {
895 match *self {
896 ClassUnicodeOpKind::Equal|ClassUnicodeOpKind::Colon => true,
897 _ => false,
898 }
899 }
900 }
901
902 /// A bracketed character class, e.g., `[a-z0-9]`.
903 #[derive(Clone, Debug, Eq, PartialEq)]
904 pub struct ClassBracketed {
905 /// The span of this class.
906 pub span: Span,
907 /// Whether this class is negated or not. e.g., `[a]` is not negated but
908 /// `[^a]` is.
909 pub negated: bool,
910 /// The type of this set. A set is either a normal union of things, e.g.,
911 /// `[abc]` or a result of applying set operations, e.g., `[\pL--c]`.
912 pub kind: ClassSet,
913 }
914
915 /// A character class set.
916 ///
917 /// This type corresponds to the internal structure of a bracketed character
918 /// class. That is, every bracketed character is one of two types: a union of
919 /// items (literals, ranges, other bracketed classes) or a tree of binary set
920 /// operations.
921 #[derive(Clone, Debug, Eq, PartialEq)]
922 pub enum ClassSet {
923 /// An item, which can be a single literal, range, nested character class
924 /// or a union of items.
925 Item(ClassSetItem),
926 /// A single binary operation (i.e., &&, -- or ~~).
927 BinaryOp(ClassSetBinaryOp),
928 }
929
930 impl ClassSet {
931 /// Build a set from a union.
932 pub fn union(ast: ClassSetUnion) -> ClassSet {
933 ClassSet::Item(ClassSetItem::Union(ast))
934 }
935
936 /// Return the span of this character class set.
937 pub fn span(&self) -> &Span {
938 match *self {
939 ClassSet::Item(ref x) => x.span(),
940 ClassSet::BinaryOp(ref x) => &x.span,
941 }
942 }
943
944 /// Return true if and only if this class set is empty.
945 fn is_empty(&self) -> bool {
946 match *self {
947 ClassSet::Item(ClassSetItem::Empty(_)) => true,
948 _ => false,
949 }
950 }
951 }
952
953 /// A single component of a character class set.
954 #[derive(Clone, Debug, Eq, PartialEq)]
955 pub enum ClassSetItem {
956 /// An empty item.
957 ///
958 /// Note that a bracketed character class cannot contain a single empty
959 /// item. Empty items can appear when using one of the binary operators.
960 /// For example, `[&&]` is the intersection of two empty classes.
961 Empty(Span),
962 /// A single literal.
963 Literal(Literal),
964 /// A range between two literals.
965 Range(ClassSetRange),
966 /// An ASCII character class, e.g., `[:alnum:]` or `[:punct:]`.
967 Ascii(ClassAscii),
968 /// A Unicode character class, e.g., `\pL` or `\p{Greek}`.
969 Unicode(ClassUnicode),
970 /// A perl character class, e.g., `\d` or `\W`.
971 Perl(ClassPerl),
972 /// A bracketed character class set, which may contain zero or more
973 /// character ranges and/or zero or more nested classes. e.g.,
974 /// `[a-zA-Z\pL]`.
975 Bracketed(Box<ClassBracketed>),
976 /// A union of items.
977 Union(ClassSetUnion),
978 }
979
980 impl ClassSetItem {
981 /// Return the span of this character class set item.
982 pub fn span(&self) -> &Span {
983 match *self {
984 ClassSetItem::Empty(ref span) => span,
985 ClassSetItem::Literal(ref x) => &x.span,
986 ClassSetItem::Range(ref x) => &x.span,
987 ClassSetItem::Ascii(ref x) => &x.span,
988 ClassSetItem::Perl(ref x) => &x.span,
989 ClassSetItem::Unicode(ref x) => &x.span,
990 ClassSetItem::Bracketed(ref x) => &x.span,
991 ClassSetItem::Union(ref x) => &x.span,
992 }
993 }
994 }
995
996 /// A single character class range in a set.
997 #[derive(Clone, Debug, Eq, PartialEq)]
998 pub struct ClassSetRange {
999 /// The span of this range.
1000 pub span: Span,
1001 /// The start of this range.
1002 pub start: Literal,
1003 /// The end of this range.
1004 pub end: Literal,
1005 }
1006
1007 impl ClassSetRange {
1008 /// Returns true if and only if this character class range is valid.
1009 ///
1010 /// The only case where a range is invalid is if its start is greater than
1011 /// its end.
1012 pub fn is_valid(&self) -> bool {
1013 self.start.c <= self.end.c
1014 }
1015 }
1016
1017 /// A union of items inside a character class set.
1018 #[derive(Clone, Debug, Eq, PartialEq)]
1019 pub struct ClassSetUnion {
1020 /// The span of the items in this operation. e.g., the `a-z0-9` in
1021 /// `[^a-z0-9]`
1022 pub span: Span,
1023 /// The sequence of items that make up this union.
1024 pub items: Vec<ClassSetItem>,
1025 }
1026
1027 impl ClassSetUnion {
1028 /// Push a new item in this union.
1029 ///
1030 /// The ending position of this union's span is updated to the ending
1031 /// position of the span of the item given. If the union is empty, then
1032 /// the starting position of this union is set to the starting position
1033 /// of this item.
1034 ///
1035 /// In other words, if you only use this method to add items to a union
1036 /// and you set the spans on each item correctly, then you should never
1037 /// need to adjust the span of the union directly.
1038 pub fn push(&mut self, item: ClassSetItem) {
1039 if self.items.is_empty() {
1040 self.span.start = item.span().start;
1041 }
1042 self.span.end = item.span().end;
1043 self.items.push(item);
1044 }
1045
1046 /// Return this union as a character class set item.
1047 ///
1048 /// If this union contains zero items, then an empty union is
1049 /// returned. If this concatenation contains exactly 1 item, then the
1050 /// corresponding item is returned. Otherwise, ClassSetItem::Union is
1051 /// returned.
1052 pub fn into_item(mut self) -> ClassSetItem {
1053 match self.items.len() {
1054 0 => ClassSetItem::Empty(self.span),
1055 1 => self.items.pop().unwrap(),
1056 _ => ClassSetItem::Union(self),
1057 }
1058 }
1059 }
1060
1061 /// A Unicode character class set operation.
1062 #[derive(Clone, Debug, Eq, PartialEq)]
1063 pub struct ClassSetBinaryOp {
1064 /// The span of this operation. e.g., the `a-z--[h-p]` in `[a-z--h-p]`.
1065 pub span: Span,
1066 /// The type of this set operation.
1067 pub kind: ClassSetBinaryOpKind,
1068 /// The left hand side of the operation.
1069 pub lhs: Box<ClassSet>,
1070 /// The right hand side of the operation.
1071 pub rhs: Box<ClassSet>,
1072 }
1073
1074 /// The type of a Unicode character class set operation.
1075 ///
1076 /// Note that this doesn't explicitly represent union since there is no
1077 /// explicit union operator. Concatenation inside a character class corresponds
1078 /// to the union operation.
1079 #[derive(Clone, Copy, Debug, Eq, PartialEq)]
1080 pub enum ClassSetBinaryOpKind {
1081 /// The intersection of two sets, e.g., `\pN&&[a-z]`.
1082 Intersection,
1083 /// The difference of two sets, e.g., `\pN--[0-9]`.
1084 Difference,
1085 /// The symmetric difference of two sets. The symmetric difference is the
1086 /// set of elements belonging to one but not both sets.
1087 /// e.g., `[\pL~~[:ascii:]]`.
1088 SymmetricDifference,
1089 }
1090
1091 /// A single zero-width assertion.
1092 #[derive(Clone, Debug, Eq, PartialEq)]
1093 pub struct Assertion {
1094 /// The span of this assertion.
1095 pub span: Span,
1096 /// The assertion kind, e.g., `\b` or `^`.
1097 pub kind: AssertionKind,
1098 }
1099
1100 /// An assertion kind.
1101 #[derive(Clone, Debug, Eq, PartialEq)]
1102 pub enum AssertionKind {
1103 /// `^`
1104 StartLine,
1105 /// `$`
1106 EndLine,
1107 /// `\A`
1108 StartText,
1109 /// `\z`
1110 EndText,
1111 /// `\b`
1112 WordBoundary,
1113 /// `\B`
1114 NotWordBoundary,
1115 }
1116
1117 /// A repetition operation applied to a regular expression.
1118 #[derive(Clone, Debug, Eq, PartialEq)]
1119 pub struct Repetition {
1120 /// The span of this operation.
1121 pub span: Span,
1122 /// The actual operation.
1123 pub op: RepetitionOp,
1124 /// Whether this operation was applied greedily or not.
1125 pub greedy: bool,
1126 /// The regular expression under repetition.
1127 pub ast: Box<Ast>,
1128 }
1129
1130 /// The repetition operator itself.
1131 #[derive(Clone, Debug, Eq, PartialEq)]
1132 pub struct RepetitionOp {
1133 /// The span of this operator. This includes things like `+`, `*?` and
1134 /// `{m,n}`.
1135 pub span: Span,
1136 /// The type of operation.
1137 pub kind: RepetitionKind,
1138 }
1139
1140 /// The kind of a repetition operator.
1141 #[derive(Clone, Debug, Eq, PartialEq)]
1142 pub enum RepetitionKind {
1143 /// `?`
1144 ZeroOrOne,
1145 /// `*`
1146 ZeroOrMore,
1147 /// `+`
1148 OneOrMore,
1149 /// `{m,n}`
1150 Range(RepetitionRange),
1151 }
1152
1153 /// A range repetition operator.
1154 #[derive(Clone, Debug, Eq, PartialEq)]
1155 pub enum RepetitionRange {
1156 /// `{m}`
1157 Exactly(u32),
1158 /// `{m,}`
1159 AtLeast(u32),
1160 /// `{m,n}`
1161 Bounded(u32, u32),
1162 }
1163
1164 impl RepetitionRange {
1165 /// Returns true if and only if this repetition range is valid.
1166 ///
1167 /// The only case where a repetition range is invalid is if it is bounded
1168 /// and its start is greater than its end.
1169 pub fn is_valid(&self) -> bool {
1170 match *self {
1171 RepetitionRange::Bounded(s, e) if s > e => false,
1172 _ => true,
1173 }
1174 }
1175 }
1176
1177 /// A grouped regular expression.
1178 ///
1179 /// This includes both capturing and non-capturing groups. This does **not**
1180 /// include flag-only groups like `(?is)`, but does contain any group that
1181 /// contains a sub-expression, e.g., `(a)`, `(?P<name>a)`, `(?:a)` and
1182 /// `(?is:a)`.
1183 #[derive(Clone, Debug, Eq, PartialEq)]
1184 pub struct Group {
1185 /// The span of this group.
1186 pub span: Span,
1187 /// The kind of this group.
1188 pub kind: GroupKind,
1189 /// The regular expression in this group.
1190 pub ast: Box<Ast>,
1191 }
1192
1193 impl Group {
1194 /// If this group is non-capturing, then this returns the (possibly empty)
1195 /// set of flags. Otherwise, `None` is returned.
1196 pub fn flags(&self) -> Option<&Flags> {
1197 match self.kind {
1198 GroupKind::NonCapturing(ref flags) => Some(flags),
1199 _ => None,
1200 }
1201 }
1202
1203 /// Returns true if and only if this group is capturing.
1204 pub fn is_capturing(&self) -> bool {
1205 match self.kind {
1206 GroupKind::CaptureIndex(_) | GroupKind::CaptureName(_) => true,
1207 GroupKind::NonCapturing(_) => false,
1208 }
1209 }
1210
1211 /// Returns the capture index of this group, if this is a capturing group.
1212 ///
1213 /// This returns a capture index precisely when `is_capturing` is `true`.
1214 pub fn capture_index(&self) -> Option<u32> {
1215 match self.kind {
1216 GroupKind::CaptureIndex(i) => Some(i),
1217 GroupKind::CaptureName(ref x) => Some(x.index),
1218 GroupKind::NonCapturing(_) => None,
1219 }
1220 }
1221 }
1222
1223 /// The kind of a group.
1224 #[derive(Clone, Debug, Eq, PartialEq)]
1225 pub enum GroupKind {
1226 /// `(a)`
1227 CaptureIndex(u32),
1228 /// `(?P<name>a)`
1229 CaptureName(CaptureName),
1230 /// `(?:a)` and `(?i:a)`
1231 NonCapturing(Flags),
1232 }
1233
1234 /// A capture name.
1235 ///
1236 /// This corresponds to the name itself between the angle brackets in, e.g.,
1237 /// `(?P<foo>expr)`.
1238 #[derive(Clone, Debug, Eq, PartialEq)]
1239 pub struct CaptureName {
1240 /// The span of this capture name.
1241 pub span: Span,
1242 /// The capture name.
1243 pub name: String,
1244 /// The capture index.
1245 pub index: u32,
1246 }
1247
1248 /// A group of flags that is not applied to a particular regular expression.
1249 #[derive(Clone, Debug, Eq, PartialEq)]
1250 pub struct SetFlags {
1251 /// The span of these flags, including the grouping parentheses.
1252 pub span: Span,
1253 /// The actual sequence of flags.
1254 pub flags: Flags,
1255 }
1256
1257 /// A group of flags.
1258 ///
1259 /// This corresponds only to the sequence of flags themselves, e.g., `is-u`.
1260 #[derive(Clone, Debug, Eq, PartialEq)]
1261 pub struct Flags {
1262 /// The span of this group of flags.
1263 pub span: Span,
1264 /// A sequence of flag items. Each item is either a flag or a negation
1265 /// operator.
1266 pub items: Vec<FlagsItem>,
1267 }
1268
1269 impl Flags {
1270 /// Add the given item to this sequence of flags.
1271 ///
1272 /// If the item was added successfully, then `None` is returned. If the
1273 /// given item is a duplicate, then `Some(i)` is returned, where
1274 /// `items[i].kind == item.kind`.
1275 pub fn add_item(&mut self, item: FlagsItem) -> Option<usize> {
1276 for (i, x) in self.items.iter().enumerate() {
1277 if x.kind == item.kind {
1278 return Some(i);
1279 }
1280 }
1281 self.items.push(item);
1282 None
1283 }
1284
1285 /// Returns the state of the given flag in this set.
1286 ///
1287 /// If the given flag is in the set but is negated, then `Some(false)` is
1288 /// returned.
1289 ///
1290 /// If the given flag is in the set and is not negated, then `Some(true)`
1291 /// is returned.
1292 ///
1293 /// Otherwise, `None` is returned.
1294 pub fn flag_state(&self, flag: Flag) -> Option<bool> {
1295 let mut negated = false;
1296 for x in &self.items {
1297 match x.kind {
1298 FlagsItemKind::Negation => {
1299 negated = true;
1300 }
1301 FlagsItemKind::Flag(ref xflag) if xflag == &flag => {
1302 return Some(!negated);
1303 }
1304 _ => {}
1305 }
1306 }
1307 None
1308 }
1309 }
1310
1311 /// A single item in a group of flags.
1312 #[derive(Clone, Debug, Eq, PartialEq)]
1313 pub struct FlagsItem {
1314 /// The span of this item.
1315 pub span: Span,
1316 /// The kind of this item.
1317 pub kind: FlagsItemKind,
1318 }
1319
1320 /// The kind of an item in a group of flags.
1321 #[derive(Clone, Debug, Eq, PartialEq)]
1322 pub enum FlagsItemKind {
1323 /// A negation operator applied to all subsequent flags in the enclosing
1324 /// group.
1325 Negation,
1326 /// A single flag in a group.
1327 Flag(Flag),
1328 }
1329
1330 impl FlagsItemKind {
1331 /// Returns true if and only if this item is a negation operator.
1332 pub fn is_negation(&self) -> bool {
1333 match *self {
1334 FlagsItemKind::Negation => true,
1335 _ => false,
1336 }
1337 }
1338 }
1339
1340 /// A single flag.
1341 #[derive(Clone, Copy, Debug, Eq, PartialEq)]
1342 pub enum Flag {
1343 /// `i`
1344 CaseInsensitive,
1345 /// `m`
1346 MultiLine,
1347 /// `s`
1348 DotMatchesNewLine,
1349 /// `U`
1350 SwapGreed,
1351 /// `u`
1352 Unicode,
1353 /// `x`
1354 IgnoreWhitespace,
1355 }
1356
1357 /// A custom `Drop` impl is used for `Ast` such that it uses constant stack
1358 /// space but heap space proportional to the depth of the `Ast`.
1359 impl Drop for Ast {
1360 fn drop(&mut self) {
1361 use std::mem;
1362
1363 match *self {
1364 Ast::Empty(_)
1365 | Ast::Flags(_)
1366 | Ast::Literal(_)
1367 | Ast::Dot(_)
1368 | Ast::Assertion(_)
1369 // Classes are recursive, so they get their own Drop impl.
1370 | Ast::Class(_) => return,
1371 Ast::Repetition(ref x) if !x.ast.has_subexprs() => return,
1372 Ast::Group(ref x) if !x.ast.has_subexprs() => return,
1373 Ast::Alternation(ref x) if x.asts.is_empty() => return,
1374 Ast::Concat(ref x) if x.asts.is_empty() => return,
1375 _ => {}
1376 }
1377
1378 let empty_span = || Span::splat(Position::new(0, 0, 0));
1379 let empty_ast = || Ast::Empty(empty_span());
1380 let mut stack = vec![mem::replace(self, empty_ast())];
1381 while let Some(mut ast) = stack.pop() {
1382 match ast {
1383 Ast::Empty(_)
1384 | Ast::Flags(_)
1385 | Ast::Literal(_)
1386 | Ast::Dot(_)
1387 | Ast::Assertion(_)
1388 // Classes are recursive, so they get their own Drop impl.
1389 | Ast::Class(_) => {}
1390 Ast::Repetition(ref mut x) => {
1391 stack.push(mem::replace(&mut x.ast, empty_ast()));
1392 }
1393 Ast::Group(ref mut x) => {
1394 stack.push(mem::replace(&mut x.ast, empty_ast()));
1395 }
1396 Ast::Alternation(ref mut x) => {
1397 stack.extend(x.asts.drain(..));
1398 }
1399 Ast::Concat(ref mut x) => {
1400 stack.extend(x.asts.drain(..));
1401 }
1402 }
1403 }
1404 }
1405 }
1406
1407 /// A custom `Drop` impl is used for `ClassSet` such that it uses constant
1408 /// stack space but heap space proportional to the depth of the `ClassSet`.
1409 impl Drop for ClassSet {
1410 fn drop(&mut self) {
1411 use std::mem;
1412
1413 match *self {
1414 ClassSet::Item(ref item) => {
1415 match *item {
1416 ClassSetItem::Empty(_)
1417 | ClassSetItem::Literal(_)
1418 | ClassSetItem::Range(_)
1419 | ClassSetItem::Ascii(_)
1420 | ClassSetItem::Unicode(_)
1421 | ClassSetItem::Perl(_) => return,
1422 ClassSetItem::Bracketed(ref x) => {
1423 if x.kind.is_empty() {
1424 return;
1425 }
1426 }
1427 ClassSetItem::Union(ref x) => {
1428 if x.items.is_empty() {
1429 return;
1430 }
1431 }
1432 }
1433 }
1434 ClassSet::BinaryOp(ref op) => {
1435 if op.lhs.is_empty() && op.rhs.is_empty() {
1436 return;
1437 }
1438 }
1439 }
1440
1441 let empty_span = || Span::splat(Position::new(0, 0, 0));
1442 let empty_set = || ClassSet::Item(ClassSetItem::Empty(empty_span()));
1443 let mut stack = vec![mem::replace(self, empty_set())];
1444 while let Some(mut set) = stack.pop() {
1445 match set {
1446 ClassSet::Item(ref mut item) => {
1447 match *item {
1448 ClassSetItem::Empty(_)
1449 | ClassSetItem::Literal(_)
1450 | ClassSetItem::Range(_)
1451 | ClassSetItem::Ascii(_)
1452 | ClassSetItem::Unicode(_)
1453 | ClassSetItem::Perl(_) => {}
1454 ClassSetItem::Bracketed(ref mut x) => {
1455 stack.push(mem::replace(&mut x.kind, empty_set()));
1456 }
1457 ClassSetItem::Union(ref mut x) => {
1458 stack.extend(
1459 x.items.drain(..).map(ClassSet::Item));
1460 }
1461 }
1462 }
1463 ClassSet::BinaryOp(ref mut op) => {
1464 stack.push(mem::replace(&mut op.lhs, empty_set()));
1465 stack.push(mem::replace(&mut op.rhs, empty_set()));
1466 }
1467 }
1468 }
1469 }
1470 }
1471
1472 #[cfg(test)]
1473 mod tests {
1474 use super::*;
1475
1476 // We use a thread with an explicit stack size to test that our destructor
1477 // for Ast can handle arbitrarily sized expressions in constant stack
1478 // space. In case we run on a platform without threads (WASM?), we limit
1479 // this test to Windows/Unix.
1480 #[test]
1481 #[cfg(any(unix, windows))]
1482 fn no_stack_overflow_on_drop() {
1483 use std::thread;
1484
1485 let run = || {
1486 let span = || Span::splat(Position::new(0, 0, 0));
1487 let mut ast = Ast::Empty(span());
1488 for i in 0..200 {
1489 ast = Ast::Group(Group {
1490 span: span(),
1491 kind: GroupKind::CaptureIndex(i),
1492 ast: Box::new(ast),
1493 });
1494 }
1495 assert!(!ast.is_empty());
1496 };
1497
1498 // We run our test on a thread with a small stack size so we can
1499 // force the issue more easily.
1500 thread::Builder::new()
1501 .stack_size(1<<10)
1502 .spawn(run)
1503 .unwrap()
1504 .join()
1505 .unwrap();
1506 }
1507 }