src/vendor/regex-syntax-0.5.5/src/ast/mod.rs

   1 // Copyright 2017 The Rust Project Developers. See the COPYRIGHT
   2 // file at the top-level directory of this distribution and at
   3 // http://rust-lang.org/COPYRIGHT.
   4 //
   5 // Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
   6 // http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
   7 // <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
   8 // option. This file may not be copied, modified, or distributed
   9 // except according to those terms.
  10
  11 /*!
  12 Defines an abstract syntax for regular expressions.
  13 */
  14
  15 use std::cmp::Ordering;
  16 use std::error;
  17 use std::fmt;
  18
  19 pub use ast::visitor::{Visitor, visit};
  20
  21 pub mod parse;
  22 pub mod print;
  23 mod visitor;
  24
  25 /// An error that occurred while parsing a regular expression into an abstract
  26 /// syntax tree.
  27 ///
  28 /// Note that note all ASTs represents a valid regular expression. For example,
  29 /// an AST is constructed without error for `\p{Quux}`, but `Quux` is not a
  30 /// valid Unicode property name. That particular error is reported when
  31 /// translating an AST to the high-level intermediate representation (`HIR`).
  32 #[derive(Clone, Debug, Eq, PartialEq)]
  33 pub struct Error {
  34     /// The kind of error.
  35     kind: ErrorKind,
  36     /// The original pattern that the parser generated the error from. Every
  37     /// span in an error is a valid range into this string.
  38     pattern: String,
  39     /// The span of this error.
  40     span: Span,
  41 }
  42
  43 impl Error {
  44     /// Return the type of this error.
  45     pub fn kind(&self) -> &ErrorKind {
  46         &self.kind
  47     }
  48
  49     /// The original pattern string in which this error occurred.
  50     ///
  51     /// Every span reported by this error is reported in terms of this string.
  52     pub fn pattern(&self) -> &str {
  53         &self.pattern
  54     }
  55
  56     /// Return the span at which this error occurred.
  57     pub fn span(&self) -> &Span {
  58         &self.span
  59     }
  60
  61     /// Return an auxiliary span. This span exists only for some errors that
  62     /// benefit from being able to point to two locations in the original
  63     /// regular expression. For example, "duplicate" errors will have the
  64     /// main error position set to the duplicate occurrence while its
  65     /// auxiliary span will be set to the initial occurrence.
  66     pub fn auxiliary_span(&self) -> Option<&Span> {
  67         use self::ErrorKind::*;
  68         match self.kind {
  69             FlagDuplicate { ref original } => Some(original),
  70             FlagRepeatedNegation { ref original, .. } => Some(original),
  71             GroupNameDuplicate { ref original, .. } => Some(original),
  72             _ => None,
  73         }
  74     }
  75 }
  76
  77 /// The type of an error that occurred while building an AST.
  78 #[derive(Clone, Debug, Eq, PartialEq)]
  79 pub enum ErrorKind {
  80     /// The capturing group limit was exceeded.
  81     ///
  82     /// Note that this represents a limit on the total number of capturing
  83     /// groups in a regex and not necessarily the number of nested capturing
  84     /// groups. That is, the nest limit can be low and it is still possible for
  85     /// this error to occur.
  86     CaptureLimitExceeded,
  87     /// An invalid escape sequence was found in a character class set.
  88     ClassEscapeInvalid,
  89     /// An invalid character class range was found. An invalid range is any
  90     /// range where the start is greater than the end.
  91     ClassRangeInvalid,
  92     /// An opening `[` was found with no corresponding closing `]`.
  93     ClassUnclosed,
  94     /// An empty decimal number was given where one was expected.
  95     DecimalEmpty,
  96     /// An invalid decimal number was given where one was expected.
  97     DecimalInvalid,
  98     /// A bracketed hex literal was empty.
  99     EscapeHexEmpty,
 100     /// A bracketed hex literal did not correspond to a Unicode scalar value.
 101     EscapeHexInvalid,
 102     /// An invalid hexadecimal digit was found.
 103     EscapeHexInvalidDigit,
 104     /// EOF was found before an escape sequence was completed.
 105     EscapeUnexpectedEof,
 106     /// An unrecognized escape sequence.
 107     EscapeUnrecognized,
 108     /// A dangling negation was used when setting flags, e.g., `i-`.
 109     FlagDanglingNegation,
 110     /// A flag was used twice, e.g., `i-i`.
 111     FlagDuplicate {
 112         /// The position of the original flag. The error position
 113         /// points to the duplicate flag.
 114         original: Span,
 115     },
 116     /// The negation operator was used twice, e.g., `-i-s`.
 117     FlagRepeatedNegation {
 118         /// The position of the original negation operator. The error position
 119         /// points to the duplicate negation operator.
 120         original: Span,
 121     },
 122     /// Expected a flag but got EOF, e.g., `(?`.
 123     FlagUnexpectedEof,
 124     /// Unrecognized flag, e.g., `a`.
 125     FlagUnrecognized,
 126     /// A duplicate capture name was found.
 127     GroupNameDuplicate {
 128         /// The position of the initial occurrence of the capture name. The
 129         /// error position itself points to the duplicate occurrence.
 130         original: Span,
 131     },
 132     /// A capture group name is empty, e.g., `(?P<>abc)`.
 133     GroupNameEmpty,
 134     /// An invalid character was seen for a capture group name. This includes
 135     /// errors where the first character is a digit (even though subsequent
 136     /// characters are allowed to be digits).
 137     GroupNameInvalid,
 138     /// A closing `>` could not be found for a capture group name.
 139     GroupNameUnexpectedEof,
 140     /// An unclosed group, e.g., `(ab`.
 141     ///
 142     /// The span of this error corresponds to the unclosed parenthesis.
 143     GroupUnclosed,
 144     /// An unopened group, e.g., `ab)`.
 145     GroupUnopened,
 146     /// The nest limit was exceeded. The limit stored here is the limit
 147     /// configured in the parser.
 148     NestLimitExceeded(u32),
 149     /// The range provided in a counted repetition operator is invalid. The
 150     /// range is invalid if the start is greater than the end.
 151     RepetitionCountInvalid,
 152     /// An opening `{` was found with no corresponding closing `}`.
 153     RepetitionCountUnclosed,
 154     /// A repetition operator was applied to a missing sub-expression. This
 155     /// occurs, for example, in the regex consisting of just a `*`. It is,
 156     /// however, possible to create a repetition operating on an empty
 157     /// sub-expression. For example, `()*` is still considered valid.
 158     RepetitionMissing,
 159     /// When octal support is disabled, this error is produced when an octal
 160     /// escape is used. The octal escape is assumed to be an invocation of
 161     /// a backreference, which is the common case.
 162     UnsupportedBackreference,
 163     /// When syntax similar to PCRE's look-around is used, this error is
 164     /// returned. Some example syntaxes that are rejected include, but are
 165     /// not necessarily limited to, `(?=re)`, `(?!re)`, `(?<=re)` and
 166     /// `(?<!re)`. Note that all of these syntaxes are otherwise invalid; this
 167     /// error is used to improve the user experience.
 168     UnsupportedLookAround,
 169     /// Hints that destructuring should not be exhaustive.
 170     ///
 171     /// This enum may grow additional variants, so this makes sure clients
 172     /// don't count on exhaustive matching. (Otherwise, adding a new variant
 173     /// could break existing code.)
 174     #[doc(hidden)]
 175     __Nonexhaustive,
 176 }
 177
 178 impl error::Error for Error {
 179     fn description(&self) -> &str {
 180         use self::ErrorKind::*;
 181         match self.kind {
 182             CaptureLimitExceeded => "capture group limit exceeded",
 183             ClassEscapeInvalid => "invalid escape sequence in character class",
 184             ClassRangeInvalid => "invalid character class range",
 185             ClassUnclosed => "unclosed character class",
 186             DecimalEmpty => "empty decimal literal",
 187             DecimalInvalid => "invalid decimal literal",
 188             EscapeHexEmpty => "empty hexadecimal literal",
 189             EscapeHexInvalid => "invalid hexadecimal literal",
 190             EscapeHexInvalidDigit => "invalid hexadecimal digit",
 191             EscapeUnexpectedEof => "unexpected eof (escape sequence)",
 192             EscapeUnrecognized => "unrecognized escape sequence",
 193             FlagDanglingNegation => "dangling flag negation operator",
 194             FlagDuplicate{..} => "duplicate flag",
 195             FlagRepeatedNegation{..} => "repeated negation",
 196             FlagUnexpectedEof => "unexpected eof (flag)",
 197             FlagUnrecognized => "unrecognized flag",
 198             GroupNameDuplicate{..} => "duplicate capture group name",
 199             GroupNameEmpty => "empty capture group name",
 200             GroupNameInvalid => "invalid capture group name",
 201             GroupNameUnexpectedEof => "unclosed capture group name",
 202             GroupUnclosed => "unclosed group",
 203             GroupUnopened => "unopened group",
 204             NestLimitExceeded(_) => "nest limit exceeded",
 205             RepetitionCountInvalid => "invalid repetition count range",
 206             RepetitionCountUnclosed => "unclosed counted repetition",
 207             RepetitionMissing => "repetition operator missing expression",
 208             UnsupportedBackreference => "backreferences are not supported",
 209             UnsupportedLookAround => "look-around is not supported",
 210             _ => unreachable!(),
 211         }
 212     }
 213 }
 214
 215 impl fmt::Display for Error {
 216     fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
 217         ::error::Formatter::from(self).fmt(f)
 218     }
 219 }
 220
 221 impl fmt::Display for ErrorKind {
 222     fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
 223         use self::ErrorKind::*;
 224         match *self {
 225             CaptureLimitExceeded => {
 226                 write!(f, "exceeded the maximum number of \
 227                            capturing groups ({})", ::std::u32::MAX)
 228             }
 229             ClassEscapeInvalid => {
 230                 write!(f, "invalid escape sequence found in character class")
 231             }
 232             ClassRangeInvalid => {
 233                 write!(f, "invalid character class range, \
 234                            the start must be <= the end")
 235             }
 236             ClassUnclosed => {
 237                 write!(f, "unclosed character class")
 238             }
 239             DecimalEmpty => {
 240                 write!(f, "decimal literal empty")
 241             }
 242             DecimalInvalid => {
 243                 write!(f, "decimal literal invalid")
 244             }
 245             EscapeHexEmpty => {
 246                 write!(f, "hexadecimal literal empty")
 247             }
 248             EscapeHexInvalid => {
 249                 write!(f, "hexadecimal literal is not a Unicode scalar value")
 250             }
 251             EscapeHexInvalidDigit => {
 252                 write!(f, "invalid hexadecimal digit")
 253             }
 254             EscapeUnexpectedEof => {
 255                 write!(f, "incomplete escape sequence, \
 256                            reached end of pattern prematurely")
 257             }
 258             EscapeUnrecognized => {
 259                 write!(f, "unrecognized escape sequence")
 260             }
 261             FlagDanglingNegation => {
 262                 write!(f, "dangling flag negation operator")
 263             }
 264             FlagDuplicate{..} => {
 265                 write!(f, "duplicate flag")
 266             }
 267             FlagRepeatedNegation{..} => {
 268                 write!(f, "flag negation operator repeated")
 269             }
 270             FlagUnexpectedEof => {
 271                 write!(f, "expected flag but got end of regex")
 272             }
 273             FlagUnrecognized => {
 274                 write!(f, "unrecognized flag")
 275             }
 276             GroupNameDuplicate{..} => {
 277                 write!(f, "duplicate capture group name")
 278             }
 279             GroupNameEmpty => {
 280                 write!(f, "empty capture group name")
 281             }
 282             GroupNameInvalid => {
 283                 write!(f, "invalid capture group character")
 284             }
 285             GroupNameUnexpectedEof => {
 286                 write!(f, "unclosed capture group name")
 287             }
 288             GroupUnclosed => {
 289                 write!(f, "unclosed group")
 290             }
 291             GroupUnopened => {
 292                 write!(f, "unopened group")
 293             }
 294             NestLimitExceeded(limit) => {
 295                 write!(f, "exceed the maximum number of \
 296                            nested parentheses/brackets ({})", limit)
 297             }
 298             RepetitionCountInvalid => {
 299                 write!(f, "invalid repetition count range, \
 300                            the start must be <= the end")
 301             }
 302             RepetitionCountUnclosed => {
 303                 write!(f, "unclosed counted repetition")
 304             }
 305             RepetitionMissing => {
 306                 write!(f, "repetition operator missing expression")
 307             }
 308             UnsupportedBackreference => {
 309                 write!(f, "backreferences are not supported")
 310             }
 311             UnsupportedLookAround => {
 312                 write!(f, "look-around, including look-ahead and look-behind, \
 313                            is not supported")
 314             }
 315             _ => unreachable!(),
 316         }
 317     }
 318 }
 319
 320 /// Span represents the position information of a single AST item.
 321 ///
 322 /// All span positions are absolute byte offsets that can be used on the
 323 /// original regular expression that was parsed.
 324 #[derive(Clone, Copy, Eq, PartialEq)]
 325 pub struct Span {
 326     /// The start byte offset.
 327     pub start: Position,
 328     /// The end byte offset.
 329     pub end: Position,
 330 }
 331
 332 impl fmt::Debug for Span {
 333     fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
 334         write!(f, "Span({:?}, {:?})", self.start, self.end)
 335     }
 336 }
 337
 338 impl Ord for Span {
 339     fn cmp(&self, other: &Span) -> Ordering {
 340         (&self.start, &self.end).cmp(&(&other.start, &other.end))
 341     }
 342 }
 343
 344 impl PartialOrd for Span {
 345     fn partial_cmp(&self, other: &Span) -> Option<Ordering> {
 346         Some(self.cmp(other))
 347     }
 348 }
 349
 350 /// A single position in a regular expression.
 351 ///
 352 /// A position encodes one half of a span, and include the byte offset, line
 353 /// number and column number.
 354 #[derive(Clone, Copy, Eq, PartialEq)]
 355 pub struct Position {
 356     /// The absolute offset of this position, starting at `0` from the
 357     /// beginning of the regular expression pattern string.
 358     pub offset: usize,
 359     /// The line number, starting at `1`.
 360     pub line: usize,
 361     /// The approximate column number, starting at `1`.
 362     pub column: usize,
 363 }
 364
 365 impl fmt::Debug for Position {
 366     fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
 367         write!(
 368             f,
 369             "Position(o: {:?}, l: {:?}, c: {:?})",
 370             self.offset, self.line, self.column)
 371     }
 372 }
 373
 374 impl Ord for Position {
 375     fn cmp(&self, other: &Position) -> Ordering {
 376         self.offset.cmp(&other.offset)
 377     }
 378 }
 379
 380 impl PartialOrd for Position {
 381     fn partial_cmp(&self, other: &Position) -> Option<Ordering> {
 382         Some(self.cmp(other))
 383     }
 384 }
 385
 386 impl Span {
 387     /// Create a new span with the given positions.
 388     pub fn new(start: Position, end: Position) -> Span {
 389         Span { start: start, end: end }
 390     }
 391
 392     /// Create a new span using the given position as the start and end.
 393     pub fn splat(pos: Position) -> Span {
 394         Span::new(pos, pos)
 395     }
 396
 397     /// Create a new span by replacing the starting the position with the one
 398     /// given.
 399     pub fn with_start(self, pos: Position) -> Span {
 400         Span { start: pos, ..self }
 401     }
 402
 403     /// Create a new span by replacing the ending the position with the one
 404     /// given.
 405     pub fn with_end(self, pos: Position) -> Span {
 406         Span { end: pos, ..self }
 407     }
 408
 409     /// Returns true if and only if this span occurs on a single line.
 410     pub fn is_one_line(&self) -> bool {
 411         self.start.line == self.end.line
 412     }
 413
 414     /// Returns true if and only if this span is empty. That is, it points to
 415     /// a single position in the concrete syntax of a regular expression.
 416     pub fn is_empty(&self) -> bool {
 417         self.start.offset == self.end.offset
 418     }
 419 }
 420
 421 impl Position {
 422     /// Create a new position with the given information.
 423     ///
 424     /// `offset` is the absolute offset of the position, starting at `0` from
 425     /// the beginning of the regular expression pattern string.
 426     ///
 427     /// `line` is the line number, starting at `1`.
 428     ///
 429     /// `column` is the approximate column number, starting at `1`.
 430     pub fn new(offset: usize, line: usize, column: usize) -> Position {
 431         Position { offset: offset, line: line, column: column }
 432     }
 433 }
 434
 435 /// An abstract syntax tree for a singular expression along with comments
 436 /// found.
 437 ///
 438 /// Comments are not stored in the tree itself to avoid complexity. Each
 439 /// comment contains a span of precisely where it occurred in the original
 440 /// regular expression.
 441 #[derive(Clone, Debug, Eq, PartialEq)]
 442 pub struct WithComments {
 443     /// The actual ast.
 444     pub ast: Ast,
 445     /// All comments found in the original regular expression.
 446     pub comments: Vec<Comment>,
 447 }
 448
 449 /// A comment from a regular expression with an associated span.
 450 ///
 451 /// A regular expression can only contain comments when the `x` flag is
 452 /// enabled.
 453 #[derive(Clone, Debug, Eq, PartialEq)]
 454 pub struct Comment {
 455     /// The span of this comment, including the beginning `#` and ending `\n`.
 456     pub span: Span,
 457     /// The comment text, starting with the first character following the `#`
 458     /// and ending with the last character preceding the `\n`.
 459     pub comment: String,
 460 }
 461
 462 /// An abstract syntax tree for a single regular expression.
 463 ///
 464 /// An `Ast`'s `fmt::Display` implementation uses constant stack space and heap
 465 /// space proportional to the size of the `Ast`.
 466 ///
 467 /// This type defines its own destructor that uses constant stack space and
 468 /// heap space proportional to the size of the `Ast`.
 469 #[derive(Clone, Debug, Eq, PartialEq)]
 470 pub enum Ast {
 471     /// An empty regex that matches everything.
 472     Empty(Span),
 473     /// A set of flags, e.g., `(?is)`.
 474     Flags(SetFlags),
 475     /// A single character literal, which includes escape sequences.
 476     Literal(Literal),
 477     /// The "any character" class.
 478     Dot(Span),
 479     /// A single zero-width assertion.
 480     Assertion(Assertion),
 481     /// A single character class. This includes all forms of character classes
 482     /// except for `.`. e.g., `\d`, `\pN`, `[a-z]` and `[[:alpha:]]`.
 483     Class(Class),
 484     /// A repetition operator applied to an arbitrary regular expression.
 485     Repetition(Repetition),
 486     /// A grouped regular expression.
 487     Group(Group),
 488     /// An alternation of regular expressions.
 489     Alternation(Alternation),
 490     /// A concatenation of regular expressions.
 491     Concat(Concat),
 492 }
 493
 494 impl Ast {
 495     /// Return the span of this abstract syntax tree.
 496     pub fn span(&self) -> &Span {
 497         match *self {
 498             Ast::Empty(ref span) => span,
 499             Ast::Flags(ref x) => &x.span,
 500             Ast::Literal(ref x) => &x.span,
 501             Ast::Dot(ref span) => span,
 502             Ast::Assertion(ref x) => &x.span,
 503             Ast::Class(ref x) => x.span(),
 504             Ast::Repetition(ref x) => &x.span,
 505             Ast::Group(ref x) => &x.span,
 506             Ast::Alternation(ref x) => &x.span,
 507             Ast::Concat(ref x) => &x.span,
 508         }
 509     }
 510
 511     /// Return true if and only if this Ast is empty.
 512     pub fn is_empty(&self) -> bool {
 513         match *self {
 514             Ast::Empty(_) => true,
 515             _ => false,
 516         }
 517     }
 518
 519     /// Returns true if and only if this AST has any (including possibly empty)
 520     /// subexpressions.
 521     fn has_subexprs(&self) -> bool {
 522         match *self {
 523             Ast::Empty(_)
 524             | Ast::Flags(_)
 525             | Ast::Literal(_)
 526             | Ast::Dot(_)
 527             | Ast::Assertion(_) => false,
 528             Ast::Class(_)
 529             | Ast::Repetition(_)
 530             | Ast::Group(_)
 531             | Ast::Alternation(_)
 532             | Ast::Concat(_) => true,
 533         }
 534     }
 535 }
 536
 537 /// Print a display representation of this Ast.
 538 ///
 539 /// This does not preserve any of the original whitespace formatting that may
 540 /// have originally been present in the concrete syntax from which this Ast
 541 /// was generated.
 542 ///
 543 /// This implementation uses constant stack space and heap space proportional
 544 /// to the size of the `Ast`.
 545 impl fmt::Display for Ast {
 546     fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
 547         use ast::print::Printer;
 548         Printer::new().print(self, f)
 549     }
 550 }
 551
 552 /// An alternation of regular expressions.
 553 #[derive(Clone, Debug, Eq, PartialEq)]
 554 pub struct Alternation {
 555     /// The span of this alternation.
 556     pub span: Span,
 557     /// The alternate regular expressions.
 558     pub asts: Vec<Ast>,
 559 }
 560
 561 impl Alternation {
 562     /// Return this alternation as an AST.
 563     ///
 564     /// If this alternation contains zero ASTs, then Ast::Empty is
 565     /// returned. If this alternation contains exactly 1 AST, then the
 566     /// corresponding AST is returned. Otherwise, Ast::Alternation is returned.
 567     pub fn into_ast(mut self) -> Ast {
 568         match self.asts.len() {
 569             0 => Ast::Empty(self.span),
 570             1 => self.asts.pop().unwrap(),
 571             _ => Ast::Alternation(self),
 572         }
 573     }
 574 }
 575
 576 /// A concatenation of regular expressions.
 577 #[derive(Clone, Debug, Eq, PartialEq)]
 578 pub struct Concat {
 579     /// The span of this concatenation.
 580     pub span: Span,
 581     /// The concatenation regular expressions.
 582     pub asts: Vec<Ast>,
 583 }
 584
 585 impl Concat {
 586     /// Return this concatenation as an AST.
 587     ///
 588     /// If this concatenation contains zero ASTs, then Ast::Empty is
 589     /// returned. If this concatenation contains exactly 1 AST, then the
 590     /// corresponding AST is returned. Otherwise, Ast::Concat is returned.
 591     pub fn into_ast(mut self) -> Ast {
 592         match self.asts.len() {
 593             0 => Ast::Empty(self.span),
 594             1 => self.asts.pop().unwrap(),
 595             _ => Ast::Concat(self),
 596         }
 597     }
 598 }
 599
 600 /// A single literal expression.
 601 ///
 602 /// A literal corresponds to a single Unicode scalar value. Literals may be
 603 /// represented in their literal form, e.g., `a` or in their escaped form,
 604 /// e.g., `\x61`.
 605 #[derive(Clone, Debug, Eq, PartialEq)]
 606 pub struct Literal {
 607     /// The span of this literal.
 608     pub span: Span,
 609     /// The kind of this literal.
 610     pub kind: LiteralKind,
 611     /// The Unicode scalar value corresponding to this literal.
 612     pub c: char,
 613 }
 614
 615 impl Literal {
 616     /// If this literal was written as a `\x` hex escape, then this returns
 617     /// the corresponding byte value. Otherwise, this returns `None`.
 618     pub fn byte(&self) -> Option<u8> {
 619         let short_hex = LiteralKind::HexFixed(HexLiteralKind::X);
 620         if self.c as u32 <= 255 && self.kind == short_hex {
 621             Some(self.c as u8)
 622         } else {
 623             None
 624         }
 625     }
 626 }
 627
 628 /// The kind of a single literal expression.
 629 #[derive(Clone, Debug, Eq, PartialEq)]
 630 pub enum LiteralKind {
 631     /// The literal is written verbatim, e.g., `a` or `☃`.
 632     Verbatim,
 633     /// The literal is written as an escape because it is punctuation, e.g.,
 634     /// `\*` or `\[`.
 635     Punctuation,
 636     /// The literal is written as an octal escape, e.g., `\141`.
 637     Octal,
 638     /// The literal is written as a hex code with a fixed number of digits
 639     /// depending on the type of the escape, e.g., `\x61` or or `\u0061` or
 640     /// `\U00000061`.
 641     HexFixed(HexLiteralKind),
 642     /// The literal is written as a hex code with a bracketed number of
 643     /// digits. The only restriction is that the bracketed hex code must refer
 644     /// to a valid Unicode scalar value.
 645     HexBrace(HexLiteralKind),
 646     /// The literal is written as a specially recognized escape, e.g., `\f`
 647     /// or `\n`.
 648     Special(SpecialLiteralKind),
 649 }
 650
 651 /// The type of a special literal.
 652 ///
 653 /// A special literal is a special escape sequence recognized by the regex
 654 /// parser, e.g., `\f` or `\n`.
 655 #[derive(Clone, Debug, Eq, PartialEq)]
 656 pub enum SpecialLiteralKind {
 657     /// Bell, spelled `\a` (`\x07`).
 658     Bell,
 659     /// Form feed, spelled `\f` (`\x0C`).
 660     FormFeed,
 661     /// Tab, spelled `\t` (`\x09`).
 662     Tab,
 663     /// Line feed, spelled `\n` (`\x0A`).
 664     LineFeed,
 665     /// Carriage return, spelled `\r` (`\x0D`).
 666     CarriageReturn,
 667     /// Vertical tab, spelled `\v` (`\x0B`).
 668     VerticalTab,
 669     /// Space, spelled `\ ` (`\x20`). Note that this can only appear when
 670     /// parsing in verbose mode.
 671     Space,
 672 }
 673
 674 /// The type of a Unicode hex literal.
 675 ///
 676 /// Note that all variants behave the same when used with brackets. They only
 677 /// differ when used without brackets in the number of hex digits that must
 678 /// follow.
 679 #[derive(Clone, Debug, Eq, PartialEq)]
 680 pub enum HexLiteralKind {
 681     /// A `\x` prefix. When used without brackets, this form is limited to
 682     /// two digits.
 683     X,
 684     /// A `\u` prefix. When used without brackets, this form is limited to
 685     /// four digits.
 686     UnicodeShort,
 687     /// A `\U` prefix. When used without brackets, this form is limited to
 688     /// eight digits.
 689     UnicodeLong,
 690 }
 691
 692 impl HexLiteralKind {
 693     /// The number of digits that must be used with this literal form when
 694     /// used without brackets. When used with brackets, there is no
 695     /// restriction on the number of digits.
 696     pub fn digits(&self) -> u32 {
 697         match *self {
 698             HexLiteralKind::X => 2,
 699             HexLiteralKind::UnicodeShort => 4,
 700             HexLiteralKind::UnicodeLong => 8,
 701         }
 702     }
 703 }
 704
 705 /// A single character class expression.
 706 #[derive(Clone, Debug, Eq, PartialEq)]
 707 pub enum Class {
 708     /// A Unicode character class, e.g., `\pL` or `\p{Greek}`.
 709     Unicode(ClassUnicode),
 710     /// A perl character class, e.g., `\d` or `\W`.
 711     Perl(ClassPerl),
 712     /// A bracketed character class set, which may contain zero or more
 713     /// character ranges and/or zero or more nested classes. e.g.,
 714     /// `[a-zA-Z\pL]`.
 715     Bracketed(ClassBracketed),
 716 }
 717
 718 impl Class {
 719     /// Return the span of this character class.
 720     pub fn span(&self) -> &Span {
 721         match *self {
 722             Class::Perl(ref x) => &x.span,
 723             Class::Unicode(ref x) => &x.span,
 724             Class::Bracketed(ref x) => &x.span,
 725         }
 726     }
 727 }
 728
 729 /// A Perl character class.
 730 #[derive(Clone, Debug, Eq, PartialEq)]
 731 pub struct ClassPerl {
 732     /// The span of this class.
 733     pub span: Span,
 734     /// The kind of Perl class.
 735     pub kind: ClassPerlKind,
 736     /// Whether the class is negated or not. e.g., `\d` is not negated but
 737     /// `\D` is.
 738     pub negated: bool,
 739 }
 740
 741 /// The available Perl character classes.
 742 #[derive(Clone, Debug, Eq, PartialEq)]
 743 pub enum ClassPerlKind {
 744     /// Decimal numbers.
 745     Digit,
 746     /// Whitespace.
 747     Space,
 748     /// Word characters.
 749     Word,
 750 }
 751
 752 /// An ASCII character class.
 753 #[derive(Clone, Debug, Eq, PartialEq)]
 754 pub struct ClassAscii {
 755     /// The span of this class.
 756     pub span: Span,
 757     /// The kind of ASCII class.
 758     pub kind: ClassAsciiKind,
 759     /// Whether the class is negated or not. e.g., `[[:alpha:]]` is not negated
 760     /// but `[[:^alpha:]]` is.
 761     pub negated: bool,
 762 }
 763
 764 /// The available ASCII character classes.
 765 #[derive(Clone, Debug, Eq, PartialEq)]
 766 pub enum ClassAsciiKind {
 767     /// `[0-9A-Za-z]`
 768     Alnum,
 769     /// `[A-Za-z]`
 770     Alpha,
 771     /// `[\x00-\x7F]`
 772     Ascii,
 773     /// `[ \t]`
 774     Blank,
 775     /// `[\x00-\x1F\x7F]`
 776     Cntrl,
 777     /// `[0-9]`
 778     Digit,
 779     /// `[!-~]`
 780     Graph,
 781     /// `[a-z]`
 782     Lower,
 783     /// `[ -~]`
 784     Print,
 785     /// `[!-/:-@\[-`{-~]`
 786     Punct,
 787     /// `[\t\n\v\f\r ]`
 788     Space,
 789     /// `[A-Z]`
 790     Upper,
 791     /// `[0-9A-Za-z_]`
 792     Word,
 793     /// `[0-9A-Fa-f]`
 794     Xdigit,
 795 }
 796
 797 impl ClassAsciiKind {
 798     /// Return the corresponding ClassAsciiKind variant for the given name.
 799     ///
 800     /// The name given should correspond to the lowercase version of the
 801     /// variant name. e.g., `cntrl` is the name for `ClassAsciiKind::Cntrl`.
 802     ///
 803     /// If no variant with the corresponding name exists, then `None` is
 804     /// returned.
 805     pub fn from_name(name: &str) -> Option<ClassAsciiKind> {
 806         use self::ClassAsciiKind::*;
 807         match name {
 808             "alnum" => Some(Alnum),
 809             "alpha" => Some(Alpha),
 810             "ascii" => Some(Ascii),
 811             "blank" => Some(Blank),
 812             "cntrl" => Some(Cntrl),
 813             "digit" => Some(Digit),
 814             "graph" => Some(Graph),
 815             "lower" => Some(Lower),
 816             "print" => Some(Print),
 817             "punct" => Some(Punct),
 818             "space" => Some(Space),
 819             "upper" => Some(Upper),
 820             "word" => Some(Word),
 821             "xdigit" => Some(Xdigit),
 822             _ => None,
 823         }
 824     }
 825 }
 826
 827 /// A Unicode character class.
 828 #[derive(Clone, Debug, Eq, PartialEq)]
 829 pub struct ClassUnicode {
 830     /// The span of this class.
 831     pub span: Span,
 832     /// Whether this class is negated or not.
 833     ///
 834     /// Note: be careful when using this attribute. This specifically refers
 835     /// to whether the class is written as `\p` or `\P`, where the latter
 836     /// is `negated = true`. However, it also possible to write something like
 837     /// `\P{scx!=Katakana}` which is actually equivalent to
 838     /// `\p{scx=Katakana}` and is therefore not actually negated even though
 839     /// `negated = true` here. To test whether this class is truly negated
 840     /// or not, use the `is_negated` method.
 841     pub negated: bool,
 842     /// The kind of Unicode class.
 843     pub kind: ClassUnicodeKind,
 844 }
 845
 846 impl ClassUnicode {
 847     /// Returns true if this class has been negated.
 848     ///
 849     /// Note that this takes the Unicode op into account, if it's present.
 850     /// e.g., `is_negated` for `\P{scx!=Katakana}` will return `false`.
 851     pub fn is_negated(&self) -> bool {
 852         match self.kind {
 853             ClassUnicodeKind::NamedValue {
 854                 op: ClassUnicodeOpKind::NotEqual, ..
 855             } => !self.negated,
 856             _ => self.negated,
 857         }
 858     }
 859 }
 860
 861 /// The available forms of Unicode character classes.
 862 #[derive(Clone, Debug, Eq, PartialEq)]
 863 pub enum ClassUnicodeKind {
 864     /// A one letter abbreviated class, e.g., `\pN`.
 865     OneLetter(char),
 866     /// A binary property, general category or script. The string may be
 867     /// empty.
 868     Named(String),
 869     /// A property name and an associated value.
 870     NamedValue {
 871         /// The type of Unicode op used to associate `name` with `value`.
 872         op: ClassUnicodeOpKind,
 873         /// The property name (which may be empty).
 874         name: String,
 875         /// The property value (which may be empty).
 876         value: String,
 877     },
 878 }
 879
 880 /// The type of op used in a Unicode character class.
 881 #[derive(Clone, Debug, Eq, PartialEq)]
 882 pub enum ClassUnicodeOpKind {
 883     /// A property set to a specific value, e.g., `\p{scx=Katakana}`.
 884     Equal,
 885     /// A property set to a specific value using a colon, e.g.,
 886     /// `\p{scx:Katakana}`.
 887     Colon,
 888     /// A property that isn't a particular value, e.g., `\p{scx!=Katakana}`.
 889     NotEqual,
 890 }
 891
 892 impl ClassUnicodeOpKind {
 893     /// Whether the op is an equality op or not.
 894     pub fn is_equal(&self) -> bool {
 895         match *self {
 896             ClassUnicodeOpKind::Equal|ClassUnicodeOpKind::Colon => true,
 897             _ => false,
 898         }
 899     }
 900 }
 901
 902 /// A bracketed character class, e.g., `[a-z0-9]`.
 903 #[derive(Clone, Debug, Eq, PartialEq)]
 904 pub struct ClassBracketed {
 905     /// The span of this class.
 906     pub span: Span,
 907     /// Whether this class is negated or not. e.g., `[a]` is not negated but
 908     /// `[^a]` is.
 909     pub negated: bool,
 910     /// The type of this set. A set is either a normal union of things, e.g.,
 911     /// `[abc]` or a result of applying set operations, e.g., `[\pL--c]`.
 912     pub kind: ClassSet,
 913 }
 914
 915 /// A character class set.
 916 ///
 917 /// This type corresponds to the internal structure of a bracketed character
 918 /// class. That is, every bracketed character is one of two types: a union of
 919 /// items (literals, ranges, other bracketed classes) or a tree of binary set
 920 /// operations.
 921 #[derive(Clone, Debug, Eq, PartialEq)]
 922 pub enum ClassSet {
 923     /// An item, which can be a single literal, range, nested character class
 924     /// or a union of items.
 925     Item(ClassSetItem),
 926     /// A single binary operation (i.e., &&, -- or ~~).
 927     BinaryOp(ClassSetBinaryOp),
 928 }
 929
 930 impl ClassSet {
 931     /// Build a set from a union.
 932     pub fn union(ast: ClassSetUnion) -> ClassSet {
 933         ClassSet::Item(ClassSetItem::Union(ast))
 934     }
 935
 936     /// Return the span of this character class set.
 937     pub fn span(&self) -> &Span {
 938         match *self {
 939             ClassSet::Item(ref x) => x.span(),
 940             ClassSet::BinaryOp(ref x) => &x.span,
 941         }
 942     }
 943
 944     /// Return true if and only if this class set is empty.
 945     fn is_empty(&self) -> bool {
 946         match *self {
 947             ClassSet::Item(ClassSetItem::Empty(_)) => true,
 948             _ => false,
 949         }
 950     }
 951 }
 952
 953 /// A single component of a character class set.
 954 #[derive(Clone, Debug, Eq, PartialEq)]
 955 pub enum ClassSetItem {
 956     /// An empty item.
 957     ///
 958     /// Note that a bracketed character class cannot contain a single empty
 959     /// item. Empty items can appear when using one of the binary operators.
 960     /// For example, `[&&]` is the intersection of two empty classes.
 961     Empty(Span),
 962     /// A single literal.
 963     Literal(Literal),
 964     /// A range between two literals.
 965     Range(ClassSetRange),
 966     /// An ASCII character class, e.g., `[:alnum:]` or `[:punct:]`.
 967     Ascii(ClassAscii),
 968     /// A Unicode character class, e.g., `\pL` or `\p{Greek}`.
 969     Unicode(ClassUnicode),
 970     /// A perl character class, e.g., `\d` or `\W`.
 971     Perl(ClassPerl),
 972     /// A bracketed character class set, which may contain zero or more
 973     /// character ranges and/or zero or more nested classes. e.g.,
 974     /// `[a-zA-Z\pL]`.
 975     Bracketed(Box<ClassBracketed>),
 976     /// A union of items.
 977     Union(ClassSetUnion),
 978 }
 979
 980 impl ClassSetItem {
 981     /// Return the span of this character class set item.
 982     pub fn span(&self) -> &Span {
 983         match *self {
 984             ClassSetItem::Empty(ref span) => span,
 985             ClassSetItem::Literal(ref x) => &x.span,
 986             ClassSetItem::Range(ref x) => &x.span,
 987             ClassSetItem::Ascii(ref x) => &x.span,
 988             ClassSetItem::Perl(ref x) => &x.span,
 989             ClassSetItem::Unicode(ref x) => &x.span,
 990             ClassSetItem::Bracketed(ref x) => &x.span,
 991             ClassSetItem::Union(ref x) => &x.span,
 992         }
 993     }
 994 }
 995
 996 /// A single character class range in a set.
 997 #[derive(Clone, Debug, Eq, PartialEq)]
 998 pub struct ClassSetRange {
 999     /// The span of this range.
1000     pub span: Span,
1001     /// The start of this range.
1002     pub start: Literal,
1003     /// The end of this range.
1004     pub end: Literal,
1005 }
1006
1007 impl ClassSetRange {
1008     /// Returns true if and only if this character class range is valid.
1009     ///
1010     /// The only case where a range is invalid is if its start is greater than
1011     /// its end.
1012     pub fn is_valid(&self) -> bool {
1013         self.start.c <= self.end.c
1014     }
1015 }
1016
1017 /// A union of items inside a character class set.
1018 #[derive(Clone, Debug, Eq, PartialEq)]
1019 pub struct ClassSetUnion {
1020     /// The span of the items in this operation. e.g., the `a-z0-9` in
1021     /// `[^a-z0-9]`
1022     pub span: Span,
1023     /// The sequence of items that make up this union.
1024     pub items: Vec<ClassSetItem>,
1025 }
1026
1027 impl ClassSetUnion {
1028     /// Push a new item in this union.
1029     ///
1030     /// The ending position of this union's span is updated to the ending
1031     /// position of the span of the item given. If the union is empty, then
1032     /// the starting position of this union is set to the starting position
1033     /// of this item.
1034     ///
1035     /// In other words, if you only use this method to add items to a union
1036     /// and you set the spans on each item correctly, then you should never
1037     /// need to adjust the span of the union directly.
1038     pub fn push(&mut self, item: ClassSetItem) {
1039         if self.items.is_empty() {
1040             self.span.start = item.span().start;
1041         }
1042         self.span.end = item.span().end;
1043         self.items.push(item);
1044     }
1045
1046     /// Return this union as a character class set item.
1047     ///
1048     /// If this union contains zero items, then an empty union is
1049     /// returned. If this concatenation contains exactly 1 item, then the
1050     /// corresponding item is returned. Otherwise, ClassSetItem::Union is
1051     /// returned.
1052     pub fn into_item(mut self) -> ClassSetItem {
1053         match self.items.len() {
1054             0 => ClassSetItem::Empty(self.span),
1055             1 => self.items.pop().unwrap(),
1056             _ => ClassSetItem::Union(self),
1057         }
1058     }
1059 }
1060
1061 /// A Unicode character class set operation.
1062 #[derive(Clone, Debug, Eq, PartialEq)]
1063 pub struct ClassSetBinaryOp {
1064     /// The span of this operation. e.g., the `a-z--[h-p]` in `[a-z--h-p]`.
1065     pub span: Span,
1066     /// The type of this set operation.
1067     pub kind: ClassSetBinaryOpKind,
1068     /// The left hand side of the operation.
1069     pub lhs: Box<ClassSet>,
1070     /// The right hand side of the operation.
1071     pub rhs: Box<ClassSet>,
1072 }
1073
1074 /// The type of a Unicode character class set operation.
1075 ///
1076 /// Note that this doesn't explicitly represent union since there is no
1077 /// explicit union operator. Concatenation inside a character class corresponds
1078 /// to the union operation.
1079 #[derive(Clone, Copy, Debug, Eq, PartialEq)]
1080 pub enum ClassSetBinaryOpKind {
1081     /// The intersection of two sets, e.g., `\pN&&[a-z]`.
1082     Intersection,
1083     /// The difference of two sets, e.g., `\pN--[0-9]`.
1084     Difference,
1085     /// The symmetric difference of two sets. The symmetric difference is the
1086     /// set of elements belonging to one but not both sets.
1087     /// e.g., `[\pL~~[:ascii:]]`.
1088     SymmetricDifference,
1089 }
1090
1091 /// A single zero-width assertion.
1092 #[derive(Clone, Debug, Eq, PartialEq)]
1093 pub struct Assertion {
1094     /// The span of this assertion.
1095     pub span: Span,
1096     /// The assertion kind, e.g., `\b` or `^`.
1097     pub kind: AssertionKind,
1098 }
1099
1100 /// An assertion kind.
1101 #[derive(Clone, Debug, Eq, PartialEq)]
1102 pub enum AssertionKind {
1103     /// `^`
1104     StartLine,
1105     /// `$`
1106     EndLine,
1107     /// `\A`
1108     StartText,
1109     /// `\z`
1110     EndText,
1111     /// `\b`
1112     WordBoundary,
1113     /// `\B`
1114     NotWordBoundary,
1115 }
1116
1117 /// A repetition operation applied to a regular expression.
1118 #[derive(Clone, Debug, Eq, PartialEq)]
1119 pub struct Repetition {
1120     /// The span of this operation.
1121     pub span: Span,
1122     /// The actual operation.
1123     pub op: RepetitionOp,
1124     /// Whether this operation was applied greedily or not.
1125     pub greedy: bool,
1126     /// The regular expression under repetition.
1127     pub ast: Box<Ast>,
1128 }
1129
1130 /// The repetition operator itself.
1131 #[derive(Clone, Debug, Eq, PartialEq)]
1132 pub struct RepetitionOp {
1133     /// The span of this operator. This includes things like `+`, `*?` and
1134     /// `{m,n}`.
1135     pub span: Span,
1136     /// The type of operation.
1137     pub kind: RepetitionKind,
1138 }
1139
1140 /// The kind of a repetition operator.
1141 #[derive(Clone, Debug, Eq, PartialEq)]
1142 pub enum RepetitionKind {
1143     /// `?`
1144     ZeroOrOne,
1145     /// `*`
1146     ZeroOrMore,
1147     /// `+`
1148     OneOrMore,
1149     /// `{m,n}`
1150     Range(RepetitionRange),
1151 }
1152
1153 /// A range repetition operator.
1154 #[derive(Clone, Debug, Eq, PartialEq)]
1155 pub enum RepetitionRange {
1156     /// `{m}`
1157     Exactly(u32),
1158     /// `{m,}`
1159     AtLeast(u32),
1160     /// `{m,n}`
1161     Bounded(u32, u32),
1162 }
1163
1164 impl RepetitionRange {
1165     /// Returns true if and only if this repetition range is valid.
1166     ///
1167     /// The only case where a repetition range is invalid is if it is bounded
1168     /// and its start is greater than its end.
1169     pub fn is_valid(&self) -> bool {
1170         match *self {
1171             RepetitionRange::Bounded(s, e) if s > e => false,
1172             _ => true,
1173         }
1174     }
1175 }
1176
1177 /// A grouped regular expression.
1178 ///
1179 /// This includes both capturing and non-capturing groups. This does **not**
1180 /// include flag-only groups like `(?is)`, but does contain any group that
1181 /// contains a sub-expression, e.g., `(a)`, `(?P<name>a)`, `(?:a)` and
1182 /// `(?is:a)`.
1183 #[derive(Clone, Debug, Eq, PartialEq)]
1184 pub struct Group {
1185     /// The span of this group.
1186     pub span: Span,
1187     /// The kind of this group.
1188     pub kind: GroupKind,
1189     /// The regular expression in this group.
1190     pub ast: Box<Ast>,
1191 }
1192
1193 impl Group {
1194     /// If this group is non-capturing, then this returns the (possibly empty)
1195     /// set of flags. Otherwise, `None` is returned.
1196     pub fn flags(&self) -> Option<&Flags> {
1197         match self.kind {
1198             GroupKind::NonCapturing(ref flags) => Some(flags),
1199             _ => None,
1200         }
1201     }
1202
1203     /// Returns true if and only if this group is capturing.
1204     pub fn is_capturing(&self) -> bool {
1205         match self.kind {
1206             GroupKind::CaptureIndex(_) | GroupKind::CaptureName(_) => true,
1207             GroupKind::NonCapturing(_) => false,
1208         }
1209     }
1210
1211     /// Returns the capture index of this group, if this is a capturing group.
1212     ///
1213     /// This returns a capture index precisely when `is_capturing` is `true`.
1214     pub fn capture_index(&self) -> Option<u32> {
1215         match self.kind {
1216             GroupKind::CaptureIndex(i) => Some(i),
1217             GroupKind::CaptureName(ref x) => Some(x.index),
1218             GroupKind::NonCapturing(_) => None,
1219         }
1220     }
1221 }
1222
1223 /// The kind of a group.
1224 #[derive(Clone, Debug, Eq, PartialEq)]
1225 pub enum GroupKind {
1226     /// `(a)`
1227     CaptureIndex(u32),
1228     /// `(?P<name>a)`
1229     CaptureName(CaptureName),
1230     /// `(?:a)` and `(?i:a)`
1231     NonCapturing(Flags),
1232 }
1233
1234 /// A capture name.
1235 ///
1236 /// This corresponds to the name itself between the angle brackets in, e.g.,
1237 /// `(?P<foo>expr)`.
1238 #[derive(Clone, Debug, Eq, PartialEq)]
1239 pub struct CaptureName {
1240     /// The span of this capture name.
1241     pub span: Span,
1242     /// The capture name.
1243     pub name: String,
1244     /// The capture index.
1245     pub index: u32,
1246 }
1247
1248 /// A group of flags that is not applied to a particular regular expression.
1249 #[derive(Clone, Debug, Eq, PartialEq)]
1250 pub struct SetFlags {
1251     /// The span of these flags, including the grouping parentheses.
1252     pub span: Span,
1253     /// The actual sequence of flags.
1254     pub flags: Flags,
1255 }
1256
1257 /// A group of flags.
1258 ///
1259 /// This corresponds only to the sequence of flags themselves, e.g., `is-u`.
1260 #[derive(Clone, Debug, Eq, PartialEq)]
1261 pub struct Flags {
1262     /// The span of this group of flags.
1263     pub span: Span,
1264     /// A sequence of flag items. Each item is either a flag or a negation
1265     /// operator.
1266     pub items: Vec<FlagsItem>,
1267 }
1268
1269 impl Flags {
1270     /// Add the given item to this sequence of flags.
1271     ///
1272     /// If the item was added successfully, then `None` is returned. If the
1273     /// given item is a duplicate, then `Some(i)` is returned, where
1274     /// `items[i].kind == item.kind`.
1275     pub fn add_item(&mut self, item: FlagsItem) -> Option<usize> {
1276         for (i, x) in self.items.iter().enumerate() {
1277             if x.kind == item.kind {
1278                 return Some(i);
1279             }
1280         }
1281         self.items.push(item);
1282         None
1283     }
1284
1285     /// Returns the state of the given flag in this set.
1286     ///
1287     /// If the given flag is in the set but is negated, then `Some(false)` is
1288     /// returned.
1289     ///
1290     /// If the given flag is in the set and is not negated, then `Some(true)`
1291     /// is returned.
1292     ///
1293     /// Otherwise, `None` is returned.
1294     pub fn flag_state(&self, flag: Flag) -> Option<bool> {
1295         let mut negated = false;
1296         for x in &self.items {
1297             match x.kind {
1298                 FlagsItemKind::Negation => {
1299                     negated = true;
1300                 }
1301                 FlagsItemKind::Flag(ref xflag) if xflag == &flag => {
1302                     return Some(!negated);
1303                 }
1304                 _ => {}
1305             }
1306         }
1307         None
1308     }
1309 }
1310
1311 /// A single item in a group of flags.
1312 #[derive(Clone, Debug, Eq, PartialEq)]
1313 pub struct FlagsItem {
1314     /// The span of this item.
1315     pub span: Span,
1316     /// The kind of this item.
1317     pub kind: FlagsItemKind,
1318 }
1319
1320 /// The kind of an item in a group of flags.
1321 #[derive(Clone, Debug, Eq, PartialEq)]
1322 pub enum FlagsItemKind {
1323     /// A negation operator applied to all subsequent flags in the enclosing
1324     /// group.
1325     Negation,
1326     /// A single flag in a group.
1327     Flag(Flag),
1328 }
1329
1330 impl FlagsItemKind {
1331     /// Returns true if and only if this item is a negation operator.
1332     pub fn is_negation(&self) -> bool {
1333         match *self {
1334             FlagsItemKind::Negation => true,
1335             _ => false,
1336         }
1337     }
1338 }
1339
1340 /// A single flag.
1341 #[derive(Clone, Copy, Debug, Eq, PartialEq)]
1342 pub enum Flag {
1343     /// `i`
1344     CaseInsensitive,
1345     /// `m`
1346     MultiLine,
1347     /// `s`
1348     DotMatchesNewLine,
1349     /// `U`
1350     SwapGreed,
1351     /// `u`
1352     Unicode,
1353     /// `x`
1354     IgnoreWhitespace,
1355 }
1356
1357 /// A custom `Drop` impl is used for `Ast` such that it uses constant stack
1358 /// space but heap space proportional to the depth of the `Ast`.
1359 impl Drop for Ast {
1360     fn drop(&mut self) {
1361         use std::mem;
1362
1363         match *self {
1364             Ast::Empty(_)
1365             | Ast::Flags(_)
1366             | Ast::Literal(_)
1367             | Ast::Dot(_)
1368             | Ast::Assertion(_)
1369             // Classes are recursive, so they get their own Drop impl.
1370             | Ast::Class(_) => return,
1371             Ast::Repetition(ref x) if !x.ast.has_subexprs() => return,
1372             Ast::Group(ref x) if !x.ast.has_subexprs() => return,
1373             Ast::Alternation(ref x) if x.asts.is_empty() => return,
1374             Ast::Concat(ref x) if x.asts.is_empty() => return,
1375             _ => {}
1376         }
1377
1378         let empty_span = || Span::splat(Position::new(0, 0, 0));
1379         let empty_ast = || Ast::Empty(empty_span());
1380         let mut stack = vec![mem::replace(self, empty_ast())];
1381         while let Some(mut ast) = stack.pop() {
1382             match ast {
1383                 Ast::Empty(_)
1384                 | Ast::Flags(_)
1385                 | Ast::Literal(_)
1386                 | Ast::Dot(_)
1387                 | Ast::Assertion(_)
1388                 // Classes are recursive, so they get their own Drop impl.
1389                 | Ast::Class(_) => {}
1390                 Ast::Repetition(ref mut x) => {
1391                     stack.push(mem::replace(&mut x.ast, empty_ast()));
1392                 }
1393                 Ast::Group(ref mut x) => {
1394                     stack.push(mem::replace(&mut x.ast, empty_ast()));
1395                 }
1396                 Ast::Alternation(ref mut x) => {
1397                     stack.extend(x.asts.drain(..));
1398                 }
1399                 Ast::Concat(ref mut x) => {
1400                     stack.extend(x.asts.drain(..));
1401                 }
1402             }
1403         }
1404     }
1405 }
1406
1407 /// A custom `Drop` impl is used for `ClassSet` such that it uses constant
1408 /// stack space but heap space proportional to the depth of the `ClassSet`.
1409 impl Drop for ClassSet {
1410     fn drop(&mut self) {
1411         use std::mem;
1412
1413         match *self {
1414             ClassSet::Item(ref item) => {
1415                 match *item {
1416                     ClassSetItem::Empty(_)
1417                     | ClassSetItem::Literal(_)
1418                     | ClassSetItem::Range(_)
1419                     | ClassSetItem::Ascii(_)
1420                     | ClassSetItem::Unicode(_)
1421                     | ClassSetItem::Perl(_) => return,
1422                     ClassSetItem::Bracketed(ref x) => {
1423                         if x.kind.is_empty() {
1424                             return;
1425                         }
1426                     }
1427                     ClassSetItem::Union(ref x) => {
1428                         if x.items.is_empty() {
1429                             return;
1430                         }
1431                     }
1432                 }
1433             }
1434             ClassSet::BinaryOp(ref op) => {
1435                 if op.lhs.is_empty() && op.rhs.is_empty() {
1436                     return;
1437                 }
1438             }
1439         }
1440
1441         let empty_span = || Span::splat(Position::new(0, 0, 0));
1442         let empty_set = || ClassSet::Item(ClassSetItem::Empty(empty_span()));
1443         let mut stack = vec![mem::replace(self, empty_set())];
1444         while let Some(mut set) = stack.pop() {
1445             match set {
1446                 ClassSet::Item(ref mut item) => {
1447                     match *item {
1448                         ClassSetItem::Empty(_)
1449                         | ClassSetItem::Literal(_)
1450                         | ClassSetItem::Range(_)
1451                         | ClassSetItem::Ascii(_)
1452                         | ClassSetItem::Unicode(_)
1453                         | ClassSetItem::Perl(_) => {}
1454                         ClassSetItem::Bracketed(ref mut x) => {
1455                             stack.push(mem::replace(&mut x.kind, empty_set()));
1456                         }
1457                         ClassSetItem::Union(ref mut x) => {
1458                             stack.extend(
1459                                 x.items.drain(..).map(ClassSet::Item));
1460                         }
1461                     }
1462                 }
1463                 ClassSet::BinaryOp(ref mut op) => {
1464                     stack.push(mem::replace(&mut op.lhs, empty_set()));
1465                     stack.push(mem::replace(&mut op.rhs, empty_set()));
1466                 }
1467             }
1468         }
1469     }
1470 }
1471
1472 #[cfg(test)]
1473 mod tests {
1474     use super::*;
1475
1476     // We use a thread with an explicit stack size to test that our destructor
1477     // for Ast can handle arbitrarily sized expressions in constant stack
1478     // space. In case we run on a platform without threads (WASM?), we limit
1479     // this test to Windows/Unix.
1480     #[test]
1481     #[cfg(any(unix, windows))]
1482     fn no_stack_overflow_on_drop() {
1483         use std::thread;
1484
1485         let run = || {
1486             let span = || Span::splat(Position::new(0, 0, 0));
1487             let mut ast = Ast::Empty(span());
1488             for i in 0..200 {
1489                 ast = Ast::Group(Group {
1490                     span: span(),
1491                     kind: GroupKind::CaptureIndex(i),
1492                     ast: Box::new(ast),
1493                 });
1494             }
1495             assert!(!ast.is_empty());
1496         };
1497
1498         // We run our test on a thread with a small stack size so we can
1499         // force the issue more easily.
1500         thread::Builder::new()
1501             .stack_size(1<<10)
1502             .spawn(run)
1503             .unwrap()
1504             .join()
1505             .unwrap();
1506     }
1507 }