vendor/regex-syntax/src/hir/translate.rs

   1 /*!
   2 Defines a translator that converts an `Ast` to an `Hir`.
   3 */
   4
   5 use std::cell::{Cell, RefCell};
   6 use std::result;
   7
   8 use crate::ast::{self, Ast, Span, Visitor};
   9 use crate::hir::{self, Error, ErrorKind, Hir};
  10 use crate::unicode::{self, ClassQuery};
  11
  12 type Result<T> = result::Result<T, Error>;
  13
/// A builder for constructing an AST->HIR translator.
///
/// The builder only records configuration; `build` copies that
/// configuration into each `Translator` it creates.
#[derive(Clone, Debug)]
pub struct TranslatorBuilder {
    /// Whether built translators may produce HIR that can match invalid
    /// UTF-8. `new` initializes this to `false`.
    allow_invalid_utf8: bool,
    /// The flag settings that every built translator starts out with.
    flags: Flags,
}
  20
  21 impl Default for TranslatorBuilder {
  22     fn default() -> TranslatorBuilder {
  23         TranslatorBuilder::new()
  24     }
  25 }
  26
  27 impl TranslatorBuilder {
  28     /// Create a new translator builder with a default c onfiguration.
  29     pub fn new() -> TranslatorBuilder {
  30         TranslatorBuilder {
  31             allow_invalid_utf8: false,
  32             flags: Flags::default(),
  33         }
  34     }
  35
  36     /// Build a translator using the current configuration.
  37     pub fn build(&self) -> Translator {
  38         Translator {
  39             stack: RefCell::new(vec![]),
  40             flags: Cell::new(self.flags),
  41             allow_invalid_utf8: self.allow_invalid_utf8,
  42         }
  43     }
  44
  45     /// When enabled, translation will permit the construction of a regular
  46     /// expression that may match invalid UTF-8.
  47     ///
  48     /// When disabled (the default), the translator is guaranteed to produce
  49     /// an expression that will only ever match valid UTF-8 (otherwise, the
  50     /// translator will return an error).
  51     ///
  52     /// Perhaps surprisingly, when invalid UTF-8 isn't allowed, a negated ASCII
  53     /// word boundary (uttered as `(?-u:\B)` in the concrete syntax) will cause
  54     /// the parser to return an error. Namely, a negated ASCII word boundary
  55     /// can result in matching positions that aren't valid UTF-8 boundaries.
  56     pub fn allow_invalid_utf8(&mut self, yes: bool) -> &mut TranslatorBuilder {
  57         self.allow_invalid_utf8 = yes;
  58         self
  59     }
  60
  61     /// Enable or disable the case insensitive flag (`i`) by default.
  62     pub fn case_insensitive(&mut self, yes: bool) -> &mut TranslatorBuilder {
  63         self.flags.case_insensitive = if yes { Some(true) } else { None };
  64         self
  65     }
  66
  67     /// Enable or disable the multi-line matching flag (`m`) by default.
  68     pub fn multi_line(&mut self, yes: bool) -> &mut TranslatorBuilder {
  69         self.flags.multi_line = if yes { Some(true) } else { None };
  70         self
  71     }
  72
  73     /// Enable or disable the "dot matches any character" flag (`s`) by
  74     /// default.
  75     pub fn dot_matches_new_line(
  76         &mut self,
  77         yes: bool,
  78     ) -> &mut TranslatorBuilder {
  79         self.flags.dot_matches_new_line = if yes { Some(true) } else { None };
  80         self
  81     }
  82
  83     /// Enable or disable the "swap greed" flag (`U`) by default.
  84     pub fn swap_greed(&mut self, yes: bool) -> &mut TranslatorBuilder {
  85         self.flags.swap_greed = if yes { Some(true) } else { None };
  86         self
  87     }
  88
  89     /// Enable or disable the Unicode flag (`u`) by default.
  90     pub fn unicode(&mut self, yes: bool) -> &mut TranslatorBuilder {
  91         self.flags.unicode = if yes { None } else { Some(false) };
  92         self
  93     }
  94 }
  95
/// A translator maps abstract syntax to a high level intermediate
/// representation.
///
/// A translator may benefit from reuse. That is, a translator can translate
/// many abstract syntax trees.
///
/// A `Translator` can be configured in more detail via a
/// [`TranslatorBuilder`](struct.TranslatorBuilder.html).
#[derive(Clone, Debug)]
pub struct Translator {
    /// Our call stack, but on the heap.
    ///
    /// Wrapped in a `RefCell` so that frames can be pushed and popped
    /// through a shared reference to the translator.
    stack: RefCell<Vec<HirFrame>>,
    /// The current flag settings.
    ///
    /// Wrapped in a `Cell` so that the active flags can be replaced through
    /// a shared reference to the translator.
    flags: Cell<Flags>,
    /// Whether we're allowed to produce HIR that can match arbitrary bytes.
    allow_invalid_utf8: bool,
}
 113
 114 impl Translator {
 115     /// Create a new translator using the default configuration.
 116     pub fn new() -> Translator {
 117         TranslatorBuilder::new().build()
 118     }
 119
 120     /// Translate the given abstract syntax tree (AST) into a high level
 121     /// intermediate representation (HIR).
 122     ///
 123     /// If there was a problem doing the translation, then an HIR-specific
 124     /// error is returned.
 125     ///
 126     /// The original pattern string used to produce the `Ast` *must* also be
 127     /// provided. The translator does not use the pattern string during any
 128     /// correct translation, but is used for error reporting.
 129     pub fn translate(&mut self, pattern: &str, ast: &Ast) -> Result<Hir> {
 130         ast::visit(ast, TranslatorI::new(self, pattern))
 131     }
 132 }
 133
/// An HirFrame is a single stack frame, represented explicitly, which is
/// created for each item in the Ast that we traverse.
///
/// Note that technically, this type doesn't represent our entire stack
/// frame. In particular, the Ast visitor represents any state associated with
/// traversing the Ast itself.
#[derive(Clone, Debug)]
enum HirFrame {
    /// An arbitrary HIR expression. These get pushed whenever we hit a base
    /// case in the Ast. They get popped after an inductive (i.e., recursive)
    /// step is complete.
    Expr(Hir),
    /// A Unicode character class. This frame is mutated as we descend into
    /// the Ast of a character class (which is itself its own mini recursive
    /// structure).
    ClassUnicode(hir::ClassUnicode),
    /// A byte-oriented character class. This frame is mutated as we descend
    /// into the Ast of a character class (which is itself its own mini
    /// recursive structure).
    ///
    /// Byte character classes are created when Unicode mode (`u`) is disabled.
    /// If `allow_invalid_utf8` is disabled (the default), then a byte
    /// character is only permitted to match ASCII text.
    ClassBytes(hir::ClassBytes),
    /// This is pushed on to the stack upon first seeing any kind of group,
    /// indicated by parentheses (including non-capturing groups). It is popped
    /// upon leaving a group.
    Group {
        /// The old active flags when this group was opened.
        ///
        /// If this group sets flags, then the new active flags are set to the
        /// result of merging the old flags with the flags introduced by this
        /// group. If the group doesn't set any flags, then this is simply
        /// equivalent to whatever flags were set when the group was opened.
        ///
        /// When this group is popped, the active flags should be restored to
        /// the flags set here.
        ///
        /// The "active" flags correspond to whatever flags are set in the
        /// Translator.
        old_flags: Flags,
    },
    /// This is pushed whenever a non-empty concatenation is observed. After
    /// visiting every sub-expression in the concatenation, the translator's
    /// stack is popped until it sees a Concat frame.
    Concat,
    /// This is pushed whenever a non-empty alternation is observed. After
    /// visiting every sub-expression in the alternation, the translator's
    /// stack is popped until it sees an Alternation frame.
    Alternation,
}
 185
 186 impl HirFrame {
 187     /// Assert that the current stack frame is an Hir expression and return it.
 188     fn unwrap_expr(self) -> Hir {
 189         match self {
 190             HirFrame::Expr(expr) => expr,
 191             _ => panic!("tried to unwrap expr from HirFrame, got: {:?}", self),
 192         }
 193     }
 194
 195     /// Assert that the current stack frame is a Unicode class expression and
 196     /// return it.
 197     fn unwrap_class_unicode(self) -> hir::ClassUnicode {
 198         match self {
 199             HirFrame::ClassUnicode(cls) => cls,
 200             _ => panic!(
 201                 "tried to unwrap Unicode class \
 202                  from HirFrame, got: {:?}",
 203                 self
 204             ),
 205         }
 206     }
 207
 208     /// Assert that the current stack frame is a byte class expression and
 209     /// return it.
 210     fn unwrap_class_bytes(self) -> hir::ClassBytes {
 211         match self {
 212             HirFrame::ClassBytes(cls) => cls,
 213             _ => panic!(
 214                 "tried to unwrap byte class \
 215                  from HirFrame, got: {:?}",
 216                 self
 217             ),
 218         }
 219     }
 220
 221     /// Assert that the current stack frame is a group indicator and return
 222     /// its corresponding flags (the flags that were active at the time the
 223     /// group was entered).
 224     fn unwrap_group(self) -> Flags {
 225         match self {
 226             HirFrame::Group { old_flags } => old_flags,
 227             _ => {
 228                 panic!("tried to unwrap group from HirFrame, got: {:?}", self)
 229             }
 230         }
 231     }
 232 }
 233
 234 impl<'t, 'p> Visitor for TranslatorI<'t, 'p> {
 235     type Output = Hir;
 236     type Err = Error;
 237
 238     fn finish(self) -> Result<Hir> {
 239         // ... otherwise, we should have exactly one HIR on the stack.
 240         assert_eq!(self.trans().stack.borrow().len(), 1);
 241         Ok(self.pop().unwrap().unwrap_expr())
 242     }
 243
 244     fn visit_pre(&mut self, ast: &Ast) -> Result<()> {
 245         match *ast {
 246             Ast::Class(ast::Class::Bracketed(_)) => {
 247                 if self.flags().unicode() {
 248                     let cls = hir::ClassUnicode::empty();
 249                     self.push(HirFrame::ClassUnicode(cls));
 250                 } else {
 251                     let cls = hir::ClassBytes::empty();
 252                     self.push(HirFrame::ClassBytes(cls));
 253                 }
 254             }
 255             Ast::Group(ref x) => {
 256                 let old_flags = x
 257                     .flags()
 258                     .map(|ast| self.set_flags(ast))
 259                     .unwrap_or_else(|| self.flags());
 260                 self.push(HirFrame::Group { old_flags });
 261             }
 262             Ast::Concat(ref x) if x.asts.is_empty() => {}
 263             Ast::Concat(_) => {
 264                 self.push(HirFrame::Concat);
 265             }
 266             Ast::Alternation(ref x) if x.asts.is_empty() => {}
 267             Ast::Alternation(_) => {
 268                 self.push(HirFrame::Alternation);
 269             }
 270             _ => {}
 271         }
 272         Ok(())
 273     }
 274
 275     fn visit_post(&mut self, ast: &Ast) -> Result<()> {
 276         match *ast {
 277             Ast::Empty(_) => {
 278                 self.push(HirFrame::Expr(Hir::empty()));
 279             }
 280             Ast::Flags(ref x) => {
 281                 self.set_flags(&x.flags);
 282                 // Flags in the AST are generally considered directives and
 283                 // not actual sub-expressions. However, they can be used in
 284                 // the concrete syntax like `((?i))`, and we need some kind of
 285                 // indication of an expression there, and Empty is the correct
 286                 // choice.
 287                 //
 288                 // There can also be things like `(?i)+`, but we rule those out
 289                 // in the parser. In the future, we might allow them for
 290                 // consistency sake.
 291                 self.push(HirFrame::Expr(Hir::empty()));
 292             }
 293             Ast::Literal(ref x) => {
 294                 self.push(HirFrame::Expr(self.hir_literal(x)?));
 295             }
 296             Ast::Dot(span) => {
 297                 self.push(HirFrame::Expr(self.hir_dot(span)?));
 298             }
 299             Ast::Assertion(ref x) => {
 300                 self.push(HirFrame::Expr(self.hir_assertion(x)?));
 301             }
 302             Ast::Class(ast::Class::Perl(ref x)) => {
 303                 if self.flags().unicode() {
 304                     let cls = self.hir_perl_unicode_class(x)?;
 305                     let hcls = hir::Class::Unicode(cls);
 306                     self.push(HirFrame::Expr(Hir::class(hcls)));
 307                 } else {
 308                     let cls = self.hir_perl_byte_class(x);
 309                     let hcls = hir::Class::Bytes(cls);
 310                     self.push(HirFrame::Expr(Hir::class(hcls)));
 311                 }
 312             }
 313             Ast::Class(ast::Class::Unicode(ref x)) => {
 314                 let cls = hir::Class::Unicode(self.hir_unicode_class(x)?);
 315                 self.push(HirFrame::Expr(Hir::class(cls)));
 316             }
 317             Ast::Class(ast::Class::Bracketed(ref ast)) => {
 318                 if self.flags().unicode() {
 319                     let mut cls = self.pop().unwrap().unwrap_class_unicode();
 320                     self.unicode_fold_and_negate(
 321                         &ast.span,
 322                         ast.negated,
 323                         &mut cls,
 324                     )?;
 325                     if cls.ranges().is_empty() {
 326                         return Err(self.error(
 327                             ast.span,
 328                             ErrorKind::EmptyClassNotAllowed,
 329                         ));
 330                     }
 331                     let expr = Hir::class(hir::Class::Unicode(cls));
 332                     self.push(HirFrame::Expr(expr));
 333                 } else {
 334                     let mut cls = self.pop().unwrap().unwrap_class_bytes();
 335                     self.bytes_fold_and_negate(
 336                         &ast.span,
 337                         ast.negated,
 338                         &mut cls,
 339                     )?;
 340                     if cls.ranges().is_empty() {
 341                         return Err(self.error(
 342                             ast.span,
 343                             ErrorKind::EmptyClassNotAllowed,
 344                         ));
 345                     }
 346
 347                     let expr = Hir::class(hir::Class::Bytes(cls));
 348                     self.push(HirFrame::Expr(expr));
 349                 }
 350             }
 351             Ast::Repetition(ref x) => {
 352                 let expr = self.pop().unwrap().unwrap_expr();
 353                 self.push(HirFrame::Expr(self.hir_repetition(x, expr)));
 354             }
 355             Ast::Group(ref x) => {
 356                 let expr = self.pop().unwrap().unwrap_expr();
 357                 let old_flags = self.pop().unwrap().unwrap_group();
 358                 self.trans().flags.set(old_flags);
 359                 self.push(HirFrame::Expr(self.hir_group(x, expr)));
 360             }
 361             Ast::Concat(_) => {
 362                 let mut exprs = vec![];
 363                 while let Some(HirFrame::Expr(expr)) = self.pop() {
 364                     if !expr.kind().is_empty() {
 365                         exprs.push(expr);
 366                     }
 367                 }
 368                 exprs.reverse();
 369                 self.push(HirFrame::Expr(Hir::concat(exprs)));
 370             }
 371             Ast::Alternation(_) => {
 372                 let mut exprs = vec![];
 373                 while let Some(HirFrame::Expr(expr)) = self.pop() {
 374                     exprs.push(expr);
 375                 }
 376                 exprs.reverse();
 377                 self.push(HirFrame::Expr(Hir::alternation(exprs)));
 378             }
 379         }
 380         Ok(())
 381     }
 382
 383     fn visit_class_set_item_pre(
 384         &mut self,
 385         ast: &ast::ClassSetItem,
 386     ) -> Result<()> {
 387         match *ast {
 388             ast::ClassSetItem::Bracketed(_) => {
 389                 if self.flags().unicode() {
 390                     let cls = hir::ClassUnicode::empty();
 391                     self.push(HirFrame::ClassUnicode(cls));
 392                 } else {
 393                     let cls = hir::ClassBytes::empty();
 394                     self.push(HirFrame::ClassBytes(cls));
 395                 }
 396             }
 397             // We needn't handle the Union case here since the visitor will
 398             // do it for us.
 399             _ => {}
 400         }
 401         Ok(())
 402     }
 403
 404     fn visit_class_set_item_post(
 405         &mut self,
 406         ast: &ast::ClassSetItem,
 407     ) -> Result<()> {
 408         match *ast {
 409             ast::ClassSetItem::Empty(_) => {}
 410             ast::ClassSetItem::Literal(ref x) => {
 411                 if self.flags().unicode() {
 412                     let mut cls = self.pop().unwrap().unwrap_class_unicode();
 413                     cls.push(hir::ClassUnicodeRange::new(x.c, x.c));
 414                     self.push(HirFrame::ClassUnicode(cls));
 415                 } else {
 416                     let mut cls = self.pop().unwrap().unwrap_class_bytes();
 417                     let byte = self.class_literal_byte(x)?;
 418                     cls.push(hir::ClassBytesRange::new(byte, byte));
 419                     self.push(HirFrame::ClassBytes(cls));
 420                 }
 421             }
 422             ast::ClassSetItem::Range(ref x) => {
 423                 if self.flags().unicode() {
 424                     let mut cls = self.pop().unwrap().unwrap_class_unicode();
 425                     cls.push(hir::ClassUnicodeRange::new(x.start.c, x.end.c));
 426                     self.push(HirFrame::ClassUnicode(cls));
 427                 } else {
 428                     let mut cls = self.pop().unwrap().unwrap_class_bytes();
 429                     let start = self.class_literal_byte(&x.start)?;
 430                     let end = self.class_literal_byte(&x.end)?;
 431                     cls.push(hir::ClassBytesRange::new(start, end));
 432                     self.push(HirFrame::ClassBytes(cls));
 433                 }
 434             }
 435             ast::ClassSetItem::Ascii(ref x) => {
 436                 if self.flags().unicode() {
 437                     let xcls = self.hir_ascii_unicode_class(x)?;
 438                     let mut cls = self.pop().unwrap().unwrap_class_unicode();
 439                     cls.union(&xcls);
 440                     self.push(HirFrame::ClassUnicode(cls));
 441                 } else {
 442                     let xcls = self.hir_ascii_byte_class(x)?;
 443                     let mut cls = self.pop().unwrap().unwrap_class_bytes();
 444                     cls.union(&xcls);
 445                     self.push(HirFrame::ClassBytes(cls));
 446                 }
 447             }
 448             ast::ClassSetItem::Unicode(ref x) => {
 449                 let xcls = self.hir_unicode_class(x)?;
 450                 let mut cls = self.pop().unwrap().unwrap_class_unicode();
 451                 cls.union(&xcls);
 452                 self.push(HirFrame::ClassUnicode(cls));
 453             }
 454             ast::ClassSetItem::Perl(ref x) => {
 455                 if self.flags().unicode() {
 456                     let xcls = self.hir_perl_unicode_class(x)?;
 457                     let mut cls = self.pop().unwrap().unwrap_class_unicode();
 458                     cls.union(&xcls);
 459                     self.push(HirFrame::ClassUnicode(cls));
 460                 } else {
 461                     let xcls = self.hir_perl_byte_class(x);
 462                     let mut cls = self.pop().unwrap().unwrap_class_bytes();
 463                     cls.union(&xcls);
 464                     self.push(HirFrame::ClassBytes(cls));
 465                 }
 466             }
 467             ast::ClassSetItem::Bracketed(ref ast) => {
 468                 if self.flags().unicode() {
 469                     let mut cls1 = self.pop().unwrap().unwrap_class_unicode();
 470                     self.unicode_fold_and_negate(
 471                         &ast.span,
 472                         ast.negated,
 473                         &mut cls1,
 474                     )?;
 475
 476                     let mut cls2 = self.pop().unwrap().unwrap_class_unicode();
 477                     cls2.union(&cls1);
 478                     self.push(HirFrame::ClassUnicode(cls2));
 479                 } else {
 480                     let mut cls1 = self.pop().unwrap().unwrap_class_bytes();
 481                     self.bytes_fold_and_negate(
 482                         &ast.span,
 483                         ast.negated,
 484                         &mut cls1,
 485                     )?;
 486
 487                     let mut cls2 = self.pop().unwrap().unwrap_class_bytes();
 488                     cls2.union(&cls1);
 489                     self.push(HirFrame::ClassBytes(cls2));
 490                 }
 491             }
 492             // This is handled automatically by the visitor.
 493             ast::ClassSetItem::Union(_) => {}
 494         }
 495         Ok(())
 496     }
 497
 498     fn visit_class_set_binary_op_pre(
 499         &mut self,
 500         _op: &ast::ClassSetBinaryOp,
 501     ) -> Result<()> {
 502         if self.flags().unicode() {
 503             let cls = hir::ClassUnicode::empty();
 504             self.push(HirFrame::ClassUnicode(cls));
 505         } else {
 506             let cls = hir::ClassBytes::empty();
 507             self.push(HirFrame::ClassBytes(cls));
 508         }
 509         Ok(())
 510     }
 511
 512     fn visit_class_set_binary_op_in(
 513         &mut self,
 514         _op: &ast::ClassSetBinaryOp,
 515     ) -> Result<()> {
 516         if self.flags().unicode() {
 517             let cls = hir::ClassUnicode::empty();
 518             self.push(HirFrame::ClassUnicode(cls));
 519         } else {
 520             let cls = hir::ClassBytes::empty();
 521             self.push(HirFrame::ClassBytes(cls));
 522         }
 523         Ok(())
 524     }
 525
 526     fn visit_class_set_binary_op_post(
 527         &mut self,
 528         op: &ast::ClassSetBinaryOp,
 529     ) -> Result<()> {
 530         use crate::ast::ClassSetBinaryOpKind::*;
 531
 532         if self.flags().unicode() {
 533             let mut rhs = self.pop().unwrap().unwrap_class_unicode();
 534             let mut lhs = self.pop().unwrap().unwrap_class_unicode();
 535             let mut cls = self.pop().unwrap().unwrap_class_unicode();
 536             if self.flags().case_insensitive() {
 537                 rhs.try_case_fold_simple().map_err(|_| {
 538                     self.error(
 539                         op.rhs.span().clone(),
 540                         ErrorKind::UnicodeCaseUnavailable,
 541                     )
 542                 })?;
 543                 lhs.try_case_fold_simple().map_err(|_| {
 544                     self.error(
 545                         op.lhs.span().clone(),
 546                         ErrorKind::UnicodeCaseUnavailable,
 547                     )
 548                 })?;
 549             }
 550             match op.kind {
 551                 Intersection => lhs.intersect(&rhs),
 552                 Difference => lhs.difference(&rhs),
 553                 SymmetricDifference => lhs.symmetric_difference(&rhs),
 554             }
 555             cls.union(&lhs);
 556             self.push(HirFrame::ClassUnicode(cls));
 557         } else {
 558             let mut rhs = self.pop().unwrap().unwrap_class_bytes();
 559             let mut lhs = self.pop().unwrap().unwrap_class_bytes();
 560             let mut cls = self.pop().unwrap().unwrap_class_bytes();
 561             if self.flags().case_insensitive() {
 562                 rhs.case_fold_simple();
 563                 lhs.case_fold_simple();
 564             }
 565             match op.kind {
 566                 Intersection => lhs.intersect(&rhs),
 567                 Difference => lhs.difference(&rhs),
 568                 SymmetricDifference => lhs.symmetric_difference(&rhs),
 569             }
 570             cls.union(&lhs);
 571             self.push(HirFrame::ClassBytes(cls));
 572         }
 573         Ok(())
 574     }
 575 }
 576
/// The internal implementation of a translator.
///
/// This type is responsible for carrying around the original pattern string,
/// which is not tied to the internal state of a translator.
///
/// A TranslatorI exists for the time it takes to translate a single Ast.
#[derive(Clone, Debug)]
struct TranslatorI<'t, 'p> {
    /// The translator whose stack and flags this traversal reads and
    /// updates.
    trans: &'t Translator,
    /// The pattern string the Ast was parsed from; copied into any error
    /// that is created.
    pattern: &'p str,
}
 588
 589 impl<'t, 'p> TranslatorI<'t, 'p> {
 590     /// Build a new internal translator.
 591     fn new(trans: &'t Translator, pattern: &'p str) -> TranslatorI<'t, 'p> {
 592         TranslatorI { trans: trans, pattern: pattern }
 593     }
 594
 595     /// Return a reference to the underlying translator.
 596     fn trans(&self) -> &Translator {
 597         &self.trans
 598     }
 599
 600     /// Push the given frame on to the call stack.
 601     fn push(&self, frame: HirFrame) {
 602         self.trans().stack.borrow_mut().push(frame);
 603     }
 604
 605     /// Pop the top of the call stack. If the call stack is empty, return None.
 606     fn pop(&self) -> Option<HirFrame> {
 607         self.trans().stack.borrow_mut().pop()
 608     }
 609
 610     /// Create a new error with the given span and error type.
 611     fn error(&self, span: Span, kind: ErrorKind) -> Error {
 612         Error { kind: kind, pattern: self.pattern.to_string(), span: span }
 613     }
 614
 615     /// Return a copy of the active flags.
 616     fn flags(&self) -> Flags {
 617         self.trans().flags.get()
 618     }
 619
 620     /// Set the flags of this translator from the flags set in the given AST.
 621     /// Then, return the old flags.
 622     fn set_flags(&self, ast_flags: &ast::Flags) -> Flags {
 623         let old_flags = self.flags();
 624         let mut new_flags = Flags::from_ast(ast_flags);
 625         new_flags.merge(&old_flags);
 626         self.trans().flags.set(new_flags);
 627         old_flags
 628     }
 629
 630     fn hir_literal(&self, lit: &ast::Literal) -> Result<Hir> {
 631         let ch = match self.literal_to_char(lit)? {
 632             byte @ hir::Literal::Byte(_) => return Ok(Hir::literal(byte)),
 633             hir::Literal::Unicode(ch) => ch,
 634         };
 635         if self.flags().case_insensitive() {
 636             self.hir_from_char_case_insensitive(lit.span, ch)
 637         } else {
 638             self.hir_from_char(lit.span, ch)
 639         }
 640     }
 641
 642     /// Convert an Ast literal to its scalar representation.
 643     ///
 644     /// When Unicode mode is enabled, then this always succeeds and returns a
 645     /// `char` (Unicode scalar value).
 646     ///
 647     /// When Unicode mode is disabled, then a raw byte is returned. If that
 648     /// byte is not ASCII and invalid UTF-8 is not allowed, then this returns
 649     /// an error.
 650     fn literal_to_char(&self, lit: &ast::Literal) -> Result<hir::Literal> {
 651         if self.flags().unicode() {
 652             return Ok(hir::Literal::Unicode(lit.c));
 653         }
 654         let byte = match lit.byte() {
 655             None => return Ok(hir::Literal::Unicode(lit.c)),
 656             Some(byte) => byte,
 657         };
 658         if byte <= 0x7F {
 659             return Ok(hir::Literal::Unicode(byte as char));
 660         }
 661         if !self.trans().allow_invalid_utf8 {
 662             return Err(self.error(lit.span, ErrorKind::InvalidUtf8));
 663         }
 664         Ok(hir::Literal::Byte(byte))
 665     }
 666
 667     fn hir_from_char(&self, span: Span, c: char) -> Result<Hir> {
 668         if !self.flags().unicode() && c.len_utf8() > 1 {
 669             return Err(self.error(span, ErrorKind::UnicodeNotAllowed));
 670         }
 671         Ok(Hir::literal(hir::Literal::Unicode(c)))
 672     }
 673
 674     fn hir_from_char_case_insensitive(
 675         &self,
 676         span: Span,
 677         c: char,
 678     ) -> Result<Hir> {
 679         if self.flags().unicode() {
 680             // If case folding won't do anything, then don't bother trying.
 681             let map =
 682                 unicode::contains_simple_case_mapping(c, c).map_err(|_| {
 683                     self.error(span, ErrorKind::UnicodeCaseUnavailable)
 684                 })?;
 685             if !map {
 686                 return self.hir_from_char(span, c);
 687             }
 688             let mut cls =
 689                 hir::ClassUnicode::new(vec![hir::ClassUnicodeRange::new(
 690                     c, c,
 691                 )]);
 692             cls.try_case_fold_simple().map_err(|_| {
 693                 self.error(span, ErrorKind::UnicodeCaseUnavailable)
 694             })?;
 695             Ok(Hir::class(hir::Class::Unicode(cls)))
 696         } else {
 697             if c.len_utf8() > 1 {
 698                 return Err(self.error(span, ErrorKind::UnicodeNotAllowed));
 699             }
 700             // If case folding won't do anything, then don't bother trying.
 701             match c {
 702                 'A'..='Z' | 'a'..='z' => {}
 703                 _ => return self.hir_from_char(span, c),
 704             }
 705             let mut cls =
 706                 hir::ClassBytes::new(vec![hir::ClassBytesRange::new(
 707                     c as u8, c as u8,
 708                 )]);
 709             cls.case_fold_simple();
 710             Ok(Hir::class(hir::Class::Bytes(cls)))
 711         }
 712     }
 713
 714     fn hir_dot(&self, span: Span) -> Result<Hir> {
 715         let unicode = self.flags().unicode();
 716         if !unicode && !self.trans().allow_invalid_utf8 {
 717             return Err(self.error(span, ErrorKind::InvalidUtf8));
 718         }
 719         Ok(if self.flags().dot_matches_new_line() {
 720             Hir::any(!unicode)
 721         } else {
 722             Hir::dot(!unicode)
 723         })
 724     }
 725
 726     fn hir_assertion(&self, asst: &ast::Assertion) -> Result<Hir> {
 727         let unicode = self.flags().unicode();
 728         let multi_line = self.flags().multi_line();
 729         Ok(match asst.kind {
 730             ast::AssertionKind::StartLine => Hir::anchor(if multi_line {
 731                 hir::Anchor::StartLine
 732             } else {
 733                 hir::Anchor::StartText
 734             }),
 735             ast::AssertionKind::EndLine => Hir::anchor(if multi_line {
 736                 hir::Anchor::EndLine
 737             } else {
 738                 hir::Anchor::EndText
 739             }),
 740             ast::AssertionKind::StartText => {
 741                 Hir::anchor(hir::Anchor::StartText)
 742             }
 743             ast::AssertionKind::EndText => Hir::anchor(hir::Anchor::EndText),
 744             ast::AssertionKind::WordBoundary => {
 745                 Hir::word_boundary(if unicode {
 746                     hir::WordBoundary::Unicode
 747                 } else {
 748                     hir::WordBoundary::Ascii
 749                 })
 750             }
 751             ast::AssertionKind::NotWordBoundary => {
 752                 Hir::word_boundary(if unicode {
 753                     hir::WordBoundary::UnicodeNegate
 754                 } else {
 755                     // It is possible for negated ASCII word boundaries to
 756                     // match at invalid UTF-8 boundaries, even when searching
 757                     // valid UTF-8.
 758                     if !self.trans().allow_invalid_utf8 {
 759                         return Err(
 760                             self.error(asst.span, ErrorKind::InvalidUtf8)
 761                         );
 762                     }
 763                     hir::WordBoundary::AsciiNegate
 764                 })
 765             }
 766         })
 767     }
 768
 769     fn hir_group(&self, group: &ast::Group, expr: Hir) -> Hir {
 770         let kind = match group.kind {
 771             ast::GroupKind::CaptureIndex(idx) => {
 772                 hir::GroupKind::CaptureIndex(idx)
 773             }
 774             ast::GroupKind::CaptureName(ref capname) => {
 775                 hir::GroupKind::CaptureName {
 776                     name: capname.name.clone(),
 777                     index: capname.index,
 778                 }
 779             }
 780             ast::GroupKind::NonCapturing(_) => hir::GroupKind::NonCapturing,
 781         };
 782         Hir::group(hir::Group { kind: kind, hir: Box::new(expr) })
 783     }
 784
 785     fn hir_repetition(&self, rep: &ast::Repetition, expr: Hir) -> Hir {
 786         let kind = match rep.op.kind {
 787             ast::RepetitionKind::ZeroOrOne => hir::RepetitionKind::ZeroOrOne,
 788             ast::RepetitionKind::ZeroOrMore => hir::RepetitionKind::ZeroOrMore,
 789             ast::RepetitionKind::OneOrMore => hir::RepetitionKind::OneOrMore,
 790             ast::RepetitionKind::Range(ast::RepetitionRange::Exactly(m)) => {
 791                 hir::RepetitionKind::Range(hir::RepetitionRange::Exactly(m))
 792             }
 793             ast::RepetitionKind::Range(ast::RepetitionRange::AtLeast(m)) => {
 794                 hir::RepetitionKind::Range(hir::RepetitionRange::AtLeast(m))
 795             }
 796             ast::RepetitionKind::Range(ast::RepetitionRange::Bounded(
 797                 m,
 798                 n,
 799             )) => {
 800                 hir::RepetitionKind::Range(hir::RepetitionRange::Bounded(m, n))
 801             }
 802         };
 803         let greedy =
 804             if self.flags().swap_greed() { !rep.greedy } else { rep.greedy };
 805         Hir::repetition(hir::Repetition {
 806             kind: kind,
 807             greedy: greedy,
 808             hir: Box::new(expr),
 809         })
 810     }
 811
 812     fn hir_unicode_class(
 813         &self,
 814         ast_class: &ast::ClassUnicode,
 815     ) -> Result<hir::ClassUnicode> {
 816         use crate::ast::ClassUnicodeKind::*;
 817
 818         if !self.flags().unicode() {
 819             return Err(
 820                 self.error(ast_class.span, ErrorKind::UnicodeNotAllowed)
 821             );
 822         }
 823         let query = match ast_class.kind {
 824             OneLetter(name) => ClassQuery::OneLetter(name),
 825             Named(ref name) => ClassQuery::Binary(name),
 826             NamedValue { ref name, ref value, .. } => ClassQuery::ByValue {
 827                 property_name: name,
 828                 property_value: value,
 829             },
 830         };
 831         let mut result = self.convert_unicode_class_error(
 832             &ast_class.span,
 833             unicode::class(query),
 834         );
 835         if let Ok(ref mut class) = result {
 836             self.unicode_fold_and_negate(
 837                 &ast_class.span,
 838                 ast_class.negated,
 839                 class,
 840             )?;
 841             if class.ranges().is_empty() {
 842                 let err = self
 843                     .error(ast_class.span, ErrorKind::EmptyClassNotAllowed);
 844                 return Err(err);
 845             }
 846         }
 847         result
 848     }
 849
 850     fn hir_ascii_unicode_class(
 851         &self,
 852         ast: &ast::ClassAscii,
 853     ) -> Result<hir::ClassUnicode> {
 854         let mut cls = hir::ClassUnicode::new(
 855             ascii_class(&ast.kind)
 856                 .iter()
 857                 .map(|&(s, e)| hir::ClassUnicodeRange::new(s, e)),
 858         );
 859         self.unicode_fold_and_negate(&ast.span, ast.negated, &mut cls)?;
 860         Ok(cls)
 861     }
 862
 863     fn hir_ascii_byte_class(
 864         &self,
 865         ast: &ast::ClassAscii,
 866     ) -> Result<hir::ClassBytes> {
 867         let mut cls = hir::ClassBytes::new(
 868             ascii_class(&ast.kind)
 869                 .iter()
 870                 .map(|&(s, e)| hir::ClassBytesRange::new(s as u8, e as u8)),
 871         );
 872         self.bytes_fold_and_negate(&ast.span, ast.negated, &mut cls)?;
 873         Ok(cls)
 874     }
 875
 876     fn hir_perl_unicode_class(
 877         &self,
 878         ast_class: &ast::ClassPerl,
 879     ) -> Result<hir::ClassUnicode> {
 880         use crate::ast::ClassPerlKind::*;
 881
 882         assert!(self.flags().unicode());
 883         let result = match ast_class.kind {
 884             Digit => unicode::perl_digit(),
 885             Space => unicode::perl_space(),
 886             Word => unicode::perl_word(),
 887         };
 888         let mut class =
 889             self.convert_unicode_class_error(&ast_class.span, result)?;
 890         // We needn't apply case folding here because the Perl Unicode classes
 891         // are already closed under Unicode simple case folding.
 892         if ast_class.negated {
 893             class.negate();
 894         }
 895         Ok(class)
 896     }
 897
 898     fn hir_perl_byte_class(
 899         &self,
 900         ast_class: &ast::ClassPerl,
 901     ) -> hir::ClassBytes {
 902         use crate::ast::ClassPerlKind::*;
 903
 904         assert!(!self.flags().unicode());
 905         let mut class = match ast_class.kind {
 906             Digit => hir_ascii_class_bytes(&ast::ClassAsciiKind::Digit),
 907             Space => hir_ascii_class_bytes(&ast::ClassAsciiKind::Space),
 908             Word => hir_ascii_class_bytes(&ast::ClassAsciiKind::Word),
 909         };
 910         // We needn't apply case folding here because the Perl ASCII classes
 911         // are already closed (under ASCII case folding).
 912         if ast_class.negated {
 913             class.negate();
 914         }
 915         class
 916     }
 917
 918     /// Converts the given Unicode specific error to an HIR translation error.
 919     ///
 920     /// The span given should approximate the position at which an error would
 921     /// occur.
 922     fn convert_unicode_class_error(
 923         &self,
 924         span: &Span,
 925         result: unicode::Result<hir::ClassUnicode>,
 926     ) -> Result<hir::ClassUnicode> {
 927         result.map_err(|err| {
 928             let sp = span.clone();
 929             match err {
 930                 unicode::Error::PropertyNotFound => {
 931                     self.error(sp, ErrorKind::UnicodePropertyNotFound)
 932                 }
 933                 unicode::Error::PropertyValueNotFound => {
 934                     self.error(sp, ErrorKind::UnicodePropertyValueNotFound)
 935                 }
 936                 unicode::Error::PerlClassNotFound => {
 937                     self.error(sp, ErrorKind::UnicodePerlClassNotFound)
 938                 }
 939             }
 940         })
 941     }
 942
 943     fn unicode_fold_and_negate(
 944         &self,
 945         span: &Span,
 946         negated: bool,
 947         class: &mut hir::ClassUnicode,
 948     ) -> Result<()> {
 949         // Note that we must apply case folding before negation!
 950         // Consider `(?i)[^x]`. If we applied negation field, then
 951         // the result would be the character class that matched any
 952         // Unicode scalar value.
 953         if self.flags().case_insensitive() {
 954             class.try_case_fold_simple().map_err(|_| {
 955                 self.error(span.clone(), ErrorKind::UnicodeCaseUnavailable)
 956             })?;
 957         }
 958         if negated {
 959             class.negate();
 960         }
 961         Ok(())
 962     }
 963
 964     fn bytes_fold_and_negate(
 965         &self,
 966         span: &Span,
 967         negated: bool,
 968         class: &mut hir::ClassBytes,
 969     ) -> Result<()> {
 970         // Note that we must apply case folding before negation!
 971         // Consider `(?i)[^x]`. If we applied negation first, then
 972         // the result would be the character class that matched any
 973         // Unicode scalar value.
 974         if self.flags().case_insensitive() {
 975             class.case_fold_simple();
 976         }
 977         if negated {
 978             class.negate();
 979         }
 980         if !self.trans().allow_invalid_utf8 && !class.is_all_ascii() {
 981             return Err(self.error(span.clone(), ErrorKind::InvalidUtf8));
 982         }
 983         Ok(())
 984     }
 985
 986     /// Return a scalar byte value suitable for use as a literal in a byte
 987     /// character class.
 988     fn class_literal_byte(&self, ast: &ast::Literal) -> Result<u8> {
 989         match self.literal_to_char(ast)? {
 990             hir::Literal::Byte(byte) => Ok(byte),
 991             hir::Literal::Unicode(ch) => {
 992                 if ch <= 0x7F as char {
 993                     Ok(ch as u8)
 994                 } else {
 995                     // We can't feasibly support Unicode in
 996                     // byte oriented classes. Byte classes don't
 997                     // do Unicode case folding.
 998                     Err(self.error(ast.span, ErrorKind::UnicodeNotAllowed))
 999                 }
1000             }
1001         }
1002     }
1003 }
1004
/// A translator's representation of a regular expression's flags at any given
/// moment in time.
///
/// Each flag can be in one of three states: absent, present but disabled or
/// present but enabled.
///
/// These states are encoded as `None`, `Some(false)` and `Some(true)`
/// respectively. The accessor methods on this type resolve an absent flag to
/// its default value.
#[derive(Clone, Copy, Debug, Default)]
struct Flags {
    // State of the case insensitive flag. Treated as disabled when absent.
    case_insensitive: Option<bool>,
    // State of the multi-line flag. Treated as disabled when absent.
    multi_line: Option<bool>,
    // State of the "dot matches new line" flag. Treated as disabled when
    // absent.
    dot_matches_new_line: Option<bool>,
    // State of the "swap greed" flag. Treated as disabled when absent.
    swap_greed: Option<bool>,
    // State of the Unicode flag. Treated as enabled when absent.
    unicode: Option<bool>,
    // Note that `ignore_whitespace` is omitted here because it is handled
    // entirely in the parser.
}
1020
1021 impl Flags {
1022     fn from_ast(ast: &ast::Flags) -> Flags {
1023         let mut flags = Flags::default();
1024         let mut enable = true;
1025         for item in &ast.items {
1026             match item.kind {
1027                 ast::FlagsItemKind::Negation => {
1028                     enable = false;
1029                 }
1030                 ast::FlagsItemKind::Flag(ast::Flag::CaseInsensitive) => {
1031                     flags.case_insensitive = Some(enable);
1032                 }
1033                 ast::FlagsItemKind::Flag(ast::Flag::MultiLine) => {
1034                     flags.multi_line = Some(enable);
1035                 }
1036                 ast::FlagsItemKind::Flag(ast::Flag::DotMatchesNewLine) => {
1037                     flags.dot_matches_new_line = Some(enable);
1038                 }
1039                 ast::FlagsItemKind::Flag(ast::Flag::SwapGreed) => {
1040                     flags.swap_greed = Some(enable);
1041                 }
1042                 ast::FlagsItemKind::Flag(ast::Flag::Unicode) => {
1043                     flags.unicode = Some(enable);
1044                 }
1045                 ast::FlagsItemKind::Flag(ast::Flag::IgnoreWhitespace) => {}
1046             }
1047         }
1048         flags
1049     }
1050
1051     fn merge(&mut self, previous: &Flags) {
1052         if self.case_insensitive.is_none() {
1053             self.case_insensitive = previous.case_insensitive;
1054         }
1055         if self.multi_line.is_none() {
1056             self.multi_line = previous.multi_line;
1057         }
1058         if self.dot_matches_new_line.is_none() {
1059             self.dot_matches_new_line = previous.dot_matches_new_line;
1060         }
1061         if self.swap_greed.is_none() {
1062             self.swap_greed = previous.swap_greed;
1063         }
1064         if self.unicode.is_none() {
1065             self.unicode = previous.unicode;
1066         }
1067     }
1068
1069     fn case_insensitive(&self) -> bool {
1070         self.case_insensitive.unwrap_or(false)
1071     }
1072
1073     fn multi_line(&self) -> bool {
1074         self.multi_line.unwrap_or(false)
1075     }
1076
1077     fn dot_matches_new_line(&self) -> bool {
1078         self.dot_matches_new_line.unwrap_or(false)
1079     }
1080
1081     fn swap_greed(&self) -> bool {
1082         self.swap_greed.unwrap_or(false)
1083     }
1084
1085     fn unicode(&self) -> bool {
1086         self.unicode.unwrap_or(true)
1087     }
1088 }
1089
1090 fn hir_ascii_class_bytes(kind: &ast::ClassAsciiKind) -> hir::ClassBytes {
1091     let ranges: Vec<_> = ascii_class(kind)
1092         .iter()
1093         .cloned()
1094         .map(|(s, e)| hir::ClassBytesRange::new(s as u8, e as u8))
1095         .collect();
1096     hir::ClassBytes::new(ranges)
1097 }
1098
/// Returns the character ranges that make up the given ASCII class.
///
/// Each pair is a `(start, end)` range with both endpoints included (a
/// single character is written with equal endpoints). Every endpoint is an
/// ASCII character, and the ranges of each class are listed in ascending
/// order.
fn ascii_class(kind: &ast::ClassAsciiKind) -> &'static [(char, char)] {
    use crate::ast::ClassAsciiKind::*;
    match *kind {
        Alnum => &[('0', '9'), ('A', 'Z'), ('a', 'z')],
        Alpha => &[('A', 'Z'), ('a', 'z')],
        Ascii => &[('\x00', '\x7F')],
        Blank => &[('\t', '\t'), (' ', ' ')],
        Cntrl => &[('\x00', '\x1F'), ('\x7F', '\x7F')],
        Digit => &[('0', '9')],
        Graph => &[('!', '~')],
        Lower => &[('a', 'z')],
        Print => &[(' ', '~')],
        Punct => &[('!', '/'), (':', '@'), ('[', '`'), ('{', '~')],
        // Tab, line feed, vertical tab, form feed, carriage return, space.
        Space => &[
            ('\t', '\t'),
            ('\n', '\n'),
            ('\x0B', '\x0B'),
            ('\x0C', '\x0C'),
            ('\r', '\r'),
            (' ', ' '),
        ],
        Upper => &[('A', 'Z')],
        Word => &[('0', '9'), ('A', 'Z'), ('_', '_'), ('a', 'z')],
        Xdigit => &[('0', '9'), ('A', 'F'), ('a', 'f')],
    }
}
1125
1126 #[cfg(test)]
1127 mod tests {
1128     use crate::ast::parse::ParserBuilder;
1129     use crate::ast::{self, Ast, Position, Span};
1130     use crate::hir::{self, Hir, HirKind};
1131     use crate::unicode::{self, ClassQuery};
1132
1133     use super::{ascii_class, TranslatorBuilder};
1134
    // We create these errors to compare with real hir::Errors in the tests.
    // We define equality between TestError and hir::Error to disregard the
    // pattern string in hir::Error, which is annoying to provide in tests.
    #[derive(Clone, Debug)]
    struct TestError {
        // The span the translation error is expected to carry.
        span: Span,
        // The kind the translation error is expected to have.
        kind: hir::ErrorKind,
    }
1143
1144     impl PartialEq<hir::Error> for TestError {
1145         fn eq(&self, other: &hir::Error) -> bool {
1146             self.span == other.span && self.kind == other.kind
1147         }
1148     }
1149
1150     impl PartialEq<TestError> for hir::Error {
1151         fn eq(&self, other: &TestError) -> bool {
1152             self.span == other.span && self.kind == other.kind
1153         }
1154     }
1155
1156     fn parse(pattern: &str) -> Ast {
1157         ParserBuilder::new().octal(true).build().parse(pattern).unwrap()
1158     }
1159
1160     fn t(pattern: &str) -> Hir {
1161         TranslatorBuilder::new()
1162             .allow_invalid_utf8(false)
1163             .build()
1164             .translate(pattern, &parse(pattern))
1165             .unwrap()
1166     }
1167
1168     fn t_err(pattern: &str) -> hir::Error {
1169         TranslatorBuilder::new()
1170             .allow_invalid_utf8(false)
1171             .build()
1172             .translate(pattern, &parse(pattern))
1173             .unwrap_err()
1174     }
1175
1176     fn t_bytes(pattern: &str) -> Hir {
1177         TranslatorBuilder::new()
1178             .allow_invalid_utf8(true)
1179             .build()
1180             .translate(pattern, &parse(pattern))
1181             .unwrap()
1182     }
1183
1184     fn hir_lit(s: &str) -> Hir {
1185         match s.len() {
1186             0 => Hir::empty(),
1187             _ => {
1188                 let lits = s
1189                     .chars()
1190                     .map(hir::Literal::Unicode)
1191                     .map(Hir::literal)
1192                     .collect();
1193                 Hir::concat(lits)
1194             }
1195         }
1196     }
1197
1198     fn hir_blit(s: &[u8]) -> Hir {
1199         match s.len() {
1200             0 => Hir::empty(),
1201             1 => Hir::literal(hir::Literal::Byte(s[0])),
1202             _ => {
1203                 let lits = s
1204                     .iter()
1205                     .cloned()
1206                     .map(hir::Literal::Byte)
1207                     .map(Hir::literal)
1208                     .collect();
1209                 Hir::concat(lits)
1210             }
1211         }
1212     }
1213
1214     fn hir_group(i: u32, expr: Hir) -> Hir {
1215         Hir::group(hir::Group {
1216             kind: hir::GroupKind::CaptureIndex(i),
1217             hir: Box::new(expr),
1218         })
1219     }
1220
1221     fn hir_group_name(i: u32, name: &str, expr: Hir) -> Hir {
1222         Hir::group(hir::Group {
1223             kind: hir::GroupKind::CaptureName {
1224                 name: name.to_string(),
1225                 index: i,
1226             },
1227             hir: Box::new(expr),
1228         })
1229     }
1230
1231     fn hir_group_nocap(expr: Hir) -> Hir {
1232         Hir::group(hir::Group {
1233             kind: hir::GroupKind::NonCapturing,
1234             hir: Box::new(expr),
1235         })
1236     }
1237
1238     fn hir_quest(greedy: bool, expr: Hir) -> Hir {
1239         Hir::repetition(hir::Repetition {
1240             kind: hir::RepetitionKind::ZeroOrOne,
1241             greedy: greedy,
1242             hir: Box::new(expr),
1243         })
1244     }
1245
1246     fn hir_star(greedy: bool, expr: Hir) -> Hir {
1247         Hir::repetition(hir::Repetition {
1248             kind: hir::RepetitionKind::ZeroOrMore,
1249             greedy: greedy,
1250             hir: Box::new(expr),
1251         })
1252     }
1253
1254     fn hir_plus(greedy: bool, expr: Hir) -> Hir {
1255         Hir::repetition(hir::Repetition {
1256             kind: hir::RepetitionKind::OneOrMore,
1257             greedy: greedy,
1258             hir: Box::new(expr),
1259         })
1260     }
1261
1262     fn hir_range(greedy: bool, range: hir::RepetitionRange, expr: Hir) -> Hir {
1263         Hir::repetition(hir::Repetition {
1264             kind: hir::RepetitionKind::Range(range),
1265             greedy: greedy,
1266             hir: Box::new(expr),
1267         })
1268     }
1269
    // Shorthand for building an alternation of the given expressions.
    fn hir_alt(alts: Vec<Hir>) -> Hir {
        Hir::alternation(alts)
    }
1273
    // Shorthand for building a concatenation of the given expressions.
    fn hir_cat(exprs: Vec<Hir>) -> Hir {
        Hir::concat(exprs)
    }
1277
1278     #[allow(dead_code)]
1279     fn hir_uclass_query(query: ClassQuery<'_>) -> Hir {
1280         Hir::class(hir::Class::Unicode(unicode::class(query).unwrap()))
1281     }
1282
1283     #[allow(dead_code)]
1284     fn hir_uclass_perl_word() -> Hir {
1285         Hir::class(hir::Class::Unicode(unicode::perl_word().unwrap()))
1286     }
1287
1288     fn hir_uclass(ranges: &[(char, char)]) -> Hir {
1289         let ranges: Vec<hir::ClassUnicodeRange> = ranges
1290             .iter()
1291             .map(|&(s, e)| hir::ClassUnicodeRange::new(s, e))
1292             .collect();
1293         Hir::class(hir::Class::Unicode(hir::ClassUnicode::new(ranges)))
1294     }
1295
1296     fn hir_bclass(ranges: &[(u8, u8)]) -> Hir {
1297         let ranges: Vec<hir::ClassBytesRange> = ranges
1298             .iter()
1299             .map(|&(s, e)| hir::ClassBytesRange::new(s, e))
1300             .collect();
1301         Hir::class(hir::Class::Bytes(hir::ClassBytes::new(ranges)))
1302     }
1303
1304     fn hir_bclass_from_char(ranges: &[(char, char)]) -> Hir {
1305         let ranges: Vec<hir::ClassBytesRange> = ranges
1306             .iter()
1307             .map(|&(s, e)| {
1308                 assert!(s as u32 <= 0x7F);
1309                 assert!(e as u32 <= 0x7F);
1310                 hir::ClassBytesRange::new(s as u8, e as u8)
1311             })
1312             .collect();
1313         Hir::class(hir::Class::Bytes(hir::ClassBytes::new(ranges)))
1314     }
1315
1316     fn hir_case_fold(expr: Hir) -> Hir {
1317         match expr.into_kind() {
1318             HirKind::Class(mut cls) => {
1319                 cls.case_fold_simple();
1320                 Hir::class(cls)
1321             }
1322             _ => panic!("cannot case fold non-class Hir expr"),
1323         }
1324     }
1325
1326     fn hir_negate(expr: Hir) -> Hir {
1327         match expr.into_kind() {
1328             HirKind::Class(mut cls) => {
1329                 cls.negate();
1330                 Hir::class(cls)
1331             }
1332             _ => panic!("cannot negate non-class Hir expr"),
1333         }
1334     }
1335
1336     #[allow(dead_code)]
1337     fn hir_union(expr1: Hir, expr2: Hir) -> Hir {
1338         use crate::hir::Class::{Bytes, Unicode};
1339
1340         match (expr1.into_kind(), expr2.into_kind()) {
1341             (HirKind::Class(Unicode(mut c1)), HirKind::Class(Unicode(c2))) => {
1342                 c1.union(&c2);
1343                 Hir::class(hir::Class::Unicode(c1))
1344             }
1345             (HirKind::Class(Bytes(mut c1)), HirKind::Class(Bytes(c2))) => {
1346                 c1.union(&c2);
1347                 Hir::class(hir::Class::Bytes(c1))
1348             }
1349             _ => panic!("cannot union non-class Hir exprs"),
1350         }
1351     }
1352
1353     #[allow(dead_code)]
1354     fn hir_difference(expr1: Hir, expr2: Hir) -> Hir {
1355         use crate::hir::Class::{Bytes, Unicode};
1356
1357         match (expr1.into_kind(), expr2.into_kind()) {
1358             (HirKind::Class(Unicode(mut c1)), HirKind::Class(Unicode(c2))) => {
1359                 c1.difference(&c2);
1360                 Hir::class(hir::Class::Unicode(c1))
1361             }
1362             (HirKind::Class(Bytes(mut c1)), HirKind::Class(Bytes(c2))) => {
1363                 c1.difference(&c2);
1364                 Hir::class(hir::Class::Bytes(c1))
1365             }
1366             _ => panic!("cannot difference non-class Hir exprs"),
1367         }
1368     }
1369
    // Shorthand for building an anchor Hir.
    fn hir_anchor(anchor: hir::Anchor) -> Hir {
        Hir::anchor(anchor)
    }
1373
    // Shorthand for building a word boundary Hir.
    fn hir_word(wb: hir::WordBoundary) -> Hir {
        Hir::word_boundary(wb)
    }
1377
    // Checks how patterns with empty sub-expressions translate: the empty
    // pattern, a lone flag directive, empty groups and empty alternation
    // branches.
    #[test]
    fn empty() {
        // Patterns that match nothing but the empty string.
        assert_eq!(t(""), Hir::empty());
        assert_eq!(t("(?i)"), Hir::empty());
        // Empty groups of every kind still produce a group.
        assert_eq!(t("()"), hir_group(1, Hir::empty()));
        assert_eq!(t("(?:)"), hir_group_nocap(Hir::empty()));
        assert_eq!(t("(?P<wat>)"), hir_group_name(1, "wat", Hir::empty()));
        // Empty alternation branches are kept as empty expressions.
        assert_eq!(t("|"), hir_alt(vec![Hir::empty(), Hir::empty()]));
        assert_eq!(
            t("()|()"),
            hir_alt(vec![
                hir_group(1, Hir::empty()),
                hir_group(2, Hir::empty()),
            ])
        );
        assert_eq!(
            t("(|b)"),
            hir_group(1, hir_alt(vec![Hir::empty(), hir_lit("b"),]))
        );
        assert_eq!(
            t("(a|)"),
            hir_group(1, hir_alt(vec![hir_lit("a"), Hir::empty(),]))
        );
        assert_eq!(
            t("(a||c)"),
            hir_group(
                1,
                hir_alt(vec![hir_lit("a"), Hir::empty(), hir_lit("c"),])
            )
        );
        assert_eq!(
            t("(||)"),
            hir_group(
                1,
                hir_alt(vec![Hir::empty(), Hir::empty(), Hir::empty(),])
            )
        );
    }
1416
    // Checks the translation of literals, with and without Unicode mode and
    // with and without invalid UTF-8 being allowed.
    #[test]
    fn literal() {
        assert_eq!(t("a"), hir_lit("a"));
        assert_eq!(t("(?-u)a"), hir_lit("a"));
        assert_eq!(t("☃"), hir_lit("☃"));
        assert_eq!(t("abcd"), hir_lit("abcd"));

        assert_eq!(t_bytes("(?-u)a"), hir_lit("a"));
        // Not a raw string: Rust itself turns `\x61` into `a` before the
        // pattern reaches the parser.
        assert_eq!(t_bytes("(?-u)\x61"), hir_lit("a"));
        // Raw strings: the escape is seen by the regex parser.
        assert_eq!(t_bytes(r"(?-u)\x61"), hir_lit("a"));
        assert_eq!(t_bytes(r"(?-u)\xFF"), hir_blit(b"\xFF"));

        // A non-ASCII literal is rejected when Unicode mode is disabled. The
        // snowman is three bytes long but only one column wide.
        assert_eq!(
            t_err("(?-u)☃"),
            TestError {
                kind: hir::ErrorKind::UnicodeNotAllowed,
                span: Span::new(
                    Position::new(5, 1, 6),
                    Position::new(8, 1, 7)
                ),
            }
        );
        // A non-ASCII byte is rejected when invalid UTF-8 isn't allowed.
        assert_eq!(
            t_err(r"(?-u)\xFF"),
            TestError {
                kind: hir::ErrorKind::InvalidUtf8,
                span: Span::new(
                    Position::new(5, 1, 6),
                    Position::new(9, 1, 10)
                ),
            }
        );
    }
1450
    // Checks the translation of literals when the case insensitive flag is
    // enabled: letters become classes containing every case variant, while
    // characters without case variants stay literals.
    #[test]
    fn literal_case_insensitive() {
        // Unicode mode. These assertions are only compiled when the
        // `unicode-case` feature is enabled.
        #[cfg(feature = "unicode-case")]
        assert_eq!(t("(?i)a"), hir_uclass(&[('A', 'A'), ('a', 'a'),]));
        #[cfg(feature = "unicode-case")]
        assert_eq!(
            t("(?i:a)"),
            hir_group_nocap(hir_uclass(&[('A', 'A'), ('a', 'a')],))
        );
        #[cfg(feature = "unicode-case")]
        assert_eq!(
            t("a(?i)a(?-i)a"),
            hir_cat(vec![
                hir_lit("a"),
                hir_uclass(&[('A', 'A'), ('a', 'a')]),
                hir_lit("a"),
            ])
        );
        #[cfg(feature = "unicode-case")]
        assert_eq!(
            t("(?i)ab@c"),
            hir_cat(vec![
                hir_uclass(&[('A', 'A'), ('a', 'a')]),
                hir_uclass(&[('B', 'B'), ('b', 'b')]),
                hir_lit("@"),
                hir_uclass(&[('C', 'C'), ('c', 'c')]),
            ])
        );
        #[cfg(feature = "unicode-case")]
        assert_eq!(
            t("(?i)β"),
            hir_uclass(&[('Β', 'Β'), ('β', 'β'), ('ϐ', 'ϐ'),])
        );

        // Unicode mode disabled: case folding produces byte classes.
        assert_eq!(t("(?i-u)a"), hir_bclass(&[(b'A', b'A'), (b'a', b'a'),]));
        // NOTE(review): this assertion expects only byte classes, so the
        // `unicode-case` gate looks unnecessary here -- confirm.
        #[cfg(feature = "unicode-case")]
        assert_eq!(
            t("(?-u)a(?i)a(?-i)a"),
            hir_cat(vec![
                hir_lit("a"),
                hir_bclass(&[(b'A', b'A'), (b'a', b'a')]),
                hir_lit("a"),
            ])
        );
        assert_eq!(
            t("(?i-u)ab@c"),
            hir_cat(vec![
                hir_bclass(&[(b'A', b'A'), (b'a', b'a')]),
                hir_bclass(&[(b'B', b'B'), (b'b', b'b')]),
                hir_lit("@"),
                hir_bclass(&[(b'C', b'C'), (b'c', b'c')]),
            ])
        );

        // The same with invalid UTF-8 allowed.
        assert_eq!(
            t_bytes("(?i-u)a"),
            hir_bclass(&[(b'A', b'A'), (b'a', b'a'),])
        );
        assert_eq!(
            t_bytes("(?i-u)\x61"),
            hir_bclass(&[(b'A', b'A'), (b'a', b'a'),])
        );
        assert_eq!(
            t_bytes(r"(?i-u)\x61"),
            hir_bclass(&[(b'A', b'A'), (b'a', b'a'),])
        );
        // A byte without case variants stays a literal.
        assert_eq!(t_bytes(r"(?i-u)\xFF"), hir_blit(b"\xFF"));

        // A non-ASCII literal is rejected when Unicode mode is disabled. The
        // beta is two bytes long but only one column wide.
        assert_eq!(
            t_err("(?i-u)β"),
            TestError {
                kind: hir::ErrorKind::UnicodeNotAllowed,
                span: Span::new(
                    Position::new(6, 1, 7),
                    Position::new(8, 1, 8),
                ),
            }
        );
    }
1530
    // Checks the translation of `.` under the `s` and `u` flags.
    #[test]
    fn dot() {
        // In Unicode mode, `.` is a Unicode class that excludes `\n` unless
        // the `s` flag is enabled.
        assert_eq!(
            t("."),
            hir_uclass(&[('\0', '\t'), ('\x0B', '\u{10FFFF}'),])
        );
        assert_eq!(t("(?s)."), hir_uclass(&[('\0', '\u{10FFFF}'),]));
        // With Unicode mode disabled, `.` is the equivalent byte class.
        assert_eq!(
            t_bytes("(?-u)."),
            hir_bclass(&[(b'\0', b'\t'), (b'\x0B', b'\xFF'),])
        );
        assert_eq!(t_bytes("(?s-u)."), hir_bclass(&[(b'\0', b'\xFF'),]));

        // If invalid UTF-8 isn't allowed, then non-Unicode `.` isn't allowed.
        assert_eq!(
            t_err("(?-u)."),
            TestError {
                kind: hir::ErrorKind::InvalidUtf8,
                span: Span::new(
                    Position::new(5, 1, 6),
                    Position::new(6, 1, 7)
                ),
            }
        );
        assert_eq!(
            t_err("(?s-u)."),
            TestError {
                kind: hir::ErrorKind::InvalidUtf8,
                span: Span::new(
                    Position::new(6, 1, 7),
                    Position::new(7, 1, 8)
                ),
            }
        );
    }
1566
    // Checks the translation of anchors and word boundaries under the `m`
    // and `u` flags.
    #[test]
    fn assertions() {
        // Without the `m` flag, `^` and `$` are text anchors.
        assert_eq!(t("^"), hir_anchor(hir::Anchor::StartText));
        assert_eq!(t("$"), hir_anchor(hir::Anchor::EndText));
        assert_eq!(t(r"\A"), hir_anchor(hir::Anchor::StartText));
        assert_eq!(t(r"\z"), hir_anchor(hir::Anchor::EndText));
        // With the `m` flag, `^` and `$` become line anchors while `\A` and
        // `\z` are unaffected.
        assert_eq!(t("(?m)^"), hir_anchor(hir::Anchor::StartLine));
        assert_eq!(t("(?m)$"), hir_anchor(hir::Anchor::EndLine));
        assert_eq!(t(r"(?m)\A"), hir_anchor(hir::Anchor::StartText));
        assert_eq!(t(r"(?m)\z"), hir_anchor(hir::Anchor::EndText));

        // Word boundaries are Unicode aware unless Unicode mode is disabled.
        assert_eq!(t(r"\b"), hir_word(hir::WordBoundary::Unicode));
        assert_eq!(t(r"\B"), hir_word(hir::WordBoundary::UnicodeNegate));
        assert_eq!(t(r"(?-u)\b"), hir_word(hir::WordBoundary::Ascii));
        assert_eq!(
            t_bytes(r"(?-u)\B"),
            hir_word(hir::WordBoundary::AsciiNegate)
        );

        // A negated ASCII word boundary is rejected when invalid UTF-8 isn't
        // allowed.
        assert_eq!(
            t_err(r"(?-u)\B"),
            TestError {
                kind: hir::ErrorKind::InvalidUtf8,
                span: Span::new(
                    Position::new(5, 1, 6),
                    Position::new(7, 1, 8)
                ),
            }
        );
    }
1597
    // Checks the translation of indexed, named and non-capturing groups,
    // including how capture indices are carried into the Hir.
    #[test]
    fn group() {
        // Indexed capture groups.
        assert_eq!(t("(a)"), hir_group(1, hir_lit("a")));
        assert_eq!(
            t("(a)(b)"),
            hir_cat(vec![
                hir_group(1, hir_lit("a")),
                hir_group(2, hir_lit("b")),
            ])
        );
        assert_eq!(
            t("(a)|(b)"),
            hir_alt(vec![
                hir_group(1, hir_lit("a")),
                hir_group(2, hir_lit("b")),
            ])
        );
        // Named capture groups.
        assert_eq!(t("(?P<foo>)"), hir_group_name(1, "foo", Hir::empty()));
        assert_eq!(t("(?P<foo>a)"), hir_group_name(1, "foo", hir_lit("a")));
        assert_eq!(
            t("(?P<foo>a)(?P<bar>b)"),
            hir_cat(vec![
                hir_group_name(1, "foo", hir_lit("a")),
                hir_group_name(2, "bar", hir_lit("b")),
            ])
        );
        // Non-capturing groups do not consume a capture index.
        assert_eq!(t("(?:)"), hir_group_nocap(Hir::empty()));
        assert_eq!(t("(?:a)"), hir_group_nocap(hir_lit("a")));
        assert_eq!(
            t("(?:a)(b)"),
            hir_cat(vec![
                hir_group_nocap(hir_lit("a")),
                hir_group(1, hir_lit("b")),
            ])
        );
        assert_eq!(
            t("(a)(?:b)(c)"),
            hir_cat(vec![
                hir_group(1, hir_lit("a")),
                hir_group_nocap(hir_lit("b")),
                hir_group(2, hir_lit("c")),
            ])
        );
        // Named groups share the index sequence with indexed groups.
        assert_eq!(
            t("(a)(?P<foo>b)(c)"),
            hir_cat(vec![
                hir_group(1, hir_lit("a")),
                hir_group_name(2, "foo", hir_lit("b")),
                hir_group(3, hir_lit("c")),
            ])
        );
        // Groups that contain nothing but a flag directive are empty.
        assert_eq!(t("()"), hir_group(1, Hir::empty()));
        assert_eq!(t("((?i))"), hir_group(1, Hir::empty()));
        assert_eq!(t("((?x))"), hir_group(1, Hir::empty()));
        assert_eq!(t("(((?x)))"), hir_group(1, hir_group(2, Hir::empty())));
    }
1654
1655     #[test]
1656     fn flags() {
1657         #[cfg(feature = "unicode-case")]
1658         assert_eq!(
1659             t("(?i:a)a"),
1660             hir_cat(vec![
1661                 hir_group_nocap(hir_uclass(&[('A', 'A'), ('a', 'a')])),
1662                 hir_lit("a"),
1663             ])
1664         );
1665         assert_eq!(
1666             t("(?i-u:a)β"),
1667             hir_cat(vec![
1668                 hir_group_nocap(hir_bclass(&[(b'A', b'A'), (b'a', b'a')])),
1669                 hir_lit("β"),
1670             ])
1671         );
1672         assert_eq!(
1673             t("(?:(?i-u)a)b"),
1674             hir_cat(vec![
1675                 hir_group_nocap(hir_bclass(&[(b'A', b'A'), (b'a', b'a')])),
1676                 hir_lit("b"),
1677             ])
1678         );
1679         assert_eq!(
1680             t("((?i-u)a)b"),
1681             hir_cat(vec![
1682                 hir_group(1, hir_bclass(&[(b'A', b'A'), (b'a', b'a')])),
1683                 hir_lit("b"),
1684             ])
1685         );
1686         #[cfg(feature = "unicode-case")]
1687         assert_eq!(
1688             t("(?i)(?-i:a)a"),
1689             hir_cat(vec![
1690                 hir_group_nocap(hir_lit("a")),
1691                 hir_uclass(&[('A', 'A'), ('a', 'a')]),
1692             ])
1693         );
1694         #[cfg(feature = "unicode-case")]
1695         assert_eq!(
1696             t("(?im)a^"),
1697             hir_cat(vec![
1698                 hir_uclass(&[('A', 'A'), ('a', 'a')]),
1699                 hir_anchor(hir::Anchor::StartLine),
1700             ])
1701         );
1702         #[cfg(feature = "unicode-case")]
1703         assert_eq!(
1704             t("(?im)a^(?i-m)a^"),
1705             hir_cat(vec![
1706                 hir_uclass(&[('A', 'A'), ('a', 'a')]),
1707                 hir_anchor(hir::Anchor::StartLine),
1708                 hir_uclass(&[('A', 'A'), ('a', 'a')]),
1709                 hir_anchor(hir::Anchor::StartText),
1710             ])
1711         );
1712         assert_eq!(
1713             t("(?U)a*a*?(?-U)a*a*?"),
1714             hir_cat(vec![
1715                 hir_star(false, hir_lit("a")),
1716                 hir_star(true, hir_lit("a")),
1717                 hir_star(true, hir_lit("a")),
1718                 hir_star(false, hir_lit("a")),
1719             ])
1720         );
1721         #[cfg(feature = "unicode-case")]
1722         assert_eq!(
1723             t("(?:a(?i)a)a"),
1724             hir_cat(vec![
1725                 hir_group_nocap(hir_cat(vec![
1726                     hir_lit("a"),
1727                     hir_uclass(&[('A', 'A'), ('a', 'a')]),
1728                 ])),
1729                 hir_lit("a"),
1730             ])
1731         );
1732         #[cfg(feature = "unicode-case")]
1733         assert_eq!(
1734             t("(?i)(?:a(?-i)a)a"),
1735             hir_cat(vec![
1736                 hir_group_nocap(hir_cat(vec![
1737                     hir_uclass(&[('A', 'A'), ('a', 'a')]),
1738                     hir_lit("a"),
1739                 ])),
1740                 hir_uclass(&[('A', 'A'), ('a', 'a')]),
1741             ])
1742         );
1743     }
1744
1745     #[test]
1746     fn escape() {
1747         assert_eq!(
1748             t(r"\\\.\+\*\?\(\)\|\[\]\{\}\^\$\#"),
1749             hir_lit(r"\.+*?()|[]{}^$#")
1750         );
1751     }
1752
1753     #[test]
1754     fn repetition() {
1755         assert_eq!(t("a?"), hir_quest(true, hir_lit("a")));
1756         assert_eq!(t("a*"), hir_star(true, hir_lit("a")));
1757         assert_eq!(t("a+"), hir_plus(true, hir_lit("a")));
1758         assert_eq!(t("a??"), hir_quest(false, hir_lit("a")));
1759         assert_eq!(t("a*?"), hir_star(false, hir_lit("a")));
1760         assert_eq!(t("a+?"), hir_plus(false, hir_lit("a")));
1761
1762         assert_eq!(
1763             t("a{1}"),
1764             hir_range(true, hir::RepetitionRange::Exactly(1), hir_lit("a"),)
1765         );
1766         assert_eq!(
1767             t("a{1,}"),
1768             hir_range(true, hir::RepetitionRange::AtLeast(1), hir_lit("a"),)
1769         );
1770         assert_eq!(
1771             t("a{1,2}"),
1772             hir_range(true, hir::RepetitionRange::Bounded(1, 2), hir_lit("a"),)
1773         );
1774         assert_eq!(
1775             t("a{1}?"),
1776             hir_range(false, hir::RepetitionRange::Exactly(1), hir_lit("a"),)
1777         );
1778         assert_eq!(
1779             t("a{1,}?"),
1780             hir_range(false, hir::RepetitionRange::AtLeast(1), hir_lit("a"),)
1781         );
1782         assert_eq!(
1783             t("a{1,2}?"),
1784             hir_range(
1785                 false,
1786                 hir::RepetitionRange::Bounded(1, 2),
1787                 hir_lit("a"),
1788             )
1789         );
1790
1791         assert_eq!(
1792             t("ab?"),
1793             hir_cat(vec![hir_lit("a"), hir_quest(true, hir_lit("b")),])
1794         );
1795         assert_eq!(
1796             t("(ab)?"),
1797             hir_quest(
1798                 true,
1799                 hir_group(1, hir_cat(vec![hir_lit("a"), hir_lit("b"),]))
1800             )
1801         );
1802         assert_eq!(
1803             t("a|b?"),
1804             hir_alt(vec![hir_lit("a"), hir_quest(true, hir_lit("b")),])
1805         );
1806     }
1807
1808     #[test]
1809     fn cat_alt() {
1810         assert_eq!(
1811             t("(ab)"),
1812             hir_group(1, hir_cat(vec![hir_lit("a"), hir_lit("b"),]))
1813         );
1814         assert_eq!(t("a|b"), hir_alt(vec![hir_lit("a"), hir_lit("b"),]));
1815         assert_eq!(
1816             t("a|b|c"),
1817             hir_alt(vec![hir_lit("a"), hir_lit("b"), hir_lit("c"),])
1818         );
1819         assert_eq!(
1820             t("ab|bc|cd"),
1821             hir_alt(vec![hir_lit("ab"), hir_lit("bc"), hir_lit("cd"),])
1822         );
1823         assert_eq!(
1824             t("(a|b)"),
1825             hir_group(1, hir_alt(vec![hir_lit("a"), hir_lit("b"),]))
1826         );
1827         assert_eq!(
1828             t("(a|b|c)"),
1829             hir_group(
1830                 1,
1831                 hir_alt(vec![hir_lit("a"), hir_lit("b"), hir_lit("c"),])
1832             )
1833         );
1834         assert_eq!(
1835             t("(ab|bc|cd)"),
1836             hir_group(
1837                 1,
1838                 hir_alt(vec![hir_lit("ab"), hir_lit("bc"), hir_lit("cd"),])
1839             )
1840         );
1841         assert_eq!(
1842             t("(ab|(bc|(cd)))"),
1843             hir_group(
1844                 1,
1845                 hir_alt(vec![
1846                     hir_lit("ab"),
1847                     hir_group(
1848                         2,
1849                         hir_alt(vec![
1850                             hir_lit("bc"),
1851                             hir_group(3, hir_lit("cd")),
1852                         ])
1853                     ),
1854                 ])
1855             )
1856         );
1857     }
1858
1859     #[test]
1860     fn class_ascii() {
1861         assert_eq!(
1862             t("[[:alnum:]]"),
1863             hir_uclass(ascii_class(&ast::ClassAsciiKind::Alnum))
1864         );
1865         assert_eq!(
1866             t("[[:alpha:]]"),
1867             hir_uclass(ascii_class(&ast::ClassAsciiKind::Alpha))
1868         );
1869         assert_eq!(
1870             t("[[:ascii:]]"),
1871             hir_uclass(ascii_class(&ast::ClassAsciiKind::Ascii))
1872         );
1873         assert_eq!(
1874             t("[[:blank:]]"),
1875             hir_uclass(ascii_class(&ast::ClassAsciiKind::Blank))
1876         );
1877         assert_eq!(
1878             t("[[:cntrl:]]"),
1879             hir_uclass(ascii_class(&ast::ClassAsciiKind::Cntrl))
1880         );
1881         assert_eq!(
1882             t("[[:digit:]]"),
1883             hir_uclass(ascii_class(&ast::ClassAsciiKind::Digit))
1884         );
1885         assert_eq!(
1886             t("[[:graph:]]"),
1887             hir_uclass(ascii_class(&ast::ClassAsciiKind::Graph))
1888         );
1889         assert_eq!(
1890             t("[[:lower:]]"),
1891             hir_uclass(ascii_class(&ast::ClassAsciiKind::Lower))
1892         );
1893         assert_eq!(
1894             t("[[:print:]]"),
1895             hir_uclass(ascii_class(&ast::ClassAsciiKind::Print))
1896         );
1897         assert_eq!(
1898             t("[[:punct:]]"),
1899             hir_uclass(ascii_class(&ast::ClassAsciiKind::Punct))
1900         );
1901         assert_eq!(
1902             t("[[:space:]]"),
1903             hir_uclass(ascii_class(&ast::ClassAsciiKind::Space))
1904         );
1905         assert_eq!(
1906             t("[[:upper:]]"),
1907             hir_uclass(ascii_class(&ast::ClassAsciiKind::Upper))
1908         );
1909         assert_eq!(
1910             t("[[:word:]]"),
1911             hir_uclass(ascii_class(&ast::ClassAsciiKind::Word))
1912         );
1913         assert_eq!(
1914             t("[[:xdigit:]]"),
1915             hir_uclass(ascii_class(&ast::ClassAsciiKind::Xdigit))
1916         );
1917
1918         assert_eq!(
1919             t("[[:^lower:]]"),
1920             hir_negate(hir_uclass(ascii_class(&ast::ClassAsciiKind::Lower)))
1921         );
1922         #[cfg(feature = "unicode-case")]
1923         assert_eq!(
1924             t("(?i)[[:lower:]]"),
1925             hir_uclass(&[
1926                 ('A', 'Z'),
1927                 ('a', 'z'),
1928                 ('\u{17F}', '\u{17F}'),
1929                 ('\u{212A}', '\u{212A}'),
1930             ])
1931         );
1932
1933         assert_eq!(
1934             t("(?-u)[[:lower:]]"),
1935             hir_bclass_from_char(ascii_class(&ast::ClassAsciiKind::Lower))
1936         );
1937         assert_eq!(
1938             t("(?i-u)[[:lower:]]"),
1939             hir_case_fold(hir_bclass_from_char(ascii_class(
1940                 &ast::ClassAsciiKind::Lower
1941             )))
1942         );
1943
1944         assert_eq!(
1945             t_err("(?-u)[[:^lower:]]"),
1946             TestError {
1947                 kind: hir::ErrorKind::InvalidUtf8,
1948                 span: Span::new(
1949                     Position::new(6, 1, 7),
1950                     Position::new(16, 1, 17)
1951                 ),
1952             }
1953         );
1954         assert_eq!(
1955             t_err("(?i-u)[[:^lower:]]"),
1956             TestError {
1957                 kind: hir::ErrorKind::InvalidUtf8,
1958                 span: Span::new(
1959                     Position::new(7, 1, 8),
1960                     Position::new(17, 1, 18)
1961                 ),
1962             }
1963         );
1964     }
1965
1966     #[test]
1967     fn class_ascii_multiple() {
1968         // See: https://github.com/rust-lang/regex/issues/680
1969         assert_eq!(
1970             t("[[:alnum:][:^ascii:]]"),
1971             hir_union(
1972                 hir_uclass(ascii_class(&ast::ClassAsciiKind::Alnum)),
1973                 hir_uclass(&[('\u{80}', '\u{10FFFF}')]),
1974             ),
1975         );
1976         assert_eq!(
1977             t_bytes("(?-u)[[:alnum:][:^ascii:]]"),
1978             hir_union(
1979                 hir_bclass_from_char(ascii_class(&ast::ClassAsciiKind::Alnum)),
1980                 hir_bclass(&[(0x80, 0xFF)]),
1981             ),
1982         );
1983     }
1984
1985     #[test]
1986     #[cfg(feature = "unicode-perl")]
1987     fn class_perl() {
1988         // Unicode
1989         assert_eq!(t(r"\d"), hir_uclass_query(ClassQuery::Binary("digit")));
1990         assert_eq!(t(r"\s"), hir_uclass_query(ClassQuery::Binary("space")));
1991         assert_eq!(t(r"\w"), hir_uclass_perl_word());
1992         #[cfg(feature = "unicode-case")]
1993         assert_eq!(
1994             t(r"(?i)\d"),
1995             hir_uclass_query(ClassQuery::Binary("digit"))
1996         );
1997         #[cfg(feature = "unicode-case")]
1998         assert_eq!(
1999             t(r"(?i)\s"),
2000             hir_uclass_query(ClassQuery::Binary("space"))
2001         );
2002         #[cfg(feature = "unicode-case")]
2003         assert_eq!(t(r"(?i)\w"), hir_uclass_perl_word());
2004
2005         // Unicode, negated
2006         assert_eq!(
2007             t(r"\D"),
2008             hir_negate(hir_uclass_query(ClassQuery::Binary("digit")))
2009         );
2010         assert_eq!(
2011             t(r"\S"),
2012             hir_negate(hir_uclass_query(ClassQuery::Binary("space")))
2013         );
2014         assert_eq!(t(r"\W"), hir_negate(hir_uclass_perl_word()));
2015         #[cfg(feature = "unicode-case")]
2016         assert_eq!(
2017             t(r"(?i)\D"),
2018             hir_negate(hir_uclass_query(ClassQuery::Binary("digit")))
2019         );
2020         #[cfg(feature = "unicode-case")]
2021         assert_eq!(
2022             t(r"(?i)\S"),
2023             hir_negate(hir_uclass_query(ClassQuery::Binary("space")))
2024         );
2025         #[cfg(feature = "unicode-case")]
2026         assert_eq!(t(r"(?i)\W"), hir_negate(hir_uclass_perl_word()));
2027
2028         // ASCII only
2029         assert_eq!(
2030             t(r"(?-u)\d"),
2031             hir_bclass_from_char(ascii_class(&ast::ClassAsciiKind::Digit))
2032         );
2033         assert_eq!(
2034             t(r"(?-u)\s"),
2035             hir_bclass_from_char(ascii_class(&ast::ClassAsciiKind::Space))
2036         );
2037         assert_eq!(
2038             t(r"(?-u)\w"),
2039             hir_bclass_from_char(ascii_class(&ast::ClassAsciiKind::Word))
2040         );
2041         assert_eq!(
2042             t(r"(?i-u)\d"),
2043             hir_bclass_from_char(ascii_class(&ast::ClassAsciiKind::Digit))
2044         );
2045         assert_eq!(
2046             t(r"(?i-u)\s"),
2047             hir_bclass_from_char(ascii_class(&ast::ClassAsciiKind::Space))
2048         );
2049         assert_eq!(
2050             t(r"(?i-u)\w"),
2051             hir_bclass_from_char(ascii_class(&ast::ClassAsciiKind::Word))
2052         );
2053
2054         // ASCII only, negated
2055         assert_eq!(
2056             t(r"(?-u)\D"),
2057             hir_negate(hir_bclass_from_char(ascii_class(
2058                 &ast::ClassAsciiKind::Digit
2059             )))
2060         );
2061         assert_eq!(
2062             t(r"(?-u)\S"),
2063             hir_negate(hir_bclass_from_char(ascii_class(
2064                 &ast::ClassAsciiKind::Space
2065             )))
2066         );
2067         assert_eq!(
2068             t(r"(?-u)\W"),
2069             hir_negate(hir_bclass_from_char(ascii_class(
2070                 &ast::ClassAsciiKind::Word
2071             )))
2072         );
2073         assert_eq!(
2074             t(r"(?i-u)\D"),
2075             hir_negate(hir_bclass_from_char(ascii_class(
2076                 &ast::ClassAsciiKind::Digit
2077             )))
2078         );
2079         assert_eq!(
2080             t(r"(?i-u)\S"),
2081             hir_negate(hir_bclass_from_char(ascii_class(
2082                 &ast::ClassAsciiKind::Space
2083             )))
2084         );
2085         assert_eq!(
2086             t(r"(?i-u)\W"),
2087             hir_negate(hir_bclass_from_char(ascii_class(
2088                 &ast::ClassAsciiKind::Word
2089             )))
2090         );
2091     }
2092
2093     #[test]
2094     #[cfg(not(feature = "unicode-perl"))]
2095     fn class_perl_word_disabled() {
2096         assert_eq!(
2097             t_err(r"\w"),
2098             TestError {
2099                 kind: hir::ErrorKind::UnicodePerlClassNotFound,
2100                 span: Span::new(
2101                     Position::new(0, 1, 1),
2102                     Position::new(2, 1, 3)
2103                 ),
2104             }
2105         );
2106     }
2107
2108     #[test]
2109     #[cfg(all(not(feature = "unicode-perl"), not(feature = "unicode-bool")))]
2110     fn class_perl_space_disabled() {
2111         assert_eq!(
2112             t_err(r"\s"),
2113             TestError {
2114                 kind: hir::ErrorKind::UnicodePerlClassNotFound,
2115                 span: Span::new(
2116                     Position::new(0, 1, 1),
2117                     Position::new(2, 1, 3)
2118                 ),
2119             }
2120         );
2121     }
2122
2123     #[test]
2124     #[cfg(all(
2125         not(feature = "unicode-perl"),
2126         not(feature = "unicode-gencat")
2127     ))]
2128     fn class_perl_digit_disabled() {
2129         assert_eq!(
2130             t_err(r"\d"),
2131             TestError {
2132                 kind: hir::ErrorKind::UnicodePerlClassNotFound,
2133                 span: Span::new(
2134                     Position::new(0, 1, 1),
2135                     Position::new(2, 1, 3)
2136                 ),
2137             }
2138         );
2139     }
2140
2141     #[test]
2142     #[cfg(feature = "unicode-gencat")]
2143     fn class_unicode_gencat() {
2144         assert_eq!(t(r"\pZ"), hir_uclass_query(ClassQuery::Binary("Z")));
2145         assert_eq!(t(r"\pz"), hir_uclass_query(ClassQuery::Binary("Z")));
2146         assert_eq!(
2147             t(r"\p{Separator}"),
2148             hir_uclass_query(ClassQuery::Binary("Z"))
2149         );
2150         assert_eq!(
2151             t(r"\p{se      PaRa ToR}"),
2152             hir_uclass_query(ClassQuery::Binary("Z"))
2153         );
2154         assert_eq!(
2155             t(r"\p{gc:Separator}"),
2156             hir_uclass_query(ClassQuery::Binary("Z"))
2157         );
2158         assert_eq!(
2159             t(r"\p{gc=Separator}"),
2160             hir_uclass_query(ClassQuery::Binary("Z"))
2161         );
2162         assert_eq!(
2163             t(r"\p{Other}"),
2164             hir_uclass_query(ClassQuery::Binary("Other"))
2165         );
2166         assert_eq!(t(r"\pC"), hir_uclass_query(ClassQuery::Binary("Other")));
2167
2168         assert_eq!(
2169             t(r"\PZ"),
2170             hir_negate(hir_uclass_query(ClassQuery::Binary("Z")))
2171         );
2172         assert_eq!(
2173             t(r"\P{separator}"),
2174             hir_negate(hir_uclass_query(ClassQuery::Binary("Z")))
2175         );
2176         assert_eq!(
2177             t(r"\P{gc!=separator}"),
2178             hir_negate(hir_uclass_query(ClassQuery::Binary("Z")))
2179         );
2180
2181         assert_eq!(t(r"\p{any}"), hir_uclass_query(ClassQuery::Binary("Any")));
2182         assert_eq!(
2183             t(r"\p{assigned}"),
2184             hir_uclass_query(ClassQuery::Binary("Assigned"))
2185         );
2186         assert_eq!(
2187             t(r"\p{ascii}"),
2188             hir_uclass_query(ClassQuery::Binary("ASCII"))
2189         );
2190         assert_eq!(
2191             t(r"\p{gc:any}"),
2192             hir_uclass_query(ClassQuery::Binary("Any"))
2193         );
2194         assert_eq!(
2195             t(r"\p{gc:assigned}"),
2196             hir_uclass_query(ClassQuery::Binary("Assigned"))
2197         );
2198         assert_eq!(
2199             t(r"\p{gc:ascii}"),
2200             hir_uclass_query(ClassQuery::Binary("ASCII"))
2201         );
2202
2203         assert_eq!(
2204             t_err(r"(?-u)\pZ"),
2205             TestError {
2206                 kind: hir::ErrorKind::UnicodeNotAllowed,
2207                 span: Span::new(
2208                     Position::new(5, 1, 6),
2209                     Position::new(8, 1, 9)
2210                 ),
2211             }
2212         );
2213         assert_eq!(
2214             t_err(r"(?-u)\p{Separator}"),
2215             TestError {
2216                 kind: hir::ErrorKind::UnicodeNotAllowed,
2217                 span: Span::new(
2218                     Position::new(5, 1, 6),
2219                     Position::new(18, 1, 19)
2220                 ),
2221             }
2222         );
2223         assert_eq!(
2224             t_err(r"\pE"),
2225             TestError {
2226                 kind: hir::ErrorKind::UnicodePropertyNotFound,
2227                 span: Span::new(
2228                     Position::new(0, 1, 1),
2229                     Position::new(3, 1, 4)
2230                 ),
2231             }
2232         );
2233         assert_eq!(
2234             t_err(r"\p{Foo}"),
2235             TestError {
2236                 kind: hir::ErrorKind::UnicodePropertyNotFound,
2237                 span: Span::new(
2238                     Position::new(0, 1, 1),
2239                     Position::new(7, 1, 8)
2240                 ),
2241             }
2242         );
2243         assert_eq!(
2244             t_err(r"\p{gc:Foo}"),
2245             TestError {
2246                 kind: hir::ErrorKind::UnicodePropertyValueNotFound,
2247                 span: Span::new(
2248                     Position::new(0, 1, 1),
2249                     Position::new(10, 1, 11)
2250                 ),
2251             }
2252         );
2253     }
2254
2255     #[test]
2256     #[cfg(not(feature = "unicode-gencat"))]
2257     fn class_unicode_gencat_disabled() {
2258         assert_eq!(
2259             t_err(r"\p{Separator}"),
2260             TestError {
2261                 kind: hir::ErrorKind::UnicodePropertyNotFound,
2262                 span: Span::new(
2263                     Position::new(0, 1, 1),
2264                     Position::new(13, 1, 14)
2265                 ),
2266             }
2267         );
2268
2269         assert_eq!(
2270             t_err(r"\p{Any}"),
2271             TestError {
2272                 kind: hir::ErrorKind::UnicodePropertyNotFound,
2273                 span: Span::new(
2274                     Position::new(0, 1, 1),
2275                     Position::new(7, 1, 8)
2276                 ),
2277             }
2278         );
2279     }
2280
2281     #[test]
2282     #[cfg(feature = "unicode-script")]
2283     fn class_unicode_script() {
2284         assert_eq!(
2285             t(r"\p{Greek}"),
2286             hir_uclass_query(ClassQuery::Binary("Greek"))
2287         );
2288         #[cfg(feature = "unicode-case")]
2289         assert_eq!(
2290             t(r"(?i)\p{Greek}"),
2291             hir_case_fold(hir_uclass_query(ClassQuery::Binary("Greek")))
2292         );
2293         #[cfg(feature = "unicode-case")]
2294         assert_eq!(
2295             t(r"(?i)\P{Greek}"),
2296             hir_negate(hir_case_fold(hir_uclass_query(ClassQuery::Binary(
2297                 "Greek"
2298             ))))
2299         );
2300
2301         assert_eq!(
2302             t_err(r"\p{sc:Foo}"),
2303             TestError {
2304                 kind: hir::ErrorKind::UnicodePropertyValueNotFound,
2305                 span: Span::new(
2306                     Position::new(0, 1, 1),
2307                     Position::new(10, 1, 11)
2308                 ),
2309             }
2310         );
2311         assert_eq!(
2312             t_err(r"\p{scx:Foo}"),
2313             TestError {
2314                 kind: hir::ErrorKind::UnicodePropertyValueNotFound,
2315                 span: Span::new(
2316                     Position::new(0, 1, 1),
2317                     Position::new(11, 1, 12)
2318                 ),
2319             }
2320         );
2321     }
2322
2323     #[test]
2324     #[cfg(not(feature = "unicode-script"))]
2325     fn class_unicode_script_disabled() {
2326         assert_eq!(
2327             t_err(r"\p{Greek}"),
2328             TestError {
2329                 kind: hir::ErrorKind::UnicodePropertyNotFound,
2330                 span: Span::new(
2331                     Position::new(0, 1, 1),
2332                     Position::new(9, 1, 10)
2333                 ),
2334             }
2335         );
2336
2337         assert_eq!(
2338             t_err(r"\p{scx:Greek}"),
2339             TestError {
2340                 kind: hir::ErrorKind::UnicodePropertyNotFound,
2341                 span: Span::new(
2342                     Position::new(0, 1, 1),
2343                     Position::new(13, 1, 14)
2344                 ),
2345             }
2346         );
2347     }
2348
2349     #[test]
2350     #[cfg(feature = "unicode-age")]
2351     fn class_unicode_age() {
2352         assert_eq!(
2353             t_err(r"\p{age:Foo}"),
2354             TestError {
2355                 kind: hir::ErrorKind::UnicodePropertyValueNotFound,
2356                 span: Span::new(
2357                     Position::new(0, 1, 1),
2358                     Position::new(11, 1, 12)
2359                 ),
2360             }
2361         );
2362     }
2363
2364     #[test]
2365     #[cfg(feature = "unicode-gencat")]
2366     fn class_unicode_any_empty() {
2367         assert_eq!(
2368             t_err(r"\P{any}"),
2369             TestError {
2370                 kind: hir::ErrorKind::EmptyClassNotAllowed,
2371                 span: Span::new(
2372                     Position::new(0, 1, 1),
2373                     Position::new(7, 1, 8)
2374                 ),
2375             }
2376         );
2377     }
2378
2379     #[test]
2380     #[cfg(not(feature = "unicode-age"))]
2381     fn class_unicode_age_disabled() {
2382         assert_eq!(
2383             t_err(r"\p{age:3.0}"),
2384             TestError {
2385                 kind: hir::ErrorKind::UnicodePropertyNotFound,
2386                 span: Span::new(
2387                     Position::new(0, 1, 1),
2388                     Position::new(11, 1, 12)
2389                 ),
2390             }
2391         );
2392     }
2393
2394     #[test]
2395     fn class_bracketed() {
2396         assert_eq!(t("[a]"), hir_uclass(&[('a', 'a')]));
2397         assert_eq!(t("[^[a]]"), hir_negate(hir_uclass(&[('a', 'a')])));
2398         assert_eq!(t("[a-z]"), hir_uclass(&[('a', 'z')]));
2399         assert_eq!(t("[a-fd-h]"), hir_uclass(&[('a', 'h')]));
2400         assert_eq!(t("[a-fg-m]"), hir_uclass(&[('a', 'm')]));
2401         assert_eq!(t(r"[\x00]"), hir_uclass(&[('\0', '\0')]));
2402         assert_eq!(t(r"[\n]"), hir_uclass(&[('\n', '\n')]));
2403         assert_eq!(t("[\n]"), hir_uclass(&[('\n', '\n')]));
2404         #[cfg(any(feature = "unicode-perl", feature = "unicode-gencat"))]
2405         assert_eq!(t(r"[\d]"), hir_uclass_query(ClassQuery::Binary("digit")));
2406         #[cfg(feature = "unicode-gencat")]
2407         assert_eq!(
2408             t(r"[\pZ]"),
2409             hir_uclass_query(ClassQuery::Binary("separator"))
2410         );
2411         #[cfg(feature = "unicode-gencat")]
2412         assert_eq!(
2413             t(r"[\p{separator}]"),
2414             hir_uclass_query(ClassQuery::Binary("separator"))
2415         );
2416         #[cfg(any(feature = "unicode-perl", feature = "unicode-gencat"))]
2417         assert_eq!(t(r"[^\D]"), hir_uclass_query(ClassQuery::Binary("digit")));
2418         #[cfg(feature = "unicode-gencat")]
2419         assert_eq!(
2420             t(r"[^\PZ]"),
2421             hir_uclass_query(ClassQuery::Binary("separator"))
2422         );
2423         #[cfg(feature = "unicode-gencat")]
2424         assert_eq!(
2425             t(r"[^\P{separator}]"),
2426             hir_uclass_query(ClassQuery::Binary("separator"))
2427         );
2428         #[cfg(all(
2429             feature = "unicode-case",
2430             any(feature = "unicode-perl", feature = "unicode-gencat")
2431         ))]
2432         assert_eq!(
2433             t(r"(?i)[^\D]"),
2434             hir_uclass_query(ClassQuery::Binary("digit"))
2435         );
2436         #[cfg(all(feature = "unicode-case", feature = "unicode-script"))]
2437         assert_eq!(
2438             t(r"(?i)[^\P{greek}]"),
2439             hir_case_fold(hir_uclass_query(ClassQuery::Binary("greek")))
2440         );
2441
2442         assert_eq!(t("(?-u)[a]"), hir_bclass(&[(b'a', b'a')]));
2443         assert_eq!(t(r"(?-u)[\x00]"), hir_bclass(&[(b'\0', b'\0')]));
2444         assert_eq!(t_bytes(r"(?-u)[\xFF]"), hir_bclass(&[(b'\xFF', b'\xFF')]));
2445
2446         #[cfg(feature = "unicode-case")]
2447         assert_eq!(t("(?i)[a]"), hir_uclass(&[('A', 'A'), ('a', 'a')]));
2448         #[cfg(feature = "unicode-case")]
2449         assert_eq!(
2450             t("(?i)[k]"),
2451             hir_uclass(&[('K', 'K'), ('k', 'k'), ('\u{212A}', '\u{212A}'),])
2452         );
2453         #[cfg(feature = "unicode-case")]
2454         assert_eq!(
2455             t("(?i)[β]"),
2456             hir_uclass(&[('Β', 'Β'), ('β', 'β'), ('ϐ', 'ϐ'),])
2457         );
2458         assert_eq!(t("(?i-u)[k]"), hir_bclass(&[(b'K', b'K'), (b'k', b'k'),]));
2459
2460         assert_eq!(t("[^a]"), hir_negate(hir_uclass(&[('a', 'a')])));
2461         assert_eq!(t(r"[^\x00]"), hir_negate(hir_uclass(&[('\0', '\0')])));
2462         assert_eq!(
2463             t_bytes("(?-u)[^a]"),
2464             hir_negate(hir_bclass(&[(b'a', b'a')]))
2465         );
2466         #[cfg(any(feature = "unicode-perl", feature = "unicode-gencat"))]
2467         assert_eq!(
2468             t(r"[^\d]"),
2469             hir_negate(hir_uclass_query(ClassQuery::Binary("digit")))
2470         );
2471         #[cfg(feature = "unicode-gencat")]
2472         assert_eq!(
2473             t(r"[^\pZ]"),
2474             hir_negate(hir_uclass_query(ClassQuery::Binary("separator")))
2475         );
2476         #[cfg(feature = "unicode-gencat")]
2477         assert_eq!(
2478             t(r"[^\p{separator}]"),
2479             hir_negate(hir_uclass_query(ClassQuery::Binary("separator")))
2480         );
2481         #[cfg(all(feature = "unicode-case", feature = "unicode-script"))]
2482         assert_eq!(
2483             t(r"(?i)[^\p{greek}]"),
2484             hir_negate(hir_case_fold(hir_uclass_query(ClassQuery::Binary(
2485                 "greek"
2486             ))))
2487         );
2488         #[cfg(all(feature = "unicode-case", feature = "unicode-script"))]
2489         assert_eq!(
2490             t(r"(?i)[\P{greek}]"),
2491             hir_negate(hir_case_fold(hir_uclass_query(ClassQuery::Binary(
2492                 "greek"
2493             ))))
2494         );
2495
2496         // Test some weird cases.
2497         assert_eq!(t(r"[\[]"), hir_uclass(&[('[', '[')]));
2498
2499         assert_eq!(t(r"[&]"), hir_uclass(&[('&', '&')]));
2500         assert_eq!(t(r"[\&]"), hir_uclass(&[('&', '&')]));
2501         assert_eq!(t(r"[\&\&]"), hir_uclass(&[('&', '&')]));
2502         assert_eq!(t(r"[\x00-&]"), hir_uclass(&[('\0', '&')]));
2503         assert_eq!(t(r"[&-\xFF]"), hir_uclass(&[('&', '\u{FF}')]));
2504
2505         assert_eq!(t(r"[~]"), hir_uclass(&[('~', '~')]));
2506         assert_eq!(t(r"[\~]"), hir_uclass(&[('~', '~')]));
2507         assert_eq!(t(r"[\~\~]"), hir_uclass(&[('~', '~')]));
2508         assert_eq!(t(r"[\x00-~]"), hir_uclass(&[('\0', '~')]));
2509         assert_eq!(t(r"[~-\xFF]"), hir_uclass(&[('~', '\u{FF}')]));
2510
2511         assert_eq!(t(r"[-]"), hir_uclass(&[('-', '-')]));
2512         assert_eq!(t(r"[\-]"), hir_uclass(&[('-', '-')]));
2513         assert_eq!(t(r"[\-\-]"), hir_uclass(&[('-', '-')]));
2514         assert_eq!(t(r"[\x00-\-]"), hir_uclass(&[('\0', '-')]));
2515         assert_eq!(t(r"[\--\xFF]"), hir_uclass(&[('-', '\u{FF}')]));
2516
2517         assert_eq!(
2518             t_err("(?-u)[^a]"),
2519             TestError {
2520                 kind: hir::ErrorKind::InvalidUtf8,
2521                 span: Span::new(
2522                     Position::new(5, 1, 6),
2523                     Position::new(9, 1, 10)
2524                 ),
2525             }
2526         );
2527         #[cfg(any(feature = "unicode-perl", feature = "unicode-bool"))]
2528         assert_eq!(
2529             t_err(r"[^\s\S]"),
2530             TestError {
2531                 kind: hir::ErrorKind::EmptyClassNotAllowed,
2532                 span: Span::new(
2533                     Position::new(0, 1, 1),
2534                     Position::new(7, 1, 8)
2535                 ),
2536             }
2537         );
2538         #[cfg(any(feature = "unicode-perl", feature = "unicode-bool"))]
2539         assert_eq!(
2540             t_err(r"(?-u)[^\s\S]"),
2541             TestError {
2542                 kind: hir::ErrorKind::EmptyClassNotAllowed,
2543                 span: Span::new(
2544                     Position::new(5, 1, 6),
2545                     Position::new(12, 1, 13)
2546                 ),
2547             }
2548         );
2549     }
2550
2551     #[test]
2552     fn class_bracketed_union() {
2553         assert_eq!(t("[a-zA-Z]"), hir_uclass(&[('A', 'Z'), ('a', 'z')]));
2554         #[cfg(feature = "unicode-gencat")]
2555         assert_eq!(
2556             t(r"[a\pZb]"),
2557             hir_union(
2558                 hir_uclass(&[('a', 'b')]),
2559                 hir_uclass_query(ClassQuery::Binary("separator"))
2560             )
2561         );
2562         #[cfg(all(feature = "unicode-gencat", feature = "unicode-script"))]
2563         assert_eq!(
2564             t(r"[\pZ\p{Greek}]"),
2565             hir_union(
2566                 hir_uclass_query(ClassQuery::Binary("greek")),
2567                 hir_uclass_query(ClassQuery::Binary("separator"))
2568             )
2569         );
2570         #[cfg(all(
2571             feature = "unicode-age",
2572             feature = "unicode-gencat",
2573             feature = "unicode-script"
2574         ))]
2575         assert_eq!(
2576             t(r"[\p{age:3.0}\pZ\p{Greek}]"),
2577             hir_union(
2578                 hir_uclass_query(ClassQuery::ByValue {
2579                     property_name: "age",
2580                     property_value: "3.0",
2581                 }),
2582                 hir_union(
2583                     hir_uclass_query(ClassQuery::Binary("greek")),
2584                     hir_uclass_query(ClassQuery::Binary("separator"))
2585                 )
2586             )
2587         );
2588         #[cfg(all(
2589             feature = "unicode-age",
2590             feature = "unicode-gencat",
2591             feature = "unicode-script"
2592         ))]
2593         assert_eq!(
2594             t(r"[[[\p{age:3.0}\pZ]\p{Greek}][\p{Cyrillic}]]"),
2595             hir_union(
2596                 hir_uclass_query(ClassQuery::ByValue {
2597                     property_name: "age",
2598                     property_value: "3.0",
2599                 }),
2600                 hir_union(
2601                     hir_uclass_query(ClassQuery::Binary("cyrillic")),
2602                     hir_union(
2603                         hir_uclass_query(ClassQuery::Binary("greek")),
2604                         hir_uclass_query(ClassQuery::Binary("separator"))
2605                     )
2606                 )
2607             )
2608         );
2609
2610         #[cfg(all(
2611             feature = "unicode-age",
2612             feature = "unicode-case",
2613             feature = "unicode-gencat",
2614             feature = "unicode-script"
2615         ))]
2616         assert_eq!(
2617             t(r"(?i)[\p{age:3.0}\pZ\p{Greek}]"),
2618             hir_case_fold(hir_union(
2619                 hir_uclass_query(ClassQuery::ByValue {
2620                     property_name: "age",
2621                     property_value: "3.0",
2622                 }),
2623                 hir_union(
2624                     hir_uclass_query(ClassQuery::Binary("greek")),
2625                     hir_uclass_query(ClassQuery::Binary("separator"))
2626                 )
2627             ))
2628         );
2629         #[cfg(all(
2630             feature = "unicode-age",
2631             feature = "unicode-gencat",
2632             feature = "unicode-script"
2633         ))]
2634         assert_eq!(
2635             t(r"[^\p{age:3.0}\pZ\p{Greek}]"),
2636             hir_negate(hir_union(
2637                 hir_uclass_query(ClassQuery::ByValue {
2638                     property_name: "age",
2639                     property_value: "3.0",
2640                 }),
2641                 hir_union(
2642                     hir_uclass_query(ClassQuery::Binary("greek")),
2643                     hir_uclass_query(ClassQuery::Binary("separator"))
2644                 )
2645             ))
2646         );
2647         #[cfg(all(
2648             feature = "unicode-age",
2649             feature = "unicode-case",
2650             feature = "unicode-gencat",
2651             feature = "unicode-script"
2652         ))]
2653         assert_eq!(
2654             t(r"(?i)[^\p{age:3.0}\pZ\p{Greek}]"),
2655             hir_negate(hir_case_fold(hir_union(
2656                 hir_uclass_query(ClassQuery::ByValue {
2657                     property_name: "age",
2658                     property_value: "3.0",
2659                 }),
2660                 hir_union(
2661                     hir_uclass_query(ClassQuery::Binary("greek")),
2662                     hir_uclass_query(ClassQuery::Binary("separator"))
2663                 )
2664             )))
2665         );
2666     }
2667
2668     #[test]
2669     fn class_bracketed_nested() {
2670         assert_eq!(t(r"[a[^c]]"), hir_negate(hir_uclass(&[('c', 'c')])));
2671         assert_eq!(t(r"[a-b[^c]]"), hir_negate(hir_uclass(&[('c', 'c')])));
2672         assert_eq!(t(r"[a-c[^c]]"), hir_negate(hir_uclass(&[])));
2673
2674         assert_eq!(t(r"[^a[^c]]"), hir_uclass(&[('c', 'c')]));
2675         assert_eq!(t(r"[^a-b[^c]]"), hir_uclass(&[('c', 'c')]));
2676
2677         #[cfg(feature = "unicode-case")]
2678         assert_eq!(
2679             t(r"(?i)[a[^c]]"),
2680             hir_negate(hir_case_fold(hir_uclass(&[('c', 'c')])))
2681         );
2682         #[cfg(feature = "unicode-case")]
2683         assert_eq!(
2684             t(r"(?i)[a-b[^c]]"),
2685             hir_negate(hir_case_fold(hir_uclass(&[('c', 'c')])))
2686         );
2687
2688         #[cfg(feature = "unicode-case")]
2689         assert_eq!(t(r"(?i)[^a[^c]]"), hir_uclass(&[('C', 'C'), ('c', 'c')]));
2690         #[cfg(feature = "unicode-case")]
2691         assert_eq!(
2692             t(r"(?i)[^a-b[^c]]"),
2693             hir_uclass(&[('C', 'C'), ('c', 'c')])
2694         );
2695
2696         assert_eq!(
2697             t_err(r"[^a-c[^c]]"),
2698             TestError {
2699                 kind: hir::ErrorKind::EmptyClassNotAllowed,
2700                 span: Span::new(
2701                     Position::new(0, 1, 1),
2702                     Position::new(10, 1, 11)
2703                 ),
2704             }
2705         );
2706         #[cfg(feature = "unicode-case")]
2707         assert_eq!(
2708             t_err(r"(?i)[^a-c[^c]]"),
2709             TestError {
2710                 kind: hir::ErrorKind::EmptyClassNotAllowed,
2711                 span: Span::new(
2712                     Position::new(4, 1, 5),
2713                     Position::new(14, 1, 15)
2714                 ),
2715             }
2716         );
2717     }
2718
2719     #[test]
2720     fn class_bracketed_intersect() {
2721         assert_eq!(t("[abc&&b-c]"), hir_uclass(&[('b', 'c')]));
2722         assert_eq!(t("[abc&&[b-c]]"), hir_uclass(&[('b', 'c')]));
2723         assert_eq!(t("[[abc]&&[b-c]]"), hir_uclass(&[('b', 'c')]));
2724         assert_eq!(t("[a-z&&b-y&&c-x]"), hir_uclass(&[('c', 'x')]));
2725         assert_eq!(t("[c-da-b&&a-d]"), hir_uclass(&[('a', 'd')]));
2726         assert_eq!(t("[a-d&&c-da-b]"), hir_uclass(&[('a', 'd')]));
2727         assert_eq!(t(r"[a-z&&a-c]"), hir_uclass(&[('a', 'c')]));
2728         assert_eq!(t(r"[[a-z&&a-c]]"), hir_uclass(&[('a', 'c')]));
2729         assert_eq!(t(r"[^[a-z&&a-c]]"), hir_negate(hir_uclass(&[('a', 'c')])));
2730
2731         assert_eq!(t("(?-u)[abc&&b-c]"), hir_bclass(&[(b'b', b'c')]));
2732         assert_eq!(t("(?-u)[abc&&[b-c]]"), hir_bclass(&[(b'b', b'c')]));
2733         assert_eq!(t("(?-u)[[abc]&&[b-c]]"), hir_bclass(&[(b'b', b'c')]));
2734         assert_eq!(t("(?-u)[a-z&&b-y&&c-x]"), hir_bclass(&[(b'c', b'x')]));
2735         assert_eq!(t("(?-u)[c-da-b&&a-d]"), hir_bclass(&[(b'a', b'd')]));
2736         assert_eq!(t("(?-u)[a-d&&c-da-b]"), hir_bclass(&[(b'a', b'd')]));
2737
2738         #[cfg(feature = "unicode-case")]
2739         assert_eq!(
2740             t("(?i)[abc&&b-c]"),
2741             hir_case_fold(hir_uclass(&[('b', 'c')]))
2742         );
2743         #[cfg(feature = "unicode-case")]
2744         assert_eq!(
2745             t("(?i)[abc&&[b-c]]"),
2746             hir_case_fold(hir_uclass(&[('b', 'c')]))
2747         );
2748         #[cfg(feature = "unicode-case")]
2749         assert_eq!(
2750             t("(?i)[[abc]&&[b-c]]"),
2751             hir_case_fold(hir_uclass(&[('b', 'c')]))
2752         );
2753         #[cfg(feature = "unicode-case")]
2754         assert_eq!(
2755             t("(?i)[a-z&&b-y&&c-x]"),
2756             hir_case_fold(hir_uclass(&[('c', 'x')]))
2757         );
2758         #[cfg(feature = "unicode-case")]
2759         assert_eq!(
2760             t("(?i)[c-da-b&&a-d]"),
2761             hir_case_fold(hir_uclass(&[('a', 'd')]))
2762         );
2763         #[cfg(feature = "unicode-case")]
2764         assert_eq!(
2765             t("(?i)[a-d&&c-da-b]"),
2766             hir_case_fold(hir_uclass(&[('a', 'd')]))
2767         );
2768
2769         assert_eq!(
2770             t("(?i-u)[abc&&b-c]"),
2771             hir_case_fold(hir_bclass(&[(b'b', b'c')]))
2772         );
2773         assert_eq!(
2774             t("(?i-u)[abc&&[b-c]]"),
2775             hir_case_fold(hir_bclass(&[(b'b', b'c')]))
2776         );
2777         assert_eq!(
2778             t("(?i-u)[[abc]&&[b-c]]"),
2779             hir_case_fold(hir_bclass(&[(b'b', b'c')]))
2780         );
2781         assert_eq!(
2782             t("(?i-u)[a-z&&b-y&&c-x]"),
2783             hir_case_fold(hir_bclass(&[(b'c', b'x')]))
2784         );
2785         assert_eq!(
2786             t("(?i-u)[c-da-b&&a-d]"),
2787             hir_case_fold(hir_bclass(&[(b'a', b'd')]))
2788         );
2789         assert_eq!(
2790             t("(?i-u)[a-d&&c-da-b]"),
2791             hir_case_fold(hir_bclass(&[(b'a', b'd')]))
2792         );
2793
2794         // In `[a^]`, `^` does not need to be escaped, so it makes sense that
2795         // `^` is also allowed to be unescaped after `&&`.
2796         assert_eq!(t(r"[\^&&^]"), hir_uclass(&[('^', '^')]));
2797         // `]` needs to be escaped after `&&` since it's not at start of class.
2798         assert_eq!(t(r"[]&&\]]"), hir_uclass(&[(']', ']')]));
2799         assert_eq!(t(r"[-&&-]"), hir_uclass(&[('-', '-')]));
2800         assert_eq!(t(r"[\&&&&]"), hir_uclass(&[('&', '&')]));
2801         assert_eq!(t(r"[\&&&\&]"), hir_uclass(&[('&', '&')]));
2802         // Test precedence.
2803         assert_eq!(
2804             t(r"[a-w&&[^c-g]z]"),
2805             hir_uclass(&[('a', 'b'), ('h', 'w')])
2806         );
2807     }
2808
2809     #[test]
2810     fn class_bracketed_intersect_negate() {
2811         #[cfg(feature = "unicode-perl")]
2812         assert_eq!(
2813             t(r"[^\w&&\d]"),
2814             hir_negate(hir_uclass_query(ClassQuery::Binary("digit")))
2815         );
2816         assert_eq!(t(r"[^[a-z&&a-c]]"), hir_negate(hir_uclass(&[('a', 'c')])));
2817         #[cfg(feature = "unicode-perl")]
2818         assert_eq!(
2819             t(r"[^[\w&&\d]]"),
2820             hir_negate(hir_uclass_query(ClassQuery::Binary("digit")))
2821         );
2822         #[cfg(feature = "unicode-perl")]
2823         assert_eq!(
2824             t(r"[^[^\w&&\d]]"),
2825             hir_uclass_query(ClassQuery::Binary("digit"))
2826         );
2827         #[cfg(feature = "unicode-perl")]
2828         assert_eq!(t(r"[[[^\w]&&[^\d]]]"), hir_negate(hir_uclass_perl_word()));
2829
2830         #[cfg(feature = "unicode-perl")]
2831         assert_eq!(
2832             t_bytes(r"(?-u)[^\w&&\d]"),
2833             hir_negate(hir_bclass_from_char(ascii_class(
2834                 &ast::ClassAsciiKind::Digit
2835             )))
2836         );
2837         assert_eq!(
2838             t_bytes(r"(?-u)[^[a-z&&a-c]]"),
2839             hir_negate(hir_bclass(&[(b'a', b'c')]))
2840         );
2841         assert_eq!(
2842             t_bytes(r"(?-u)[^[\w&&\d]]"),
2843             hir_negate(hir_bclass_from_char(ascii_class(
2844                 &ast::ClassAsciiKind::Digit
2845             )))
2846         );
2847         assert_eq!(
2848             t_bytes(r"(?-u)[^[^\w&&\d]]"),
2849             hir_bclass_from_char(ascii_class(&ast::ClassAsciiKind::Digit))
2850         );
2851         assert_eq!(
2852             t_bytes(r"(?-u)[[[^\w]&&[^\d]]]"),
2853             hir_negate(hir_bclass_from_char(ascii_class(
2854                 &ast::ClassAsciiKind::Word
2855             )))
2856         );
2857     }
2858
2859     #[test]
2860     fn class_bracketed_difference() {
2861         #[cfg(feature = "unicode-gencat")]
2862         assert_eq!(
2863             t(r"[\pL--[:ascii:]]"),
2864             hir_difference(
2865                 hir_uclass_query(ClassQuery::Binary("letter")),
2866                 hir_uclass(&[('\0', '\x7F')])
2867             )
2868         );
2869
2870         assert_eq!(
2871             t(r"(?-u)[[:alpha:]--[:lower:]]"),
2872             hir_bclass(&[(b'A', b'Z')])
2873         );
2874     }
2875
2876     #[test]
2877     fn class_bracketed_symmetric_difference() {
2878         #[cfg(feature = "unicode-script")]
2879         assert_eq!(
2880             t(r"[\p{sc:Greek}~~\p{scx:Greek}]"),
2881             hir_uclass(&[
2882                 ('\u{0342}', '\u{0342}'),
2883                 ('\u{0345}', '\u{0345}'),
2884                 ('\u{1DC0}', '\u{1DC1}'),
2885             ])
2886         );
2887         assert_eq!(t(r"[a-g~~c-j]"), hir_uclass(&[('a', 'b'), ('h', 'j')]));
2888
2889         assert_eq!(
2890             t(r"(?-u)[a-g~~c-j]"),
2891             hir_bclass(&[(b'a', b'b'), (b'h', b'j')])
2892         );
2893     }
2894
2895     #[test]
2896     fn ignore_whitespace() {
2897         assert_eq!(t(r"(?x)\12 3"), hir_lit("\n3"));
2898         assert_eq!(t(r"(?x)\x { 53 }"), hir_lit("S"));
2899         assert_eq!(
2900             t(r"(?x)\x # comment
2901 { # comment
2902     53 # comment
2903 } #comment"),
2904             hir_lit("S")
2905         );
2906
2907         assert_eq!(t(r"(?x)\x 53"), hir_lit("S"));
2908         assert_eq!(
2909             t(r"(?x)\x # comment
2910         53 # comment"),
2911             hir_lit("S")
2912         );
2913         assert_eq!(t(r"(?x)\x5 3"), hir_lit("S"));
2914
2915         #[cfg(feature = "unicode-gencat")]
2916         assert_eq!(
2917             t(r"(?x)\p # comment
2918 { # comment
2919     Separator # comment
2920 } # comment"),
2921             hir_uclass_query(ClassQuery::Binary("separator"))
2922         );
2923
2924         assert_eq!(
2925             t(r"(?x)a # comment
2926 { # comment
2927     5 # comment
2928     , # comment
2929     10 # comment
2930 } # comment"),
2931             hir_range(
2932                 true,
2933                 hir::RepetitionRange::Bounded(5, 10),
2934                 hir_lit("a")
2935             )
2936         );
2937
2938         assert_eq!(t(r"(?x)a\  # hi there"), hir_lit("a "));
2939     }
2940
2941     #[test]
2942     fn analysis_is_always_utf8() {
2943         // Positive examples.
2944         assert!(t_bytes(r"a").is_always_utf8());
2945         assert!(t_bytes(r"ab").is_always_utf8());
2946         assert!(t_bytes(r"(?-u)a").is_always_utf8());
2947         assert!(t_bytes(r"(?-u)ab").is_always_utf8());
2948         assert!(t_bytes(r"\xFF").is_always_utf8());
2949         assert!(t_bytes(r"\xFF\xFF").is_always_utf8());
2950         assert!(t_bytes(r"[^a]").is_always_utf8());
2951         assert!(t_bytes(r"[^a][^a]").is_always_utf8());
2952         assert!(t_bytes(r"\b").is_always_utf8());
2953         assert!(t_bytes(r"\B").is_always_utf8());
2954         assert!(t_bytes(r"(?-u)\b").is_always_utf8());
2955
2956         // Negative examples.
2957         assert!(!t_bytes(r"(?-u)\xFF").is_always_utf8());
2958         assert!(!t_bytes(r"(?-u)\xFF\xFF").is_always_utf8());
2959         assert!(!t_bytes(r"(?-u)[^a]").is_always_utf8());
2960         assert!(!t_bytes(r"(?-u)[^a][^a]").is_always_utf8());
2961         assert!(!t_bytes(r"(?-u)\B").is_always_utf8());
2962     }
2963
2964     #[test]
2965     fn analysis_is_all_assertions() {
2966         // Positive examples.
2967         assert!(t(r"\b").is_all_assertions());
2968         assert!(t(r"\B").is_all_assertions());
2969         assert!(t(r"^").is_all_assertions());
2970         assert!(t(r"$").is_all_assertions());
2971         assert!(t(r"\A").is_all_assertions());
2972         assert!(t(r"\z").is_all_assertions());
2973         assert!(t(r"$^\z\A\b\B").is_all_assertions());
2974         assert!(t(r"$|^|\z|\A|\b|\B").is_all_assertions());
2975         assert!(t(r"^$|$^").is_all_assertions());
2976         assert!(t(r"((\b)+())*^").is_all_assertions());
2977
2978         // Negative examples.
2979         assert!(!t(r"^a").is_all_assertions());
2980     }
2981
2982     #[test]
2983     fn analysis_is_anchored() {
2984         // Positive examples.
2985         assert!(t(r"^").is_anchored_start());
2986         assert!(t(r"$").is_anchored_end());
2987         assert!(t(r"^").is_line_anchored_start());
2988         assert!(t(r"$").is_line_anchored_end());
2989
2990         assert!(t(r"^^").is_anchored_start());
2991         assert!(t(r"$$").is_anchored_end());
2992         assert!(t(r"^^").is_line_anchored_start());
2993         assert!(t(r"$$").is_line_anchored_end());
2994
2995         assert!(t(r"^$").is_anchored_start());
2996         assert!(t(r"^$").is_anchored_end());
2997         assert!(t(r"^$").is_line_anchored_start());
2998         assert!(t(r"^$").is_line_anchored_end());
2999
3000         assert!(t(r"^foo").is_anchored_start());
3001         assert!(t(r"foo$").is_anchored_end());
3002         assert!(t(r"^foo").is_line_anchored_start());
3003         assert!(t(r"foo$").is_line_anchored_end());
3004
3005         assert!(t(r"^foo|^bar").is_anchored_start());
3006         assert!(t(r"foo$|bar$").is_anchored_end());
3007         assert!(t(r"^foo|^bar").is_line_anchored_start());
3008         assert!(t(r"foo$|bar$").is_line_anchored_end());
3009
3010         assert!(t(r"^(foo|bar)").is_anchored_start());
3011         assert!(t(r"(foo|bar)$").is_anchored_end());
3012         assert!(t(r"^(foo|bar)").is_line_anchored_start());
3013         assert!(t(r"(foo|bar)$").is_line_anchored_end());
3014
3015         assert!(t(r"^+").is_anchored_start());
3016         assert!(t(r"$+").is_anchored_end());
3017         assert!(t(r"^+").is_line_anchored_start());
3018         assert!(t(r"$+").is_line_anchored_end());
3019         assert!(t(r"^++").is_anchored_start());
3020         assert!(t(r"$++").is_anchored_end());
3021         assert!(t(r"^++").is_line_anchored_start());
3022         assert!(t(r"$++").is_line_anchored_end());
3023         assert!(t(r"(^)+").is_anchored_start());
3024         assert!(t(r"($)+").is_anchored_end());
3025         assert!(t(r"(^)+").is_line_anchored_start());
3026         assert!(t(r"($)+").is_line_anchored_end());
3027
3028         assert!(t(r"$^").is_anchored_start());
3029         assert!(t(r"$^").is_anchored_start());
3030         assert!(t(r"$^").is_line_anchored_end());
3031         assert!(t(r"$^").is_line_anchored_end());
3032         assert!(t(r"$^|^$").is_anchored_start());
3033         assert!(t(r"$^|^$").is_anchored_end());
3034         assert!(t(r"$^|^$").is_line_anchored_start());
3035         assert!(t(r"$^|^$").is_line_anchored_end());
3036
3037         assert!(t(r"\b^").is_anchored_start());
3038         assert!(t(r"$\b").is_anchored_end());
3039         assert!(t(r"\b^").is_line_anchored_start());
3040         assert!(t(r"$\b").is_line_anchored_end());
3041         assert!(t(r"^(?m:^)").is_anchored_start());
3042         assert!(t(r"(?m:$)$").is_anchored_end());
3043         assert!(t(r"^(?m:^)").is_line_anchored_start());
3044         assert!(t(r"(?m:$)$").is_line_anchored_end());
3045         assert!(t(r"(?m:^)^").is_anchored_start());
3046         assert!(t(r"$(?m:$)").is_anchored_end());
3047         assert!(t(r"(?m:^)^").is_line_anchored_start());
3048         assert!(t(r"$(?m:$)").is_line_anchored_end());
3049
3050         // Negative examples.
3051         assert!(!t(r"(?m)^").is_anchored_start());
3052         assert!(!t(r"(?m)$").is_anchored_end());
3053         assert!(!t(r"(?m:^$)|$^").is_anchored_start());
3054         assert!(!t(r"(?m:^$)|$^").is_anchored_end());
3055         assert!(!t(r"$^|(?m:^$)").is_anchored_start());
3056         assert!(!t(r"$^|(?m:^$)").is_anchored_end());
3057
3058         assert!(!t(r"a^").is_anchored_start());
3059         assert!(!t(r"$a").is_anchored_start());
3060         assert!(!t(r"a^").is_line_anchored_start());
3061         assert!(!t(r"$a").is_line_anchored_start());
3062
3063         assert!(!t(r"a^").is_anchored_end());
3064         assert!(!t(r"$a").is_anchored_end());
3065         assert!(!t(r"a^").is_line_anchored_end());
3066         assert!(!t(r"$a").is_line_anchored_end());
3067
3068         assert!(!t(r"^foo|bar").is_anchored_start());
3069         assert!(!t(r"foo|bar$").is_anchored_end());
3070         assert!(!t(r"^foo|bar").is_line_anchored_start());
3071         assert!(!t(r"foo|bar$").is_line_anchored_end());
3072
3073         assert!(!t(r"^*").is_anchored_start());
3074         assert!(!t(r"$*").is_anchored_end());
3075         assert!(!t(r"^*").is_line_anchored_start());
3076         assert!(!t(r"$*").is_line_anchored_end());
3077         assert!(!t(r"^*+").is_anchored_start());
3078         assert!(!t(r"$*+").is_anchored_end());
3079         assert!(!t(r"^*+").is_line_anchored_start());
3080         assert!(!t(r"$*+").is_line_anchored_end());
3081         assert!(!t(r"^+*").is_anchored_start());
3082         assert!(!t(r"$+*").is_anchored_end());
3083         assert!(!t(r"^+*").is_line_anchored_start());
3084         assert!(!t(r"$+*").is_line_anchored_end());
3085         assert!(!t(r"(^)*").is_anchored_start());
3086         assert!(!t(r"($)*").is_anchored_end());
3087         assert!(!t(r"(^)*").is_line_anchored_start());
3088         assert!(!t(r"($)*").is_line_anchored_end());
3089     }
3090
3091     #[test]
3092     fn analysis_is_line_anchored() {
3093         assert!(t(r"(?m)^(foo|bar)").is_line_anchored_start());
3094         assert!(t(r"(?m)(foo|bar)$").is_line_anchored_end());
3095
3096         assert!(t(r"(?m)^foo|^bar").is_line_anchored_start());
3097         assert!(t(r"(?m)foo$|bar$").is_line_anchored_end());
3098
3099         assert!(t(r"(?m)^").is_line_anchored_start());
3100         assert!(t(r"(?m)$").is_line_anchored_end());
3101
3102         assert!(t(r"(?m:^$)|$^").is_line_anchored_start());
3103         assert!(t(r"(?m:^$)|$^").is_line_anchored_end());
3104
3105         assert!(t(r"$^|(?m:^$)").is_line_anchored_start());
3106         assert!(t(r"$^|(?m:^$)").is_line_anchored_end());
3107     }
3108
3109     #[test]
3110     fn analysis_is_any_anchored() {
3111         // Positive examples.
3112         assert!(t(r"^").is_any_anchored_start());
3113         assert!(t(r"$").is_any_anchored_end());
3114         assert!(t(r"\A").is_any_anchored_start());
3115         assert!(t(r"\z").is_any_anchored_end());
3116
3117         // Negative examples.
3118         assert!(!t(r"(?m)^").is_any_anchored_start());
3119         assert!(!t(r"(?m)$").is_any_anchored_end());
3120         assert!(!t(r"$").is_any_anchored_start());
3121         assert!(!t(r"^").is_any_anchored_end());
3122     }
3123
3124     #[test]
3125     fn analysis_is_match_empty() {
3126         // Positive examples.
3127         assert!(t(r"").is_match_empty());
3128         assert!(t(r"()").is_match_empty());
3129         assert!(t(r"()*").is_match_empty());
3130         assert!(t(r"()+").is_match_empty());
3131         assert!(t(r"()?").is_match_empty());
3132         assert!(t(r"a*").is_match_empty());
3133         assert!(t(r"a?").is_match_empty());
3134         assert!(t(r"a{0}").is_match_empty());
3135         assert!(t(r"a{0,}").is_match_empty());
3136         assert!(t(r"a{0,1}").is_match_empty());
3137         assert!(t(r"a{0,10}").is_match_empty());
3138         #[cfg(feature = "unicode-gencat")]
3139         assert!(t(r"\pL*").is_match_empty());
3140         assert!(t(r"a*|b").is_match_empty());
3141         assert!(t(r"b|a*").is_match_empty());
3142         assert!(t(r"a|").is_match_empty());
3143         assert!(t(r"|a").is_match_empty());
3144         assert!(t(r"a||b").is_match_empty());
3145         assert!(t(r"a*a?(abcd)*").is_match_empty());
3146         assert!(t(r"^").is_match_empty());
3147         assert!(t(r"$").is_match_empty());
3148         assert!(t(r"(?m)^").is_match_empty());
3149         assert!(t(r"(?m)$").is_match_empty());
3150         assert!(t(r"\A").is_match_empty());
3151         assert!(t(r"\z").is_match_empty());
3152         assert!(t(r"\B").is_match_empty());
3153         assert!(t_bytes(r"(?-u)\B").is_match_empty());
3154         assert!(t(r"\b").is_match_empty());
3155         assert!(t(r"(?-u)\b").is_match_empty());
3156
3157         // Negative examples.
3158         assert!(!t(r"a+").is_match_empty());
3159         assert!(!t(r"a{1}").is_match_empty());
3160         assert!(!t(r"a{1,}").is_match_empty());
3161         assert!(!t(r"a{1,2}").is_match_empty());
3162         assert!(!t(r"a{1,10}").is_match_empty());
3163         assert!(!t(r"b|a").is_match_empty());
3164         assert!(!t(r"a*a+(abcd)*").is_match_empty());
3165     }
3166
3167     #[test]
3168     fn analysis_is_literal() {
3169         // Positive examples.
3170         assert!(t(r"a").is_literal());
3171         assert!(t(r"ab").is_literal());
3172         assert!(t(r"abc").is_literal());
3173         assert!(t(r"(?m)abc").is_literal());
3174
3175         // Negative examples.
3176         assert!(!t(r"").is_literal());
3177         assert!(!t(r"^").is_literal());
3178         assert!(!t(r"a|b").is_literal());
3179         assert!(!t(r"(a)").is_literal());
3180         assert!(!t(r"a+").is_literal());
3181         assert!(!t(r"foo(a)").is_literal());
3182         assert!(!t(r"(a)foo").is_literal());
3183         assert!(!t(r"[a]").is_literal());
3184     }
3185
3186     #[test]
3187     fn analysis_is_alternation_literal() {
3188         // Positive examples.
3189         assert!(t(r"a").is_alternation_literal());
3190         assert!(t(r"ab").is_alternation_literal());
3191         assert!(t(r"abc").is_alternation_literal());
3192         assert!(t(r"(?m)abc").is_alternation_literal());
3193         assert!(t(r"a|b").is_alternation_literal());
3194         assert!(t(r"a|b|c").is_alternation_literal());
3195         assert!(t(r"foo|bar").is_alternation_literal());
3196         assert!(t(r"foo|bar|baz").is_alternation_literal());
3197
3198         // Negative examples.
3199         assert!(!t(r"").is_alternation_literal());
3200         assert!(!t(r"^").is_alternation_literal());
3201         assert!(!t(r"(a)").is_alternation_literal());
3202         assert!(!t(r"a+").is_alternation_literal());
3203         assert!(!t(r"foo(a)").is_alternation_literal());
3204         assert!(!t(r"(a)foo").is_alternation_literal());
3205         assert!(!t(r"[a]").is_alternation_literal());
3206         assert!(!t(r"[a]|b").is_alternation_literal());
3207         assert!(!t(r"a|[b]").is_alternation_literal());
3208         assert!(!t(r"(a)|b").is_alternation_literal());
3209         assert!(!t(r"a|(b)").is_alternation_literal());
3210     }
3211 }