]> git.proxmox.com Git - rustc.git/blob - src/vendor/regex-0.2.10/src/dfa.rs
New upstream version 1.28.0+dfsg1
[rustc.git] / src / vendor / regex-0.2.10 / src / dfa.rs
1 // Copyright 2014-2016 The Rust Project Developers. See the COPYRIGHT
2 // file at the top-level directory of this distribution and at
3 // http://rust-lang.org/COPYRIGHT.
4 //
5 // Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
6 // http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
7 // <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
8 // option. This file may not be copied, modified, or distributed
9 // except according to those terms.
10
11 /*!
12 The DFA matching engine.
13
14 A DFA provides faster matching because the engine is in exactly one state at
15 any point in time. In the NFA, there may be multiple active states, and
16 considerable CPU cycles are spent shuffling them around. In finite automata
17 speak, the DFA follows epsilon transitions in the regex far less than the NFA.
18
19 A DFA is a classic trade off between time and space. The NFA is slower, but
20 its memory requirements are typically small and predictable. The DFA is faster,
21 but given the right regex and the right input, the number of states in the
22 DFA can grow exponentially. To mitigate this space problem, we do two things:
23
24 1. We implement an *online* DFA. That is, the DFA is constructed from the NFA
25 during a search. When a new state is computed, it is stored in a cache so
26 that it may be reused. An important consequence of this implementation
27 is that states that are never reached for a particular input are never
28 computed. (This is impossible in an "offline" DFA which needs to compute
29 all possible states up front.)
30 2. If the cache gets too big, we wipe it and continue matching.
31
32 In pathological cases, a new state can be created for every byte of input.
33 (e.g., The regex `(a|b)*a(a|b){20}` on a long sequence of a's and b's.)
34 In this case, performance regresses to slightly slower than the full NFA
35 simulation, in large part because the cache becomes useless. If the cache
36 is wiped too frequently, the DFA quits and control falls back to one of the
37 NFA simulations.
38
39 Because of the "lazy" nature of this DFA, the inner matching loop is
40 considerably more complex than one might expect out of a DFA. A number of
41 tricks are employed to make it fast. Tread carefully.
42
43 N.B. While this implementation is heavily commented, Russ Cox's series of
44 articles on regexes is strongly recommended: https://swtch.com/~rsc/regexp/
45 (As is the DFA implementation in RE2, which heavily influenced this
46 implementation.)
47 */
48
49 use std::collections::HashMap;
50 use std::fmt;
51 use std::iter::repeat;
52 use std::mem;
53
54 use exec::ProgramCache;
55 use prog::{Inst, Program};
56 use sparse::SparseSet;
57
58 /// Return true if and only if the given program can be executed by a DFA.
59 ///
60 /// Generally, a DFA is always possible. A pathological case where it is not
61 /// possible is if the number of NFA states exceeds `u32::MAX`, in which case,
62 /// this function will return false.
63 ///
64 /// This function will also return false if the given program has any Unicode
65 /// instructions (Char or Ranges) since the DFA operates on bytes only.
66 pub fn can_exec(insts: &Program) -> bool {
67 use prog::Inst::*;
68 // If for some reason we manage to allocate a regex program with more
69 // than i32::MAX instructions, then we can't execute the DFA because we
70 // use 32 bit instruction pointer deltas for memory savings.
71 // If i32::MAX is the largest positive delta,
72 // then -i32::MAX == i32::MIN + 1 is the largest negative delta,
73 // and we are OK to use 32 bits.
74 if insts.dfa_size_limit == 0 || insts.len() > ::std::i32::MAX as usize {
75 return false;
76 }
77 for inst in insts {
78 match *inst {
79 Char(_) | Ranges(_) => return false,
80 EmptyLook(_) | Match(_) | Save(_) | Split(_) | Bytes(_) => {}
81 }
82 }
83 true
84 }
85
/// A reusable cache of DFA states.
///
/// This cache is reused between multiple invocations of the same regex
/// program. (It is not shared simultaneously between threads. If there is
/// contention, then new caches are created.)
#[derive(Clone, Debug)]
pub struct Cache {
    /// Group persistent DFA related cache state together. The sparse sets
    /// listed below are used as scratch space while computing uncached states.
    inner: CacheInner,
    /// qcur and qnext are ordered sets with constant time
    /// addition/membership/clearing-whole-set and linear time iteration. They
    /// are used to manage the sets of NFA states in DFA states when computing
    /// cached DFA states. In particular, the order of the NFA states matters
    /// for leftmost-first style matching. Namely, when computing a cached
    /// state, the set of NFA states stops growing as soon as the first Match
    /// instruction is observed.
    // qcur holds the NFA states of the DFA state currently being processed.
    qcur: SparseSet,
    // qnext receives the NFA states of the DFA state being computed next.
    qnext: SparseSet,
}
106
/// `CacheInner` is logically just a part of Cache, but groups together fields
/// that aren't passed as function parameters throughout search. (This split
/// is mostly an artifact of the borrow checker. It is happily paid.)
#[derive(Clone, Debug)]
struct CacheInner {
    /// A cache of pre-compiled DFA states, keyed by the set of NFA states
    /// and the set of empty-width flags set at the byte in the input when the
    /// state was observed.
    ///
    /// A StatePtr is effectively a `*State`, but to avoid various inconvenient
    /// things, we just pass indexes around manually. The performance impact of
    /// this is probably an instruction or two in the inner loop. However, on
    /// 64 bit, each StatePtr is half the size of a *State.
    compiled: HashMap<State, StatePtr>,
    /// The transition table.
    ///
    /// The transition table is laid out in row-major order, where states are
    /// rows and the transitions for each state are columns. At a high level,
    /// given state `s` and byte `b`, the next state can be found at index
    /// `s * 256 + b`.
    ///
    /// This is, of course, a lie. A StatePtr is actually a pointer to the
    /// *start* of a row in this table. When indexing in the DFA's inner loop,
    /// this removes the need to multiply the StatePtr by the stride. Yes, it
    /// matters. This reduces the number of states we can store, but: the
    /// stride is rarely 256 since we define transitions in terms of
    /// *equivalence classes* of bytes. Each class corresponds to a set of
    /// bytes that never discriminate a distinct path through the DFA from each
    /// other.
    trans: Transitions,
    /// Our set of states. Note that `StatePtr / num_byte_classes` indexes
    /// this Vec rather than just a `StatePtr`.
    states: Vec<State>,
    /// A set of cached start states, which are limited to the number of
    /// permutations of flags set just before the initial byte of input. (The
    /// index into this vec is a `EmptyFlags`.)
    ///
    /// N.B. A start state can be "dead" (i.e., no possible match), so we
    /// represent it with a StatePtr.
    start_states: Vec<StatePtr>,
    /// Stack scratch space used to follow epsilon transitions in the NFA.
    /// (This permits us to avoid recursion.)
    ///
    /// The maximum stack size is the number of NFA states.
    stack: Vec<InstPtr>,
    /// The total number of times this cache has been flushed by the DFA
    /// because of space constraints.
    flush_count: u64,
    /// The total heap size of the DFA's cache. We use this to determine when
    /// we should flush the cache.
    // Maintained incrementally; see `reset_size` for the fixed baseline.
    size: usize,
}
159
/// The transition table.
///
/// It is laid out in row-major order, with states as rows and byte class
/// transitions as columns.
///
/// The transition table is responsible for producing valid `StatePtrs`. A
/// `StatePtr` points to the start of a particular row in this table. When
/// indexing to find the next state this allows us to avoid a multiplication
/// when computing an index into the table.
#[derive(Clone)]
struct Transitions {
    /// The table.
    // Length is always a multiple of `num_byte_classes` (one row per state).
    table: Vec<StatePtr>,
    /// The stride.
    // i.e., the number of columns per row, including the EOF class.
    num_byte_classes: usize,
}
176
/// Fsm encapsulates the actual execution of the DFA.
#[derive(Debug)]
pub struct Fsm<'a> {
    /// prog contains the NFA instruction opcodes. DFA execution uses either
    /// the `dfa` instructions or the `dfa_reverse` instructions from
    /// `exec::ExecReadOnly`. (It never uses `ExecReadOnly.nfa`, which may have
    /// Unicode opcodes that cannot be executed by the DFA.)
    prog: &'a Program,
    /// The start state. We record it here because the pointer may change
    /// when the cache is wiped.
    start: StatePtr,
    /// The current position in the input.
    at: usize,
    /// Should we quit after seeing the first match? e.g., When the caller
    /// uses `is_match` or `shortest_match`.
    quit_after_match: bool,
    /// The last state that matched.
    ///
    /// When no match has occurred, this is set to STATE_UNKNOWN.
    ///
    /// This is only useful when matching regex sets. The last match state
    /// is useful because it contains all of the match instructions seen,
    /// thereby allowing us to enumerate which regexes in the set matched.
    last_match_si: StatePtr,
    /// The input position of the last cache flush. We use this to determine
    /// if we're thrashing in the cache too often. If so, the DFA quits so
    /// that we can fall back to the NFA algorithm.
    last_cache_flush: usize,
    /// All cached DFA information that is persisted between searches.
    cache: &'a mut CacheInner,
}
208
/// The result of running the DFA.
///
/// Generally, the result is either a match or not a match, but sometimes the
/// DFA runs too slowly because the cache size is too small. In that case, it
/// gives up with the intent of falling back to the NFA algorithm.
///
/// The DFA can also give up if it runs out of room to create new states, or if
/// it sees non-ASCII bytes in the presence of a Unicode word boundary.
#[derive(Clone, Debug)]
pub enum Result<T> {
    /// A match was found; the payload is typically an input position.
    Match(T),
    /// No match was found; carries the position at which the search stopped.
    NoMatch(usize),
    /// The DFA gave up (cache thrashing, state explosion, or input it cannot
    /// handle correctly); the caller should fall back to an NFA engine.
    Quit,
}

impl<T> Result<T> {
    /// Returns true if this result corresponds to a match.
    pub fn is_match(&self) -> bool {
        // Only the `Match` variant counts; both other variants are failures.
        if let Result::Match(_) = *self {
            true
        } else {
            false
        }
    }

    /// Maps the given function onto T and returns the result.
    ///
    /// If this isn't a match, then this is a no-op.
    pub fn map<U, F: FnMut(T) -> U>(self, mut f: F) -> Result<U> {
        match self {
            Result::NoMatch(x) => Result::NoMatch(x),
            Result::Quit => Result::Quit,
            Result::Match(t) => Result::Match(f(t)),
        }
    }

    /// Sets the non-match position.
    ///
    /// If this isn't a non-match, then this is a no-op.
    fn set_non_match(self, at: usize) -> Result<T> {
        if let Result::NoMatch(_) = self {
            Result::NoMatch(at)
        } else {
            self
        }
    }
}
254
/// `State` is a DFA state. It contains an ordered set of NFA states (not
/// necessarily complete) and a smattering of flags.
///
/// The flags are packed into the first byte of data.
///
/// States don't carry their transitions. Instead, transitions are stored in
/// a single row-major table.
///
/// Delta encoding is used to store the instruction pointers.
/// The first instruction pointer is stored directly starting
/// at data[1], and each following pointer is stored as an offset
/// to the previous one. If a delta is in the range -127..127,
/// it is packed into a single byte; Otherwise the byte 128 (-128 as an i8)
/// is coded as a flag, followed by 4 bytes encoding the delta.
#[derive(Clone, Eq, Hash, PartialEq)]
struct State {
    /// Packed representation: one flag byte followed by the delta-encoded
    /// NFA instruction pointers described above.
    data: Box<[u8]>,
}
273
/// `InstPtr` is a 32 bit pointer into a sequence of opcodes (i.e., it indexes
/// an NFA state).
///
/// Throughout this library, this is usually set to `usize`, but we force a
/// `u32` here for the DFA to save on space.
// Safe because `can_exec` rejects programs with more than i32::MAX
// instructions, so every pointer (and every delta between pointers) fits.
type InstPtr = u32;
280
281 /// Adds ip to data using delta encoding with respect to prev.
282 ///
283 /// After completion, `data` will contain `ip` and `prev` will be set to `ip`.
284 fn push_inst_ptr(data: &mut Vec<u8>, prev: &mut InstPtr, ip: InstPtr) {
285 let delta = (ip as i32) - (*prev as i32);
286 write_vari32(data, delta);
287 *prev = ip;
288 }
289
// Iterator over the delta-encoded instruction pointers stored in a `State`.
// `base` is the running absolute pointer; `data` is the remaining encoded
// byte suffix (see the delta-encoding description on `State`).
struct InstPtrs<'a> {
    base: usize,
    data: &'a [u8],
}
294
295 impl <'a>Iterator for InstPtrs<'a> {
296 type Item = usize;
297
298 fn next(&mut self) -> Option<usize> {
299 if self.data.is_empty() {
300 return None;
301 }
302 let (delta, nread) = read_vari32(self.data);
303 let base = self.base as i32 + delta;
304 debug_assert!(base >= 0);
305 debug_assert!(nread > 0);
306 self.data = &self.data[nread..];
307 self.base = base as usize;
308 Some(self.base)
309 }
310 }
311
312 impl State {
313 fn flags(&self) -> StateFlags {
314 StateFlags(self.data[0])
315 }
316
317 fn inst_ptrs(&self) -> InstPtrs {
318 InstPtrs {
319 base: 0,
320 data: &self.data[1..],
321 }
322 }
323 }
324
/// `StatePtr` is a 32 bit pointer to the start of a row in the transition
/// table.
///
/// It has many special values. There are two types of special values:
/// sentinels and flags.
///
/// Sentinels correspond to special states that carry some kind of
/// significance. There are three such states: unknown, dead and quit states.
///
/// Unknown states are states that haven't been computed yet. They indicate
/// that a transition should be filled in that points to either an existing
/// cached state or a new state altogether. In general, an unknown state means
/// "follow the NFA's epsilon transitions."
///
/// Dead states are states that can never lead to a match, no matter what
/// subsequent input is observed. This means that the DFA should quit
/// immediately and return the longest match it has found thus far.
///
/// Quit states are states that imply the DFA is not capable of matching the
/// regex correctly. Currently, this is only used when a Unicode word boundary
/// exists in the regex *and* a non-ASCII byte is observed.
///
/// The other type of state pointer is a state pointer with special flag bits.
/// There are two flags: a start flag and a match flag. The lower bits of both
/// kinds always contain a "valid" `StatePtr` (indicated by the `STATE_MAX`
/// mask).
///
/// The start flag means that the state is a start state, and therefore may be
/// subject to special prefix scanning optimizations.
///
/// The match flag means that the state is a match state, and therefore the
/// current position in the input (while searching) should be recorded.
///
/// The above exists mostly in the service of making the inner loop fast.
/// In particular, the inner *inner* loop looks something like this:
///
/// ```ignore
/// while state <= STATE_MAX and i < len(text):
///     state = state.next[i]
/// ```
///
/// This is nice because it lets us execute a lazy DFA as if it were an
/// entirely offline DFA (i.e., with very few instructions). The loop will
/// quit only when we need to examine a case that needs special attention.
type StatePtr = u32;
370
/// An unknown state means that the state has not been computed yet, and that
/// the only way to progress is to compute it.
const STATE_UNKNOWN: StatePtr = 1<<31;

/// A dead state means that the state has been computed and it is known that
/// once it is entered, no future match can ever occur.
const STATE_DEAD: StatePtr = STATE_UNKNOWN + 1;

/// A quit state means that the DFA came across some input that it doesn't
/// know how to process correctly. The DFA should quit and another matching
/// engine should be run in its place.
const STATE_QUIT: StatePtr = STATE_DEAD + 1;

/// A start state is a state that the DFA can start in.
///
/// Note that start states have their lower bits set to a state pointer.
const STATE_START: StatePtr = 1<<30;

/// A match state means that the regex has successfully matched.
///
/// Note that match states have their lower bits set to a state pointer.
const STATE_MATCH: StatePtr = 1<<29;

/// The maximum state pointer. This is useful to mask out the "valid" state
/// pointer from a state with the "start" or "match" bits set.
///
/// It doesn't make sense to use this with unknown, dead or quit state
/// pointers, since those pointers are sentinels and never have their lower
/// bits set to anything meaningful.
// Note: all three sentinels (and the start/match flag bits) compare greater
// than STATE_MAX, which is what makes `si <= STATE_MAX` the fast-path test.
const STATE_MAX: StatePtr = STATE_MATCH - 1;
401
/// Byte is a u8 in spirit, but a u16 in practice so that we can represent the
/// special EOF sentinel value.
// Constructed via `Byte::byte(..)` for real input and `Byte::eof()` for the
// end-of-input sentinel used in the final transition.
#[derive(Copy, Clone, Debug)]
struct Byte(u16);
406
/// A set of flags for zero-width assertions.
// Each flag records whether the corresponding zero-width assertion holds at
// the current input position (e.g., `start` for `^` at the very beginning).
#[derive(Clone, Copy, Eq, Debug, Default, Hash, PartialEq)]
struct EmptyFlags {
    start: bool,
    end: bool,
    start_line: bool,
    end_line: bool,
    word_boundary: bool,
    not_word_boundary: bool,
}
417
/// A set of flags describing various configurations of a DFA state. This is
/// represented by a `u8` so that it is compact.
// Stored as the first byte of `State::data`; see `State::flags`.
#[derive(Clone, Copy, Eq, Default, Hash, PartialEq)]
struct StateFlags(u8);
422
423 impl Cache {
424 /// Create new empty cache for the DFA engine.
425 pub fn new(prog: &Program) -> Self {
426 // We add 1 to account for the special EOF byte.
427 let num_byte_classes = (prog.byte_classes[255] as usize + 1) + 1;
428 let starts = vec![STATE_UNKNOWN; 256];
429 let mut cache = Cache {
430 inner: CacheInner {
431 compiled: HashMap::new(),
432 trans: Transitions::new(num_byte_classes),
433 states: vec![],
434 start_states: starts,
435 stack: vec![],
436 flush_count: 0,
437 size: 0,
438 },
439 qcur: SparseSet::new(prog.insts.len()),
440 qnext: SparseSet::new(prog.insts.len()),
441 };
442 cache.inner.reset_size();
443 cache
444 }
445 }
446
447 impl CacheInner {
448 /// Resets the cache size to account for fixed costs, such as the program
449 /// and stack sizes.
450 fn reset_size(&mut self) {
451 self.size =
452 (self.start_states.len() * mem::size_of::<StatePtr>())
453 + (self.stack.len() * mem::size_of::<InstPtr>());
454 }
455 }
456
457 impl<'a> Fsm<'a> {
458 #[inline(always)] // reduces constant overhead
459 pub fn forward(
460 prog: &'a Program,
461 cache: &ProgramCache,
462 quit_after_match: bool,
463 text: &[u8],
464 at: usize,
465 ) -> Result<usize> {
466 let mut cache = cache.borrow_mut();
467 let cache = &mut cache.dfa;
468 let mut dfa = Fsm {
469 prog: prog,
470 start: 0, // filled in below
471 at: at,
472 quit_after_match: quit_after_match,
473 last_match_si: STATE_UNKNOWN,
474 last_cache_flush: at,
475 cache: &mut cache.inner,
476 };
477 let (empty_flags, state_flags) = dfa.start_flags(text, at);
478 dfa.start = match dfa.start_state(
479 &mut cache.qcur,
480 empty_flags,
481 state_flags,
482 ) {
483 None => return Result::Quit,
484 Some(STATE_DEAD) => return Result::NoMatch(at),
485 Some(si) => si,
486 };
487 debug_assert!(dfa.start != STATE_UNKNOWN);
488 dfa.exec_at(&mut cache.qcur, &mut cache.qnext, text)
489 }
490
491 #[inline(always)] // reduces constant overhead
492 pub fn reverse(
493 prog: &'a Program,
494 cache: &ProgramCache,
495 quit_after_match: bool,
496 text: &[u8],
497 at: usize,
498 ) -> Result<usize> {
499 let mut cache = cache.borrow_mut();
500 let cache = &mut cache.dfa_reverse;
501 let mut dfa = Fsm {
502 prog: prog,
503 start: 0, // filled in below
504 at: at,
505 quit_after_match: quit_after_match,
506 last_match_si: STATE_UNKNOWN,
507 last_cache_flush: at,
508 cache: &mut cache.inner,
509 };
510 let (empty_flags, state_flags) = dfa.start_flags_reverse(text, at);
511 dfa.start = match dfa.start_state(
512 &mut cache.qcur,
513 empty_flags,
514 state_flags,
515 ) {
516 None => return Result::Quit,
517 Some(STATE_DEAD) => return Result::NoMatch(at),
518 Some(si) => si,
519 };
520 debug_assert!(dfa.start != STATE_UNKNOWN);
521 dfa.exec_at_reverse(&mut cache.qcur, &mut cache.qnext, text)
522 }
523
524 #[inline(always)] // reduces constant overhead
525 pub fn forward_many(
526 prog: &'a Program,
527 cache: &ProgramCache,
528 matches: &mut [bool],
529 text: &[u8],
530 at: usize,
531 ) -> Result<usize> {
532 debug_assert!(matches.len() == prog.matches.len());
533 let mut cache = cache.borrow_mut();
534 let cache = &mut cache.dfa;
535 let mut dfa = Fsm {
536 prog: prog,
537 start: 0, // filled in below
538 at: at,
539 quit_after_match: false,
540 last_match_si: STATE_UNKNOWN,
541 last_cache_flush: at,
542 cache: &mut cache.inner,
543 };
544 let (empty_flags, state_flags) = dfa.start_flags(text, at);
545 dfa.start = match dfa.start_state(
546 &mut cache.qcur,
547 empty_flags,
548 state_flags,
549 ) {
550 None => return Result::Quit,
551 Some(STATE_DEAD) => return Result::NoMatch(at),
552 Some(si) => si,
553 };
554 debug_assert!(dfa.start != STATE_UNKNOWN);
555 let result = dfa.exec_at(&mut cache.qcur, &mut cache.qnext, text);
556 if result.is_match() {
557 if matches.len() == 1 {
558 matches[0] = true;
559 } else {
560 debug_assert!(dfa.last_match_si != STATE_UNKNOWN);
561 debug_assert!(dfa.last_match_si != STATE_DEAD);
562 for ip in dfa.state(dfa.last_match_si).inst_ptrs() {
563 if let Inst::Match(slot) = dfa.prog[ip] {
564 matches[slot] = true;
565 }
566 }
567 }
568 }
569 result
570 }
571
    /// Executes the DFA on a forward NFA.
    ///
    /// {qcur,qnext} are scratch ordered sets which may be non-empty.
    #[inline(always)] // reduces constant overhead
    fn exec_at(
        &mut self,
        qcur: &mut SparseSet,
        qnext: &mut SparseSet,
        text: &[u8],
    ) -> Result<usize> {
        // For the most part, the DFA is basically:
        //
        //   last_match = null
        //   while current_byte != EOF:
        //     si = current_state.next[current_byte]
        //     if si is match
        //       last_match = si
        //   return last_match
        //
        // However, we need to deal with a few things:
        //
        //   1. This is an *online* DFA, so the current state's next list
        //      may not point to anywhere yet, so we must go out and compute
        //      them. (They are then cached into the current state's next list
        //      to avoid re-computation.)
        //   2. If we come across a state that is known to be dead (i.e., never
        //      leads to a match), then we can quit early.
        //   3. If the caller just wants to know if a match occurs, then we
        //      can quit as soon as we know we have a match. (Full leftmost
        //      first semantics require continuing on.)
        //   4. If we're in the start state, then we can use a pre-computed set
        //      of prefix literals to skip quickly along the input.
        //   5. After the input is exhausted, we run the DFA on one symbol
        //      that stands for EOF. This is useful for handling empty width
        //      assertions.
        //   6. We can't actually do state.next[byte]. Instead, we have to do
        //      state.next[byte_classes[byte]], which permits us to keep the
        //      'next' list very small.
        //
        // Since there's a bunch of extra stuff we need to consider, we do some
        // pretty hairy tricks to get the inner loop to run as fast as
        // possible.
        debug_assert!(!self.prog.is_reverse);

        // The last match is the currently known ending match position. It is
        // reported as an index to the most recent byte that resulted in a
        // transition to a match state and is always stored in capture slot `1`
        // when searching forwards. Its maximum value is `text.len()`.
        let mut result = Result::NoMatch(self.at);
        let (mut prev_si, mut next_si) = (self.start, self.start);
        let mut at = self.at;
        while at < text.len() {
            // This is the real inner loop. We take advantage of special bits
            // set in the state pointer to determine whether a state is in the
            // "common" case or not. Specifically, the common case is a
            // non-match non-start non-dead state that has already been
            // computed. So long as we remain in the common case, this inner
            // loop will chew through the input.
            //
            // We also unroll the loop 4 times to amortize the cost of checking
            // whether we've consumed the entire input. We are also careful
            // to make sure that `prev_si` always represents the previous state
            // and `next_si` always represents the next state after the loop
            // exits, even if it isn't always true inside the loop.
            while next_si <= STATE_MAX && at < text.len() {
                // Argument for safety is in the definition of next_si.
                prev_si = unsafe { self.next_si(next_si, text, at) };
                at += 1;
                // The `at + 2` headroom check lets the remaining three
                // unrolled steps below skip their own length checks.
                if prev_si > STATE_MAX || at + 2 >= text.len() {
                    mem::swap(&mut prev_si, &mut next_si);
                    break;
                }
                next_si = unsafe { self.next_si(prev_si, text, at) };
                at += 1;
                if next_si > STATE_MAX {
                    break;
                }
                prev_si = unsafe { self.next_si(next_si, text, at) };
                at += 1;
                if prev_si > STATE_MAX {
                    mem::swap(&mut prev_si, &mut next_si);
                    break;
                }
                next_si = unsafe { self.next_si(prev_si, text, at) };
                at += 1;
            }
            if next_si & STATE_MATCH > 0 {
                // A match state is outside of the common case because it needs
                // special case analysis. In particular, we need to record the
                // last position as having matched and possibly quit the DFA if
                // we don't need to keep matching.
                next_si &= !STATE_MATCH;
                result = Result::Match(at - 1);
                if self.quit_after_match {
                    return result;
                }
                self.last_match_si = next_si;
                prev_si = next_si;

                // This permits short-circuiting when matching a regex set.
                // In particular, if this DFA state contains only match states,
                // then it's impossible to extend the set of matches since
                // match states are final. Therefore, we can quit.
                if self.prog.matches.len() > 1 {
                    let state = self.state(next_si);
                    let just_matches = state.inst_ptrs()
                        .all(|ip| self.prog[ip].is_match());
                    if just_matches {
                        return result;
                    }
                }

                // Another inner loop! If the DFA stays in this particular
                // match state, then we can rip through all of the input
                // very quickly, and only recording the match location once
                // we've left this particular state.
                let cur = at;
                while (next_si & !STATE_MATCH) == prev_si
                    && at + 2 < text.len() {
                    // Argument for safety is in the definition of next_si.
                    next_si = unsafe {
                        self.next_si(next_si & !STATE_MATCH, text, at)
                    };
                    at += 1;
                }
                if at > cur {
                    result = Result::Match(at - 2);
                }
            } else if next_si & STATE_START > 0 {
                // A start state isn't in the common case because we may
                // want to do quick prefix scanning. If the program doesn't
                // have a detected prefix, then start states are actually
                // considered common and this case is never reached.
                debug_assert!(self.has_prefix());
                next_si &= !STATE_START;
                prev_si = next_si;
                at = match self.prefix_at(text, at) {
                    None => return Result::NoMatch(text.len()),
                    Some(i) => i,
                };
            } else if next_si >= STATE_UNKNOWN {
                if next_si == STATE_QUIT {
                    return Result::Quit;
                }
                // Finally, this corresponds to the case where the transition
                // entered a state that can never lead to a match or a state
                // that hasn't been computed yet. The latter being the "slow"
                // path.
                let byte = Byte::byte(text[at - 1]);
                // We no longer care about the special bits in the state
                // pointer.
                prev_si &= STATE_MAX;
                // Record where we are. This is used to track progress for
                // determining whether we should quit if we've flushed the
                // cache too much.
                self.at = at;
                next_si = match self.next_state(qcur, qnext, prev_si, byte) {
                    None => return Result::Quit,
                    Some(STATE_DEAD) => return result.set_non_match(at),
                    Some(si) => si,
                };
                debug_assert!(next_si != STATE_UNKNOWN);
                if next_si & STATE_MATCH > 0 {
                    next_si &= !STATE_MATCH;
                    result = Result::Match(at - 1);
                    if self.quit_after_match {
                        return result;
                    }
                    self.last_match_si = next_si;
                }
                prev_si = next_si;
            } else {
                prev_si = next_si;
            }
        }

        // Run the DFA once more on the special EOF sentinel value.
        // We don't care about the special bits in the state pointer any more,
        // so get rid of them.
        prev_si &= STATE_MAX;
        prev_si = match self.next_state(qcur, qnext, prev_si, Byte::eof()) {
            None => return Result::Quit,
            Some(STATE_DEAD) => return result.set_non_match(text.len()),
            Some(si) => si & !STATE_START,
        };
        debug_assert!(prev_si != STATE_UNKNOWN);
        if prev_si & STATE_MATCH > 0 {
            prev_si &= !STATE_MATCH;
            self.last_match_si = prev_si;
            result = Result::Match(text.len());
        }
        result
    }
765
    /// Executes the DFA on a reverse NFA.
    #[inline(always)] // reduces constant overhead
    fn exec_at_reverse(
        &mut self,
        qcur: &mut SparseSet,
        qnext: &mut SparseSet,
        text: &[u8],
    ) -> Result<usize> {
        // The comments in `exec_at` above mostly apply here too. The main
        // difference is that we move backwards over the input and we look for
        // the longest possible match instead of the leftmost-first match.
        //
        // N.B. The code duplication here is regrettable. Efforts to improve
        // it without sacrificing performance are welcome. ---AG
        debug_assert!(self.prog.is_reverse);
        let mut result = Result::NoMatch(self.at);
        let (mut prev_si, mut next_si) = (self.start, self.start);
        let mut at = self.at;
        while at > 0 {
            // The 4x-unrolled common-case loop, moving backwards this time.
            while next_si <= STATE_MAX && at > 0 {
                // Argument for safety is in the definition of next_si.
                at -= 1;
                prev_si = unsafe { self.next_si(next_si, text, at) };
                // The `at <= 4` headroom check leaves room for the remaining
                // three unrolled decrements below.
                if prev_si > STATE_MAX || at <= 4 {
                    mem::swap(&mut prev_si, &mut next_si);
                    break;
                }
                at -= 1;
                next_si = unsafe { self.next_si(prev_si, text, at) };
                if next_si > STATE_MAX {
                    break;
                }
                at -= 1;
                prev_si = unsafe { self.next_si(next_si, text, at) };
                if prev_si > STATE_MAX {
                    mem::swap(&mut prev_si, &mut next_si);
                    break;
                }
                at -= 1;
                next_si = unsafe { self.next_si(prev_si, text, at) };
            }
            if next_si & STATE_MATCH > 0 {
                // Record the match and possibly quit early, mirroring the
                // forward search (but positions move in the other direction).
                next_si &= !STATE_MATCH;
                result = Result::Match(at + 1);
                if self.quit_after_match {
                    return result
                }
                self.last_match_si = next_si;
                prev_si = next_si;
                // Rip backwards through input while we stay in this exact
                // match state; only record the position once we leave it.
                let cur = at;
                while (next_si & !STATE_MATCH) == prev_si && at >= 2 {
                    // Argument for safety is in the definition of next_si.
                    at -= 1;
                    next_si = unsafe {
                        self.next_si(next_si & !STATE_MATCH, text, at)
                    };
                }
                if at < cur {
                    result = Result::Match(at + 2);
                }
            } else if next_si >= STATE_UNKNOWN {
                if next_si == STATE_QUIT {
                    return Result::Quit;
                }
                // Dead or not-yet-computed state: take the slow path and
                // compute the next state from the NFA.
                let byte = Byte::byte(text[at]);
                prev_si &= STATE_MAX;
                self.at = at;
                next_si = match self.next_state(qcur, qnext, prev_si, byte) {
                    None => return Result::Quit,
                    Some(STATE_DEAD) => return result.set_non_match(at),
                    Some(si) => si,
                };
                debug_assert!(next_si != STATE_UNKNOWN);
                if next_si & STATE_MATCH > 0 {
                    next_si &= !STATE_MATCH;
                    result = Result::Match(at + 1);
                    if self.quit_after_match {
                        return result;
                    }
                    self.last_match_si = next_si;
                }
                prev_si = next_si;
            } else {
                prev_si = next_si;
            }
        }

        // Run the DFA once more on the special EOF sentinel value.
        prev_si = match self.next_state(qcur, qnext, prev_si, Byte::eof()) {
            None => return Result::Quit,
            Some(STATE_DEAD) => return result.set_non_match(0),
            Some(si) => si,
        };
        debug_assert!(prev_si != STATE_UNKNOWN);
        if prev_si & STATE_MATCH > 0 {
            prev_si &= !STATE_MATCH;
            self.last_match_si = prev_si;
            result = Result::Match(0);
        }
        result
    }
867
    /// next_si transitions to the next state, where the transition input
    /// corresponds to text[i].
    ///
    /// This elides bounds checks, and is therefore unsafe.
    ///
    /// Safety: callers must guarantee `i < text.len()` and that `si` was
    /// obtained from the transition table (see the argument below).
    #[inline(always)]
    unsafe fn next_si(&self, si: StatePtr, text: &[u8], i: usize) -> StatePtr {
        // What is the argument for safety here?
        // We have three unchecked accesses that could possibly violate safety:
        //
        // 1. The given byte of input (`text[i]`).
        // 2. The class of the byte of input (`classes[text[i]]`).
        // 3. The transition for the class (`trans[si + cls]`).
        //
        // (1) is only safe when calling next_si is guarded by
        // `i < text.len()`.
        //
        // (2) is the easiest case to guarantee since `text[i]` is always a
        // `u8` and `self.prog.byte_classes` always has length `u8::MAX`.
        // (See `ByteClassSet.byte_classes` in `compile.rs`.)
        //
        // (3) is only safe if (1)+(2) are safe. Namely, the transitions
        // of every state are defined to have length equal to the number of
        // byte classes in the program. Therefore, a valid class leads to a
        // valid transition. (All possible transitions are valid lookups, even
        // if it points to a state that hasn't been computed yet.) (3) also
        // relies on `si` being correct, but StatePtrs should only ever be
        // retrieved from the transition table, which ensures they are correct.
        debug_assert!(i < text.len());
        let b = *text.get_unchecked(i);
        debug_assert!((b as usize) < self.prog.byte_classes.len());
        let cls = *self.prog.byte_classes.get_unchecked(b as usize);
        self.cache.trans.next_unchecked(si, cls as usize)
    }
901
    /// Computes the next state given the current state and the current input
    /// byte (which may be EOF).
    ///
    /// If STATE_DEAD is returned, then there is no valid state transition.
    /// This implies that no permutation of future input can lead to a match
    /// state.
    ///
    /// STATE_UNKNOWN can never be returned.
    fn exec_byte(
        &mut self,
        qcur: &mut SparseSet,
        qnext: &mut SparseSet,
        mut si: StatePtr,
        b: Byte,
    ) -> Option<StatePtr> {
        use prog::Inst::*;

        // Initialize a queue with the current DFA state's NFA states.
        qcur.clear();
        for ip in self.state(si).inst_ptrs() {
            qcur.insert(ip);
        }

        // Before inspecting the current byte, we may need to also inspect
        // whether the position immediately preceding the current byte
        // satisfies the empty assertions found in the current state.
        //
        // We only need to do this step if there are any empty assertions in
        // the current state.
        let is_word_last = self.state(si).flags().is_word();
        let is_word = b.is_ascii_word();
        if self.state(si).flags().has_empty() {
            // Compute the flags immediately preceding the current byte.
            // This means we only care about the "end" or "end line" flags.
            // (The "start" flags are computed immediately following the
            // current byte and are handled below.)
            let mut flags = EmptyFlags::default();
            if b.is_eof() {
                flags.end = true;
                flags.end_line = true;
            } else if b.as_byte().map_or(false, |b| b == b'\n') {
                flags.end_line = true;
            }
            if is_word_last == is_word {
                flags.not_word_boundary = true;
            } else {
                flags.word_boundary = true;
            }
            // Now follow epsilon transitions from every NFA state, but make
            // sure we only follow transitions that satisfy our flags.
            qnext.clear();
            for &ip in &*qcur {
                self.follow_epsilons(usize_to_u32(ip), qnext, flags);
            }
            mem::swap(qcur, qnext);
        }

        // Now we set flags for immediately after the current byte. Since start
        // states are processed separately, and are the only states that can
        // have the StartText flag set, we therefore only need to worry about
        // the StartLine flag here.
        //
        // We do also keep track of whether this DFA state contains an NFA
        // state that is a matching state. This is precisely how we delay the
        // DFA matching by one byte in order to process the special EOF
        // sentinel byte. Namely, if this DFA state contains a matching NFA
        // state, then it is the *next* DFA state that is marked as a match.
        let mut empty_flags = EmptyFlags::default();
        let mut state_flags = StateFlags::default();
        empty_flags.start_line = b.as_byte().map_or(false, |b| b == b'\n');
        if b.is_ascii_word() {
            state_flags.set_word();
        }
        // Now follow all epsilon transitions again, but only after consuming
        // the current byte.
        qnext.clear();
        for &ip in &*qcur {
            match self.prog[ip as usize] {
                // These states never happen in a byte-based program.
                Char(_) | Ranges(_) => unreachable!(),
                // These states are handled when following epsilon transitions.
                Save(_) | Split(_) | EmptyLook(_) => {}
                Match(_) => {
                    state_flags.set_match();
                    if !self.continue_past_first_match() {
                        break;
                    } else if self.prog.matches.len() > 1
                            && !qnext.contains(ip as usize) {
                        // If we are continuing on to find other matches,
                        // then keep a record of the match states we've seen.
                        qnext.insert(ip);
                    }
                }
                Bytes(ref inst) => {
                    if b.as_byte().map_or(false, |b| inst.matches(b)) {
                        self.follow_epsilons(
                            inst.goto as InstPtr, qnext, empty_flags);
                    }
                }
            }
        }

        let cache =
            if b.is_eof() && self.prog.matches.len() > 1 {
                // If we're processing the last byte of the input and we're
                // matching a regex set, then make the next state contain the
                // previous states' transitions. We do this so that the main
                // matching loop can extract all of the match instructions.
                mem::swap(qcur, qnext);
                // And don't cache this state because it's totally bunk.
                false
            } else {
                true
            };

        // We've now built up the set of NFA states that ought to comprise the
        // next DFA state, so try to find it in the cache, and if it doesn't
        // exist, cache it.
        //
        // N.B. We pass `&mut si` here because the cache may clear itself if
        // it has gotten too full. When that happens, the location of the
        // current state may change.
        let mut next = match self.cached_state(
            qnext,
            state_flags,
            Some(&mut si),
        ) {
            None => return None,
            Some(next) => next,
        };
        if (self.start & !STATE_START) == next {
            // Start states can never be match states since all matches are
            // delayed by one byte.
            debug_assert!(!self.state(next).flags().is_match());
            next = self.start_ptr(next);
        }
        if next <= STATE_MAX && self.state(next).flags().is_match() {
            next |= STATE_MATCH;
        }
        debug_assert!(next != STATE_UNKNOWN);
        // And now store our state in the current state's next list.
        if cache {
            let cls = self.byte_class(b);
            self.cache.trans.set_next(si, cls, next);
        }
        Some(next)
    }
1049
    /// Follows the epsilon transitions starting at (and including) `ip`. The
    /// resulting states are inserted into the ordered set `q`.
    ///
    /// Conditional epsilon transitions (i.e., empty width assertions) are only
    /// followed if they are satisfied by the given flags, which should
    /// represent the flags set at the current location in the input.
    ///
    /// If the current location corresponds to the empty string, then only the
    /// end line and/or end text flags may be set. If the current location
    /// corresponds to a real byte in the input, then only the start line
    /// and/or start text flags may be set.
    ///
    /// As an exception to the above, when finding the initial state, any of
    /// the above flags may be set:
    ///
    /// If matching starts at the beginning of the input, then start text and
    /// start line should be set. If the input is empty, then end text and end
    /// line should also be set.
    ///
    /// If matching starts after the beginning of the input, then only start
    /// line should be set if the preceding byte is `\n`. End line should never
    /// be set in this case. (Even if the proceeding byte is a `\n`, it will
    /// be handled in a subsequent DFA state.)
    fn follow_epsilons(
        &mut self,
        ip: InstPtr,
        q: &mut SparseSet,
        flags: EmptyFlags,
    ) {
        use prog::Inst::*;
        use prog::EmptyLook::*;

        // We need to traverse the NFA to follow epsilon transitions, so avoid
        // recursion with an explicit stack.
        self.cache.stack.push(ip);
        while let Some(mut ip) = self.cache.stack.pop() {
            // Try to munch through as many states as possible without
            // pushes/pops to the stack.
            loop {
                // Don't visit states we've already added.
                if q.contains(ip as usize) {
                    break;
                }
                q.insert(ip as usize);
                match self.prog[ip as usize] {
                    Char(_) | Ranges(_) => unreachable!(),
                    Match(_) | Bytes(_) => {
                        break;
                    }
                    EmptyLook(ref inst) => {
                        // Only follow empty assertion states if our flags
                        // satisfy the assertion.
                        match inst.look {
                            StartLine if flags.start_line => {
                                ip = inst.goto as InstPtr;
                            }
                            EndLine if flags.end_line => {
                                ip = inst.goto as InstPtr;
                            }
                            StartText if flags.start => {
                                ip = inst.goto as InstPtr;
                            }
                            EndText if flags.end => {
                                ip = inst.goto as InstPtr;
                            }
                            WordBoundaryAscii if flags.word_boundary => {
                                ip = inst.goto as InstPtr;
                            }
                            NotWordBoundaryAscii if flags.not_word_boundary => {
                                ip = inst.goto as InstPtr;
                            }
                            WordBoundary if flags.word_boundary => {
                                ip = inst.goto as InstPtr;
                            }
                            NotWordBoundary if flags.not_word_boundary => {
                                ip = inst.goto as InstPtr;
                            }
                            // An unsatisfied assertion terminates this path.
                            StartLine | EndLine | StartText | EndText
                            | WordBoundaryAscii | NotWordBoundaryAscii
                            | WordBoundary | NotWordBoundary => {
                                break;
                            }
                        }
                    }
                    Save(ref inst) => {
                        ip = inst.goto as InstPtr;
                    }
                    Split(ref inst) => {
                        // Queue one branch and continue down the other.
                        self.cache.stack.push(inst.goto2 as InstPtr);
                        ip = inst.goto1 as InstPtr;
                    }
                }
            }
        }
    }
1145
1146 /// Find a previously computed state matching the given set of instructions
1147 /// and is_match bool.
1148 ///
1149 /// The given set of instructions should represent a single state in the
1150 /// NFA along with all states reachable without consuming any input.
1151 ///
1152 /// The is_match bool should be true if and only if the preceding DFA state
1153 /// contains an NFA matching state. The cached state produced here will
1154 /// then signify a match. (This enables us to delay a match by one byte,
1155 /// in order to account for the EOF sentinel byte.)
1156 ///
1157 /// If the cache is full, then it is wiped before caching a new state.
1158 ///
1159 /// The current state should be specified if it exists, since it will need
1160 /// to be preserved if the cache clears itself. (Start states are
1161 /// always saved, so they should not be passed here.) It takes a mutable
1162 /// pointer to the index because if the cache is cleared, the state's
1163 /// location may change.
1164 fn cached_state(
1165 &mut self,
1166 q: &SparseSet,
1167 mut state_flags: StateFlags,
1168 current_state: Option<&mut StatePtr>,
1169 ) -> Option<StatePtr> {
1170 // If we couldn't come up with a non-empty key to represent this state,
1171 // then it is dead and can never lead to a match.
1172 //
1173 // Note that inst_flags represent the set of empty width assertions
1174 // in q. We use this as an optimization in exec_byte to determine when
1175 // we should follow epsilon transitions at the empty string preceding
1176 // the current byte.
1177 let key = match self.cached_state_key(q, &mut state_flags) {
1178 None => return Some(STATE_DEAD),
1179 Some(v) => v,
1180 };
1181 // In the cache? Cool. Done.
1182 if let Some(&si) = self.cache.compiled.get(&key) {
1183 return Some(si);
1184 }
1185 // If the cache has gotten too big, wipe it.
1186 if self.approximate_size() > self.prog.dfa_size_limit
1187 && !self.clear_cache_and_save(current_state)
1188 {
1189 // Ooops. DFA is giving up.
1190 return None;
1191 }
1192 // Allocate room for our state and add it.
1193 self.add_state(key)
1194 }
1195
1196 /// Produces a key suitable for describing a state in the DFA cache.
1197 ///
1198 /// The key invariant here is that equivalent keys are produced for any two
1199 /// sets of ordered NFA states (and toggling of whether the previous NFA
1200 /// states contain a match state) that do not discriminate a match for any
1201 /// input.
1202 ///
1203 /// Specifically, q should be an ordered set of NFA states and is_match
1204 /// should be true if and only if the previous NFA states contained a match
1205 /// state.
1206 fn cached_state_key(
1207 &mut self,
1208 q: &SparseSet,
1209 state_flags: &mut StateFlags,
1210 ) -> Option<State> {
1211 use prog::Inst::*;
1212
1213 // We need to build up enough information to recognize pre-built states
1214 // in the DFA. Generally speaking, this includes every instruction
1215 // except for those which are purely epsilon transitions, e.g., the
1216 // Save and Split instructions.
1217 //
1218 // Empty width assertions are also epsilon transitions, but since they
1219 // are conditional, we need to make them part of a state's key in the
1220 // cache.
1221
1222 // Reserve 1 byte for flags.
1223 let mut insts = vec![0];
1224 let mut prev = 0;
1225 for &ip in q {
1226 let ip = usize_to_u32(ip);
1227 match self.prog[ip as usize] {
1228 Char(_) | Ranges(_) => unreachable!(),
1229 Save(_) | Split(_) => {}
1230 Bytes(_) => push_inst_ptr(&mut insts, &mut prev, ip),
1231 EmptyLook(_) => {
1232 state_flags.set_empty();
1233 push_inst_ptr(&mut insts, &mut prev, ip)
1234 }
1235 Match(_) => {
1236 push_inst_ptr(&mut insts, &mut prev, ip);
1237 if !self.continue_past_first_match() {
1238 break;
1239 }
1240 }
1241 }
1242 }
1243 // If we couldn't transition to any other instructions and we didn't
1244 // see a match when expanding NFA states previously, then this is a
1245 // dead state and no amount of additional input can transition out
1246 // of this state.
1247 if insts.len() == 1 && !state_flags.is_match() {
1248 None
1249 } else {
1250 let StateFlags(f) = *state_flags;
1251 insts[0] = f;
1252 Some(State { data: insts.into_boxed_slice() })
1253 }
1254 }
1255
1256 /// Clears the cache, but saves and restores current_state if it is not
1257 /// none.
1258 ///
1259 /// The current state must be provided here in case its location in the
1260 /// cache changes.
1261 ///
1262 /// This returns false if the cache is not cleared and the DFA should
1263 /// give up.
1264 fn clear_cache_and_save(
1265 &mut self,
1266 current_state: Option<&mut StatePtr>,
1267 ) -> bool {
1268 if self.cache.states.is_empty() {
1269 // Nothing to clear...
1270 return true;
1271 }
1272 match current_state {
1273 None => self.clear_cache(),
1274 Some(si) => {
1275 let cur = self.state(*si).clone();
1276 if !self.clear_cache() {
1277 return false;
1278 }
1279 // The unwrap is OK because we just cleared the cache and
1280 // therefore know that the next state pointer won't exceed
1281 // STATE_MAX.
1282 *si = self.restore_state(cur).unwrap();
1283 true
1284 }
1285 }
1286 }
1287
    /// Wipes the state cache, but saves and restores the current start state.
    ///
    /// This returns false if the cache is not cleared and the DFA should
    /// give up.
    fn clear_cache(&mut self) -> bool {
        // Bail out of the DFA if we're moving too "slowly."
        // A heuristic from RE2: assume the DFA is too slow if it is processing
        // 10 or fewer bytes per state.
        // Additionally, we permit the cache to be flushed a few times before
        // calling it quits.
        let nstates = self.cache.states.len();
        if self.cache.flush_count >= 3
            && self.at >= self.last_cache_flush
            && (self.at - self.last_cache_flush) <= 10 * nstates {
            return false;
        }
        // Update statistics tracking cache flushes.
        self.last_cache_flush = self.at;
        self.cache.flush_count += 1;

        // OK, actually flush the cache. Save clones of the start state and
        // (if live) the last-match state first, since both pointers must
        // remain valid after the wipe.
        let start = self.state(self.start & !STATE_START).clone();
        let last_match = if self.last_match_si <= STATE_MAX {
            Some(self.state(self.last_match_si).clone())
        } else {
            None
        };
        self.cache.reset_size();
        self.cache.trans.clear();
        self.cache.states.clear();
        self.cache.compiled.clear();
        for s in &mut self.cache.start_states {
            *s = STATE_UNKNOWN;
        }
        // The unwraps are OK because we just cleared the cache and therefore
        // know that the next state pointer won't exceed STATE_MAX.
        let start_ptr = self.restore_state(start).unwrap();
        self.start = self.start_ptr(start_ptr);
        if let Some(last_match) = last_match {
            self.last_match_si = self.restore_state(last_match).unwrap();
        }
        true
    }
1331
1332 /// Restores the given state back into the cache, and returns a pointer
1333 /// to it.
1334 fn restore_state(&mut self, state: State) -> Option<StatePtr> {
1335 // If we've already stored this state, just return a pointer to it.
1336 // None will be the wiser.
1337 if let Some(&si) = self.cache.compiled.get(&state) {
1338 return Some(si);
1339 }
1340 self.add_state(state)
1341 }
1342
1343 /// Returns the next state given the current state si and current byte
1344 /// b. {qcur,qnext} are used as scratch space for storing ordered NFA
1345 /// states.
1346 ///
1347 /// This tries to fetch the next state from the cache, but if that fails,
1348 /// it computes the next state, caches it and returns a pointer to it.
1349 ///
1350 /// The pointer can be to a real state, or it can be STATE_DEAD.
1351 /// STATE_UNKNOWN cannot be returned.
1352 ///
1353 /// None is returned if a new state could not be allocated (i.e., the DFA
1354 /// ran out of space and thinks it's running too slowly).
1355 fn next_state(
1356 &mut self,
1357 qcur: &mut SparseSet,
1358 qnext: &mut SparseSet,
1359 si: StatePtr,
1360 b: Byte,
1361 ) -> Option<StatePtr> {
1362 if si == STATE_DEAD {
1363 return Some(STATE_DEAD);
1364 }
1365 match self.cache.trans.next(si, self.byte_class(b)) {
1366 STATE_UNKNOWN => self.exec_byte(qcur, qnext, si, b),
1367 STATE_QUIT => None,
1368 STATE_DEAD => Some(STATE_DEAD),
1369 nsi => Some(nsi),
1370 }
1371 }
1372
    /// Computes and returns the start state, where searching begins at
    /// position `at` in `text`. If the state has already been computed,
    /// then it is pulled from the cache. If the state hasn't been cached,
    /// then it is computed, cached and a pointer to it is returned.
    ///
    /// This may return STATE_DEAD but never STATE_UNKNOWN.
    #[inline(always)] // reduces constant overhead
    fn start_state(
        &mut self,
        q: &mut SparseSet,
        empty_flags: EmptyFlags,
        state_flags: StateFlags,
    ) -> Option<StatePtr> {
        // Compute an index into our cache of start states based on the set
        // of empty/state flags set at the current position in the input. We
        // don't use every flag since not all flags matter. For example, since
        // matches are delayed by one byte, start states can never be match
        // states. (The 7 bits used here imply a start-state cache of 128
        // entries.)
        let flagi = {
            (((empty_flags.start as u8) << 0) |
             ((empty_flags.end as u8) << 1) |
             ((empty_flags.start_line as u8) << 2) |
             ((empty_flags.end_line as u8) << 3) |
             ((empty_flags.word_boundary as u8) << 4) |
             ((empty_flags.not_word_boundary as u8) << 5) |
             ((state_flags.is_word() as u8) << 6))
            as usize
        };
        match self.cache.start_states[flagi] {
            STATE_UNKNOWN => {}
            STATE_DEAD => return Some(STATE_DEAD),
            si => return Some(si),
        }
        // Cache miss: build the start state from the program's entry point.
        q.clear();
        let start = usize_to_u32(self.prog.start);
        self.follow_epsilons(start, q, empty_flags);
        // Start states can never be match states because we delay every match
        // by one byte. Given an empty string and an empty match, the match
        // won't actually occur until the DFA processes the special EOF
        // sentinel byte.
        let sp = match self.cached_state(q, state_flags, None) {
            None => return None,
            Some(sp) => self.start_ptr(sp),
        };
        self.cache.start_states[flagi] = sp;
        Some(sp)
    }
1420
1421 /// Computes the set of starting flags for the given position in text.
1422 ///
1423 /// This should only be used when executing the DFA forwards over the
1424 /// input.
1425 fn start_flags(&self, text: &[u8], at: usize) -> (EmptyFlags, StateFlags) {
1426 let mut empty_flags = EmptyFlags::default();
1427 let mut state_flags = StateFlags::default();
1428 empty_flags.start = at == 0;
1429 empty_flags.end = text.is_empty();
1430 empty_flags.start_line = at == 0 || text[at - 1] == b'\n';
1431 empty_flags.end_line = text.is_empty();
1432
1433 let is_word_last = at > 0 && Byte::byte(text[at - 1]).is_ascii_word();
1434 let is_word = at < text.len() && Byte::byte(text[at]).is_ascii_word();
1435 if is_word_last {
1436 state_flags.set_word();
1437 }
1438 if is_word == is_word_last {
1439 empty_flags.not_word_boundary = true;
1440 } else {
1441 empty_flags.word_boundary = true;
1442 }
1443 (empty_flags, state_flags)
1444 }
1445
1446 /// Computes the set of starting flags for the given position in text.
1447 ///
1448 /// This should only be used when executing the DFA in reverse over the
1449 /// input.
1450 fn start_flags_reverse(
1451 &self,
1452 text: &[u8],
1453 at: usize,
1454 ) -> (EmptyFlags, StateFlags) {
1455 let mut empty_flags = EmptyFlags::default();
1456 let mut state_flags = StateFlags::default();
1457 empty_flags.start = at == text.len();
1458 empty_flags.end = text.is_empty();
1459 empty_flags.start_line = at == text.len() || text[at] == b'\n';
1460 empty_flags.end_line = text.is_empty();
1461
1462 let is_word_last =
1463 at < text.len() && Byte::byte(text[at]).is_ascii_word();
1464 let is_word = at > 0 && Byte::byte(text[at - 1]).is_ascii_word();
1465 if is_word_last {
1466 state_flags.set_word();
1467 }
1468 if is_word == is_word_last {
1469 empty_flags.not_word_boundary = true;
1470 } else {
1471 empty_flags.word_boundary = true;
1472 }
1473 (empty_flags, state_flags)
1474 }
1475
1476 /// Returns a reference to a State given a pointer to it.
1477 fn state(&self, si: StatePtr) -> &State {
1478 &self.cache.states[si as usize / self.num_byte_classes()]
1479 }
1480
    /// Adds the given state to the DFA.
    ///
    /// This allocates room for transitions out of this state in
    /// self.cache.trans. The transitions can be set with the returned
    /// StatePtr.
    ///
    /// If None is returned, then the state limit was reached and the DFA
    /// should quit.
    fn add_state(&mut self, state: State) -> Option<StatePtr> {
        // This will fail if the next state pointer exceeds STATE_PTR. In
        // practice, the cache limit will prevent us from ever getting here,
        // but maybe callers will set the cache size to something ridiculous...
        let si = match self.cache.trans.add() {
            None => return None,
            Some(si) => si,
        };
        // If the program has a Unicode word boundary, then set any transitions
        // for non-ASCII bytes to STATE_QUIT. If the DFA stumbles over such a
        // transition, then it will quit and an alternative matching engine
        // will take over.
        if self.prog.has_unicode_word_boundary {
            for b in 128..256 {
                let cls = self.byte_class(Byte::byte(b as u8));
                self.cache.trans.set_next(si, cls, STATE_QUIT);
            }
        }
        // Finally, put our actual state on to our heap of states and index it
        // so we can find it later. The size accounting approximates the heap
        // cost of the transition row, both copies of the state (vector and
        // map key), and the map's value.
        self.cache.size +=
            self.cache.trans.state_heap_size()
            + (2 * state.data.len())
            + (2 * mem::size_of::<State>())
            + mem::size_of::<StatePtr>();
        self.cache.states.push(state.clone());
        self.cache.compiled.insert(state, si);
        // Transition table and set of states and map should all be in sync.
        debug_assert!(self.cache.states.len()
                      == self.cache.trans.num_states());
        debug_assert!(self.cache.states.len()
                      == self.cache.compiled.len());
        Some(si)
    }
1523
1524 /// Quickly finds the next occurrence of any literal prefixes in the regex.
1525 /// If there are no literal prefixes, then the current position is
1526 /// returned. If there are literal prefixes and one could not be found,
1527 /// then None is returned.
1528 ///
1529 /// This should only be called when the DFA is in a start state.
1530 fn prefix_at(&self, text: &[u8], at: usize) -> Option<usize> {
1531 self.prog.prefixes.find(&text[at..]).map(|(s, _)| at + s)
1532 }
1533
1534 /// Returns the number of byte classes required to discriminate transitions
1535 /// in each state.
1536 ///
1537 /// invariant: num_byte_classes() == len(State.next)
1538 fn num_byte_classes(&self) -> usize {
1539 // We add 1 to account for the special EOF byte.
1540 (self.prog.byte_classes[255] as usize + 1) + 1
1541 }
1542
1543 /// Given an input byte or the special EOF sentinel, return its
1544 /// corresponding byte class.
1545 #[inline(always)]
1546 fn byte_class(&self, b: Byte) -> usize {
1547 match b.as_byte() {
1548 None => self.num_byte_classes() - 1,
1549 Some(b) => self.u8_class(b),
1550 }
1551 }
1552
1553 /// Like byte_class, but explicitly for u8s.
1554 #[inline(always)]
1555 fn u8_class(&self, b: u8) -> usize {
1556 self.prog.byte_classes[b as usize] as usize
1557 }
1558
1559 /// Returns true if the DFA should continue searching past the first match.
1560 ///
1561 /// Leftmost first semantics in the DFA are preserved by not following NFA
1562 /// transitions after the first match is seen.
1563 ///
1564 /// On occasion, we want to avoid leftmost first semantics to find either
1565 /// the longest match (for reverse search) or all possible matches (for
1566 /// regex sets).
1567 fn continue_past_first_match(&self) -> bool {
1568 self.prog.is_reverse || self.prog.matches.len() > 1
1569 }
1570
1571 /// Returns true if there is a prefix we can quickly search for.
1572 fn has_prefix(&self) -> bool {
1573 !self.prog.is_reverse
1574 && !self.prog.prefixes.is_empty()
1575 && !self.prog.is_anchored_start
1576 }
1577
1578 /// Sets the STATE_START bit in the given state pointer if and only if
1579 /// we have a prefix to scan for.
1580 ///
1581 /// If there's no prefix, then it's a waste to treat the start state
1582 /// specially.
1583 fn start_ptr(&self, si: StatePtr) -> StatePtr {
1584 if self.has_prefix() {
1585 si | STATE_START
1586 } else {
1587 si
1588 }
1589 }
1590
1591 /// Approximate size returns the approximate heap space currently used by
1592 /// the DFA. It is used to determine whether the DFA's state cache needs to
1593 /// be wiped. Namely, it is possible that for certain regexes on certain
1594 /// inputs, a new state could be created for every byte of input. (This is
1595 /// bad for memory use, so we bound it with a cache.)
1596 fn approximate_size(&self) -> usize {
1597 self.cache.size + self.prog.approximate_size()
1598 }
1599 }
1600
1601 impl Transitions {
1602 /// Create a new transition table.
1603 ///
1604 /// The number of byte classes corresponds to the stride. Every state will
1605 /// have `num_byte_classes` slots for transitions.
1606 fn new(num_byte_classes: usize) -> Transitions {
1607 Transitions {
1608 table: vec![],
1609 num_byte_classes: num_byte_classes,
1610 }
1611 }
1612
1613 /// Returns the total number of states currently in this table.
1614 fn num_states(&self) -> usize {
1615 self.table.len() / self.num_byte_classes
1616 }
1617
1618 /// Allocates room for one additional state and returns a pointer to it.
1619 ///
1620 /// If there's no more room, None is returned.
1621 fn add(&mut self) -> Option<StatePtr> {
1622 let si = self.table.len();
1623 if si > STATE_MAX as usize {
1624 return None;
1625 }
1626 self.table.extend(repeat(STATE_UNKNOWN).take(self.num_byte_classes));
1627 Some(usize_to_u32(si))
1628 }
1629
1630 /// Clears the table of all states.
1631 fn clear(&mut self) {
1632 self.table.clear();
1633 }
1634
1635 /// Sets the transition from (si, cls) to next.
1636 fn set_next(&mut self, si: StatePtr, cls: usize, next: StatePtr) {
1637 self.table[si as usize + cls] = next;
1638 }
1639
1640 /// Returns the transition corresponding to (si, cls).
1641 fn next(&self, si: StatePtr, cls: usize) -> StatePtr {
1642 self.table[si as usize + cls]
1643 }
1644
1645 /// The heap size, in bytes, of a single state in the transition table.
1646 fn state_heap_size(&self) -> usize {
1647 self.num_byte_classes * mem::size_of::<StatePtr>()
1648 }
1649
1650 /// Like `next`, but uses unchecked access and is therefore unsafe.
1651 unsafe fn next_unchecked(&self, si: StatePtr, cls: usize) -> StatePtr {
1652 debug_assert!((si as usize) < self.table.len());
1653 debug_assert!(cls < self.num_byte_classes);
1654 *self.table.get_unchecked(si as usize + cls)
1655 }
1656 }
1657
1658 impl StateFlags {
1659 fn is_match(&self) -> bool {
1660 self.0 & 0b0000000_1 > 0
1661 }
1662
1663 fn set_match(&mut self) {
1664 self.0 |= 0b0000000_1;
1665 }
1666
1667 fn is_word(&self) -> bool {
1668 self.0 & 0b000000_1_0 > 0
1669 }
1670
1671 fn set_word(&mut self) {
1672 self.0 |= 0b000000_1_0;
1673 }
1674
1675 fn has_empty(&self) -> bool {
1676 self.0 & 0b00000_1_00 > 0
1677 }
1678
1679 fn set_empty(&mut self) {
1680 self.0 |= 0b00000_1_00;
1681 }
1682 }
1683
1684 impl Byte {
1685 fn byte(b: u8) -> Self { Byte(b as u16) }
1686 fn eof() -> Self { Byte(256) }
1687 fn is_eof(&self) -> bool { self.0 == 256 }
1688
1689 fn is_ascii_word(&self) -> bool {
1690 let b = match self.as_byte() {
1691 None => return false,
1692 Some(b) => b,
1693 };
1694 match b {
1695 b'A'...b'Z' | b'a'...b'z' | b'0'...b'9' | b'_' => true,
1696 _ => false,
1697 }
1698 }
1699
1700 fn as_byte(&self) -> Option<u8> {
1701 if self.is_eof() {
1702 None
1703 } else {
1704 Some(self.0 as u8)
1705 }
1706 }
1707 }
1708
// Debug rendering of a cached DFA state: its flags plus the decoded list of
// NFA instruction pointers it represents.
impl fmt::Debug for State {
    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
        // inst_ptrs() decodes the delta-encoded pointers stored in `data`.
        let ips: Vec<usize> = self.inst_ptrs().collect();
        f.debug_struct("State")
            .field("flags", &self.flags())
            .field("insts", &ips)
            .finish()
    }
}
1718
1719 impl fmt::Debug for Transitions {
1720 fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
1721 let mut fmtd = f.debug_map();
1722 for si in 0..self.num_states() {
1723 let s = si * self.num_byte_classes;
1724 let e = s + self.num_byte_classes;
1725 fmtd.entry(&si.to_string(), &TransitionsRow(&self.table[s..e]));
1726 }
1727 fmtd.finish()
1728 }
1729 }
1730
/// Adapter for rendering one state's row of the transition table in
/// `Transitions`'s `Debug` output.
struct TransitionsRow<'a>(&'a [StatePtr]);
1732
1733 impl<'a> fmt::Debug for TransitionsRow<'a> {
1734 fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
1735 let mut fmtd = f.debug_map();
1736 for (b, si) in self.0.iter().enumerate() {
1737 match *si {
1738 STATE_UNKNOWN => {}
1739 STATE_DEAD => {
1740 fmtd.entry(&vb(b as usize), &"DEAD");
1741 }
1742 si => {
1743 fmtd.entry(&vb(b as usize), &si.to_string());
1744 }
1745 }
1746 }
1747 fmtd.finish()
1748 }
1749 }
1750
1751 impl fmt::Debug for StateFlags {
1752 fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
1753 f.debug_struct("StateFlags")
1754 .field("is_match", &self.is_match())
1755 .field("is_word", &self.is_word())
1756 .field("has_empty", &self.has_empty())
1757 .finish()
1758 }
1759 }
1760
/// Helper function for formatting a byte as a nice-to-read escaped string.
///
/// Values 0-255 are rendered with their default ASCII escape; anything
/// larger is the EOF sentinel column.
fn vb(b: usize) -> String {
    use std::ascii::escape_default;

    if b <= ::std::u8::MAX as usize {
        let escaped = escape_default(b as u8).collect::<Vec<u8>>();
        String::from_utf8_lossy(&escaped).into_owned()
    } else {
        "EOF".to_owned()
    }
}
1772
/// Converts `n` to a `u32`, panicking if it does not fit.
///
/// Overflow here indicates a bug: instruction and state pointers are
/// always expected to fit in 32 bits.
fn usize_to_u32(n: usize) -> u32 {
    assert!(
        (n as u64) <= (::std::u32::MAX as u64),
        "BUG: {} is too big to fit into u32", n
    );
    n as u32
}
1779
1780 #[allow(dead_code)] // useful for debugging
1781 fn show_state_ptr(si: StatePtr) -> String {
1782 let mut s = format!("{:?}", si & STATE_MAX);
1783 if si == STATE_UNKNOWN {
1784 s = format!("{} (unknown)", s);
1785 }
1786 if si == STATE_DEAD {
1787 s = format!("{} (dead)", s);
1788 }
1789 if si == STATE_QUIT {
1790 s = format!("{} (quit)", s);
1791 }
1792 if si & STATE_START > 0 {
1793 s = format!("{} (start)", s);
1794 }
1795 if si & STATE_MATCH > 0 {
1796 s = format!("{} (match)", s);
1797 }
1798 s
1799 }
1800
1801 /// https://developers.google.com/protocol-buffers/docs/encoding#varints
1802 fn write_vari32(data: &mut Vec<u8>, n: i32) {
1803 let mut un = (n as u32) << 1;
1804 if n < 0 {
1805 un = !un;
1806 }
1807 write_varu32(data, un)
1808 }
1809
1810 /// https://developers.google.com/protocol-buffers/docs/encoding#varints
1811 fn read_vari32(data: &[u8]) -> (i32, usize) {
1812 let (un, i) = read_varu32(data);
1813 let mut n = (un >> 1) as i32;
1814 if un & 1 != 0 {
1815 n = !n;
1816 }
1817 (n, i)
1818 }
1819
/// https://developers.google.com/protocol-buffers/docs/encoding#varints
///
/// Writes `n` as a little-endian base-128 varint: seven payload bits per
/// byte, with the high bit set on every byte except the last.
fn write_varu32(data: &mut Vec<u8>, mut n: u32) {
    loop {
        if n < 0b1000_0000 {
            // Fits in seven bits: final byte, continuation bit clear.
            data.push(n as u8);
            return;
        }
        data.push(0b1000_0000 | (n as u8));
        n >>= 7;
    }
}
1828
/// https://developers.google.com/protocol-buffers/docs/encoding#varints
///
/// Reads a little-endian base-128 varint, returning the decoded value and
/// the number of bytes consumed. Incomplete input (empty, or every byte has
/// its continuation bit set) yields `(0, 0)`.
fn read_varu32(data: &[u8]) -> (u32, usize) {
    let mut acc: u32 = 0;
    let mut shift: u32 = 0;
    let mut i = 0;
    while i < data.len() {
        let b = data[i];
        i += 1;
        if b & 0b1000_0000 == 0 {
            // Continuation bit clear: this is the last byte.
            return (acc | ((b as u32) << shift), i);
        }
        acc |= ((b as u32) & 0b0111_1111) << shift;
        shift += 7;
    }
    (0, 0)
}
1842
#[cfg(test)]
mod tests {
    extern crate rand;

    use quickcheck::{QuickCheck, StdGen, quickcheck};
    use super::{
        StateFlags, State, push_inst_ptr,
        write_varu32, read_varu32, write_vari32, read_vari32,
    };

    // Property: encoding a flag byte plus a sequence of instruction
    // pointers into a `State` and decoding them back is lossless.
    #[test]
    fn prop_state_encode_decode() {
        fn p(ips: Vec<u32>, flags: u8) -> bool {
            // The first byte of a state's data is its flags; the
            // instruction pointers follow, delta-encoded via `prev`.
            let mut data = vec![flags];
            let mut prev = 0;
            for &ip in ips.iter() {
                push_inst_ptr(&mut data, &mut prev, ip);
            }
            let state = State { data: data.into_boxed_slice() };

            let expected: Vec<usize> =
                ips.into_iter().map(|ip| ip as usize).collect();
            let got: Vec<usize> = state.inst_ptrs().collect();
            expected == got && state.flags() == StateFlags(flags)
        }
        // Use a larger-than-default generator size so instruction
        // pointers exercise multi-byte varint encodings.
        QuickCheck::new()
            .gen(StdGen::new(self::rand::thread_rng(), 10_000))
            .quickcheck(p as fn(Vec<u32>, u8) -> bool);
    }

    // Property: unsigned varint round-trips exactly and the reader
    // consumes every byte the writer produced.
    #[test]
    fn prop_read_write_u32() {
        fn p(n: u32) -> bool {
            let mut buf = vec![];
            write_varu32(&mut buf, n);
            let (got, nread) = read_varu32(&buf);
            nread == buf.len() && got == n
        }
        quickcheck(p as fn(u32) -> bool);
    }

    // Property: signed (zig-zag) varint round-trips exactly, including
    // negative values.
    #[test]
    fn prop_read_write_i32() {
        fn p(n: i32) -> bool {
            let mut buf = vec![];
            write_vari32(&mut buf, n);
            let (got, nread) = read_vari32(&buf);
            nread == buf.len() && got == n
        }
        quickcheck(p as fn(i32) -> bool);
    }
}