vendor/globset/src/glob.rs

   1 use std::path::{is_separator, Path};
   2
   3 use regex_automata::meta::Regex;
   4
   5 use crate::{new_regex, Candidate, Error, ErrorKind};
   6
/// Describes a matching strategy for a particular pattern.
///
/// This provides a way to more quickly determine whether a pattern matches
/// a particular file path in a way that scales with a large number of
/// patterns. For example, if many patterns are of the form `*.ext`, then it's
/// possible to test whether any of those patterns matches by looking up a
/// file path's extension in a hash table.
#[derive(Clone, Debug, Eq, PartialEq)]
pub(crate) enum MatchStrategy {
    /// A pattern matches if and only if the entire file path matches this
    /// literal string.
    Literal(String),
    /// A pattern matches if and only if the file path's basename matches this
    /// literal string.
    BasenameLiteral(String),
    /// A pattern matches if and only if the file path's extension matches this
    /// literal string.
    Extension(String),
    /// A pattern matches if and only if this prefix literal is a prefix of the
    /// candidate file path.
    Prefix(String),
    /// A pattern matches if and only if this suffix literal is a suffix of the
    /// candidate file path.
    ///
    /// An exception: if `component` is true, then `suffix` must appear at the
    /// beginning of a file path or immediately following a `/`. In that case
    /// the stored suffix starts with a `/`, and a path equal to the suffix
    /// without that leading `/` is also a match.
    Suffix {
        /// The actual suffix.
        suffix: String,
        /// Whether this must start at the beginning of a path component.
        component: bool,
    },
    /// A pattern matches only if the given extension matches the file path's
    /// extension. Note that this is a necessary but NOT sufficient criterion.
    /// Namely, if the extension matches, then a full regex search is still
    /// required.
    RequiredExtension(String),
    /// A regex needs to be used for matching.
    Regex,
}
  47
  48 impl MatchStrategy {
  49     /// Returns a matching strategy for the given pattern.
  50     pub(crate) fn new(pat: &Glob) -> MatchStrategy {
  51         if let Some(lit) = pat.basename_literal() {
  52             MatchStrategy::BasenameLiteral(lit)
  53         } else if let Some(lit) = pat.literal() {
  54             MatchStrategy::Literal(lit)
  55         } else if let Some(ext) = pat.ext() {
  56             MatchStrategy::Extension(ext)
  57         } else if let Some(prefix) = pat.prefix() {
  58             MatchStrategy::Prefix(prefix)
  59         } else if let Some((suffix, component)) = pat.suffix() {
  60             MatchStrategy::Suffix { suffix, component }
  61         } else if let Some(ext) = pat.required_ext() {
  62             MatchStrategy::RequiredExtension(ext)
  63         } else {
  64             MatchStrategy::Regex
  65         }
  66     }
  67 }
  68
/// Glob represents a successfully parsed shell glob pattern.
///
/// It cannot be used directly to match file paths, but it can be converted
/// to a regular expression string or a matcher.
///
/// Equality and hashing are implemented by hand and only look at the
/// original pattern string and the options; `re` and `tokens` are not
/// compared.
#[derive(Clone, Debug, Eq)]
pub struct Glob {
    /// The original glob pattern string.
    glob: String,
    /// The regular expression string generated from `tokens`.
    re: String,
    /// The options this pattern was built with.
    opts: GlobOptions,
    /// The parsed representation of `glob`.
    tokens: Tokens,
}
  80
  81 impl PartialEq for Glob {
  82     fn eq(&self, other: &Glob) -> bool {
  83         self.glob == other.glob && self.opts == other.opts
  84     }
  85 }
  86
  87 impl std::hash::Hash for Glob {
  88     fn hash<H: std::hash::Hasher>(&self, state: &mut H) {
  89         self.glob.hash(state);
  90         self.opts.hash(state);
  91     }
  92 }
  93
  94 impl std::fmt::Display for Glob {
  95     fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
  96         self.glob.fmt(f)
  97     }
  98 }
  99
 100 impl std::str::FromStr for Glob {
 101     type Err = Error;
 102
 103     fn from_str(glob: &str) -> Result<Self, Self::Err> {
 104         Self::new(glob)
 105     }
 106 }
 107
 108 /// A matcher for a single pattern.
 109 #[derive(Clone, Debug)]
 110 pub struct GlobMatcher {
 111     /// The underlying pattern.
 112     pat: Glob,
 113     /// The pattern, as a compiled regex.
 114     re: Regex,
 115 }
 116
 117 impl GlobMatcher {
 118     /// Tests whether the given path matches this pattern or not.
 119     pub fn is_match<P: AsRef<Path>>(&self, path: P) -> bool {
 120         self.is_match_candidate(&Candidate::new(path.as_ref()))
 121     }
 122
 123     /// Tests whether the given path matches this pattern or not.
 124     pub fn is_match_candidate(&self, path: &Candidate<'_>) -> bool {
 125         self.re.is_match(&path.path)
 126     }
 127
 128     /// Returns the `Glob` used to compile this matcher.
 129     pub fn glob(&self) -> &Glob {
 130         &self.pat
 131     }
 132 }
 133
 134 /// A strategic matcher for a single pattern.
 135 #[cfg(test)]
 136 #[derive(Clone, Debug)]
 137 struct GlobStrategic {
 138     /// The match strategy to use.
 139     strategy: MatchStrategy,
 140     /// The pattern, as a compiled regex.
 141     re: Regex,
 142 }
 143
 144 #[cfg(test)]
 145 impl GlobStrategic {
 146     /// Tests whether the given path matches this pattern or not.
 147     fn is_match<P: AsRef<Path>>(&self, path: P) -> bool {
 148         self.is_match_candidate(&Candidate::new(path.as_ref()))
 149     }
 150
 151     /// Tests whether the given path matches this pattern or not.
 152     fn is_match_candidate(&self, candidate: &Candidate<'_>) -> bool {
 153         let byte_path = &*candidate.path;
 154
 155         match self.strategy {
 156             MatchStrategy::Literal(ref lit) => lit.as_bytes() == byte_path,
 157             MatchStrategy::BasenameLiteral(ref lit) => {
 158                 lit.as_bytes() == &*candidate.basename
 159             }
 160             MatchStrategy::Extension(ref ext) => {
 161                 ext.as_bytes() == &*candidate.ext
 162             }
 163             MatchStrategy::Prefix(ref pre) => {
 164                 starts_with(pre.as_bytes(), byte_path)
 165             }
 166             MatchStrategy::Suffix { ref suffix, component } => {
 167                 if component && byte_path == &suffix.as_bytes()[1..] {
 168                     return true;
 169                 }
 170                 ends_with(suffix.as_bytes(), byte_path)
 171             }
 172             MatchStrategy::RequiredExtension(ref ext) => {
 173                 let ext = ext.as_bytes();
 174                 &*candidate.ext == ext && self.re.is_match(byte_path)
 175             }
 176             MatchStrategy::Regex => self.re.is_match(byte_path),
 177         }
 178     }
 179 }
 180
/// A builder for a pattern.
///
/// This builder enables configuring the match semantics of a pattern. For
/// example, one can make matching case insensitive.
///
/// The pattern is only borrowed by the builder; nothing is parsed until
/// `build` is called.
///
/// The lifetime `'a` refers to the lifetime of the pattern string.
#[derive(Clone, Debug)]
pub struct GlobBuilder<'a> {
    /// The glob pattern to compile.
    glob: &'a str,
    /// Options for the pattern.
    opts: GlobOptions,
}
 194
 195 #[derive(Clone, Copy, Debug, Eq, Hash, PartialEq)]
 196 struct GlobOptions {
 197     /// Whether to match case insensitively.
 198     case_insensitive: bool,
 199     /// Whether to require a literal separator to match a separator in a file
 200     /// path. e.g., when enabled, `*` won't match `/`.
 201     literal_separator: bool,
 202     /// Whether or not to use `\` to escape special characters.
 203     /// e.g., when enabled, `\*` will match a literal `*`.
 204     backslash_escape: bool,
 205     /// Whether or not an empty case in an alternate will be removed.
 206     /// e.g., when enabled, `{,a}` will match "" and "a".
 207     empty_alternates: bool,
 208 }
 209
 210 impl GlobOptions {
 211     fn default() -> GlobOptions {
 212         GlobOptions {
 213             case_insensitive: false,
 214             literal_separator: false,
 215             backslash_escape: !is_separator('\\'),
 216             empty_alternates: false,
 217         }
 218     }
 219 }
 220
 221 #[derive(Clone, Debug, Default, Eq, PartialEq)]
 222 struct Tokens(Vec<Token>);
 223
 224 impl std::ops::Deref for Tokens {
 225     type Target = Vec<Token>;
 226     fn deref(&self) -> &Vec<Token> {
 227         &self.0
 228     }
 229 }
 230
 231 impl std::ops::DerefMut for Tokens {
 232     fn deref_mut(&mut self) -> &mut Vec<Token> {
 233         &mut self.0
 234     }
 235 }
 236
 237 #[derive(Clone, Debug, Eq, PartialEq)]
 238 enum Token {
 239     Literal(char),
 240     Any,
 241     ZeroOrMore,
 242     RecursivePrefix,
 243     RecursiveSuffix,
 244     RecursiveZeroOrMore,
 245     Class { negated: bool, ranges: Vec<(char, char)> },
 246     Alternates(Vec<Tokens>),
 247 }
 248
 249 impl Glob {
 250     /// Builds a new pattern with default options.
 251     pub fn new(glob: &str) -> Result<Glob, Error> {
 252         GlobBuilder::new(glob).build()
 253     }
 254
 255     /// Returns a matcher for this pattern.
 256     pub fn compile_matcher(&self) -> GlobMatcher {
 257         let re =
 258             new_regex(&self.re).expect("regex compilation shouldn't fail");
 259         GlobMatcher { pat: self.clone(), re }
 260     }
 261
 262     /// Returns a strategic matcher.
 263     ///
 264     /// This isn't exposed because it's not clear whether it's actually
 265     /// faster than just running a regex for a *single* pattern. If it
 266     /// is faster, then GlobMatcher should do it automatically.
 267     #[cfg(test)]
 268     fn compile_strategic_matcher(&self) -> GlobStrategic {
 269         let strategy = MatchStrategy::new(self);
 270         let re =
 271             new_regex(&self.re).expect("regex compilation shouldn't fail");
 272         GlobStrategic { strategy, re }
 273     }
 274
 275     /// Returns the original glob pattern used to build this pattern.
 276     pub fn glob(&self) -> &str {
 277         &self.glob
 278     }
 279
 280     /// Returns the regular expression string for this glob.
 281     ///
 282     /// Note that regular expressions for globs are intended to be matched on
 283     /// arbitrary bytes (`&[u8]`) instead of Unicode strings (`&str`). In
 284     /// particular, globs are frequently used on file paths, where there is no
 285     /// general guarantee that file paths are themselves valid UTF-8. As a
 286     /// result, callers will need to ensure that they are using a regex API
 287     /// that can match on arbitrary bytes. For example, the
 288     /// [`regex`](https://crates.io/regex)
 289     /// crate's
 290     /// [`Regex`](https://docs.rs/regex/*/regex/struct.Regex.html)
 291     /// API is not suitable for this since it matches on `&str`, but its
 292     /// [`bytes::Regex`](https://docs.rs/regex/*/regex/bytes/struct.Regex.html)
 293     /// API is suitable for this.
 294     pub fn regex(&self) -> &str {
 295         &self.re
 296     }
 297
 298     /// Returns the pattern as a literal if and only if the pattern must match
 299     /// an entire path exactly.
 300     ///
 301     /// The basic format of these patterns is `{literal}`.
 302     fn literal(&self) -> Option<String> {
 303         if self.opts.case_insensitive {
 304             return None;
 305         }
 306         let mut lit = String::new();
 307         for t in &*self.tokens {
 308             let Token::Literal(c) = *t else { return None };
 309             lit.push(c);
 310         }
 311         if lit.is_empty() {
 312             None
 313         } else {
 314             Some(lit)
 315         }
 316     }
 317
 318     /// Returns an extension if this pattern matches a file path if and only
 319     /// if the file path has the extension returned.
 320     ///
 321     /// Note that this extension returned differs from the extension that
 322     /// std::path::Path::extension returns. Namely, this extension includes
 323     /// the '.'. Also, paths like `.rs` are considered to have an extension
 324     /// of `.rs`.
 325     fn ext(&self) -> Option<String> {
 326         if self.opts.case_insensitive {
 327             return None;
 328         }
 329         let start = match *self.tokens.get(0)? {
 330             Token::RecursivePrefix => 1,
 331             _ => 0,
 332         };
 333         match *self.tokens.get(start)? {
 334             Token::ZeroOrMore => {
 335                 // If there was no recursive prefix, then we only permit
 336                 // `*` if `*` can match a `/`. For example, if `*` can't
 337                 // match `/`, then `*.c` doesn't match `foo/bar.c`.
 338                 if start == 0 && self.opts.literal_separator {
 339                     return None;
 340                 }
 341             }
 342             _ => return None,
 343         }
 344         match *self.tokens.get(start + 1)? {
 345             Token::Literal('.') => {}
 346             _ => return None,
 347         }
 348         let mut lit = ".".to_string();
 349         for t in self.tokens[start + 2..].iter() {
 350             match *t {
 351                 Token::Literal('.') | Token::Literal('/') => return None,
 352                 Token::Literal(c) => lit.push(c),
 353                 _ => return None,
 354             }
 355         }
 356         if lit.is_empty() {
 357             None
 358         } else {
 359             Some(lit)
 360         }
 361     }
 362
 363     /// This is like `ext`, but returns an extension even if it isn't sufficient
 364     /// to imply a match. Namely, if an extension is returned, then it is
 365     /// necessary but not sufficient for a match.
 366     fn required_ext(&self) -> Option<String> {
 367         if self.opts.case_insensitive {
 368             return None;
 369         }
 370         // We don't care at all about the beginning of this pattern. All we
 371         // need to check for is if it ends with a literal of the form `.ext`.
 372         let mut ext: Vec<char> = vec![]; // built in reverse
 373         for t in self.tokens.iter().rev() {
 374             match *t {
 375                 Token::Literal('/') => return None,
 376                 Token::Literal(c) => {
 377                     ext.push(c);
 378                     if c == '.' {
 379                         break;
 380                     }
 381                 }
 382                 _ => return None,
 383             }
 384         }
 385         if ext.last() != Some(&'.') {
 386             None
 387         } else {
 388             ext.reverse();
 389             Some(ext.into_iter().collect())
 390         }
 391     }
 392
 393     /// Returns a literal prefix of this pattern if the entire pattern matches
 394     /// if the literal prefix matches.
 395     fn prefix(&self) -> Option<String> {
 396         if self.opts.case_insensitive {
 397             return None;
 398         }
 399         let (end, need_sep) = match *self.tokens.last()? {
 400             Token::ZeroOrMore => {
 401                 if self.opts.literal_separator {
 402                     // If a trailing `*` can't match a `/`, then we can't
 403                     // assume a match of the prefix corresponds to a match
 404                     // of the overall pattern. e.g., `foo/*` with
 405                     // `literal_separator` enabled matches `foo/bar` but not
 406                     // `foo/bar/baz`, even though `foo/bar/baz` has a `foo/`
 407                     // literal prefix.
 408                     return None;
 409                 }
 410                 (self.tokens.len() - 1, false)
 411             }
 412             Token::RecursiveSuffix => (self.tokens.len() - 1, true),
 413             _ => (self.tokens.len(), false),
 414         };
 415         let mut lit = String::new();
 416         for t in &self.tokens[0..end] {
 417             let Token::Literal(c) = *t else { return None };
 418             lit.push(c);
 419         }
 420         if need_sep {
 421             lit.push('/');
 422         }
 423         if lit.is_empty() {
 424             None
 425         } else {
 426             Some(lit)
 427         }
 428     }
 429
 430     /// Returns a literal suffix of this pattern if the entire pattern matches
 431     /// if the literal suffix matches.
 432     ///
 433     /// If a literal suffix is returned and it must match either the entire
 434     /// file path or be preceded by a `/`, then also return true. This happens
 435     /// with a pattern like `**/foo/bar`. Namely, this pattern matches
 436     /// `foo/bar` and `baz/foo/bar`, but not `foofoo/bar`. In this case, the
 437     /// suffix returned is `/foo/bar` (but should match the entire path
 438     /// `foo/bar`).
 439     ///
 440     /// When this returns true, the suffix literal is guaranteed to start with
 441     /// a `/`.
 442     fn suffix(&self) -> Option<(String, bool)> {
 443         if self.opts.case_insensitive {
 444             return None;
 445         }
 446         let mut lit = String::new();
 447         let (start, entire) = match *self.tokens.get(0)? {
 448             Token::RecursivePrefix => {
 449                 // We only care if this follows a path component if the next
 450                 // token is a literal.
 451                 if let Some(&Token::Literal(_)) = self.tokens.get(1) {
 452                     lit.push('/');
 453                     (1, true)
 454                 } else {
 455                     (1, false)
 456                 }
 457             }
 458             _ => (0, false),
 459         };
 460         let start = match *self.tokens.get(start)? {
 461             Token::ZeroOrMore => {
 462                 // If literal_separator is enabled, then a `*` can't
 463                 // necessarily match everything, so reporting a suffix match
 464                 // as a match of the pattern would be a false positive.
 465                 if self.opts.literal_separator {
 466                     return None;
 467                 }
 468                 start + 1
 469             }
 470             _ => start,
 471         };
 472         for t in &self.tokens[start..] {
 473             let Token::Literal(c) = *t else { return None };
 474             lit.push(c);
 475         }
 476         if lit.is_empty() || lit == "/" {
 477             None
 478         } else {
 479             Some((lit, entire))
 480         }
 481     }
 482
 483     /// If this pattern only needs to inspect the basename of a file path,
 484     /// then the tokens corresponding to only the basename match are returned.
 485     ///
 486     /// For example, given a pattern of `**/*.foo`, only the tokens
 487     /// corresponding to `*.foo` are returned.
 488     ///
 489     /// Note that this will return None if any match of the basename tokens
 490     /// doesn't correspond to a match of the entire pattern. For example, the
 491     /// glob `foo` only matches when a file path has a basename of `foo`, but
 492     /// doesn't *always* match when a file path has a basename of `foo`. e.g.,
 493     /// `foo` doesn't match `abc/foo`.
 494     fn basename_tokens(&self) -> Option<&[Token]> {
 495         if self.opts.case_insensitive {
 496             return None;
 497         }
 498         let start = match *self.tokens.get(0)? {
 499             Token::RecursivePrefix => 1,
 500             _ => {
 501                 // With nothing to gobble up the parent portion of a path,
 502                 // we can't assume that matching on only the basename is
 503                 // correct.
 504                 return None;
 505             }
 506         };
 507         if self.tokens[start..].is_empty() {
 508             return None;
 509         }
 510         for t in self.tokens[start..].iter() {
 511             match *t {
 512                 Token::Literal('/') => return None,
 513                 Token::Literal(_) => {} // OK
 514                 Token::Any | Token::ZeroOrMore => {
 515                     if !self.opts.literal_separator {
 516                         // In this case, `*` and `?` can match a path
 517                         // separator, which means this could reach outside
 518                         // the basename.
 519                         return None;
 520                     }
 521                 }
 522                 Token::RecursivePrefix
 523                 | Token::RecursiveSuffix
 524                 | Token::RecursiveZeroOrMore => {
 525                     return None;
 526                 }
 527                 Token::Class { .. } | Token::Alternates(..) => {
 528                     // We *could* be a little smarter here, but either one
 529                     // of these is going to prevent our literal optimizations
 530                     // anyway, so give up.
 531                     return None;
 532                 }
 533             }
 534         }
 535         Some(&self.tokens[start..])
 536     }
 537
 538     /// Returns the pattern as a literal if and only if the pattern exclusively
 539     /// matches the basename of a file path *and* is a literal.
 540     ///
 541     /// The basic format of these patterns is `**/{literal}`, where `{literal}`
 542     /// does not contain a path separator.
 543     fn basename_literal(&self) -> Option<String> {
 544         let tokens = self.basename_tokens()?;
 545         let mut lit = String::new();
 546         for t in tokens {
 547             let Token::Literal(c) = *t else { return None };
 548             lit.push(c);
 549         }
 550         Some(lit)
 551     }
 552 }
 553
 554 impl<'a> GlobBuilder<'a> {
 555     /// Create a new builder for the pattern given.
 556     ///
 557     /// The pattern is not compiled until `build` is called.
 558     pub fn new(glob: &'a str) -> GlobBuilder<'a> {
 559         GlobBuilder { glob, opts: GlobOptions::default() }
 560     }
 561
 562     /// Parses and builds the pattern.
 563     pub fn build(&self) -> Result<Glob, Error> {
 564         let mut p = Parser {
 565             glob: &self.glob,
 566             stack: vec![Tokens::default()],
 567             chars: self.glob.chars().peekable(),
 568             prev: None,
 569             cur: None,
 570             opts: &self.opts,
 571         };
 572         p.parse()?;
 573         if p.stack.is_empty() {
 574             Err(Error {
 575                 glob: Some(self.glob.to_string()),
 576                 kind: ErrorKind::UnopenedAlternates,
 577             })
 578         } else if p.stack.len() > 1 {
 579             Err(Error {
 580                 glob: Some(self.glob.to_string()),
 581                 kind: ErrorKind::UnclosedAlternates,
 582             })
 583         } else {
 584             let tokens = p.stack.pop().unwrap();
 585             Ok(Glob {
 586                 glob: self.glob.to_string(),
 587                 re: tokens.to_regex_with(&self.opts),
 588                 opts: self.opts,
 589                 tokens,
 590             })
 591         }
 592     }
 593
 594     /// Toggle whether the pattern matches case insensitively or not.
 595     ///
 596     /// This is disabled by default.
 597     pub fn case_insensitive(&mut self, yes: bool) -> &mut GlobBuilder<'a> {
 598         self.opts.case_insensitive = yes;
 599         self
 600     }
 601
 602     /// Toggle whether a literal `/` is required to match a path separator.
 603     ///
 604     /// By default this is false: `*` and `?` will match `/`.
 605     pub fn literal_separator(&mut self, yes: bool) -> &mut GlobBuilder<'a> {
 606         self.opts.literal_separator = yes;
 607         self
 608     }
 609
 610     /// When enabled, a back slash (`\`) may be used to escape
 611     /// special characters in a glob pattern. Additionally, this will
 612     /// prevent `\` from being interpreted as a path separator on all
 613     /// platforms.
 614     ///
 615     /// This is enabled by default on platforms where `\` is not a
 616     /// path separator and disabled by default on platforms where `\`
 617     /// is a path separator.
 618     pub fn backslash_escape(&mut self, yes: bool) -> &mut GlobBuilder<'a> {
 619         self.opts.backslash_escape = yes;
 620         self
 621     }
 622
 623     /// Toggle whether an empty pattern in a list of alternates is accepted.
 624     ///
 625     /// For example, if this is set then the glob `foo{,.txt}` will match both
 626     /// `foo` and `foo.txt`.
 627     ///
 628     /// By default this is false.
 629     pub fn empty_alternates(&mut self, yes: bool) -> &mut GlobBuilder<'a> {
 630         self.opts.empty_alternates = yes;
 631         self
 632     }
 633 }
 634
 635 impl Tokens {
 636     /// Convert this pattern to a string that is guaranteed to be a valid
 637     /// regular expression and will represent the matching semantics of this
 638     /// glob pattern and the options given.
 639     fn to_regex_with(&self, options: &GlobOptions) -> String {
 640         let mut re = String::new();
 641         re.push_str("(?-u)");
 642         if options.case_insensitive {
 643             re.push_str("(?i)");
 644         }
 645         re.push('^');
 646         // Special case. If the entire glob is just `**`, then it should match
 647         // everything.
 648         if self.len() == 1 && self[0] == Token::RecursivePrefix {
 649             re.push_str(".*");
 650             re.push('$');
 651             return re;
 652         }
 653         self.tokens_to_regex(options, &self, &mut re);
 654         re.push('$');
 655         re
 656     }
 657
 658     fn tokens_to_regex(
 659         &self,
 660         options: &GlobOptions,
 661         tokens: &[Token],
 662         re: &mut String,
 663     ) {
 664         for tok in tokens.iter() {
 665             match *tok {
 666                 Token::Literal(c) => {
 667                     re.push_str(&char_to_escaped_literal(c));
 668                 }
 669                 Token::Any => {
 670                     if options.literal_separator {
 671                         re.push_str("[^/]");
 672                     } else {
 673                         re.push_str(".");
 674                     }
 675                 }
 676                 Token::ZeroOrMore => {
 677                     if options.literal_separator {
 678                         re.push_str("[^/]*");
 679                     } else {
 680                         re.push_str(".*");
 681                     }
 682                 }
 683                 Token::RecursivePrefix => {
 684                     re.push_str("(?:/?|.*/)");
 685                 }
 686                 Token::RecursiveSuffix => {
 687                     re.push_str("/.*");
 688                 }
 689                 Token::RecursiveZeroOrMore => {
 690                     re.push_str("(?:/|/.*/)");
 691                 }
 692                 Token::Class { negated, ref ranges } => {
 693                     re.push('[');
 694                     if negated {
 695                         re.push('^');
 696                     }
 697                     for r in ranges {
 698                         if r.0 == r.1 {
 699                             // Not strictly necessary, but nicer to look at.
 700                             re.push_str(&char_to_escaped_literal(r.0));
 701                         } else {
 702                             re.push_str(&char_to_escaped_literal(r.0));
 703                             re.push('-');
 704                             re.push_str(&char_to_escaped_literal(r.1));
 705                         }
 706                     }
 707                     re.push(']');
 708                 }
 709                 Token::Alternates(ref patterns) => {
 710                     let mut parts = vec![];
 711                     for pat in patterns {
 712                         let mut altre = String::new();
 713                         self.tokens_to_regex(options, &pat, &mut altre);
 714                         if !altre.is_empty() || options.empty_alternates {
 715                             parts.push(altre);
 716                         }
 717                     }
 718
 719                     // It is possible to have an empty set in which case the
 720                     // resulting alternation '()' would be an error.
 721                     if !parts.is_empty() {
 722                         re.push_str("(?:");
 723                         re.push_str(&parts.join("|"));
 724                         re.push(')');
 725                     }
 726                 }
 727             }
 728         }
 729     }
 730 }
 731
 732 /// Convert a Unicode scalar value to an escaped string suitable for use as
 733 /// a literal in a non-Unicode regex.
 734 fn char_to_escaped_literal(c: char) -> String {
 735     bytes_to_escaped_literal(&c.to_string().into_bytes())
 736 }
 737
 738 /// Converts an arbitrary sequence of bytes to a UTF-8 string. All non-ASCII
 739 /// code units are converted to their escaped form.
 740 fn bytes_to_escaped_literal(bs: &[u8]) -> String {
 741     let mut s = String::with_capacity(bs.len());
 742     for &b in bs {
 743         if b <= 0x7F {
 744             s.push_str(&regex_syntax::escape(
 745                 char::from(b).encode_utf8(&mut [0; 4]),
 746             ));
 747         } else {
 748             s.push_str(&format!("\\x{:02x}", b));
 749         }
 750     }
 751     s
 752 }
 753
/// The state of a single parse of a glob pattern into tokens.
struct Parser<'a> {
    /// The pattern being parsed; it is copied into any error produced.
    glob: &'a str,
    /// A stack of token lists. The parse starts with a single list; opening
    /// an alternate group and each `,` inside one push a fresh list, and
    /// closing the group folds the extra lists into one `Alternates` token.
    stack: Vec<Tokens>,
    /// The characters of the pattern, with one character of lookahead.
    chars: std::iter::Peekable<std::str::Chars<'a>>,
    /// NOTE(review): presumably the character consumed before `cur`; it is
    /// maintained by `bump`, which is not visible here. Confirm there.
    prev: Option<char>,
    /// NOTE(review): presumably the most recently consumed character;
    /// confirm against `bump`.
    cur: Option<char>,
    /// The options that control parsing.
    opts: &'a GlobOptions,
}
 762
 763 impl<'a> Parser<'a> {
 764     fn error(&self, kind: ErrorKind) -> Error {
 765         Error { glob: Some(self.glob.to_string()), kind }
 766     }
 767
 768     fn parse(&mut self) -> Result<(), Error> {
 769         while let Some(c) = self.bump() {
 770             match c {
 771                 '?' => self.push_token(Token::Any)?,
 772                 '*' => self.parse_star()?,
 773                 '[' => self.parse_class()?,
 774                 '{' => self.push_alternate()?,
 775                 '}' => self.pop_alternate()?,
 776                 ',' => self.parse_comma()?,
 777                 '\\' => self.parse_backslash()?,
 778                 c => self.push_token(Token::Literal(c))?,
 779             }
 780         }
 781         Ok(())
 782     }
 783
 784     fn push_alternate(&mut self) -> Result<(), Error> {
 785         if self.stack.len() > 1 {
 786             return Err(self.error(ErrorKind::NestedAlternates));
 787         }
 788         Ok(self.stack.push(Tokens::default()))
 789     }
 790
 791     fn pop_alternate(&mut self) -> Result<(), Error> {
 792         let mut alts = vec![];
 793         while self.stack.len() >= 2 {
 794             alts.push(self.stack.pop().unwrap());
 795         }
 796         self.push_token(Token::Alternates(alts))
 797     }
 798
 799     fn push_token(&mut self, tok: Token) -> Result<(), Error> {
 800         if let Some(ref mut pat) = self.stack.last_mut() {
 801             return Ok(pat.push(tok));
 802         }
 803         Err(self.error(ErrorKind::UnopenedAlternates))
 804     }
 805
 806     fn pop_token(&mut self) -> Result<Token, Error> {
 807         if let Some(ref mut pat) = self.stack.last_mut() {
 808             return Ok(pat.pop().unwrap());
 809         }
 810         Err(self.error(ErrorKind::UnopenedAlternates))
 811     }
 812
 813     fn have_tokens(&self) -> Result<bool, Error> {
 814         match self.stack.last() {
 815             None => Err(self.error(ErrorKind::UnopenedAlternates)),
 816             Some(ref pat) => Ok(!pat.is_empty()),
 817         }
 818     }
 819
 820     fn parse_comma(&mut self) -> Result<(), Error> {
 821         // If we aren't inside a group alternation, then don't
 822         // treat commas specially. Otherwise, we need to start
 823         // a new alternate.
 824         if self.stack.len() <= 1 {
 825             self.push_token(Token::Literal(','))
 826         } else {
 827             Ok(self.stack.push(Tokens::default()))
 828         }
 829     }
 830
 831     fn parse_backslash(&mut self) -> Result<(), Error> {
 832         if self.opts.backslash_escape {
 833             match self.bump() {
 834                 None => Err(self.error(ErrorKind::DanglingEscape)),
 835                 Some(c) => self.push_token(Token::Literal(c)),
 836             }
 837         } else if is_separator('\\') {
 838             // Normalize all patterns to use / as a separator.
 839             self.push_token(Token::Literal('/'))
 840         } else {
 841             self.push_token(Token::Literal('\\'))
 842         }
 843     }
 844
    /// Handles `*` (already consumed by the caller) and decides between a
    /// plain wildcard and one of the recursive `**` forms.
    ///
    /// Outcomes:
    ///
    /// * a lone `*` pushes `Token::ZeroOrMore`;
    /// * `**` that opens the current token list and is followed by a
    ///   separator or the end of input pushes `Token::RecursivePrefix`;
    /// * `**` that follows a separator and ends the pattern (or the current
    ///   alternation branch) becomes `Token::RecursiveSuffix`;
    /// * `**` between two separators becomes `Token::RecursiveZeroOrMore`;
    /// * any other `**` is treated as two consecutive `Token::ZeroOrMore`.
    ///
    /// Errors from the stack helpers are propagated unchanged.
    fn parse_star(&mut self) -> Result<(), Error> {
        // The character that preceded the `*` which triggered this call.
        let prev = self.prev;
        // A single star: ordinary wildcard.
        if self.peek() != Some('*') {
            self.push_token(Token::ZeroOrMore)?;
            return Ok(());
        }
        // Consume the second star of `**`.
        assert!(self.bump() == Some('*'));
        // `**` is the first thing in the current token list.
        if !self.have_tokens()? {
            if !self.peek().map_or(true, is_separator) {
                // Followed by an ordinary character: not a recursive form.
                self.push_token(Token::ZeroOrMore)?;
                self.push_token(Token::ZeroOrMore)?;
            } else {
                // Followed by a separator or the end of input. The trailing
                // separator, if any, is consumed along with the `**`.
                self.push_token(Token::RecursivePrefix)?;
                assert!(self.bump().map_or(true, is_separator));
            }
            return Ok(());
        }

        // Tokens precede the `**`. Unless the previous character was a
        // separator, this is not a recursive form. The only exception is
        // inside an alternation group when the previous character was `,`
        // or `{`.
        if !prev.map(is_separator).unwrap_or(false) {
            if self.stack.len() <= 1
                || (prev != Some(',') && prev != Some('{'))
            {
                self.push_token(Token::ZeroOrMore)?;
                self.push_token(Token::ZeroOrMore)?;
                return Ok(());
            }
        }
        // Look at what follows the `**` to tell a suffix (`/**` at the end
        // of the pattern or of an alternation branch) from an infix
        // (`/**/`).
        let is_suffix = match self.peek() {
            None => {
                assert!(self.bump().is_none());
                true
            }
            Some(',') | Some('}') if self.stack.len() >= 2 => true,
            Some(c) if is_separator(c) => {
                // Consume the separator that closes `/**/`.
                assert!(self.bump().map(is_separator).unwrap_or(false));
                false
            }
            _ => {
                // Followed by an ordinary character: two plain wildcards.
                self.push_token(Token::ZeroOrMore)?;
                self.push_token(Token::ZeroOrMore)?;
                return Ok(());
            }
        };
        // Replace the token that precedes the `**`. If it is already a
        // recursive prefix/suffix it is pushed back unchanged, so repeated
        // `**/` segments collapse into one token. Otherwise the popped token
        // is discarded and the recursive token takes its place.
        //
        // NOTE(review): the discarded token is presumably the separator
        // literal, but inside a group a literal `,` or `{` token directly
        // before the `**` (e.g. produced by a backslash escape) reaches this
        // point as well and would be dropped -- confirm this is intended.
        match self.pop_token()? {
            Token::RecursivePrefix => {
                self.push_token(Token::RecursivePrefix)?;
            }
            Token::RecursiveSuffix => {
                self.push_token(Token::RecursiveSuffix)?;
            }
            _ => {
                if is_suffix {
                    self.push_token(Token::RecursiveSuffix)?;
                } else {
                    self.push_token(Token::RecursiveZeroOrMore)?;
                }
            }
        }
        Ok(())
    }
 905
 906     fn parse_class(&mut self) -> Result<(), Error> {
 907         fn add_to_last_range(
 908             glob: &str,
 909             r: &mut (char, char),
 910             add: char,
 911         ) -> Result<(), Error> {
 912             r.1 = add;
 913             if r.1 < r.0 {
 914                 Err(Error {
 915                     glob: Some(glob.to_string()),
 916                     kind: ErrorKind::InvalidRange(r.0, r.1),
 917                 })
 918             } else {
 919                 Ok(())
 920             }
 921         }
 922         let mut ranges = vec![];
 923         let negated = match self.chars.peek() {
 924             Some(&'!') | Some(&'^') => {
 925                 let bump = self.bump();
 926                 assert!(bump == Some('!') || bump == Some('^'));
 927                 true
 928             }
 929             _ => false,
 930         };
 931         let mut first = true;
 932         let mut in_range = false;
 933         loop {
 934             let c = match self.bump() {
 935                 Some(c) => c,
 936                 // The only way to successfully break this loop is to observe
 937                 // a ']'.
 938                 None => return Err(self.error(ErrorKind::UnclosedClass)),
 939             };
 940             match c {
 941                 ']' => {
 942                     if first {
 943                         ranges.push((']', ']'));
 944                     } else {
 945                         break;
 946                     }
 947                 }
 948                 '-' => {
 949                     if first {
 950                         ranges.push(('-', '-'));
 951                     } else if in_range {
 952                         // invariant: in_range is only set when there is
 953                         // already at least one character seen.
 954                         let r = ranges.last_mut().unwrap();
 955                         add_to_last_range(&self.glob, r, '-')?;
 956                         in_range = false;
 957                     } else {
 958                         assert!(!ranges.is_empty());
 959                         in_range = true;
 960                     }
 961                 }
 962                 c => {
 963                     if in_range {
 964                         // invariant: in_range is only set when there is
 965                         // already at least one character seen.
 966                         add_to_last_range(
 967                             &self.glob,
 968                             ranges.last_mut().unwrap(),
 969                             c,
 970                         )?;
 971                     } else {
 972                         ranges.push((c, c));
 973                     }
 974                     in_range = false;
 975                 }
 976             }
 977             first = false;
 978         }
 979         if in_range {
 980             // Means that the last character in the class was a '-', so add
 981             // it as a literal.
 982             ranges.push(('-', '-'));
 983         }
 984         self.push_token(Token::Class { negated, ranges })
 985     }
 986
 987     fn bump(&mut self) -> Option<char> {
 988         self.prev = self.cur;
 989         self.cur = self.chars.next();
 990         self.cur
 991     }
 992
 993     fn peek(&mut self) -> Option<char> {
 994         self.chars.peek().map(|&ch| ch)
 995     }
 996 }
 997
 998 #[cfg(test)]
 999 fn starts_with(needle: &[u8], haystack: &[u8]) -> bool {
1000     needle.len() <= haystack.len() && needle == &haystack[..needle.len()]
1001 }
1002
/// Returns true if `haystack` ends with the bytes of `needle`.
///
/// An empty `needle` is a suffix of every haystack. Note the argument order:
/// the needle comes first. Only compiled for test builds.
#[cfg(test)]
fn ends_with(needle: &[u8], haystack: &[u8]) -> bool {
    haystack.ends_with(needle)
}
1010
1011 #[cfg(test)]
1012 mod tests {
1013     use super::Token::*;
1014     use super::{Glob, GlobBuilder, Token};
1015     use crate::{ErrorKind, GlobSetBuilder};
1016
    // Optional `GlobBuilder` overrides used by the test macros in this
    // module. A field left as `None` means the corresponding builder setter
    // is not called at all, so the builder's own default applies.
    #[derive(Clone, Copy, Debug, Default)]
    struct Options {
        // Forwarded to `GlobBuilder::case_insensitive`.
        casei: Option<bool>,
        // Forwarded to `GlobBuilder::literal_separator`.
        litsep: Option<bool>,
        // Forwarded to `GlobBuilder::backslash_escape`.
        bsesc: Option<bool>,
        // Forwarded to `GlobBuilder::empty_alternates`.
        ealtre: Option<bool>,
    }
1024
1025     macro_rules! syntax {
1026         ($name:ident, $pat:expr, $tokens:expr) => {
1027             #[test]
1028             fn $name() {
1029                 let pat = Glob::new($pat).unwrap();
1030                 assert_eq!($tokens, pat.tokens.0);
1031             }
1032         };
1033     }
1034
1035     macro_rules! syntaxerr {
1036         ($name:ident, $pat:expr, $err:expr) => {
1037             #[test]
1038             fn $name() {
1039                 let err = Glob::new($pat).unwrap_err();
1040                 assert_eq!(&$err, err.kind());
1041             }
1042         };
1043     }
1044
1045     macro_rules! toregex {
1046         ($name:ident, $pat:expr, $re:expr) => {
1047             toregex!($name, $pat, $re, Options::default());
1048         };
1049         ($name:ident, $pat:expr, $re:expr, $options:expr) => {
1050             #[test]
1051             fn $name() {
1052                 let mut builder = GlobBuilder::new($pat);
1053                 if let Some(casei) = $options.casei {
1054                     builder.case_insensitive(casei);
1055                 }
1056                 if let Some(litsep) = $options.litsep {
1057                     builder.literal_separator(litsep);
1058                 }
1059                 if let Some(bsesc) = $options.bsesc {
1060                     builder.backslash_escape(bsesc);
1061                 }
1062                 if let Some(ealtre) = $options.ealtre {
1063                     builder.empty_alternates(ealtre);
1064                 }
1065                 let pat = builder.build().unwrap();
1066                 assert_eq!(format!("(?-u){}", $re), pat.regex());
1067             }
1068         };
1069     }
1070
1071     macro_rules! matches {
1072         ($name:ident, $pat:expr, $path:expr) => {
1073             matches!($name, $pat, $path, Options::default());
1074         };
1075         ($name:ident, $pat:expr, $path:expr, $options:expr) => {
1076             #[test]
1077             fn $name() {
1078                 let mut builder = GlobBuilder::new($pat);
1079                 if let Some(casei) = $options.casei {
1080                     builder.case_insensitive(casei);
1081                 }
1082                 if let Some(litsep) = $options.litsep {
1083                     builder.literal_separator(litsep);
1084                 }
1085                 if let Some(bsesc) = $options.bsesc {
1086                     builder.backslash_escape(bsesc);
1087                 }
1088                 if let Some(ealtre) = $options.ealtre {
1089                     builder.empty_alternates(ealtre);
1090                 }
1091                 let pat = builder.build().unwrap();
1092                 let matcher = pat.compile_matcher();
1093                 let strategic = pat.compile_strategic_matcher();
1094                 let set = GlobSetBuilder::new().add(pat).build().unwrap();
1095                 assert!(matcher.is_match($path));
1096                 assert!(strategic.is_match($path));
1097                 assert!(set.is_match($path));
1098             }
1099         };
1100     }
1101
1102     macro_rules! nmatches {
1103         ($name:ident, $pat:expr, $path:expr) => {
1104             nmatches!($name, $pat, $path, Options::default());
1105         };
1106         ($name:ident, $pat:expr, $path:expr, $options:expr) => {
1107             #[test]
1108             fn $name() {
1109                 let mut builder = GlobBuilder::new($pat);
1110                 if let Some(casei) = $options.casei {
1111                     builder.case_insensitive(casei);
1112                 }
1113                 if let Some(litsep) = $options.litsep {
1114                     builder.literal_separator(litsep);
1115                 }
1116                 if let Some(bsesc) = $options.bsesc {
1117                     builder.backslash_escape(bsesc);
1118                 }
1119                 if let Some(ealtre) = $options.ealtre {
1120                     builder.empty_alternates(ealtre);
1121                 }
1122                 let pat = builder.build().unwrap();
1123                 let matcher = pat.compile_matcher();
1124                 let strategic = pat.compile_strategic_matcher();
1125                 let set = GlobSetBuilder::new().add(pat).build().unwrap();
1126                 assert!(!matcher.is_match($path));
1127                 assert!(!strategic.is_match($path));
1128                 assert!(!set.is_match($path));
1129             }
1130         };
1131     }
1132
1133     fn s(string: &str) -> String {
1134         string.to_string()
1135     }
1136
1137     fn class(s: char, e: char) -> Token {
1138         Class { negated: false, ranges: vec![(s, e)] }
1139     }
1140
1141     fn classn(s: char, e: char) -> Token {
1142         Class { negated: true, ranges: vec![(s, e)] }
1143     }
1144
1145     fn rclass(ranges: &[(char, char)]) -> Token {
1146         Class { negated: false, ranges: ranges.to_vec() }
1147     }
1148
1149     fn rclassn(ranges: &[(char, char)]) -> Token {
1150         Class { negated: true, ranges: ranges.to_vec() }
1151     }
1152
1153     syntax!(literal1, "a", vec![Literal('a')]);
1154     syntax!(literal2, "ab", vec![Literal('a'), Literal('b')]);
1155     syntax!(any1, "?", vec![Any]);
1156     syntax!(any2, "a?b", vec![Literal('a'), Any, Literal('b')]);
1157     syntax!(seq1, "*", vec![ZeroOrMore]);
1158     syntax!(seq2, "a*b", vec![Literal('a'), ZeroOrMore, Literal('b')]);
1159     syntax!(
1160         seq3,
1161         "*a*b*",
1162         vec![ZeroOrMore, Literal('a'), ZeroOrMore, Literal('b'), ZeroOrMore,]
1163     );
1164     syntax!(rseq1, "**", vec![RecursivePrefix]);
1165     syntax!(rseq2, "**/", vec![RecursivePrefix]);
1166     syntax!(rseq3, "/**", vec![RecursiveSuffix]);
1167     syntax!(rseq4, "/**/", vec![RecursiveZeroOrMore]);
1168     syntax!(
1169         rseq5,
1170         "a/**/b",
1171         vec![Literal('a'), RecursiveZeroOrMore, Literal('b'),]
1172     );
1173     syntax!(cls1, "[a]", vec![class('a', 'a')]);
1174     syntax!(cls2, "[!a]", vec![classn('a', 'a')]);
1175     syntax!(cls3, "[a-z]", vec![class('a', 'z')]);
1176     syntax!(cls4, "[!a-z]", vec![classn('a', 'z')]);
1177     syntax!(cls5, "[-]", vec![class('-', '-')]);
1178     syntax!(cls6, "[]]", vec![class(']', ']')]);
1179     syntax!(cls7, "[*]", vec![class('*', '*')]);
1180     syntax!(cls8, "[!!]", vec![classn('!', '!')]);
1181     syntax!(cls9, "[a-]", vec![rclass(&[('a', 'a'), ('-', '-')])]);
1182     syntax!(cls10, "[-a-z]", vec![rclass(&[('-', '-'), ('a', 'z')])]);
1183     syntax!(cls11, "[a-z-]", vec![rclass(&[('a', 'z'), ('-', '-')])]);
1184     syntax!(
1185         cls12,
1186         "[-a-z-]",
1187         vec![rclass(&[('-', '-'), ('a', 'z'), ('-', '-')]),]
1188     );
1189     syntax!(cls13, "[]-z]", vec![class(']', 'z')]);
1190     syntax!(cls14, "[--z]", vec![class('-', 'z')]);
1191     syntax!(cls15, "[ --]", vec![class(' ', '-')]);
1192     syntax!(cls16, "[0-9a-z]", vec![rclass(&[('0', '9'), ('a', 'z')])]);
1193     syntax!(cls17, "[a-z0-9]", vec![rclass(&[('a', 'z'), ('0', '9')])]);
1194     syntax!(cls18, "[!0-9a-z]", vec![rclassn(&[('0', '9'), ('a', 'z')])]);
1195     syntax!(cls19, "[!a-z0-9]", vec![rclassn(&[('a', 'z'), ('0', '9')])]);
1196     syntax!(cls20, "[^a]", vec![classn('a', 'a')]);
1197     syntax!(cls21, "[^a-z]", vec![classn('a', 'z')]);
1198
1199     syntaxerr!(err_unclosed1, "[", ErrorKind::UnclosedClass);
1200     syntaxerr!(err_unclosed2, "[]", ErrorKind::UnclosedClass);
1201     syntaxerr!(err_unclosed3, "[!", ErrorKind::UnclosedClass);
1202     syntaxerr!(err_unclosed4, "[!]", ErrorKind::UnclosedClass);
1203     syntaxerr!(err_range1, "[z-a]", ErrorKind::InvalidRange('z', 'a'));
1204     syntaxerr!(err_range2, "[z--]", ErrorKind::InvalidRange('z', '-'));
1205
    // Canned option sets shared by the test tables below. Every field that
    // is `None` leaves the builder's default for that setting untouched.

    // Case-insensitive matching enabled.
    const CASEI: Options =
        Options { casei: Some(true), litsep: None, bsesc: None, ealtre: None };
    // Literal-separator mode enabled.
    const SLASHLIT: Options =
        Options { casei: None, litsep: Some(true), bsesc: None, ealtre: None };
    // Backslash escaping explicitly disabled.
    const NOBSESC: Options = Options {
        casei: None,
        litsep: None,
        bsesc: Some(false),
        ealtre: None,
    };
    // Backslash escaping explicitly enabled.
    const BSESC: Options =
        Options { casei: None, litsep: None, bsesc: Some(true), ealtre: None };
    // Empty alternates enabled; note that this also explicitly enables
    // backslash escaping.
    const EALTRE: Options = Options {
        casei: None,
        litsep: None,
        bsesc: Some(true),
        ealtre: Some(true),
    };
1224
1225     toregex!(re_casei, "a", "(?i)^a$", &CASEI);
1226
1227     toregex!(re_slash1, "?", r"^[^/]$", SLASHLIT);
1228     toregex!(re_slash2, "*", r"^[^/]*$", SLASHLIT);
1229
1230     toregex!(re1, "a", "^a$");
1231     toregex!(re2, "?", "^.$");
1232     toregex!(re3, "*", "^.*$");
1233     toregex!(re4, "a?", "^a.$");
1234     toregex!(re5, "?a", "^.a$");
1235     toregex!(re6, "a*", "^a.*$");
1236     toregex!(re7, "*a", "^.*a$");
1237     toregex!(re8, "[*]", r"^[\*]$");
1238     toregex!(re9, "[+]", r"^[\+]$");
1239     toregex!(re10, "+", r"^\+$");
1240     toregex!(re11, "☃", r"^\xe2\x98\x83$");
1241     toregex!(re12, "**", r"^.*$");
1242     toregex!(re13, "**/", r"^.*$");
1243     toregex!(re14, "**/*", r"^(?:/?|.*/).*$");
1244     toregex!(re15, "**/**", r"^.*$");
1245     toregex!(re16, "**/**/*", r"^(?:/?|.*/).*$");
1246     toregex!(re17, "**/**/**", r"^.*$");
1247     toregex!(re18, "**/**/**/*", r"^(?:/?|.*/).*$");
1248     toregex!(re19, "a/**", r"^a/.*$");
1249     toregex!(re20, "a/**/**", r"^a/.*$");
1250     toregex!(re21, "a/**/**/**", r"^a/.*$");
1251     toregex!(re22, "a/**/b", r"^a(?:/|/.*/)b$");
1252     toregex!(re23, "a/**/**/b", r"^a(?:/|/.*/)b$");
1253     toregex!(re24, "a/**/**/**/b", r"^a(?:/|/.*/)b$");
1254     toregex!(re25, "**/b", r"^(?:/?|.*/)b$");
1255     toregex!(re26, "**/**/b", r"^(?:/?|.*/)b$");
1256     toregex!(re27, "**/**/**/b", r"^(?:/?|.*/)b$");
1257     toregex!(re28, "a**", r"^a.*.*$");
1258     toregex!(re29, "**a", r"^.*.*a$");
1259     toregex!(re30, "a**b", r"^a.*.*b$");
1260     toregex!(re31, "***", r"^.*.*.*$");
1261     toregex!(re32, "/a**", r"^/a.*.*$");
1262     toregex!(re33, "/**a", r"^/.*.*a$");
1263     toregex!(re34, "/a**b", r"^/a.*.*b$");
1264     toregex!(re35, "{a,b}", r"^(?:b|a)$");
1265
1266     matches!(match1, "a", "a");
1267     matches!(match2, "a*b", "a_b");
1268     matches!(match3, "a*b*c", "abc");
1269     matches!(match4, "a*b*c", "a_b_c");
1270     matches!(match5, "a*b*c", "a___b___c");
1271     matches!(match6, "abc*abc*abc", "abcabcabcabcabcabcabc");
1272     matches!(match7, "a*a*a*a*a*a*a*a*a", "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa");
1273     matches!(match8, "a*b[xyz]c*d", "abxcdbxcddd");
1274     matches!(match9, "*.rs", ".rs");
1275     matches!(match10, "☃", "☃");
1276
1277     matches!(matchrec1, "some/**/needle.txt", "some/needle.txt");
1278     matches!(matchrec2, "some/**/needle.txt", "some/one/needle.txt");
1279     matches!(matchrec3, "some/**/needle.txt", "some/one/two/needle.txt");
1280     matches!(matchrec4, "some/**/needle.txt", "some/other/needle.txt");
1281     matches!(matchrec5, "**", "abcde");
1282     matches!(matchrec6, "**", "");
1283     matches!(matchrec7, "**", ".asdf");
1284     matches!(matchrec8, "**", "/x/.asdf");
1285     matches!(matchrec9, "some/**/**/needle.txt", "some/needle.txt");
1286     matches!(matchrec10, "some/**/**/needle.txt", "some/one/needle.txt");
1287     matches!(matchrec11, "some/**/**/needle.txt", "some/one/two/needle.txt");
1288     matches!(matchrec12, "some/**/**/needle.txt", "some/other/needle.txt");
1289     matches!(matchrec13, "**/test", "one/two/test");
1290     matches!(matchrec14, "**/test", "one/test");
1291     matches!(matchrec15, "**/test", "test");
1292     matches!(matchrec16, "/**/test", "/one/two/test");
1293     matches!(matchrec17, "/**/test", "/one/test");
1294     matches!(matchrec18, "/**/test", "/test");
1295     matches!(matchrec19, "**/.*", ".abc");
1296     matches!(matchrec20, "**/.*", "abc/.abc");
1297     matches!(matchrec21, "**/foo/bar", "foo/bar");
1298     matches!(matchrec22, ".*/**", ".abc/abc");
1299     matches!(matchrec23, "test/**", "test/");
1300     matches!(matchrec24, "test/**", "test/one");
1301     matches!(matchrec25, "test/**", "test/one/two");
1302     matches!(matchrec26, "some/*/needle.txt", "some/one/needle.txt");
1303
1304     matches!(matchrange1, "a[0-9]b", "a0b");
1305     matches!(matchrange2, "a[0-9]b", "a9b");
1306     matches!(matchrange3, "a[!0-9]b", "a_b");
1307     matches!(matchrange4, "[a-z123]", "1");
1308     matches!(matchrange5, "[1a-z23]", "1");
1309     matches!(matchrange6, "[123a-z]", "1");
1310     matches!(matchrange7, "[abc-]", "-");
1311     matches!(matchrange8, "[-abc]", "-");
1312     matches!(matchrange9, "[-a-c]", "b");
1313     matches!(matchrange10, "[a-c-]", "b");
1314     matches!(matchrange11, "[-]", "-");
1315     matches!(matchrange12, "a[^0-9]b", "a_b");
1316
1317     matches!(matchpat1, "*hello.txt", "hello.txt");
1318     matches!(matchpat2, "*hello.txt", "gareth_says_hello.txt");
1319     matches!(matchpat3, "*hello.txt", "some/path/to/hello.txt");
1320     matches!(matchpat4, "*hello.txt", "some\\path\\to\\hello.txt");
1321     matches!(matchpat5, "*hello.txt", "/an/absolute/path/to/hello.txt");
1322     matches!(matchpat6, "*some/path/to/hello.txt", "some/path/to/hello.txt");
1323     matches!(
1324         matchpat7,
1325         "*some/path/to/hello.txt",
1326         "a/bigger/some/path/to/hello.txt"
1327     );
1328
1329     matches!(matchescape, "_[[]_[]]_[?]_[*]_!_", "_[_]_?_*_!_");
1330
1331     matches!(matchcasei1, "aBcDeFg", "aBcDeFg", CASEI);
1332     matches!(matchcasei2, "aBcDeFg", "abcdefg", CASEI);
1333     matches!(matchcasei3, "aBcDeFg", "ABCDEFG", CASEI);
1334     matches!(matchcasei4, "aBcDeFg", "AbCdEfG", CASEI);
1335
1336     matches!(matchalt1, "a,b", "a,b");
1337     matches!(matchalt2, ",", ",");
1338     matches!(matchalt3, "{a,b}", "a");
1339     matches!(matchalt4, "{a,b}", "b");
1340     matches!(matchalt5, "{**/src/**,foo}", "abc/src/bar");
1341     matches!(matchalt6, "{**/src/**,foo}", "foo");
1342     matches!(matchalt7, "{[}],foo}", "}");
1343     matches!(matchalt8, "{foo}", "foo");
1344     matches!(matchalt9, "{}", "");
1345     matches!(matchalt10, "{,}", "");
1346     matches!(matchalt11, "{*.foo,*.bar,*.wat}", "test.foo");
1347     matches!(matchalt12, "{*.foo,*.bar,*.wat}", "test.bar");
1348     matches!(matchalt13, "{*.foo,*.bar,*.wat}", "test.wat");
1349     matches!(matchalt14, "foo{,.txt}", "foo.txt");
1350     nmatches!(matchalt15, "foo{,.txt}", "foo");
1351     matches!(matchalt16, "foo{,.txt}", "foo", EALTRE);
1352
1353     matches!(matchslash1, "abc/def", "abc/def", SLASHLIT);
1354     #[cfg(unix)]
1355     nmatches!(matchslash2, "abc?def", "abc/def", SLASHLIT);
1356     #[cfg(not(unix))]
1357     nmatches!(matchslash2, "abc?def", "abc\\def", SLASHLIT);
1358     nmatches!(matchslash3, "abc*def", "abc/def", SLASHLIT);
1359     matches!(matchslash4, "abc[/]def", "abc/def", SLASHLIT); // differs
1360     #[cfg(unix)]
1361     nmatches!(matchslash5, "abc\\def", "abc/def", SLASHLIT);
1362     #[cfg(not(unix))]
1363     matches!(matchslash5, "abc\\def", "abc/def", SLASHLIT);
1364
1365     matches!(matchbackslash1, "\\[", "[", BSESC);
1366     matches!(matchbackslash2, "\\?", "?", BSESC);
1367     matches!(matchbackslash3, "\\*", "*", BSESC);
1368     matches!(matchbackslash4, "\\[a-z]", "\\a", NOBSESC);
1369     matches!(matchbackslash5, "\\?", "\\a", NOBSESC);
1370     matches!(matchbackslash6, "\\*", "\\\\", NOBSESC);
1371     #[cfg(unix)]
1372     matches!(matchbackslash7, "\\a", "a");
1373     #[cfg(not(unix))]
1374     matches!(matchbackslash8, "\\a", "/a");
1375
1376     nmatches!(matchnot1, "a*b*c", "abcd");
1377     nmatches!(matchnot2, "abc*abc*abc", "abcabcabcabcabcabcabca");
1378     nmatches!(matchnot3, "some/**/needle.txt", "some/other/notthis.txt");
1379     nmatches!(matchnot4, "some/**/**/needle.txt", "some/other/notthis.txt");
1380     nmatches!(matchnot5, "/**/test", "test");
1381     nmatches!(matchnot6, "/**/test", "/one/notthis");
1382     nmatches!(matchnot7, "/**/test", "/notthis");
1383     nmatches!(matchnot8, "**/.*", "ab.c");
1384     nmatches!(matchnot9, "**/.*", "abc/ab.c");
1385     nmatches!(matchnot10, ".*/**", "a.bc");
1386     nmatches!(matchnot11, ".*/**", "abc/a.bc");
1387     nmatches!(matchnot12, "a[0-9]b", "a_b");
1388     nmatches!(matchnot13, "a[!0-9]b", "a0b");
1389     nmatches!(matchnot14, "a[!0-9]b", "a9b");
1390     nmatches!(matchnot15, "[!-]", "-");
1391     nmatches!(matchnot16, "*hello.txt", "hello.txt-and-then-some");
1392     nmatches!(matchnot17, "*hello.txt", "goodbye.txt");
1393     nmatches!(
1394         matchnot18,
1395         "*some/path/to/hello.txt",
1396         "some/path/to/hello.txt-and-then-some"
1397     );
1398     nmatches!(
1399         matchnot19,
1400         "*some/path/to/hello.txt",
1401         "some/other/path/to/hello.txt"
1402     );
1403     nmatches!(matchnot20, "a", "foo/a");
1404     nmatches!(matchnot21, "./foo", "foo");
1405     nmatches!(matchnot22, "**/foo", "foofoo");
1406     nmatches!(matchnot23, "**/foo/bar", "foofoo/bar");
1407     nmatches!(matchnot24, "/*.c", "mozilla-sha1/sha1.c");
1408     nmatches!(matchnot25, "*.c", "mozilla-sha1/sha1.c", SLASHLIT);
1409     nmatches!(
1410         matchnot26,
1411         "**/m4/ltoptions.m4",
1412         "csharp/src/packages/repositories.config",
1413         SLASHLIT
1414     );
1415     nmatches!(matchnot27, "a[^0-9]b", "a0b");
1416     nmatches!(matchnot28, "a[^0-9]b", "a9b");
1417     nmatches!(matchnot29, "[^-]", "-");
1418     nmatches!(matchnot30, "some/*/needle.txt", "some/needle.txt");
1419     nmatches!(
1420         matchrec31,
1421         "some/*/needle.txt",
1422         "some/one/two/needle.txt",
1423         SLASHLIT
1424     );
1425     nmatches!(
1426         matchrec32,
1427         "some/*/needle.txt",
1428         "some/one/two/three/needle.txt",
1429         SLASHLIT
1430     );
1431     nmatches!(matchrec33, ".*/**", ".abc");
1432     nmatches!(matchrec34, "foo/**", "foo");
1433
1434     macro_rules! extract {
1435         ($which:ident, $name:ident, $pat:expr, $expect:expr) => {
1436             extract!($which, $name, $pat, $expect, Options::default());
1437         };
1438         ($which:ident, $name:ident, $pat:expr, $expect:expr, $options:expr) => {
1439             #[test]
1440             fn $name() {
1441                 let mut builder = GlobBuilder::new($pat);
1442                 if let Some(casei) = $options.casei {
1443                     builder.case_insensitive(casei);
1444                 }
1445                 if let Some(litsep) = $options.litsep {
1446                     builder.literal_separator(litsep);
1447                 }
1448                 if let Some(bsesc) = $options.bsesc {
1449                     builder.backslash_escape(bsesc);
1450                 }
1451                 if let Some(ealtre) = $options.ealtre {
1452                     builder.empty_alternates(ealtre);
1453                 }
1454                 let pat = builder.build().unwrap();
1455                 assert_eq!($expect, pat.$which());
1456             }
1457         };
1458     }
1459
    // Thin wrappers around `extract!`: each one fixes the method that is
    // called on the built glob and forwards all remaining arguments (test
    // name, pattern, expected value and optional options) unchanged.

    // Checks the result of the glob's `literal` method.
    macro_rules! literal {
        ($($tt:tt)*) => { extract!(literal, $($tt)*); }
    }

    // Checks the result of the glob's `basename_tokens` method.
    macro_rules! basetokens {
        ($($tt:tt)*) => { extract!(basename_tokens, $($tt)*); }
    }

    // Checks the result of the glob's `ext` method.
    macro_rules! ext {
        ($($tt:tt)*) => { extract!(ext, $($tt)*); }
    }

    // Checks the result of the glob's `required_ext` method.
    macro_rules! required_ext {
        ($($tt:tt)*) => { extract!(required_ext, $($tt)*); }
    }

    // Checks the result of the glob's `prefix` method.
    macro_rules! prefix {
        ($($tt:tt)*) => { extract!(prefix, $($tt)*); }
    }

    // Checks the result of the glob's `suffix` method.
    macro_rules! suffix {
        ($($tt:tt)*) => { extract!(suffix, $($tt)*); }
    }

    // Checks the result of the glob's `basename_literal` method.
    macro_rules! baseliteral {
        ($($tt:tt)*) => { extract!(basename_literal, $($tt)*); }
    }
1487
1488     literal!(extract_lit1, "foo", Some(s("foo")));
1489     literal!(extract_lit2, "foo", None, CASEI);
1490     literal!(extract_lit3, "/foo", Some(s("/foo")));
1491     literal!(extract_lit4, "/foo/", Some(s("/foo/")));
1492     literal!(extract_lit5, "/foo/bar", Some(s("/foo/bar")));
1493     literal!(extract_lit6, "*.foo", None);
1494     literal!(extract_lit7, "foo/bar", Some(s("foo/bar")));
1495     literal!(extract_lit8, "**/foo/bar", None);
1496
1497     basetokens!(
1498         extract_basetoks1,
1499         "**/foo",
1500         Some(&*vec![Literal('f'), Literal('o'), Literal('o'),])
1501     );
1502     basetokens!(extract_basetoks2, "**/foo", None, CASEI);
1503     basetokens!(
1504         extract_basetoks3,
1505         "**/foo",
1506         Some(&*vec![Literal('f'), Literal('o'), Literal('o'),]),
1507         SLASHLIT
1508     );
1509     basetokens!(extract_basetoks4, "*foo", None, SLASHLIT);
1510     basetokens!(extract_basetoks5, "*foo", None);
1511     basetokens!(extract_basetoks6, "**/fo*o", None);
1512     basetokens!(
1513         extract_basetoks7,
1514         "**/fo*o",
1515         Some(&*vec![Literal('f'), Literal('o'), ZeroOrMore, Literal('o'),]),
1516         SLASHLIT
1517     );
1518
1519     ext!(extract_ext1, "**/*.rs", Some(s(".rs")));
1520     ext!(extract_ext2, "**/*.rs.bak", None);
1521     ext!(extract_ext3, "*.rs", Some(s(".rs")));
1522     ext!(extract_ext4, "a*.rs", None);
1523     ext!(extract_ext5, "/*.c", None);
1524     ext!(extract_ext6, "*.c", None, SLASHLIT);
1525     ext!(extract_ext7, "*.c", Some(s(".c")));
1526
1527     required_ext!(extract_req_ext1, "*.rs", Some(s(".rs")));
1528     required_ext!(extract_req_ext2, "/foo/bar/*.rs", Some(s(".rs")));
1529     required_ext!(extract_req_ext3, "/foo/bar/*.rs", Some(s(".rs")));
1530     required_ext!(extract_req_ext4, "/foo/bar/.rs", Some(s(".rs")));
1531     required_ext!(extract_req_ext5, ".rs", Some(s(".rs")));
1532     required_ext!(extract_req_ext6, "./rs", None);
1533     required_ext!(extract_req_ext7, "foo", None);
1534     required_ext!(extract_req_ext8, ".foo/", None);
1535     required_ext!(extract_req_ext9, "foo/", None);
1536
1537     prefix!(extract_prefix1, "/foo", Some(s("/foo")));
1538     prefix!(extract_prefix2, "/foo/*", Some(s("/foo/")));
1539     prefix!(extract_prefix3, "**/foo", None);
1540     prefix!(extract_prefix4, "foo/**", Some(s("foo/")));
1541
1542     suffix!(extract_suffix1, "**/foo/bar", Some((s("/foo/bar"), true)));
1543     suffix!(extract_suffix2, "*/foo/bar", Some((s("/foo/bar"), false)));
1544     suffix!(extract_suffix3, "*/foo/bar", None, SLASHLIT);
1545     suffix!(extract_suffix4, "foo/bar", Some((s("foo/bar"), false)));
1546     suffix!(extract_suffix5, "*.foo", Some((s(".foo"), false)));
1547     suffix!(extract_suffix6, "*.foo", None, SLASHLIT);
1548     suffix!(extract_suffix7, "**/*_test", Some((s("_test"), false)));
1549
1550     baseliteral!(extract_baselit1, "**/foo", Some(s("foo")));
1551     baseliteral!(extract_baselit2, "foo", None);
1552     baseliteral!(extract_baselit3, "*foo", None);
1553     baseliteral!(extract_baselit4, "*/foo", None);
1554 }