vendor/globset/src/glob.rs

   1 use std::fmt;
   2 use std::hash;
   3 use std::iter;
   4 use std::ops::{Deref, DerefMut};
   5 use std::path::{Path, is_separator};
   6 use std::str;
   7
   8 use regex;
   9 use regex::bytes::Regex;
  10
  11 use {Candidate, Error, ErrorKind, new_regex};
  12
/// Describes a matching strategy for a particular pattern.
///
/// This provides a way to more quickly determine whether a pattern matches
/// a particular file path in a way that scales with a large number of
/// patterns. For example, if many patterns are of the form `*.ext`, then it's
/// possible to test whether any of those patterns matches by looking up a
/// file path's extension in a hash table.
#[derive(Clone, Debug, Eq, PartialEq)]
pub enum MatchStrategy {
    /// A pattern matches if and only if the entire file path matches this
    /// literal string.
    Literal(String),
    /// A pattern matches if and only if the file path's basename matches this
    /// literal string.
    BasenameLiteral(String),
    /// A pattern matches if and only if the file path's extension matches this
    /// literal string.
    Extension(String),
    /// A pattern matches if and only if this prefix literal is a prefix of the
    /// candidate file path.
    Prefix(String),
    /// A pattern matches if and only if this suffix literal is a suffix of the
    /// candidate file path.
    ///
    /// An exception: if `component` is true, then `suffix` (which then starts
    /// with a `/`) must appear at the beginning of a file path or immediately
    /// following a `/`. In other words, a path that is exactly the suffix
    /// without its leading `/` also matches.
    Suffix {
        /// The actual suffix.
        suffix: String,
        /// Whether this must start at the beginning of a path component.
        component: bool,
    },
    /// A pattern matches only if the given extension matches the file path's
    /// extension. Note that this is a necessary but NOT sufficient criterion.
    /// Namely, if the extension matches, then a full regex search is still
    /// required.
    RequiredExtension(String),
    /// A regex needs to be used for matching.
    Regex,
}
  53
  54 impl MatchStrategy {
  55     /// Returns a matching strategy for the given pattern.
  56     pub fn new(pat: &Glob) -> MatchStrategy {
  57         if let Some(lit) = pat.basename_literal() {
  58             MatchStrategy::BasenameLiteral(lit)
  59         } else if let Some(lit) = pat.literal() {
  60             MatchStrategy::Literal(lit)
  61         } else if let Some(ext) = pat.ext() {
  62             MatchStrategy::Extension(ext)
  63         } else if let Some(prefix) = pat.prefix() {
  64             MatchStrategy::Prefix(prefix)
  65         } else if let Some((suffix, component)) = pat.suffix() {
  66             MatchStrategy::Suffix { suffix: suffix, component: component }
  67         } else if let Some(ext) = pat.required_ext() {
  68             MatchStrategy::RequiredExtension(ext)
  69         } else {
  70             MatchStrategy::Regex
  71         }
  72     }
  73 }
  74
  75 /// Glob represents a successfully parsed shell glob pattern.
  76 ///
  77 /// It cannot be used directly to match file paths, but it can be converted
  78 /// to a regular expression string or a matcher.
  79 #[derive(Clone, Debug, Eq)]
  80 pub struct Glob {
  81     glob: String,
  82     re: String,
  83     opts: GlobOptions,
  84     tokens: Tokens,
  85 }
  86
  87 impl PartialEq for Glob {
  88   fn eq(&self, other: &Glob) -> bool {
  89     self.glob == other.glob && self.opts == other.opts
  90   }
  91 }
  92
  93 impl hash::Hash for Glob {
  94   fn hash<H: hash::Hasher>(&self, state: &mut H) {
  95     self.glob.hash(state);
  96     self.opts.hash(state);
  97   }
  98 }
  99
 100 impl fmt::Display for Glob {
 101     fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
 102         self.glob.fmt(f)
 103     }
 104 }
 105
 106 /// A matcher for a single pattern.
 107 #[derive(Clone, Debug)]
 108 pub struct GlobMatcher {
 109     /// The underlying pattern.
 110     pat: Glob,
 111     /// The pattern, as a compiled regex.
 112     re: Regex,
 113 }
 114
 115 impl GlobMatcher {
 116     /// Tests whether the given path matches this pattern or not.
 117     pub fn is_match<P: AsRef<Path>>(&self, path: P) -> bool {
 118         self.is_match_candidate(&Candidate::new(path.as_ref()))
 119     }
 120
 121     /// Tests whether the given path matches this pattern or not.
 122     pub fn is_match_candidate(&self, path: &Candidate) -> bool {
 123         self.re.is_match(&path.path)
 124     }
 125 }
 126
 127 /// A strategic matcher for a single pattern.
 128 #[cfg(test)]
 129 #[derive(Clone, Debug)]
 130 struct GlobStrategic {
 131     /// The match strategy to use.
 132     strategy: MatchStrategy,
 133     /// The underlying pattern.
 134     pat: Glob,
 135     /// The pattern, as a compiled regex.
 136     re: Regex,
 137 }
 138
 139 #[cfg(test)]
 140 impl GlobStrategic {
 141     /// Tests whether the given path matches this pattern or not.
 142     fn is_match<P: AsRef<Path>>(&self, path: P) -> bool {
 143         self.is_match_candidate(&Candidate::new(path.as_ref()))
 144     }
 145
 146     /// Tests whether the given path matches this pattern or not.
 147     fn is_match_candidate(&self, candidate: &Candidate) -> bool {
 148         let byte_path = &*candidate.path;
 149
 150         match self.strategy {
 151             MatchStrategy::Literal(ref lit) => lit.as_bytes() == byte_path,
 152             MatchStrategy::BasenameLiteral(ref lit) => {
 153                 lit.as_bytes() == &*candidate.basename
 154             }
 155             MatchStrategy::Extension(ref ext) => {
 156                 ext.as_bytes() == &*candidate.ext
 157             }
 158             MatchStrategy::Prefix(ref pre) => {
 159                 starts_with(pre.as_bytes(), byte_path)
 160             }
 161             MatchStrategy::Suffix { ref suffix, component } => {
 162                 if component && byte_path == &suffix.as_bytes()[1..] {
 163                     return true;
 164                 }
 165                 ends_with(suffix.as_bytes(), byte_path)
 166             }
 167             MatchStrategy::RequiredExtension(ref ext) => {
 168                 let ext = ext.as_bytes();
 169                 &*candidate.ext == ext && self.re.is_match(byte_path)
 170             }
 171             MatchStrategy::Regex => self.re.is_match(byte_path),
 172         }
 173     }
 174 }
 175
/// A builder for a pattern.
///
/// This builder enables configuring the match semantics of a pattern. For
/// example, one can make matching case insensitive.
///
/// The builder only borrows the pattern string; nothing is parsed until
/// `build` is called.
///
/// The lifetime `'a` refers to the lifetime of the pattern string.
#[derive(Clone, Debug)]
pub struct GlobBuilder<'a> {
    /// The glob pattern to compile.
    glob: &'a str,
    /// Options for the pattern.
    opts: GlobOptions,
}
 189
 190 #[derive(Clone, Copy, Debug, Eq, Hash, PartialEq)]
 191 struct GlobOptions {
 192     /// Whether to match case insensitively.
 193     case_insensitive: bool,
 194     /// Whether to require a literal separator to match a separator in a file
 195     /// path. e.g., when enabled, `*` won't match `/`.
 196     literal_separator: bool,
 197     /// Whether or not to use `\` to escape special characters.
 198     /// e.g., when enabled, `\*` will match a literal `*`.
 199     backslash_escape: bool,
 200 }
 201
 202 impl GlobOptions {
 203     fn default() -> GlobOptions {
 204         GlobOptions {
 205             case_insensitive: false,
 206             literal_separator: false,
 207             backslash_escape: !is_separator('\\'),
 208         }
 209     }
 210 }
 211
 212 #[derive(Clone, Debug, Default, Eq, PartialEq)]
 213 struct Tokens(Vec<Token>);
 214
 215 impl Deref for Tokens {
 216     type Target = Vec<Token>;
 217     fn deref(&self) -> &Vec<Token> { &self.0 }
 218 }
 219
 220 impl DerefMut for Tokens {
 221     fn deref_mut(&mut self) -> &mut Vec<Token> { &mut self.0 }
 222 }
 223
 224 #[derive(Clone, Debug, Eq, PartialEq)]
 225 enum Token {
 226     Literal(char),
 227     Any,
 228     ZeroOrMore,
 229     RecursivePrefix,
 230     RecursiveSuffix,
 231     RecursiveZeroOrMore,
 232     Class {
 233         negated: bool,
 234         ranges: Vec<(char, char)>,
 235     },
 236     Alternates(Vec<Tokens>),
 237 }
 238
 239 impl Glob {
 240     /// Builds a new pattern with default options.
 241     pub fn new(glob: &str) -> Result<Glob, Error> {
 242         GlobBuilder::new(glob).build()
 243     }
 244
 245     /// Returns a matcher for this pattern.
 246     pub fn compile_matcher(&self) -> GlobMatcher {
 247         let re = new_regex(&self.re)
 248             .expect("regex compilation shouldn't fail");
 249         GlobMatcher {
 250             pat: self.clone(),
 251             re: re,
 252         }
 253     }
 254
 255     /// Returns a strategic matcher.
 256     ///
 257     /// This isn't exposed because it's not clear whether it's actually
 258     /// faster than just running a regex for a *single* pattern. If it
 259     /// is faster, then GlobMatcher should do it automatically.
 260     #[cfg(test)]
 261     fn compile_strategic_matcher(&self) -> GlobStrategic {
 262         let strategy = MatchStrategy::new(self);
 263         let re = new_regex(&self.re)
 264             .expect("regex compilation shouldn't fail");
 265         GlobStrategic {
 266             strategy: strategy,
 267             pat: self.clone(),
 268             re: re,
 269         }
 270     }
 271
 272     /// Returns the original glob pattern used to build this pattern.
 273     pub fn glob(&self) -> &str {
 274         &self.glob
 275     }
 276
 277     /// Returns the regular expression string for this glob.
 278     ///
 279     /// Note that regular expressions for globs are intended to be matched on
 280     /// arbitrary bytes (`&[u8]`) instead of Unicode strings (`&str`). In
 281     /// particular, globs are frequently used on file paths, where there is no
 282     /// general guarantee that file paths are themselves valid UTF-8. As a
 283     /// result, callers will need to ensure that they are using a regex API
 284     /// that can match on arbitrary bytes. For example, the
 285     /// [`regex`](https://crates.io/regex)
 286     /// crate's
 287     /// [`Regex`](https://docs.rs/regex/*/regex/struct.Regex.html)
 288     /// API is not suitable for this since it matches on `&str`, but its
 289     /// [`bytes::Regex`](https://docs.rs/regex/*/regex/bytes/struct.Regex.html)
 290     /// API is suitable for this.
 291     pub fn regex(&self) -> &str {
 292         &self.re
 293     }
 294
 295     /// Returns the pattern as a literal if and only if the pattern must match
 296     /// an entire path exactly.
 297     ///
 298     /// The basic format of these patterns is `{literal}`.
 299     fn literal(&self) -> Option<String> {
 300         if self.opts.case_insensitive {
 301             return None;
 302         }
 303         let mut lit = String::new();
 304         for t in &*self.tokens {
 305             match *t {
 306                 Token::Literal(c) => lit.push(c),
 307                 _ => return None,
 308             }
 309         }
 310         if lit.is_empty() {
 311             None
 312         } else {
 313             Some(lit)
 314         }
 315     }
 316
 317     /// Returns an extension if this pattern matches a file path if and only
 318     /// if the file path has the extension returned.
 319     ///
 320     /// Note that this extension returned differs from the extension that
 321     /// std::path::Path::extension returns. Namely, this extension includes
 322     /// the '.'. Also, paths like `.rs` are considered to have an extension
 323     /// of `.rs`.
 324     fn ext(&self) -> Option<String> {
 325         if self.opts.case_insensitive {
 326             return None;
 327         }
 328         let start = match self.tokens.get(0) {
 329             Some(&Token::RecursivePrefix) => 1,
 330             Some(_) => 0,
 331             _ => return None,
 332         };
 333         match self.tokens.get(start) {
 334             Some(&Token::ZeroOrMore) => {
 335                 // If there was no recursive prefix, then we only permit
 336                 // `*` if `*` can match a `/`. For example, if `*` can't
 337                 // match `/`, then `*.c` doesn't match `foo/bar.c`.
 338                 if start == 0 && self.opts.literal_separator {
 339                     return None;
 340                 }
 341             }
 342             _ => return None,
 343         }
 344         match self.tokens.get(start + 1) {
 345             Some(&Token::Literal('.')) => {}
 346             _ => return None,
 347         }
 348         let mut lit = ".".to_string();
 349         for t in self.tokens[start + 2..].iter() {
 350             match *t {
 351                 Token::Literal('.') | Token::Literal('/') => return None,
 352                 Token::Literal(c) => lit.push(c),
 353                 _ => return None,
 354             }
 355         }
 356         if lit.is_empty() {
 357             None
 358         } else {
 359             Some(lit)
 360         }
 361     }
 362
 363     /// This is like `ext`, but returns an extension even if it isn't sufficent
 364     /// to imply a match. Namely, if an extension is returned, then it is
 365     /// necessary but not sufficient for a match.
 366     fn required_ext(&self) -> Option<String> {
 367         if self.opts.case_insensitive {
 368             return None;
 369         }
 370         // We don't care at all about the beginning of this pattern. All we
 371         // need to check for is if it ends with a literal of the form `.ext`.
 372         let mut ext: Vec<char> = vec![]; // built in reverse
 373         for t in self.tokens.iter().rev() {
 374             match *t {
 375                 Token::Literal('/') => return None,
 376                 Token::Literal(c) => {
 377                     ext.push(c);
 378                     if c == '.' {
 379                         break;
 380                     }
 381                 }
 382                 _ => return None,
 383             }
 384         }
 385         if ext.last() != Some(&'.') {
 386             None
 387         } else {
 388             ext.reverse();
 389             Some(ext.into_iter().collect())
 390         }
 391     }
 392
 393     /// Returns a literal prefix of this pattern if the entire pattern matches
 394     /// if the literal prefix matches.
 395     fn prefix(&self) -> Option<String> {
 396         if self.opts.case_insensitive {
 397             return None;
 398         }
 399         let end = match self.tokens.last() {
 400             Some(&Token::ZeroOrMore) => {
 401                 if self.opts.literal_separator {
 402                     // If a trailing `*` can't match a `/`, then we can't
 403                     // assume a match of the prefix corresponds to a match
 404                     // of the overall pattern. e.g., `foo/*` with
 405                     // `literal_separator` enabled matches `foo/bar` but not
 406                     // `foo/bar/baz`, even though `foo/bar/baz` has a `foo/`
 407                     // literal prefix.
 408                     return None;
 409                 }
 410                 self.tokens.len() - 1
 411             }
 412             _ => self.tokens.len(),
 413         };
 414         let mut lit = String::new();
 415         for t in &self.tokens[0..end] {
 416             match *t {
 417                 Token::Literal(c) => lit.push(c),
 418                 _ => return None,
 419             }
 420         }
 421         if lit.is_empty() {
 422             None
 423         } else {
 424             Some(lit)
 425         }
 426     }
 427
 428     /// Returns a literal suffix of this pattern if the entire pattern matches
 429     /// if the literal suffix matches.
 430     ///
 431     /// If a literal suffix is returned and it must match either the entire
 432     /// file path or be preceded by a `/`, then also return true. This happens
 433     /// with a pattern like `**/foo/bar`. Namely, this pattern matches
 434     /// `foo/bar` and `baz/foo/bar`, but not `foofoo/bar`. In this case, the
 435     /// suffix returned is `/foo/bar` (but should match the entire path
 436     /// `foo/bar`).
 437     ///
 438     /// When this returns true, the suffix literal is guaranteed to start with
 439     /// a `/`.
 440     fn suffix(&self) -> Option<(String, bool)> {
 441         if self.opts.case_insensitive {
 442             return None;
 443         }
 444         let mut lit = String::new();
 445         let (start, entire) = match self.tokens.get(0) {
 446             Some(&Token::RecursivePrefix) => {
 447                 // We only care if this follows a path component if the next
 448                 // token is a literal.
 449                 if let Some(&Token::Literal(_)) = self.tokens.get(1) {
 450                     lit.push('/');
 451                     (1, true)
 452                 } else {
 453                     (1, false)
 454                 }
 455             }
 456             _ => (0, false),
 457         };
 458         let start = match self.tokens.get(start) {
 459             Some(&Token::ZeroOrMore) => {
 460                 // If literal_separator is enabled, then a `*` can't
 461                 // necessarily match everything, so reporting a suffix match
 462                 // as a match of the pattern would be a false positive.
 463                 if self.opts.literal_separator {
 464                     return None;
 465                 }
 466                 start + 1
 467             }
 468             _ => start,
 469         };
 470         for t in &self.tokens[start..] {
 471             match *t {
 472                 Token::Literal(c) => lit.push(c),
 473                 _ => return None,
 474             }
 475         }
 476         if lit.is_empty() || lit == "/" {
 477             None
 478         } else {
 479             Some((lit, entire))
 480         }
 481     }
 482
 483     /// If this pattern only needs to inspect the basename of a file path,
 484     /// then the tokens corresponding to only the basename match are returned.
 485     ///
 486     /// For example, given a pattern of `**/*.foo`, only the tokens
 487     /// corresponding to `*.foo` are returned.
 488     ///
 489     /// Note that this will return None if any match of the basename tokens
 490     /// doesn't correspond to a match of the entire pattern. For example, the
 491     /// glob `foo` only matches when a file path has a basename of `foo`, but
 492     /// doesn't *always* match when a file path has a basename of `foo`. e.g.,
 493     /// `foo` doesn't match `abc/foo`.
 494     fn basename_tokens(&self) -> Option<&[Token]> {
 495         if self.opts.case_insensitive {
 496             return None;
 497         }
 498         let start = match self.tokens.get(0) {
 499             Some(&Token::RecursivePrefix) => 1,
 500             _ => {
 501                 // With nothing to gobble up the parent portion of a path,
 502                 // we can't assume that matching on only the basename is
 503                 // correct.
 504                 return None;
 505             }
 506         };
 507         if self.tokens[start..].is_empty() {
 508             return None;
 509         }
 510         for t in &self.tokens[start..] {
 511             match *t {
 512                 Token::Literal('/') => return None,
 513                 Token::Literal(_) => {} // OK
 514                 Token::Any | Token::ZeroOrMore => {
 515                     if !self.opts.literal_separator {
 516                         // In this case, `*` and `?` can match a path
 517                         // separator, which means this could reach outside
 518                         // the basename.
 519                         return None;
 520                     }
 521                 }
 522                 Token::RecursivePrefix
 523                 | Token::RecursiveSuffix
 524                 | Token::RecursiveZeroOrMore => {
 525                     return None;
 526                 }
 527                 Token::Class{..} | Token::Alternates(..) => {
 528                     // We *could* be a little smarter here, but either one
 529                     // of these is going to prevent our literal optimizations
 530                     // anyway, so give up.
 531                     return None;
 532                 }
 533             }
 534         }
 535         Some(&self.tokens[start..])
 536     }
 537
 538     /// Returns the pattern as a literal if and only if the pattern exclusively
 539     /// matches the basename of a file path *and* is a literal.
 540     ///
 541     /// The basic format of these patterns is `**/{literal}`, where `{literal}`
 542     /// does not contain a path separator.
 543     fn basename_literal(&self) -> Option<String> {
 544         let tokens = match self.basename_tokens() {
 545             None => return None,
 546             Some(tokens) => tokens,
 547         };
 548         let mut lit = String::new();
 549         for t in tokens {
 550             match *t {
 551                 Token::Literal(c) => lit.push(c),
 552                 _ => return None,
 553             }
 554         }
 555         Some(lit)
 556     }
 557 }
 558
 559 impl<'a> GlobBuilder<'a> {
 560     /// Create a new builder for the pattern given.
 561     ///
 562     /// The pattern is not compiled until `build` is called.
 563     pub fn new(glob: &'a str) -> GlobBuilder<'a> {
 564         GlobBuilder {
 565             glob: glob,
 566             opts: GlobOptions::default(),
 567         }
 568     }
 569
 570     /// Parses and builds the pattern.
 571     pub fn build(&self) -> Result<Glob, Error> {
 572         let mut p = Parser {
 573             glob: &self.glob,
 574             stack: vec![Tokens::default()],
 575             chars: self.glob.chars().peekable(),
 576             prev: None,
 577             cur: None,
 578             opts: &self.opts,
 579         };
 580         p.parse()?;
 581         if p.stack.is_empty() {
 582             Err(Error {
 583                 glob: Some(self.glob.to_string()),
 584                 kind: ErrorKind::UnopenedAlternates,
 585             })
 586         } else if p.stack.len() > 1 {
 587             Err(Error {
 588                 glob: Some(self.glob.to_string()),
 589                 kind: ErrorKind::UnclosedAlternates,
 590             })
 591         } else {
 592             let tokens = p.stack.pop().unwrap();
 593             Ok(Glob {
 594                 glob: self.glob.to_string(),
 595                 re: tokens.to_regex_with(&self.opts),
 596                 opts: self.opts,
 597                 tokens: tokens,
 598             })
 599         }
 600     }
 601
 602     /// Toggle whether the pattern matches case insensitively or not.
 603     ///
 604     /// This is disabled by default.
 605     pub fn case_insensitive(&mut self, yes: bool) -> &mut GlobBuilder<'a> {
 606         self.opts.case_insensitive = yes;
 607         self
 608     }
 609
 610     /// Toggle whether a literal `/` is required to match a path separator.
 611     pub fn literal_separator(&mut self, yes: bool) -> &mut GlobBuilder<'a> {
 612         self.opts.literal_separator = yes;
 613         self
 614     }
 615
 616     /// When enabled, a back slash (`\`) may be used to escape
 617     /// special characters in a glob pattern. Additionally, this will
 618     /// prevent `\` from being interpreted as a path separator on all
 619     /// platforms.
 620     ///
 621     /// This is enabled by default on platforms where `\` is not a
 622     /// path separator and disabled by default on platforms where `\`
 623     /// is a path separator.
 624     pub fn backslash_escape(&mut self, yes: bool) -> &mut GlobBuilder<'a> {
 625         self.opts.backslash_escape = yes;
 626         self
 627     }
 628 }
 629
 630 impl Tokens {
 631     /// Convert this pattern to a string that is guaranteed to be a valid
 632     /// regular expression and will represent the matching semantics of this
 633     /// glob pattern and the options given.
 634     fn to_regex_with(&self, options: &GlobOptions) -> String {
 635         let mut re = String::new();
 636         re.push_str("(?-u)");
 637         if options.case_insensitive {
 638             re.push_str("(?i)");
 639         }
 640         re.push('^');
 641         // Special case. If the entire glob is just `**`, then it should match
 642         // everything.
 643         if self.len() == 1 && self[0] == Token::RecursivePrefix {
 644             re.push_str(".*");
 645             re.push('$');
 646             return re;
 647         }
 648         self.tokens_to_regex(options, &self, &mut re);
 649         re.push('$');
 650         re
 651     }
 652
 653     fn tokens_to_regex(
 654         &self,
 655         options: &GlobOptions,
 656         tokens: &[Token],
 657         re: &mut String,
 658     ) {
 659         for tok in tokens {
 660             match *tok {
 661                 Token::Literal(c) => {
 662                     re.push_str(&char_to_escaped_literal(c));
 663                 }
 664                 Token::Any => {
 665                     if options.literal_separator {
 666                         re.push_str("[^/]");
 667                     } else {
 668                         re.push_str(".");
 669                     }
 670                 }
 671                 Token::ZeroOrMore => {
 672                     if options.literal_separator {
 673                         re.push_str("[^/]*");
 674                     } else {
 675                         re.push_str(".*");
 676                     }
 677                 }
 678                 Token::RecursivePrefix => {
 679                     re.push_str("(?:/?|.*/)");
 680                 }
 681                 Token::RecursiveSuffix => {
 682                     re.push_str("(?:/?|/.*)");
 683                 }
 684                 Token::RecursiveZeroOrMore => {
 685                     re.push_str("(?:/|/.*/)");
 686                 }
 687                 Token::Class { negated, ref ranges } => {
 688                     re.push('[');
 689                     if negated {
 690                         re.push('^');
 691                     }
 692                     for r in ranges {
 693                         if r.0 == r.1 {
 694                             // Not strictly necessary, but nicer to look at.
 695                             re.push_str(&char_to_escaped_literal(r.0));
 696                         } else {
 697                             re.push_str(&char_to_escaped_literal(r.0));
 698                             re.push('-');
 699                             re.push_str(&char_to_escaped_literal(r.1));
 700                         }
 701                     }
 702                     re.push(']');
 703                 }
 704                 Token::Alternates(ref patterns) => {
 705                     let mut parts = vec![];
 706                     for pat in patterns {
 707                         let mut altre = String::new();
 708                         self.tokens_to_regex(options, &pat, &mut altre);
 709                         if !altre.is_empty() {
 710                             parts.push(altre);
 711                         }
 712                     }
 713
 714                     // It is possible to have an empty set in which case the
 715                     // resulting alternation '()' would be an error.
 716                     if !parts.is_empty() {
 717                         re.push('(');
 718                         re.push_str(&parts.join("|"));
 719                         re.push(')');
 720                     }
 721                 }
 722             }
 723         }
 724     }
 725 }
 726
 727 /// Convert a Unicode scalar value to an escaped string suitable for use as
 728 /// a literal in a non-Unicode regex.
 729 fn char_to_escaped_literal(c: char) -> String {
 730     bytes_to_escaped_literal(&c.to_string().into_bytes())
 731 }
 732
 733 /// Converts an arbitrary sequence of bytes to a UTF-8 string. All non-ASCII
 734 /// code units are converted to their escaped form.
 735 fn bytes_to_escaped_literal(bs: &[u8]) -> String {
 736     let mut s = String::with_capacity(bs.len());
 737     for &b in bs {
 738         if b <= 0x7F {
 739             s.push_str(&regex::escape(&(b as char).to_string()));
 740         } else {
 741             s.push_str(&format!("\\x{:02x}", b));
 742         }
 743     }
 744     s
 745 }
 746
/// The state of a single glob parse.
///
/// The lifetime `'a` ties the parser to the pattern string and to the
/// options it was created with.
struct Parser<'a> {
    /// The complete pattern being parsed; copied into any error reported.
    glob: &'a str,
    /// A stack of token lists. Parsing starts with a single list; further
    /// lists are pushed while inside an alternation and folded back into an
    /// `Alternates` token when it closes.
    stack: Vec<Tokens>,
    /// The not yet consumed characters of the pattern.
    chars: iter::Peekable<str::Chars<'a>>,
    /// NOTE(review): presumably the character consumed before `cur`,
    /// maintained by `bump` (not visible here) -- confirm.
    prev: Option<char>,
    /// NOTE(review): presumably the most recently consumed character,
    /// maintained by `bump` (not visible here) -- confirm.
    cur: Option<char>,
    /// The options that control parsing.
    opts: &'a GlobOptions,
}
 755
 756 impl<'a> Parser<'a> {
 757     fn error(&self, kind: ErrorKind) -> Error {
 758         Error { glob: Some(self.glob.to_string()), kind: kind }
 759     }
 760
 761     fn parse(&mut self) -> Result<(), Error> {
 762         while let Some(c) = self.bump() {
 763             match c {
 764                 '?' => self.push_token(Token::Any)?,
 765                 '*' => self.parse_star()?,
 766                 '[' => self.parse_class()?,
 767                 '{' => self.push_alternate()?,
 768                 '}' => self.pop_alternate()?,
 769                 ',' => self.parse_comma()?,
 770                 '\\' => self.parse_backslash()?,
 771                 c => self.push_token(Token::Literal(c))?,
 772             }
 773         }
 774         Ok(())
 775     }
 776
 777     fn push_alternate(&mut self) -> Result<(), Error> {
 778         if self.stack.len() > 1 {
 779             return Err(self.error(ErrorKind::NestedAlternates));
 780         }
 781         Ok(self.stack.push(Tokens::default()))
 782     }
 783
 784     fn pop_alternate(&mut self) -> Result<(), Error> {
 785         let mut alts = vec![];
 786         while self.stack.len() >= 2 {
 787             alts.push(self.stack.pop().unwrap());
 788         }
 789         self.push_token(Token::Alternates(alts))
 790     }
 791
 792     fn push_token(&mut self, tok: Token) -> Result<(), Error> {
 793         if let Some(ref mut pat) = self.stack.last_mut() {
 794             return Ok(pat.push(tok));
 795         }
 796         Err(self.error(ErrorKind::UnopenedAlternates))
 797     }
 798
 799     fn pop_token(&mut self) -> Result<Token, Error> {
 800         if let Some(ref mut pat) = self.stack.last_mut() {
 801             return Ok(pat.pop().unwrap());
 802         }
 803         Err(self.error(ErrorKind::UnopenedAlternates))
 804     }
 805
 806     fn have_tokens(&self) -> Result<bool, Error> {
 807         match self.stack.last() {
 808             None => Err(self.error(ErrorKind::UnopenedAlternates)),
 809             Some(ref pat) => Ok(!pat.is_empty()),
 810         }
 811     }
 812
 813     fn parse_comma(&mut self) -> Result<(), Error> {
 814         // If we aren't inside a group alternation, then don't
 815         // treat commas specially. Otherwise, we need to start
 816         // a new alternate.
 817         if self.stack.len() <= 1 {
 818             self.push_token(Token::Literal(','))
 819         } else {
 820             Ok(self.stack.push(Tokens::default()))
 821         }
 822     }
 823
 824     fn parse_backslash(&mut self) -> Result<(), Error> {
 825         if self.opts.backslash_escape {
 826             match self.bump() {
 827                 None => Err(self.error(ErrorKind::DanglingEscape)),
 828                 Some(c) => self.push_token(Token::Literal(c)),
 829             }
 830         } else if is_separator('\\') {
 831             // Normalize all patterns to use / as a separator.
 832             self.push_token(Token::Literal('/'))
 833         } else {
 834             self.push_token(Token::Literal('\\'))
 835         }
 836     }
 837
    /// Parses a `*`, deciding between a plain wildcard and one of the
    /// recursive `**` forms, and pushes the resulting token(s).
    ///
    /// NOTE(review): this presumably runs right after the parse loop has
    /// bumped past the first `*`, which would make `self.prev` the character
    /// in front of that `*` — confirm against the caller.
    ///
    /// Outcomes:
    ///
    /// * A single `*` pushes `ZeroOrMore`.
    /// * `**` as the first token of the current sequence pushes
    ///   `RecursivePrefix` when it is followed by a separator or the end of
    ///   the pattern, and two `ZeroOrMore` tokens otherwise.
    /// * `**` that is not delimited like a path component (see the checks
    ///   below) pushes two `ZeroOrMore` tokens.
    /// * Otherwise the token in front of `**` is popped and replaced by
    ///   `RecursiveSuffix` or `RecursiveZeroOrMore`, unless it already was a
    ///   `RecursivePrefix`/`RecursiveSuffix`, in which case it is kept as is.
    ///
    /// Errors from `push_token`, `pop_token` and `have_tokens` are
    /// propagated.
    ///
    /// # Panics
    ///
    /// The `assert!`s document what `bump` must return given the preceding
    /// `peek`; they fire only if those two disagree.
    fn parse_star(&mut self) -> Result<(), Error> {
        // Captured up front because every `bump` below overwrites it.
        let prev = self.prev;
        if self.peek() != Some('*') {
            // Lone `*`: ordinary wildcard.
            self.push_token(Token::ZeroOrMore)?;
            return Ok(());
        }
        // Consume the second `*`.
        assert!(self.bump() == Some('*'));
        if !self.have_tokens()? {
            // `**` opens the current token sequence.
            if !self.peek().map_or(true, is_separator) {
                // Followed by a non-separator character: not a recursive
                // prefix, so it degrades to two plain wildcards.
                self.push_token(Token::ZeroOrMore)?;
                self.push_token(Token::ZeroOrMore)?;
            } else {
                self.push_token(Token::RecursivePrefix)?;
                // Swallow the separator after `**` (or hit end of input).
                assert!(self.bump().map_or(true, is_separator));
            }
            return Ok(());
        }

        // The character before `**` is not a separator (or there is none).
        if !prev.map(is_separator).unwrap_or(false) {
            // That is only acceptable inside an alternation group directly
            // after `,` or `{`; anywhere else treat it as two wildcards.
            if self.stack.len() <= 1
                || (prev != Some(',') && prev != Some('{'))
            {
                self.push_token(Token::ZeroOrMore)?;
                self.push_token(Token::ZeroOrMore)?;
                return Ok(());
            }
        }
        // Look at what follows `**` to tell a suffix (`true`) from an
        // interior recursive wildcard (`false`).
        let is_suffix =
            match self.peek() {
                None => {
                    // End of pattern.
                    assert!(self.bump().is_none());
                    true
                }
                // End of an alternate inside a group; the `,`/`}` is left
                // unconsumed here.
                Some(',') | Some('}') if self.stack.len() >= 2 => {
                    true
                }
                Some(c) if is_separator(c) => {
                    // Consume the trailing separator.
                    assert!(self.bump().map(is_separator).unwrap_or(false));
                    false
                }
                _ => {
                    // Followed by an ordinary character: two wildcards.
                    self.push_token(Token::ZeroOrMore)?;
                    self.push_token(Token::ZeroOrMore)?;
                    return Ok(());
                }
            };
        // Replace the token in front of `**`. A previous recursive
        // prefix/suffix is put back unchanged; any other popped token is
        // dropped (e.g. the `/` in `/**`, which parses to just
        // `RecursiveSuffix`).
        match self.pop_token()? {
            Token::RecursivePrefix => {
                self.push_token(Token::RecursivePrefix)?;
            }
            Token::RecursiveSuffix => {
                self.push_token(Token::RecursiveSuffix)?;
            }
            _ => {
                if is_suffix {
                    self.push_token(Token::RecursiveSuffix)?;
                } else {
                    self.push_token(Token::RecursiveZeroOrMore)?;
                }
            }
        }
        Ok(())
    }
 901
    /// Parses the body of a character class through its closing `]` and
    /// pushes the resulting `Token::Class`.
    ///
    /// NOTE(review): presumably the opening `[` has already been consumed by
    /// the caller — confirm against the parse loop.
    ///
    /// Rules implemented below:
    ///
    /// * A leading `!` or `^` negates the class.
    /// * `]` as the first class character is a literal; any later `]` closes
    ///   the class.
    /// * `-` as the first class character is a literal; after another
    ///   character it starts a range; while a range is pending it is the
    ///   range's end; left dangling at the end of the class it is a literal.
    ///
    /// # Errors
    ///
    /// * `ErrorKind::UnclosedClass` if the pattern ends before a closing `]`.
    /// * `ErrorKind::InvalidRange` if a range's end is less than its start.
    fn parse_class(&mut self) -> Result<(), Error> {
        // Sets the upper bound of range `r` to `add` and rejects the range
        // if it ends up inverted. Note that `r` is updated before the check.
        fn add_to_last_range(
            glob: &str,
            r: &mut (char, char),
            add: char,
        ) -> Result<(), Error> {
            r.1 = add;
            if r.1 < r.0 {
                Err(Error {
                    glob: Some(glob.to_string()),
                    kind: ErrorKind::InvalidRange(r.0, r.1),
                })
            } else {
                Ok(())
            }
        }
        let mut ranges = vec![];
        // A leading `!` or `^` is consumed and marks the class as negated.
        let negated = match self.chars.peek() {
            Some(&'!') | Some(&'^') => {
                let bump = self.bump();
                assert!(bump == Some('!') || bump == Some('^'));
                true
            }
            _ => false,
        };
        // `first` is true only for the first character after the optional
        // negation marker; `in_range` is true right after the `-` of `a-`.
        let mut first = true;
        let mut in_range = false;
        loop {
            let c = match self.bump() {
                Some(c) => c,
                // The only way to successfully break this loop is to observe
                // a ']'.
                None => return Err(self.error(ErrorKind::UnclosedClass)),
            };
            match c {
                ']' => {
                    if first {
                        // `[]...]`: a leading `]` is a member, not the end.
                        ranges.push((']', ']'));
                    } else {
                        break;
                    }
                }
                '-' => {
                    if first {
                        // A leading `-` is a literal member.
                        ranges.push(('-', '-'));
                    } else if in_range {
                        // invariant: in_range is only set when there is
                        // already at least one character seen.
                        let r = ranges.last_mut().unwrap();
                        add_to_last_range(&self.glob, r, '-')?;
                        in_range = false;
                    } else {
                        // Start of a range whose lower bound is the last
                        // entry pushed so far.
                        assert!(!ranges.is_empty());
                        in_range = true;
                    }
                }
                c => {
                    if in_range {
                        // invariant: in_range is only set when there is
                        // already at least one character seen.
                        add_to_last_range(
                            &self.glob, ranges.last_mut().unwrap(), c)?;
                    } else {
                        // Single character, stored as a one-element range.
                        ranges.push((c, c));
                    }
                    in_range = false;
                }
            }
            first = false;
        }
        if in_range {
            // Means that the last character in the class was a '-', so add
            // it as a literal.
            ranges.push(('-', '-'));
        }
        self.push_token(Token::Class {
            negated: negated,
            ranges: ranges,
        })
    }
 982
 983     fn bump(&mut self) -> Option<char> {
 984         self.prev = self.cur;
 985         self.cur = self.chars.next();
 986         self.cur
 987     }
 988
 989     fn peek(&mut self) -> Option<char> {
 990         self.chars.peek().map(|&ch| ch)
 991     }
 992 }
 993
 994 #[cfg(test)]
 995 fn starts_with(needle: &[u8], haystack: &[u8]) -> bool {
 996     needle.len() <= haystack.len() && needle == &haystack[..needle.len()]
 997 }
 998
 999 #[cfg(test)]
1000 fn ends_with(needle: &[u8], haystack: &[u8]) -> bool {
1001     if needle.len() > haystack.len() {
1002         return false;
1003     }
1004     needle == &haystack[haystack.len() - needle.len()..]
1005 }
1006
#[cfg(test)]
mod tests {
    use {GlobSetBuilder, ErrorKind};
    use super::{Glob, GlobBuilder, Token};
    use super::Token::*;

    /// Optional overrides for the `GlobBuilder` settings exercised by the
    /// tests. A field left as `None` is not applied, so the builder keeps
    /// its own default for that setting.
    #[derive(Clone, Copy, Debug, Default)]
    struct Options {
        // Forwarded to `GlobBuilder::case_insensitive`.
        casei: Option<bool>,
        // Forwarded to `GlobBuilder::literal_separator`.
        litsep: Option<bool>,
        // Forwarded to `GlobBuilder::backslash_escape`.
        bsesc: Option<bool>,
    }

    /// Builds a `Glob` for `pat`, applying every option that is set in
    /// `opts`.
    ///
    /// Shared by the `toregex!`, `matches!`, `nmatches!` and `extract!`
    /// macros so the builder setup lives in exactly one place.
    ///
    /// Panics if the pattern fails to build.
    fn build_glob(pat: &str, opts: &Options) -> Glob {
        let mut builder = GlobBuilder::new(pat);
        if let Some(casei) = opts.casei {
            builder.case_insensitive(casei);
        }
        if let Some(litsep) = opts.litsep {
            builder.literal_separator(litsep);
        }
        if let Some(bsesc) = opts.bsesc {
            builder.backslash_escape(bsesc);
        }
        builder.build().unwrap()
    }

    /// Defines a test asserting that `$pat` parses into exactly `$tokens`.
    macro_rules! syntax {
        ($name:ident, $pat:expr, $tokens:expr) => {
            #[test]
            fn $name() {
                let pat = Glob::new($pat).unwrap();
                assert_eq!($tokens, pat.tokens.0);
            }
        }
    }

    /// Defines a test asserting that `$pat` fails to parse with error kind
    /// `$err`.
    macro_rules! syntaxerr {
        ($name:ident, $pat:expr, $err:expr) => {
            #[test]
            fn $name() {
                let err = Glob::new($pat).unwrap_err();
                assert_eq!(&$err, err.kind());
            }
        }
    }

    /// Defines a test asserting that `$pat` translates to the regex `$re`
    /// (the `(?-u)` prefix is added here). `$options` defaults to
    /// `Options::default()`.
    macro_rules! toregex {
        ($name:ident, $pat:expr, $re:expr) => {
            toregex!($name, $pat, $re, Options::default());
        };
        ($name:ident, $pat:expr, $re:expr, $options:expr) => {
            #[test]
            fn $name() {
                let pat = build_glob($pat, &$options);
                assert_eq!(format!("(?-u){}", $re), pat.regex());
            }
        };
    }

    /// Defines a test asserting that `$pat` matches `$path` through all
    /// three entry points: the plain matcher, the strategic matcher and a
    /// one-element glob set.
    macro_rules! matches {
        ($name:ident, $pat:expr, $path:expr) => {
            matches!($name, $pat, $path, Options::default());
        };
        ($name:ident, $pat:expr, $path:expr, $options:expr) => {
            #[test]
            fn $name() {
                let pat = build_glob($pat, &$options);
                let matcher = pat.compile_matcher();
                let strategic = pat.compile_strategic_matcher();
                let set = GlobSetBuilder::new().add(pat).build().unwrap();
                assert!(matcher.is_match($path));
                assert!(strategic.is_match($path));
                assert!(set.is_match($path));
            }
        };
    }

    /// Defines a test asserting that `$pat` does NOT match `$path` through
    /// any of the three entry points used by `matches!`.
    macro_rules! nmatches {
        ($name:ident, $pat:expr, $path:expr) => {
            nmatches!($name, $pat, $path, Options::default());
        };
        ($name:ident, $pat:expr, $path:expr, $options:expr) => {
            #[test]
            fn $name() {
                let pat = build_glob($pat, &$options);
                let matcher = pat.compile_matcher();
                let strategic = pat.compile_strategic_matcher();
                let set = GlobSetBuilder::new().add(pat).build().unwrap();
                assert!(!matcher.is_match($path));
                assert!(!strategic.is_match($path));
                assert!(!set.is_match($path));
            }
        };
    }

    // Shorthand for building an owned `String` in expected values.
    fn s(string: &str) -> String { string.to_string() }

    // Non-negated class with the single range `s..=e`.
    fn class(s: char, e: char) -> Token {
        Class { negated: false, ranges: vec![(s, e)] }
    }

    // Negated class with the single range `s..=e`.
    fn classn(s: char, e: char) -> Token {
        Class { negated: true, ranges: vec![(s, e)] }
    }

    // Non-negated class with several ranges.
    fn rclass(ranges: &[(char, char)]) -> Token {
        Class { negated: false, ranges: ranges.to_vec() }
    }

    // Negated class with several ranges.
    fn rclassn(ranges: &[(char, char)]) -> Token {
        Class { negated: true, ranges: ranges.to_vec() }
    }

    // Pattern -> token sequence.
    syntax!(literal1, "a", vec![Literal('a')]);
    syntax!(literal2, "ab", vec![Literal('a'), Literal('b')]);
    syntax!(any1, "?", vec![Any]);
    syntax!(any2, "a?b", vec![Literal('a'), Any, Literal('b')]);
    syntax!(seq1, "*", vec![ZeroOrMore]);
    syntax!(seq2, "a*b", vec![Literal('a'), ZeroOrMore, Literal('b')]);
    syntax!(seq3, "*a*b*", vec![
        ZeroOrMore, Literal('a'), ZeroOrMore, Literal('b'), ZeroOrMore,
    ]);
    syntax!(rseq1, "**", vec![RecursivePrefix]);
    syntax!(rseq2, "**/", vec![RecursivePrefix]);
    syntax!(rseq3, "/**", vec![RecursiveSuffix]);
    syntax!(rseq4, "/**/", vec![RecursiveZeroOrMore]);
    syntax!(rseq5, "a/**/b", vec![
        Literal('a'), RecursiveZeroOrMore, Literal('b'),
    ]);
    syntax!(cls1, "[a]", vec![class('a', 'a')]);
    syntax!(cls2, "[!a]", vec![classn('a', 'a')]);
    syntax!(cls3, "[a-z]", vec![class('a', 'z')]);
    syntax!(cls4, "[!a-z]", vec![classn('a', 'z')]);
    syntax!(cls5, "[-]", vec![class('-', '-')]);
    syntax!(cls6, "[]]", vec![class(']', ']')]);
    syntax!(cls7, "[*]", vec![class('*', '*')]);
    syntax!(cls8, "[!!]", vec![classn('!', '!')]);
    syntax!(cls9, "[a-]", vec![rclass(&[('a', 'a'), ('-', '-')])]);
    syntax!(cls10, "[-a-z]", vec![rclass(&[('-', '-'), ('a', 'z')])]);
    syntax!(cls11, "[a-z-]", vec![rclass(&[('a', 'z'), ('-', '-')])]);
    syntax!(cls12, "[-a-z-]", vec![
        rclass(&[('-', '-'), ('a', 'z'), ('-', '-')]),
    ]);
    syntax!(cls13, "[]-z]", vec![class(']', 'z')]);
    syntax!(cls14, "[--z]", vec![class('-', 'z')]);
    syntax!(cls15, "[ --]", vec![class(' ', '-')]);
    syntax!(cls16, "[0-9a-z]", vec![rclass(&[('0', '9'), ('a', 'z')])]);
    syntax!(cls17, "[a-z0-9]", vec![rclass(&[('a', 'z'), ('0', '9')])]);
    syntax!(cls18, "[!0-9a-z]", vec![rclassn(&[('0', '9'), ('a', 'z')])]);
    syntax!(cls19, "[!a-z0-9]", vec![rclassn(&[('a', 'z'), ('0', '9')])]);
    syntax!(cls20, "[^a]", vec![classn('a', 'a')]);
    syntax!(cls21, "[^a-z]", vec![classn('a', 'z')]);

    // Patterns that must be rejected by the parser.
    syntaxerr!(err_unclosed1, "[", ErrorKind::UnclosedClass);
    syntaxerr!(err_unclosed2, "[]", ErrorKind::UnclosedClass);
    syntaxerr!(err_unclosed3, "[!", ErrorKind::UnclosedClass);
    syntaxerr!(err_unclosed4, "[!]", ErrorKind::UnclosedClass);
    syntaxerr!(err_range1, "[z-a]", ErrorKind::InvalidRange('z', 'a'));
    syntaxerr!(err_range2, "[z--]", ErrorKind::InvalidRange('z', '-'));

    // Option presets; each one overrides a single builder setting.
    const CASEI: Options = Options {
        casei: Some(true),
        litsep: None,
        bsesc: None,
    };
    const SLASHLIT: Options = Options {
        casei: None,
        litsep: Some(true),
        bsesc: None,
    };
    const NOBSESC: Options = Options {
        casei: None,
        litsep: None,
        bsesc: Some(false),
    };
    const BSESC: Options = Options {
        casei: None,
        litsep: None,
        bsesc: Some(true),
    };

    // Pattern -> generated regex.
    toregex!(re_casei, "a", "(?i)^a$", &CASEI);

    toregex!(re_slash1, "?", r"^[^/]$", SLASHLIT);
    toregex!(re_slash2, "*", r"^[^/]*$", SLASHLIT);

    toregex!(re1, "a", "^a$");
    toregex!(re2, "?", "^.$");
    toregex!(re3, "*", "^.*$");
    toregex!(re4, "a?", "^a.$");
    toregex!(re5, "?a", "^.a$");
    toregex!(re6, "a*", "^a.*$");
    toregex!(re7, "*a", "^.*a$");
    toregex!(re8, "[*]", r"^[\*]$");
    toregex!(re9, "[+]", r"^[\+]$");
    toregex!(re10, "+", r"^\+$");
    toregex!(re11, "☃", r"^\xe2\x98\x83$");
    toregex!(re12, "**", r"^.*$");
    toregex!(re13, "**/", r"^.*$");
    toregex!(re14, "**/*", r"^(?:/?|.*/).*$");
    toregex!(re15, "**/**", r"^.*$");
    toregex!(re16, "**/**/*", r"^(?:/?|.*/).*$");
    toregex!(re17, "**/**/**", r"^.*$");
    toregex!(re18, "**/**/**/*", r"^(?:/?|.*/).*$");
    toregex!(re19, "a/**", r"^a(?:/?|/.*)$");
    toregex!(re20, "a/**/**", r"^a(?:/?|/.*)$");
    toregex!(re21, "a/**/**/**", r"^a(?:/?|/.*)$");
    toregex!(re22, "a/**/b", r"^a(?:/|/.*/)b$");
    toregex!(re23, "a/**/**/b", r"^a(?:/|/.*/)b$");
    toregex!(re24, "a/**/**/**/b", r"^a(?:/|/.*/)b$");
    toregex!(re25, "**/b", r"^(?:/?|.*/)b$");
    toregex!(re26, "**/**/b", r"^(?:/?|.*/)b$");
    toregex!(re27, "**/**/**/b", r"^(?:/?|.*/)b$");
    toregex!(re28, "a**", r"^a.*.*$");
    toregex!(re29, "**a", r"^.*.*a$");
    toregex!(re30, "a**b", r"^a.*.*b$");
    toregex!(re31, "***", r"^.*.*.*$");
    toregex!(re32, "/a**", r"^/a.*.*$");
    toregex!(re33, "/**a", r"^/.*.*a$");
    toregex!(re34, "/a**b", r"^/a.*.*b$");

    // Positive matches.
    matches!(match1, "a", "a");
    matches!(match2, "a*b", "a_b");
    matches!(match3, "a*b*c", "abc");
    matches!(match4, "a*b*c", "a_b_c");
    matches!(match5, "a*b*c", "a___b___c");
    matches!(match6, "abc*abc*abc", "abcabcabcabcabcabcabc");
    matches!(match7, "a*a*a*a*a*a*a*a*a", "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa");
    matches!(match8, "a*b[xyz]c*d", "abxcdbxcddd");
    matches!(match9, "*.rs", ".rs");
    matches!(match10, "☃", "☃");

    matches!(matchrec1, "some/**/needle.txt", "some/needle.txt");
    matches!(matchrec2, "some/**/needle.txt", "some/one/needle.txt");
    matches!(matchrec3, "some/**/needle.txt", "some/one/two/needle.txt");
    matches!(matchrec4, "some/**/needle.txt", "some/other/needle.txt");
    matches!(matchrec5, "**", "abcde");
    matches!(matchrec6, "**", "");
    matches!(matchrec7, "**", ".asdf");
    matches!(matchrec8, "**", "/x/.asdf");
    matches!(matchrec9, "some/**/**/needle.txt", "some/needle.txt");
    matches!(matchrec10, "some/**/**/needle.txt", "some/one/needle.txt");
    matches!(matchrec11, "some/**/**/needle.txt", "some/one/two/needle.txt");
    matches!(matchrec12, "some/**/**/needle.txt", "some/other/needle.txt");
    matches!(matchrec13, "**/test", "one/two/test");
    matches!(matchrec14, "**/test", "one/test");
    matches!(matchrec15, "**/test", "test");
    matches!(matchrec16, "/**/test", "/one/two/test");
    matches!(matchrec17, "/**/test", "/one/test");
    matches!(matchrec18, "/**/test", "/test");
    matches!(matchrec19, "**/.*", ".abc");
    matches!(matchrec20, "**/.*", "abc/.abc");
    matches!(matchrec21, ".*/**", ".abc");
    matches!(matchrec22, ".*/**", ".abc/abc");
    matches!(matchrec23, "foo/**", "foo");
    matches!(matchrec24, "**/foo/bar", "foo/bar");
    matches!(matchrec25, "some/*/needle.txt", "some/one/needle.txt");

    matches!(matchrange1, "a[0-9]b", "a0b");
    matches!(matchrange2, "a[0-9]b", "a9b");
    matches!(matchrange3, "a[!0-9]b", "a_b");
    matches!(matchrange4, "[a-z123]", "1");
    matches!(matchrange5, "[1a-z23]", "1");
    matches!(matchrange6, "[123a-z]", "1");
    matches!(matchrange7, "[abc-]", "-");
    matches!(matchrange8, "[-abc]", "-");
    matches!(matchrange9, "[-a-c]", "b");
    matches!(matchrange10, "[a-c-]", "b");
    matches!(matchrange11, "[-]", "-");
    matches!(matchrange12, "a[^0-9]b", "a_b");

    matches!(matchpat1, "*hello.txt", "hello.txt");
    matches!(matchpat2, "*hello.txt", "gareth_says_hello.txt");
    matches!(matchpat3, "*hello.txt", "some/path/to/hello.txt");
    matches!(matchpat4, "*hello.txt", "some\\path\\to\\hello.txt");
    matches!(matchpat5, "*hello.txt", "/an/absolute/path/to/hello.txt");
    matches!(matchpat6, "*some/path/to/hello.txt", "some/path/to/hello.txt");
    matches!(matchpat7, "*some/path/to/hello.txt",
             "a/bigger/some/path/to/hello.txt");

    matches!(matchescape, "_[[]_[]]_[?]_[*]_!_", "_[_]_?_*_!_");

    matches!(matchcasei1, "aBcDeFg", "aBcDeFg", CASEI);
    matches!(matchcasei2, "aBcDeFg", "abcdefg", CASEI);
    matches!(matchcasei3, "aBcDeFg", "ABCDEFG", CASEI);
    matches!(matchcasei4, "aBcDeFg", "AbCdEfG", CASEI);

    matches!(matchalt1, "a,b", "a,b");
    matches!(matchalt2, ",", ",");
    matches!(matchalt3, "{a,b}", "a");
    matches!(matchalt4, "{a,b}", "b");
    matches!(matchalt5, "{**/src/**,foo}", "abc/src/bar");
    matches!(matchalt6, "{**/src/**,foo}", "foo");
    matches!(matchalt7, "{[}],foo}", "}");
    matches!(matchalt8, "{foo}", "foo");
    matches!(matchalt9, "{}", "");
    matches!(matchalt10, "{,}", "");
    matches!(matchalt11, "{*.foo,*.bar,*.wat}", "test.foo");
    matches!(matchalt12, "{*.foo,*.bar,*.wat}", "test.bar");
    matches!(matchalt13, "{*.foo,*.bar,*.wat}", "test.wat");

    matches!(matchslash1, "abc/def", "abc/def", SLASHLIT);
    #[cfg(unix)]
    nmatches!(matchslash2, "abc?def", "abc/def", SLASHLIT);
    #[cfg(not(unix))]
    nmatches!(matchslash2, "abc?def", "abc\\def", SLASHLIT);
    nmatches!(matchslash3, "abc*def", "abc/def", SLASHLIT);
    matches!(matchslash4, "abc[/]def", "abc/def", SLASHLIT); // differs
    #[cfg(unix)]
    nmatches!(matchslash5, "abc\\def", "abc/def", SLASHLIT);
    #[cfg(not(unix))]
    matches!(matchslash5, "abc\\def", "abc/def", SLASHLIT);

    matches!(matchbackslash1, "\\[", "[", BSESC);
    matches!(matchbackslash2, "\\?", "?", BSESC);
    matches!(matchbackslash3, "\\*", "*", BSESC);
    matches!(matchbackslash4, "\\[a-z]", "\\a", NOBSESC);
    matches!(matchbackslash5, "\\?", "\\a", NOBSESC);
    matches!(matchbackslash6, "\\*", "\\\\", NOBSESC);
    #[cfg(unix)]
    matches!(matchbackslash7, "\\a", "a");
    #[cfg(not(unix))]
    matches!(matchbackslash8, "\\a", "/a");

    // Negative matches.
    nmatches!(matchnot1, "a*b*c", "abcd");
    nmatches!(matchnot2, "abc*abc*abc", "abcabcabcabcabcabcabca");
    nmatches!(matchnot3, "some/**/needle.txt", "some/other/notthis.txt");
    nmatches!(matchnot4, "some/**/**/needle.txt", "some/other/notthis.txt");
    nmatches!(matchnot5, "/**/test", "test");
    nmatches!(matchnot6, "/**/test", "/one/notthis");
    nmatches!(matchnot7, "/**/test", "/notthis");
    nmatches!(matchnot8, "**/.*", "ab.c");
    nmatches!(matchnot9, "**/.*", "abc/ab.c");
    nmatches!(matchnot10, ".*/**", "a.bc");
    nmatches!(matchnot11, ".*/**", "abc/a.bc");
    nmatches!(matchnot12, "a[0-9]b", "a_b");
    nmatches!(matchnot13, "a[!0-9]b", "a0b");
    nmatches!(matchnot14, "a[!0-9]b", "a9b");
    nmatches!(matchnot15, "[!-]", "-");
    nmatches!(matchnot16, "*hello.txt", "hello.txt-and-then-some");
    nmatches!(matchnot17, "*hello.txt", "goodbye.txt");
    nmatches!(matchnot18, "*some/path/to/hello.txt",
              "some/path/to/hello.txt-and-then-some");
    nmatches!(matchnot19, "*some/path/to/hello.txt",
              "some/other/path/to/hello.txt");
    nmatches!(matchnot20, "a", "foo/a");
    nmatches!(matchnot21, "./foo", "foo");
    nmatches!(matchnot22, "**/foo", "foofoo");
    nmatches!(matchnot23, "**/foo/bar", "foofoo/bar");
    nmatches!(matchnot24, "/*.c", "mozilla-sha1/sha1.c");
    nmatches!(matchnot25, "*.c", "mozilla-sha1/sha1.c", SLASHLIT);
    nmatches!(matchnot26, "**/m4/ltoptions.m4",
              "csharp/src/packages/repositories.config", SLASHLIT);
    nmatches!(matchnot27, "a[^0-9]b", "a0b");
    nmatches!(matchnot28, "a[^0-9]b", "a9b");
    nmatches!(matchnot29, "[^-]", "-");
    nmatches!(matchnot30, "some/*/needle.txt", "some/needle.txt");
    nmatches!(
        matchrec31,
        "some/*/needle.txt", "some/one/two/needle.txt", SLASHLIT);
    nmatches!(
        matchrec32,
        "some/*/needle.txt", "some/one/two/three/needle.txt", SLASHLIT);

    /// Defines a test asserting that calling the `Glob` method `$which` on
    /// the glob built from `$pat` returns `$expect`. `$options` defaults to
    /// `Options::default()`.
    macro_rules! extract {
        ($which:ident, $name:ident, $pat:expr, $expect:expr) => {
            extract!($which, $name, $pat, $expect, Options::default());
        };
        ($which:ident, $name:ident, $pat:expr, $expect:expr, $options:expr) => {
            #[test]
            fn $name() {
                let pat = build_glob($pat, &$options);
                assert_eq!($expect, pat.$which());
            }
        };
    }

    // Thin wrappers that fix the `$which` argument of `extract!`.
    macro_rules! literal {
        ($($tt:tt)*) => { extract!(literal, $($tt)*); }
    }

    macro_rules! basetokens {
        ($($tt:tt)*) => { extract!(basename_tokens, $($tt)*); }
    }

    macro_rules! ext {
        ($($tt:tt)*) => { extract!(ext, $($tt)*); }
    }

    macro_rules! required_ext {
        ($($tt:tt)*) => { extract!(required_ext, $($tt)*); }
    }

    macro_rules! prefix {
        ($($tt:tt)*) => { extract!(prefix, $($tt)*); }
    }

    macro_rules! suffix {
        ($($tt:tt)*) => { extract!(suffix, $($tt)*); }
    }

    macro_rules! baseliteral {
        ($($tt:tt)*) => { extract!(basename_literal, $($tt)*); }
    }

    literal!(extract_lit1, "foo", Some(s("foo")));
    literal!(extract_lit2, "foo", None, CASEI);
    literal!(extract_lit3, "/foo", Some(s("/foo")));
    literal!(extract_lit4, "/foo/", Some(s("/foo/")));
    literal!(extract_lit5, "/foo/bar", Some(s("/foo/bar")));
    literal!(extract_lit6, "*.foo", None);
    literal!(extract_lit7, "foo/bar", Some(s("foo/bar")));
    literal!(extract_lit8, "**/foo/bar", None);

    basetokens!(extract_basetoks1, "**/foo", Some(&*vec![
        Literal('f'), Literal('o'), Literal('o'),
    ]));
    basetokens!(extract_basetoks2, "**/foo", None, CASEI);
    basetokens!(extract_basetoks3, "**/foo", Some(&*vec![
        Literal('f'), Literal('o'), Literal('o'),
    ]), SLASHLIT);
    basetokens!(extract_basetoks4, "*foo", None, SLASHLIT);
    basetokens!(extract_basetoks5, "*foo", None);
    basetokens!(extract_basetoks6, "**/fo*o", None);
    basetokens!(extract_basetoks7, "**/fo*o", Some(&*vec![
        Literal('f'), Literal('o'), ZeroOrMore, Literal('o'),
    ]), SLASHLIT);

    ext!(extract_ext1, "**/*.rs", Some(s(".rs")));
    ext!(extract_ext2, "**/*.rs.bak", None);
    ext!(extract_ext3, "*.rs", Some(s(".rs")));
    ext!(extract_ext4, "a*.rs", None);
    ext!(extract_ext5, "/*.c", None);
    ext!(extract_ext6, "*.c", None, SLASHLIT);
    ext!(extract_ext7, "*.c", Some(s(".c")));

    required_ext!(extract_req_ext1, "*.rs", Some(s(".rs")));
    required_ext!(extract_req_ext2, "/foo/bar/*.rs", Some(s(".rs")));
    required_ext!(extract_req_ext3, "/foo/bar/*.rs", Some(s(".rs")));
    required_ext!(extract_req_ext4, "/foo/bar/.rs", Some(s(".rs")));
    required_ext!(extract_req_ext5, ".rs", Some(s(".rs")));
    required_ext!(extract_req_ext6, "./rs", None);
    required_ext!(extract_req_ext7, "foo", None);
    required_ext!(extract_req_ext8, ".foo/", None);
    required_ext!(extract_req_ext9, "foo/", None);

    prefix!(extract_prefix1, "/foo", Some(s("/foo")));
    prefix!(extract_prefix2, "/foo/*", Some(s("/foo/")));
    prefix!(extract_prefix3, "**/foo", None);
    prefix!(extract_prefix4, "foo/**", None);

    suffix!(extract_suffix1, "**/foo/bar", Some((s("/foo/bar"), true)));
    suffix!(extract_suffix2, "*/foo/bar", Some((s("/foo/bar"), false)));
    suffix!(extract_suffix3, "*/foo/bar", None, SLASHLIT);
    suffix!(extract_suffix4, "foo/bar", Some((s("foo/bar"), false)));
    suffix!(extract_suffix5, "*.foo", Some((s(".foo"), false)));
    suffix!(extract_suffix6, "*.foo", None, SLASHLIT);
    suffix!(extract_suffix7, "**/*_test", Some((s("_test"), false)));

    baseliteral!(extract_baselit1, "**/foo", Some(s("foo")));
    baseliteral!(extract_baselit2, "foo", None);
    baseliteral!(extract_baselit3, "*foo", None);
    baseliteral!(extract_baselit4, "*/foo", None);
}