]> git.proxmox.com Git - cargo.git/blob - vendor/globset/src/glob.rs
New upstream version 0.37.0
[cargo.git] / vendor / globset / src / glob.rs
1 use std::fmt;
2 use std::hash;
3 use std::iter;
4 use std::ops::{Deref, DerefMut};
5 use std::path::{Path, is_separator};
6 use std::str;
7
8 use regex;
9 use regex::bytes::Regex;
10
11 use {Candidate, Error, ErrorKind, new_regex};
12
/// Describes a matching strategy for a particular pattern.
///
/// This provides a way to more quickly determine whether a pattern matches
/// a particular file path in a way that scales with a large number of
/// patterns. For example, if many patterns are of the form `*.ext`, then it's
/// possible to test whether any of those patterns matches by looking up a
/// file path's extension in a hash table.
#[derive(Clone, Debug, Eq, PartialEq)]
pub enum MatchStrategy {
    /// A pattern matches if and only if the entire file path matches this
    /// literal string.
    Literal(String),
    /// A pattern matches if and only if the file path's basename matches this
    /// literal string.
    BasenameLiteral(String),
    /// A pattern matches if and only if the file path's extension matches this
    /// literal string.
    Extension(String),
    /// A pattern matches if and only if this prefix literal is a prefix of the
    /// candidate file path.
    Prefix(String),
    /// A pattern matches if and only if this suffix literal is a suffix of the
    /// candidate file path.
    ///
    /// An exception: if `component` is true, then `suffix` must appear at the
    /// beginning of a file path or immediately following a `/`.
    Suffix {
        /// The actual suffix.
        suffix: String,
        /// Whether this must start at the beginning of a path component.
        component: bool,
    },
    /// A pattern matches only if the given extension matches the file path's
    /// extension. Note that this is a necessary but NOT sufficient criterion.
    /// Namely, if the extension matches, then a full regex search is still
    /// required.
    RequiredExtension(String),
    /// A regex needs to be used for matching.
    Regex,
}
53
54 impl MatchStrategy {
55 /// Returns a matching strategy for the given pattern.
56 pub fn new(pat: &Glob) -> MatchStrategy {
57 if let Some(lit) = pat.basename_literal() {
58 MatchStrategy::BasenameLiteral(lit)
59 } else if let Some(lit) = pat.literal() {
60 MatchStrategy::Literal(lit)
61 } else if let Some(ext) = pat.ext() {
62 MatchStrategy::Extension(ext)
63 } else if let Some(prefix) = pat.prefix() {
64 MatchStrategy::Prefix(prefix)
65 } else if let Some((suffix, component)) = pat.suffix() {
66 MatchStrategy::Suffix { suffix: suffix, component: component }
67 } else if let Some(ext) = pat.required_ext() {
68 MatchStrategy::RequiredExtension(ext)
69 } else {
70 MatchStrategy::Regex
71 }
72 }
73 }
74
75 /// Glob represents a successfully parsed shell glob pattern.
76 ///
77 /// It cannot be used directly to match file paths, but it can be converted
78 /// to a regular expression string or a matcher.
79 #[derive(Clone, Debug, Eq)]
80 pub struct Glob {
81 glob: String,
82 re: String,
83 opts: GlobOptions,
84 tokens: Tokens,
85 }
86
87 impl PartialEq for Glob {
88 fn eq(&self, other: &Glob) -> bool {
89 self.glob == other.glob && self.opts == other.opts
90 }
91 }
92
93 impl hash::Hash for Glob {
94 fn hash<H: hash::Hasher>(&self, state: &mut H) {
95 self.glob.hash(state);
96 self.opts.hash(state);
97 }
98 }
99
100 impl fmt::Display for Glob {
101 fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
102 self.glob.fmt(f)
103 }
104 }
105
106 /// A matcher for a single pattern.
107 #[derive(Clone, Debug)]
108 pub struct GlobMatcher {
109 /// The underlying pattern.
110 pat: Glob,
111 /// The pattern, as a compiled regex.
112 re: Regex,
113 }
114
115 impl GlobMatcher {
116 /// Tests whether the given path matches this pattern or not.
117 pub fn is_match<P: AsRef<Path>>(&self, path: P) -> bool {
118 self.is_match_candidate(&Candidate::new(path.as_ref()))
119 }
120
121 /// Tests whether the given path matches this pattern or not.
122 pub fn is_match_candidate(&self, path: &Candidate) -> bool {
123 self.re.is_match(&path.path)
124 }
125 }
126
/// Test-only matcher for one pattern that dispatches on a `MatchStrategy`
/// instead of always running the regex.
#[cfg(test)]
#[derive(Clone, Debug)]
struct GlobStrategic {
    /// The strategy chosen for `pat`.
    strategy: MatchStrategy,
    /// The glob this matcher was compiled from.
    pat: Glob,
    /// The compiled regular expression for `pat`.
    re: Regex,
}

#[cfg(test)]
impl GlobStrategic {
    /// Returns true if `path` matches this pattern.
    fn is_match<P: AsRef<Path>>(&self, path: P) -> bool {
        let candidate = Candidate::new(path.as_ref());
        self.is_match_candidate(&candidate)
    }

    /// Returns true if `candidate` matches this pattern, using the stored
    /// strategy to decide how the comparison is made.
    fn is_match_candidate(&self, candidate: &Candidate) -> bool {
        let haystack = &*candidate.path;

        match self.strategy {
            MatchStrategy::Regex => self.re.is_match(haystack),
            MatchStrategy::Literal(ref whole) => whole.as_bytes() == haystack,
            MatchStrategy::BasenameLiteral(ref name) => {
                name.as_bytes() == &*candidate.basename
            }
            MatchStrategy::Extension(ref wanted) => {
                wanted.as_bytes() == &*candidate.ext
            }
            MatchStrategy::Prefix(ref head) => {
                starts_with(head.as_bytes(), haystack)
            }
            MatchStrategy::Suffix { ref suffix, component } => {
                // A component suffix starts with `/`; the whole path may
                // also equal the suffix with that leading byte removed.
                (component && haystack == &suffix.as_bytes()[1..])
                    || ends_with(suffix.as_bytes(), haystack)
            }
            MatchStrategy::RequiredExtension(ref wanted) => {
                // The extension only pre-filters; the regex still decides.
                &*candidate.ext == wanted.as_bytes()
                    && self.re.is_match(haystack)
            }
        }
    }
}
175
176 /// A builder for a pattern.
177 ///
178 /// This builder enables configuring the match semantics of a pattern. For
179 /// example, one can make matching case insensitive.
180 ///
181 /// The lifetime `'a` refers to the lifetime of the pattern string.
182 #[derive(Clone, Debug)]
183 pub struct GlobBuilder<'a> {
184 /// The glob pattern to compile.
185 glob: &'a str,
186 /// Options for the pattern.
187 opts: GlobOptions,
188 }
189
/// The options that control how a glob is parsed and translated.
#[derive(Clone, Copy, Debug, Eq, Hash, PartialEq)]
struct GlobOptions {
    /// Whether to match case insensitively.
    case_insensitive: bool,
    /// Whether to require a literal separator to match a separator in a file
    /// path. e.g., when enabled, `*` won't match `/`.
    literal_separator: bool,
    /// Whether or not to use `\` to escape special characters.
    /// e.g., when enabled, `\*` will match a literal `*`.
    backslash_escape: bool,
}

impl Default for GlobOptions {
    /// Returns the default options: case sensitive matching, no literal
    /// separator requirement, and backslash escaping enabled exactly when
    /// `\` is not a path separator on the current platform.
    ///
    /// This is a trait implementation rather than an inherent method, so
    /// both `GlobOptions::default()` and `Default::default()` work.
    fn default() -> GlobOptions {
        GlobOptions {
            case_insensitive: false,
            literal_separator: false,
            backslash_escape: !is_separator('\\'),
        }
    }
}
211
/// A sequence of parsed glob tokens.
///
/// Dereferences to the underlying `Vec<Token>`.
#[derive(Clone, Debug, Default, Eq, PartialEq)]
struct Tokens(Vec<Token>);

impl Deref for Tokens {
    type Target = Vec<Token>;

    fn deref(&self) -> &Vec<Token> {
        let Tokens(ref inner) = *self;
        inner
    }
}

impl DerefMut for Tokens {
    fn deref_mut(&mut self) -> &mut Vec<Token> {
        let Tokens(ref mut inner) = *self;
        inner
    }
}

/// One element of a parsed glob.
#[derive(Clone, Debug, Eq, PartialEq)]
enum Token {
    /// A single literal character.
    Literal(char),
    /// Produced by the parser for `?`.
    Any,
    /// Produced by the parser for a single `*`.
    ZeroOrMore,
    /// A `**` at the start of a pattern.
    RecursivePrefix,
    /// A `**` at the end of a pattern.
    RecursiveSuffix,
    /// A `**` between two separators.
    RecursiveZeroOrMore,
    /// A bracketed character class.
    Class {
        /// Whether the class was written with a leading `!` or `^`.
        negated: bool,
        /// Inclusive character ranges; a single character `c` is `(c, c)`.
        ranges: Vec<(char, char)>,
    },
    /// The branches of a `{...}` group.
    Alternates(Vec<Tokens>),
}
238
239 impl Glob {
240 /// Builds a new pattern with default options.
241 pub fn new(glob: &str) -> Result<Glob, Error> {
242 GlobBuilder::new(glob).build()
243 }
244
245 /// Returns a matcher for this pattern.
246 pub fn compile_matcher(&self) -> GlobMatcher {
247 let re = new_regex(&self.re)
248 .expect("regex compilation shouldn't fail");
249 GlobMatcher {
250 pat: self.clone(),
251 re: re,
252 }
253 }
254
/// Compiles this pattern into a strategy-based matcher.
///
/// Kept private and test-only: it is unclear whether this beats simply
/// running the regex for a *single* pattern. If it does, `GlobMatcher`
/// should use it automatically.
///
/// Panics if the generated regular expression fails to compile.
#[cfg(test)]
fn compile_strategic_matcher(&self) -> GlobStrategic {
    let strategy = MatchStrategy::new(self);
    let compiled = new_regex(&self.re);
    let re = compiled.expect("regex compilation shouldn't fail");
    let pat = self.clone();
    GlobStrategic { strategy: strategy, pat: pat, re: re }
}
271
272 /// Returns the original glob pattern used to build this pattern.
273 pub fn glob(&self) -> &str {
274 &self.glob
275 }
276
277 /// Returns the regular expression string for this glob.
278 ///
279 /// Note that regular expressions for globs are intended to be matched on
280 /// arbitrary bytes (`&[u8]`) instead of Unicode strings (`&str`). In
281 /// particular, globs are frequently used on file paths, where there is no
282 /// general guarantee that file paths are themselves valid UTF-8. As a
283 /// result, callers will need to ensure that they are using a regex API
284 /// that can match on arbitrary bytes. For example, the
285 /// [`regex`](https://crates.io/regex)
286 /// crate's
287 /// [`Regex`](https://docs.rs/regex/*/regex/struct.Regex.html)
288 /// API is not suitable for this since it matches on `&str`, but its
289 /// [`bytes::Regex`](https://docs.rs/regex/*/regex/bytes/struct.Regex.html)
290 /// API is suitable for this.
291 pub fn regex(&self) -> &str {
292 &self.re
293 }
294
295 /// Returns the pattern as a literal if and only if the pattern must match
296 /// an entire path exactly.
297 ///
298 /// The basic format of these patterns is `{literal}`.
299 fn literal(&self) -> Option<String> {
300 if self.opts.case_insensitive {
301 return None;
302 }
303 let mut lit = String::new();
304 for t in &*self.tokens {
305 match *t {
306 Token::Literal(c) => lit.push(c),
307 _ => return None,
308 }
309 }
310 if lit.is_empty() {
311 None
312 } else {
313 Some(lit)
314 }
315 }
316
317 /// Returns an extension if this pattern matches a file path if and only
318 /// if the file path has the extension returned.
319 ///
320 /// Note that this extension returned differs from the extension that
321 /// std::path::Path::extension returns. Namely, this extension includes
322 /// the '.'. Also, paths like `.rs` are considered to have an extension
323 /// of `.rs`.
324 fn ext(&self) -> Option<String> {
325 if self.opts.case_insensitive {
326 return None;
327 }
328 let start = match self.tokens.get(0) {
329 Some(&Token::RecursivePrefix) => 1,
330 Some(_) => 0,
331 _ => return None,
332 };
333 match self.tokens.get(start) {
334 Some(&Token::ZeroOrMore) => {
335 // If there was no recursive prefix, then we only permit
336 // `*` if `*` can match a `/`. For example, if `*` can't
337 // match `/`, then `*.c` doesn't match `foo/bar.c`.
338 if start == 0 && self.opts.literal_separator {
339 return None;
340 }
341 }
342 _ => return None,
343 }
344 match self.tokens.get(start + 1) {
345 Some(&Token::Literal('.')) => {}
346 _ => return None,
347 }
348 let mut lit = ".".to_string();
349 for t in self.tokens[start + 2..].iter() {
350 match *t {
351 Token::Literal('.') | Token::Literal('/') => return None,
352 Token::Literal(c) => lit.push(c),
353 _ => return None,
354 }
355 }
356 if lit.is_empty() {
357 None
358 } else {
359 Some(lit)
360 }
361 }
362
363 /// This is like `ext`, but returns an extension even if it isn't sufficent
364 /// to imply a match. Namely, if an extension is returned, then it is
365 /// necessary but not sufficient for a match.
366 fn required_ext(&self) -> Option<String> {
367 if self.opts.case_insensitive {
368 return None;
369 }
370 // We don't care at all about the beginning of this pattern. All we
371 // need to check for is if it ends with a literal of the form `.ext`.
372 let mut ext: Vec<char> = vec![]; // built in reverse
373 for t in self.tokens.iter().rev() {
374 match *t {
375 Token::Literal('/') => return None,
376 Token::Literal(c) => {
377 ext.push(c);
378 if c == '.' {
379 break;
380 }
381 }
382 _ => return None,
383 }
384 }
385 if ext.last() != Some(&'.') {
386 None
387 } else {
388 ext.reverse();
389 Some(ext.into_iter().collect())
390 }
391 }
392
393 /// Returns a literal prefix of this pattern if the entire pattern matches
394 /// if the literal prefix matches.
395 fn prefix(&self) -> Option<String> {
396 if self.opts.case_insensitive {
397 return None;
398 }
399 let end = match self.tokens.last() {
400 Some(&Token::ZeroOrMore) => {
401 if self.opts.literal_separator {
402 // If a trailing `*` can't match a `/`, then we can't
403 // assume a match of the prefix corresponds to a match
404 // of the overall pattern. e.g., `foo/*` with
405 // `literal_separator` enabled matches `foo/bar` but not
406 // `foo/bar/baz`, even though `foo/bar/baz` has a `foo/`
407 // literal prefix.
408 return None;
409 }
410 self.tokens.len() - 1
411 }
412 _ => self.tokens.len(),
413 };
414 let mut lit = String::new();
415 for t in &self.tokens[0..end] {
416 match *t {
417 Token::Literal(c) => lit.push(c),
418 _ => return None,
419 }
420 }
421 if lit.is_empty() {
422 None
423 } else {
424 Some(lit)
425 }
426 }
427
428 /// Returns a literal suffix of this pattern if the entire pattern matches
429 /// if the literal suffix matches.
430 ///
431 /// If a literal suffix is returned and it must match either the entire
432 /// file path or be preceded by a `/`, then also return true. This happens
433 /// with a pattern like `**/foo/bar`. Namely, this pattern matches
434 /// `foo/bar` and `baz/foo/bar`, but not `foofoo/bar`. In this case, the
435 /// suffix returned is `/foo/bar` (but should match the entire path
436 /// `foo/bar`).
437 ///
438 /// When this returns true, the suffix literal is guaranteed to start with
439 /// a `/`.
440 fn suffix(&self) -> Option<(String, bool)> {
441 if self.opts.case_insensitive {
442 return None;
443 }
444 let mut lit = String::new();
445 let (start, entire) = match self.tokens.get(0) {
446 Some(&Token::RecursivePrefix) => {
447 // We only care if this follows a path component if the next
448 // token is a literal.
449 if let Some(&Token::Literal(_)) = self.tokens.get(1) {
450 lit.push('/');
451 (1, true)
452 } else {
453 (1, false)
454 }
455 }
456 _ => (0, false),
457 };
458 let start = match self.tokens.get(start) {
459 Some(&Token::ZeroOrMore) => {
460 // If literal_separator is enabled, then a `*` can't
461 // necessarily match everything, so reporting a suffix match
462 // as a match of the pattern would be a false positive.
463 if self.opts.literal_separator {
464 return None;
465 }
466 start + 1
467 }
468 _ => start,
469 };
470 for t in &self.tokens[start..] {
471 match *t {
472 Token::Literal(c) => lit.push(c),
473 _ => return None,
474 }
475 }
476 if lit.is_empty() || lit == "/" {
477 None
478 } else {
479 Some((lit, entire))
480 }
481 }
482
483 /// If this pattern only needs to inspect the basename of a file path,
484 /// then the tokens corresponding to only the basename match are returned.
485 ///
486 /// For example, given a pattern of `**/*.foo`, only the tokens
487 /// corresponding to `*.foo` are returned.
488 ///
489 /// Note that this will return None if any match of the basename tokens
490 /// doesn't correspond to a match of the entire pattern. For example, the
491 /// glob `foo` only matches when a file path has a basename of `foo`, but
492 /// doesn't *always* match when a file path has a basename of `foo`. e.g.,
493 /// `foo` doesn't match `abc/foo`.
494 fn basename_tokens(&self) -> Option<&[Token]> {
495 if self.opts.case_insensitive {
496 return None;
497 }
498 let start = match self.tokens.get(0) {
499 Some(&Token::RecursivePrefix) => 1,
500 _ => {
501 // With nothing to gobble up the parent portion of a path,
502 // we can't assume that matching on only the basename is
503 // correct.
504 return None;
505 }
506 };
507 if self.tokens[start..].is_empty() {
508 return None;
509 }
510 for t in &self.tokens[start..] {
511 match *t {
512 Token::Literal('/') => return None,
513 Token::Literal(_) => {} // OK
514 Token::Any | Token::ZeroOrMore => {
515 if !self.opts.literal_separator {
516 // In this case, `*` and `?` can match a path
517 // separator, which means this could reach outside
518 // the basename.
519 return None;
520 }
521 }
522 Token::RecursivePrefix
523 | Token::RecursiveSuffix
524 | Token::RecursiveZeroOrMore => {
525 return None;
526 }
527 Token::Class{..} | Token::Alternates(..) => {
528 // We *could* be a little smarter here, but either one
529 // of these is going to prevent our literal optimizations
530 // anyway, so give up.
531 return None;
532 }
533 }
534 }
535 Some(&self.tokens[start..])
536 }
537
538 /// Returns the pattern as a literal if and only if the pattern exclusively
539 /// matches the basename of a file path *and* is a literal.
540 ///
541 /// The basic format of these patterns is `**/{literal}`, where `{literal}`
542 /// does not contain a path separator.
543 fn basename_literal(&self) -> Option<String> {
544 let tokens = match self.basename_tokens() {
545 None => return None,
546 Some(tokens) => tokens,
547 };
548 let mut lit = String::new();
549 for t in tokens {
550 match *t {
551 Token::Literal(c) => lit.push(c),
552 _ => return None,
553 }
554 }
555 Some(lit)
556 }
557 }
558
559 impl<'a> GlobBuilder<'a> {
560 /// Create a new builder for the pattern given.
561 ///
562 /// The pattern is not compiled until `build` is called.
563 pub fn new(glob: &'a str) -> GlobBuilder<'a> {
564 GlobBuilder {
565 glob: glob,
566 opts: GlobOptions::default(),
567 }
568 }
569
570 /// Parses and builds the pattern.
571 pub fn build(&self) -> Result<Glob, Error> {
572 let mut p = Parser {
573 glob: &self.glob,
574 stack: vec![Tokens::default()],
575 chars: self.glob.chars().peekable(),
576 prev: None,
577 cur: None,
578 opts: &self.opts,
579 };
580 p.parse()?;
581 if p.stack.is_empty() {
582 Err(Error {
583 glob: Some(self.glob.to_string()),
584 kind: ErrorKind::UnopenedAlternates,
585 })
586 } else if p.stack.len() > 1 {
587 Err(Error {
588 glob: Some(self.glob.to_string()),
589 kind: ErrorKind::UnclosedAlternates,
590 })
591 } else {
592 let tokens = p.stack.pop().unwrap();
593 Ok(Glob {
594 glob: self.glob.to_string(),
595 re: tokens.to_regex_with(&self.opts),
596 opts: self.opts,
597 tokens: tokens,
598 })
599 }
600 }
601
602 /// Toggle whether the pattern matches case insensitively or not.
603 ///
604 /// This is disabled by default.
605 pub fn case_insensitive(&mut self, yes: bool) -> &mut GlobBuilder<'a> {
606 self.opts.case_insensitive = yes;
607 self
608 }
609
610 /// Toggle whether a literal `/` is required to match a path separator.
611 pub fn literal_separator(&mut self, yes: bool) -> &mut GlobBuilder<'a> {
612 self.opts.literal_separator = yes;
613 self
614 }
615
616 /// When enabled, a back slash (`\`) may be used to escape
617 /// special characters in a glob pattern. Additionally, this will
618 /// prevent `\` from being interpreted as a path separator on all
619 /// platforms.
620 ///
621 /// This is enabled by default on platforms where `\` is not a
622 /// path separator and disabled by default on platforms where `\`
623 /// is a path separator.
624 pub fn backslash_escape(&mut self, yes: bool) -> &mut GlobBuilder<'a> {
625 self.opts.backslash_escape = yes;
626 self
627 }
628 }
629
630 impl Tokens {
631 /// Convert this pattern to a string that is guaranteed to be a valid
632 /// regular expression and will represent the matching semantics of this
633 /// glob pattern and the options given.
634 fn to_regex_with(&self, options: &GlobOptions) -> String {
635 let mut re = String::new();
636 re.push_str("(?-u)");
637 if options.case_insensitive {
638 re.push_str("(?i)");
639 }
640 re.push('^');
641 // Special case. If the entire glob is just `**`, then it should match
642 // everything.
643 if self.len() == 1 && self[0] == Token::RecursivePrefix {
644 re.push_str(".*");
645 re.push('$');
646 return re;
647 }
648 self.tokens_to_regex(options, &self, &mut re);
649 re.push('$');
650 re
651 }
652
653 fn tokens_to_regex(
654 &self,
655 options: &GlobOptions,
656 tokens: &[Token],
657 re: &mut String,
658 ) {
659 for tok in tokens {
660 match *tok {
661 Token::Literal(c) => {
662 re.push_str(&char_to_escaped_literal(c));
663 }
664 Token::Any => {
665 if options.literal_separator {
666 re.push_str("[^/]");
667 } else {
668 re.push_str(".");
669 }
670 }
671 Token::ZeroOrMore => {
672 if options.literal_separator {
673 re.push_str("[^/]*");
674 } else {
675 re.push_str(".*");
676 }
677 }
678 Token::RecursivePrefix => {
679 re.push_str("(?:/?|.*/)");
680 }
681 Token::RecursiveSuffix => {
682 re.push_str("(?:/?|/.*)");
683 }
684 Token::RecursiveZeroOrMore => {
685 re.push_str("(?:/|/.*/)");
686 }
687 Token::Class { negated, ref ranges } => {
688 re.push('[');
689 if negated {
690 re.push('^');
691 }
692 for r in ranges {
693 if r.0 == r.1 {
694 // Not strictly necessary, but nicer to look at.
695 re.push_str(&char_to_escaped_literal(r.0));
696 } else {
697 re.push_str(&char_to_escaped_literal(r.0));
698 re.push('-');
699 re.push_str(&char_to_escaped_literal(r.1));
700 }
701 }
702 re.push(']');
703 }
704 Token::Alternates(ref patterns) => {
705 let mut parts = vec![];
706 for pat in patterns {
707 let mut altre = String::new();
708 self.tokens_to_regex(options, &pat, &mut altre);
709 if !altre.is_empty() {
710 parts.push(altre);
711 }
712 }
713
714 // It is possible to have an empty set in which case the
715 // resulting alternation '()' would be an error.
716 if !parts.is_empty() {
717 re.push('(');
718 re.push_str(&parts.join("|"));
719 re.push(')');
720 }
721 }
722 }
723 }
724 }
725 }
726
727 /// Convert a Unicode scalar value to an escaped string suitable for use as
728 /// a literal in a non-Unicode regex.
729 fn char_to_escaped_literal(c: char) -> String {
730 bytes_to_escaped_literal(&c.to_string().into_bytes())
731 }
732
733 /// Converts an arbitrary sequence of bytes to a UTF-8 string. All non-ASCII
734 /// code units are converted to their escaped form.
735 fn bytes_to_escaped_literal(bs: &[u8]) -> String {
736 let mut s = String::with_capacity(bs.len());
737 for &b in bs {
738 if b <= 0x7F {
739 s.push_str(&regex::escape(&(b as char).to_string()));
740 } else {
741 s.push_str(&format!("\\x{:02x}", b));
742 }
743 }
744 s
745 }
746
747 struct Parser<'a> {
748 glob: &'a str,
749 stack: Vec<Tokens>,
750 chars: iter::Peekable<str::Chars<'a>>,
751 prev: Option<char>,
752 cur: Option<char>,
753 opts: &'a GlobOptions,
754 }
755
756 impl<'a> Parser<'a> {
757 fn error(&self, kind: ErrorKind) -> Error {
758 Error { glob: Some(self.glob.to_string()), kind: kind }
759 }
760
761 fn parse(&mut self) -> Result<(), Error> {
762 while let Some(c) = self.bump() {
763 match c {
764 '?' => self.push_token(Token::Any)?,
765 '*' => self.parse_star()?,
766 '[' => self.parse_class()?,
767 '{' => self.push_alternate()?,
768 '}' => self.pop_alternate()?,
769 ',' => self.parse_comma()?,
770 '\\' => self.parse_backslash()?,
771 c => self.push_token(Token::Literal(c))?,
772 }
773 }
774 Ok(())
775 }
776
777 fn push_alternate(&mut self) -> Result<(), Error> {
778 if self.stack.len() > 1 {
779 return Err(self.error(ErrorKind::NestedAlternates));
780 }
781 Ok(self.stack.push(Tokens::default()))
782 }
783
784 fn pop_alternate(&mut self) -> Result<(), Error> {
785 let mut alts = vec![];
786 while self.stack.len() >= 2 {
787 alts.push(self.stack.pop().unwrap());
788 }
789 self.push_token(Token::Alternates(alts))
790 }
791
792 fn push_token(&mut self, tok: Token) -> Result<(), Error> {
793 if let Some(ref mut pat) = self.stack.last_mut() {
794 return Ok(pat.push(tok));
795 }
796 Err(self.error(ErrorKind::UnopenedAlternates))
797 }
798
799 fn pop_token(&mut self) -> Result<Token, Error> {
800 if let Some(ref mut pat) = self.stack.last_mut() {
801 return Ok(pat.pop().unwrap());
802 }
803 Err(self.error(ErrorKind::UnopenedAlternates))
804 }
805
806 fn have_tokens(&self) -> Result<bool, Error> {
807 match self.stack.last() {
808 None => Err(self.error(ErrorKind::UnopenedAlternates)),
809 Some(ref pat) => Ok(!pat.is_empty()),
810 }
811 }
812
813 fn parse_comma(&mut self) -> Result<(), Error> {
814 // If we aren't inside a group alternation, then don't
815 // treat commas specially. Otherwise, we need to start
816 // a new alternate.
817 if self.stack.len() <= 1 {
818 self.push_token(Token::Literal(','))
819 } else {
820 Ok(self.stack.push(Tokens::default()))
821 }
822 }
823
824 fn parse_backslash(&mut self) -> Result<(), Error> {
825 if self.opts.backslash_escape {
826 match self.bump() {
827 None => Err(self.error(ErrorKind::DanglingEscape)),
828 Some(c) => self.push_token(Token::Literal(c)),
829 }
830 } else if is_separator('\\') {
831 // Normalize all patterns to use / as a separator.
832 self.push_token(Token::Literal('/'))
833 } else {
834 self.push_token(Token::Literal('\\'))
835 }
836 }
837
838 fn parse_star(&mut self) -> Result<(), Error> {
839 let prev = self.prev;
840 if self.peek() != Some('*') {
841 self.push_token(Token::ZeroOrMore)?;
842 return Ok(());
843 }
844 assert!(self.bump() == Some('*'));
845 if !self.have_tokens()? {
846 if !self.peek().map_or(true, is_separator) {
847 self.push_token(Token::ZeroOrMore)?;
848 self.push_token(Token::ZeroOrMore)?;
849 } else {
850 self.push_token(Token::RecursivePrefix)?;
851 assert!(self.bump().map_or(true, is_separator));
852 }
853 return Ok(());
854 }
855
856 if !prev.map(is_separator).unwrap_or(false) {
857 if self.stack.len() <= 1
858 || (prev != Some(',') && prev != Some('{'))
859 {
860 self.push_token(Token::ZeroOrMore)?;
861 self.push_token(Token::ZeroOrMore)?;
862 return Ok(());
863 }
864 }
865 let is_suffix =
866 match self.peek() {
867 None => {
868 assert!(self.bump().is_none());
869 true
870 }
871 Some(',') | Some('}') if self.stack.len() >= 2 => {
872 true
873 }
874 Some(c) if is_separator(c) => {
875 assert!(self.bump().map(is_separator).unwrap_or(false));
876 false
877 }
878 _ => {
879 self.push_token(Token::ZeroOrMore)?;
880 self.push_token(Token::ZeroOrMore)?;
881 return Ok(());
882 }
883 };
884 match self.pop_token()? {
885 Token::RecursivePrefix => {
886 self.push_token(Token::RecursivePrefix)?;
887 }
888 Token::RecursiveSuffix => {
889 self.push_token(Token::RecursiveSuffix)?;
890 }
891 _ => {
892 if is_suffix {
893 self.push_token(Token::RecursiveSuffix)?;
894 } else {
895 self.push_token(Token::RecursiveZeroOrMore)?;
896 }
897 }
898 }
899 Ok(())
900 }
901
902 fn parse_class(&mut self) -> Result<(), Error> {
903 fn add_to_last_range(
904 glob: &str,
905 r: &mut (char, char),
906 add: char,
907 ) -> Result<(), Error> {
908 r.1 = add;
909 if r.1 < r.0 {
910 Err(Error {
911 glob: Some(glob.to_string()),
912 kind: ErrorKind::InvalidRange(r.0, r.1),
913 })
914 } else {
915 Ok(())
916 }
917 }
918 let mut ranges = vec![];
919 let negated = match self.chars.peek() {
920 Some(&'!') | Some(&'^') => {
921 let bump = self.bump();
922 assert!(bump == Some('!') || bump == Some('^'));
923 true
924 }
925 _ => false,
926 };
927 let mut first = true;
928 let mut in_range = false;
929 loop {
930 let c = match self.bump() {
931 Some(c) => c,
932 // The only way to successfully break this loop is to observe
933 // a ']'.
934 None => return Err(self.error(ErrorKind::UnclosedClass)),
935 };
936 match c {
937 ']' => {
938 if first {
939 ranges.push((']', ']'));
940 } else {
941 break;
942 }
943 }
944 '-' => {
945 if first {
946 ranges.push(('-', '-'));
947 } else if in_range {
948 // invariant: in_range is only set when there is
949 // already at least one character seen.
950 let r = ranges.last_mut().unwrap();
951 add_to_last_range(&self.glob, r, '-')?;
952 in_range = false;
953 } else {
954 assert!(!ranges.is_empty());
955 in_range = true;
956 }
957 }
958 c => {
959 if in_range {
960 // invariant: in_range is only set when there is
961 // already at least one character seen.
962 add_to_last_range(
963 &self.glob, ranges.last_mut().unwrap(), c)?;
964 } else {
965 ranges.push((c, c));
966 }
967 in_range = false;
968 }
969 }
970 first = false;
971 }
972 if in_range {
973 // Means that the last character in the class was a '-', so add
974 // it as a literal.
975 ranges.push(('-', '-'));
976 }
977 self.push_token(Token::Class {
978 negated: negated,
979 ranges: ranges,
980 })
981 }
982
983 fn bump(&mut self) -> Option<char> {
984 self.prev = self.cur;
985 self.cur = self.chars.next();
986 self.cur
987 }
988
989 fn peek(&mut self) -> Option<char> {
990 self.chars.peek().map(|&ch| ch)
991 }
992 }
993
/// Returns true if `haystack` begins with `needle`.
///
/// An empty `needle` is a prefix of every haystack.
#[cfg(test)]
fn starts_with(needle: &[u8], haystack: &[u8]) -> bool {
    haystack.starts_with(needle)
}
998
/// Returns true if `haystack` ends with `needle`.
///
/// An empty `needle` is a suffix of every haystack.
#[cfg(test)]
fn ends_with(needle: &[u8], haystack: &[u8]) -> bool {
    haystack.ends_with(needle)
}
1006
1007 #[cfg(test)]
1008 mod tests {
1009 use {GlobSetBuilder, ErrorKind};
1010 use super::{Glob, GlobBuilder, Token};
1011 use super::Token::*;
1012
/// Optional builder settings for a test case.
///
/// A field left as `None` means the corresponding builder method is not
/// called by the test macros.
#[derive(Debug, Default, Clone, Copy)]
struct Options {
    /// Value for `GlobBuilder::case_insensitive`, if set.
    casei: Option<bool>,
    /// Value for `GlobBuilder::literal_separator`, if set.
    litsep: Option<bool>,
    /// Value for `GlobBuilder::backslash_escape`, if set.
    bsesc: Option<bool>,
}
1019
/// Defines a test asserting that `$pat` parses into exactly `$tokens`.
macro_rules! syntax {
    ($name:ident, $pat:expr, $tokens:expr) => {
        #[test]
        fn $name() {
            let parsed = Glob::new($pat).unwrap();
            let actual = parsed.tokens.0;
            assert_eq!($tokens, actual);
        }
    };
}
1029
/// Defines a test asserting that parsing `$pat` fails with error kind
/// `$err`.
macro_rules! syntaxerr {
    ($name:ident, $pat:expr, $err:expr) => {
        #[test]
        fn $name() {
            let failure = Glob::new($pat).unwrap_err();
            assert_eq!(&$err, failure.kind());
        }
    };
}
1039
/// Defines a test asserting that `$pat`, built with the given options
/// (default options if omitted), translates to `(?-u)` followed by `$re`.
macro_rules! toregex {
    ($name:ident, $pat:expr, $re:expr) => {
        toregex!($name, $pat, $re, Options::default());
    };
    ($name:ident, $pat:expr, $re:expr, $options:expr) => {
        #[test]
        fn $name() {
            let mut builder = GlobBuilder::new($pat);
            let opts = $options;
            // Only settings that were given explicitly are applied.
            if let Some(yes) = opts.casei {
                builder.case_insensitive(yes);
            }
            if let Some(yes) = opts.litsep {
                builder.literal_separator(yes);
            }
            if let Some(yes) = opts.bsesc {
                builder.backslash_escape(yes);
            }
            let pat = builder.build().unwrap();
            assert_eq!(format!("(?-u){}", $re), pat.regex());
        }
    };
}
1062
/// Defines a test asserting that `$pat` matches `$path` through all three
/// matchers: the regex matcher, the strategic matcher and a glob set that
/// contains only this pattern.
macro_rules! matches {
    ($name:ident, $pat:expr, $path:expr) => {
        matches!($name, $pat, $path, Options::default());
    };
    ($name:ident, $pat:expr, $path:expr, $options:expr) => {
        #[test]
        fn $name() {
            let mut builder = GlobBuilder::new($pat);
            let opts = $options;
            // Only settings that were given explicitly are applied.
            if let Some(yes) = opts.casei {
                builder.case_insensitive(yes);
            }
            if let Some(yes) = opts.litsep {
                builder.literal_separator(yes);
            }
            if let Some(yes) = opts.bsesc {
                builder.backslash_escape(yes);
            }
            let pat = builder.build().unwrap();
            let matcher = pat.compile_matcher();
            let strategic = pat.compile_strategic_matcher();
            let set = GlobSetBuilder::new().add(pat).build().unwrap();
            assert!(matcher.is_match($path));
            assert!(strategic.is_match($path));
            assert!(set.is_match($path));
        }
    };
}
1090
/// Defines a test asserting that `$pat` does NOT match `$path` through any
/// of the three matchers: the regex matcher, the strategic matcher and a
/// glob set that contains only this pattern.
macro_rules! nmatches {
    ($name:ident, $pat:expr, $path:expr) => {
        nmatches!($name, $pat, $path, Options::default());
    };
    ($name:ident, $pat:expr, $path:expr, $options:expr) => {
        #[test]
        fn $name() {
            let mut builder = GlobBuilder::new($pat);
            let opts = $options;
            // Only settings that were given explicitly are applied.
            if let Some(yes) = opts.casei {
                builder.case_insensitive(yes);
            }
            if let Some(yes) = opts.litsep {
                builder.literal_separator(yes);
            }
            if let Some(yes) = opts.bsesc {
                builder.backslash_escape(yes);
            }
            let pat = builder.build().unwrap();
            let matcher = pat.compile_matcher();
            let strategic = pat.compile_strategic_matcher();
            let set = GlobSetBuilder::new().add(pat).build().unwrap();
            assert!(!matcher.is_match($path));
            assert!(!strategic.is_match($path));
            assert!(!set.is_match($path));
        }
    };
}
1118
/// Shorthand for turning a string slice into an owned `String`.
fn s(string: &str) -> String {
    String::from(string)
}
1120
1121 fn class(s: char, e: char) -> Token {
1122 Class { negated: false, ranges: vec![(s, e)] }
1123 }
1124
1125 fn classn(s: char, e: char) -> Token {
1126 Class { negated: true, ranges: vec![(s, e)] }
1127 }
1128
1129 fn rclass(ranges: &[(char, char)]) -> Token {
1130 Class { negated: false, ranges: ranges.to_vec() }
1131 }
1132
1133 fn rclassn(ranges: &[(char, char)]) -> Token {
1134 Class { negated: true, ranges: ranges.to_vec() }
1135 }
1136
// Parser cases. Each `syntax!` invocation supplies a test name, a glob
// pattern, and a token vector. The macro itself is defined earlier in the
// file; presumably it compares the parsed tokens with the vector.

// Literal characters and the single-character wildcard `?`.
1137 syntax!(literal1, "a", vec![Literal('a')]);
1138 syntax!(literal2, "ab", vec![Literal('a'), Literal('b')]);
1139 syntax!(any1, "?", vec![Any]);
1140 syntax!(any2, "a?b", vec![Literal('a'), Any, Literal('b')]);
// The `*` wildcard.
1141 syntax!(seq1, "*", vec![ZeroOrMore]);
1142 syntax!(seq2, "a*b", vec![Literal('a'), ZeroOrMore, Literal('b')]);
1143 syntax!(seq3, "*a*b*", vec![
1144 ZeroOrMore, Literal('a'), ZeroOrMore, Literal('b'), ZeroOrMore,
1145 ]);
// The `**` forms: as a prefix, as a suffix, and between separators.
1146 syntax!(rseq1, "**", vec![RecursivePrefix]);
1147 syntax!(rseq2, "**/", vec![RecursivePrefix]);
1148 syntax!(rseq3, "/**", vec![RecursiveSuffix]);
1149 syntax!(rseq4, "/**/", vec![RecursiveZeroOrMore]);
1150 syntax!(rseq5, "a/**/b", vec![
1151 Literal('a'), RecursiveZeroOrMore, Literal('b'),
1152 ]);
// Character classes: single characters, ranges, `!`/`^` negation, and the
// placement of `-` and `]` inside the brackets.
1153 syntax!(cls1, "[a]", vec![class('a', 'a')]);
1154 syntax!(cls2, "[!a]", vec![classn('a', 'a')]);
1155 syntax!(cls3, "[a-z]", vec![class('a', 'z')]);
1156 syntax!(cls4, "[!a-z]", vec![classn('a', 'z')]);
1157 syntax!(cls5, "[-]", vec![class('-', '-')]);
1158 syntax!(cls6, "[]]", vec![class(']', ']')]);
1159 syntax!(cls7, "[*]", vec![class('*', '*')]);
1160 syntax!(cls8, "[!!]", vec![classn('!', '!')]);
1161 syntax!(cls9, "[a-]", vec![rclass(&[('a', 'a'), ('-', '-')])]);
1162 syntax!(cls10, "[-a-z]", vec![rclass(&[('-', '-'), ('a', 'z')])]);
1163 syntax!(cls11, "[a-z-]", vec![rclass(&[('a', 'z'), ('-', '-')])]);
1164 syntax!(cls12, "[-a-z-]", vec![
1165 rclass(&[('-', '-'), ('a', 'z'), ('-', '-')]),
1166 ]);
1167 syntax!(cls13, "[]-z]", vec![class(']', 'z')]);
1168 syntax!(cls14, "[--z]", vec![class('-', 'z')]);
1169 syntax!(cls15, "[ --]", vec![class(' ', '-')]);
1170 syntax!(cls16, "[0-9a-z]", vec![rclass(&[('0', '9'), ('a', 'z')])]);
1171 syntax!(cls17, "[a-z0-9]", vec![rclass(&[('a', 'z'), ('0', '9')])]);
1172 syntax!(cls18, "[!0-9a-z]", vec![rclassn(&[('0', '9'), ('a', 'z')])]);
1173 syntax!(cls19, "[!a-z0-9]", vec![rclassn(&[('a', 'z'), ('0', '9')])]);
1174 syntax!(cls20, "[^a]", vec![classn('a', 'a')]);
1175 syntax!(cls21, "[^a-z]", vec![classn('a', 'z')]);
1176
// Malformed patterns. Each `syntaxerr!` invocation pairs a test name and a
// pattern with an `ErrorKind` value. The macro is defined earlier in the
// file; presumably it checks the error kind returned when parsing fails.
// Classes that are never closed:
1177 syntaxerr!(err_unclosed1, "[", ErrorKind::UnclosedClass);
1178 syntaxerr!(err_unclosed2, "[]", ErrorKind::UnclosedClass);
1179 syntaxerr!(err_unclosed3, "[!", ErrorKind::UnclosedClass);
1180 syntaxerr!(err_unclosed4, "[!]", ErrorKind::UnclosedClass);
// Ranges whose start comes after their end:
1181 syntaxerr!(err_range1, "[z-a]", ErrorKind::InvalidRange('z', 'a'));
1182 syntaxerr!(err_range2, "[z--]", ErrorKind::InvalidRange('z', '-'));
1183
1184 const CASEI: Options = Options {
1185 casei: Some(true),
1186 litsep: None,
1187 bsesc: None,
1188 };
1189 const SLASHLIT: Options = Options {
1190 casei: None,
1191 litsep: Some(true),
1192 bsesc: None,
1193 };
1194 const NOBSESC: Options = Options {
1195 casei: None,
1196 litsep: None,
1197 bsesc: Some(false),
1198 };
1199 const BSESC: Options = Options {
1200 casei: None,
1201 litsep: None,
1202 bsesc: Some(true),
1203 };
1204
// Glob-to-regex translation cases. `toregex!` compares the glob's regex
// with the expected string after prefixing that string with `(?-u)`.

// Case-insensitive matching adds the `(?i)` flag.
// NOTE(review): this call passes `&CASEI` by reference while every other
// option-taking call in this module passes the constant by value.
1205 toregex!(re_casei, "a", "(?i)^a$", &CASEI);
1206
// With `SLASHLIT`, `?` and `*` translate to classes that exclude `/`.
1207 toregex!(re_slash1, "?", r"^[^/]$", SLASHLIT);
1208 toregex!(re_slash2, "*", r"^[^/]*$", SLASHLIT);
1209
// Default options: literals, wildcards, escaping of regex metacharacters,
// and a non-ASCII literal emitted as escaped UTF-8 bytes.
1210 toregex!(re1, "a", "^a$");
1211 toregex!(re2, "?", "^.$");
1212 toregex!(re3, "*", "^.*$");
1213 toregex!(re4, "a?", "^a.$");
1214 toregex!(re5, "?a", "^.a$");
1215 toregex!(re6, "a*", "^a.*$");
1216 toregex!(re7, "*a", "^.*a$");
1217 toregex!(re8, "[*]", r"^[\*]$");
1218 toregex!(re9, "[+]", r"^[\+]$");
1219 toregex!(re10, "+", r"^\+$");
1220 toregex!(re11, "☃", r"^\xe2\x98\x83$");
// `**` as a path component; repeated `**/` components give the same regex
// as a single one.
1221 toregex!(re12, "**", r"^.*$");
1222 toregex!(re13, "**/", r"^.*$");
1223 toregex!(re14, "**/*", r"^(?:/?|.*/).*$");
1224 toregex!(re15, "**/**", r"^.*$");
1225 toregex!(re16, "**/**/*", r"^(?:/?|.*/).*$");
1226 toregex!(re17, "**/**/**", r"^.*$");
1227 toregex!(re18, "**/**/**/*", r"^(?:/?|.*/).*$");
1228 toregex!(re19, "a/**", r"^a(?:/?|/.*)$");
1229 toregex!(re20, "a/**/**", r"^a(?:/?|/.*)$");
1230 toregex!(re21, "a/**/**/**", r"^a(?:/?|/.*)$");
1231 toregex!(re22, "a/**/b", r"^a(?:/|/.*/)b$");
1232 toregex!(re23, "a/**/**/b", r"^a(?:/|/.*/)b$");
1233 toregex!(re24, "a/**/**/**/b", r"^a(?:/|/.*/)b$");
1234 toregex!(re25, "**/b", r"^(?:/?|.*/)b$");
1235 toregex!(re26, "**/**/b", r"^(?:/?|.*/)b$");
1236 toregex!(re27, "**/**/**/b", r"^(?:/?|.*/)b$");
// `**` that is not a whole path component is translated as consecutive `*`
// wildcards.
1237 toregex!(re28, "a**", r"^a.*.*$");
1238 toregex!(re29, "**a", r"^.*.*a$");
1239 toregex!(re30, "a**b", r"^a.*.*b$");
1240 toregex!(re31, "***", r"^.*.*.*$");
1241 toregex!(re32, "/a**", r"^/a.*.*$");
1242 toregex!(re33, "/**a", r"^/.*.*a$");
1243 toregex!(re34, "/a**b", r"^/a.*.*b$");
1244
// Positive cases. Each `matches!` invocation defines a test asserting that
// the pattern (second argument) matches the path (third argument), under
// the options given as the optional fourth argument.

// Literals and `*` wildcards.
1245 matches!(match1, "a", "a");
1246 matches!(match2, "a*b", "a_b");
1247 matches!(match3, "a*b*c", "abc");
1248 matches!(match4, "a*b*c", "a_b_c");
1249 matches!(match5, "a*b*c", "a___b___c");
1250 matches!(match6, "abc*abc*abc", "abcabcabcabcabcabcabc");
1251 matches!(match7, "a*a*a*a*a*a*a*a*a", "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa");
1252 matches!(match8, "a*b[xyz]c*d", "abxcdbxcddd");
1253 matches!(match9, "*.rs", ".rs");
1254 matches!(match10, "☃", "☃");
1255
// Recursive `**` patterns.
1256 matches!(matchrec1, "some/**/needle.txt", "some/needle.txt");
1257 matches!(matchrec2, "some/**/needle.txt", "some/one/needle.txt");
1258 matches!(matchrec3, "some/**/needle.txt", "some/one/two/needle.txt");
1259 matches!(matchrec4, "some/**/needle.txt", "some/other/needle.txt");
1260 matches!(matchrec5, "**", "abcde");
1261 matches!(matchrec6, "**", "");
1262 matches!(matchrec7, "**", ".asdf");
1263 matches!(matchrec8, "**", "/x/.asdf");
1264 matches!(matchrec9, "some/**/**/needle.txt", "some/needle.txt");
1265 matches!(matchrec10, "some/**/**/needle.txt", "some/one/needle.txt");
1266 matches!(matchrec11, "some/**/**/needle.txt", "some/one/two/needle.txt");
1267 matches!(matchrec12, "some/**/**/needle.txt", "some/other/needle.txt");
1268 matches!(matchrec13, "**/test", "one/two/test");
1269 matches!(matchrec14, "**/test", "one/test");
1270 matches!(matchrec15, "**/test", "test");
1271 matches!(matchrec16, "/**/test", "/one/two/test");
1272 matches!(matchrec17, "/**/test", "/one/test");
1273 matches!(matchrec18, "/**/test", "/test");
1274 matches!(matchrec19, "**/.*", ".abc");
1275 matches!(matchrec20, "**/.*", "abc/.abc");
1276 matches!(matchrec21, ".*/**", ".abc");
1277 matches!(matchrec22, ".*/**", ".abc/abc");
1278 matches!(matchrec23, "foo/**", "foo");
1279 matches!(matchrec24, "**/foo/bar", "foo/bar");
1280 matches!(matchrec25, "some/*/needle.txt", "some/one/needle.txt");
1281
// Character classes and ranges, including `!`/`^` negation and literal `-`.
1282 matches!(matchrange1, "a[0-9]b", "a0b");
1283 matches!(matchrange2, "a[0-9]b", "a9b");
1284 matches!(matchrange3, "a[!0-9]b", "a_b");
1285 matches!(matchrange4, "[a-z123]", "1");
1286 matches!(matchrange5, "[1a-z23]", "1");
1287 matches!(matchrange6, "[123a-z]", "1");
1288 matches!(matchrange7, "[abc-]", "-");
1289 matches!(matchrange8, "[-abc]", "-");
1290 matches!(matchrange9, "[-a-c]", "b");
1291 matches!(matchrange10, "[a-c-]", "b");
1292 matches!(matchrange11, "[-]", "-");
1293 matches!(matchrange12, "a[^0-9]b", "a_b");
1294
// A leading `*` with default options, where it may span path separators.
1295 matches!(matchpat1, "*hello.txt", "hello.txt");
1296 matches!(matchpat2, "*hello.txt", "gareth_says_hello.txt");
1297 matches!(matchpat3, "*hello.txt", "some/path/to/hello.txt");
1298 matches!(matchpat4, "*hello.txt", "some\\path\\to\\hello.txt");
1299 matches!(matchpat5, "*hello.txt", "/an/absolute/path/to/hello.txt");
1300 matches!(matchpat6, "*some/path/to/hello.txt", "some/path/to/hello.txt");
1301 matches!(matchpat7, "*some/path/to/hello.txt",
1302 "a/bigger/some/path/to/hello.txt");
1303
// Metacharacters matched literally by wrapping them in a class.
1304 matches!(matchescape, "_[[]_[]]_[?]_[*]_!_", "_[_]_?_*_!_");
1305
// Case-insensitive matching.
1306 matches!(matchcasei1, "aBcDeFg", "aBcDeFg", CASEI);
1307 matches!(matchcasei2, "aBcDeFg", "abcdefg", CASEI);
1308 matches!(matchcasei3, "aBcDeFg", "ABCDEFG", CASEI);
1309 matches!(matchcasei4, "aBcDeFg", "AbCdEfG", CASEI);
1310
// `{a,b}` alternation; a `,` outside braces is an ordinary character.
1311 matches!(matchalt1, "a,b", "a,b");
1312 matches!(matchalt2, ",", ",");
1313 matches!(matchalt3, "{a,b}", "a");
1314 matches!(matchalt4, "{a,b}", "b");
1315 matches!(matchalt5, "{**/src/**,foo}", "abc/src/bar");
1316 matches!(matchalt6, "{**/src/**,foo}", "foo");
1317 matches!(matchalt7, "{[}],foo}", "}");
1318 matches!(matchalt8, "{foo}", "foo");
1319 matches!(matchalt9, "{}", "");
1320 matches!(matchalt10, "{,}", "");
1321 matches!(matchalt11, "{*.foo,*.bar,*.wat}", "test.foo");
1322 matches!(matchalt12, "{*.foo,*.bar,*.wat}", "test.bar");
1323 matches!(matchalt13, "{*.foo,*.bar,*.wat}", "test.wat");
1324
// Path-separator cases under `SLASHLIT`. Several expectations depend on
// the platform, so those tests are selected with `cfg(unix)` or
// `cfg(not(unix))`.
1325 matches!(matchslash1, "abc/def", "abc/def", SLASHLIT);
1326 #[cfg(unix)]
1327 nmatches!(matchslash2, "abc?def", "abc/def", SLASHLIT);
1328 #[cfg(not(unix))]
1329 nmatches!(matchslash2, "abc?def", "abc\\def", SLASHLIT);
1330 nmatches!(matchslash3, "abc*def", "abc/def", SLASHLIT);
1331 matches!(matchslash4, "abc[/]def", "abc/def", SLASHLIT); // differs
1332 #[cfg(unix)]
1333 nmatches!(matchslash5, "abc\\def", "abc/def", SLASHLIT);
1334 #[cfg(not(unix))]
1335 matches!(matchslash5, "abc\\def", "abc/def", SLASHLIT);
1336
// Backslash handling. `BSESC` forces escaping on and `NOBSESC` forces it
// off; the last two cases use default options and are platform-specific.
1337 matches!(matchbackslash1, "\\[", "[", BSESC);
1338 matches!(matchbackslash2, "\\?", "?", BSESC);
1339 matches!(matchbackslash3, "\\*", "*", BSESC);
1340 matches!(matchbackslash4, "\\[a-z]", "\\a", NOBSESC);
1341 matches!(matchbackslash5, "\\?", "\\a", NOBSESC);
1342 matches!(matchbackslash6, "\\*", "\\\\", NOBSESC);
1343 #[cfg(unix)]
1344 matches!(matchbackslash7, "\\a", "a");
1345 #[cfg(not(unix))]
1346 matches!(matchbackslash8, "\\a", "/a");
1347
1348 nmatches!(matchnot1, "a*b*c", "abcd");
1349 nmatches!(matchnot2, "abc*abc*abc", "abcabcabcabcabcabcabca");
1350 nmatches!(matchnot3, "some/**/needle.txt", "some/other/notthis.txt");
1351 nmatches!(matchnot4, "some/**/**/needle.txt", "some/other/notthis.txt");
1352 nmatches!(matchnot5, "/**/test", "test");
1353 nmatches!(matchnot6, "/**/test", "/one/notthis");
1354 nmatches!(matchnot7, "/**/test", "/notthis");
1355 nmatches!(matchnot8, "**/.*", "ab.c");
1356 nmatches!(matchnot9, "**/.*", "abc/ab.c");
1357 nmatches!(matchnot10, ".*/**", "a.bc");
1358 nmatches!(matchnot11, ".*/**", "abc/a.bc");
1359 nmatches!(matchnot12, "a[0-9]b", "a_b");
1360 nmatches!(matchnot13, "a[!0-9]b", "a0b");
1361 nmatches!(matchnot14, "a[!0-9]b", "a9b");
1362 nmatches!(matchnot15, "[!-]", "-");
1363 nmatches!(matchnot16, "*hello.txt", "hello.txt-and-then-some");
1364 nmatches!(matchnot17, "*hello.txt", "goodbye.txt");
1365 nmatches!(matchnot18, "*some/path/to/hello.txt",
1366 "some/path/to/hello.txt-and-then-some");
1367 nmatches!(matchnot19, "*some/path/to/hello.txt",
1368 "some/other/path/to/hello.txt");
1369 nmatches!(matchnot20, "a", "foo/a");
1370 nmatches!(matchnot21, "./foo", "foo");
1371 nmatches!(matchnot22, "**/foo", "foofoo");
1372 nmatches!(matchnot23, "**/foo/bar", "foofoo/bar");
1373 nmatches!(matchnot24, "/*.c", "mozilla-sha1/sha1.c");
1374 nmatches!(matchnot25, "*.c", "mozilla-sha1/sha1.c", SLASHLIT);
1375 nmatches!(matchnot26, "**/m4/ltoptions.m4",
1376 "csharp/src/packages/repositories.config", SLASHLIT);
1377 nmatches!(matchnot27, "a[^0-9]b", "a0b");
1378 nmatches!(matchnot28, "a[^0-9]b", "a9b");
1379 nmatches!(matchnot29, "[^-]", "-");
1380 nmatches!(matchnot30, "some/*/needle.txt", "some/needle.txt");
1381 nmatches!(
1382 matchrec31,
1383 "some/*/needle.txt", "some/one/two/needle.txt", SLASHLIT);
1384 nmatches!(
1385 matchrec32,
1386 "some/*/needle.txt", "some/one/two/three/needle.txt", SLASHLIT);
1387
/// Declares a `#[test]` function named `$name` that builds the glob `$pat`
/// and asserts that calling the method `$which` on it yields `$expect`.
///
/// The four-argument form delegates to the five-argument form with
/// `Options::default()`. Each option that is `Some` is forwarded to the
/// corresponding `GlobBuilder` setter; `None` leaves the builder untouched.
macro_rules! extract {
    ($which:ident, $name:ident, $pat:expr, $expect:expr) => {
        extract!($which, $name, $pat, $expect, Options::default());
    };
    ($which:ident, $name:ident, $pat:expr, $expect:expr, $options:expr) => {
        #[test]
        fn $name() {
            // Borrow the options once; only the `Copy` fields are read.
            let opts = &$options;
            let mut glob_builder = GlobBuilder::new($pat);
            if let Some(flag) = opts.casei {
                glob_builder.case_insensitive(flag);
            }
            if let Some(flag) = opts.litsep {
                glob_builder.literal_separator(flag);
            }
            if let Some(flag) = opts.bsesc {
                glob_builder.backslash_escape(flag);
            }
            let glob = glob_builder.build().unwrap();
            assert_eq!($expect, glob.$which());
        }
    };
}
1410
/// Forwards every argument to `extract!`, selecting the `literal` method.
macro_rules! literal {
    ($($args:tt)*) => {
        extract!(literal, $($args)*);
    };
}
1414
/// Forwards every argument to `extract!`, selecting the `basename_tokens`
/// method.
macro_rules! basetokens {
    ($($args:tt)*) => {
        extract!(basename_tokens, $($args)*);
    };
}
1418
/// Forwards every argument to `extract!`, selecting the `ext` method.
macro_rules! ext {
    ($($args:tt)*) => {
        extract!(ext, $($args)*);
    };
}
1422
/// Forwards every argument to `extract!`, selecting the `required_ext`
/// method.
macro_rules! required_ext {
    ($($args:tt)*) => {
        extract!(required_ext, $($args)*);
    };
}
1426
/// Forwards every argument to `extract!`, selecting the `prefix` method.
macro_rules! prefix {
    ($($args:tt)*) => {
        extract!(prefix, $($args)*);
    };
}
1430
/// Forwards every argument to `extract!`, selecting the `suffix` method.
macro_rules! suffix {
    ($($args:tt)*) => {
        extract!(suffix, $($args)*);
    };
}
1434
/// Forwards every argument to `extract!`, selecting the `basename_literal`
/// method.
macro_rules! baseliteral {
    ($($args:tt)*) => {
        extract!(basename_literal, $($args)*);
    };
}
1438
// Extraction cases. Each invocation names a test, a pattern, the value the
// selected method is expected to return, and optionally the options to
// build the glob with.

// `literal`: `Some` for the wildcard-free patterns below, `None` for the
// patterns with wildcards and for the `CASEI` case.
1439 literal!(extract_lit1, "foo", Some(s("foo")));
1440 literal!(extract_lit2, "foo", None, CASEI);
1441 literal!(extract_lit3, "/foo", Some(s("/foo")));
1442 literal!(extract_lit4, "/foo/", Some(s("/foo/")));
1443 literal!(extract_lit5, "/foo/bar", Some(s("/foo/bar")));
1444 literal!(extract_lit6, "*.foo", None);
1445 literal!(extract_lit7, "foo/bar", Some(s("foo/bar")));
1446 literal!(extract_lit8, "**/foo/bar", None);
1447
// `basename_tokens`: for the `**/` patterns, the tokens that follow the
// prefix. The `**/fo*o` pattern yields tokens only under `SLASHLIT`.
1448 basetokens!(extract_basetoks1, "**/foo", Some(&*vec![
1449 Literal('f'), Literal('o'), Literal('o'),
1450 ]));
1451 basetokens!(extract_basetoks2, "**/foo", None, CASEI);
1452 basetokens!(extract_basetoks3, "**/foo", Some(&*vec![
1453 Literal('f'), Literal('o'), Literal('o'),
1454 ]), SLASHLIT);
1455 basetokens!(extract_basetoks4, "*foo", None, SLASHLIT);
1456 basetokens!(extract_basetoks5, "*foo", None);
1457 basetokens!(extract_basetoks6, "**/fo*o", None);
1458 basetokens!(extract_basetoks7, "**/fo*o", Some(&*vec![
1459 Literal('f'), Literal('o'), ZeroOrMore, Literal('o'),
1460 ]), SLASHLIT);
1461
// `ext`: the extension, including its leading dot.
1462 ext!(extract_ext1, "**/*.rs", Some(s(".rs")));
1463 ext!(extract_ext2, "**/*.rs.bak", None);
1464 ext!(extract_ext3, "*.rs", Some(s(".rs")));
1465 ext!(extract_ext4, "a*.rs", None);
1466 ext!(extract_ext5, "/*.c", None);
1467 ext!(extract_ext6, "*.c", None, SLASHLIT);
1468 ext!(extract_ext7, "*.c", Some(s(".c")));
1469
// `required_ext`.
// NOTE(review): `extract_req_ext2` and `extract_req_ext3` use the same
// pattern and expectation; one of them may have been meant to cover a
// different pattern. Confirm.
1470 required_ext!(extract_req_ext1, "*.rs", Some(s(".rs")));
1471 required_ext!(extract_req_ext2, "/foo/bar/*.rs", Some(s(".rs")));
1472 required_ext!(extract_req_ext3, "/foo/bar/*.rs", Some(s(".rs")));
1473 required_ext!(extract_req_ext4, "/foo/bar/.rs", Some(s(".rs")));
1474 required_ext!(extract_req_ext5, ".rs", Some(s(".rs")));
1475 required_ext!(extract_req_ext6, "./rs", None);
1476 required_ext!(extract_req_ext7, "foo", None);
1477 required_ext!(extract_req_ext8, ".foo/", None);
1478 required_ext!(extract_req_ext9, "foo/", None);
1479
// `prefix`.
1480 prefix!(extract_prefix1, "/foo", Some(s("/foo")));
1481 prefix!(extract_prefix2, "/foo/*", Some(s("/foo/")));
1482 prefix!(extract_prefix3, "**/foo", None);
1483 prefix!(extract_prefix4, "foo/**", None);
1484
// `suffix`: the expected value pairs the suffix string with a boolean,
// which is `true` only for the `**/foo/bar` pattern.
1485 suffix!(extract_suffix1, "**/foo/bar", Some((s("/foo/bar"), true)));
1486 suffix!(extract_suffix2, "*/foo/bar", Some((s("/foo/bar"), false)));
1487 suffix!(extract_suffix3, "*/foo/bar", None, SLASHLIT);
1488 suffix!(extract_suffix4, "foo/bar", Some((s("foo/bar"), false)));
1489 suffix!(extract_suffix5, "*.foo", Some((s(".foo"), false)));
1490 suffix!(extract_suffix6, "*.foo", None, SLASHLIT);
1491 suffix!(extract_suffix7, "**/*_test", Some((s("_test"), false)));
1492
// `basename_literal`: `Some` only for the `**/foo` pattern.
1493 baseliteral!(extract_baselit1, "**/foo", Some(s("foo")));
1494 baseliteral!(extract_baselit2, "foo", None);
1495 baseliteral!(extract_baselit3, "*foo", None);
1496 baseliteral!(extract_baselit4, "*/foo", None);
1497 }