]> git.proxmox.com Git - rustc.git/blob - vendor/globset/src/glob.rs
New upstream version 1.76.0+dfsg1
[rustc.git] / vendor / globset / src / glob.rs
1 use std::path::{is_separator, Path};
2
3 use regex_automata::meta::Regex;
4
5 use crate::{new_regex, Candidate, Error, ErrorKind};
6
/// Describes a matching strategy for a particular pattern.
///
/// This provides a way to more quickly determine whether a pattern matches
/// a particular file path in a way that scales with a large number of
/// patterns. For example, if many patterns are of the form `*.ext`, then it's
/// possible to test whether any of those patterns matches by looking up a
/// file path's extension in a hash table.
#[derive(Clone, Debug, Eq, PartialEq)]
pub(crate) enum MatchStrategy {
    /// A pattern matches if and only if the entire file path matches this
    /// literal string.
    Literal(String),
    /// A pattern matches if and only if the file path's basename matches this
    /// literal string.
    BasenameLiteral(String),
    /// A pattern matches if and only if the file path's extension matches this
    /// literal string.
    Extension(String),
    /// A pattern matches if and only if this prefix literal is a prefix of the
    /// candidate file path.
    Prefix(String),
    /// A pattern matches if and only if this suffix literal is a suffix of the
    /// candidate file path.
    ///
    /// An exception: if `component` is true, then `suffix` must appear at the
    /// beginning of a file path or immediately following a `/`.
    Suffix {
        /// The actual suffix.
        suffix: String,
        /// Whether this must start at the beginning of a path component.
        component: bool,
    },
    /// A pattern matches only if the given extension matches the file path's
    /// extension. Note that this is a necessary but NOT sufficient criterion.
    /// Namely, if the extension matches, then a full regex search is still
    /// required.
    RequiredExtension(String),
    /// A regex needs to be used for matching.
    Regex,
}
47
48 impl MatchStrategy {
49 /// Returns a matching strategy for the given pattern.
50 pub(crate) fn new(pat: &Glob) -> MatchStrategy {
51 if let Some(lit) = pat.basename_literal() {
52 MatchStrategy::BasenameLiteral(lit)
53 } else if let Some(lit) = pat.literal() {
54 MatchStrategy::Literal(lit)
55 } else if let Some(ext) = pat.ext() {
56 MatchStrategy::Extension(ext)
57 } else if let Some(prefix) = pat.prefix() {
58 MatchStrategy::Prefix(prefix)
59 } else if let Some((suffix, component)) = pat.suffix() {
60 MatchStrategy::Suffix { suffix, component }
61 } else if let Some(ext) = pat.required_ext() {
62 MatchStrategy::RequiredExtension(ext)
63 } else {
64 MatchStrategy::Regex
65 }
66 }
67 }
68
/// Glob represents a successfully parsed shell glob pattern.
///
/// It cannot be used directly to match file paths, but it can be converted
/// to a regular expression string or a matcher.
#[derive(Clone, Debug, Eq)]
pub struct Glob {
    /// The original glob pattern text.
    glob: String,
    /// The regular expression string generated for this glob.
    re: String,
    /// The options this glob was built with.
    opts: GlobOptions,
    /// The parsed token sequence for the pattern.
    tokens: Tokens,
}
80
81 impl PartialEq for Glob {
82 fn eq(&self, other: &Glob) -> bool {
83 self.glob == other.glob && self.opts == other.opts
84 }
85 }
86
87 impl std::hash::Hash for Glob {
88 fn hash<H: std::hash::Hasher>(&self, state: &mut H) {
89 self.glob.hash(state);
90 self.opts.hash(state);
91 }
92 }
93
94 impl std::fmt::Display for Glob {
95 fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
96 self.glob.fmt(f)
97 }
98 }
99
100 impl std::str::FromStr for Glob {
101 type Err = Error;
102
103 fn from_str(glob: &str) -> Result<Self, Self::Err> {
104 Self::new(glob)
105 }
106 }
107
/// A matcher for a single pattern.
///
/// Matching is always done by running the compiled regex over the path.
#[derive(Clone, Debug)]
pub struct GlobMatcher {
    /// The underlying pattern.
    pat: Glob,
    /// The pattern, as a compiled regex.
    re: Regex,
}
116
117 impl GlobMatcher {
118 /// Tests whether the given path matches this pattern or not.
119 pub fn is_match<P: AsRef<Path>>(&self, path: P) -> bool {
120 self.is_match_candidate(&Candidate::new(path.as_ref()))
121 }
122
123 /// Tests whether the given path matches this pattern or not.
124 pub fn is_match_candidate(&self, path: &Candidate<'_>) -> bool {
125 self.re.is_match(&path.path)
126 }
127
128 /// Returns the `Glob` used to compile this matcher.
129 pub fn glob(&self) -> &Glob {
130 &self.pat
131 }
132 }
133
/// A strategic matcher for a single pattern.
///
/// Only compiled for tests. It applies the `MatchStrategy` chosen for the
/// pattern, using the regex only for the strategies that need it.
#[cfg(test)]
#[derive(Clone, Debug)]
struct GlobStrategic {
    /// The match strategy to use.
    strategy: MatchStrategy,
    /// The pattern, as a compiled regex.
    re: Regex,
}
143
#[cfg(test)]
impl GlobStrategic {
    /// Reports whether `path` matches this pattern.
    fn is_match<P: AsRef<Path>>(&self, path: P) -> bool {
        let candidate = Candidate::new(path.as_ref());
        self.is_match_candidate(&candidate)
    }

    /// Reports whether `candidate` matches, using the literal shortcut that
    /// corresponds to this matcher's strategy and falling back to the regex
    /// where the strategy requires it.
    fn is_match_candidate(&self, candidate: &Candidate<'_>) -> bool {
        let byte_path = &*candidate.path;

        match &self.strategy {
            MatchStrategy::Literal(lit) => lit.as_bytes() == byte_path,
            MatchStrategy::BasenameLiteral(lit) => {
                lit.as_bytes() == &*candidate.basename
            }
            MatchStrategy::Extension(ext) => {
                ext.as_bytes() == &*candidate.ext
            }
            MatchStrategy::Prefix(pre) => {
                starts_with(pre.as_bytes(), byte_path)
            }
            MatchStrategy::Suffix { suffix, component } => {
                let suffix = suffix.as_bytes();
                // A component suffix starts with `/`; the whole path also
                // matches when it equals the suffix minus that leading `/`.
                (*component && byte_path == &suffix[1..])
                    || ends_with(suffix, byte_path)
            }
            MatchStrategy::RequiredExtension(ext) => {
                // The extension is necessary but not sufficient, so the
                // regex still has the final say.
                &*candidate.ext == ext.as_bytes()
                    && self.re.is_match(byte_path)
            }
            MatchStrategy::Regex => self.re.is_match(byte_path),
        }
    }
}
180
/// A builder for a pattern.
///
/// This builder enables configuring the match semantics of a pattern. For
/// example, one can make matching case insensitive.
///
/// The pattern is only parsed when `build` is called.
///
/// The lifetime `'a` refers to the lifetime of the pattern string.
#[derive(Clone, Debug)]
pub struct GlobBuilder<'a> {
    /// The glob pattern to compile.
    glob: &'a str,
    /// Options for the pattern.
    opts: GlobOptions,
}
194
/// The set of options that influence how a glob is parsed and translated
/// into a regex.
#[derive(Clone, Copy, Debug, Eq, Hash, PartialEq)]
struct GlobOptions {
    /// Whether to match case insensitively.
    case_insensitive: bool,
    /// Whether to require a literal separator to match a separator in a file
    /// path. e.g., when enabled, `*` won't match `/`.
    literal_separator: bool,
    /// Whether or not to use `\` to escape special characters.
    /// e.g., when enabled, `\*` will match a literal `*`.
    backslash_escape: bool,
    /// Whether or not an empty case in an alternate is kept. When disabled,
    /// empty cases are dropped from the generated regex.
    /// e.g., when enabled, `{,a}` will match "" and "a".
    empty_alternates: bool,
}
209
210 impl GlobOptions {
211 fn default() -> GlobOptions {
212 GlobOptions {
213 case_insensitive: false,
214 literal_separator: false,
215 backslash_escape: !is_separator('\\'),
216 empty_alternates: false,
217 }
218 }
219 }
220
/// A sequence of parsed glob tokens.
///
/// This is a thin wrapper around `Vec<Token>`; it dereferences to the
/// underlying vector.
#[derive(Clone, Debug, Default, Eq, PartialEq)]
struct Tokens(Vec<Token>);
223
224 impl std::ops::Deref for Tokens {
225 type Target = Vec<Token>;
226 fn deref(&self) -> &Vec<Token> {
227 &self.0
228 }
229 }
230
231 impl std::ops::DerefMut for Tokens {
232 fn deref_mut(&mut self) -> &mut Vec<Token> {
233 &mut self.0
234 }
235 }
236
/// A single element of a parsed glob pattern.
#[derive(Clone, Debug, Eq, PartialEq)]
enum Token {
    /// A character that must match itself.
    Literal(char),
    /// `?`: translated to `.`, or to `[^/]` when `literal_separator` is set.
    Any,
    /// `*`: translated to `.*`, or to `[^/]*` when `literal_separator` is
    /// set.
    ZeroOrMore,
    /// A `**` at the start of a token list that is followed by a separator
    /// or the end of the pattern. Translated to `(?:/?|.*/)`.
    RecursivePrefix,
    /// A `**` in trailing position after a separator. Translated to `/.*`.
    RecursiveSuffix,
    /// A `**` between two separators. Translated to `(?:/|/.*/)`.
    RecursiveZeroOrMore,
    /// A bracketed character class, optionally negated, holding inclusive
    /// `(start, end)` ranges.
    Class { negated: bool, ranges: Vec<(char, char)> },
    /// A `{...}` group; each element is the token list of one alternate.
    Alternates(Vec<Tokens>),
}
248
249 impl Glob {
250 /// Builds a new pattern with default options.
251 pub fn new(glob: &str) -> Result<Glob, Error> {
252 GlobBuilder::new(glob).build()
253 }
254
255 /// Returns a matcher for this pattern.
256 pub fn compile_matcher(&self) -> GlobMatcher {
257 let re =
258 new_regex(&self.re).expect("regex compilation shouldn't fail");
259 GlobMatcher { pat: self.clone(), re }
260 }
261
/// Builds a matcher that uses the `MatchStrategy` selected for this
/// pattern.
///
/// This is test-only: it isn't clear that it beats simply running the
/// regex for a *single* pattern. If it does, `GlobMatcher` should do this
/// automatically instead.
///
/// # Panics
///
/// Panics if the generated regex fails to compile.
#[cfg(test)]
fn compile_strategic_matcher(&self) -> GlobStrategic {
    GlobStrategic {
        strategy: MatchStrategy::new(self),
        re: new_regex(&self.re).expect("regex compilation shouldn't fail"),
    }
}
274
275 /// Returns the original glob pattern used to build this pattern.
276 pub fn glob(&self) -> &str {
277 &self.glob
278 }
279
280 /// Returns the regular expression string for this glob.
281 ///
282 /// Note that regular expressions for globs are intended to be matched on
283 /// arbitrary bytes (`&[u8]`) instead of Unicode strings (`&str`). In
284 /// particular, globs are frequently used on file paths, where there is no
285 /// general guarantee that file paths are themselves valid UTF-8. As a
286 /// result, callers will need to ensure that they are using a regex API
287 /// that can match on arbitrary bytes. For example, the
288 /// [`regex`](https://crates.io/regex)
289 /// crate's
290 /// [`Regex`](https://docs.rs/regex/*/regex/struct.Regex.html)
291 /// API is not suitable for this since it matches on `&str`, but its
292 /// [`bytes::Regex`](https://docs.rs/regex/*/regex/bytes/struct.Regex.html)
293 /// API is suitable for this.
294 pub fn regex(&self) -> &str {
295 &self.re
296 }
297
298 /// Returns the pattern as a literal if and only if the pattern must match
299 /// an entire path exactly.
300 ///
301 /// The basic format of these patterns is `{literal}`.
302 fn literal(&self) -> Option<String> {
303 if self.opts.case_insensitive {
304 return None;
305 }
306 let mut lit = String::new();
307 for t in &*self.tokens {
308 let Token::Literal(c) = *t else { return None };
309 lit.push(c);
310 }
311 if lit.is_empty() {
312 None
313 } else {
314 Some(lit)
315 }
316 }
317
318 /// Returns an extension if this pattern matches a file path if and only
319 /// if the file path has the extension returned.
320 ///
321 /// Note that this extension returned differs from the extension that
322 /// std::path::Path::extension returns. Namely, this extension includes
323 /// the '.'. Also, paths like `.rs` are considered to have an extension
324 /// of `.rs`.
325 fn ext(&self) -> Option<String> {
326 if self.opts.case_insensitive {
327 return None;
328 }
329 let start = match *self.tokens.get(0)? {
330 Token::RecursivePrefix => 1,
331 _ => 0,
332 };
333 match *self.tokens.get(start)? {
334 Token::ZeroOrMore => {
335 // If there was no recursive prefix, then we only permit
336 // `*` if `*` can match a `/`. For example, if `*` can't
337 // match `/`, then `*.c` doesn't match `foo/bar.c`.
338 if start == 0 && self.opts.literal_separator {
339 return None;
340 }
341 }
342 _ => return None,
343 }
344 match *self.tokens.get(start + 1)? {
345 Token::Literal('.') => {}
346 _ => return None,
347 }
348 let mut lit = ".".to_string();
349 for t in self.tokens[start + 2..].iter() {
350 match *t {
351 Token::Literal('.') | Token::Literal('/') => return None,
352 Token::Literal(c) => lit.push(c),
353 _ => return None,
354 }
355 }
356 if lit.is_empty() {
357 None
358 } else {
359 Some(lit)
360 }
361 }
362
363 /// This is like `ext`, but returns an extension even if it isn't sufficient
364 /// to imply a match. Namely, if an extension is returned, then it is
365 /// necessary but not sufficient for a match.
366 fn required_ext(&self) -> Option<String> {
367 if self.opts.case_insensitive {
368 return None;
369 }
370 // We don't care at all about the beginning of this pattern. All we
371 // need to check for is if it ends with a literal of the form `.ext`.
372 let mut ext: Vec<char> = vec![]; // built in reverse
373 for t in self.tokens.iter().rev() {
374 match *t {
375 Token::Literal('/') => return None,
376 Token::Literal(c) => {
377 ext.push(c);
378 if c == '.' {
379 break;
380 }
381 }
382 _ => return None,
383 }
384 }
385 if ext.last() != Some(&'.') {
386 None
387 } else {
388 ext.reverse();
389 Some(ext.into_iter().collect())
390 }
391 }
392
393 /// Returns a literal prefix of this pattern if the entire pattern matches
394 /// if the literal prefix matches.
395 fn prefix(&self) -> Option<String> {
396 if self.opts.case_insensitive {
397 return None;
398 }
399 let (end, need_sep) = match *self.tokens.last()? {
400 Token::ZeroOrMore => {
401 if self.opts.literal_separator {
402 // If a trailing `*` can't match a `/`, then we can't
403 // assume a match of the prefix corresponds to a match
404 // of the overall pattern. e.g., `foo/*` with
405 // `literal_separator` enabled matches `foo/bar` but not
406 // `foo/bar/baz`, even though `foo/bar/baz` has a `foo/`
407 // literal prefix.
408 return None;
409 }
410 (self.tokens.len() - 1, false)
411 }
412 Token::RecursiveSuffix => (self.tokens.len() - 1, true),
413 _ => (self.tokens.len(), false),
414 };
415 let mut lit = String::new();
416 for t in &self.tokens[0..end] {
417 let Token::Literal(c) = *t else { return None };
418 lit.push(c);
419 }
420 if need_sep {
421 lit.push('/');
422 }
423 if lit.is_empty() {
424 None
425 } else {
426 Some(lit)
427 }
428 }
429
430 /// Returns a literal suffix of this pattern if the entire pattern matches
431 /// if the literal suffix matches.
432 ///
433 /// If a literal suffix is returned and it must match either the entire
434 /// file path or be preceded by a `/`, then also return true. This happens
435 /// with a pattern like `**/foo/bar`. Namely, this pattern matches
436 /// `foo/bar` and `baz/foo/bar`, but not `foofoo/bar`. In this case, the
437 /// suffix returned is `/foo/bar` (but should match the entire path
438 /// `foo/bar`).
439 ///
440 /// When this returns true, the suffix literal is guaranteed to start with
441 /// a `/`.
442 fn suffix(&self) -> Option<(String, bool)> {
443 if self.opts.case_insensitive {
444 return None;
445 }
446 let mut lit = String::new();
447 let (start, entire) = match *self.tokens.get(0)? {
448 Token::RecursivePrefix => {
449 // We only care if this follows a path component if the next
450 // token is a literal.
451 if let Some(&Token::Literal(_)) = self.tokens.get(1) {
452 lit.push('/');
453 (1, true)
454 } else {
455 (1, false)
456 }
457 }
458 _ => (0, false),
459 };
460 let start = match *self.tokens.get(start)? {
461 Token::ZeroOrMore => {
462 // If literal_separator is enabled, then a `*` can't
463 // necessarily match everything, so reporting a suffix match
464 // as a match of the pattern would be a false positive.
465 if self.opts.literal_separator {
466 return None;
467 }
468 start + 1
469 }
470 _ => start,
471 };
472 for t in &self.tokens[start..] {
473 let Token::Literal(c) = *t else { return None };
474 lit.push(c);
475 }
476 if lit.is_empty() || lit == "/" {
477 None
478 } else {
479 Some((lit, entire))
480 }
481 }
482
483 /// If this pattern only needs to inspect the basename of a file path,
484 /// then the tokens corresponding to only the basename match are returned.
485 ///
486 /// For example, given a pattern of `**/*.foo`, only the tokens
487 /// corresponding to `*.foo` are returned.
488 ///
489 /// Note that this will return None if any match of the basename tokens
490 /// doesn't correspond to a match of the entire pattern. For example, the
491 /// glob `foo` only matches when a file path has a basename of `foo`, but
492 /// doesn't *always* match when a file path has a basename of `foo`. e.g.,
493 /// `foo` doesn't match `abc/foo`.
494 fn basename_tokens(&self) -> Option<&[Token]> {
495 if self.opts.case_insensitive {
496 return None;
497 }
498 let start = match *self.tokens.get(0)? {
499 Token::RecursivePrefix => 1,
500 _ => {
501 // With nothing to gobble up the parent portion of a path,
502 // we can't assume that matching on only the basename is
503 // correct.
504 return None;
505 }
506 };
507 if self.tokens[start..].is_empty() {
508 return None;
509 }
510 for t in self.tokens[start..].iter() {
511 match *t {
512 Token::Literal('/') => return None,
513 Token::Literal(_) => {} // OK
514 Token::Any | Token::ZeroOrMore => {
515 if !self.opts.literal_separator {
516 // In this case, `*` and `?` can match a path
517 // separator, which means this could reach outside
518 // the basename.
519 return None;
520 }
521 }
522 Token::RecursivePrefix
523 | Token::RecursiveSuffix
524 | Token::RecursiveZeroOrMore => {
525 return None;
526 }
527 Token::Class { .. } | Token::Alternates(..) => {
528 // We *could* be a little smarter here, but either one
529 // of these is going to prevent our literal optimizations
530 // anyway, so give up.
531 return None;
532 }
533 }
534 }
535 Some(&self.tokens[start..])
536 }
537
538 /// Returns the pattern as a literal if and only if the pattern exclusively
539 /// matches the basename of a file path *and* is a literal.
540 ///
541 /// The basic format of these patterns is `**/{literal}`, where `{literal}`
542 /// does not contain a path separator.
543 fn basename_literal(&self) -> Option<String> {
544 let tokens = self.basename_tokens()?;
545 let mut lit = String::new();
546 for t in tokens {
547 let Token::Literal(c) = *t else { return None };
548 lit.push(c);
549 }
550 Some(lit)
551 }
552 }
553
554 impl<'a> GlobBuilder<'a> {
555 /// Create a new builder for the pattern given.
556 ///
557 /// The pattern is not compiled until `build` is called.
558 pub fn new(glob: &'a str) -> GlobBuilder<'a> {
559 GlobBuilder { glob, opts: GlobOptions::default() }
560 }
561
562 /// Parses and builds the pattern.
563 pub fn build(&self) -> Result<Glob, Error> {
564 let mut p = Parser {
565 glob: &self.glob,
566 stack: vec![Tokens::default()],
567 chars: self.glob.chars().peekable(),
568 prev: None,
569 cur: None,
570 opts: &self.opts,
571 };
572 p.parse()?;
573 if p.stack.is_empty() {
574 Err(Error {
575 glob: Some(self.glob.to_string()),
576 kind: ErrorKind::UnopenedAlternates,
577 })
578 } else if p.stack.len() > 1 {
579 Err(Error {
580 glob: Some(self.glob.to_string()),
581 kind: ErrorKind::UnclosedAlternates,
582 })
583 } else {
584 let tokens = p.stack.pop().unwrap();
585 Ok(Glob {
586 glob: self.glob.to_string(),
587 re: tokens.to_regex_with(&self.opts),
588 opts: self.opts,
589 tokens,
590 })
591 }
592 }
593
594 /// Toggle whether the pattern matches case insensitively or not.
595 ///
596 /// This is disabled by default.
597 pub fn case_insensitive(&mut self, yes: bool) -> &mut GlobBuilder<'a> {
598 self.opts.case_insensitive = yes;
599 self
600 }
601
602 /// Toggle whether a literal `/` is required to match a path separator.
603 ///
604 /// By default this is false: `*` and `?` will match `/`.
605 pub fn literal_separator(&mut self, yes: bool) -> &mut GlobBuilder<'a> {
606 self.opts.literal_separator = yes;
607 self
608 }
609
610 /// When enabled, a back slash (`\`) may be used to escape
611 /// special characters in a glob pattern. Additionally, this will
612 /// prevent `\` from being interpreted as a path separator on all
613 /// platforms.
614 ///
615 /// This is enabled by default on platforms where `\` is not a
616 /// path separator and disabled by default on platforms where `\`
617 /// is a path separator.
618 pub fn backslash_escape(&mut self, yes: bool) -> &mut GlobBuilder<'a> {
619 self.opts.backslash_escape = yes;
620 self
621 }
622
623 /// Toggle whether an empty pattern in a list of alternates is accepted.
624 ///
625 /// For example, if this is set then the glob `foo{,.txt}` will match both
626 /// `foo` and `foo.txt`.
627 ///
628 /// By default this is false.
629 pub fn empty_alternates(&mut self, yes: bool) -> &mut GlobBuilder<'a> {
630 self.opts.empty_alternates = yes;
631 self
632 }
633 }
634
635 impl Tokens {
636 /// Convert this pattern to a string that is guaranteed to be a valid
637 /// regular expression and will represent the matching semantics of this
638 /// glob pattern and the options given.
639 fn to_regex_with(&self, options: &GlobOptions) -> String {
640 let mut re = String::new();
641 re.push_str("(?-u)");
642 if options.case_insensitive {
643 re.push_str("(?i)");
644 }
645 re.push('^');
646 // Special case. If the entire glob is just `**`, then it should match
647 // everything.
648 if self.len() == 1 && self[0] == Token::RecursivePrefix {
649 re.push_str(".*");
650 re.push('$');
651 return re;
652 }
653 self.tokens_to_regex(options, &self, &mut re);
654 re.push('$');
655 re
656 }
657
658 fn tokens_to_regex(
659 &self,
660 options: &GlobOptions,
661 tokens: &[Token],
662 re: &mut String,
663 ) {
664 for tok in tokens.iter() {
665 match *tok {
666 Token::Literal(c) => {
667 re.push_str(&char_to_escaped_literal(c));
668 }
669 Token::Any => {
670 if options.literal_separator {
671 re.push_str("[^/]");
672 } else {
673 re.push_str(".");
674 }
675 }
676 Token::ZeroOrMore => {
677 if options.literal_separator {
678 re.push_str("[^/]*");
679 } else {
680 re.push_str(".*");
681 }
682 }
683 Token::RecursivePrefix => {
684 re.push_str("(?:/?|.*/)");
685 }
686 Token::RecursiveSuffix => {
687 re.push_str("/.*");
688 }
689 Token::RecursiveZeroOrMore => {
690 re.push_str("(?:/|/.*/)");
691 }
692 Token::Class { negated, ref ranges } => {
693 re.push('[');
694 if negated {
695 re.push('^');
696 }
697 for r in ranges {
698 if r.0 == r.1 {
699 // Not strictly necessary, but nicer to look at.
700 re.push_str(&char_to_escaped_literal(r.0));
701 } else {
702 re.push_str(&char_to_escaped_literal(r.0));
703 re.push('-');
704 re.push_str(&char_to_escaped_literal(r.1));
705 }
706 }
707 re.push(']');
708 }
709 Token::Alternates(ref patterns) => {
710 let mut parts = vec![];
711 for pat in patterns {
712 let mut altre = String::new();
713 self.tokens_to_regex(options, &pat, &mut altre);
714 if !altre.is_empty() || options.empty_alternates {
715 parts.push(altre);
716 }
717 }
718
719 // It is possible to have an empty set in which case the
720 // resulting alternation '()' would be an error.
721 if !parts.is_empty() {
722 re.push_str("(?:");
723 re.push_str(&parts.join("|"));
724 re.push(')');
725 }
726 }
727 }
728 }
729 }
730 }
731
732 /// Convert a Unicode scalar value to an escaped string suitable for use as
733 /// a literal in a non-Unicode regex.
734 fn char_to_escaped_literal(c: char) -> String {
735 bytes_to_escaped_literal(&c.to_string().into_bytes())
736 }
737
738 /// Converts an arbitrary sequence of bytes to a UTF-8 string. All non-ASCII
739 /// code units are converted to their escaped form.
740 fn bytes_to_escaped_literal(bs: &[u8]) -> String {
741 let mut s = String::with_capacity(bs.len());
742 for &b in bs {
743 if b <= 0x7F {
744 s.push_str(&regex_syntax::escape(
745 char::from(b).encode_utf8(&mut [0; 4]),
746 ));
747 } else {
748 s.push_str(&format!("\\x{:02x}", b));
749 }
750 }
751 s
752 }
753
/// The state of a glob parse in progress.
struct Parser<'a> {
    /// The pattern being parsed; copied into any error that is reported.
    glob: &'a str,
    /// A stack of token lists. Tokens are always pushed onto the top list.
    /// An extra list is pushed for each `{` and for each `,` inside an
    /// alternation, and they are folded back into one token at `}`.
    stack: Vec<Tokens>,
    /// The remaining characters of the pattern.
    chars: std::iter::Peekable<std::str::Chars<'a>>,
    /// The value `cur` held before the most recent `bump`.
    prev: Option<char>,
    /// The result of the most recent `bump`.
    cur: Option<char>,
    /// The options controlling how the pattern is parsed.
    opts: &'a GlobOptions,
}
762
763 impl<'a> Parser<'a> {
764 fn error(&self, kind: ErrorKind) -> Error {
765 Error { glob: Some(self.glob.to_string()), kind }
766 }
767
768 fn parse(&mut self) -> Result<(), Error> {
769 while let Some(c) = self.bump() {
770 match c {
771 '?' => self.push_token(Token::Any)?,
772 '*' => self.parse_star()?,
773 '[' => self.parse_class()?,
774 '{' => self.push_alternate()?,
775 '}' => self.pop_alternate()?,
776 ',' => self.parse_comma()?,
777 '\\' => self.parse_backslash()?,
778 c => self.push_token(Token::Literal(c))?,
779 }
780 }
781 Ok(())
782 }
783
784 fn push_alternate(&mut self) -> Result<(), Error> {
785 if self.stack.len() > 1 {
786 return Err(self.error(ErrorKind::NestedAlternates));
787 }
788 Ok(self.stack.push(Tokens::default()))
789 }
790
791 fn pop_alternate(&mut self) -> Result<(), Error> {
792 let mut alts = vec![];
793 while self.stack.len() >= 2 {
794 alts.push(self.stack.pop().unwrap());
795 }
796 self.push_token(Token::Alternates(alts))
797 }
798
799 fn push_token(&mut self, tok: Token) -> Result<(), Error> {
800 if let Some(ref mut pat) = self.stack.last_mut() {
801 return Ok(pat.push(tok));
802 }
803 Err(self.error(ErrorKind::UnopenedAlternates))
804 }
805
806 fn pop_token(&mut self) -> Result<Token, Error> {
807 if let Some(ref mut pat) = self.stack.last_mut() {
808 return Ok(pat.pop().unwrap());
809 }
810 Err(self.error(ErrorKind::UnopenedAlternates))
811 }
812
813 fn have_tokens(&self) -> Result<bool, Error> {
814 match self.stack.last() {
815 None => Err(self.error(ErrorKind::UnopenedAlternates)),
816 Some(ref pat) => Ok(!pat.is_empty()),
817 }
818 }
819
820 fn parse_comma(&mut self) -> Result<(), Error> {
821 // If we aren't inside a group alternation, then don't
822 // treat commas specially. Otherwise, we need to start
823 // a new alternate.
824 if self.stack.len() <= 1 {
825 self.push_token(Token::Literal(','))
826 } else {
827 Ok(self.stack.push(Tokens::default()))
828 }
829 }
830
831 fn parse_backslash(&mut self) -> Result<(), Error> {
832 if self.opts.backslash_escape {
833 match self.bump() {
834 None => Err(self.error(ErrorKind::DanglingEscape)),
835 Some(c) => self.push_token(Token::Literal(c)),
836 }
837 } else if is_separator('\\') {
838 // Normalize all patterns to use / as a separator.
839 self.push_token(Token::Literal('/'))
840 } else {
841 self.push_token(Token::Literal('\\'))
842 }
843 }
844
/// Handles `*`, which has just been consumed by the caller.
///
/// A single `*` becomes `ZeroOrMore`. A `**` becomes one of the recursive
/// tokens when it sits at a path component boundary, and otherwise
/// degrades to two `ZeroOrMore` tokens.
fn parse_star(&mut self) -> Result<(), Error> {
    // The character that preceded the first `*`.
    let prev = self.prev;
    if self.peek() != Some('*') {
        self.push_token(Token::ZeroOrMore)?;
        return Ok(());
    }
    assert!(self.bump() == Some('*'));
    // `**` at the very start of the current token list.
    if !self.have_tokens()? {
        if !self.peek().map_or(true, is_separator) {
            // Followed by something other than a separator: treat it as
            // two ordinary stars.
            self.push_token(Token::ZeroOrMore)?;
            self.push_token(Token::ZeroOrMore)?;
        } else {
            // Followed by a separator or the end of the pattern; the
            // separator, if any, is consumed as part of the prefix.
            self.push_token(Token::RecursivePrefix)?;
            assert!(self.bump().map_or(true, is_separator));
        }
        return Ok(());
    }

    // `**` that does not follow a separator is two ordinary stars, unless
    // we are inside an alternation and the previous character was `,` or
    // `{`.
    if !prev.map(is_separator).unwrap_or(false) {
        if self.stack.len() <= 1
            || (prev != Some(',') && prev != Some('{'))
        {
            self.push_token(Token::ZeroOrMore)?;
            self.push_token(Token::ZeroOrMore)?;
            return Ok(());
        }
    }
    // Decide, from what follows, whether this `**` ends its token list
    // (suffix) or sits between two separators.
    let is_suffix = match self.peek() {
        None => {
            assert!(self.bump().is_none());
            true
        }
        Some(',') | Some('}') if self.stack.len() >= 2 => true,
        Some(c) if is_separator(c) => {
            // Consume the separator that follows the `**`.
            assert!(self.bump().map(is_separator).unwrap_or(false));
            false
        }
        _ => {
            self.push_token(Token::ZeroOrMore)?;
            self.push_token(Token::ZeroOrMore)?;
            return Ok(());
        }
    };
    // Replace the token pushed just before the `**` with the recursive
    // token. An existing recursive prefix or suffix is pushed back
    // unchanged.
    match self.pop_token()? {
        Token::RecursivePrefix => {
            self.push_token(Token::RecursivePrefix)?;
        }
        Token::RecursiveSuffix => {
            self.push_token(Token::RecursiveSuffix)?;
        }
        _ => {
            if is_suffix {
                self.push_token(Token::RecursiveSuffix)?;
            } else {
                self.push_token(Token::RecursiveZeroOrMore)?;
            }
        }
    }
    Ok(())
}
905
/// Parses a bracketed character class; the opening `[` has already been
/// consumed by the caller.
///
/// A leading `!` or `^` negates the class. A `]` or `-` in first position
/// is taken literally, as is a trailing `-`. Fails with `UnclosedClass` if
/// the pattern ends before the closing `]`, and with `InvalidRange` if a
/// range's end is smaller than its start.
fn parse_class(&mut self) -> Result<(), Error> {
    /// Sets the end of range `r` to `add`, rejecting a range whose end
    /// sorts before its start.
    fn add_to_last_range(
        glob: &str,
        r: &mut (char, char),
        add: char,
    ) -> Result<(), Error> {
        r.1 = add;
        if r.1 < r.0 {
            Err(Error {
                glob: Some(glob.to_string()),
                kind: ErrorKind::InvalidRange(r.0, r.1),
            })
        } else {
            Ok(())
        }
    }
    let mut ranges = vec![];
    let negated = match self.chars.peek() {
        Some(&'!') | Some(&'^') => {
            let bump = self.bump();
            assert!(bump == Some('!') || bump == Some('^'));
            true
        }
        _ => false,
    };
    // True only while looking at the first character after the optional
    // negation marker.
    let mut first = true;
    // True after a `-` that follows a range start, until its end is seen.
    let mut in_range = false;
    loop {
        let c = match self.bump() {
            Some(c) => c,
            // The only way to successfully break this loop is to observe
            // a ']'.
            None => return Err(self.error(ErrorKind::UnclosedClass)),
        };
        match c {
            ']' => {
                if first {
                    ranges.push((']', ']'));
                } else {
                    break;
                }
            }
            '-' => {
                if first {
                    ranges.push(('-', '-'));
                } else if in_range {
                    // invariant: in_range is only set when there is
                    // already at least one character seen.
                    let r = ranges.last_mut().unwrap();
                    add_to_last_range(&self.glob, r, '-')?;
                    in_range = false;
                } else {
                    assert!(!ranges.is_empty());
                    in_range = true;
                }
            }
            c => {
                if in_range {
                    // invariant: in_range is only set when there is
                    // already at least one character seen.
                    add_to_last_range(
                        &self.glob,
                        ranges.last_mut().unwrap(),
                        c,
                    )?;
                } else {
                    ranges.push((c, c));
                }
                in_range = false;
            }
        }
        first = false;
    }
    if in_range {
        // Means that the last character in the class was a '-', so add
        // it as a literal.
        ranges.push(('-', '-'));
    }
    self.push_token(Token::Class { negated, ranges })
}
986
987 fn bump(&mut self) -> Option<char> {
988 self.prev = self.cur;
989 self.cur = self.chars.next();
990 self.cur
991 }
992
993 fn peek(&mut self) -> Option<char> {
994 self.chars.peek().map(|&ch| ch)
995 }
996 }
997
/// Reports whether `haystack` begins with the bytes of `needle`.
#[cfg(test)]
fn starts_with(needle: &[u8], haystack: &[u8]) -> bool {
    haystack.starts_with(needle)
}
1002
/// Reports whether `haystack` ends with the bytes of `needle`.
#[cfg(test)]
fn ends_with(needle: &[u8], haystack: &[u8]) -> bool {
    haystack.ends_with(needle)
}
1010
1011 #[cfg(test)]
1012 mod tests {
1013 use super::Token::*;
1014 use super::{Glob, GlobBuilder, Token};
1015 use crate::{ErrorKind, GlobSetBuilder};
1016
1017 #[derive(Clone, Copy, Debug, Default)]
1018 struct Options {
1019 casei: Option<bool>,
1020 litsep: Option<bool>,
1021 bsesc: Option<bool>,
1022 ealtre: Option<bool>,
1023 }
1024
1025 macro_rules! syntax {
1026 ($name:ident, $pat:expr, $tokens:expr) => {
1027 #[test]
1028 fn $name() {
1029 let pat = Glob::new($pat).unwrap();
1030 assert_eq!($tokens, pat.tokens.0);
1031 }
1032 };
1033 }
1034
1035 macro_rules! syntaxerr {
1036 ($name:ident, $pat:expr, $err:expr) => {
1037 #[test]
1038 fn $name() {
1039 let err = Glob::new($pat).unwrap_err();
1040 assert_eq!(&$err, err.kind());
1041 }
1042 };
1043 }
1044
1045 macro_rules! toregex {
1046 ($name:ident, $pat:expr, $re:expr) => {
1047 toregex!($name, $pat, $re, Options::default());
1048 };
1049 ($name:ident, $pat:expr, $re:expr, $options:expr) => {
1050 #[test]
1051 fn $name() {
1052 let mut builder = GlobBuilder::new($pat);
1053 if let Some(casei) = $options.casei {
1054 builder.case_insensitive(casei);
1055 }
1056 if let Some(litsep) = $options.litsep {
1057 builder.literal_separator(litsep);
1058 }
1059 if let Some(bsesc) = $options.bsesc {
1060 builder.backslash_escape(bsesc);
1061 }
1062 if let Some(ealtre) = $options.ealtre {
1063 builder.empty_alternates(ealtre);
1064 }
1065 let pat = builder.build().unwrap();
1066 assert_eq!(format!("(?-u){}", $re), pat.regex());
1067 }
1068 };
1069 }
1070
// Generates a `#[test]` function named `$test_name` which builds the glob
// `$glob` and asserts that `$path` is matched by all three of: the plain
// matcher, the strategic matcher, and a glob set containing only this glob.
//
// The three-argument form uses `Options::default()`. In the four-argument
// form, every field of `$opts` that is `Some` is forwarded to the matching
// `GlobBuilder` setter; `None` fields leave the builder untouched.
macro_rules! matches {
    ($test_name:ident, $glob:expr, $path:expr) => {
        matches!($test_name, $glob, $path, Options::default());
    };
    ($test_name:ident, $glob:expr, $path:expr, $opts:expr) => {
        #[test]
        fn $test_name() {
            let opts = $opts;
            let mut glob_builder = GlobBuilder::new($glob);
            if let Some(enabled) = opts.casei {
                glob_builder.case_insensitive(enabled);
            }
            if let Some(enabled) = opts.litsep {
                glob_builder.literal_separator(enabled);
            }
            if let Some(enabled) = opts.bsesc {
                glob_builder.backslash_escape(enabled);
            }
            if let Some(enabled) = opts.ealtre {
                glob_builder.empty_alternates(enabled);
            }
            let glob = glob_builder.build().unwrap();
            // Both matchers are compiled first because `glob` is handed over
            // by value to the set builder afterwards.
            let plain = glob.compile_matcher();
            let strategic = glob.compile_strategic_matcher();
            let set = GlobSetBuilder::new()
                .add(glob)
                .build()
                .unwrap();
            assert!(plain.is_match($path));
            assert!(strategic.is_match($path));
            assert!(set.is_match($path));
        }
    };
}
1101
// Generates a `#[test]` function named `$test_name` which builds the glob
// `$glob` and asserts that `$path` is rejected by all three of: the plain
// matcher, the strategic matcher, and a glob set containing only this glob.
//
// The three-argument form uses `Options::default()`. In the four-argument
// form, every field of `$opts` that is `Some` is forwarded to the matching
// `GlobBuilder` setter; `None` fields leave the builder untouched.
macro_rules! nmatches {
    ($test_name:ident, $glob:expr, $path:expr) => {
        nmatches!($test_name, $glob, $path, Options::default());
    };
    ($test_name:ident, $glob:expr, $path:expr, $opts:expr) => {
        #[test]
        fn $test_name() {
            let opts = $opts;
            let mut glob_builder = GlobBuilder::new($glob);
            if let Some(enabled) = opts.casei {
                glob_builder.case_insensitive(enabled);
            }
            if let Some(enabled) = opts.litsep {
                glob_builder.literal_separator(enabled);
            }
            if let Some(enabled) = opts.bsesc {
                glob_builder.backslash_escape(enabled);
            }
            if let Some(enabled) = opts.ealtre {
                glob_builder.empty_alternates(enabled);
            }
            let glob = glob_builder.build().unwrap();
            // Both matchers are compiled first because `glob` is handed over
            // by value to the set builder afterwards.
            let plain = glob.compile_matcher();
            let strategic = glob.compile_strategic_matcher();
            let set = GlobSetBuilder::new()
                .add(glob)
                .build()
                .unwrap();
            assert!(!plain.is_match($path));
            assert!(!strategic.is_match($path));
            assert!(!set.is_match($path));
        }
    };
}
1132
/// Returns an owned `String` holding a copy of `string`.
///
/// Keeps the expected values in the extraction tests short.
fn s(string: &str) -> String {
    String::from(string)
}
1136
1137 fn class(s: char, e: char) -> Token {
1138 Class { negated: false, ranges: vec![(s, e)] }
1139 }
1140
1141 fn classn(s: char, e: char) -> Token {
1142 Class { negated: true, ranges: vec![(s, e)] }
1143 }
1144
1145 fn rclass(ranges: &[(char, char)]) -> Token {
1146 Class { negated: false, ranges: ranges.to_vec() }
1147 }
1148
1149 fn rclassn(ranges: &[(char, char)]) -> Token {
1150 Class { negated: true, ranges: ranges.to_vec() }
1151 }
1152
1153 syntax!(literal1, "a", vec![Literal('a')]);
1154 syntax!(literal2, "ab", vec![Literal('a'), Literal('b')]);
1155 syntax!(any1, "?", vec![Any]);
1156 syntax!(any2, "a?b", vec![Literal('a'), Any, Literal('b')]);
1157 syntax!(seq1, "*", vec![ZeroOrMore]);
1158 syntax!(seq2, "a*b", vec![Literal('a'), ZeroOrMore, Literal('b')]);
1159 syntax!(
1160 seq3,
1161 "*a*b*",
1162 vec![ZeroOrMore, Literal('a'), ZeroOrMore, Literal('b'), ZeroOrMore,]
1163 );
1164 syntax!(rseq1, "**", vec![RecursivePrefix]);
1165 syntax!(rseq2, "**/", vec![RecursivePrefix]);
1166 syntax!(rseq3, "/**", vec![RecursiveSuffix]);
1167 syntax!(rseq4, "/**/", vec![RecursiveZeroOrMore]);
1168 syntax!(
1169 rseq5,
1170 "a/**/b",
1171 vec![Literal('a'), RecursiveZeroOrMore, Literal('b'),]
1172 );
1173 syntax!(cls1, "[a]", vec![class('a', 'a')]);
1174 syntax!(cls2, "[!a]", vec![classn('a', 'a')]);
1175 syntax!(cls3, "[a-z]", vec![class('a', 'z')]);
1176 syntax!(cls4, "[!a-z]", vec![classn('a', 'z')]);
1177 syntax!(cls5, "[-]", vec![class('-', '-')]);
1178 syntax!(cls6, "[]]", vec![class(']', ']')]);
1179 syntax!(cls7, "[*]", vec![class('*', '*')]);
1180 syntax!(cls8, "[!!]", vec![classn('!', '!')]);
1181 syntax!(cls9, "[a-]", vec![rclass(&[('a', 'a'), ('-', '-')])]);
1182 syntax!(cls10, "[-a-z]", vec![rclass(&[('-', '-'), ('a', 'z')])]);
1183 syntax!(cls11, "[a-z-]", vec![rclass(&[('a', 'z'), ('-', '-')])]);
1184 syntax!(
1185 cls12,
1186 "[-a-z-]",
1187 vec![rclass(&[('-', '-'), ('a', 'z'), ('-', '-')]),]
1188 );
1189 syntax!(cls13, "[]-z]", vec![class(']', 'z')]);
1190 syntax!(cls14, "[--z]", vec![class('-', 'z')]);
1191 syntax!(cls15, "[ --]", vec![class(' ', '-')]);
1192 syntax!(cls16, "[0-9a-z]", vec![rclass(&[('0', '9'), ('a', 'z')])]);
1193 syntax!(cls17, "[a-z0-9]", vec![rclass(&[('a', 'z'), ('0', '9')])]);
1194 syntax!(cls18, "[!0-9a-z]", vec![rclassn(&[('0', '9'), ('a', 'z')])]);
1195 syntax!(cls19, "[!a-z0-9]", vec![rclassn(&[('a', 'z'), ('0', '9')])]);
1196 syntax!(cls20, "[^a]", vec![classn('a', 'a')]);
1197 syntax!(cls21, "[^a-z]", vec![classn('a', 'z')]);
1198
1199 syntaxerr!(err_unclosed1, "[", ErrorKind::UnclosedClass);
1200 syntaxerr!(err_unclosed2, "[]", ErrorKind::UnclosedClass);
1201 syntaxerr!(err_unclosed3, "[!", ErrorKind::UnclosedClass);
1202 syntaxerr!(err_unclosed4, "[!]", ErrorKind::UnclosedClass);
1203 syntaxerr!(err_range1, "[z-a]", ErrorKind::InvalidRange('z', 'a'));
1204 syntaxerr!(err_range2, "[z--]", ErrorKind::InvalidRange('z', '-'));
1205
1206 const CASEI: Options =
1207 Options { casei: Some(true), litsep: None, bsesc: None, ealtre: None };
1208 const SLASHLIT: Options =
1209 Options { casei: None, litsep: Some(true), bsesc: None, ealtre: None };
1210 const NOBSESC: Options = Options {
1211 casei: None,
1212 litsep: None,
1213 bsesc: Some(false),
1214 ealtre: None,
1215 };
1216 const BSESC: Options =
1217 Options { casei: None, litsep: None, bsesc: Some(true), ealtre: None };
1218 const EALTRE: Options = Options {
1219 casei: None,
1220 litsep: None,
1221 bsesc: Some(true),
1222 ealtre: Some(true),
1223 };
1224
1225 toregex!(re_casei, "a", "(?i)^a$", &CASEI);
1226
1227 toregex!(re_slash1, "?", r"^[^/]$", SLASHLIT);
1228 toregex!(re_slash2, "*", r"^[^/]*$", SLASHLIT);
1229
1230 toregex!(re1, "a", "^a$");
1231 toregex!(re2, "?", "^.$");
1232 toregex!(re3, "*", "^.*$");
1233 toregex!(re4, "a?", "^a.$");
1234 toregex!(re5, "?a", "^.a$");
1235 toregex!(re6, "a*", "^a.*$");
1236 toregex!(re7, "*a", "^.*a$");
1237 toregex!(re8, "[*]", r"^[\*]$");
1238 toregex!(re9, "[+]", r"^[\+]$");
1239 toregex!(re10, "+", r"^\+$");
1240 toregex!(re11, "☃", r"^\xe2\x98\x83$");
1241 toregex!(re12, "**", r"^.*$");
1242 toregex!(re13, "**/", r"^.*$");
1243 toregex!(re14, "**/*", r"^(?:/?|.*/).*$");
1244 toregex!(re15, "**/**", r"^.*$");
1245 toregex!(re16, "**/**/*", r"^(?:/?|.*/).*$");
1246 toregex!(re17, "**/**/**", r"^.*$");
1247 toregex!(re18, "**/**/**/*", r"^(?:/?|.*/).*$");
1248 toregex!(re19, "a/**", r"^a/.*$");
1249 toregex!(re20, "a/**/**", r"^a/.*$");
1250 toregex!(re21, "a/**/**/**", r"^a/.*$");
1251 toregex!(re22, "a/**/b", r"^a(?:/|/.*/)b$");
1252 toregex!(re23, "a/**/**/b", r"^a(?:/|/.*/)b$");
1253 toregex!(re24, "a/**/**/**/b", r"^a(?:/|/.*/)b$");
1254 toregex!(re25, "**/b", r"^(?:/?|.*/)b$");
1255 toregex!(re26, "**/**/b", r"^(?:/?|.*/)b$");
1256 toregex!(re27, "**/**/**/b", r"^(?:/?|.*/)b$");
1257 toregex!(re28, "a**", r"^a.*.*$");
1258 toregex!(re29, "**a", r"^.*.*a$");
1259 toregex!(re30, "a**b", r"^a.*.*b$");
1260 toregex!(re31, "***", r"^.*.*.*$");
1261 toregex!(re32, "/a**", r"^/a.*.*$");
1262 toregex!(re33, "/**a", r"^/.*.*a$");
1263 toregex!(re34, "/a**b", r"^/a.*.*b$");
1264 toregex!(re35, "{a,b}", r"^(?:b|a)$");
1265
1266 matches!(match1, "a", "a");
1267 matches!(match2, "a*b", "a_b");
1268 matches!(match3, "a*b*c", "abc");
1269 matches!(match4, "a*b*c", "a_b_c");
1270 matches!(match5, "a*b*c", "a___b___c");
1271 matches!(match6, "abc*abc*abc", "abcabcabcabcabcabcabc");
1272 matches!(match7, "a*a*a*a*a*a*a*a*a", "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa");
1273 matches!(match8, "a*b[xyz]c*d", "abxcdbxcddd");
1274 matches!(match9, "*.rs", ".rs");
1275 matches!(match10, "☃", "☃");
1276
1277 matches!(matchrec1, "some/**/needle.txt", "some/needle.txt");
1278 matches!(matchrec2, "some/**/needle.txt", "some/one/needle.txt");
1279 matches!(matchrec3, "some/**/needle.txt", "some/one/two/needle.txt");
1280 matches!(matchrec4, "some/**/needle.txt", "some/other/needle.txt");
1281 matches!(matchrec5, "**", "abcde");
1282 matches!(matchrec6, "**", "");
1283 matches!(matchrec7, "**", ".asdf");
1284 matches!(matchrec8, "**", "/x/.asdf");
1285 matches!(matchrec9, "some/**/**/needle.txt", "some/needle.txt");
1286 matches!(matchrec10, "some/**/**/needle.txt", "some/one/needle.txt");
1287 matches!(matchrec11, "some/**/**/needle.txt", "some/one/two/needle.txt");
1288 matches!(matchrec12, "some/**/**/needle.txt", "some/other/needle.txt");
1289 matches!(matchrec13, "**/test", "one/two/test");
1290 matches!(matchrec14, "**/test", "one/test");
1291 matches!(matchrec15, "**/test", "test");
1292 matches!(matchrec16, "/**/test", "/one/two/test");
1293 matches!(matchrec17, "/**/test", "/one/test");
1294 matches!(matchrec18, "/**/test", "/test");
1295 matches!(matchrec19, "**/.*", ".abc");
1296 matches!(matchrec20, "**/.*", "abc/.abc");
1297 matches!(matchrec21, "**/foo/bar", "foo/bar");
1298 matches!(matchrec22, ".*/**", ".abc/abc");
1299 matches!(matchrec23, "test/**", "test/");
1300 matches!(matchrec24, "test/**", "test/one");
1301 matches!(matchrec25, "test/**", "test/one/two");
1302 matches!(matchrec26, "some/*/needle.txt", "some/one/needle.txt");
1303
1304 matches!(matchrange1, "a[0-9]b", "a0b");
1305 matches!(matchrange2, "a[0-9]b", "a9b");
1306 matches!(matchrange3, "a[!0-9]b", "a_b");
1307 matches!(matchrange4, "[a-z123]", "1");
1308 matches!(matchrange5, "[1a-z23]", "1");
1309 matches!(matchrange6, "[123a-z]", "1");
1310 matches!(matchrange7, "[abc-]", "-");
1311 matches!(matchrange8, "[-abc]", "-");
1312 matches!(matchrange9, "[-a-c]", "b");
1313 matches!(matchrange10, "[a-c-]", "b");
1314 matches!(matchrange11, "[-]", "-");
1315 matches!(matchrange12, "a[^0-9]b", "a_b");
1316
1317 matches!(matchpat1, "*hello.txt", "hello.txt");
1318 matches!(matchpat2, "*hello.txt", "gareth_says_hello.txt");
1319 matches!(matchpat3, "*hello.txt", "some/path/to/hello.txt");
1320 matches!(matchpat4, "*hello.txt", "some\\path\\to\\hello.txt");
1321 matches!(matchpat5, "*hello.txt", "/an/absolute/path/to/hello.txt");
1322 matches!(matchpat6, "*some/path/to/hello.txt", "some/path/to/hello.txt");
1323 matches!(
1324 matchpat7,
1325 "*some/path/to/hello.txt",
1326 "a/bigger/some/path/to/hello.txt"
1327 );
1328
1329 matches!(matchescape, "_[[]_[]]_[?]_[*]_!_", "_[_]_?_*_!_");
1330
1331 matches!(matchcasei1, "aBcDeFg", "aBcDeFg", CASEI);
1332 matches!(matchcasei2, "aBcDeFg", "abcdefg", CASEI);
1333 matches!(matchcasei3, "aBcDeFg", "ABCDEFG", CASEI);
1334 matches!(matchcasei4, "aBcDeFg", "AbCdEfG", CASEI);
1335
1336 matches!(matchalt1, "a,b", "a,b");
1337 matches!(matchalt2, ",", ",");
1338 matches!(matchalt3, "{a,b}", "a");
1339 matches!(matchalt4, "{a,b}", "b");
1340 matches!(matchalt5, "{**/src/**,foo}", "abc/src/bar");
1341 matches!(matchalt6, "{**/src/**,foo}", "foo");
1342 matches!(matchalt7, "{[}],foo}", "}");
1343 matches!(matchalt8, "{foo}", "foo");
1344 matches!(matchalt9, "{}", "");
1345 matches!(matchalt10, "{,}", "");
1346 matches!(matchalt11, "{*.foo,*.bar,*.wat}", "test.foo");
1347 matches!(matchalt12, "{*.foo,*.bar,*.wat}", "test.bar");
1348 matches!(matchalt13, "{*.foo,*.bar,*.wat}", "test.wat");
1349 matches!(matchalt14, "foo{,.txt}", "foo.txt");
1350 nmatches!(matchalt15, "foo{,.txt}", "foo");
1351 matches!(matchalt16, "foo{,.txt}", "foo", EALTRE);
1352
1353 matches!(matchslash1, "abc/def", "abc/def", SLASHLIT);
1354 #[cfg(unix)]
1355 nmatches!(matchslash2, "abc?def", "abc/def", SLASHLIT);
1356 #[cfg(not(unix))]
1357 nmatches!(matchslash2, "abc?def", "abc\\def", SLASHLIT);
1358 nmatches!(matchslash3, "abc*def", "abc/def", SLASHLIT);
1359 matches!(matchslash4, "abc[/]def", "abc/def", SLASHLIT); // differs
1360 #[cfg(unix)]
1361 nmatches!(matchslash5, "abc\\def", "abc/def", SLASHLIT);
1362 #[cfg(not(unix))]
1363 matches!(matchslash5, "abc\\def", "abc/def", SLASHLIT);
1364
1365 matches!(matchbackslash1, "\\[", "[", BSESC);
1366 matches!(matchbackslash2, "\\?", "?", BSESC);
1367 matches!(matchbackslash3, "\\*", "*", BSESC);
1368 matches!(matchbackslash4, "\\[a-z]", "\\a", NOBSESC);
1369 matches!(matchbackslash5, "\\?", "\\a", NOBSESC);
1370 matches!(matchbackslash6, "\\*", "\\\\", NOBSESC);
1371 #[cfg(unix)]
1372 matches!(matchbackslash7, "\\a", "a");
1373 #[cfg(not(unix))]
1374 matches!(matchbackslash8, "\\a", "/a");
1375
1376 nmatches!(matchnot1, "a*b*c", "abcd");
1377 nmatches!(matchnot2, "abc*abc*abc", "abcabcabcabcabcabcabca");
1378 nmatches!(matchnot3, "some/**/needle.txt", "some/other/notthis.txt");
1379 nmatches!(matchnot4, "some/**/**/needle.txt", "some/other/notthis.txt");
1380 nmatches!(matchnot5, "/**/test", "test");
1381 nmatches!(matchnot6, "/**/test", "/one/notthis");
1382 nmatches!(matchnot7, "/**/test", "/notthis");
1383 nmatches!(matchnot8, "**/.*", "ab.c");
1384 nmatches!(matchnot9, "**/.*", "abc/ab.c");
1385 nmatches!(matchnot10, ".*/**", "a.bc");
1386 nmatches!(matchnot11, ".*/**", "abc/a.bc");
1387 nmatches!(matchnot12, "a[0-9]b", "a_b");
1388 nmatches!(matchnot13, "a[!0-9]b", "a0b");
1389 nmatches!(matchnot14, "a[!0-9]b", "a9b");
1390 nmatches!(matchnot15, "[!-]", "-");
1391 nmatches!(matchnot16, "*hello.txt", "hello.txt-and-then-some");
1392 nmatches!(matchnot17, "*hello.txt", "goodbye.txt");
1393 nmatches!(
1394 matchnot18,
1395 "*some/path/to/hello.txt",
1396 "some/path/to/hello.txt-and-then-some"
1397 );
1398 nmatches!(
1399 matchnot19,
1400 "*some/path/to/hello.txt",
1401 "some/other/path/to/hello.txt"
1402 );
1403 nmatches!(matchnot20, "a", "foo/a");
1404 nmatches!(matchnot21, "./foo", "foo");
1405 nmatches!(matchnot22, "**/foo", "foofoo");
1406 nmatches!(matchnot23, "**/foo/bar", "foofoo/bar");
1407 nmatches!(matchnot24, "/*.c", "mozilla-sha1/sha1.c");
1408 nmatches!(matchnot25, "*.c", "mozilla-sha1/sha1.c", SLASHLIT);
1409 nmatches!(
1410 matchnot26,
1411 "**/m4/ltoptions.m4",
1412 "csharp/src/packages/repositories.config",
1413 SLASHLIT
1414 );
1415 nmatches!(matchnot27, "a[^0-9]b", "a0b");
1416 nmatches!(matchnot28, "a[^0-9]b", "a9b");
1417 nmatches!(matchnot29, "[^-]", "-");
1418 nmatches!(matchnot30, "some/*/needle.txt", "some/needle.txt");
1419 nmatches!(
1420 matchrec31,
1421 "some/*/needle.txt",
1422 "some/one/two/needle.txt",
1423 SLASHLIT
1424 );
1425 nmatches!(
1426 matchrec32,
1427 "some/*/needle.txt",
1428 "some/one/two/three/needle.txt",
1429 SLASHLIT
1430 );
1431 nmatches!(matchrec33, ".*/**", ".abc");
1432 nmatches!(matchrec34, "foo/**", "foo");
1433
// Generates a `#[test]` function named `$test_name` which builds the glob
// `$glob` and asserts that calling the accessor method `$accessor` on it
// yields `$want`.
//
// The four-argument form uses `Options::default()`. In the five-argument
// form, every field of `$opts` that is `Some` is forwarded to the matching
// `GlobBuilder` setter; `None` fields leave the builder untouched.
macro_rules! extract {
    ($accessor:ident, $test_name:ident, $glob:expr, $want:expr) => {
        extract!($accessor, $test_name, $glob, $want, Options::default());
    };
    ($accessor:ident, $test_name:ident, $glob:expr, $want:expr, $opts:expr) => {
        #[test]
        fn $test_name() {
            let opts = $opts;
            let mut glob_builder = GlobBuilder::new($glob);
            if let Some(enabled) = opts.casei {
                glob_builder.case_insensitive(enabled);
            }
            if let Some(enabled) = opts.litsep {
                glob_builder.literal_separator(enabled);
            }
            if let Some(enabled) = opts.bsesc {
                glob_builder.backslash_escape(enabled);
            }
            if let Some(enabled) = opts.ealtre {
                glob_builder.empty_alternates(enabled);
            }
            let glob = glob_builder.build().unwrap();
            // `$want` is expanded inside the assertion itself so that any
            // temporary it borrows from stays alive for the comparison.
            assert_eq!($want, glob.$accessor());
        }
    };
}
1459
// Shorthand for `extract!` with the `literal` accessor; every argument is
// forwarded unchanged.
macro_rules! literal {
    ($($args:tt)*) => {
        extract!(literal, $($args)*);
    };
}
1463
// Shorthand for `extract!` with the `basename_tokens` accessor; every
// argument is forwarded unchanged.
macro_rules! basetokens {
    ($($args:tt)*) => {
        extract!(basename_tokens, $($args)*);
    };
}
1467
// Shorthand for `extract!` with the `ext` accessor; every argument is
// forwarded unchanged.
macro_rules! ext {
    ($($args:tt)*) => {
        extract!(ext, $($args)*);
    };
}
1471
// Shorthand for `extract!` with the `required_ext` accessor; every argument
// is forwarded unchanged.
macro_rules! required_ext {
    ($($args:tt)*) => {
        extract!(required_ext, $($args)*);
    };
}
1475
// Shorthand for `extract!` with the `prefix` accessor; every argument is
// forwarded unchanged.
macro_rules! prefix {
    ($($args:tt)*) => {
        extract!(prefix, $($args)*);
    };
}
1479
// Shorthand for `extract!` with the `suffix` accessor; every argument is
// forwarded unchanged.
macro_rules! suffix {
    ($($args:tt)*) => {
        extract!(suffix, $($args)*);
    };
}
1483
// Shorthand for `extract!` with the `basename_literal` accessor; every
// argument is forwarded unchanged.
macro_rules! baseliteral {
    ($($args:tt)*) => {
        extract!(basename_literal, $($args)*);
    };
}
1487
1488 literal!(extract_lit1, "foo", Some(s("foo")));
1489 literal!(extract_lit2, "foo", None, CASEI);
1490 literal!(extract_lit3, "/foo", Some(s("/foo")));
1491 literal!(extract_lit4, "/foo/", Some(s("/foo/")));
1492 literal!(extract_lit5, "/foo/bar", Some(s("/foo/bar")));
1493 literal!(extract_lit6, "*.foo", None);
1494 literal!(extract_lit7, "foo/bar", Some(s("foo/bar")));
1495 literal!(extract_lit8, "**/foo/bar", None);
1496
1497 basetokens!(
1498 extract_basetoks1,
1499 "**/foo",
1500 Some(&*vec![Literal('f'), Literal('o'), Literal('o'),])
1501 );
1502 basetokens!(extract_basetoks2, "**/foo", None, CASEI);
1503 basetokens!(
1504 extract_basetoks3,
1505 "**/foo",
1506 Some(&*vec![Literal('f'), Literal('o'), Literal('o'),]),
1507 SLASHLIT
1508 );
1509 basetokens!(extract_basetoks4, "*foo", None, SLASHLIT);
1510 basetokens!(extract_basetoks5, "*foo", None);
1511 basetokens!(extract_basetoks6, "**/fo*o", None);
1512 basetokens!(
1513 extract_basetoks7,
1514 "**/fo*o",
1515 Some(&*vec![Literal('f'), Literal('o'), ZeroOrMore, Literal('o'),]),
1516 SLASHLIT
1517 );
1518
1519 ext!(extract_ext1, "**/*.rs", Some(s(".rs")));
1520 ext!(extract_ext2, "**/*.rs.bak", None);
1521 ext!(extract_ext3, "*.rs", Some(s(".rs")));
1522 ext!(extract_ext4, "a*.rs", None);
1523 ext!(extract_ext5, "/*.c", None);
1524 ext!(extract_ext6, "*.c", None, SLASHLIT);
1525 ext!(extract_ext7, "*.c", Some(s(".c")));
1526
1527 required_ext!(extract_req_ext1, "*.rs", Some(s(".rs")));
1528 required_ext!(extract_req_ext2, "/foo/bar/*.rs", Some(s(".rs")));
1529 required_ext!(extract_req_ext3, "/foo/bar/*.rs", Some(s(".rs")));
1530 required_ext!(extract_req_ext4, "/foo/bar/.rs", Some(s(".rs")));
1531 required_ext!(extract_req_ext5, ".rs", Some(s(".rs")));
1532 required_ext!(extract_req_ext6, "./rs", None);
1533 required_ext!(extract_req_ext7, "foo", None);
1534 required_ext!(extract_req_ext8, ".foo/", None);
1535 required_ext!(extract_req_ext9, "foo/", None);
1536
1537 prefix!(extract_prefix1, "/foo", Some(s("/foo")));
1538 prefix!(extract_prefix2, "/foo/*", Some(s("/foo/")));
1539 prefix!(extract_prefix3, "**/foo", None);
1540 prefix!(extract_prefix4, "foo/**", Some(s("foo/")));
1541
1542 suffix!(extract_suffix1, "**/foo/bar", Some((s("/foo/bar"), true)));
1543 suffix!(extract_suffix2, "*/foo/bar", Some((s("/foo/bar"), false)));
1544 suffix!(extract_suffix3, "*/foo/bar", None, SLASHLIT);
1545 suffix!(extract_suffix4, "foo/bar", Some((s("foo/bar"), false)));
1546 suffix!(extract_suffix5, "*.foo", Some((s(".foo"), false)));
1547 suffix!(extract_suffix6, "*.foo", None, SLASHLIT);
1548 suffix!(extract_suffix7, "**/*_test", Some((s("_test"), false)));
1549
1550 baseliteral!(extract_baselit1, "**/foo", Some(s("foo")));
1551 baseliteral!(extract_baselit2, "foo", None);
1552 baseliteral!(extract_baselit3, "*foo", None);
1553 baseliteral!(extract_baselit4, "*/foo", None);
1554 }