2 The types module provides a way of associating globs on file names to file
3 types.
5 This can be used to match specific types of files. For example, among
6 the default file types provided, the Rust file type is defined to be `*.rs`
7 with name `rust`. Similarly, the C file type is defined to be `*.{c,h}` with
8 name `c`.
10 Note that the set of default types may change over time.
14 This shows how to create and use a simple file type matcher using the default
15 file types defined in this crate.
18 use ignore::types::TypesBuilder;
20 let mut builder = TypesBuilder::new();
21 builder.add_defaults();
22 builder.select("rust");
23 let matcher = builder.build().unwrap();
25 assert!(matcher.matched("foo.rs", false).is_whitelist());
26 assert!(matcher.matched("foo.c", false).is_ignore());
31 This is like the previous example, but shows how negating a file type works.
32 That is, this will let us match file paths that *don't* correspond to a
33 particular file type.
36 use ignore::types::TypesBuilder;
38 let mut builder = TypesBuilder::new();
39 builder.add_defaults();
40 builder.negate("c");
41 let matcher = builder.build().unwrap();
43 assert!(matcher.matched("foo.rs", false).is_none());
44 assert!(matcher.matched("foo.c", false).is_ignore());
47 # Example: custom file type definitions
49 This shows how to extend this library's default file type definitions with
50 your own.
53 use ignore::types::TypesBuilder;
55 let mut builder = TypesBuilder::new();
56 builder.add_defaults();
57 builder.add("foo", "*.foo");
58 // Another way of adding a file type definition.
59 // This is useful when accepting input from an end user.
60 builder.add_def("bar:*.bar");
61 // Note: we only select `foo`, not `bar`.
62 builder.select("foo");
63 let matcher = builder.build().unwrap();
65 assert!(matcher.matched("x.foo", false).is_whitelist());
66 // This is ignored because we only selected the `foo` file type.
67 assert!(matcher.matched("x.bar", false).is_ignore());
70 We can also add file type definitions based on other definitions.
73 use ignore::types::TypesBuilder;
75 let mut builder = TypesBuilder::new();
76 builder.add_defaults();
77 builder.add("foo", "*.foo");
78 builder.add_def("bar:include:foo,cpp");
79 builder.select("bar");
80 let matcher = builder.build().unwrap();
82 assert!(matcher.matched("x.foo", false).is_whitelist());
83 assert!(matcher.matched("y.cpp", false).is_whitelist());
87 use std
::cell
::RefCell
;
88 use std
::collections
::HashMap
;
92 use globset
::{GlobBuilder, GlobSet, GlobSetBuilder}
;
94 use thread_local
::ThreadLocal
;
96 use pathutil
::file_name
;
99 const DEFAULT_TYPES
: &'
static [(&'
static str, &'
static [&'
static str])] = &[
100 ("agda", &["*.agda", "*.lagda"]),
101 ("ats", &["*.ats", "*.dats", "*.sats", "*.hats"]),
102 ("aidl", &["*.aidl"]),
103 ("amake", &["*.mk", "*.bp"]),
104 ("asciidoc", &["*.adoc", "*.asc", "*.asciidoc"]),
105 ("asm", &["*.asm", "*.s", "*.S"]),
106 ("asp", &["*.aspx", "*.aspx.cs", "*.aspx.cs", "*.ascx", "*.ascx.cs", "*.ascx.vb"]),
107 ("avro", &["*.avdl", "*.avpr", "*.avsc"]),
109 ("bazel", &["*.bzl", "WORKSPACE", "BUILD", "BUILD.bazel"]),
110 ("bitbake", &["*.bb", "*.bbappend", "*.bbclass", "*.conf", "*.inc"]),
111 ("buildstream", &["*.bst"]),
112 ("bzip2", &["*.bz2"]),
113 ("c", &["*.c", "*.h", "*.H", "*.cats"]),
114 ("cabal", &["*.cabal"]),
115 ("cbor", &["*.cbor"]),
116 ("ceylon", &["*.ceylon"]),
117 ("clojure", &["*.clj", "*.cljc", "*.cljs", "*.cljx"]),
118 ("cmake", &["*.cmake", "CMakeLists.txt"]),
119 ("coffeescript", &["*.coffee"]),
120 ("creole", &["*.creole"]),
121 ("config", &["*.cfg", "*.conf", "*.config", "*.ini"]),
123 "*.C", "*.cc", "*.cpp", "*.cxx",
124 "*.h", "*.H", "*.hh", "*.hpp", "*.hxx", "*.inl",
126 ("crystal", &["Projectfile", "*.cr"]),
128 ("csharp", &["*.cs"]),
129 ("cshtml", &["*.cshtml"]),
130 ("css", &["*.css", "*.scss"]),
132 ("cython", &["*.pyx", "*.pxi", "*.pxd"]),
133 ("dart", &["*.dart"]),
135 ("dhall", &["*.dhall"]),
136 ("docker", &["*Dockerfile*"]),
137 ("elisp", &["*.el"]),
138 ("elixir", &["*.ex", "*.eex", "*.exs"]),
140 ("erlang", &["*.erl", "*.hrl"]),
141 ("fidl", &["*.fidl"]),
142 ("fish", &["*.fish"]),
144 "*.f", "*.F", "*.f77", "*.F77", "*.pfo",
145 "*.f90", "*.F90", "*.f95", "*.F95",
147 ("fsharp", &["*.fs", "*.fsx", "*.fsi"]),
148 ("gn", &["*.gn", "*.gni"]),
151 ("groovy", &["*.groovy", "*.gradle"]),
152 ("h", &["*.h", "*.hpp"]),
154 ("haskell", &["*.hs", "*.lhs", "*.cpphs", "*.c2hs", "*.hsc"]),
155 ("hs", &["*.hs", "*.lhs"]),
156 ("html", &["*.htm", "*.html", "*.ejs"]),
157 ("idris", &["*.idr", "*.lidr"]),
158 ("java", &["*.java", "*.jsp"]),
159 ("jinja", &["*.j2", "*.jinja", "*.jinja2"]),
161 "*.js", "*.jsx", "*.vue",
163 ("json", &["*.json", "composer.lock"]),
164 ("jsonl", &["*.jsonl"]),
165 ("julia", &["*.jl"]),
166 ("jupyter", &["*.ipynb", "*.jpynb"]),
168 ("kotlin", &["*.kt", "*.kts"]),
169 ("less", &["*.less"]),
172 "COPYING", "COPYING[.-]*",
173 "COPYRIGHT", "COPYRIGHT[.-]*",
175 "licen[cs]e", "licen[cs]e.*",
176 "LICEN[CS]E", "LICEN[CS]E[.-]*", "*[.-]LICEN[CS]E*",
177 "NOTICE", "NOTICE[.-]*",
178 "PATENTS", "PATENTS[.-]*",
179 "UNLICEN[CS]E", "UNLICEN[CS]E[.-]*",
180 // GPL (gpl.txt, etc.)
184 // Other license-specific (APACHE-2.0.txt, etc.)
197 ("lisp", &["*.el", "*.jl", "*.lisp", "*.lsp", "*.sc", "*.scm"]),
200 ("lzma", &["*.lzma"]),
202 ("m4", &["*.ac", "*.m4"]),
204 "gnumakefile", "Gnumakefile", "GNUmakefile",
205 "makefile", "Makefile",
208 ("mako", &["*.mako", "*.mao"]),
209 ("markdown", &["*.markdown", "*.md", "*.mdown", "*.mkdn"]),
210 ("md", &["*.markdown", "*.md", "*.mdown", "*.mkdn"]),
211 ("man", &["*.[0-9lnpx]", "*.[0-9][cEFMmpSx]"]),
212 ("matlab", &["*.m"]),
216 "*.csproj", "*.fsproj", "*.vcxproj", "*.proj", "*.props", "*.targets"
220 ("objc", &["*.h", "*.m"]),
221 ("objcpp", &["*.h", "*.mm"]),
222 ("ocaml", &["*.ml", "*.mli", "*.mll", "*.mly"]),
224 ("pascal", &["*.pas", "*.dpr", "*.lpr", "*.pp", "*.inc"]),
225 ("perl", &["*.perl", "*.pl", "*.PL", "*.plh", "*.plx", "*.pm", "*.t"]),
227 ("php", &["*.php", "*.php3", "*.php4", "*.php5", "*.phtml"]),
229 ("postscript", &[".eps", ".ps"]),
230 ("protobuf", &["*.proto"]),
231 ("ps", &["*.cdxml", "*.ps1", "*.ps1xml", "*.psd1", "*.psm1"]),
232 ("puppet", &["*.erb", "*.pp", "*.rb"]),
233 ("purs", &["*.purs"]),
235 ("qmake", &["*.pro", "*.pri", "*.prf"]),
236 ("readme", &["README*", "*README"]),
237 ("r", &["*.R", "*.r", "*.Rmd", "*.Rnw"]),
238 ("rdoc", &["*.rdoc"]),
240 ("ruby", &["Gemfile", "*.gemspec", ".irbrc", "Rakefile", "*.rb"]),
242 ("sass", &["*.sass", "*.scss"]),
243 ("scala", &["*.scala", "*.sbt"]),
245 // Portable/misc. init files
246 ".login", ".logout", ".profile", "profile",
247 // bash-specific init files
248 ".bash_login", "bash_login",
249 ".bash_logout", "bash_logout",
250 ".bash_profile", "bash_profile",
251 ".bashrc", "bashrc", "*.bashrc",
252 // csh-specific init files
254 // ksh-specific init files
256 // tcsh-specific init files
258 // zsh-specific init files
261 ".zlogout", "zlogout",
262 ".zprofile", "zprofile",
265 "*.bash", "*.csh", "*.ksh", "*.sh", "*.tcsh", "*.zsh",
267 ("smarty", &["*.tpl"]),
268 ("sml", &["*.sml", "*.sig"]),
270 ("spark", &["*.spark"]),
271 ("sql", &["*.sql", "*.psql"]),
272 ("stylus", &["*.styl"]),
273 ("sv", &["*.v", "*.vg", "*.sv", "*.svh", "*.h"]),
275 ("swift", &["*.swift"]),
276 ("swig", &["*.def", "*.i"]),
278 "*.automount", "*.conf", "*.device", "*.link", "*.mount", "*.path",
279 "*.scope", "*.service", "*.slice", "*.socket", "*.swap", "*.target",
282 ("taskpaper", &["*.taskpaper"]),
284 ("tex", &["*.tex", "*.ltx", "*.cls", "*.sty", "*.bib"]),
285 ("textile", &["*.textile"]),
286 ("thrift", &["*.thrift"]),
288 ("ts", &["*.ts", "*.tsx"]),
290 ("toml", &["*.toml", "Cargo.lock"]),
291 ("twig", &["*.twig"]),
292 ("vala", &["*.vala"]),
294 ("verilog", &["*.v", "*.vh", "*.sv", "*.svh"]),
295 ("vhdl", &["*.vhd", "*.vhdl"]),
297 ("vimscript", &["*.vim"]),
298 ("wiki", &["*.mediawiki", "*.wiki"]),
299 ("webidl", &["*.idl", "*.webidl", "*.widl"]),
300 ("xml", &["*.xml", "*.xml.dist"]),
303 ("yaml", &["*.yaml", "*.yml"]),
307 ".zlogout", "zlogout",
308 ".zprofile", "zprofile",
314 /// Glob represents a single glob in a set of file type definitions.
///
316 /// There may be more than one glob for a particular file type.
///
318 /// This is used to report information about the highest precedent glob that matched.
///
321 /// Note that not all matches necessarily correspond to a specific glob.
322 /// For example, if there are one or more selections and a file path doesn't
323 /// match any of those selections, then the file path is considered to be ignored.
///
326 /// The lifetime `'a` refers to the lifetime of the underlying file type
327 /// definition, which corresponds to the lifetime of the file type matcher.
328 #[derive(Clone, Debug)]
329 pub struct Glob
<'a
>(GlobInner
<'a
>);
331 #[derive(Clone, Debug)]
333 /// No glob matched, but the file path should still be ignored.
337 /// The file type definition which provided the glob.
338 def
: &'a FileTypeDef
,
339 /// The index of the glob that matched inside the file type definition.
341 /// Whether the selection was negated or not.
347 fn unmatched() -> Glob
<'a
> {
348 Glob(GlobInner
::UnmatchedIgnore
)
352 /// A single file type definition.
354 /// File type definitions can be retrieved in aggregate from a file type
355 /// matcher. File type definitions are also reported when its responsible
357 #[derive(Clone, Debug, Eq, PartialEq)]
358 pub struct FileTypeDef
{
364 /// Return the name of this file type.
365 pub fn name(&self) -> &str {
369 /// Return the globs used to recognize this file type.
370 pub fn globs(&self) -> &[String
] {
375 /// Types is a file type matcher.
376 #[derive(Clone, Debug)]
378 /// All of the file type definitions, sorted lexicographically by name.
379 defs
: Vec
<FileTypeDef
>,
380 /// All of the selections made by the user.
381 selections
: Vec
<Selection
<FileTypeDef
>>,
382 /// Whether there is at least one Selection::Select in our selections.
383 /// When this is true, a Match::None is converted to Match::Ignore.
385 /// A mapping from glob index in the set to two indices. The first is an
386 /// index into `selections` and the second is an index into the
387 /// corresponding file type definition's list of globs.
388 glob_to_selection
: Vec
<(usize, usize)>,
389 /// The set of all glob selections, used for actual matching.
391 /// Temporary storage for globs that match.
392 matches
: Arc
<ThreadLocal
<RefCell
<Vec
<usize>>>>,
395 /// Indicates the type of a selection for a particular file type.
396 #[derive(Clone, Debug)]
402 impl<T
> Selection
<T
> {
403 fn is_negated(&self) -> bool
{
405 Selection
::Select(..) => false,
406 Selection
::Negate(..) => true,
410 fn name(&self) -> &str {
412 Selection
::Select(ref name
, _
) => name
,
413 Selection
::Negate(ref name
, _
) => name
,
417 fn map
<U
, F
: FnOnce(T
) -> U
>(self, f
: F
) -> Selection
<U
> {
419 Selection
::Select(name
, inner
) => {
420 Selection
::Select(name
, f(inner
))
422 Selection
::Negate(name
, inner
) => {
423 Selection
::Negate(name
, f(inner
))
428 fn inner(&self) -> &T
{
430 Selection
::Select(_
, ref inner
) => inner
,
431 Selection
::Negate(_
, ref inner
) => inner
,
437 /// Creates a new file type matcher that never matches any path and
438 /// contains no file type definitions.
439 pub fn empty() -> Types
{
444 glob_to_selection
: vec
![],
445 set
: GlobSetBuilder
::new().build().unwrap(),
446 matches
: Arc
::new(ThreadLocal
::default()),
450 /// Returns true if and only if this matcher has zero selections.
451 pub fn is_empty(&self) -> bool
{
452 self.selections
.is_empty()
455 /// Returns the number of selections used in this matcher.
456 pub fn len(&self) -> usize {
457 self.selections
.len()
460 /// Return the set of current file type definitions.
462 /// Definitions and globs are sorted.
463 pub fn definitions(&self) -> &[FileTypeDef
] {
467 /// Returns a match for the given path against this file type matcher.
469 /// The path is considered whitelisted if it matches a selected file type.
470 /// The path is considered ignored if it matches a negated file type.
471 /// If at least one file type is selected and `path` doesn't match, then
472 /// the path is also considered ignored.
473 pub fn matched
<'a
, P
: AsRef
<Path
>>(
477 ) -> Match
<Glob
<'a
>> {
478 // File types don't apply to directories, and we can't do anything
479 // if our glob set is empty.
480 if is_dir
|| self.set
.is_empty() {
483 // We only want to match against the file name, so extract it.
484 // If one doesn't exist, then we can't match it.
485 let name
= match file_name(path
.as_ref()) {
487 None
if self.has_selected
=> {
488 return Match
::Ignore(Glob
::unmatched());
494 let mut matches
= self.matches
.get_default().borrow_mut();
495 self.set
.matches_into(name
, &mut *matches
);
496 // The highest precedent match is the last one.
497 if let Some(&i
) = matches
.last() {
498 let (isel
, iglob
) = self.glob_to_selection
[i
];
499 let sel
= &self.selections
[isel
];
500 let glob
= Glob(GlobInner
::Matched
{
503 negated
: sel
.is_negated(),
505 return if sel
.is_negated() {
508 Match
::Whitelist(glob
)
511 if self.has_selected
{
512 Match
::Ignore(Glob
::unmatched())
519 /// TypesBuilder builds a type matcher from a set of file type definitions and
520 /// a set of file type selections.
521 pub struct TypesBuilder
{
522 types
: HashMap
<String
, FileTypeDef
>,
523 selections
: Vec
<Selection
<()>>,
527 /// Create a new builder for a file type matcher.
529 /// The builder contains *no* type definitions to start with. A set
530 /// of default type definitions can be added with `add_defaults`, and
531 /// additional type definitions can be added with `add` and `add_def`.
532 pub fn new() -> TypesBuilder
{
534 types
: HashMap
::new(),
539 /// Build the current set of file type definitions *and* selections into
540 /// a file type matcher.
541 pub fn build(&self) -> Result
<Types
, Error
> {
542 let defs
= self.definitions();
543 let has_selected
= self.selections
.iter().any(|s
| !s
.is_negated());
545 let mut selections
= vec
![];
546 let mut glob_to_selection
= vec
![];
547 let mut build_set
= GlobSetBuilder
::new();
548 for (isel
, selection
) in self.selections
.iter().enumerate() {
549 let def
= match self.types
.get(selection
.name()) {
550 Some(def
) => def
.clone(),
552 let name
= selection
.name().to_string();
553 return Err(Error
::UnrecognizedFileType(name
));
556 for (iglob
, glob
) in def
.globs
.iter().enumerate() {
558 GlobBuilder
::new(glob
)
559 .literal_separator(true)
563 glob
: Some(glob
.to_string()),
564 err
: err
.kind().to_string(),
567 glob_to_selection
.push((isel
, iglob
));
569 selections
.push(selection
.clone().map(move |_
| def
));
571 let set
= build_set
.build().map_err(|err
| {
572 Error
::Glob { glob: None, err: err.to_string() }
576 selections
: selections
,
577 has_selected
: has_selected
,
578 glob_to_selection
: glob_to_selection
,
580 matches
: Arc
::new(ThreadLocal
::default()),
584 /// Return the set of current file type definitions.
586 /// Definitions and globs are sorted.
587 pub fn definitions(&self) -> Vec
<FileTypeDef
> {
588 let mut defs
= vec
![];
589 for def
in self.types
.values() {
590 let mut def
= def
.clone();
594 defs
.sort_by(|def1
, def2
| def1
.name().cmp(def2
.name()));
598 /// Select the file type given by `name`.
600 /// If `name` is `all`, then all file types currently defined are selected.
601 pub fn select(&mut self, name
: &str) -> &mut TypesBuilder
{
603 for name
in self.types
.keys() {
604 self.selections
.push(Selection
::Select(name
.to_string(), ()));
607 self.selections
.push(Selection
::Select(name
.to_string(), ()));
612 /// Ignore the file type given by `name`.
614 /// If `name` is `all`, then all file types currently defined are negated.
615 pub fn negate(&mut self, name
: &str) -> &mut TypesBuilder
{
617 for name
in self.types
.keys() {
618 self.selections
.push(Selection
::Negate(name
.to_string(), ()));
621 self.selections
.push(Selection
::Negate(name
.to_string(), ()));
626 /// Clear any file type definitions for the type name given.
627 pub fn clear(&mut self, name
: &str) -> &mut TypesBuilder
{
628 self.types
.remove(name
);
632 /// Add a new file type definition. `name` can be arbitrary and `pat`
633 /// should be a glob recognizing file paths belonging to the `name` type.
635 /// If `name` is `all` or otherwise contains any character that is not a
636 /// Unicode letter or number, then an error is returned.
637 pub fn add(&mut self, name
: &str, glob
: &str) -> Result
<(), Error
> {
639 static ref RE
: Regex
= Regex
::new(r
"^[\pL\pN]+$").unwrap();
641 if name
== "all" || !RE
.is_match(name
) {
642 return Err(Error
::InvalidDefinition
);
644 let (key
, glob
) = (name
.to_string(), glob
.to_string());
645 self.types
.entry(key
).or_insert_with(|| {
646 FileTypeDef { name: name.to_string(), globs: vec![] }
651 /// Add a new file type definition specified in string form. There are two
653 /// 1. `{name}:{glob}`. This defines a 'root' definition that associates the
654 /// given name with the given glob.
655 /// 2. `{name}:include:{comma-separated list of already defined names}`.
656 /// This defines an 'include' definition that associates the given name
657 /// with the definitions of the given existing types.
658 /// Names may not include any characters that are not
659 /// Unicode letters or numbers.
660 pub fn add_def(&mut self, def
: &str) -> Result
<(), Error
> {
661 let parts
: Vec
<&str> = def
.split('
:'
).collect();
666 if name
.is_empty() || glob
.is_empty() {
667 return Err(Error
::InvalidDefinition
);
673 let types_string
= parts
[2];
674 if name
.is_empty() || parts
[1] != "include" || types_string
.is_empty() {
675 return Err(Error
::InvalidDefinition
);
677 let types
= types_string
.split('
,'
);
678 // Check ahead of time to ensure that all types specified are
679 // present and fail fast if not.
680 if types
.clone().any(|t
| !self.types
.contains_key(t
)) {
681 return Err(Error
::InvalidDefinition
);
683 for type_name
in types
{
684 let globs
= self.types
.get(type_name
).unwrap().globs
.clone();
686 self.add(name
, &glob
)?
;
691 _
=> Err(Error
::InvalidDefinition
)
695 /// Add a set of default file type definitions.
696 pub fn add_defaults(&mut self) -> &mut TypesBuilder
{
697 static MSG
: &'
static str = "adding a default type should never fail";
698 for &(name
, exts
) in DEFAULT_TYPES
{
700 self.add(name
, ext
).expect(MSG
);
709 use super::TypesBuilder
;
711 macro_rules
! matched
{
712 ($name
:ident
, $types
:expr
, $sel
:expr
, $selnot
:expr
,
714 matched
!($name
, $types
, $sel
, $selnot
, $path
, true);
716 (not
, $name
:ident
, $types
:expr
, $sel
:expr
, $selnot
:expr
,
718 matched
!($name
, $types
, $sel
, $selnot
, $path
, false);
720 ($name
:ident
, $types
:expr
, $sel
:expr
, $selnot
:expr
,
721 $path
:expr
, $matched
:expr
) => {
724 let mut btypes
= TypesBuilder
::new();
725 for tydef
in $types
{
726 btypes
.add_def(tydef
).unwrap();
731 for selnot
in $selnot
{
732 btypes
.negate(selnot
);
734 let types
= btypes
.build().unwrap();
735 let mat
= types
.matched($path
, false);
736 assert_eq
!($matched
, !mat
.is_ignore());
741 fn types() -> Vec
<&'
static str> {
748 "combo:include:html,rust"
752 matched
!(match1
, types(), vec
!["rust"], vec
![], "lib.rs");
753 matched
!(match2
, types(), vec
!["html"], vec
![], "index.html");
754 matched
!(match3
, types(), vec
!["html"], vec
![], "index.htm");
755 matched
!(match4
, types(), vec
!["html", "rust"], vec
![], "main.rs");
756 matched
!(match5
, types(), vec
![], vec
![], "index.html");
757 matched
!(match6
, types(), vec
![], vec
!["rust"], "index.html");
758 matched
!(match7
, types(), vec
!["foo"], vec
!["rust"], "main.foo");
759 matched
!(match8
, types(), vec
!["combo"], vec
![], "index.html");
760 matched
!(match9
, types(), vec
!["combo"], vec
![], "lib.rs");
762 matched
!(not
, matchnot1
, types(), vec
!["rust"], vec
![], "index.html");
763 matched
!(not
, matchnot2
, types(), vec
![], vec
!["rust"], "main.rs");
764 matched
!(not
, matchnot3
, types(), vec
!["foo"], vec
!["rust"], "main.rs");
765 matched
!(not
, matchnot4
, types(), vec
!["rust"], vec
!["foo"], "main.rs");
766 matched
!(not
, matchnot5
, types(), vec
!["rust"], vec
!["foo"], "main.foo");
767 matched
!(not
, matchnot6
, types(), vec
!["combo"], vec
![], "leftpad.js");
770 fn test_invalid_defs() {
771 let mut btypes
= TypesBuilder
::new();
772 for tydef
in types() {
773 btypes
.add_def(tydef
).unwrap();
775 // Preserve the original definitions for later comparison.
776 let original_defs
= btypes
.definitions();
778 // Reference to type that does not exist
779 "combo:include:html,python",
781 "combo:foobar:html,rust",
784 for def
in bad_defs
{
785 assert
!(btypes
.add_def(def
).is_err());
786 // Ensure that nothing changed, even if some of the includes were valid.
787 assert_eq
!(btypes
.definitions(), original_defs
);