]>
Commit | Line | Data |
---|---|---|
dfeec247 XL |
1 | /*! |
2 | The ignore crate provides a fast recursive directory iterator that respects | |
3 | various filters such as globs, file types and `.gitignore` files. The precise | |
4 | matching rules and precedence are explained in the documentation for | |
5 | `WalkBuilder`. | |
6 | ||
7 | Secondarily, this crate exposes gitignore and file type matchers for use cases | |
8 | that demand more fine-grained control. | |
9 | ||
10 | # Example | |
11 | ||
12 | This example shows the most basic usage of this crate. This code will | |
13 | recursively traverse the current directory while automatically filtering out | |
14 | files and directories according to ignore globs found in files like | |
15 | `.ignore` and `.gitignore`: | |
16 | ||
17 | ||
18 | ```rust,no_run | |
19 | use ignore::Walk; | |
20 | ||
21 | for result in Walk::new("./") { | |
22 | // Each item yielded by the iterator is either a directory entry or an | |
23 | // error, so either print the path or the error. | |
24 | match result { | |
25 | Ok(entry) => println!("{}", entry.path().display()), | |
26 | Err(err) => println!("ERROR: {}", err), | |
27 | } | |
28 | } | |
29 | ``` | |
30 | ||
31 | # Example: advanced | |
32 | ||
33 | By default, the recursive directory iterator will ignore hidden files and | |
34 | directories. This can be disabled by building the iterator with `WalkBuilder`: | |
35 | ||
36 | ```rust,no_run | |
37 | use ignore::WalkBuilder; | |
38 | ||
39 | for result in WalkBuilder::new("./").hidden(false).build() { | |
40 | println!("{:?}", result); | |
41 | } | |
42 | ``` | |
43 | ||
44 | See the documentation for `WalkBuilder` for many other options. | |
45 | */ | |
46 | ||
47 | #![deny(missing_docs)] | |
48 | ||
dfeec247 XL |
49 | extern crate globset; |
50 | #[macro_use] | |
51 | extern crate lazy_static; | |
52 | #[macro_use] | |
53 | extern crate log; | |
54 | extern crate memchr; | |
55 | extern crate regex; | |
56 | extern crate same_file; | |
57 | extern crate thread_local; | |
58 | extern crate walkdir; | |
59 | #[cfg(windows)] | |
60 | extern crate winapi_util; | |
61 | ||
62 | use std::error; | |
63 | use std::fmt; | |
64 | use std::io; | |
65 | use std::path::{Path, PathBuf}; | |
66 | ||
3dfed10e XL |
67 | pub use walk::{ |
68 | DirEntry, ParallelVisitor, ParallelVisitorBuilder, Walk, WalkBuilder, | |
69 | WalkParallel, WalkState, | |
70 | }; | |
dfeec247 | 71 | |
3dfed10e | 72 | mod default_types; |
dfeec247 XL |
73 | mod dir; |
74 | pub mod gitignore; | |
dfeec247 | 75 | pub mod overrides; |
3dfed10e | 76 | mod pathutil; |
dfeec247 XL |
77 | pub mod types; |
78 | mod walk; | |
79 | ||
/// Represents an error that can occur while parsing ignore files, compiling
/// globs, selecting file types or recursively walking a directory tree.
///
/// Several variants are wrappers that attach context (a line number, a file
/// path or a directory depth) to another boxed `Error`.
#[derive(Debug)]
pub enum Error {
    /// A collection of "soft" errors, produced when adding an ignore file
    /// only partially succeeded.
    Partial(Vec<Error>),
    /// Wraps another error together with the line number it relates to.
    WithLineNumber {
        /// The line number.
        line: u64,
        /// The wrapped error.
        err: Box<Error>,
    },
    /// Wraps another error together with the file path it relates to.
    WithPath {
        /// The file path.
        path: PathBuf,
        /// The wrapped error.
        err: Box<Error>,
    },
    /// Wraps another error together with the directory depth at which it
    /// was encountered during a recursive directory walk.
    WithDepth {
        /// The directory depth.
        depth: usize,
        /// The wrapped error.
        err: Box<Error>,
    },
    /// A file system loop was detected while traversing symbolic links.
    Loop {
        /// The ancestor file path in the loop.
        ancestor: PathBuf,
        /// The child file path in the loop.
        child: PathBuf,
    },
    /// An I/O failure, such as one raised while reading an ignore file.
    Io(io::Error),
    /// A glob could not be parsed.
    Glob {
        /// The glob that caused this error, when it is available. It is
        /// always the glob as supplied by the end user, e.g., exactly as
        /// written in a `.gitignore` file.
        ///
        /// (The glob that actually gets compiled may differ from this one
        /// once `gitignore` semantics have been accounted for.)
        glob: Option<String>,
        /// The underlying glob error, rendered as a string.
        err: String,
    },
    /// A file type was selected that has no definition.
    UnrecognizedFileType(String),
    /// A user supplied file type definition could not be parsed.
    InvalidDefinition,
}
135 | ||
136 | impl Clone for Error { | |
137 | fn clone(&self) -> Error { | |
138 | match *self { | |
139 | Error::Partial(ref errs) => Error::Partial(errs.clone()), | |
140 | Error::WithLineNumber { line, ref err } => { | |
141 | Error::WithLineNumber { line: line, err: err.clone() } | |
142 | } | |
143 | Error::WithPath { ref path, ref err } => { | |
144 | Error::WithPath { path: path.clone(), err: err.clone() } | |
145 | } | |
146 | Error::WithDepth { depth, ref err } => { | |
147 | Error::WithDepth { depth: depth, err: err.clone() } | |
148 | } | |
3dfed10e XL |
149 | Error::Loop { ref ancestor, ref child } => Error::Loop { |
150 | ancestor: ancestor.clone(), | |
151 | child: child.clone(), | |
152 | }, | |
153 | Error::Io(ref err) => match err.raw_os_error() { | |
154 | Some(e) => Error::Io(io::Error::from_raw_os_error(e)), | |
155 | None => Error::Io(io::Error::new(err.kind(), err.to_string())), | |
156 | }, | |
dfeec247 XL |
157 | Error::Glob { ref glob, ref err } => { |
158 | Error::Glob { glob: glob.clone(), err: err.clone() } | |
159 | } | |
160 | Error::UnrecognizedFileType(ref err) => { | |
161 | Error::UnrecognizedFileType(err.clone()) | |
162 | } | |
163 | Error::InvalidDefinition => Error::InvalidDefinition, | |
164 | } | |
165 | } | |
166 | } | |
167 | ||
168 | impl Error { | |
169 | /// Returns true if this is a partial error. | |
170 | /// | |
171 | /// A partial error occurs when only some operations failed while others | |
172 | /// may have succeeded. For example, an ignore file may contain an invalid | |
173 | /// glob among otherwise valid globs. | |
174 | pub fn is_partial(&self) -> bool { | |
175 | match *self { | |
176 | Error::Partial(_) => true, | |
177 | Error::WithLineNumber { ref err, .. } => err.is_partial(), | |
178 | Error::WithPath { ref err, .. } => err.is_partial(), | |
179 | Error::WithDepth { ref err, .. } => err.is_partial(), | |
180 | _ => false, | |
181 | } | |
182 | } | |
183 | ||
184 | /// Returns true if this error is exclusively an I/O error. | |
185 | pub fn is_io(&self) -> bool { | |
186 | match *self { | |
187 | Error::Partial(ref errs) => errs.len() == 1 && errs[0].is_io(), | |
188 | Error::WithLineNumber { ref err, .. } => err.is_io(), | |
189 | Error::WithPath { ref err, .. } => err.is_io(), | |
190 | Error::WithDepth { ref err, .. } => err.is_io(), | |
191 | Error::Loop { .. } => false, | |
192 | Error::Io(_) => true, | |
193 | Error::Glob { .. } => false, | |
194 | Error::UnrecognizedFileType(_) => false, | |
195 | Error::InvalidDefinition => false, | |
196 | } | |
197 | } | |
198 | ||
cdc7bbd5 XL |
199 | /// Inspect the original [`io::Error`] if there is one. |
200 | /// | |
201 | /// [`None`] is returned if the [`Error`] doesn't correspond to an | |
202 | /// [`io::Error`]. This might happen, for example, when the error was | |
203 | /// produced because a cycle was found in the directory tree while | |
204 | /// following symbolic links. | |
205 | /// | |
206 | /// This method returns a borrowed value that is bound to the lifetime of the [`Error`]. To | |
207 | /// obtain an owned value, the [`into_io_error`] can be used instead. | |
208 | /// | |
209 | /// > This is the original [`io::Error`] and is _not_ the same as | |
210 | /// > [`impl From<Error> for std::io::Error`][impl] which contains additional context about the | |
211 | /// error. | |
212 | /// | |
213 | /// [`None`]: https://doc.rust-lang.org/stable/std/option/enum.Option.html#variant.None | |
214 | /// [`io::Error`]: https://doc.rust-lang.org/stable/std/io/struct.Error.html | |
215 | /// [`From`]: https://doc.rust-lang.org/stable/std/convert/trait.From.html | |
216 | /// [`Error`]: struct.Error.html | |
217 | /// [`into_io_error`]: struct.Error.html#method.into_io_error | |
218 | /// [impl]: struct.Error.html#impl-From%3CError%3E | |
219 | pub fn io_error(&self) -> Option<&std::io::Error> { | |
220 | match *self { | |
221 | Error::Partial(ref errs) => { | |
222 | if errs.len() == 1 { | |
223 | errs[0].io_error() | |
224 | } else { | |
225 | None | |
226 | } | |
227 | } | |
228 | Error::WithLineNumber { ref err, .. } => err.io_error(), | |
229 | Error::WithPath { ref err, .. } => err.io_error(), | |
230 | Error::WithDepth { ref err, .. } => err.io_error(), | |
231 | Error::Loop { .. } => None, | |
232 | Error::Io(ref err) => Some(err), | |
233 | Error::Glob { .. } => None, | |
234 | Error::UnrecognizedFileType(_) => None, | |
235 | Error::InvalidDefinition => None, | |
236 | } | |
237 | } | |
238 | ||
239 | /// Similar to [`io_error`] except consumes self to convert to the original | |
240 | /// [`io::Error`] if one exists. | |
241 | /// | |
242 | /// [`io_error`]: struct.Error.html#method.io_error | |
243 | /// [`io::Error`]: https://doc.rust-lang.org/stable/std/io/struct.Error.html | |
244 | pub fn into_io_error(self) -> Option<std::io::Error> { | |
245 | match self { | |
246 | Error::Partial(mut errs) => { | |
247 | if errs.len() == 1 { | |
248 | errs.remove(0).into_io_error() | |
249 | } else { | |
250 | None | |
251 | } | |
252 | } | |
253 | Error::WithLineNumber { err, .. } => err.into_io_error(), | |
254 | Error::WithPath { err, .. } => err.into_io_error(), | |
255 | Error::WithDepth { err, .. } => err.into_io_error(), | |
256 | Error::Loop { .. } => None, | |
257 | Error::Io(err) => Some(err), | |
258 | Error::Glob { .. } => None, | |
259 | Error::UnrecognizedFileType(_) => None, | |
260 | Error::InvalidDefinition => None, | |
261 | } | |
262 | } | |
263 | ||
dfeec247 XL |
264 | /// Returns a depth associated with recursively walking a directory (if |
265 | /// this error was generated from a recursive directory iterator). | |
266 | pub fn depth(&self) -> Option<usize> { | |
267 | match *self { | |
268 | Error::WithPath { ref err, .. } => err.depth(), | |
269 | Error::WithDepth { depth, .. } => Some(depth), | |
270 | _ => None, | |
271 | } | |
272 | } | |
273 | ||
274 | /// Turn an error into a tagged error with the given file path. | |
275 | fn with_path<P: AsRef<Path>>(self, path: P) -> Error { | |
276 | Error::WithPath { | |
277 | path: path.as_ref().to_path_buf(), | |
278 | err: Box::new(self), | |
279 | } | |
280 | } | |
281 | ||
282 | /// Turn an error into a tagged error with the given depth. | |
283 | fn with_depth(self, depth: usize) -> Error { | |
3dfed10e | 284 | Error::WithDepth { depth: depth, err: Box::new(self) } |
dfeec247 XL |
285 | } |
286 | ||
287 | /// Turn an error into a tagged error with the given file path and line | |
288 | /// number. If path is empty, then it is omitted from the error. | |
289 | fn tagged<P: AsRef<Path>>(self, path: P, lineno: u64) -> Error { | |
3dfed10e XL |
290 | let errline = |
291 | Error::WithLineNumber { line: lineno, err: Box::new(self) }; | |
dfeec247 XL |
292 | if path.as_ref().as_os_str().is_empty() { |
293 | return errline; | |
294 | } | |
295 | errline.with_path(path) | |
296 | } | |
297 | ||
298 | /// Build an error from a walkdir error. | |
299 | fn from_walkdir(err: walkdir::Error) -> Error { | |
300 | let depth = err.depth(); | |
301 | if let (Some(anc), Some(child)) = (err.loop_ancestor(), err.path()) { | |
302 | return Error::WithDepth { | |
303 | depth: depth, | |
304 | err: Box::new(Error::Loop { | |
305 | ancestor: anc.to_path_buf(), | |
306 | child: child.to_path_buf(), | |
307 | }), | |
308 | }; | |
309 | } | |
310 | let path = err.path().map(|p| p.to_path_buf()); | |
311 | let mut ig_err = Error::Io(io::Error::from(err)); | |
312 | if let Some(path) = path { | |
3dfed10e | 313 | ig_err = Error::WithPath { path: path, err: Box::new(ig_err) }; |
dfeec247 XL |
314 | } |
315 | ig_err | |
316 | } | |
317 | } | |
318 | ||
319 | impl error::Error for Error { | |
3dfed10e | 320 | #[allow(deprecated)] |
dfeec247 XL |
321 | fn description(&self) -> &str { |
322 | match *self { | |
323 | Error::Partial(_) => "partial error", | |
324 | Error::WithLineNumber { ref err, .. } => err.description(), | |
325 | Error::WithPath { ref err, .. } => err.description(), | |
326 | Error::WithDepth { ref err, .. } => err.description(), | |
327 | Error::Loop { .. } => "file system loop found", | |
328 | Error::Io(ref err) => err.description(), | |
329 | Error::Glob { ref err, .. } => err, | |
330 | Error::UnrecognizedFileType(_) => "unrecognized file type", | |
331 | Error::InvalidDefinition => "invalid definition", | |
332 | } | |
333 | } | |
334 | } | |
335 | ||
336 | impl fmt::Display for Error { | |
337 | fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { | |
338 | match *self { | |
339 | Error::Partial(ref errs) => { | |
340 | let msgs: Vec<String> = | |
341 | errs.iter().map(|err| err.to_string()).collect(); | |
342 | write!(f, "{}", msgs.join("\n")) | |
343 | } | |
344 | Error::WithLineNumber { line, ref err } => { | |
345 | write!(f, "line {}: {}", line, err) | |
346 | } | |
347 | Error::WithPath { ref path, ref err } => { | |
348 | write!(f, "{}: {}", path.display(), err) | |
349 | } | |
350 | Error::WithDepth { ref err, .. } => err.fmt(f), | |
3dfed10e XL |
351 | Error::Loop { ref ancestor, ref child } => write!( |
352 | f, | |
353 | "File system loop found: \ | |
dfeec247 | 354 | {} points to an ancestor {}", |
3dfed10e XL |
355 | child.display(), |
356 | ancestor.display() | |
357 | ), | |
dfeec247 XL |
358 | Error::Io(ref err) => err.fmt(f), |
359 | Error::Glob { glob: None, ref err } => write!(f, "{}", err), | |
360 | Error::Glob { glob: Some(ref glob), ref err } => { | |
361 | write!(f, "error parsing glob '{}': {}", glob, err) | |
362 | } | |
363 | Error::UnrecognizedFileType(ref ty) => { | |
364 | write!(f, "unrecognized file type: {}", ty) | |
365 | } | |
3dfed10e XL |
366 | Error::InvalidDefinition => write!( |
367 | f, | |
368 | "invalid definition (format is type:glob, e.g., \ | |
369 | html:*.html)" | |
370 | ), | |
dfeec247 XL |
371 | } |
372 | } | |
373 | } | |
374 | ||
375 | impl From<io::Error> for Error { | |
376 | fn from(err: io::Error) -> Error { | |
377 | Error::Io(err) | |
378 | } | |
379 | } | |
380 | ||
381 | #[derive(Debug, Default)] | |
382 | struct PartialErrorBuilder(Vec<Error>); | |
383 | ||
384 | impl PartialErrorBuilder { | |
385 | fn push(&mut self, err: Error) { | |
386 | self.0.push(err); | |
387 | } | |
388 | ||
389 | fn push_ignore_io(&mut self, err: Error) { | |
390 | if !err.is_io() { | |
391 | self.push(err); | |
392 | } | |
393 | } | |
394 | ||
395 | fn maybe_push(&mut self, err: Option<Error>) { | |
396 | if let Some(err) = err { | |
397 | self.push(err); | |
398 | } | |
399 | } | |
400 | ||
401 | fn maybe_push_ignore_io(&mut self, err: Option<Error>) { | |
402 | if let Some(err) = err { | |
403 | self.push_ignore_io(err); | |
404 | } | |
405 | } | |
406 | ||
407 | fn into_error_option(mut self) -> Option<Error> { | |
408 | if self.0.is_empty() { | |
409 | None | |
410 | } else if self.0.len() == 1 { | |
411 | Some(self.0.pop().unwrap()) | |
412 | } else { | |
413 | Some(Error::Partial(self.0)) | |
414 | } | |
415 | } | |
416 | } | |
417 | ||
/// The outcome of matching a path against a set of globs.
///
/// The type parameter `T` typically carries extra information about the
/// match. For example, it might identify the specific gitignore file and
/// the specific glob pattern responsible for the match.
#[derive(Clone, Debug)]
pub enum Match<T> {
    /// No glob matched the path.
    None,
    /// The highest precedent glob that matched says the path should be
    /// ignored.
    Ignore(T),
    /// The highest precedent glob that matched says the path should be
    /// whitelisted.
    Whitelist(T),
}

impl<T> Match<T> {
    /// Returns true if no glob matched.
    pub fn is_none(&self) -> bool {
        // Only `Match::None` has no inner value.
        self.inner().is_none()
    }

    /// Returns true if the match result says the path should be ignored.
    pub fn is_ignore(&self) -> bool {
        if let Match::Ignore(_) = self {
            true
        } else {
            false
        }
    }

    /// Returns true if the match result says the path should be
    /// whitelisted.
    pub fn is_whitelist(&self) -> bool {
        if let Match::Whitelist(_) = self {
            true
        } else {
            false
        }
    }

    /// Swaps `Ignore` and `Whitelist`, keeping the inner value. A non-match
    /// stays a non-match.
    pub fn invert(self) -> Match<T> {
        match self {
            Match::Ignore(value) => Match::Whitelist(value),
            Match::Whitelist(value) => Match::Ignore(value),
            Match::None => Match::None,
        }
    }

    /// Borrows the value carried by this match, if there is one.
    pub fn inner(&self) -> Option<&T> {
        match self {
            Match::Ignore(value) | Match::Whitelist(value) => Some(value),
            Match::None => None,
        }
    }

    /// Transforms the value carried by this match with the given function,
    /// keeping the kind of match.
    ///
    /// A non-match carries no value, so the function is not called and the
    /// result is a non-match.
    pub fn map<U, F: FnOnce(T) -> U>(self, f: F) -> Match<U> {
        match self {
            Match::Ignore(value) => Match::Ignore(f(value)),
            Match::Whitelist(value) => Match::Whitelist(f(value)),
            Match::None => Match::None,
        }
    }

    /// Returns this match unless it is a non-match, in which case `other`
    /// is returned instead.
    pub fn or(self, other: Self) -> Self {
        match self {
            Match::None => other,
            matched => matched,
        }
    }
}
501 | ||
#[cfg(test)]
mod tests {
    use std::env;
    use std::error;
    use std::fs;
    use std::io;
    use std::path::{Path, PathBuf};
    use std::result;

    /// A convenient result type alias.
    pub type Result<T> =
        result::Result<T, Box<dyn error::Error + Send + Sync>>;

    /// Builds a boxed error from a format string and its arguments.
    macro_rules! err {
        ($($tt:tt)*) => {
            Box::<dyn error::Error + Send + Sync>::from(format!($($tt)*))
        }
    }

    /// A simple wrapper for creating a temporary directory that is
    /// automatically deleted when it's dropped.
    ///
    /// We use this in lieu of tempfile because tempfile brings in too many
    /// dependencies.
    #[derive(Debug)]
    pub struct TempDir(PathBuf);

    impl Drop for TempDir {
        /// Recursively removes the directory, panicking if removal fails.
        fn drop(&mut self) {
            fs::remove_dir_all(&self.0).unwrap();
        }
    }

    impl TempDir {
        /// Create a new empty temporary directory under the system's
        /// configured temporary directory.
        ///
        /// The directory is `<temp dir>/rust-ignore/<n>`, where `<n>` comes
        /// from a process-wide counter. Candidates that already exist are
        /// skipped, for a bounded number of attempts.
        pub fn new() -> Result<TempDir> {
            use std::sync::atomic::{AtomicUsize, Ordering};

            const TRIES: usize = 100;
            static COUNTER: AtomicUsize = AtomicUsize::new(0);

            let parent = env::temp_dir().join("rust-ignore");
            fs::create_dir_all(&parent).map_err(|e| {
                err!("failed to create {}: {}", parent.display(), e)
            })?;
            for _ in 0..TRIES {
                let count = COUNTER.fetch_add(1, Ordering::SeqCst);
                let path = parent.join(count.to_string());
                // `create_dir` fails if the path already exists, so claiming
                // the directory and checking for it happen in one step.
                // Checking `is_dir()` first and creating afterwards would
                // let two processes both believe they own the same
                // directory.
                match fs::create_dir(&path) {
                    Ok(()) => return Ok(TempDir(path)),
                    Err(ref e) if e.kind() == io::ErrorKind::AlreadyExists => {
                        continue
                    }
                    Err(e) => {
                        return Err(err!(
                            "failed to create {}: {}",
                            path.display(),
                            e
                        ))
                    }
                }
            }
            Err(err!("failed to create temp dir after {} tries", TRIES))
        }

        /// Return the underlying path to this temporary directory.
        pub fn path(&self) -> &Path {
            &self.0
        }
    }
}