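//! A `Source` implementation for packages that live on the local filesystem.
//!
//! `PathSource` serves one package at a fixed path (or, in recursive mode,
//! every package found beneath a root directory) and knows how to compute the
//! set of files belonging to a package, using Git information and the
//! `include`/`exclude` manifest options to filter the list.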
use std::collections::HashSet;
use std::fmt::{self, Debug, Formatter};
use std::path::{Path, PathBuf};
use std::task::Poll;

use crate::core::source::MaybePackage;
use crate::core::{Dependency, Package, PackageId, Source, SourceId, Summary};
use crate::ops;
use crate::util::{internal, CargoResult, Config};
use anyhow::Context as _;
use cargo_util::paths;
use filetime::FileTime;
use ignore::gitignore::GitignoreBuilder;
use log::{trace, warn};
use walkdir::WalkDir;

pub struct PathSource<'cfg> {
    source_id: SourceId,
    path: PathBuf,
    updated: bool,
    packages: Vec<Package>,
    config: &'cfg Config,
    recursive: bool,
}

impl<'cfg> PathSource<'cfg> {
    /// Invoked with an absolute path to a directory that contains a `Cargo.toml`.
    ///
    /// This source will only return the package at precisely the `path`
    /// specified, and it will be an error if there's not a package at `path`.
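    ///
    /// # Example
    ///
    /// A minimal sketch of constructing a `PathSource` and reading its root
    /// package; the package path and `Config` setup below are illustrative
    /// assumptions rather than values taken from Cargo itself.
    ///
    /// ```no_run
    /// use std::path::Path;
    ///
    /// use cargo::core::SourceId;
    /// use cargo::sources::PathSource;
    /// use cargo::util::Config;
    ///
    /// # fn main() -> cargo::util::CargoResult<()> {
    /// let config = Config::default()?;
    /// // Hypothetical package directory containing a `Cargo.toml`.
    /// let path = Path::new("/path/to/my/package");
    /// let source_id = SourceId::for_path(path)?;
    /// let mut source = PathSource::new(path, source_id, &config);
    /// let pkg = source.root_package()?;
    /// println!("found {}", pkg.package_id());
    /// # Ok(())
    /// # }
    /// ```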
    pub fn new(path: &Path, source_id: SourceId, config: &'cfg Config) -> PathSource<'cfg> {
        PathSource {
            source_id,
            path: path.to_path_buf(),
            updated: false,
            packages: Vec::new(),
            config,
            recursive: false,
        }
    }

    /// Creates a new source which is walked recursively to discover packages.
    ///
    /// This is similar to the `new` method, except that instead of requiring a
    /// valid package to be present at `root`, the folder is walked entirely to
    /// crawl for packages.
    ///
    /// Note that this should be used with care and likely shouldn't be chosen
    /// by default!
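    ///
    /// # Example
    ///
    /// A minimal sketch of discovering every package under a directory tree;
    /// the root path and `Config` setup below are illustrative assumptions.
    ///
    /// ```no_run
    /// use std::path::Path;
    ///
    /// use cargo::core::SourceId;
    /// use cargo::sources::PathSource;
    /// use cargo::util::Config;
    ///
    /// # fn main() -> cargo::util::CargoResult<()> {
    /// let config = Config::default()?;
    /// // Hypothetical directory containing several packages in subdirectories.
    /// let root = Path::new("/path/to/monorepo");
    /// let source_id = SourceId::for_path(root)?;
    /// let source = PathSource::new_recursive(root, source_id, &config);
    /// for pkg in source.read_packages()? {
    ///     println!("discovered {}", pkg.package_id());
    /// }
    /// # Ok(())
    /// # }
    /// ```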
    pub fn new_recursive(root: &Path, id: SourceId, config: &'cfg Config) -> PathSource<'cfg> {
        PathSource {
            recursive: true,
            ..PathSource::new(root, id, config)
        }
    }

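    /// Preloads a package for this source. The source is assumed to already
    /// know the package, so the filesystem is not scanned for it. This may only
    /// be called once, on a non-recursive source that has not been updated yet.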
    pub fn preload_with(&mut self, pkg: Package) {
        assert!(!self.updated);
        assert!(!self.recursive);
        assert!(self.packages.is_empty());
        self.updated = true;
        self.packages.push(pkg);
    }

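    /// Gets the package on the root path, updating this source first if it has
    /// not been updated yet. Returns an error if no package is found at the
    /// source's `path`.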
    pub fn root_package(&mut self) -> CargoResult<Package> {
        trace!("root_package; source={:?}", self);

        self.update()?;

        match self.packages.iter().find(|p| p.root() == &*self.path) {
            Some(pkg) => Ok(pkg.clone()),
            None => Err(internal(format!(
                "no package found in source {:?}",
                self.path
            ))),
        }
    }

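    /// Returns the packages discovered by this source. If the source has
    /// already been updated, the cached packages are returned; otherwise the
    /// filesystem is read (recursively when this is a recursive source).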
    pub fn read_packages(&self) -> CargoResult<Vec<Package>> {
        if self.updated {
            Ok(self.packages.clone())
        } else if self.recursive {
            ops::read_packages(&self.path, self.source_id, self.config)
        } else {
            let path = self.path.join("Cargo.toml");
            let (pkg, _) = ops::read_package(&path, self.source_id, self.config)?;
            Ok(vec![pkg])
        }
    }

    /// List all files relevant to building this package inside this source.
    ///
    /// This function will use the appropriate methods to determine the
    /// set of files underneath this source's directory which are relevant for
    /// building `pkg`.
    ///
    /// The basic assumption of this method is that all files in the directory
    /// are relevant for building this package, but it also uses mechanisms such
    /// as `.gitignore` to filter the list of files.
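    ///
    /// # Example
    ///
    /// A minimal sketch of listing the files of the root package; the package
    /// path and `Config` setup below are illustrative assumptions.
    ///
    /// ```no_run
    /// use std::path::Path;
    ///
    /// use cargo::core::SourceId;
    /// use cargo::sources::PathSource;
    /// use cargo::util::Config;
    ///
    /// # fn main() -> cargo::util::CargoResult<()> {
    /// let config = Config::default()?;
    /// // Hypothetical package directory containing a `Cargo.toml`.
    /// let path = Path::new("/path/to/my/package");
    /// let source_id = SourceId::for_path(path)?;
    /// let mut source = PathSource::new(path, source_id, &config);
    /// let pkg = source.root_package()?;
    /// for file in source.list_files(&pkg)? {
    ///     println!("would package: {}", file.display());
    /// }
    /// # Ok(())
    /// # }
    /// ```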
    pub fn list_files(&self, pkg: &Package) -> CargoResult<Vec<PathBuf>> {
        self._list_files(pkg).with_context(|| {
            format!(
                "failed to determine list of files in {}",
                pkg.root().display()
            )
        })
    }

    fn _list_files(&self, pkg: &Package) -> CargoResult<Vec<PathBuf>> {
        let root = pkg.root();
        let no_include_option = pkg.manifest().include().is_empty();
        let git_repo = if no_include_option {
            self.discover_git_repo(root)?
        } else {
            None
        };

        let mut exclude_builder = GitignoreBuilder::new(root);
        if no_include_option && git_repo.is_none() {
            // No `include` option and no git repo discovered (see rust-lang/cargo#7183).
            exclude_builder.add_line(None, ".*")?;
        }
        for rule in pkg.manifest().exclude() {
            exclude_builder.add_line(None, rule)?;
        }
        let ignore_exclude = exclude_builder.build()?;

        let mut include_builder = GitignoreBuilder::new(root);
        for rule in pkg.manifest().include() {
            include_builder.add_line(None, rule)?;
        }
        let ignore_include = include_builder.build()?;

        let ignore_should_package = |relative_path: &Path, is_dir: bool| {
            // "Include" and "exclude" options are mutually exclusive.
            if no_include_option {
                !ignore_exclude
                    .matched_path_or_any_parents(relative_path, is_dir)
                    .is_ignore()
            } else {
                if is_dir {
                    // Generally, include directives don't list every
                    // directory (nor should they!). Just skip all directory
                    // checks, and only check files.
                    return true;
                }
                ignore_include
                    .matched_path_or_any_parents(relative_path, /* is_dir */ false)
                    .is_ignore()
            }
        };

        let mut filter = |path: &Path, is_dir: bool| {
            let relative_path = match path.strip_prefix(root) {
                Ok(p) => p,
                Err(_) => return false,
            };

            let rel = relative_path.as_os_str();
            if rel == "Cargo.lock" {
                return pkg.include_lockfile();
            } else if rel == "Cargo.toml" {
                return true;
            }

            ignore_should_package(relative_path, is_dir)
        };

        // Attempt the Git-based file listing only if there is no `include`
        // option (see rust-lang/cargo#4135).
        if no_include_option {
            if let Some(repo) = git_repo {
                return self.list_files_git(pkg, &repo, &mut filter);
            }
        }
        self.list_files_walk(pkg, &mut filter)
    }

    /// Returns `Some(git2::Repository)` if a sibling `Cargo.toml` and `.git`
    /// directory were found; otherwise, the caller should fall back on the full
    /// file list.
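    ///
    /// The package's `Cargo.toml` must be tracked in the repository's index for
    /// the repository to be used; otherwise Git does not guide file selection.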
    fn discover_git_repo(&self, root: &Path) -> CargoResult<Option<git2::Repository>> {
        let repo = match git2::Repository::discover(root) {
            Ok(repo) => repo,
            Err(e) => {
                log::debug!(
                    "could not discover git repo at or above {}: {}",
                    root.display(),
                    e
                );
                return Ok(None);
            }
        };
        let index = repo
            .index()
            .with_context(|| format!("failed to open git index at {}", repo.path().display()))?;
        let repo_root = repo.workdir().ok_or_else(|| {
            anyhow::format_err!(
                "did not expect repo at {} to be bare",
                repo.path().display()
            )
        })?;
        let repo_relative_path = match paths::strip_prefix_canonical(root, repo_root) {
            Ok(p) => p,
            Err(e) => {
                log::warn!(
                    "cannot determine if path `{:?}` is in git repo `{:?}`: {:?}",
                    root,
                    repo_root,
                    e
                );
                return Ok(None);
            }
        };
        let manifest_path = repo_relative_path.join("Cargo.toml");
        if index.get_path(&manifest_path, 0).is_some() {
            return Ok(Some(repo));
        }
        // The package's Cargo.toml is not in git; don't use git to guide our selection.
        Ok(None)
    }

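    /// Lists files relevant to `pkg` by consulting the Git index and status
    /// information (including untracked files), recursing into submodules where
    /// possible and falling back to a plain filesystem walk otherwise.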
    fn list_files_git(
        &self,
        pkg: &Package,
        repo: &git2::Repository,
        filter: &mut dyn FnMut(&Path, bool) -> bool,
    ) -> CargoResult<Vec<PathBuf>> {
        warn!("list_files_git {}", pkg.package_id());
        let index = repo.index()?;
        let root = repo
            .workdir()
            .ok_or_else(|| anyhow::format_err!("can't list files on a bare repository"))?;
        let pkg_path = pkg.root();

        let mut ret = Vec::<PathBuf>::new();

        // We use information from the Git repository to guide us in traversing
        // its tree. The primary purpose of this is to take advantage of the
        // `.gitignore` and auto-ignore files that don't matter.
        //
        // Here we're also careful to look at both tracked and untracked files as
        // the untracked files are often part of a build and may become relevant
        // as part of a future commit.
        let index_files = index.iter().map(|entry| {
            use libgit2_sys::{GIT_FILEMODE_COMMIT, GIT_FILEMODE_LINK};
            // `is_dir` is an optimization to avoid calling
            // `fs::metadata` on every file.
            let is_dir = if entry.mode == GIT_FILEMODE_LINK as u32 {
                // Let the code below figure out if this symbolic link points
                // to a directory or not.
                None
            } else {
                Some(entry.mode == GIT_FILEMODE_COMMIT as u32)
            };
            (join(root, &entry.path), is_dir)
        });
        let mut opts = git2::StatusOptions::new();
        opts.include_untracked(true);
        if let Ok(suffix) = pkg_path.strip_prefix(root) {
            opts.pathspec(suffix);
        }
        let statuses = repo.statuses(Some(&mut opts))?;
        let mut skip_paths = HashSet::new();
        let untracked: Vec<_> = statuses
            .iter()
            .filter_map(|entry| {
                match entry.status() {
                    // Don't include Cargo.lock if it is untracked. Packaging will
                    // generate a new one as needed.
                    git2::Status::WT_NEW if entry.path() != Some("Cargo.lock") => {
                        Some(Ok((join(root, entry.path_bytes()), None)))
                    }
                    git2::Status::WT_DELETED => {
                        let path = match join(root, entry.path_bytes()) {
                            Ok(p) => p,
                            Err(e) => return Some(Err(e)),
                        };
                        skip_paths.insert(path);
                        None
                    }
                    _ => None,
                }
            })
            .collect::<CargoResult<_>>()?;

        let mut subpackages_found = Vec::new();

        for (file_path, is_dir) in index_files.chain(untracked) {
            let file_path = file_path?;
            if skip_paths.contains(&file_path) {
                continue;
            }

            // Filter out files blatantly outside this package. This is helped a
            // bit above via the `pathspec` function call, but we need to filter
            // the entries in the index as well.
            if !file_path.starts_with(pkg_path) {
                continue;
            }

            match file_path.file_name().and_then(|s| s.to_str()) {
                // The `target` directory is never included.
                Some("target") => continue,

                // Keep track of all sub-packages found and also strip out all
                // matches we've found so far. Note, though, that if we find
                // our own `Cargo.toml`, we keep going.
                Some("Cargo.toml") => {
                    let path = file_path.parent().unwrap();
                    if path != pkg_path {
                        warn!("subpackage found: {}", path.display());
                        ret.retain(|p| !p.starts_with(path));
                        subpackages_found.push(path.to_path_buf());
                        continue;
                    }
                }

                _ => {}
            }

            // If this file is part of any other sub-package we've found so far,
            // skip it.
            if subpackages_found.iter().any(|p| file_path.starts_with(p)) {
                continue;
            }

            // `is_dir` is None for symlinks. The `unwrap_or_else` checks whether
            // the symlink points to a directory.
            let is_dir = is_dir.unwrap_or_else(|| file_path.is_dir());
            if is_dir {
                warn!(" found submodule {}", file_path.display());
                let rel = file_path.strip_prefix(root)?;
                let rel = rel.to_str().ok_or_else(|| {
                    anyhow::format_err!("invalid utf-8 filename: {}", rel.display())
                })?;
                // Git submodules are currently only named through `/` path
                // separators, explicitly not `\` which Windows uses. Who knew?
                let rel = rel.replace(r"\", "/");
                match repo.find_submodule(&rel).and_then(|s| s.open()) {
                    Ok(repo) => {
                        let files = self.list_files_git(pkg, &repo, filter)?;
                        ret.extend(files.into_iter());
                    }
                    Err(..) => {
                        self.walk(&file_path, &mut ret, false, filter)?;
                    }
                }
            } else if filter(&file_path, is_dir) {
                assert!(!is_dir);
                // We found a file!
                warn!(" found {}", file_path.display());
                ret.push(file_path);
            }
        }
        return Ok(ret);

        #[cfg(unix)]
        fn join(path: &Path, data: &[u8]) -> CargoResult<PathBuf> {
            use std::ffi::OsStr;
            use std::os::unix::prelude::*;
            Ok(path.join(<OsStr as OsStrExt>::from_bytes(data)))
        }
        #[cfg(windows)]
        fn join(path: &Path, data: &[u8]) -> CargoResult<PathBuf> {
            use std::str;
            match str::from_utf8(data) {
                Ok(s) => Ok(path.join(s)),
                Err(e) => Err(anyhow::format_err!(
                    "cannot process path in git with a non utf8 filename: {}\n{:?}",
                    e,
                    data
                )),
            }
        }
    }

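    /// Lists files relevant to `pkg` by walking the filesystem from the package
    /// root, without any Git assistance.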
    fn list_files_walk(
        &self,
        pkg: &Package,
        filter: &mut dyn FnMut(&Path, bool) -> bool,
    ) -> CargoResult<Vec<PathBuf>> {
        let mut ret = Vec::new();
        self.walk(pkg.root(), &mut ret, true, filter)?;
        Ok(ret)
    }

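    /// Walks the filesystem starting at `path`, pushing every file accepted by
    /// `filter` onto `ret`. Symbolic links are followed; sub-packages and the
    /// root `target` directory are never descended into.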
    fn walk(
        &self,
        path: &Path,
        ret: &mut Vec<PathBuf>,
        is_root: bool,
        filter: &mut dyn FnMut(&Path, bool) -> bool,
    ) -> CargoResult<()> {
        let walkdir = WalkDir::new(path)
            .follow_links(true)
            .into_iter()
            .filter_entry(|entry| {
                let path = entry.path();
                let at_root = is_root && entry.depth() == 0;
                let is_dir = entry.file_type().is_dir();

                if !at_root && !filter(path, is_dir) {
                    return false;
                }

                if !is_dir {
                    return true;
                }

                // Don't recurse into any sub-packages that we have.
                if !at_root && path.join("Cargo.toml").exists() {
                    return false;
                }

                // Skip root Cargo artifacts.
                if is_root
                    && entry.depth() == 1
                    && path.file_name().and_then(|s| s.to_str()) == Some("target")
                {
                    return false;
                }

                true
            });
        for entry in walkdir {
            match entry {
                Ok(entry) => {
                    if !entry.file_type().is_dir() {
                        ret.push(entry.into_path());
                    }
                }
                Err(err) if err.loop_ancestor().is_some() => {
                    self.config.shell().warn(err)?;
                }
                Err(err) => match err.path() {
                    // If the error occurs with a path, simply push that path and
                    // recover. Callers that actually access the file will still
                    // hit the I/O error themselves later on.
                    Some(path) => ret.push(path.to_path_buf()),
                    None => return Err(err.into()),
                },
            }
        }

        Ok(())
    }

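    /// Returns the latest modification time and the path of the most recently
    /// modified file in `pkg`'s file list. The source must have been updated
    /// before calling this.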
    pub fn last_modified_file(&self, pkg: &Package) -> CargoResult<(FileTime, PathBuf)> {
        if !self.updated {
            return Err(internal(format!(
                "BUG: source `{:?}` was not updated",
                self.path
            )));
        }

        let mut max = FileTime::zero();
        let mut max_path = PathBuf::new();
        for file in self.list_files(pkg).with_context(|| {
            format!(
                "failed to determine the most recently modified file in {}",
                pkg.root().display()
            )
        })? {
            // An `fs::stat` error here is either because the path is a
            // broken symlink, a permissions error, or a race
            // condition where this path was `rm`-ed -- either way,
            // we can ignore the error and treat the path's `mtime`
            // as `0`.
            let mtime = paths::mtime(&file).unwrap_or_else(|_| FileTime::zero());
            if mtime > max {
                max = mtime;
                max_path = file;
            }
        }
        trace!("last modified file {}: {}", self.path.display(), max);
        Ok((max, max_path))
    }

    pub fn path(&self) -> &Path {
        &self.path
    }
}

impl<'cfg> Debug for PathSource<'cfg> {
    fn fmt(&self, f: &mut Formatter<'_>) -> fmt::Result {
        write!(f, "the paths source")
    }
}

impl<'cfg> Source for PathSource<'cfg> {
    fn query(&mut self, dep: &Dependency, f: &mut dyn FnMut(Summary)) -> Poll<CargoResult<()>> {
        for s in self.packages.iter().map(|p| p.summary()) {
            if dep.matches(s) {
                f(s.clone())
            }
        }
        Poll::Ready(Ok(()))
    }

    fn fuzzy_query(
        &mut self,
        _dep: &Dependency,
        f: &mut dyn FnMut(Summary),
    ) -> Poll<CargoResult<()>> {
        for s in self.packages.iter().map(|p| p.summary()) {
            f(s.clone())
        }
        Poll::Ready(Ok(()))
    }

    fn supports_checksums(&self) -> bool {
        false
    }

    fn requires_precise(&self) -> bool {
        false
    }

    fn source_id(&self) -> SourceId {
        self.source_id
    }

    fn update(&mut self) -> CargoResult<()> {
        if !self.updated {
            let packages = self.read_packages()?;
            self.packages.extend(packages.into_iter());
            self.updated = true;
        }

        Ok(())
    }

    fn download(&mut self, id: PackageId) -> CargoResult<MaybePackage> {
        trace!("getting packages; id={}", id);

        let pkg = self.packages.iter().find(|pkg| pkg.package_id() == id);
        pkg.cloned()
            .map(MaybePackage::Ready)
            .ok_or_else(|| internal(format!("failed to find {} in path source", id)))
    }

    fn finish_download(&mut self, _id: PackageId, _data: Vec<u8>) -> CargoResult<Package> {
        panic!("no download should have started")
    }

    fn fingerprint(&self, pkg: &Package) -> CargoResult<String> {
        let (max, max_path) = self.last_modified_file(pkg)?;
        // Note that we try to strip the prefix of this package to get a
        // relative path to ensure that the fingerprint remains consistent
        // across entire project directory renames.
        let max_path = max_path.strip_prefix(&self.path).unwrap_or(&max_path);
        Ok(format!("{} ({})", max, max_path.display()))
    }

    fn describe(&self) -> String {
        match self.source_id.url().to_file_path() {
            Ok(path) => path.display().to_string(),
            Err(_) => self.source_id.to_string(),
        }
    }

    fn add_to_yanked_whitelist(&mut self, _pkgs: &[PackageId]) {}

    fn is_yanked(&mut self, _pkg: PackageId) -> CargoResult<bool> {
        Ok(false)
    }

    fn block_until_ready(&mut self) -> CargoResult<()> {
        Ok(())
    }
}