]> git.proxmox.com Git - cargo.git/blob - src/cargo/sources/registry/mod.rs
913da02ba88587f9c59137d1d41aac207752f198
[cargo.git] / src / cargo / sources / registry / mod.rs
1 //! A `Source` for registry-based packages.
2 //!
3 //! # What's a Registry?
4 //!
5 //! Registries are central locations where packages can be uploaded to,
6 //! discovered, and searched for. The purpose of a registry is to have a
7 //! location that serves as permanent storage for versions of a crate over time.
8 //!
9 //! Compared to git sources, a registry provides many packages as well as many
10 //! versions simultaneously. Git sources can also have commits deleted through
11 //! rebasings where registries cannot have their versions deleted.
12 //!
13 //! # The Index of a Registry
14 //!
15 //! One of the major difficulties with a registry is that hosting so many
16 //! packages may quickly run into performance problems when dealing with
17 //! dependency graphs. It's infeasible for cargo to download the entire contents
18 //! of the registry just to resolve one package's dependencies, for example. As
19 //! a result, cargo needs some efficient method of querying what packages are
20 //! available on a registry, what versions are available, and what the
21 //! dependencies for each version are.
22 //!
23 //! One method of doing so would be having the registry expose an HTTP endpoint
24 //! which can be queried with a list of packages and a response of their
25 //! dependencies and versions is returned. This is somewhat inefficient however
26 //! as we may have to hit the endpoint many times and we may have already
27 //! queried for much of the data locally already (for other packages, for
28 //! example). This also involves inventing a transport format between the
29 //! registry and Cargo itself, so this route was not taken.
30 //!
31 //! Instead, Cargo communicates with registries through a git repository
32 //! referred to as the Index. The Index of a registry is essentially an easily
33 //! query-able version of the registry's database for a list of versions of a
34 //! package as well as a list of dependencies for each version.
35 //!
36 //! Using git to host this index provides a number of benefits:
37 //!
38 //! * The entire index can be stored efficiently locally on disk. This means
39 //! that all queries of a registry can happen locally and don't need to touch
40 //! the network.
41 //!
42 //! * Updates of the index are quite efficient. Using git buys incremental
43 //! updates, compressed transmission, etc for free. The index must be updated
44 //! each time we need fresh information from a registry, but this is one
45 //! update of a git repository that probably hasn't changed a whole lot so
46 //! it shouldn't be too expensive.
47 //!
48 //! Additionally, each modification to the index is just appending a line at
49 //! the end of a file (the exact format is described later). This means that
50 //! the commits for an index are quite small and easily applied/compressible.
51 //!
52 //! ## The format of the Index
53 //!
54 //! The index is a store for the list of versions for all packages known, so its
55 //! format on disk is optimized slightly to ensure that `ls registry` doesn't
56 //! produce a list of all packages ever known. The index also wants to ensure
57 //! that there's not a million files which may actually end up hitting
58 //! filesystem limits at some point. To this end, a few decisions were made
59 //! about the format of the registry:
60 //!
61 //! 1. Each crate will have one file corresponding to it. Each version for a
62 //! crate will just be a line in this file.
63 //! 2. There will be two tiers of directories for crate names, under which
64 //! crates corresponding to those tiers will be located.
65 //!
66 //! As an example, this is an example hierarchy of an index:
67 //!
68 //! ```notrust
69 //! .
70 //! ├── 3
71 //! │   └── u
72 //! │       └── url
73 //! ├── bz
74 //! │   └── ip
75 //! │       └── bzip2
76 //! ├── config.json
77 //! ├── en
78 //! │   └── co
79 //! │       └── encoding
80 //! └── li
81 //!     ├── bg
82 //!     │   └── libgit2
83 //!     └── nk
84 //!         └── link-config
85 //! ```
86 //!
87 //! The root of the index contains a `config.json` file with a few entries
88 //! corresponding to the registry (see [`RegistryConfig`] below).
89 //!
90 //! Otherwise, there are three numbered directories (1, 2, 3) for crates with
91 //! names 1, 2, and 3 characters in length. The 1/2 directories simply have the
92 //! crate files underneath them, while the 3 directory is sharded by the first
93 //! letter of the crate name.
94 //!
95 //! Otherwise the top-level directory contains many two-letter directory names,
96 //! each of which has many sub-folders with two letters. At the end of all these
97 //! are the actual crate files themselves.
98 //!
99 //! The purpose of this layout is to hopefully cut down on `ls` sizes as well as
100 //! efficient lookup based on the crate name itself.
101 //!
102 //! ## Crate files
103 //!
104 //! Each file in the index is the history of one crate over time. Each line in
105 //! the file corresponds to one version of a crate, stored in JSON format (see
106 //! the `RegistryPackage` structure below).
107 //!
108 //! As new versions are published, new lines are appended to this file. The only
109 //! modifications to this file that should happen over time are yanks of a
110 //! particular version.
111 //!
112 //! # Downloading Packages
113 //!
114 //! The purpose of the Index was to provide an efficient method to resolve the
115 //! dependency graph for a package. So far we only required one network
116 //! interaction to update the registry's repository (yay!). After resolution has
117 //! been performed, however we need to download the contents of packages so we
118 //! can read the full manifest and build the source code.
119 //!
120 //! To accomplish this, this source's `download` method will make an HTTP
121 //! request per-package requested to download tarballs into a local cache. These
122 //! tarballs will then be unpacked into a destination folder.
123 //!
124 //! Note that because versions uploaded to the registry are frozen forever that
125 //! the HTTP download and unpacking can all be skipped if the version has
126 //! already been downloaded and unpacked. This caching allows us to only
127 //! download a package when absolutely necessary.
128 //!
129 //! # Filesystem Hierarchy
130 //!
131 //! Overall, the `$HOME/.cargo` looks like this when talking about the registry:
132 //!
133 //! ```notrust
134 //! # A folder under which all registry metadata is hosted (similar to
135 //! # $HOME/.cargo/git)
136 //! $HOME/.cargo/registry/
137 //!
138 //! # For each registry that cargo knows about (keyed by hostname + hash)
139 //! # there is a folder which is the checked out version of the index for
140 //! # the registry in this location. Note that this is done so cargo can
141 //! # support multiple registries simultaneously
142 //! index/
143 //!     registry1-<hash>/
144 //!     registry2-<hash>/
145 //!     ...
146 //!
147 //! # This folder is a cache for all downloaded tarballs from a registry.
148 //! # Once downloaded and verified, a tarball never changes.
149 //! cache/
150 //!     registry1-<hash>/<pkg>-<version>.crate
151 //!     ...
152 //!
153 //! # Location in which all tarballs are unpacked. Each tarball is known to
154 //! # be frozen after downloading, so transitively this folder is also
155 //! # frozen once it's unpacked (it's never unpacked again)
156 //! src/
157 //!     registry1-<hash>/<pkg>-<version>/...
158 //!     ...
159 //! ```
160
161 use std::borrow::Cow;
162 use std::collections::BTreeMap;
163 use std::collections::HashSet;
164 use std::fs::{File, OpenOptions};
165 use std::io::Write;
166 use std::path::{Path, PathBuf};
167 use std::task::Poll;
168
169 use anyhow::Context as _;
170 use flate2::read::GzDecoder;
171 use log::debug;
172 use semver::Version;
173 use serde::Deserialize;
174 use tar::Archive;
175
176 use crate::core::dependency::{DepKind, Dependency};
177 use crate::core::source::MaybePackage;
178 use crate::core::{Package, PackageId, Source, SourceId, Summary};
179 use crate::sources::PathSource;
180 use crate::util::hex;
181 use crate::util::interning::InternedString;
182 use crate::util::into_url::IntoUrl;
183 use crate::util::network::PollExt;
184 use crate::util::{restricted_names, CargoResult, Config, Filesystem, OptVersionReq};
185
/// URL of the crates.io index repository.
pub const CRATES_IO_INDEX: &str = "https://github.com/rust-lang/crates.io-index";
/// Name under which crates.io is referred to as a registry.
// NOTE(review): not used in this file — confirm the exact meaning at the use sites.
pub const CRATES_IO_REGISTRY: &str = "crates-io";
/// Domain name of crates.io.
pub const CRATES_IO_DOMAIN: &str = "crates.io";

/// Name of the marker file written into an unpacked package directory once
/// extraction has completed (see `RegistrySource::unpack_package`).
const PACKAGE_SOURCE_LOCK: &str = ".cargo-ok";

// Markers that may appear in the `dl` download-URL template of a registry's
// `config.json`; see the documentation of `RegistryConfig::dl` for what each
// one is replaced with.
/// Marker for the crate's name.
const CRATE_TEMPLATE: &str = "{crate}";
/// Marker for the crate's version.
const VERSION_TEMPLATE: &str = "{version}";
/// Marker for the crate's prefix directory name.
const PREFIX_TEMPLATE: &str = "{prefix}";
/// Marker for the crate's prefix directory name converted to lowercase.
const LOWER_PREFIX_TEMPLATE: &str = "{lowerprefix}";
/// Marker for the crate's sha256 checksum.
const CHECKSUM_TEMPLATE: &str = "{sha256-checksum}";
195
/// A "source" for a local (see `local::LocalRegistry`) or remote (see
/// `remote::RemoteRegistry`) registry.
///
/// This contains common functionality that is shared between the two registry
/// kinds, with the registry-specific logic implemented as part of the
/// [`RegistryData`] trait referenced via the `ops` field.
///
/// The `'cfg` lifetime is the lifetime of the borrowed [`Config`].
pub struct RegistrySource<'cfg> {
    /// The [`SourceId`] this source was constructed for; it is what
    /// `Source::source_id` returns.
    source_id: SourceId,
    /// The path where crate files are extracted (`$CARGO_HOME/registry/src/$REG-HASH`).
    src_path: Filesystem,
    /// Local reference to [`Config`] for convenience.
    config: &'cfg Config,
    /// Whether or not the index has been updated.
    ///
    /// This is used as an optimization to avoid updating if not needed, such
    /// as `Cargo.lock` already exists and the index already contains the
    /// locked entries. Or, to avoid updating multiple times.
    ///
    /// Only remote registries really need to update. Local registries only
    /// check that the index exists.
    updated: bool,
    /// Abstraction for interfacing to the different registry kinds.
    ops: Box<dyn RegistryData + 'cfg>,
    /// Interface for managing the on-disk index.
    index: index::RegistryIndex<'cfg>,
    /// A set of packages that should be allowed to be used, even if they are
    /// yanked.
    ///
    /// This is populated from the entries in `Cargo.lock` to ensure that
    /// `cargo update -p somepkg` won't unlock yanked entries in `Cargo.lock`.
    /// Otherwise, the resolver would think that those entries no longer
    /// exist, and it would trigger updates to unrelated packages.
    yanked_whitelist: HashSet<PackageId>,
}
230
/// The `config.json` file stored in the index.
#[derive(Deserialize)]
pub struct RegistryConfig {
    /// Download endpoint for all crates.
    ///
    /// The string is a template which will generate the download URL for the
    /// tarball of a specific version of a crate. The substrings `{crate}` and
    /// `{version}` will be replaced with the crate's name and version
    /// respectively. The substring `{prefix}` will be replaced with the
    /// crate's prefix directory name, and the substring `{lowerprefix}` will
    /// be replaced with the crate's prefix directory name converted to
    /// lowercase. The substring `{sha256-checksum}` will be replaced with the
    /// crate's sha256 checksum.
    ///
    /// For backwards compatibility, if the string does not contain any
    /// markers (`{crate}`, `{version}`, `{prefix}`, or `{lowerprefix}`), it
    /// will be extended with `/{crate}/{version}/download` to
    /// support registries like crates.io which were created before the
    /// templating setup was created.
    pub dl: String,

    /// API endpoint for the registry. This is what's actually hit to perform
    /// operations like yanks, owner modifications, publish new crates, etc.
    /// If this is None, the registry does not support API commands.
    pub api: Option<String>,
}
257
/// The maximum version of the `v` field in the index this version of cargo
/// understands.
///
/// The `v` field is the optional schema version of an index entry (see
/// `RegistryPackage::v`).
pub(crate) const INDEX_V_MAX: u32 = 2;
261
/// A single line in the index representing a single version of a package.
///
/// The `'a` lifetime allows the dependency entries to borrow string data from
/// the JSON text being deserialized (`#[serde(borrow)]`).
#[derive(Deserialize)]
pub struct RegistryPackage<'a> {
    /// Name of the package.
    name: InternedString,
    /// Version of the package described by this entry.
    vers: Version,
    /// The dependencies of this version, as encoded in the index.
    #[serde(borrow)]
    deps: Vec<RegistryDependency<'a>>,
    /// Map from feature name to the list of values that feature enables.
    features: BTreeMap<InternedString, Vec<InternedString>>,
    /// This field contains features with new, extended syntax. Specifically,
    /// namespaced features (`dep:`) and weak dependencies (`pkg?/feat`).
    ///
    /// This is separated from `features` because versions older than 1.19
    /// will fail to load due to not being able to parse the new syntax, even
    /// with a `Cargo.lock` file.
    features2: Option<BTreeMap<InternedString, Vec<InternedString>>>,
    /// Checksum recorded in the index for this version.
    // NOTE(review): presumably the sha256 of the `.crate` file (cf. the
    // `{sha256-checksum}` download-URL marker) — confirm.
    cksum: String,
    /// If `true`, Cargo will skip this version when resolving.
    ///
    /// This was added in 2014. Everything in the crates.io index has this set
    /// now, so this probably doesn't need to be an option anymore.
    yanked: Option<bool>,
    /// Native library name this package links to.
    ///
    /// Added early 2018 (see <https://github.com/rust-lang/cargo/pull/4978>),
    /// can be `None` if published before then.
    links: Option<InternedString>,
    /// The schema version for this entry.
    ///
    /// If this is None, it defaults to version 1. Entries with unknown
    /// versions are ignored.
    ///
    /// Version `2` format adds the `features2` field.
    ///
    /// This provides a method to safely introduce changes to index entries
    /// and allow older versions of cargo to ignore newer entries it doesn't
    /// understand. This is honored as of 1.51, so unfortunately older
    /// versions will ignore it, and potentially misinterpret version 2 and
    /// newer entries.
    ///
    /// The intent is that versions older than 1.51 will work with a
    /// pre-existing `Cargo.lock`, but they may not correctly process `cargo
    /// update` or build a lock from scratch. In that case, cargo may
    /// incorrectly select a new package that uses a new index format. A
    /// workaround is to downgrade any packages that are incompatible with the
    /// `--precise` flag of `cargo update`.
    v: Option<u32>,
}
309
/// Checks that index lines containing JSON escape sequences are accepted by
/// the JSON deserializer for [`RegistryPackage`].
#[test]
fn escaped_char_in_json() {
    /// Deserializes one index line, panicking if it is rejected at the JSON layer.
    fn parse(line: &str) -> RegistryPackage<'_> {
        serde_json::from_str(line).unwrap()
    }

    // Plain entries without any escape sequences.
    let _ = parse(r#"{"name":"a","vers":"0.0.1","deps":[],"cksum":"bae3","features":{}}"#);
    let _ = parse(
        r#"{"name":"a","vers":"0.0.1","deps":[],"cksum":"bae3","features":{"test":["k","q"]},"links":"a-sys"}"#,
    );

    // Now add escaped characters in all the places they can go. The resulting
    // values are not valid, but any error should surface later than JSON
    // parsing.
    let _ = parse(
        r#"{
"name":"This name has a escaped cher in it \n\t\" ",
"vers":"0.0.1",
"deps":[{
"name": " \n\t\" ",
"req": " \n\t\" ",
"features": [" \n\t\" "],
"optional": true,
"default_features": true,
"target": " \n\t\" ",
"kind": " \n\t\" ",
"registry": " \n\t\" "
}],
"cksum":"bae3",
"features":{"test \n\t\" ":["k \n\t\" ","q \n\t\" "]},
"links":" \n\t\" "}"#,
    );
}
342
/// A dependency as encoded in the index JSON.
///
/// Converted into a cargo [`Dependency`] by `RegistryDependency::into_dep`.
#[derive(Deserialize)]
struct RegistryDependency<'a> {
    /// Name of the dependency. When `package` is set, this is instead the
    /// explicit name the dependency has in the manifest.
    name: InternedString,
    /// Version requirement string, parsed when converting to a [`Dependency`].
    #[serde(borrow)]
    req: Cow<'a, str>,
    /// Features requested of the dependency. Empty strings are filtered out
    /// during conversion.
    features: Vec<InternedString>,
    /// Whether the dependency is optional.
    optional: bool,
    /// Whether the dependency's default features are enabled.
    default_features: bool,
    /// Platform specification the dependency is restricted to, if any.
    target: Option<Cow<'a, str>>,
    /// Dependency kind: `"dev"` or `"build"`; anything else (including
    /// absence) is treated as a normal dependency.
    kind: Option<Cow<'a, str>>,
    /// URL of the registry the dependency comes from; `None` if it is from
    /// the same index as the package itself.
    registry: Option<Cow<'a, str>>,
    /// Real package name, present when the dependency is renamed.
    package: Option<InternedString>,
    /// Whether the dependency is public; treated as `false` when absent.
    public: Option<bool>,
}
358
359 impl<'a> RegistryDependency<'a> {
360 /// Converts an encoded dependency in the registry to a cargo dependency
361 pub fn into_dep(self, default: SourceId) -> CargoResult<Dependency> {
362 let RegistryDependency {
363 name,
364 req,
365 mut features,
366 optional,
367 default_features,
368 target,
369 kind,
370 registry,
371 package,
372 public,
373 } = self;
374
375 let id = if let Some(registry) = &registry {
376 SourceId::for_registry(&registry.into_url()?)?
377 } else {
378 default
379 };
380
381 let mut dep = Dependency::parse(package.unwrap_or(name), Some(&req), id)?;
382 if package.is_some() {
383 dep.set_explicit_name_in_toml(name);
384 }
385 let kind = match kind.as_deref().unwrap_or("") {
386 "dev" => DepKind::Development,
387 "build" => DepKind::Build,
388 _ => DepKind::Normal,
389 };
390
391 let platform = match target {
392 Some(target) => Some(target.parse()?),
393 None => None,
394 };
395
396 // All dependencies are private by default
397 let public = public.unwrap_or(false);
398
399 // Unfortunately older versions of cargo and/or the registry ended up
400 // publishing lots of entries where the features array contained the
401 // empty feature, "", inside. This confuses the resolution process much
402 // later on and these features aren't actually valid, so filter them all
403 // out here.
404 features.retain(|s| !s.is_empty());
405
406 // In index, "registry" is null if it is from the same index.
407 // In Cargo.toml, "registry" is None if it is from the default
408 if !id.is_default_registry() {
409 dep.set_registry_id(id);
410 }
411
412 dep.set_optional(optional)
413 .set_default_features(default_features)
414 .set_features(features)
415 .set_platform(platform)
416 .set_kind(kind)
417 .set_public(public);
418
419 Ok(dep)
420 }
421 }
422
/// An abstract interface to handle both a local (see `local::LocalRegistry`)
/// and remote (see `remote::RemoteRegistry`) registry.
///
/// This allows [`RegistrySource`] to abstractly handle both registry kinds.
pub trait RegistryData {
    /// Performs initialization for the registry.
    ///
    /// This should be safe to call multiple times, the implementation is
    /// expected to not do any work if it is already prepared.
    fn prepare(&self) -> CargoResult<()>;

    /// Returns the path to the index.
    ///
    /// Note that different registries store the index in different formats
    /// (remote=git, local=files).
    fn index_path(&self) -> &Filesystem;

    /// Loads the JSON for a specific named package from the index.
    ///
    /// * `root` is the root path to the index.
    /// * `path` is the relative path to the package to load (like `ca/rg/cargo`).
    /// * `data` is a callback that will receive the raw bytes of the index JSON file.
    ///
    /// If `load` returns a `Poll::Pending` then it must not have called data.
    fn load(
        &self,
        root: &Path,
        path: &Path,
        data: &mut dyn FnMut(&[u8]) -> CargoResult<()>,
    ) -> Poll<CargoResult<()>>;

    /// Loads the `config.json` file and returns it.
    ///
    /// Local registries don't have a config, and return `None`.
    fn config(&mut self) -> CargoResult<Option<RegistryConfig>>;

    /// Updates the index.
    ///
    /// For a remote registry, this updates the index over the network. Local
    /// registries only check that the index exists.
    fn update_index(&mut self) -> CargoResult<()>;

    /// Prepare to start downloading a `.crate` file.
    ///
    /// Despite the name, this doesn't actually download anything. If the
    /// `.crate` is already downloaded, then it returns [`MaybeLock::Ready`].
    /// If it hasn't been downloaded, then it returns [`MaybeLock::Download`]
    /// which contains the URL to download. The [`crate::core::package::Downloads`]
    /// system handles the actual download process. After downloading, it
    /// calls [`Self::finish_download`] to save the downloaded file.
    ///
    /// `checksum` is currently only used by local registries to verify the
    /// file contents (because local registries never actually download
    /// anything). Remote registries will validate the checksum in
    /// `finish_download`. For already downloaded `.crate` files, it does not
    /// validate the checksum, assuming the filesystem does not suffer from
    /// corruption or manipulation.
    fn download(&mut self, pkg: PackageId, checksum: &str) -> CargoResult<MaybeLock>;

    /// Finish a download by saving a `.crate` file to disk.
    ///
    /// After [`crate::core::package::Downloads`] has finished a download,
    /// it will call this to save the `.crate` file. This is only relevant
    /// for remote registries. This should validate the checksum and save
    /// the given data to the on-disk cache.
    ///
    /// Returns a [`File`] handle to the `.crate` file, positioned at the start.
    fn finish_download(&mut self, pkg: PackageId, checksum: &str, data: &[u8])
        -> CargoResult<File>;

    /// Returns whether or not the `.crate` file is already downloaded.
    ///
    /// The default implementation unconditionally returns `true`.
    fn is_crate_downloaded(&self, _pkg: PackageId) -> bool {
        true
    }

    /// Validates that the global package cache lock is held.
    ///
    /// Given the [`Filesystem`], this will make sure that the package cache
    /// lock is held. If not, it will panic. See
    /// [`Config::acquire_package_cache_lock`] for acquiring the global lock.
    ///
    /// Returns the [`Path`] to the [`Filesystem`].
    fn assert_index_locked<'a>(&self, path: &'a Filesystem) -> &'a Path;

    /// Returns the current "version" of the index.
    ///
    /// For local registries, this returns `None` because there is no
    /// versioning. For remote registries, this returns the SHA hash of the
    /// git index on disk (or None if the index hasn't been downloaded yet).
    ///
    /// This is used by index caching to check if the cache is out of date.
    fn current_version(&self) -> Option<InternedString>;

    /// Block until all outstanding `Poll::Pending` requests are `Poll::Ready`.
    fn block_until_ready(&mut self) -> CargoResult<()>;
}
519
/// The status of [`RegistryData::download`] which indicates if a `.crate`
/// file has already been downloaded, or if not then the URL to download.
pub enum MaybeLock {
    /// The `.crate` file is already downloaded. [`File`] is a handle to the
    /// opened `.crate` file on the filesystem.
    Ready(File),
    /// The `.crate` file is not downloaded, here's the URL to download it from.
    ///
    /// `url` is the location to fetch the `.crate` file from.
    ///
    /// `descriptor` is just a text string to display to the user of what is
    /// being downloaded.
    Download { url: String, descriptor: String },
}
532
533 mod index;
534 mod local;
535 mod remote;
536
537 fn short_name(id: SourceId) -> String {
538 let hash = hex::short_hash(&id);
539 let ident = id.url().host_str().unwrap_or("").to_string();
540 format!("{}-{}", ident, hash)
541 }
542
543 impl<'cfg> RegistrySource<'cfg> {
544 pub fn remote(
545 source_id: SourceId,
546 yanked_whitelist: &HashSet<PackageId>,
547 config: &'cfg Config,
548 ) -> RegistrySource<'cfg> {
549 let name = short_name(source_id);
550 let ops = remote::RemoteRegistry::new(source_id, config, &name);
551 RegistrySource::new(source_id, config, &name, Box::new(ops), yanked_whitelist)
552 }
553
554 pub fn local(
555 source_id: SourceId,
556 path: &Path,
557 yanked_whitelist: &HashSet<PackageId>,
558 config: &'cfg Config,
559 ) -> RegistrySource<'cfg> {
560 let name = short_name(source_id);
561 let ops = local::LocalRegistry::new(path, config, &name);
562 RegistrySource::new(source_id, config, &name, Box::new(ops), yanked_whitelist)
563 }
564
565 fn new(
566 source_id: SourceId,
567 config: &'cfg Config,
568 name: &str,
569 ops: Box<dyn RegistryData + 'cfg>,
570 yanked_whitelist: &HashSet<PackageId>,
571 ) -> RegistrySource<'cfg> {
572 RegistrySource {
573 src_path: config.registry_source_path().join(name),
574 config,
575 source_id,
576 updated: false,
577 index: index::RegistryIndex::new(source_id, ops.index_path(), config),
578 yanked_whitelist: yanked_whitelist.clone(),
579 ops,
580 }
581 }
582
583 /// Decode the configuration stored within the registry.
584 ///
585 /// This requires that the index has been at least checked out.
586 pub fn config(&mut self) -> CargoResult<Option<RegistryConfig>> {
587 self.ops.config()
588 }
589
590 /// Unpacks a downloaded package into a location where it's ready to be
591 /// compiled.
592 ///
593 /// No action is taken if the source looks like it's already unpacked.
594 fn unpack_package(&self, pkg: PackageId, tarball: &File) -> CargoResult<PathBuf> {
595 // The `.cargo-ok` file is used to track if the source is already
596 // unpacked.
597 let package_dir = format!("{}-{}", pkg.name(), pkg.version());
598 let dst = self.src_path.join(&package_dir);
599 dst.create_dir()?;
600 let path = dst.join(PACKAGE_SOURCE_LOCK);
601 let path = self.config.assert_package_cache_locked(&path);
602 let unpack_dir = path.parent().unwrap();
603 if let Ok(meta) = path.metadata() {
604 if meta.len() > 0 {
605 return Ok(unpack_dir.to_path_buf());
606 }
607 }
608 let gz = GzDecoder::new(tarball);
609 let mut tar = Archive::new(gz);
610 let prefix = unpack_dir.file_name().unwrap();
611 let parent = unpack_dir.parent().unwrap();
612 for entry in tar.entries()? {
613 let mut entry = entry.with_context(|| "failed to iterate over archive")?;
614 let entry_path = entry
615 .path()
616 .with_context(|| "failed to read entry path")?
617 .into_owned();
618
619 // We're going to unpack this tarball into the global source
620 // directory, but we want to make sure that it doesn't accidentally
621 // (or maliciously) overwrite source code from other crates. Cargo
622 // itself should never generate a tarball that hits this error, and
623 // crates.io should also block uploads with these sorts of tarballs,
624 // but be extra sure by adding a check here as well.
625 if !entry_path.starts_with(prefix) {
626 anyhow::bail!(
627 "invalid tarball downloaded, contains \
628 a file at {:?} which isn't under {:?}",
629 entry_path,
630 prefix
631 )
632 }
633 // Unpacking failed
634 let mut result = entry.unpack_in(parent).map_err(anyhow::Error::from);
635 if cfg!(windows) && restricted_names::is_windows_reserved_path(&entry_path) {
636 result = result.with_context(|| {
637 format!(
638 "`{}` appears to contain a reserved Windows path, \
639 it cannot be extracted on Windows",
640 entry_path.display()
641 )
642 });
643 }
644 result
645 .with_context(|| format!("failed to unpack entry at `{}`", entry_path.display()))?;
646 }
647
648 // The lock file is created after unpacking so we overwrite a lock file
649 // which may have been extracted from the package.
650 let mut ok = OpenOptions::new()
651 .create(true)
652 .read(true)
653 .write(true)
654 .open(&path)
655 .with_context(|| format!("failed to open `{}`", path.display()))?;
656
657 // Write to the lock file to indicate that unpacking was successful.
658 write!(ok, "ok")?;
659
660 Ok(unpack_dir.to_path_buf())
661 }
662
663 fn do_update(&mut self) -> CargoResult<()> {
664 self.ops.update_index()?;
665 let path = self.ops.index_path();
666 self.index = index::RegistryIndex::new(self.source_id, path, self.config);
667 self.updated = true;
668 Ok(())
669 }
670
671 fn get_pkg(&mut self, package: PackageId, path: &File) -> CargoResult<Package> {
672 let path = self
673 .unpack_package(package, path)
674 .with_context(|| format!("failed to unpack package `{}`", package))?;
675 let mut src = PathSource::new(&path, self.source_id, self.config);
676 src.update()?;
677 let mut pkg = match src.download(package)? {
678 MaybePackage::Ready(pkg) => pkg,
679 MaybePackage::Download { .. } => unreachable!(),
680 };
681
682 // After we've loaded the package configure its summary's `checksum`
683 // field with the checksum we know for this `PackageId`.
684 let req = OptVersionReq::exact(package.version());
685 let summary_with_cksum = self
686 .index
687 .summaries(package.name(), &req, &mut *self.ops)?
688 .expect("a downloaded dep now pending!?")
689 .map(|s| s.summary.clone())
690 .next()
691 .expect("summary not found");
692 if let Some(cksum) = summary_with_cksum.checksum() {
693 pkg.manifest_mut()
694 .summary_mut()
695 .set_checksum(cksum.to_string());
696 }
697
698 Ok(pkg)
699 }
700 }
701
702 impl<'cfg> Source for RegistrySource<'cfg> {
703 fn query(&mut self, dep: &Dependency, f: &mut dyn FnMut(Summary)) -> Poll<CargoResult<()>> {
704 // If this is a precise dependency, then it came from a lock file and in
705 // theory the registry is known to contain this version. If, however, we
706 // come back with no summaries, then our registry may need to be
707 // updated, so we fall back to performing a lazy update.
708 if dep.source_id().precise().is_some() && !self.updated {
709 debug!("attempting query without update");
710 let mut called = false;
711 let pend =
712 self.index
713 .query_inner(dep, &mut *self.ops, &self.yanked_whitelist, &mut |s| {
714 if dep.matches(&s) {
715 called = true;
716 f(s);
717 }
718 })?;
719 if pend.is_pending() {
720 return Poll::Pending;
721 }
722 if called {
723 return Poll::Ready(Ok(()));
724 } else {
725 debug!("falling back to an update");
726 self.do_update()?;
727 }
728 }
729
730 self.index
731 .query_inner(dep, &mut *self.ops, &self.yanked_whitelist, &mut |s| {
732 if dep.matches(&s) {
733 f(s);
734 }
735 })
736 }
737
738 fn fuzzy_query(
739 &mut self,
740 dep: &Dependency,
741 f: &mut dyn FnMut(Summary),
742 ) -> Poll<CargoResult<()>> {
743 self.index
744 .query_inner(dep, &mut *self.ops, &self.yanked_whitelist, f)
745 }
746
747 fn supports_checksums(&self) -> bool {
748 true
749 }
750
751 fn requires_precise(&self) -> bool {
752 false
753 }
754
755 fn source_id(&self) -> SourceId {
756 self.source_id
757 }
758
759 fn update(&mut self) -> CargoResult<()> {
760 // If we have an imprecise version then we don't know what we're going
761 // to look for, so we always attempt to perform an update here.
762 //
763 // If we have a precise version, then we'll update lazily during the
764 // querying phase. Note that precise in this case is only
765 // `Some("locked")` as other `Some` values indicate a `cargo update
766 // --precise` request
767 if self.source_id.precise() != Some("locked") {
768 self.do_update()?;
769 } else {
770 debug!("skipping update due to locked registry");
771 }
772 Ok(())
773 }
774
775 fn download(&mut self, package: PackageId) -> CargoResult<MaybePackage> {
776 let hash = self
777 .index
778 .hash(package, &mut *self.ops)?
779 .expect("we got to downloading a dep while pending!?");
780 match self.ops.download(package, hash)? {
781 MaybeLock::Ready(file) => self.get_pkg(package, &file).map(MaybePackage::Ready),
782 MaybeLock::Download { url, descriptor } => {
783 Ok(MaybePackage::Download { url, descriptor })
784 }
785 }
786 }
787
788 fn finish_download(&mut self, package: PackageId, data: Vec<u8>) -> CargoResult<Package> {
789 let hash = self
790 .index
791 .hash(package, &mut *self.ops)?
792 .expect("we got to downloading a dep while pending!?");
793 let file = self.ops.finish_download(package, hash, &data)?;
794 self.get_pkg(package, &file)
795 }
796
797 fn fingerprint(&self, pkg: &Package) -> CargoResult<String> {
798 Ok(pkg.package_id().version().to_string())
799 }
800
801 fn describe(&self) -> String {
802 self.source_id.display_index()
803 }
804
805 fn add_to_yanked_whitelist(&mut self, pkgs: &[PackageId]) {
806 self.yanked_whitelist.extend(pkgs);
807 }
808
809 fn is_yanked(&mut self, pkg: PackageId) -> CargoResult<bool> {
810 if !self.updated {
811 self.do_update()?;
812 }
813 loop {
814 match self.index.is_yanked(pkg, &mut *self.ops)? {
815 Poll::Ready(yanked) => {
816 return Ok(yanked);
817 }
818 Poll::Pending => {
819 self.block_until_ready()?;
820 }
821 }
822 }
823 }
824
825 fn block_until_ready(&mut self) -> CargoResult<()> {
826 self.ops.block_until_ready()
827 }
828 }