// src/cargo/sources/registry/remote.rs (cargo git repository)
//
// Registry functions return `task::Poll` to enable parallel fetching of index data.
use crate::core::{GitReference, PackageId, SourceId};
use crate::sources::git;
use crate::sources::registry::MaybeLock;
use crate::sources::registry::{
    RegistryConfig, RegistryData, CHECKSUM_TEMPLATE, CRATE_TEMPLATE, LOWER_PREFIX_TEMPLATE,
    PREFIX_TEMPLATE, VERSION_TEMPLATE,
};
use crate::util::errors::CargoResult;
use crate::util::interning::InternedString;
use crate::util::network::PollExt;
use crate::util::{Config, Filesystem};
use anyhow::Context as _;
use cargo_util::{paths, registry::make_dep_path, Sha256};
use lazycell::LazyCell;
use log::{debug, trace};
use std::cell::{Cell, Ref, RefCell};
use std::fmt::Write as FmtWrite;
use std::fs::{self, File, OpenOptions};
use std::io::prelude::*;
use std::io::SeekFrom;
use std::mem;
use std::path::Path;
use std::str;
use std::task::Poll;

/// A remote registry is a registry that lives at a remote URL (such as
/// crates.io). The git index is cloned locally, and `.crate` files are
/// downloaded as needed and cached locally.
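///
/// Illustrative on-disk layout (the hash suffix shown is an example, not a
/// guaranteed value):
///
/// * `$CARGO_HOME/registry/index/github.com-1ecc6299db9ec823` - cloned git index
/// * `$CARGO_HOME/registry/cache/github.com-1ecc6299db9ec823` - cached `.crate` files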
pub struct RemoteRegistry<'cfg> {
    index_path: Filesystem,
    /// Path to the cache of `.crate` files (`$CARGO_HOME/registry/cache/$REG-HASH`).
    cache_path: Filesystem,
    source_id: SourceId,
    index_git_ref: GitReference,
    config: &'cfg Config,
    tree: RefCell<Option<git2::Tree<'static>>>,
    repo: LazyCell<git2::Repository>,
    head: Cell<Option<git2::Oid>>,
    current_sha: Cell<Option<InternedString>>,
}

impl<'cfg> RemoteRegistry<'cfg> {
    pub fn new(source_id: SourceId, config: &'cfg Config, name: &str) -> RemoteRegistry<'cfg> {
        RemoteRegistry {
            index_path: config.registry_index_path().join(name),
            cache_path: config.registry_cache_path().join(name),
            source_id,
            config,
            // TODO: we should probably make this configurable
            index_git_ref: GitReference::DefaultBranch,
            tree: RefCell::new(None),
            repo: LazyCell::new(),
            head: Cell::new(None),
            current_sha: Cell::new(None),
        }
    }

    fn repo(&self) -> CargoResult<&git2::Repository> {
        self.repo.try_borrow_with(|| {
            let path = self.config.assert_package_cache_locked(&self.index_path);

            // Fast path without a lock
            if let Ok(repo) = git2::Repository::open(&path) {
                trace!("opened a repo without a lock");
                return Ok(repo);
            }

            // Ok, now we need to lock and try the whole thing over again.
            trace!("acquiring registry index lock");
            match git2::Repository::open(&path) {
                Ok(repo) => Ok(repo),
                Err(_) => {
                    drop(paths::remove_dir_all(&path));
                    paths::create_dir_all(&path)?;

                    // Note that we'd actually prefer to use a bare repository
                    // here as we're not actually going to check anything out.
                    // All versions of Cargo, though, share the same CARGO_HOME,
                    // so for compatibility with older Cargo which *does* do
                    // checkouts we make sure to initialize a new full
                    // repository (not a bare one).
                    //
                    // We should change this to `init_bare` whenever we feel
                    // like enough time has passed or if we change the directory
                    // that the folder is located in, such as by changing the
                    // hash at the end of the directory.
                    //
                    // Note that in the meantime we also skip `init.templatedir`
                    // as it can be misconfigured sometimes or otherwise add
                    // things that we don't want.
                    let mut opts = git2::RepositoryInitOptions::new();
                    opts.external_template(false);
                    Ok(git2::Repository::init_opts(&path, &opts).with_context(|| {
                        format!("failed to initialize index git repository (in {:?})", path)
                    })?)
                }
            }
        })
    }

    fn head(&self) -> CargoResult<git2::Oid> {
        if self.head.get().is_none() {
            let repo = self.repo()?;
            let oid = self.index_git_ref.resolve(repo)?;
            self.head.set(Some(oid));
        }
        Ok(self.head.get().unwrap())
    }

    fn tree(&self) -> CargoResult<Ref<'_, git2::Tree<'_>>> {
        {
            let tree = self.tree.borrow();
            if tree.is_some() {
                return Ok(Ref::map(tree, |s| s.as_ref().unwrap()));
            }
        }
        let repo = self.repo()?;
        let commit = repo.find_commit(self.head()?)?;
        let tree = commit.tree()?;

        // Unfortunately in libgit2 the tree objects look like they've got a
        // reference to the repository object which means that a tree cannot
        // outlive the repository that it came from. Here we want to cache this
        // tree, though, so to accomplish this we transmute it to a static
        // lifetime.
        //
        // Note that we don't actually hand out the static lifetime, instead we
        // only return a scoped one from this function. Additionally the repo
        // we loaded from (above) lives as long as this object
        // (`RemoteRegistry`) so we then just need to ensure that the tree is
        // destroyed first in the destructor, hence the destructor on
        // `RemoteRegistry` below.
        let tree = unsafe { mem::transmute::<git2::Tree<'_>, git2::Tree<'static>>(tree) };
        *self.tree.borrow_mut() = Some(tree);
        Ok(Ref::map(self.tree.borrow(), |s| s.as_ref().unwrap()))
    }

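    /// Returns the name of the cached `.crate` file for `pkg`, e.g.
    /// `serde-1.0.0.crate` (crate name and version here are just an example).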
    fn filename(&self, pkg: PackageId) -> String {
        format!("{}-{}.crate", pkg.name(), pkg.version())
    }
}

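/// Name of the marker file whose mtime records when the index was last
/// updated (see `update_index` below, which creates it after a fetch).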
const LAST_UPDATED_FILE: &str = ".last-updated";

impl<'cfg> RegistryData for RemoteRegistry<'cfg> {
    fn prepare(&self) -> CargoResult<()> {
        self.repo()?; // create intermediate dirs and initialize the repo
        Ok(())
    }

    fn index_path(&self) -> &Filesystem {
        &self.index_path
    }

    fn assert_index_locked<'a>(&self, path: &'a Filesystem) -> &'a Path {
        self.config.assert_package_cache_locked(path)
    }

    fn current_version(&self) -> Option<InternedString> {
        if let Some(sha) = self.current_sha.get() {
            return Some(sha);
        }
        let sha = InternedString::new(&self.head().ok()?.to_string());
        self.current_sha.set(Some(sha));
        Some(sha)
    }

    fn load(
        &self,
        _root: &Path,
        path: &Path,
        data: &mut dyn FnMut(&[u8]) -> CargoResult<()>,
    ) -> Poll<CargoResult<()>> {
        // Note that the index calls this method and the filesystem is locked
        // in the index, so we don't need to worry about an `update_index`
        // happening in a different process.
        let repo = self.repo()?;
        let tree = self.tree()?;
        let entry = tree.get_path(path)?;
        let object = entry.to_object(repo)?;
        let blob = match object.as_blob() {
            Some(blob) => blob,
            None => {
                return Err(anyhow::anyhow!(
                    "path `{}` is not a blob in the git repo",
                    path.display()
                ))
                .into()
            }
        };
        Poll::Ready(Ok(data(blob.content())?))
    }

    fn config(&mut self) -> CargoResult<Option<RegistryConfig>> {
        debug!("loading config");
        self.prepare()?;
        self.config.assert_package_cache_locked(&self.index_path);
        let mut config = None;
        self.load(Path::new(""), Path::new("config.json"), &mut |json| {
            config = Some(serde_json::from_slice(json)?);
            Ok(())
        })
        .expect("git registries never return pending")?;
        trace!("config loaded");
        Ok(config)
    }

    fn update_index(&mut self) -> CargoResult<()> {
        if self.config.offline() {
            return Ok(());
        }
        if self.config.cli_unstable().no_index_update {
            return Ok(());
        }
        // Make sure the index is only updated once per session since it is an
        // expensive operation. This generally only happens when the resolver
        // is run multiple times, such as during `cargo publish`.
        if self.config.updated_sources().contains(&self.source_id) {
            return Ok(());
        }

        debug!("updating the index");

        // Ensure that we'll actually be able to acquire an HTTP handle later on
        // once we start trying to download crates. This will weed out any
        // problems with `.cargo/config` configuration related to HTTP.
        //
        // This way if there's a problem the error gets printed before we even
        // hit the index, which may not actually read this configuration.
        self.config.http()?;

        self.prepare()?;
        self.head.set(None);
        *self.tree.borrow_mut() = None;
        self.current_sha.set(None);
        let path = self.config.assert_package_cache_locked(&self.index_path);
        self.config
            .shell()
            .status("Updating", self.source_id.display_index())?;

        // Fetch the latest version of our `index_git_ref` into the index
        // checkout.
        let url = self.source_id.url();
        let repo = self.repo.borrow_mut().unwrap();
        git::fetch(repo, url.as_str(), &self.index_git_ref, self.config)
            .with_context(|| format!("failed to fetch `{}`", url))?;
        self.config.updated_sources().insert(self.source_id);

        // Create a dummy file to record the mtime for when we updated the
        // index.
        paths::create(&path.join(LAST_UPDATED_FILE))?;

        Ok(())
    }

    fn download(&mut self, pkg: PackageId, checksum: &str) -> CargoResult<MaybeLock> {
        let filename = self.filename(pkg);

        // Attempt to open a read-only copy first to avoid an exclusive write
        // lock and also work with read-only filesystems. Note that we check
        // the length of the file (as we also do below) to handle interrupted
        // downloads.
        //
        // If this fails then we fall through to the exclusive path where we may
        // have to redownload the file.
        let path = self.cache_path.join(&filename);
        let path = self.config.assert_package_cache_locked(&path);
        if let Ok(dst) = File::open(&path) {
            let meta = dst.metadata()?;
            if meta.len() > 0 {
                return Ok(MaybeLock::Ready(dst));
            }
        }

        let config = self.config()?.unwrap();
        let mut url = config.dl;
        if !url.contains(CRATE_TEMPLATE)
            && !url.contains(VERSION_TEMPLATE)
            && !url.contains(PREFIX_TEMPLATE)
            && !url.contains(LOWER_PREFIX_TEMPLATE)
            && !url.contains(CHECKSUM_TEMPLATE)
        {
            write!(url, "/{}/{}/download", CRATE_TEMPLATE, VERSION_TEMPLATE).unwrap();
        }
        let prefix = make_dep_path(&*pkg.name(), true);
        let url = url
            .replace(CRATE_TEMPLATE, &*pkg.name())
            .replace(VERSION_TEMPLATE, &pkg.version().to_string())
            .replace(PREFIX_TEMPLATE, &prefix)
            .replace(LOWER_PREFIX_TEMPLATE, &prefix.to_lowercase())
            .replace(CHECKSUM_TEMPLATE, checksum);

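        // As a rough illustration (crate name and version are examples): with
        // crates.io's registry configuration the resulting URL typically looks
        // like `https://crates.io/api/v1/crates/serde/1.0.0/download`.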
        Ok(MaybeLock::Download {
            url,
            descriptor: pkg.to_string(),
        })
    }

    fn finish_download(
        &mut self,
        pkg: PackageId,
        checksum: &str,
        data: &[u8],
    ) -> CargoResult<File> {
        // Verify what we just downloaded
        let actual = Sha256::new().update(data).finish_hex();
        if actual != checksum {
            anyhow::bail!("failed to verify the checksum of `{}`", pkg)
        }

        let filename = self.filename(pkg);
        self.cache_path.create_dir()?;
        let path = self.cache_path.join(&filename);
        let path = self.config.assert_package_cache_locked(&path);
        let mut dst = OpenOptions::new()
            .create(true)
            .read(true)
            .write(true)
            .open(&path)
            .with_context(|| format!("failed to open `{}`", path.display()))?;
        let meta = dst.metadata()?;
        if meta.len() > 0 {
            return Ok(dst);
        }

        dst.write_all(data)?;
        dst.seek(SeekFrom::Start(0))?;
        Ok(dst)
    }

    fn is_crate_downloaded(&self, pkg: PackageId) -> bool {
        let filename = format!("{}-{}.crate", pkg.name(), pkg.version());
        let path = Path::new(&filename);

        let path = self.cache_path.join(path);
        let path = self.config.assert_package_cache_locked(&path);
        if let Ok(meta) = fs::metadata(path) {
            return meta.len() > 0;
        }
        false
    }

    fn block_until_ready(&mut self) -> CargoResult<()> {
        Ok(())
    }
}

impl<'cfg> Drop for RemoteRegistry<'cfg> {
    fn drop(&mut self) {
        // Just be sure to drop this before our other fields
        self.tree.borrow_mut().take();
    }
}
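// A sketch of how a registry source typically drives this type (not an
// exhaustive contract): `prepare()` and `update_index()` set up and refresh the
// git index, `load()` reads individual index entries, and `download()` /
// `finish_download()` fetch, verify, and cache the `.crate` files selected by
// the resolver.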