]> git.proxmox.com Git - rustc.git/blob - src/tools/cargo/src/cargo/sources/registry/remote.rs
New upstream version 1.70.0+dfsg2
[rustc.git] / src / tools / cargo / src / cargo / sources / registry / remote.rs
1 use crate::core::{GitReference, PackageId, SourceId};
2 use crate::sources::git;
3 use crate::sources::registry::download;
4 use crate::sources::registry::MaybeLock;
5 use crate::sources::registry::{LoadResponse, RegistryConfig, RegistryData};
6 use crate::util::errors::CargoResult;
7 use crate::util::interning::InternedString;
8 use crate::util::{Config, Filesystem};
9 use anyhow::Context as _;
10 use cargo_util::paths;
11 use lazycell::LazyCell;
12 use log::{debug, trace};
13 use std::cell::{Cell, Ref, RefCell};
14 use std::fs::File;
15 use std::mem;
16 use std::path::Path;
17 use std::str;
18 use std::task::{ready, Poll};
19
/// A remote registry is a registry that lives at a remote URL (such as
/// crates.io). The git index is cloned locally, and `.crate` files are
/// downloaded as needed and cached locally.
pub struct RemoteRegistry<'cfg> {
    /// Directory holding the local git repository of the index (the
    /// configured registry index path joined with the registry name).
    index_path: Filesystem,
    /// Path to the cache of `.crate` files (`$CARGO_HOME/registry/path/$REG-HASH`).
    cache_path: Filesystem,
    /// Identifier of this registry source; also supplies the URL that is
    /// fetched and the key used to record that the source was updated.
    source_id: SourceId,
    /// Git reference of the index that is resolved to find the head commit
    /// and that is fetched on update.
    index_git_ref: GitReference,
    /// Shared Cargo configuration.
    config: &'cfg Config,
    /// Cached tree of the head commit. The `'static` lifetime is produced by
    /// a transmute in `tree()`: the tree really borrows from `repo`, which is
    /// why the `Drop` impl clears this field before the other fields go away.
    tree: RefCell<Option<git2::Tree<'static>>>,
    /// Lazily opened (or initialized) git repository located at `index_path`.
    repo: LazyCell<git2::Repository>,
    /// Cached object id that `index_git_ref` resolved to; reset on update.
    head: Cell<Option<git2::Oid>>,
    /// Cached string form of `head`; reset on update.
    current_sha: Cell<Option<InternedString>>,
    /// While `true`, `load` returns `Poll::Pending` until
    /// `block_until_ready` has run.
    needs_update: bool, // Does this registry need to be updated?
    /// When `true`, the "Updating" status line is not printed.
    quiet: bool,
}
37
38 impl<'cfg> RemoteRegistry<'cfg> {
39 pub fn new(source_id: SourceId, config: &'cfg Config, name: &str) -> RemoteRegistry<'cfg> {
40 RemoteRegistry {
41 index_path: config.registry_index_path().join(name),
42 cache_path: config.registry_cache_path().join(name),
43 source_id,
44 config,
45 // TODO: we should probably make this configurable
46 index_git_ref: GitReference::DefaultBranch,
47 tree: RefCell::new(None),
48 repo: LazyCell::new(),
49 head: Cell::new(None),
50 current_sha: Cell::new(None),
51 needs_update: false,
52 quiet: false,
53 }
54 }
55
56 fn repo(&self) -> CargoResult<&git2::Repository> {
57 self.repo.try_borrow_with(|| {
58 let path = self.config.assert_package_cache_locked(&self.index_path);
59
60 // Fast path without a lock
61 if let Ok(repo) = git2::Repository::open(&path) {
62 trace!("opened a repo without a lock");
63 return Ok(repo);
64 }
65
66 // Ok, now we need to lock and try the whole thing over again.
67 trace!("acquiring registry index lock");
68 match git2::Repository::open(&path) {
69 Ok(repo) => Ok(repo),
70 Err(_) => {
71 drop(paths::remove_dir_all(&path));
72 paths::create_dir_all(&path)?;
73
74 // Note that we'd actually prefer to use a bare repository
75 // here as we're not actually going to check anything out.
76 // All versions of Cargo, though, share the same CARGO_HOME,
77 // so for compatibility with older Cargo which *does* do
78 // checkouts we make sure to initialize a new full
79 // repository (not a bare one).
80 //
81 // We should change this to `init_bare` whenever we feel
82 // like enough time has passed or if we change the directory
83 // that the folder is located in, such as by changing the
84 // hash at the end of the directory.
85 //
86 // Note that in the meantime we also skip `init.templatedir`
87 // as it can be misconfigured sometimes or otherwise add
88 // things that we don't want.
89 let mut opts = git2::RepositoryInitOptions::new();
90 opts.external_template(false);
91 Ok(git2::Repository::init_opts(&path, &opts).with_context(|| {
92 format!("failed to initialize index git repository (in {:?})", path)
93 })?)
94 }
95 }
96 })
97 }
98
99 fn head(&self) -> CargoResult<git2::Oid> {
100 if self.head.get().is_none() {
101 let repo = self.repo()?;
102 let oid = self.index_git_ref.resolve(repo)?;
103 self.head.set(Some(oid));
104 }
105 Ok(self.head.get().unwrap())
106 }
107
108 fn tree(&self) -> CargoResult<Ref<'_, git2::Tree<'_>>> {
109 {
110 let tree = self.tree.borrow();
111 if tree.is_some() {
112 return Ok(Ref::map(tree, |s| s.as_ref().unwrap()));
113 }
114 }
115 let repo = self.repo()?;
116 let commit = repo.find_commit(self.head()?)?;
117 let tree = commit.tree()?;
118
119 // Unfortunately in libgit2 the tree objects look like they've got a
120 // reference to the repository object which means that a tree cannot
121 // outlive the repository that it came from. Here we want to cache this
122 // tree, though, so to accomplish this we transmute it to a static
123 // lifetime.
124 //
125 // Note that we don't actually hand out the static lifetime, instead we
126 // only return a scoped one from this function. Additionally the repo
127 // we loaded from (above) lives as long as this object
128 // (`RemoteRegistry`) so we then just need to ensure that the tree is
129 // destroyed first in the destructor, hence the destructor on
130 // `RemoteRegistry` below.
131 let tree = unsafe { mem::transmute::<git2::Tree<'_>, git2::Tree<'static>>(tree) };
132 *self.tree.borrow_mut() = Some(tree);
133 Ok(Ref::map(self.tree.borrow(), |s| s.as_ref().unwrap()))
134 }
135
136 fn current_version(&self) -> Option<InternedString> {
137 if let Some(sha) = self.current_sha.get() {
138 return Some(sha);
139 }
140 let sha = InternedString::new(&self.head().ok()?.to_string());
141 self.current_sha.set(Some(sha));
142 Some(sha)
143 }
144
145 fn is_updated(&self) -> bool {
146 self.config.updated_sources().contains(&self.source_id)
147 }
148
149 fn mark_updated(&self) {
150 self.config.updated_sources().insert(self.source_id);
151 }
152 }
153
/// Name of the marker file created inside the index directory after a
/// fetch; its mtime records when the index was last updated.
const LAST_UPDATED_FILE: &str = ".last-updated";
155
156 impl<'cfg> RegistryData for RemoteRegistry<'cfg> {
157 fn prepare(&self) -> CargoResult<()> {
158 self.repo()?; // create intermediate dirs and initialize the repo
159 Ok(())
160 }
161
/// Returns the location of the local index directory.
fn index_path(&self) -> &Filesystem {
    &self.index_path
}
165
/// Converts `path` into a plain `Path` by delegating to
/// `Config::assert_package_cache_locked`.
///
/// NOTE(review): presumably that helper checks that the package cache lock
/// is currently held before handing out the raw path; confirm against
/// `Config`.
fn assert_index_locked<'a>(&self, path: &'a Filesystem) -> &'a Path {
    self.config.assert_package_cache_locked(path)
}
169
170 // `index_version` Is a string representing the version of the file used to construct the cached copy.
171 // Older versions of Cargo used the single value of the hash of the HEAD commit as a `index_version`.
172 // This is technically correct but a little too conservative. If a new commit is fetched all cached
173 // files need to be regenerated even if a particular file was not changed.
174 // However if an old cargo has written such a file we still know how to read it, as long as we check for that hash value.
175 //
176 // Cargo now uses a hash of the file's contents as provided by git.
177 fn load(
178 &mut self,
179 _root: &Path,
180 path: &Path,
181 index_version: Option<&str>,
182 ) -> Poll<CargoResult<LoadResponse>> {
183 if self.needs_update {
184 return Poll::Pending;
185 }
186 // Check if the cache is valid.
187 let git_commit_hash = self.current_version();
188 if index_version.is_some() && index_version == git_commit_hash.as_deref() {
189 // This file was written by an old version of cargo, but it is still up-to-date.
190 return Poll::Ready(Ok(LoadResponse::CacheValid));
191 }
192 // Note that the index calls this method and the filesystem is locked
193 // in the index, so we don't need to worry about an `update_index`
194 // happening in a different process.
195 fn load_helper(
196 registry: &RemoteRegistry<'_>,
197 path: &Path,
198 index_version: Option<&str>,
199 ) -> CargoResult<LoadResponse> {
200 let repo = registry.repo()?;
201 let tree = registry.tree()?;
202 let entry = tree.get_path(path);
203 let entry = entry?;
204 let git_file_hash = Some(entry.id().to_string());
205
206 // Check if the cache is valid.
207 if index_version.is_some() && index_version == git_file_hash.as_deref() {
208 return Ok(LoadResponse::CacheValid);
209 }
210
211 let object = entry.to_object(repo)?;
212 let blob = match object.as_blob() {
213 Some(blob) => blob,
214 None => anyhow::bail!("path `{}` is not a blob in the git repo", path.display()),
215 };
216
217 Ok(LoadResponse::Data {
218 raw_data: blob.content().to_vec(),
219 index_version: git_file_hash,
220 })
221 }
222
223 match load_helper(&self, path, index_version) {
224 Ok(result) => Poll::Ready(Ok(result)),
225 Err(_) if !self.is_updated() => {
226 // If git returns an error and we haven't updated the repo, return
227 // pending to allow an update to try again.
228 self.needs_update = true;
229 Poll::Pending
230 }
231 Err(e)
232 if e.downcast_ref::<git2::Error>()
233 .map(|e| e.code() == git2::ErrorCode::NotFound)
234 .unwrap_or_default() =>
235 {
236 // The repo has been updated and the file does not exist.
237 Poll::Ready(Ok(LoadResponse::NotFound))
238 }
239 Err(e) => Poll::Ready(Err(e)),
240 }
241 }
242
243 fn config(&mut self) -> Poll<CargoResult<Option<RegistryConfig>>> {
244 debug!("loading config");
245 self.prepare()?;
246 self.config.assert_package_cache_locked(&self.index_path);
247 match ready!(self.load(Path::new(""), Path::new("config.json"), None)?) {
248 LoadResponse::Data { raw_data, .. } => {
249 trace!("config loaded");
250 let mut cfg: RegistryConfig = serde_json::from_slice(&raw_data)?;
251 if !self.config.cli_unstable().registry_auth {
252 cfg.auth_required = false;
253 }
254 Poll::Ready(Ok(Some(cfg)))
255 }
256 _ => Poll::Ready(Ok(None)),
257 }
258 }
259
260 fn block_until_ready(&mut self) -> CargoResult<()> {
261 if !self.needs_update {
262 return Ok(());
263 }
264
265 self.needs_update = false;
266
267 // Make sure the index is only updated once per session since it is an
268 // expensive operation. This generally only happens when the resolver
269 // is run multiple times, such as during `cargo publish`.
270 if self.is_updated() {
271 return Ok(());
272 }
273 self.mark_updated();
274
275 if self.config.offline() {
276 return Ok(());
277 }
278 if self.config.cli_unstable().no_index_update {
279 return Ok(());
280 }
281
282 debug!("updating the index");
283
284 // Ensure that we'll actually be able to acquire an HTTP handle later on
285 // once we start trying to download crates. This will weed out any
286 // problems with `.cargo/config` configuration related to HTTP.
287 //
288 // This way if there's a problem the error gets printed before we even
289 // hit the index, which may not actually read this configuration.
290 self.config.http()?;
291
292 self.prepare()?;
293 self.head.set(None);
294 *self.tree.borrow_mut() = None;
295 self.current_sha.set(None);
296 let path = self.config.assert_package_cache_locked(&self.index_path);
297 if !self.quiet {
298 self.config
299 .shell()
300 .status("Updating", self.source_id.display_index())?;
301 }
302
303 // Fetch the latest version of our `index_git_ref` into the index
304 // checkout.
305 let url = self.source_id.url();
306 let repo = self.repo.borrow_mut().unwrap();
307 git::fetch(repo, url.as_str(), &self.index_git_ref, self.config)
308 .with_context(|| format!("failed to fetch `{}`", url))?;
309
310 // Create a dummy file to record the mtime for when we updated the
311 // index.
312 paths::create(&path.join(LAST_UPDATED_FILE))?;
313
314 Ok(())
315 }
316
/// Flags the registry as needing an update, which makes the next `load`
/// return `Poll::Pending`.
///
/// NOTE(review): only the flag is set here. `block_until_ready` returns
/// early for a source that is already recorded as updated, so this alone
/// does not appear to force a second fetch in the same session; confirm
/// that this is intended.
fn invalidate_cache(&mut self) {
    // To fully invalidate, undo `mark_updated`'s work
    self.needs_update = true;
}
321
/// Sets whether the "Updating" status line is suppressed during an index
/// update (`true` suppresses it).
fn set_quiet(&mut self, quiet: bool) {
    self.quiet = quiet;
}
325
326 fn is_updated(&self) -> bool {
327 self.is_updated()
328 }
329
330 fn download(&mut self, pkg: PackageId, checksum: &str) -> CargoResult<MaybeLock> {
331 let registry_config = loop {
332 match self.config()? {
333 Poll::Pending => self.block_until_ready()?,
334 Poll::Ready(cfg) => break cfg.unwrap(),
335 }
336 };
337
338 download::download(
339 &self.cache_path,
340 &self.config,
341 pkg,
342 checksum,
343 registry_config,
344 )
345 }
346
347 fn finish_download(
348 &mut self,
349 pkg: PackageId,
350 checksum: &str,
351 data: &[u8],
352 ) -> CargoResult<File> {
353 download::finish_download(&self.cache_path, &self.config, pkg, checksum, data)
354 }
355
356 fn is_crate_downloaded(&self, pkg: PackageId) -> bool {
357 download::is_crate_downloaded(&self.cache_path, &self.config, pkg)
358 }
359 }
360
361 impl<'cfg> Drop for RemoteRegistry<'cfg> {
362 fn drop(&mut self) {
363 // Just be sure to drop this before our other fields
364 self.tree.borrow_mut().take();
365 }
366 }