]> git.proxmox.com Git - rustc.git/blame - src/tools/cargo/src/cargo/sources/registry/remote.rs
New upstream version 1.75.0+dfsg1
[rustc.git] / src / tools / cargo / src / cargo / sources / registry / remote.rs
CommitLineData
fe692bf9
FG
1//! Access to a Git index based registry. See [`RemoteRegistry`] for details.
2
0a29b90c
FG
3use crate::core::{GitReference, PackageId, SourceId};
4use crate::sources::git;
49aad941 5use crate::sources::git::fetch::RemoteKind;
0a29b90c
FG
6use crate::sources::registry::download;
7use crate::sources::registry::MaybeLock;
8use crate::sources::registry::{LoadResponse, RegistryConfig, RegistryData};
ed00b5ec 9use crate::util::cache_lock::CacheLockMode;
0a29b90c
FG
10use crate::util::errors::CargoResult;
11use crate::util::interning::InternedString;
12use crate::util::{Config, Filesystem};
13use anyhow::Context as _;
14use cargo_util::paths;
15use lazycell::LazyCell;
0a29b90c
FG
16use std::cell::{Cell, Ref, RefCell};
17use std::fs::File;
18use std::mem;
19use std::path::Path;
20use std::str;
21use std::task::{ready, Poll};
add651ee 22use tracing::{debug, trace};
0a29b90c
FG
23
24/// A remote registry is a registry that lives at a remote URL (such as
25/// crates.io). The git index is cloned locally, and `.crate` files are
26/// downloaded as needed and cached locally.
fe692bf9
FG
27///
28/// This type is primarily accessed through the [`RegistryData`] trait.
29///
30/// See the [module-level documentation](super) for the index format and layout.
31///
32/// ## History of Git-based index registry
33///
34/// Using Git to host this index used to be quite efficient. The full index can
35/// be stored efficiently locally on disk, and once it is downloaded, all
36/// queries of a registry can happen locally and needn't touch the network.
37/// Git-based index was a reasonable design choice at the time when HTTP/2
38/// was just introduced.
39///
40/// However, the full index keeps growing as crates.io grows. It becomes
41/// relatively big and slows down the first use of Cargo. Git (specifically
42/// libgit2) is not efficient at handling huge amounts of small files either.
43/// On the other hand, newer protocols like HTTP/2 are prevalent and capable to
44/// serve a bunch of tiny files. Today, it is encouraged to use [`HttpRegistry`],
45/// which is the default from 1.70.0. That being said, Cargo will continue
46/// supporting Git-based index for a pretty long while.
47///
48/// [`HttpRegistry`]: super::http_remote::HttpRegistry
0a29b90c 49pub struct RemoteRegistry<'cfg> {
fe692bf9 50 /// Path to the registry index (`$CARGO_HOME/registry/index/$REG-HASH`).
0a29b90c 51 index_path: Filesystem,
fe692bf9 52 /// Path to the cache of `.crate` files (`$CARGO_HOME/registry/cache/$REG-HASH`).
0a29b90c 53 cache_path: Filesystem,
fe692bf9 54 /// The unique identifier of this registry source.
0a29b90c 55 source_id: SourceId,
fe692bf9
FG
56 /// This reference is stored so that when a registry needs update, it knows
57 /// where to fetch from.
0a29b90c
FG
58 index_git_ref: GitReference,
59 config: &'cfg Config,
fe692bf9
FG
60 /// A Git [tree object] to help this registry find crate metadata from the
61 /// underlying Git repository.
62 ///
781aab86 63 /// This is stored here to prevent Git from repeatedly creating a tree object
fe692bf9
FG
64 /// during each call into `load()`.
65 ///
66 /// [tree object]: https://git-scm.com/book/en/v2/Git-Internals-Git-Objects#_tree_objects
0a29b90c 67 tree: RefCell<Option<git2::Tree<'static>>>,
fe692bf9 68 /// A Git repository that contains the actual index we want.
0a29b90c 69 repo: LazyCell<git2::Repository>,
fe692bf9 70 /// The current HEAD commit of the underlying Git repository.
0a29b90c 71 head: Cell<Option<git2::Oid>>,
fe692bf9 72 /// This stores sha value of the current HEAD commit for convenience.
0a29b90c 73 current_sha: Cell<Option<InternedString>>,
781aab86 74 /// Whether this registry needs to update package information.
fe692bf9
FG
75 ///
76 /// See [`RemoteRegistry::mark_updated`] on how to make sure a registry
77 /// index is updated only once per session.
78 needs_update: bool,
79 /// Disables status messages.
0a29b90c
FG
80 quiet: bool,
81}
82
83impl<'cfg> RemoteRegistry<'cfg> {
fe692bf9
FG
84 /// Creates a Git-rebased remote registry for `source_id`.
85 ///
86 /// * `name` --- Name of a path segment where `.crate` tarballs and the
87 /// registry index are stored. Expect to be unique.
0a29b90c
FG
88 pub fn new(source_id: SourceId, config: &'cfg Config, name: &str) -> RemoteRegistry<'cfg> {
89 RemoteRegistry {
90 index_path: config.registry_index_path().join(name),
91 cache_path: config.registry_cache_path().join(name),
92 source_id,
93 config,
0a29b90c
FG
94 index_git_ref: GitReference::DefaultBranch,
95 tree: RefCell::new(None),
96 repo: LazyCell::new(),
97 head: Cell::new(None),
98 current_sha: Cell::new(None),
99 needs_update: false,
100 quiet: false,
101 }
102 }
103
fe692bf9 104 /// Creates intermediate dirs and initialize the repository.
0a29b90c
FG
105 fn repo(&self) -> CargoResult<&git2::Repository> {
106 self.repo.try_borrow_with(|| {
fe692bf9 107 trace!("acquiring registry index lock");
ed00b5ec
FG
108 let path = self
109 .config
110 .assert_package_cache_locked(CacheLockMode::DownloadExclusive, &self.index_path);
0a29b90c 111
0a29b90c
FG
112 match git2::Repository::open(&path) {
113 Ok(repo) => Ok(repo),
114 Err(_) => {
115 drop(paths::remove_dir_all(&path));
116 paths::create_dir_all(&path)?;
117
118 // Note that we'd actually prefer to use a bare repository
119 // here as we're not actually going to check anything out.
120 // All versions of Cargo, though, share the same CARGO_HOME,
121 // so for compatibility with older Cargo which *does* do
122 // checkouts we make sure to initialize a new full
123 // repository (not a bare one).
124 //
125 // We should change this to `init_bare` whenever we feel
126 // like enough time has passed or if we change the directory
127 // that the folder is located in, such as by changing the
128 // hash at the end of the directory.
129 //
130 // Note that in the meantime we also skip `init.templatedir`
131 // as it can be misconfigured sometimes or otherwise add
132 // things that we don't want.
133 let mut opts = git2::RepositoryInitOptions::new();
134 opts.external_template(false);
135 Ok(git2::Repository::init_opts(&path, &opts).with_context(|| {
136 format!("failed to initialize index git repository (in {:?})", path)
137 })?)
138 }
139 }
140 })
141 }
142
fe692bf9 143 /// Get the object ID of the HEAD commit from the underlying Git repository.
0a29b90c
FG
144 fn head(&self) -> CargoResult<git2::Oid> {
145 if self.head.get().is_none() {
146 let repo = self.repo()?;
147 let oid = self.index_git_ref.resolve(repo)?;
148 self.head.set(Some(oid));
149 }
150 Ok(self.head.get().unwrap())
151 }
152
fe692bf9
FG
153 /// Returns a [`git2::Tree`] object of the current HEAD commit of the
154 /// underlying Git repository.
0a29b90c
FG
155 fn tree(&self) -> CargoResult<Ref<'_, git2::Tree<'_>>> {
156 {
157 let tree = self.tree.borrow();
158 if tree.is_some() {
159 return Ok(Ref::map(tree, |s| s.as_ref().unwrap()));
160 }
161 }
162 let repo = self.repo()?;
163 let commit = repo.find_commit(self.head()?)?;
164 let tree = commit.tree()?;
165
fe692bf9 166 // SAFETY:
0a29b90c
FG
167 // Unfortunately in libgit2 the tree objects look like they've got a
168 // reference to the repository object which means that a tree cannot
169 // outlive the repository that it came from. Here we want to cache this
170 // tree, though, so to accomplish this we transmute it to a static
171 // lifetime.
172 //
173 // Note that we don't actually hand out the static lifetime, instead we
174 // only return a scoped one from this function. Additionally the repo
175 // we loaded from (above) lives as long as this object
176 // (`RemoteRegistry`) so we then just need to ensure that the tree is
177 // destroyed first in the destructor, hence the destructor on
178 // `RemoteRegistry` below.
179 let tree = unsafe { mem::transmute::<git2::Tree<'_>, git2::Tree<'static>>(tree) };
180 *self.tree.borrow_mut() = Some(tree);
181 Ok(Ref::map(self.tree.borrow(), |s| s.as_ref().unwrap()))
182 }
183
fe692bf9
FG
184 /// Gets the current version of the registry index.
185 ///
186 /// It is usually sha of the HEAD commit from the underlying Git repository.
0a29b90c
FG
187 fn current_version(&self) -> Option<InternedString> {
188 if let Some(sha) = self.current_sha.get() {
189 return Some(sha);
190 }
191 let sha = InternedString::new(&self.head().ok()?.to_string());
192 self.current_sha.set(Some(sha));
193 Some(sha)
194 }
195
fe692bf9 196 /// Whether the registry is up-to-date. See [`Self::mark_updated`] for more.
0a29b90c
FG
197 fn is_updated(&self) -> bool {
198 self.config.updated_sources().contains(&self.source_id)
199 }
200
fe692bf9
FG
201 /// Marks this registry as up-to-date.
202 ///
203 /// This makes sure the index is only updated once per session since it is
204 /// an expensive operation. This generally only happens when the resolver
205 /// is run multiple times, such as during `cargo publish`.
0a29b90c
FG
206 fn mark_updated(&self) {
207 self.config.updated_sources().insert(self.source_id);
208 }
209}
210
0a29b90c
FG
211impl<'cfg> RegistryData for RemoteRegistry<'cfg> {
212 fn prepare(&self) -> CargoResult<()> {
fe692bf9 213 self.repo()?;
0a29b90c
FG
214 Ok(())
215 }
216
217 fn index_path(&self) -> &Filesystem {
218 &self.index_path
219 }
220
221 fn assert_index_locked<'a>(&self, path: &'a Filesystem) -> &'a Path {
ed00b5ec
FG
222 self.config
223 .assert_package_cache_locked(CacheLockMode::DownloadExclusive, path)
0a29b90c
FG
224 }
225
fe692bf9
FG
226 /// Read the general concept for `load()` on [`RegistryData::load`].
227 ///
228 /// `index_version` is a string representing the version of the file used
229 /// to construct the cached copy.
230 ///
231 /// Older versions of Cargo used the single value of the hash of the HEAD
232 /// commit as a `index_version`. This is technically correct but a little
233 /// too conservative. If a new commit is fetched all cached files need to
234 /// be regenerated even if a particular file was not changed.
235 ///
236 /// However if an old cargo has written such a file we still know how to
237 /// read it, as long as we check for that hash value.
238 ///
239 /// Cargo now uses a hash of the file's contents as provided by git.
0a29b90c
FG
240 fn load(
241 &mut self,
242 _root: &Path,
243 path: &Path,
244 index_version: Option<&str>,
245 ) -> Poll<CargoResult<LoadResponse>> {
246 if self.needs_update {
247 return Poll::Pending;
248 }
249 // Check if the cache is valid.
250 let git_commit_hash = self.current_version();
251 if index_version.is_some() && index_version == git_commit_hash.as_deref() {
fe692bf9
FG
252 // This file was written by an old version of cargo, but it is
253 // still up-to-date.
0a29b90c
FG
254 return Poll::Ready(Ok(LoadResponse::CacheValid));
255 }
256 // Note that the index calls this method and the filesystem is locked
257 // in the index, so we don't need to worry about an `update_index`
258 // happening in a different process.
259 fn load_helper(
260 registry: &RemoteRegistry<'_>,
261 path: &Path,
262 index_version: Option<&str>,
263 ) -> CargoResult<LoadResponse> {
264 let repo = registry.repo()?;
265 let tree = registry.tree()?;
266 let entry = tree.get_path(path);
267 let entry = entry?;
268 let git_file_hash = Some(entry.id().to_string());
269
270 // Check if the cache is valid.
271 if index_version.is_some() && index_version == git_file_hash.as_deref() {
272 return Ok(LoadResponse::CacheValid);
273 }
274
275 let object = entry.to_object(repo)?;
781aab86
FG
276 let Some(blob) = object.as_blob() else {
277 anyhow::bail!("path `{}` is not a blob in the git repo", path.display())
0a29b90c
FG
278 };
279
280 Ok(LoadResponse::Data {
281 raw_data: blob.content().to_vec(),
282 index_version: git_file_hash,
283 })
284 }
285
286 match load_helper(&self, path, index_version) {
287 Ok(result) => Poll::Ready(Ok(result)),
288 Err(_) if !self.is_updated() => {
fe692bf9
FG
289 // If git returns an error and we haven't updated the repo,
290 // return pending to allow an update to try again.
0a29b90c
FG
291 self.needs_update = true;
292 Poll::Pending
293 }
294 Err(e)
295 if e.downcast_ref::<git2::Error>()
296 .map(|e| e.code() == git2::ErrorCode::NotFound)
297 .unwrap_or_default() =>
298 {
299 // The repo has been updated and the file does not exist.
300 Poll::Ready(Ok(LoadResponse::NotFound))
301 }
302 Err(e) => Poll::Ready(Err(e)),
303 }
304 }
305
306 fn config(&mut self) -> Poll<CargoResult<Option<RegistryConfig>>> {
307 debug!("loading config");
308 self.prepare()?;
ed00b5ec
FG
309 self.config
310 .assert_package_cache_locked(CacheLockMode::DownloadExclusive, &self.index_path);
fe692bf9 311 match ready!(self.load(Path::new(""), Path::new(RegistryConfig::NAME), None)?) {
0a29b90c
FG
312 LoadResponse::Data { raw_data, .. } => {
313 trace!("config loaded");
781aab86 314 let cfg: RegistryConfig = serde_json::from_slice(&raw_data)?;
0a29b90c
FG
315 Poll::Ready(Ok(Some(cfg)))
316 }
317 _ => Poll::Ready(Ok(None)),
318 }
319 }
320
321 fn block_until_ready(&mut self) -> CargoResult<()> {
322 if !self.needs_update {
323 return Ok(());
324 }
325
326 self.needs_update = false;
327
0a29b90c
FG
328 if self.is_updated() {
329 return Ok(());
330 }
331 self.mark_updated();
332
333 if self.config.offline() {
334 return Ok(());
335 }
336 if self.config.cli_unstable().no_index_update {
337 return Ok(());
338 }
339
340 debug!("updating the index");
341
342 // Ensure that we'll actually be able to acquire an HTTP handle later on
343 // once we start trying to download crates. This will weed out any
344 // problems with `.cargo/config` configuration related to HTTP.
345 //
346 // This way if there's a problem the error gets printed before we even
347 // hit the index, which may not actually read this configuration.
348 self.config.http()?;
349
350 self.prepare()?;
351 self.head.set(None);
352 *self.tree.borrow_mut() = None;
353 self.current_sha.set(None);
ed00b5ec
FG
354 let _path = self
355 .config
356 .assert_package_cache_locked(CacheLockMode::DownloadExclusive, &self.index_path);
0a29b90c
FG
357 if !self.quiet {
358 self.config
359 .shell()
360 .status("Updating", self.source_id.display_index())?;
361 }
362
363 // Fetch the latest version of our `index_git_ref` into the index
364 // checkout.
365 let url = self.source_id.url();
366 let repo = self.repo.borrow_mut().unwrap();
49aad941
FG
367 git::fetch(
368 repo,
369 url.as_str(),
370 &self.index_git_ref,
371 self.config,
372 RemoteKind::Registry,
373 )
374 .with_context(|| format!("failed to fetch `{}`", url))?;
0a29b90c 375
0a29b90c
FG
376 Ok(())
377 }
378
fe692bf9
FG
379 /// Read the general concept for `invalidate_cache()` on
380 /// [`RegistryData::invalidate_cache`].
381 ///
382 /// To fully invalidate, undo [`RemoteRegistry::mark_updated`]'s work.
0a29b90c 383 fn invalidate_cache(&mut self) {
0a29b90c
FG
384 self.needs_update = true;
385 }
386
387 fn set_quiet(&mut self, quiet: bool) {
388 self.quiet = quiet;
389 }
390
391 fn is_updated(&self) -> bool {
392 self.is_updated()
393 }
394
395 fn download(&mut self, pkg: PackageId, checksum: &str) -> CargoResult<MaybeLock> {
396 let registry_config = loop {
397 match self.config()? {
398 Poll::Pending => self.block_until_ready()?,
399 Poll::Ready(cfg) => break cfg.unwrap(),
400 }
401 };
402
403 download::download(
404 &self.cache_path,
405 &self.config,
406 pkg,
407 checksum,
408 registry_config,
409 )
410 }
411
412 fn finish_download(
413 &mut self,
414 pkg: PackageId,
415 checksum: &str,
416 data: &[u8],
417 ) -> CargoResult<File> {
418 download::finish_download(&self.cache_path, &self.config, pkg, checksum, data)
419 }
420
421 fn is_crate_downloaded(&self, pkg: PackageId) -> bool {
422 download::is_crate_downloaded(&self.cache_path, &self.config, pkg)
423 }
424}
425
fe692bf9
FG
426/// Implemented to just be sure to drop `tree` field before our other fields.
427/// See SAFETY inside [`RemoteRegistry::tree()`] for more.
0a29b90c
FG
428impl<'cfg> Drop for RemoteRegistry<'cfg> {
429 fn drop(&mut self) {
0a29b90c
FG
430 self.tree.borrow_mut().take();
431 }
432}