]> git.proxmox.com Git - cargo.git/blame - src/cargo/sources/git/utils.rs
Downgrade some log messages.
[cargo.git] / src / cargo / sources / git / utils.rs
CommitLineData
32f52fd2
AC
1//! Utilities for handling git repositories, mainly around
2//! authentication/cloning.
32f52fd2 3
5102de2b 4use crate::core::GitReference;
ebca5190 5use crate::util::errors::CargoResult;
de84f352 6use crate::util::{network, Config, IntoUrl, MetricsCounter, Progress};
ebca5190 7use anyhow::{anyhow, Context as _};
1dae5acb 8use cargo_util::{paths, ProcessBuilder};
869642bb 9use curl::easy::List;
4fd48350 10use git2::{self, ErrorClass, ObjectType};
9ed82b57 11use log::{debug, info};
8a4be789 12use serde::ser;
9ed82b57 13use serde::Serialize;
5102de2b
AC
14use std::env;
15use std::fmt;
5102de2b
AC
16use std::path::{Path, PathBuf};
17use std::process::Command;
d58aa8ab 18use std::time::{Duration, Instant};
a5a298f1 19use url::Url;
32e262d2 20
a5a298f1 21fn serialize_str<T, S>(t: &T, s: S) -> Result<S::Ok, S::Error>
1e682848
AC
22where
23 T: fmt::Display,
24 S: ser::Serializer,
a5a298f1 25{
8a4be789 26 s.collect_str(t)
a5a298f1
AC
27}
28
ed6a6224
SS
29pub struct GitShortID(git2::Buf);
30
31impl GitShortID {
32 pub fn as_str(&self) -> &str {
33 self.0.as_str().unwrap()
34 }
35}
36
23591fe5
LL
37/// `GitRemote` represents a remote repository. It gets cloned into a local
38/// `GitDatabase`.
a5a298f1 39#[derive(PartialEq, Clone, Debug, Serialize)]
ba5639f3 40pub struct GitRemote {
5c7979cd
DO
41 #[serde(serialize_with = "serialize_str")]
42 url: Url,
45265425
YK
43}
44
23591fe5
LL
45/// `GitDatabase` is a local clone of a remote repository's database. Multiple
46/// `GitCheckouts` can be cloned from this `GitDatabase`.
a5a298f1 47#[derive(Serialize)]
ba5639f3
YK
48pub struct GitDatabase {
49 remote: GitRemote,
a6dad622 50 path: PathBuf,
5c7979cd
DO
51 #[serde(skip_serializing)]
52 repo: git2::Repository,
45265425
YK
53}
54
23591fe5 55/// `GitCheckout` is a local checkout of a particular revision. Calling
64ff29ff 56/// `clone_into` with a reference will resolve the reference into a revision,
5c9d614f 57/// and return an `anyhow::Error` if no revision for that reference was found.
a5a298f1 58#[derive(Serialize)]
c64fd71e
AC
59pub struct GitCheckout<'a> {
60 database: &'a GitDatabase,
a6dad622 61 location: PathBuf,
ddc27999
AC
62 #[serde(serialize_with = "serialize_str")]
63 revision: git2::Oid,
5c7979cd
DO
64 #[serde(skip_serializing)]
65 repo: git2::Repository,
be9d1cde
YK
66}
67
64ff29ff 68// Implementations
45265425 69
ba5639f3 70impl GitRemote {
e2191677
AC
71 pub fn new(url: &Url) -> GitRemote {
72 GitRemote { url: url.clone() }
6afe6332 73 }
45265425 74
7a2facba 75 pub fn url(&self) -> &Url {
e2191677 76 &self.url
cda93d80
YK
77 }
78
ddc27999 79 pub fn rev_for(&self, path: &Path, reference: &GitReference) -> CargoResult<git2::Oid> {
7dd9872c 80 reference.resolve(&self.db_at(path)?.repo)
5919fa0d
YK
81 }
82
1e682848
AC
83 pub fn checkout(
84 &self,
85 into: &Path,
ddc27999 86 db: Option<GitDatabase>,
1e682848 87 reference: &GitReference,
ddc27999 88 locked_rev: Option<git2::Oid>,
1e682848 89 cargo_config: &Config,
ddc27999 90 ) -> CargoResult<(GitDatabase, git2::Oid)> {
ddc27999
AC
91 // If we have a previous instance of `GitDatabase` then fetch into that
92 // if we can. If that can successfully load our revision then we've
93 // populated the database with the latest version of `reference`, so
94 // return that database and the rev we resolve to.
95 if let Some(mut db) = db {
96 fetch(&mut db.repo, self.url.as_str(), reference, cargo_config)
6514c289 97 .context(format!("failed to fetch into: {}", into.display()))?;
ddc27999
AC
98 match locked_rev {
99 Some(rev) => {
100 if db.contains(rev) {
101 return Ok((db, rev));
102 }
103 }
104 None => {
7dd9872c 105 if let Ok(rev) = reference.resolve(&db.repo) {
ddc27999
AC
106 return Ok((db, rev));
107 }
108 }
51a28fa2 109 }
c933673e 110 }
ddc27999
AC
111
112 // Otherwise start from scratch to handle corrupt git repositories.
113 // After our fetch (which is interpreted as a clone now) we do the same
114 // resolution to figure out what we cloned.
115 if into.exists() {
116 paths::remove_dir_all(into)?;
117 }
118 paths::create_dir_all(into)?;
119 let mut repo = init(into, true)?;
120 fetch(&mut repo, self.url.as_str(), reference, cargo_config)
6514c289 121 .context(format!("failed to clone into: {}", into.display()))?;
ddc27999
AC
122 let rev = match locked_rev {
123 Some(rev) => rev,
7dd9872c 124 None => reference.resolve(&repo)?,
c64fd71e 125 };
6afe6332 126
1e682848
AC
127 Ok((
128 GitDatabase {
129 remote: self.clone(),
130 path: into.to_path_buf(),
131 repo,
132 },
133 rev,
134 ))
6afe6332
YK
135 }
136
c64fd71e 137 pub fn db_at(&self, db_path: &Path) -> CargoResult<GitDatabase> {
82655b46 138 let repo = git2::Repository::open(db_path)?;
c64fd71e
AC
139 Ok(GitDatabase {
140 remote: self.clone(),
a6dad622 141 path: db_path.to_path_buf(),
0247dc42 142 repo,
c64fd71e 143 })
e05b4dc8 144 }
6afe6332
YK
145}
146
ba5639f3 147impl GitDatabase {
1e682848
AC
148 pub fn copy_to(
149 &self,
ddc27999 150 rev: git2::Oid,
1e682848
AC
151 dest: &Path,
152 cargo_config: &Config,
b8b7faee 153 ) -> CargoResult<GitCheckout<'_>> {
c933673e
AC
154 let mut checkout = None;
155 if let Ok(repo) = git2::Repository::open(dest) {
833ad21d 156 let mut co = GitCheckout::new(dest, self, rev, repo);
c933673e 157 if !co.is_fresh() {
869642bb
AC
158 // After a successful fetch operation the subsequent reset can
159 // fail sometimes for corrupt repositories where the fetch
160 // operation succeeds but the object isn't actually there in one
161 // way or another. In these situations just skip the error and
162 // try blowing away the whole repository and trying with a
163 // clone.
c933673e 164 co.fetch(cargo_config)?;
869642bb
AC
165 match co.reset(cargo_config) {
166 Ok(()) => {
167 assert!(co.is_fresh());
168 checkout = Some(co);
169 }
170 Err(e) => debug!("failed reset after fetch {:?}", e),
2c12269e 171 }
c933673e
AC
172 } else {
173 checkout = Some(co);
2c12269e 174 }
c933673e
AC
175 };
176 let checkout = match checkout {
177 Some(c) => c,
178 None => GitCheckout::clone_into(dest, self, rev, cargo_config)?,
78acc739 179 };
d31f24bd 180 checkout.update_submodules(cargo_config)?;
78acc739 181 Ok(checkout)
be9d1cde 182 }
ba5639f3 183
ddc27999
AC
184 pub fn to_short_id(&self, revision: git2::Oid) -> CargoResult<GitShortID> {
185 let obj = self.repo.find_object(revision, None)?;
c933673e
AC
186 Ok(GitShortID(obj.short_id()?))
187 }
ddc27999
AC
188
189 pub fn contains(&self, oid: git2::Oid) -> bool {
190 self.repo.revparse_single(&oid.to_string()).is_ok()
191 }
192
7dd9872c
AC
193 pub fn resolve(&self, r: &GitReference) -> CargoResult<git2::Oid> {
194 r.resolve(&self.repo)
ddc27999 195 }
c933673e
AC
196}
197
198impl GitReference {
7dd9872c 199 pub fn resolve(&self, repo: &git2::Repository) -> CargoResult<git2::Oid> {
869642bb
AC
200 let id = match self {
201 // Note that we resolve the named tag here in sync with where it's
202 // fetched into via `fetch` below.
203 GitReference::Tag(s) => (|| -> CargoResult<git2::Oid> {
204 let refname = format!("refs/remotes/origin/tags/{}", s);
1e682848
AC
205 let id = repo.refname_to_id(&refname)?;
206 let obj = repo.find_object(id, None)?;
207 let obj = obj.peel(ObjectType::Commit)?;
208 Ok(obj.id())
209 })()
ebca5190 210 .with_context(|| format!("failed to find tag `{}`", s))?,
869642bb
AC
211
212 // Resolve the remote name since that's all we're configuring in
213 // `fetch` below.
214 GitReference::Branch(s) => {
215 let name = format!("origin/{}", s);
fecb7246 216 let b = repo
869642bb 217 .find_branch(&name, git2::BranchType::Remote)
ebca5190 218 .with_context(|| format!("failed to find branch `{}`", s))?;
947e8902
KP
219 b.get()
220 .target()
3a18c89a 221 .ok_or_else(|| anyhow::format_err!("branch `{}` did not have a target", s))?
aa256ffd 222 }
538fb1b4 223
7dd9872c 224 // We'll be using the HEAD commit
4c02977c 225 GitReference::DefaultBranch => {
7dd9872c
AC
226 let head_id = repo.refname_to_id("refs/remotes/origin/HEAD")?;
227 let head = repo.find_object(head_id, None)?;
228 head.peel(ObjectType::Commit)?.id()
4c02977c
AC
229 }
230
869642bb 231 GitReference::Rev(s) => {
c933673e 232 let obj = repo.revparse_single(s)?;
c0cc8ffe
AC
233 match obj.as_tag() {
234 Some(tag) => tag.target_id(),
235 None => obj.id(),
236 }
aa256ffd
AC
237 }
238 };
ddc27999 239 Ok(id)
ba5639f3 240 }
6afe6332
YK
241}
242
c64fd71e 243impl<'a> GitCheckout<'a> {
1e682848
AC
244 fn new(
245 path: &Path,
246 database: &'a GitDatabase,
ddc27999 247 revision: git2::Oid,
1e682848
AC
248 repo: git2::Repository,
249 ) -> GitCheckout<'a> {
2c12269e 250 GitCheckout {
a6dad622 251 location: path.to_path_buf(),
0247dc42
E
252 database,
253 revision,
254 repo,
c9e08631 255 }
2c12269e
TCS
256 }
257
1e682848
AC
258 fn clone_into(
259 into: &Path,
260 database: &'a GitDatabase,
ddc27999 261 revision: git2::Oid,
1e682848
AC
262 config: &Config,
263 ) -> CargoResult<GitCheckout<'a>> {
a6dad622 264 let dirname = into.parent().unwrap();
5102de2b 265 paths::create_dir_all(&dirname)?;
5cca4e8c 266 if into.exists() {
c933673e 267 paths::remove_dir_all(into)?;
be9d1cde 268 }
5cca4e8c
AC
269
270 // we're doing a local filesystem-to-filesystem clone so there should
271 // be no need to respect global configuration options, so pass in
272 // an empty instance of `git2::Config` below.
273 let git_config = git2::Config::new()?;
274
275 // Clone the repository, but make sure we use the "local" option in
276 // libgit2 which will attempt to use hardlinks to set up the database.
277 // This should speed up the clone operation quite a bit if it works.
278 //
279 // Note that we still use the same fetch options because while we don't
280 // need authentication information we may want progress bars and such.
930134c7 281 let url = database.path.into_url()?;
5cca4e8c 282 let mut repo = None;
04008795 283 with_fetch_options(&git_config, url.as_str(), config, &mut |fopts| {
5cca4e8c
AC
284 let mut checkout = git2::build::CheckoutBuilder::new();
285 checkout.dry_run(); // we'll do this below during a `reset`
286
287 let r = git2::build::RepoBuilder::new()
288 // use hard links and/or copy the database, we're doing a
289 // filesystem clone so this'll speed things up quite a bit.
290 .clone_local(git2::build::CloneLocal::Local)
291 .with_checkout(checkout)
292 .fetch_options(fopts)
5cca4e8c
AC
293 .clone(url.as_str(), into)?;
294 repo = Some(r);
295 Ok(())
296 })?;
297 let repo = repo.unwrap();
298
299 let checkout = GitCheckout::new(into, database, revision, repo);
143b0600
AC
300 checkout.reset(config)?;
301 Ok(checkout)
6afe6332
YK
302 }
303
78acc739
AC
304 fn is_fresh(&self) -> bool {
305 match self.repo.revparse_single("HEAD") {
ddc27999 306 Ok(ref head) if head.id() == self.revision => {
3a87d006 307 // See comments in reset() for why we check this
5cca4e8c 308 self.location.join(".cargo-ok").exists()
3a87d006 309 }
78acc739
AC
310 _ => false,
311 }
312 }
313
fe5a5c78 314 fn fetch(&mut self, cargo_config: &Config) -> CargoResult<()> {
78acc739 315 info!("fetch {}", self.repo.path().display());
930134c7 316 let url = self.database.path.into_url()?;
869642bb
AC
317 let reference = GitReference::Rev(self.revision.to_string());
318 fetch(&mut self.repo, url.as_str(), &reference, cargo_config)?;
78acc739
AC
319 Ok(())
320 }
321
143b0600 322 fn reset(&self, config: &Config) -> CargoResult<()> {
f7c91ba6 323 // If we're interrupted while performing this reset (e.g., we die because
3a87d006
AC
324 // of a signal) Cargo needs to be sure to try to check out this repo
325 // again on the next go-round.
326 //
327 // To enable this we have a dummy file in our checkout, .cargo-ok, which
328 // if present means that the repo has been successfully reset and is
329 // ready to go. Hence if we start to do a reset, we make sure this file
330 // *doesn't* exist, and then once we're done we create the file.
331 let ok_file = self.location.join(".cargo-ok");
c933673e 332 let _ = paths::remove_file(&ok_file);
aa256ffd 333 info!("reset {} to {}", self.repo.path().display(), self.revision);
4a1e7107
AB
334
335 // Ensure libgit2 won't mess with newlines when we vendor.
336 if let Ok(mut git_config) = self.repo.config() {
337 git_config.set_bool("core.autocrlf", false)?;
338 }
339
ddc27999 340 let object = self.repo.find_object(self.revision, None)?;
143b0600 341 reset(&self.repo, &object, config)?;
ce86e866 342 paths::create(ok_file)?;
c64fd71e 343 Ok(())
6afe6332
YK
344 }
345
7b03532b 346 fn update_submodules(&self, cargo_config: &Config) -> CargoResult<()> {
c5611a32 347 return update_submodules(&self.repo, cargo_config);
c64fd71e 348
7b03532b 349 fn update_submodules(repo: &git2::Repository, cargo_config: &Config) -> CargoResult<()> {
f0aae40f 350 debug!("update submodules for: {:?}", repo.workdir().unwrap());
c64fd71e 351
23591fe5 352 for mut child in repo.submodules()? {
ebca5190 353 update_submodule(repo, &mut child, cargo_config).with_context(|| {
1e682848
AC
354 format!(
355 "failed to update submodule `{}`",
356 child.name().unwrap_or("")
357 )
358 })?;
072d89ec
JK
359 }
360 Ok(())
361 }
c64fd71e 362
1e682848
AC
363 fn update_submodule(
364 parent: &git2::Repository,
b8b7faee 365 child: &mut git2::Submodule<'_>,
1e682848
AC
366 cargo_config: &Config,
367 ) -> CargoResult<()> {
072d89ec 368 child.init(false)?;
0d44a826
EH
369 let url = child.url().ok_or_else(|| {
370 anyhow::format_err!("non-utf8 url for submodule {:?}?", child.path())
371 })?;
072d89ec
JK
372
373 // A submodule which is listed in .gitmodules but not actually
374 // checked out will not have a head id, so we should ignore it.
375 let head = match child.head_id() {
376 Some(head) => head,
377 None => return Ok(()),
378 };
379
380 // If the submodule hasn't been checked out yet, we need to
381 // clone it. If it has been checked out and the head is the same
ddc74a5d
AC
382 // as the submodule's head, then we can skip an update and keep
383 // recursing.
072d89ec
JK
384 let head_and_repo = child.open().and_then(|repo| {
385 let target = repo.head()?.target();
386 Ok((target, repo))
387 });
fe5a5c78 388 let mut repo = match head_and_repo {
072d89ec
JK
389 Ok((head, repo)) => {
390 if child.head_id() == head {
1e682848 391 return update_submodules(&repo, cargo_config);
c64fd71e 392 }
072d89ec
JK
393 repo
394 }
395 Err(..) => {
396 let path = parent.workdir().unwrap().join(child.path());
c933673e 397 let _ = paths::remove_dir_all(&path);
2a4cdc67 398 init(&path, false)?
072d89ec
JK
399 }
400 };
072d89ec 401 // Fetch data from origin and reset to the head commit
869642bb 402 let reference = GitReference::Rev(head.to_string());
2fa95e3e
EH
403 cargo_config
404 .shell()
405 .status("Updating", format!("git submodule `{}`", url))?;
ebca5190 406 fetch(&mut repo, url, &reference, cargo_config).with_context(|| {
0d44a826 407 format!(
1e682848
AC
408 "failed to fetch submodule `{}` from {}",
409 child.name().unwrap_or(""),
410 url
0d44a826 411 )
072d89ec 412 })?;
c64fd71e 413
143b0600
AC
414 let obj = repo.find_object(head, None)?;
415 reset(&repo, &obj, cargo_config)?;
072d89ec 416 update_submodules(&repo, cargo_config)
c64fd71e 417 }
f7e4d017
YK
418 }
419}
219f9902 420
f66d7163
AC
421/// Prepare the authentication callbacks for cloning a git repository.
422///
423/// The main purpose of this function is to construct the "authentication
424/// callback" which is used to clone a repository. This callback will attempt to
425/// find the right authentication on the system (without user input) and will
426/// guide libgit2 in doing so.
427///
428/// The callback is provided `allowed` types of credentials, and we try to do as
429/// much as possible based on that:
430///
431/// * Prioritize SSH keys from the local ssh agent as they're likely the most
432/// reliable. The username here is prioritized from the credential
433/// callback, then from whatever is configured in git itself, and finally
434/// we fall back to the generic user of `git`.
435///
436/// * If a username/password is allowed, then we fallback to git2-rs's
437/// implementation of the credential helper. This is what is configured
f7c91ba6 438/// with `credential.helper` in git, and is the interface for the macOS
f66d7163
AC
439/// keychain, for example.
440///
441/// * After the above two have failed, we just kinda grapple attempting to
442/// return *something*.
443///
444/// If any form of authentication fails, libgit2 will repeatedly ask us for
445/// credentials until we give it a reason to not do so. To ensure we don't
446/// just sit here looping forever we keep track of authentications we've
447/// attempted and we don't try the same ones again.
1e682848
AC
448fn with_authentication<T, F>(url: &str, cfg: &git2::Config, mut f: F) -> CargoResult<T>
449where
b8b7faee 450 F: FnMut(&mut git2::Credentials<'_>) -> CargoResult<T>,
55321111 451{
219f9902
AC
452 let mut cred_helper = git2::CredentialHelper::new(url);
453 cred_helper.config(cfg);
f66d7163 454
537f7185
AC
455 let mut ssh_username_requested = false;
456 let mut cred_helper_bad = None;
457 let mut ssh_agent_attempts = Vec::new();
458 let mut any_attempts = false;
b379a5b0 459 let mut tried_sshkey = false;
6514c289 460 let mut url_attempt = None;
f66d7163 461
6514c289 462 let orig_url = url;
537f7185
AC
463 let mut res = f(&mut |url, username, allowed| {
464 any_attempts = true;
6514c289
AC
465 if url != orig_url {
466 url_attempt = Some(url.to_string());
467 }
f66d7163
AC
468 // libgit2's "USERNAME" authentication actually means that it's just
469 // asking us for a username to keep going. This is currently only really
470 // used for SSH authentication and isn't really an authentication type.
471 // The logic currently looks like:
472 //
473 // let user = ...;
474 // if (user.is_null())
475 // user = callback(USERNAME, null, ...);
476 //
477 // callback(SSH_KEY, user, ...)
478 //
537f7185
AC
479 // So if we're being called here then we know that (a) we're using ssh
480 // authentication and (b) no username was specified in the URL that
481 // we're trying to clone. We need to guess an appropriate username here,
482 // but that may involve a few attempts. Unfortunately we can't switch
483 // usernames during one authentication session with libgit2, so to
484 // handle this we bail out of this authentication session after setting
485 // the flag `ssh_username_requested`, and then we handle this below.
a85c917b 486 if allowed.contains(git2::CredentialType::USERNAME) {
537f7185
AC
487 debug_assert!(username.is_none());
488 ssh_username_requested = true;
1e682848 489 return Err(git2::Error::from_str("gonna try usernames later"));
530e1d18 490 }
f66d7163
AC
491
492 // An "SSH_KEY" authentication indicates that we need some sort of SSH
493 // authentication. This can currently either come from the ssh-agent
494 // process or from a raw in-memory SSH key. Cargo only supports using
495 // ssh-agent currently.
496 //
537f7185 497 // If we get called with this then the only way that should be possible
f7c91ba6 498 // is if a username is specified in the URL itself (e.g., `username` is
537f7185 499 // Some), hence the unwrap() here. We try custom usernames down below.
a85c917b 500 if allowed.contains(git2::CredentialType::SSH_KEY) && !tried_sshkey {
b379a5b0
NE
501 // If ssh-agent authentication fails, libgit2 will keep
502 // calling this callback asking for other authentication
503 // methods to try. Make sure we only try ssh-agent once,
504 // to avoid looping forever.
505 tried_sshkey = true;
537f7185
AC
506 let username = username.unwrap();
507 debug_assert!(!ssh_username_requested);
508 ssh_agent_attempts.push(username.to_string());
1e682848 509 return git2::Cred::ssh_key_from_agent(username);
530e1d18 510 }
f66d7163
AC
511
512 // Sometimes libgit2 will ask for a username/password in plaintext. This
513 // is where Cargo would have an interactive prompt if we supported it,
514 // but we currently don't! Right now the only way we support fetching a
515 // plaintext password is through the `credential.helper` support, so
516 // fetch that here.
96ab67b8
HM
517 //
518 // If ssh-agent authentication fails, libgit2 will keep calling this
519 // callback asking for other authentication methods to try. Check
520 // cred_helper_bad to make sure we only try the git credentail helper
521 // once, to avoid looping forever.
f16efff1
AC
522 if allowed.contains(git2::CredentialType::USER_PASS_PLAINTEXT) && cred_helper_bad.is_none()
523 {
f66d7163 524 let r = git2::Cred::credential_helper(cfg, url, username);
537f7185 525 cred_helper_bad = Some(r.is_err());
1e682848 526 return r;
f66d7163
AC
527 }
528
529 // I'm... not sure what the DEFAULT kind of authentication is, but seems
530 // easy to support?
a85c917b 531 if allowed.contains(git2::CredentialType::DEFAULT) {
1e682848 532 return git2::Cred::default();
f66d7163
AC
533 }
534
535 // Whelp, we tried our best
536 Err(git2::Error::from_str("no authentication available"))
219f9902 537 });
f66d7163 538
537f7185
AC
539 // Ok, so if it looks like we're going to be doing ssh authentication, we
540 // want to try a few different usernames as one wasn't specified in the URL
541 // for us to use. In order, we'll try:
542 //
543 // * A credential helper's username for this URL, if available.
544 // * This account's username.
545 // * "git"
546 //
547 // We have to restart the authentication session each time (due to
548 // constraints in libssh2 I guess? maybe this is inherent to ssh?), so we
549 // call our callback, `f`, in a loop here.
550 if ssh_username_requested {
551 debug_assert!(res.is_err());
c4e5670b 552 let mut attempts = vec![String::from("git")];
537f7185
AC
553 if let Ok(s) = env::var("USER").or_else(|_| env::var("USERNAME")) {
554 attempts.push(s);
555 }
556 if let Some(ref s) = cred_helper.username {
557 attempts.push(s.clone());
558 }
559
560 while let Some(s) = attempts.pop() {
561 // We should get `USERNAME` first, where we just return our attempt,
562 // and then after that we should get `SSH_KEY`. If the first attempt
563 // fails we'll get called again, but we don't have another option so
564 // we bail out.
565 let mut attempts = 0;
566 res = f(&mut |_url, username, allowed| {
a85c917b 567 if allowed.contains(git2::CredentialType::USERNAME) {
537f7185
AC
568 return git2::Cred::username(&s);
569 }
a85c917b 570 if allowed.contains(git2::CredentialType::SSH_KEY) {
537f7185
AC
571 debug_assert_eq!(Some(&s[..]), username);
572 attempts += 1;
573 if attempts == 1 {
574 ssh_agent_attempts.push(s.to_string());
1e682848 575 return git2::Cred::ssh_key_from_agent(&s);
537f7185
AC
576 }
577 }
578 Err(git2::Error::from_str("no authentication available"))
579 });
580
581 // If we made two attempts then that means:
582 //
583 // 1. A username was requested, we returned `s`.
584 // 2. An ssh key was requested, we returned to look up `s` in the
585 // ssh agent.
586 // 3. For whatever reason that lookup failed, so we were asked again
587 // for another mode of authentication.
588 //
589 // Essentially, if `attempts == 2` then in theory the only error was
f7c91ba6 590 // that this username failed to authenticate (e.g., no other network
537f7185
AC
591 // errors happened). Otherwise something else is funny so we bail
592 // out.
593 if attempts != 2 {
1e682848 594 break;
537f7185
AC
595 }
596 }
597 }
6514c289
AC
598 let mut err = match res {
599 Ok(e) => return Ok(e),
600 Err(e) => e,
601 };
f66d7163
AC
602
603 // In the case of an authentication failure (where we tried something) then
604 // we try to give a more helpful error message about precisely what we
605 // tried.
6514c289 606 if any_attempts {
f66d7163 607 let mut msg = "failed to authenticate when downloading \
1e682848
AC
608 repository"
609 .to_string();
6514c289
AC
610
611 if let Some(attempt) = &url_attempt {
612 if url != attempt {
613 msg.push_str(": ");
614 msg.push_str(attempt);
615 }
616 }
d5541331 617 msg.push('\n');
c5611a32 618 if !ssh_agent_attempts.is_empty() {
1e682848
AC
619 let names = ssh_agent_attempts
620 .iter()
621 .map(|s| format!("`{}`", s))
622 .collect::<Vec<_>>()
623 .join(", ");
624 msg.push_str(&format!(
6514c289
AC
625 "\n* attempted ssh-agent authentication, but \
626 no usernames succeeded: {}",
1e682848
AC
627 names
628 ));
f66d7163 629 }
537f7185 630 if let Some(failed_cred_helper) = cred_helper_bad {
f66d7163 631 if failed_cred_helper {
1e682848 632 msg.push_str(
6514c289 633 "\n* attempted to find username/password via \
1e682848
AC
634 git's `credential.helper` support, but failed",
635 );
f66d7163 636 } else {
1e682848 637 msg.push_str(
6514c289 638 "\n* attempted to find username/password via \
1e682848
AC
639 `credential.helper`, but maybe the found \
640 credentials were incorrect",
641 );
f66d7163
AC
642 }
643 }
6514c289
AC
644 msg.push_str("\n\n");
645 msg.push_str("if the git CLI succeeds then `net.git-fetch-with-cli` may help here\n");
646 msg.push_str("https://doc.rust-lang.org/cargo/reference/config.html#netgit-fetch-with-cli");
647 err = err.context(msg);
648
649 // Otherwise if we didn't even get to the authentication phase them we may
650 // have failed to set up a connection, in these cases hint on the
651 // `net.git-fetch-with-cli` configuration option.
652 } else if let Some(e) = err.downcast_ref::<git2::Error>() {
653 match e.class() {
654 ErrorClass::Net
655 | ErrorClass::Ssl
656 | ErrorClass::Submodule
657 | ErrorClass::FetchHead
658 | ErrorClass::Ssh
659 | ErrorClass::Callback
660 | ErrorClass::Http => {
661 let mut msg = "network failure seems to have happened\n".to_string();
662 msg.push_str(
663 "if a proxy or similar is necessary `net.git-fetch-with-cli` may help here\n",
664 );
665 msg.push_str(
666 "https://doc.rust-lang.org/cargo/reference/config.html#netgit-fetch-with-cli",
667 );
668 err = err.context(msg);
669 }
670 _ => {}
671 }
672 }
673
674 Err(err)
219f9902
AC
675}
676
b8b7faee 677fn reset(repo: &git2::Repository, obj: &git2::Object<'_>, config: &Config) -> CargoResult<()> {
143b0600
AC
678 let mut pb = Progress::new("Checkout", config);
679 let mut opts = git2::build::CheckoutBuilder::new();
680 opts.progress(|_, cur, max| {
77d993cb 681 drop(pb.tick(cur, max, ""));
143b0600 682 });
869642bb 683 debug!("doing reset");
143b0600 684 repo.reset(obj, git2::ResetType::Hard, Some(&mut opts))?;
869642bb 685 debug!("reset done");
143b0600
AC
686 Ok(())
687}
688
1e682848
AC
689pub fn with_fetch_options(
690 git_config: &git2::Config,
04008795 691 url: &str,
1e682848 692 config: &Config,
b8b7faee 693 cb: &mut dyn FnMut(git2::FetchOptions<'_>) -> CargoResult<()>,
1e682848 694) -> CargoResult<()> {
5cca4e8c 695 let mut progress = Progress::new("Fetch", config);
74584910 696 network::with_retry(config, || {
04008795 697 with_authentication(url, git_config, |f| {
d58aa8ab 698 let mut last_update = Instant::now();
5cca4e8c 699 let mut rcb = git2::RemoteCallbacks::new();
a89b1e8a
WL
700 // We choose `N=10` here to make a `300ms * 10slots ~= 3000ms`
701 // sliding window for tracking the data transfer rate (in bytes/s).
702 let mut counter = MetricsCounter::<10>::new(0, last_update);
5cca4e8c 703 rcb.credentials(f);
5cca4e8c 704 rcb.transfer_progress(|stats| {
d58aa8ab
WL
705 let indexed_deltas = stats.indexed_deltas();
706 let msg = if indexed_deltas > 0 {
707 // Resolving deltas.
f2172db6
WL
708 format!(
709 ", ({}/{}) resolving deltas",
710 indexed_deltas,
711 stats.total_deltas()
712 )
d58aa8ab
WL
713 } else {
714 // Receiving objects.
e4d43472
WL
715 //
716 // # Caveat
717 //
718 // Progress bar relies on git2 calling `transfer_progress`
719 // to update its transfer rate, but we cannot guarantee a
720 // periodic call of that callback. Thus if we don't receive
721 // any data for, say, 10 seconds, the rate will get stuck
722 // and never go down to 0B/s.
723 // In the future, we need to find away to update the rate
724 // even when the callback is not called.
a89b1e8a
WL
725 let now = Instant::now();
726 // Scrape a `received_bytes` to the counter every 300ms.
727 if now - last_update > Duration::from_millis(300) {
728 counter.add(stats.received_bytes(), now);
729 last_update = now;
7e3f7d64 730 }
d58aa8ab 731 fn format_bytes(bytes: f32) -> (&'static str, f32) {
9df531b2 732 static UNITS: [&str; 5] = ["", "Ki", "Mi", "Gi", "Ti"];
d58aa8ab
WL
733 let i = (bytes.log2() / 10.0).min(4.0) as usize;
734 (UNITS[i], bytes / 1024_f32.powi(i as i32))
735 }
de84f352 736 let (unit, rate) = format_bytes(counter.rate());
9df531b2 737 format!(", {:.2}{}B/s", rate, unit)
d58aa8ab 738 };
1e682848 739 progress
d58aa8ab 740 .tick(stats.indexed_objects(), stats.total_objects(), &msg)
1e682848 741 .is_ok()
5cca4e8c
AC
742 });
743
744 // Create a local anonymous remote in the repository to fetch the
745 // url
746 let mut opts = git2::FetchOptions::new();
869642bb 747 opts.remote_callbacks(rcb);
5cca4e8c
AC
748 cb(opts)
749 })?;
750 Ok(())
751 })
752}
753
1e682848
AC
754pub fn fetch(
755 repo: &mut git2::Repository,
04008795 756 url: &str,
869642bb 757 reference: &GitReference,
1e682848
AC
758 config: &Config,
759) -> CargoResult<()> {
ec5f78f9 760 if config.frozen() {
3a18c89a 761 anyhow::bail!(
1e682848
AC
762 "attempting to update a git repository, but --frozen \
763 was specified"
764 )
a504f480 765 }
ec5f78f9 766 if !config.network_allowed() {
3a18c89a 767 anyhow::bail!("can't update a git repository in the offline mode")
ec5f78f9 768 }
219f9902 769
43914c53 770 // If we're fetching from GitHub, attempt GitHub's special fast path for
fe5a5c78 771 // testing if we've already got an up-to-date copy of the repository
869642bb
AC
772 match github_up_to_date(repo, url, reference, config) {
773 Ok(true) => return Ok(()),
774 Ok(false) => {}
775 Err(e) => debug!("failed to check github {:?}", e),
fe5a5c78
AC
776 }
777
778 // We reuse repositories quite a lot, so before we go through and update the
779 // repo check to see if it's a little too old and could benefit from a gc.
780 // In theory this shouldn't be too too expensive compared to the network
781 // request we're about to issue.
782 maybe_gc_repo(repo)?;
783
869642bb
AC
784 // Translate the reference desired here into an actual list of refspecs
785 // which need to get fetched. Additionally record if we're fetching tags.
786 let mut refspecs = Vec::new();
787 let mut tags = false;
8d5576ba
EH
788 // The `+` symbol on the refspec means to allow a forced (fast-forward)
789 // update which is needed if there is ever a force push that requires a
790 // fast-forward.
869642bb
AC
791 match reference {
792 // For branches and tags we can fetch simply one reference and copy it
793 // locally, no need to fetch other branches/tags.
794 GitReference::Branch(b) => {
8d5576ba 795 refspecs.push(format!("+refs/heads/{0}:refs/remotes/origin/{0}", b));
869642bb
AC
796 }
797 GitReference::Tag(t) => {
8d5576ba 798 refspecs.push(format!("+refs/tags/{0}:refs/remotes/origin/tags/{0}", t));
869642bb
AC
799 }
800
4c02977c 801 GitReference::DefaultBranch => {
8d5576ba 802 refspecs.push(String::from("+HEAD:refs/remotes/origin/HEAD"));
4c02977c
AC
803 }
804
256213a6
DT
805 GitReference::Rev(rev) => {
806 if rev.starts_with("refs/") {
8d5576ba 807 refspecs.push(format!("+{0}:{0}", rev));
256213a6
DT
808 } else {
809 // We don't know what the rev will point to. To handle this
810 // situation we fetch all branches and tags, and then we pray
811 // it's somewhere in there.
8d5576ba
EH
812 refspecs.push(String::from("+refs/heads/*:refs/remotes/origin/*"));
813 refspecs.push(String::from("+HEAD:refs/remotes/origin/HEAD"));
256213a6
DT
814 tags = true;
815 }
869642bb
AC
816 }
817 }
818
b4cd6095 819 // Unfortunately `libgit2` is notably lacking in the realm of authentication
a0591eea
AC
820 // when compared to the `git` command line. As a result, allow an escape
821 // hatch for users that would prefer to use `git`-the-CLI for fetching
822 // repositories instead of `libgit2`-the-library. This should make more
823 // flavors of authentication possible while also still giving us all the
824 // speed and portability of using `libgit2`.
8d659063 825 if let Some(true) = config.net_config()?.git_fetch_with_cli {
869642bb 826 return fetch_with_cli(repo, url, &refspecs, tags, config);
a0591eea
AC
827 }
828
fe5a5c78 829 debug!("doing a fetch for {}", url);
c933673e
AC
830 let git_config = git2::Config::open_default()?;
831 with_fetch_options(&git_config, url, config, &mut |mut opts| {
869642bb
AC
832 if tags {
833 opts.download_tags(git2::AutotagOption::All);
834 }
c933673e
AC
835 // The `fetch` operation here may fail spuriously due to a corrupt
836 // repository. It could also fail, however, for a whole slew of other
837 // reasons (aka network related reasons). We want Cargo to automatically
838 // recover from corrupt repositories, but we don't want Cargo to stomp
2b41dd4a 839 // over other legitimate errors.
c933673e
AC
840 //
841 // Consequently we save off the error of the `fetch` operation and if it
842 // looks like a "corrupt repo" error then we blow away the repo and try
843 // again. If it looks like any other kind of error, or if we've already
844 // blown away the repository, then we want to return the error as-is.
845 let mut repo_reinitialized = false;
846 loop {
869642bb 847 debug!("initiating fetch of {:?} from {}", refspecs, url);
fecb7246 848 let res = repo
04008795 849 .remote_anonymous(url)?
869642bb 850 .fetch(&refspecs, Some(&mut opts), None);
c933673e
AC
851 let err = match res {
852 Ok(()) => break,
853 Err(e) => e,
854 };
855 debug!("fetch failed: {}", err);
856
3bbb44c7
EH
857 if !repo_reinitialized && matches!(err.class(), ErrorClass::Reference | ErrorClass::Odb)
858 {
c933673e 859 repo_reinitialized = true;
1e682848
AC
860 debug!(
861 "looks like this is a corrupt repository, reinitializing \
862 and trying again"
863 );
c933673e 864 if reinitialize(repo).is_ok() {
1e682848 865 continue;
c933673e
AC
866 }
867 }
868
1e682848 869 return Err(err.into());
c933673e 870 }
219f9902
AC
871 Ok(())
872 })
873}
fe5a5c78 874
a0591eea
AC
875fn fetch_with_cli(
876 repo: &mut git2::Repository,
04008795 877 url: &str,
869642bb
AC
878 refspecs: &[String],
879 tags: bool,
a0591eea
AC
880 config: &Config,
881) -> CargoResult<()> {
88810035 882 let mut cmd = ProcessBuilder::new("git");
869642bb
AC
883 cmd.arg("fetch");
884 if tags {
885 cmd.arg("--tags");
886 }
887 cmd.arg("--force") // handle force pushes
e307f047 888 .arg("--update-head-ok") // see discussion in #2078
04008795 889 .arg(url)
869642bb 890 .args(refspecs)
00fd31dd
EH
891 // If cargo is run by git (for example, the `exec` command in `git
892 // rebase`), the GIT_DIR is set by git and will point to the wrong
893 // location (this takes precedence over the cwd). Make sure this is
894 // unset so git will look at cwd for the repo.
895 .env_remove("GIT_DIR")
896 // The reset of these may not be necessary, but I'm including them
897 // just to be extra paranoid and avoid any issues.
898 .env_remove("GIT_WORK_TREE")
899 .env_remove("GIT_INDEX_FILE")
900 .env_remove("GIT_OBJECT_DIRECTORY")
901 .env_remove("GIT_ALTERNATE_OBJECT_DIRECTORIES")
a0591eea 902 .cwd(repo.path());
fecb7246
AC
903 config
904 .shell()
905 .verbose(|s| s.status("Running", &cmd.to_string()))?;
1667b75a 906 cmd.exec_with_output()?;
a0591eea
AC
907 Ok(())
908}
909
fe5a5c78
AC
910/// Cargo has a bunch of long-lived git repositories in its global cache and
911/// some, like the index, are updated very frequently. Right now each update
912/// creates a new "pack file" inside the git database, and over time this can
913/// cause bad performance and bad current behavior in libgit2.
914///
915/// One pathological use case today is where libgit2 opens hundreds of file
916/// descriptors, getting us dangerously close to blowing out the OS limits of
917/// how many fds we can have open. This is detailed in #4403.
918///
919/// To try to combat this problem we attempt a `git gc` here. Note, though, that
920/// we may not even have `git` installed on the system! As a result we
921/// opportunistically try a `git gc` when the pack directory looks too big, and
922/// failing that we just blow away the repository and start over.
923fn maybe_gc_repo(repo: &mut git2::Repository) -> CargoResult<()> {
924 // Here we arbitrarily declare that if you have more than 100 files in your
925 // `pack` folder that we need to do a gc.
926 let entries = match repo.path().join("objects/pack").read_dir() {
927 Ok(e) => e.count(),
928 Err(_) => {
929 debug!("skipping gc as pack dir appears gone");
1e682848 930 return Ok(());
fe5a5c78
AC
931 }
932 };
1e682848
AC
933 let max = env::var("__CARGO_PACKFILE_LIMIT")
934 .ok()
fe5a5c78
AC
935 .and_then(|s| s.parse::<usize>().ok())
936 .unwrap_or(100);
937 if entries < max {
938 debug!("skipping gc as there's only {} pack files", entries);
1e682848 939 return Ok(());
fe5a5c78
AC
940 }
941
942 // First up, try a literal `git gc` by shelling out to git. This is pretty
943 // likely to fail though as we may not have `git` installed. Note that
944 // libgit2 doesn't currently implement the gc operation, so there's no
945 // equivalent there.
1e682848
AC
946 match Command::new("git")
947 .arg("gc")
948 .current_dir(repo.path())
949 .output()
950 {
fe5a5c78 951 Ok(out) => {
1e682848
AC
952 debug!(
953 "git-gc status: {}\n\nstdout ---\n{}\nstderr ---\n{}",
954 out.status,
955 String::from_utf8_lossy(&out.stdout),
956 String::from_utf8_lossy(&out.stderr)
957 );
fe5a5c78
AC
958 if out.status.success() {
959 let new = git2::Repository::open(repo.path())?;
ee98c351 960 *repo = new;
1e682848 961 return Ok(());
fe5a5c78
AC
962 }
963 }
964 Err(e) => debug!("git-gc failed to spawn: {}", e),
965 }
966
967 // Alright all else failed, let's start over.
c933673e
AC
968 reinitialize(repo)
969}
970
971fn reinitialize(repo: &mut git2::Repository) -> CargoResult<()> {
fe5a5c78
AC
972 // Here we want to drop the current repository object pointed to by `repo`,
973 // so we initialize temporary repository in a sub-folder, blow away the
974 // existing git folder, and then recreate the git repo. Finally we blow away
975 // the `tmp` folder we allocated.
976 let path = repo.path().to_path_buf();
c933673e 977 debug!("reinitializing git repo at {:?}", path);
fe5a5c78 978 let tmp = path.join("tmp");
c933673e 979 let bare = !repo.path().ends_with(".git");
2a4cdc67 980 *repo = init(&tmp, false)?;
fe5a5c78
AC
981 for entry in path.read_dir()? {
982 let entry = entry?;
983 if entry.file_name().to_str() == Some("tmp") {
1e682848 984 continue;
fe5a5c78
AC
985 }
986 let path = entry.path();
c933673e 987 drop(paths::remove_file(&path).or_else(|_| paths::remove_dir_all(&path)));
fe5a5c78 988 }
2a4cdc67 989 *repo = init(&path, bare)?;
c933673e 990 paths::remove_dir_all(&tmp)?;
fe5a5c78
AC
991 Ok(())
992}
993
2a4cdc67
AC
994fn init(path: &Path, bare: bool) -> CargoResult<git2::Repository> {
995 let mut opts = git2::RepositoryInitOptions::new();
b4cd6095 996 // Skip anything related to templates, they just call all sorts of issues as
2a4cdc67
AC
997 // we really don't want to use them yet they insist on being used. See #6240
998 // for an example issue that comes up.
999 opts.external_template(false);
1000 opts.bare(bare);
1001 Ok(git2::Repository::init_opts(&path, &opts)?)
1002}
1003
fe5a5c78 1004/// Updating the index is done pretty regularly so we want it to be as fast as
43914c53 1005/// possible. For registries hosted on GitHub (like the crates.io index) there's
fe5a5c78
AC
1006/// a fast path available to use [1] to tell us that there's no updates to be
1007/// made.
1008///
1009/// This function will attempt to hit that fast path and verify that the `oid`
869642bb
AC
1010/// is actually the current branch of the repository. If `true` is returned then
1011/// no update needs to be performed, but if `false` is returned then the
1012/// standard update logic still needs to happen.
fe5a5c78
AC
1013///
1014/// [1]: https://developer.github.com/v3/repos/commits/#get-the-sha-1-of-a-commit-reference
1015///
1016/// Note that this function should never cause an actual failure because it's
1017/// just a fast path. As a result all errors are ignored in this function and we
1018/// just return a `bool`. Any real errors will be reported through the normal
1019/// update path above.
869642bb
AC
1020fn github_up_to_date(
1021 repo: &mut git2::Repository,
1022 url: &str,
1023 reference: &GitReference,
1024 config: &Config,
1025) -> CargoResult<bool> {
1026 let url = Url::parse(url)?;
1027 if url.host_str() != Some("github.com") {
1028 return Ok(false);
1029 }
1030
1031 let github_branch_name = match reference {
1032 GitReference::Branch(branch) => branch,
1033 GitReference::Tag(tag) => tag,
07162dba 1034 GitReference::DefaultBranch => "HEAD",
19bb1df2
DT
1035 GitReference::Rev(rev) => {
1036 if rev.starts_with("refs/") {
1037 rev
1038 } else {
1039 debug!("can't use github fast path with `rev = \"{}\"`", rev);
1040 return Ok(false);
1041 }
869642bb
AC
1042 }
1043 };
fe5a5c78 1044
43914c53 1045 // This expects GitHub urls in the form `github.com/user/repo` and nothing
fe5a5c78 1046 // else
869642bb
AC
1047 let mut pieces = url
1048 .path_segments()
1049 .ok_or_else(|| anyhow!("no path segments on url"))?;
1050 let username = pieces
1051 .next()
1052 .ok_or_else(|| anyhow!("couldn't find username"))?;
1053 let repository = pieces
1054 .next()
1055 .ok_or_else(|| anyhow!("couldn't find repository name"))?;
fe5a5c78 1056 if pieces.next().is_some() {
869642bb 1057 anyhow::bail!("too many segments on URL");
fe5a5c78
AC
1058 }
1059
437e5d7e
AC
1060 // Trim off the `.git` from the repository, if present, since that's
1061 // optional for GitHub and won't work when we try to use the API as well.
d5541331 1062 let repository = repository.strip_suffix(".git").unwrap_or(repository);
437e5d7e 1063
1e682848 1064 let url = format!(
869642bb
AC
1065 "https://api.github.com/repos/{}/{}/commits/{}",
1066 username, repository, github_branch_name,
1e682848 1067 );
869642bb
AC
1068 let mut handle = config.http()?.borrow_mut();
1069 debug!("attempting GitHub fast path for {}", url);
1070 handle.get(true)?;
1071 handle.url(&url)?;
1072 handle.useragent("cargo")?;
fe5a5c78 1073 let mut headers = List::new();
869642bb 1074 headers.append("Accept: application/vnd.github.3.sha")?;
7dd9872c 1075 headers.append(&format!("If-None-Match: \"{}\"", reference.resolve(repo)?))?;
869642bb
AC
1076 handle.http_headers(headers)?;
1077 handle.perform()?;
1078 Ok(handle.response_code()? == 304)
fe5a5c78 1079}