use std::collections::HashMap;
-use std::io::prelude::*;
-use std::fs::File;
use std::path::Path;
+use std::str;
use serde_json;
use core::dependency::{Dependency, DependencyInner, Kind};
use core::{SourceId, Summary, PackageId, Registry};
use sources::registry::{RegistryPackage, RegistryDependency, INDEX_LOCK};
+use sources::registry::RegistryData;
use util::{CargoResult, ChainError, internal, Filesystem, Config};
+use util::human;
pub struct RegistryIndex<'cfg> {
source_id: SourceId,
pub fn new(id: &SourceId,
path: &Filesystem,
config: &'cfg Config,
- locked: bool) -> RegistryIndex<'cfg> {
+ locked: bool)
+ -> RegistryIndex<'cfg> {
RegistryIndex {
source_id: id.clone(),
path: path.clone(),
}
/// Return the hash listed for a specified PackageId.
- pub fn hash(&mut self, pkg: &PackageId) -> CargoResult<String> {
+ pub fn hash(&mut self,
+ pkg: &PackageId,
+ load: &mut RegistryData)
+ -> CargoResult<String> {
let key = (pkg.name().to_string(), pkg.version().to_string());
if let Some(s) = self.hashes.get(&key) {
return Ok(s.clone())
}
// Ok, we're missing the key, so parse the index file to load it.
- self.summaries(pkg.name())?;
+ self.summaries(pkg.name(), load)?;
self.hashes.get(&key).chain_error(|| {
internal(format!("no hash listed for {}", pkg))
}).map(|s| s.clone())
///
/// Returns a list of pairs of (summary, yanked) for the package name
/// specified.
- pub fn summaries(&mut self, name: &str) -> CargoResult<&Vec<(Summary, bool)>> {
+ pub fn summaries(&mut self,
+ name: &str,
+ load: &mut RegistryData)
+ -> CargoResult<&Vec<(Summary, bool)>> {
if self.cache.contains_key(name) {
return Ok(&self.cache[name]);
}
- let summaries = self.load_summaries(name)?;
+ let summaries = self.load_summaries(name, load)?;
let summaries = summaries.into_iter().filter(|summary| {
summary.0.package_id().name() == name
}).collect();
Ok(&self.cache[name])
}
- fn load_summaries(&mut self, name: &str) -> CargoResult<Vec<(Summary, bool)>> {
- let (path, _lock) = if self.locked {
+ fn load_summaries(&mut self,
+ name: &str,
+ load: &mut RegistryData)
+ -> CargoResult<Vec<(Summary, bool)>> {
+ let (root, _lock) = if self.locked {
let lock = self.path.open_ro(Path::new(INDEX_LOCK),
self.config,
"the registry index");
// see module comment for why this is structured the way it is
let path = match fs_name.len() {
- 1 => path.join("1").join(&fs_name),
- 2 => path.join("2").join(&fs_name),
- 3 => path.join("3").join(&fs_name[..1]).join(&fs_name),
- _ => path.join(&fs_name[0..2])
- .join(&fs_name[2..4])
- .join(&fs_name),
+ 1 => format!("1/{}", fs_name),
+ 2 => format!("2/{}", fs_name),
+ 3 => format!("3/{}/{}", &fs_name[..1], fs_name),
+ _ => format!("{}/{}/{}", &fs_name[0..2], &fs_name[2..4], fs_name),
+ // 1 => Path::new("1").join(fs_name),
+ // 2 => Path::new("2").join(fs_name),
+ // 3 => Path::new("3").join(&fs_name[..1]).join(fs_name),
+ // _ => Path::new(&fs_name[0..2]).join(&fs_name[2..4]).join(fs_name),
};
- match File::open(&path) {
- Ok(mut f) => {
- let mut contents = String::new();
- f.read_to_string(&mut contents)?;
- let ret: CargoResult<Vec<(Summary, bool)>>;
- ret = contents.lines().filter(|l| !l.trim().is_empty())
- .map(|l| self.parse_registry_package(l))
- .collect();
- ret.chain_error(|| {
- internal(format!("failed to parse registry's information \
- for: {}", name))
- })
+ match load.load(&root, Path::new(&path)) {
+ Ok(contents) => {
+ let contents = str::from_utf8(&contents).map_err(|_| {
+ human("registry index file was not valid utf-8")
+ })?;
+ let lines = contents.lines()
+ .map(|s| s.trim())
+ .filter(|l| !l.is_empty());
+
+ // Attempt forwards-compatibility on the index by ignoring
+ // everything that we ourselves don't understand, that should
+ // allow future cargo implementations to break the
+ // interpretation of each line here and older cargo will simply
+ // ignore the new lines.
+ Ok(lines.filter_map(|line| {
+ self.parse_registry_package(line).ok()
+ }).collect())
}
Err(..) => Ok(Vec::new()),
}
.set_kind(kind)
.into_dependency())
}
-}
-impl<'cfg> Registry for RegistryIndex<'cfg> {
- fn query(&mut self, dep: &Dependency) -> CargoResult<Vec<Summary>> {
+ pub fn query(&mut self,
+ dep: &Dependency,
+ load: &mut RegistryData)
+ -> CargoResult<Vec<Summary>> {
let mut summaries = {
- let summaries = self.summaries(dep.name())?;
+ let summaries = self.summaries(dep.name(), load)?;
summaries.iter().filter(|&&(_, yanked)| {
dep.source_id().precise().is_some() || !yanked
}).map(|s| s.0.clone()).collect::<Vec<_>>()
});
summaries.query(dep)
}
-
- fn supports_checksums(&self) -> bool {
- true
- }
}
pub trait RegistryData {
fn index_path(&self) -> &Filesystem;
- fn config(&self) -> CargoResult<Option<RegistryConfig>>;
+ fn load(&self, root: &Path, path: &Path) -> CargoResult<Vec<u8>>;
+ fn config(&mut self) -> CargoResult<Option<RegistryConfig>>;
fn update_index(&mut self) -> CargoResult<()>;
fn download(&mut self,
pkg: &PackageId,
/// Decode the configuration stored within the registry.
///
/// This requires that the index has been at least checked out.
- pub fn config(&self) -> CargoResult<Option<RegistryConfig>> {
+ pub fn config(&mut self) -> CargoResult<Option<RegistryConfig>> {
self.ops.config()
}
// come back with no summaries, then our registry may need to be
// updated, so we fall back to performing a lazy update.
if dep.source_id().precise().is_some() && !self.updated {
- if self.index.query(dep)?.is_empty() {
+ if self.index.query(dep, &mut *self.ops)?.is_empty() {
self.do_update()?;
}
}
- self.index.query(dep)
+ self.index.query(dep, &mut *self.ops)
}
fn supports_checksums(&self) -> bool {
}
fn download(&mut self, package: &PackageId) -> CargoResult<Package> {
- let hash = self.index.hash(package)?;
+ let hash = self.index.hash(package, &mut *self.ops)?;
let path = self.ops.download(package, &hash)?;
let path = self.unpack_package(package, &path).chain_error(|| {
internal(format!("failed to unpack package `{}`", package))
// differ due to historical Cargo bugs. To paper over these we trash the
// *summary* loaded from the Cargo.toml we just downloaded with the one
// we loaded from the index.
- let summaries = self.index.summaries(package.name())?;
+ let summaries = self.index.summaries(package.name(), &mut *self.ops)?;
let summary = summaries.iter().map(|s| &s.0).find(|s| {
s.package_id() == package
}).expect("summary not found");
+use std::cell::{RefCell, Ref, Cell};
use std::io::SeekFrom;
use std::io::prelude::*;
+use std::mem;
use std::path::Path;
use curl::easy::{Easy, List};
use sources::git;
use sources::registry::{RegistryData, RegistryConfig, INDEX_LOCK};
use util::network;
-use util::paths;
-use util::{FileLock, Filesystem};
+use util::{FileLock, Filesystem, LazyCell};
use util::{Config, CargoResult, ChainError, human, Sha256, ToUrl};
use util::errors::HttpError;
cache_path: Filesystem,
source_id: SourceId,
config: &'cfg Config,
- handle: Option<Easy>,
+ handle: LazyCell<RefCell<Easy>>,
+ tree: RefCell<Option<git2::Tree<'static>>>,
+ repo: LazyCell<git2::Repository>,
+ head: Cell<Option<git2::Oid>>,
}
impl<'cfg> RemoteRegistry<'cfg> {
cache_path: config.registry_cache_path().join(name),
source_id: source_id.clone(),
config: config,
- handle: None,
+ tree: RefCell::new(None),
+ handle: LazyCell::new(),
+ repo: LazyCell::new(),
+ head: Cell::new(None),
}
}
+
+ /// Lazily create (on first use) and return the shared curl `Easy` handle.
+ ///
+ /// Cached in a `LazyCell<RefCell<Easy>>` so `ops::http_handle` is invoked
+ /// at most once per `RemoteRegistry`; callers borrow it mutably via the
+ /// `RefCell` (see the GitHub fast-path and `download`).
+ fn easy(&self) -> CargoResult<&RefCell<Easy>> {
+ self.handle.get_or_try_init(|| {
+ ops::http_handle(self.config).map(RefCell::new)
+ })
+ }
+
+ /// Lazily open (or create) the git repository backing the registry index.
+ ///
+ /// Tries `git2::Repository::open` on the unlocked index path first; if
+ /// that fails, takes the `INDEX_LOCK` read-write, clears any sibling
+ /// files left from a previous layout, and initializes a fresh bare
+ /// repository there. The result is cached in a `LazyCell` for reuse by
+ /// `head`/`tree`/`load`.
+ fn repo(&self) -> CargoResult<&git2::Repository> {
+ self.repo.get_or_try_init(|| {
+ let path = self.index_path.clone().into_path_unlocked();
+
+ match git2::Repository::open(&path) {
+ Ok(repo) => Ok(repo),
+ Err(_) => {
+ self.index_path.create_dir()?;
+ let lock = self.index_path.open_rw(Path::new(INDEX_LOCK),
+ self.config,
+ "the registry index")?;
+ // Remove leftover files next to the lock so `init_bare`
+ // starts from a clean directory.
+ let _ = lock.remove_siblings();
+ Ok(git2::Repository::init_bare(&path)?)
+ }
+ }
+ })
+ }
+
+ /// Resolve and cache the OID that `refs/remotes/origin/master` points at.
+ ///
+ /// The cached `Cell` is invalidated (set back to `None`) by
+ /// `update_index` after a fetch, so the next call re-resolves the ref.
+ fn head(&self) -> CargoResult<git2::Oid> {
+ if self.head.get().is_none() {
+ let oid = self.repo()?.refname_to_id("refs/remotes/origin/master")?;
+ self.head.set(Some(oid));
+ }
+ Ok(self.head.get().unwrap())
+ }
+
+ /// Return (and memoize) the git tree of the commit at `head()`.
+ ///
+ /// Subsequent calls hand out a `Ref` into the cached tree without
+ /// touching libgit2 again; `update_index` clears the cache after a
+ /// fetch. See the inline comment for why the cached tree is transmuted
+ /// to a `'static` lifetime and why `RemoteRegistry` has a `Drop` impl.
+ fn tree(&self) -> CargoResult<Ref<git2::Tree>> {
+ {
+ let tree = self.tree.borrow();
+ if tree.is_some() {
+ return Ok(Ref::map(tree, |s| s.as_ref().unwrap()))
+ }
+ }
+ let repo = self.repo()?;
+ let commit = repo.find_commit(self.head()?)?;
+ let tree = commit.tree()?;
+
+ // Unfortunately in libgit2 the tree objects look like they've got a
+ // reference to the repository object which means that a tree cannot
+ // outlive the repository that it came from. Here we want to cache this
+ // tree, though, so to accomplish this we transmute it to a static
+ // lifetime.
+ //
+ // Note that we don't actually hand out the static lifetime, instead we
+ // only return a scoped one from this function. Additionally the repo
+ // we loaded from (above) lives as long as this object
+ // (`RemoteRegistry`) so we then just need to ensure that the tree is
+ // destroyed first in the destructor, hence the destructor on
+ // `RemoteRegistry` below.
+ let tree = unsafe {
+ mem::transmute::<git2::Tree, git2::Tree<'static>>(tree)
+ };
+ *self.tree.borrow_mut() = Some(tree);
+ Ok(Ref::map(self.tree.borrow(), |s| s.as_ref().unwrap()))
+ }
}
impl<'cfg> RegistryData for RemoteRegistry<'cfg> {
&self.index_path
}
- fn config(&self) -> CargoResult<Option<RegistryConfig>> {
- let lock = self.index_path.open_ro(Path::new(INDEX_LOCK),
- self.config,
- "the registry index")?;
- let path = lock.path().parent().unwrap();
- let contents = paths::read(&path.join("config.json"))?;
- let config = serde_json::from_str(&contents)?;
+ /// Read the raw bytes of `path` out of the current index tree.
+ ///
+ /// Looks the path up in the cached `tree()`, resolves the entry to a
+ /// blob in the repository, and returns a copy of its contents. Errors if
+ /// the path names something other than a blob (e.g. a subdirectory).
+ /// `_root` is unused here because the git backend addresses files by
+ /// tree path rather than by filesystem location.
+ fn load(&self, _root: &Path, path: &Path) -> CargoResult<Vec<u8>> {
+ // Note that the index calls this method and the filesystem is locked
+ // in the index, so we don't need to worry about an `update_index`
+ // happening in a different process.
+ let repo = self.repo()?;
+ let tree = self.tree()?;
+ let entry = tree.get_path(path)?;
+ let object = entry.to_object(&repo)?;
+ let blob = match object.as_blob() {
+ Some(blob) => blob,
+ None => bail!("path `{}` is not a blob in the git repo", path.display()),
+ };
+ Ok(blob.content().to_vec())
+ }
+
+ /// Load and deserialize `config.json` from the root of the index tree.
+ ///
+ /// Takes `&mut self` (per the updated `RegistryData` trait) because the
+ /// first call may need to create directories and initialize the git
+ /// repository. Holds the index read lock while reading.
+ fn config(&mut self) -> CargoResult<Option<RegistryConfig>> {
+ self.repo()?; // create intermediate dirs and initialize the repo
+ let _lock = self.index_path.open_ro(Path::new(INDEX_LOCK),
+ self.config,
+ "the registry index")?;
+ let json = self.load(Path::new(""), Path::new("config.json"))?;
+ let config = serde_json::from_slice(&json)?;
Ok(Some(config))
}
// hit the index, which may not actually read this configuration.
ops::http_handle(self.config)?;
- // Then we actually update the index
- self.index_path.create_dir()?;
- let lock = self.index_path.open_rw(Path::new(INDEX_LOCK),
- self.config,
- "the registry index")?;
- let path = lock.path().parent().unwrap();
-
+ let repo = self.repo()?;
+ let _lock = self.index_path.open_rw(Path::new(INDEX_LOCK),
+ self.config,
+ "the registry index")?;
self.config.shell().status("Updating",
format!("registry `{}`", self.source_id.url()))?;
- let repo = git2::Repository::open(path).or_else(|_| {
- let _ = lock.remove_siblings();
- git2::Repository::init(path)
- })?;
-
if self.source_id.url().host_str() == Some("github.com") {
- if let Ok(oid) = repo.refname_to_id("refs/heads/master") {
- let handle = match self.handle {
- Some(ref mut handle) => handle,
- None => {
- self.handle = Some(ops::http_handle(self.config)?);
- self.handle.as_mut().unwrap()
- }
- };
+ if let Ok(oid) = self.head() {
+ let mut handle = self.easy()?.borrow_mut();
debug!("attempting github fast path for {}",
self.source_id.url());
- if github_up_to_date(handle, self.source_id.url(), &oid) {
+ if github_up_to_date(&mut handle, self.source_id.url(), &oid) {
return Ok(())
}
debug!("fast path failed, falling back to a git fetch");
}
}
- // git fetch origin
+ // git fetch origin master
let url = self.source_id.url().to_string();
let refspec = "refs/heads/master:refs/remotes/origin/master";
-
git::fetch(&repo, &url, refspec, self.config).chain_error(|| {
human(format!("failed to fetch `{}`", url))
})?;
-
- // git reset --hard origin/master
- let reference = "refs/remotes/origin/master";
- let oid = repo.refname_to_id(reference)?;
- trace!("[{}] updating to rev {}", self.source_id, oid);
- let object = repo.find_object(oid, None)?;
- repo.reset(&object, git2::ResetType::Hard, None)?;
+ self.head.set(None);
+ *self.tree.borrow_mut() = None;
Ok(())
}
.push(&pkg.version().to_string())
.push("download");
- let handle = match self.handle {
- Some(ref mut handle) => handle,
- None => {
- self.handle = Some(ops::http_handle(self.config)?);
- self.handle.as_mut().unwrap()
- }
- };
// TODO: don't download into memory, but ensure that if we ctrl-c a
// download we should resume either from the start or the middle
// on the next time
let url = url.to_string();
+ let mut handle = self.easy()?.borrow_mut();
handle.get(true)?;
handle.url(&url)?;
handle.follow_location(true)?;
}
}
+/// Destructor ordering matters: the cached `tree` was transmuted to a
+/// `'static` lifetime in `tree()` but actually borrows from the `repo`
+/// field, so it must be dropped before the repository is. Field drop order
+/// alone is not relied upon — the tree is taken out explicitly here.
+impl<'cfg> Drop for RemoteRegistry<'cfg> {
+ fn drop(&mut self) {
+ // Just be sure to drop this before our other fields
+ self.tree.borrow_mut().take();
+ }
+}
+
/// Updating the index is done pretty regularly so we want it to be as fast as
/// possible. For registries hosted on github (like the crates.io index) there's
/// a fast path available to use [1] to tell us that there's no updates to be