]> git.proxmox.com Git - cargo.git/commitdiff
Remove more allocatoins in index querying
authorAlex Crichton <alex@alexcrichton.com>
Fri, 2 Jun 2017 15:58:08 +0000 (08:58 -0700)
committerAlex Crichton <alex@alexcrichton.com>
Mon, 5 Jun 2017 14:36:44 +0000 (07:36 -0700)
Removing some allocations arounds the stored hashes by having nested hash maps
instead of tuple keys. Also remove an intermediate array when parsing
dependencies through a custom implementation of `Deserialize`. While this
doesn't make this code path blazingly fast it definitely knocks it down in the
profiles below other higher-value targets.

Cargo.lock
Cargo.toml
src/cargo/lib.rs
src/cargo/sources/registry/index.rs
src/cargo/sources/registry/mod.rs
src/cargo/util/paths.rs

index 02a1624781cada5522121ab59fa821afa283b808..18b2ab060b70818af8cd1130bfaa95cf190d0352 100644 (file)
@@ -28,6 +28,7 @@ dependencies = [
  "openssl 0.9.13 (registry+https://github.com/rust-lang/crates.io-index)",
  "psapi-sys 0.1.0 (registry+https://github.com/rust-lang/crates.io-index)",
  "rustc-serialize 0.3.24 (registry+https://github.com/rust-lang/crates.io-index)",
+ "scoped-tls 0.1.0 (registry+https://github.com/rust-lang/crates.io-index)",
  "semver 0.7.0 (registry+https://github.com/rust-lang/crates.io-index)",
  "serde 1.0.8 (registry+https://github.com/rust-lang/crates.io-index)",
  "serde_derive 1.0.8 (registry+https://github.com/rust-lang/crates.io-index)",
@@ -623,6 +624,11 @@ name = "rustc-serialize"
 version = "0.3.24"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 
+[[package]]
+name = "scoped-tls"
+version = "0.1.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+
 [[package]]
 name = "semver"
 version = "0.7.0"
@@ -921,6 +927,7 @@ dependencies = [
 "checksum regex-syntax 0.4.1 (registry+https://github.com/rust-lang/crates.io-index)" = "ad890a5eef7953f55427c50575c680c42841653abd2b028b68cd223d157f62db"
 "checksum rustc-demangle 0.1.4 (registry+https://github.com/rust-lang/crates.io-index)" = "3058a43ada2c2d0b92b3ae38007a2d0fa5e9db971be260e0171408a4ff471c95"
 "checksum rustc-serialize 0.3.24 (registry+https://github.com/rust-lang/crates.io-index)" = "dcf128d1287d2ea9d80910b5f1120d0b8eede3fbf1abe91c40d39ea7d51e6fda"
+"checksum scoped-tls 0.1.0 (registry+https://github.com/rust-lang/crates.io-index)" = "f417c22df063e9450888a7561788e9bd46d3bb3c1466435b4eccb903807f147d"
 "checksum semver 0.7.0 (registry+https://github.com/rust-lang/crates.io-index)" = "3fdd61b85a0fa777f7fb7c454b9189b2941b110d1385ce84d7f76efdf1606a85"
 "checksum semver-parser 0.7.0 (registry+https://github.com/rust-lang/crates.io-index)" = "388a1df253eca08550bef6c72392cfe7c30914bf41df5269b68cbd6ff8f570a3"
 "checksum serde 1.0.8 (registry+https://github.com/rust-lang/crates.io-index)" = "c2f530d36fb84ec48fb7146936881f026cdbf4892028835fd9398475f82c1bb4"
index 7a4065ccacd7a42e2e25d8fb25b827b16d90a0bc..894f6e04c38bb3661507aa809d457e685c199bae 100644 (file)
@@ -35,6 +35,7 @@ libgit2-sys = "0.6"
 log = "0.3"
 num_cpus = "1.0"
 rustc-serialize = "0.3"
+scoped-tls = "0.1"
 semver = { version = "0.7.0", features = ["serde"] }
 serde = "1.0"
 serde_derive = "1.0"
index 8e723a9a4a71c0b4f3d34ae4f27573f091b0dd44..2fe2a572b13bf5a8bb1f3a763d375fee17df1d32 100755 (executable)
@@ -5,6 +5,7 @@
 #[cfg(test)] extern crate hamcrest;
 #[macro_use] extern crate error_chain;
 #[macro_use] extern crate log;
+#[macro_use] extern crate scoped_tls;
 #[macro_use] extern crate serde_derive;
 #[macro_use] extern crate serde_json;
 extern crate crates_io as registry;
index 413502467cecddb0278eba11b65b48183774da73..14c8ab7d6eb104ebc7fa70280894adb39cde4064 100644 (file)
@@ -3,10 +3,11 @@ use std::path::Path;
 use std::str;
 
 use serde_json;
+use semver::Version;
 
-use core::dependency::{Dependency, DependencyInner, Kind};
+use core::dependency::Dependency;
 use core::{SourceId, Summary, PackageId};
-use sources::registry::{RegistryPackage, RegistryDependency, INDEX_LOCK};
+use sources::registry::{RegistryPackage, INDEX_LOCK};
 use sources::registry::RegistryData;
 use util::{CargoError, CargoResult, internal, Filesystem, Config};
 
@@ -14,7 +15,7 @@ pub struct RegistryIndex<'cfg> {
     source_id: SourceId,
     path: Filesystem,
     cache: HashMap<String, Vec<(Summary, bool)>>,
-    hashes: HashMap<(String, String), String>, // (name, vers) => cksum
+    hashes: HashMap<String, HashMap<Version, String>>, // (name, vers) => cksum
     config: &'cfg Config,
     locked: bool,
 }
@@ -40,13 +41,14 @@ impl<'cfg> RegistryIndex<'cfg> {
                 pkg: &PackageId,
                 load: &mut RegistryData)
                 -> CargoResult<String> {
-        let key = (pkg.name().to_string(), pkg.version().to_string());
-        if let Some(s) = self.hashes.get(&key) {
+        let name = pkg.name();
+        let version = pkg.version();
+        if let Some(s) = self.hashes.get(name).and_then(|v| v.get(version)) {
             return Ok(s.clone())
         }
         // Ok, we're missing the key, so parse the index file to load it.
-        self.summaries(pkg.name(), load)?;
-        self.hashes.get(&key).ok_or_else(|| {
+        self.summaries(name, load)?;
+        self.hashes.get(name).and_then(|v| v.get(version)).ok_or_else(|| {
             internal(format!("no hash listed for {}", pkg))
         }).map(|s| s.clone())
     }
@@ -103,6 +105,7 @@ impl<'cfg> RegistryIndex<'cfg> {
             let contents = str::from_utf8(contents).map_err(|_| {
                 CargoError::from("registry index file was not valid utf-8")
             })?;
+            ret.reserve(contents.lines().count());
             let lines = contents.lines()
                                 .map(|s| s.trim())
                                 .filter(|l| !l.is_empty());
@@ -137,52 +140,22 @@ impl<'cfg> RegistryIndex<'cfg> {
                               -> CargoResult<(Summary, bool)> {
         let RegistryPackage {
             name, vers, cksum, deps, features, yanked
-        } = serde_json::from_str::<RegistryPackage>(line)?;
+        } = super::DEFAULT_ID.set(&self.source_id, || {
+            serde_json::from_str::<RegistryPackage>(line)
+        })?;
         let pkgid = PackageId::new(&name, &vers, &self.source_id)?;
-        let deps: CargoResult<Vec<Dependency>> = deps.into_iter().map(|dep| {
-            self.parse_registry_dependency(dep)
-        }).collect();
-        let deps = deps?;
-        let summary = Summary::new(pkgid, deps, features)?;
+        let summary = Summary::new(pkgid, deps.inner, features)?;
         let summary = summary.set_checksum(cksum.clone());
-        self.hashes.insert((name, vers), cksum);
+        if self.hashes.contains_key(&name[..]) {
+            self.hashes.get_mut(&name[..]).unwrap().insert(vers, cksum);
+        } else {
+            self.hashes.entry(name.into_owned())
+                .or_insert_with(HashMap::new)
+                .insert(vers, cksum);
+        }
         Ok((summary, yanked.unwrap_or(false)))
     }
 
-    /// Converts an encoded dependency in the registry to a cargo dependency
-    fn parse_registry_dependency(&self, dep: RegistryDependency)
-                                 -> CargoResult<Dependency> {
-        let RegistryDependency {
-            name, req, features, optional, default_features, target, kind
-        } = dep;
-
-        let mut dep = DependencyInner::parse(&name, Some(&req), &self.source_id, None)?;
-        let kind = match kind.as_ref().map(|s| &s[..]).unwrap_or("") {
-            "dev" => Kind::Development,
-            "build" => Kind::Build,
-            _ => Kind::Normal,
-        };
-
-        let platform = match target {
-            Some(target) => Some(target.parse()?),
-            None => None,
-        };
-
-        // Unfortunately older versions of cargo and/or the registry ended up
-        // publishing lots of entries where the features array contained the
-        // empty feature, "", inside. This confuses the resolution process much
-        // later on and these features aren't actually valid, so filter them all
-        // out here.
-        let features = features.into_iter().filter(|s| !s.is_empty()).collect();
-
-        dep.set_optional(optional)
-           .set_default_features(default_features)
-           .set_features(features)
-           .set_platform(platform)
-           .set_kind(kind);
-        Ok(dep.into_dependency())
-    }
-
     pub fn query(&mut self,
                  dep: &Dependency,
                  load: &mut RegistryData,
index 24b5daf96c1d90e243d33f3899dad8d1b2de2f22..84f801ed8e570ceb2c09181bb7b5fabec54a95a5 100644 (file)
 
 use std::borrow::Cow;
 use std::collections::HashMap;
+use std::fmt;
 use std::fs::File;
 use std::path::{PathBuf, Path};
 
 use flate2::read::GzDecoder;
+use semver::Version;
+use serde::de;
 use tar::Archive;
 
 use core::{Source, SourceId, PackageId, Package, Summary, Registry};
-use core::dependency::Dependency;
+use core::dependency::{Dependency, DependencyInner, Kind};
 use sources::PathSource;
 use util::{CargoResult, Config, internal, FileLock, Filesystem};
 use util::errors::CargoResultExt;
@@ -200,14 +203,18 @@ pub struct RegistryConfig {
 
 #[derive(Deserialize)]
 struct RegistryPackage<'a> {
-    name: String,
-    vers: String,
-    deps: Vec<RegistryDependency<'a>>,
+    name: Cow<'a, str>,
+    vers: Version,
+    deps: DependencyList,
     features: HashMap<String, Vec<String>>,
     cksum: String,
     yanked: Option<bool>,
 }
 
+struct DependencyList {
+    inner: Vec<Dependency>,
+}
+
 #[derive(Deserialize)]
 struct RegistryDependency<'a> {
     name: Cow<'a, str>,
@@ -397,3 +404,85 @@ impl<'cfg> Source for RegistrySource<'cfg> {
         Ok(pkg.package_id().version().to_string())
     }
 }
+
+// TODO: this is pretty unfortunate, ideally we'd use `DeserializeSeed` which
+//       is intended for "deserializing with context" but that means we couldn't
+//       use `#[derive(Deserialize)]` on `RegistryPackage` unfortunately.
+//
+// I'm told, however, that https://github.com/serde-rs/serde/pull/909 will solve
+// all our problems here. Until that lands this thread local is just a
+// workaround in the meantime.
+//
+// If you're reading this and find this thread local funny, check to see if that
+// PR is merged. If it is then let's ditch this thread local!
+scoped_thread_local!(static DEFAULT_ID: SourceId);
+
+impl<'de> de::Deserialize<'de> for DependencyList {
+    fn deserialize<D>(deserializer: D) -> Result<Self, D::Error>
+        where D: de::Deserializer<'de>,
+    {
+        return deserializer.deserialize_seq(Visitor);
+
+        struct Visitor;
+
+        impl<'de> de::Visitor<'de> for Visitor {
+            type Value = DependencyList;
+
+            fn expecting(&self, formatter: &mut fmt::Formatter) -> fmt::Result {
+                write!(formatter, "a list of dependencies")
+            }
+
+            fn visit_seq<A>(self, mut seq: A) -> Result<DependencyList, A::Error>
+                where A: de::SeqAccess<'de>,
+            {
+                let mut ret = Vec::new();
+                if let Some(size) = seq.size_hint() {
+                    ret.reserve(size);
+                }
+                while let Some(element) = seq.next_element::<RegistryDependency>()? {
+                    ret.push(parse_registry_dependency(element).map_err(|e| {
+                        de::Error::custom(e)
+                    })?);
+                }
+
+                Ok(DependencyList { inner: ret })
+            }
+        }
+    }
+}
+
+/// Converts an encoded dependency in the registry to a cargo dependency
+fn parse_registry_dependency(dep: RegistryDependency)
+                             -> CargoResult<Dependency> {
+    let RegistryDependency {
+        name, req, features, optional, default_features, target, kind
+    } = dep;
+
+    let mut dep = DEFAULT_ID.with(|id| {
+        DependencyInner::parse(&name, Some(&req), id, None)
+    })?;
+    let kind = match kind.as_ref().map(|s| &s[..]).unwrap_or("") {
+        "dev" => Kind::Development,
+        "build" => Kind::Build,
+        _ => Kind::Normal,
+    };
+
+    let platform = match target {
+        Some(target) => Some(target.parse()?),
+        None => None,
+    };
+
+    // Unfortunately older versions of cargo and/or the registry ended up
+    // publishing lots of entries where the features array contained the
+    // empty feature, "", inside. This confuses the resolution process much
+    // later on and these features aren't actually valid, so filter them all
+    // out here.
+    let features = features.into_iter().filter(|s| !s.is_empty()).collect();
+
+    dep.set_optional(optional)
+       .set_default_features(default_features)
+       .set_features(features)
+       .set_platform(platform)
+       .set_kind(kind);
+    Ok(dep.into_dependency())
+}
index fecc166a555c2bf56b12dc39ecfab6547d323fc8..988c4b9b46feb141cd3cc620a84df9bdd7774edb 100644 (file)
@@ -69,20 +69,19 @@ pub fn without_prefix<'a>(a: &'a Path, b: &'a Path) -> Option<&'a Path> {
 }
 
 pub fn read(path: &Path) -> CargoResult<String> {
-    (|| -> CargoResult<_> {
-        let mut ret = String::new();
-        let mut f = File::open(path)?;
-        f.read_to_string(&mut ret)?;
-        Ok(ret)
-    })().chain_err(|| {
-        format!("failed to read `{}`", path.display())
-    })
+    match String::from_utf8(read_bytes(path)?) {
+        Ok(s) => Ok(s),
+        Err(_) => bail!("path at `{}` was not valid utf-8", path.display()),
+    }
 }
 
 pub fn read_bytes(path: &Path) -> CargoResult<Vec<u8>> {
     (|| -> CargoResult<_> {
         let mut ret = Vec::new();
         let mut f = File::open(path)?;
+        if let Ok(m) = f.metadata() {
+            ret.reserve(m.len() as usize + 1);
+        }
         f.read_to_end(&mut ret)?;
         Ok(ret)
     })().chain_err(|| {