From 1f764c55484cb5ca4899a0b51385ae91177dd7d8 Mon Sep 17 00:00:00 2001 From: Eh2406 Date: Sat, 3 Mar 2018 21:01:33 -0500 Subject: [PATCH 1/6] make a global string interner In a test on https://github.com/rust-lang/cargo/issues/4810#issuecomment-357553286 Before we got to 5000000 ticks in ~72 sec After we got to 5000000 ticks in ~65 sec --- Cargo.toml | 1 + src/cargo/core/interning.rs | 34 ++++++++++++++++++++++++++++++++++ src/cargo/core/mod.rs | 1 + src/cargo/core/resolver/mod.rs | 9 +++++---- src/cargo/lib.rs | 1 + 5 files changed, 42 insertions(+), 4 deletions(-) create mode 100644 src/cargo/core/interning.rs diff --git a/Cargo.toml b/Cargo.toml index e7d52c3e56b..477a044f688 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -34,6 +34,7 @@ glob = "0.2" hex = "0.3" home = "0.3" ignore = "0.4" +lazy_static = "1.0.0" jobserver = "0.1.9" lazycell = "0.6" libc = "0.2" diff --git a/src/cargo/core/interning.rs b/src/cargo/core/interning.rs new file mode 100644 index 00000000000..82b6830ee1a --- /dev/null +++ b/src/cargo/core/interning.rs @@ -0,0 +1,34 @@ +use std::fmt; +use std::sync::RwLock; +use std::collections::HashMap; + +lazy_static! { + static ref STRING_CASHE: RwLock<(Vec, HashMap)> = + RwLock::new((Vec::new(), HashMap::new())); +} + +#[derive(Eq, PartialEq, Hash, Clone, Copy)] +pub struct InternedString { + id: usize +} + +impl InternedString { + pub fn new(str: &str) -> InternedString { + let (ref mut str_from_id, ref mut id_from_str) = *STRING_CASHE.write().unwrap(); + if let Some(&id) = id_from_str.get(str) { + return InternedString { id }; + } + str_from_id.push(str.to_string()); + id_from_str.insert(str.to_string(), str_from_id.len() - 1); + return InternedString { id: str_from_id.len() - 1 } + } +// pub fn to_inner(&self) -> String { +// STRING_CASHE.read().unwrap().0[self.id].to_string() +// } +} + +impl fmt::Debug for InternedString { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + write!(f, "InternedString {{ {} }}", STRING_CASHE.read().unwrap().0[self.id]) + } +} diff --git a/src/cargo/core/mod.rs b/src/cargo/core/mod.rs index df1eff0725e..53068391052 100644 --- a/src/cargo/core/mod.rs +++ b/src/cargo/core/mod.rs @@ -21,6 +21,7 @@ pub mod resolver; pub mod summary; pub mod shell; pub mod registry; +mod interning; mod package_id_spec; mod workspace; mod features; diff --git a/src/cargo/core/resolver/mod.rs b/src/cargo/core/resolver/mod.rs index 1ee360f40f5..49537d676ab 100644 --- a/src/cargo/core/resolver/mod.rs +++ b/src/cargo/core/resolver/mod.rs @@ -60,6 +60,7 @@ use url::Url; use core::{PackageId, Registry, SourceId, Summary, Dependency}; use core::PackageIdSpec; +use core::interning::InternedString; use util::config::Config; use util::Graph; use util::errors::{CargoResult, CargoError}; @@ -344,7 +345,7 @@ struct Context { warnings: RcList, } -type Activations = HashMap>>>; +type Activations = HashMap>>>; /// Builds the list of all packages required to build the first argument. pub fn resolve(summaries: &[(Summary, Method)], @@ -1290,7 +1291,7 @@ impl Context { method: &Method) -> CargoResult { let id = summary.package_id(); let prev = self.activations - .entry(id.name().to_string()) + .entry(InternedString::new(id.name())) .or_insert_with(HashMap::new) .entry(id.source_id().clone()) .or_insert_with(||Rc::new(Vec::new())); @@ -1352,14 +1353,14 @@ impl Context { } fn prev_active(&self, dep: &Dependency) -> &[Summary] { - self.activations.get(dep.name()) + self.activations.get(&InternedString::new(dep.name())) .and_then(|v| v.get(dep.source_id())) .map(|v| &v[..]) .unwrap_or(&[]) } fn is_active(&self, id: &PackageId) -> bool { - self.activations.get(id.name()) + self.activations.get(&InternedString::new(id.name())) .and_then(|v| v.get(id.source_id())) .map(|v| v.iter().any(|s| s.package_id() == id)) .unwrap_or(false) diff --git a/src/cargo/lib.rs b/src/cargo/lib.rs index abcdd3f4adc..5a5c3991166 100644 --- a/src/cargo/lib.rs +++ b/src/cargo/lib.rs @@ -27,6 +27,7 @@ extern crate home; extern crate ignore; extern crate jobserver; extern crate lazycell; +#[macro_use] extern crate lazy_static; extern crate libc; extern crate libgit2_sys; extern crate num_cpus; From aa58d27a92c62756c3dfe5e1ade3607efa0ce184 Mon Sep 17 00:00:00 2001 From: Eh2406 Date: Sat, 3 Mar 2018 22:38:10 -0500 Subject: [PATCH 2/6] intern the features In a test on https://github.com/rust-lang/cargo/issues/4810#issuecomment-357553286 Before we got to 5000000 ticks in ~65 sec After we got to 5000000 ticks in ~52 sec --- src/cargo/core/interning.rs | 6 +++--- src/cargo/core/resolver/mod.rs | 12 +++++------- 2 files changed, 8 insertions(+), 10 deletions(-) diff --git a/src/cargo/core/interning.rs b/src/cargo/core/interning.rs index 82b6830ee1a..c24b6e70f81 100644 --- a/src/cargo/core/interning.rs +++ b/src/cargo/core/interning.rs @@ -22,9 +22,9 @@ impl InternedString { id_from_str.insert(str.to_string(), str_from_id.len() - 1); return InternedString { id: str_from_id.len() - 1 } } -// pub fn to_inner(&self) -> String { -// STRING_CASHE.read().unwrap().0[self.id].to_string() -// } + pub fn to_inner(&self) -> String { + STRING_CASHE.read().unwrap().0[self.id].to_string() + } } impl fmt::Debug for InternedString { diff --git a/src/cargo/core/resolver/mod.rs b/src/cargo/core/resolver/mod.rs index 49537d676ab..361f3f951a2 100644 --- a/src/cargo/core/resolver/mod.rs +++ b/src/cargo/core/resolver/mod.rs @@ -332,7 +332,7 @@ struct Context { // switch to persistent hash maps if we can at some point or otherwise // make these much cheaper to clone in general. activations: Activations, - resolve_features: HashMap>, + resolve_features: HashMap>, links: HashMap, // These are two cheaply-cloneable lists (O(1) clone) which are effectively @@ -371,7 +371,7 @@ pub fn resolve(summaries: &[(Summary, Method)], metadata: BTreeMap::new(), replacements: cx.resolve_replacements(), features: cx.resolve_features.iter().map(|(k, v)| { - (k.clone(), v.clone()) + (k.clone(), v.iter().map(|x| x.to_inner()).collect()) }).collect(), unused_patches: Vec::new(), }; @@ -1318,8 +1318,8 @@ impl Context { let has_default_feature = summary.features().contains_key("default"); Ok(match self.resolve_features.get(id) { Some(prev) => { - features.iter().all(|f| prev.contains(f)) && - (!use_default || prev.contains("default") || + features.iter().all(|f| prev.contains(&InternedString::new(f))) && + (!use_default || prev.contains(&InternedString::new("default")) || !has_default_feature) } None => features.is_empty() && (!use_default || !has_default_feature) @@ -1434,9 +1434,7 @@ impl Context { let set = self.resolve_features.entry(pkgid.clone()) .or_insert_with(HashSet::new); for feature in reqs.used { - if !set.contains(feature) { - set.insert(feature.to_string()); - } + set.insert(InternedString::new(feature)); } } From ad26b8902675f67761f232de9c1baa66a717a787 Mon Sep 17 00:00:00 2001 From: Eh2406 Date: Sat, 3 Mar 2018 23:08:13 -0500 Subject: [PATCH 3/6] and links just to be throw --- src/cargo/core/resolver/mod.rs | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/src/cargo/core/resolver/mod.rs b/src/cargo/core/resolver/mod.rs index 361f3f951a2..1232151afbf 100644 --- a/src/cargo/core/resolver/mod.rs +++ b/src/cargo/core/resolver/mod.rs @@ -333,7 +333,7 @@ struct Context { // make these much cheaper to clone in general. activations: Activations, resolve_features: HashMap>, - links: HashMap, + links: HashMap, // These are two cheaply-cloneable lists (O(1) clone) which are effectively // hash maps but are built up as "construction lists". We'll iterate these @@ -706,7 +706,7 @@ impl RemainingCandidates { fn next( &mut self, prev_active: &[Summary], - links: &HashMap, + links: &HashMap, ) -> Result<(Candidate, bool), HashMap> { // Filter the set of candidates based on the previously activated // versions for this dependency. We can actually use a version if it @@ -723,7 +723,7 @@ impl RemainingCandidates { use std::mem::replace; for (_, b) in self.remaining.by_ref() { if let Some(link) = b.summary.links() { - if let Some(a) = links.get(link) { + if let Some(a) = links.get(&InternedString::new(link)) { if a != b.summary.package_id() { self.conflicting_prev_active .entry(a.clone()) @@ -1298,7 +1298,7 @@ impl Context { if !prev.iter().any(|c| c == summary) { self.resolve_graph.push(GraphNode::Add(id.clone())); if let Some(link) = summary.links() { - ensure!(self.links.insert(link.to_owned(), id.clone()).is_none(), + ensure!(self.links.insert(InternedString::new(link), id.clone()).is_none(), "Attempting to resolve a with more then one crate with the links={}. \n\ This will not build as is. Consider rebuilding the .lock file.", link); } From 26bfc7de367860794ea81f468a44ae68cd6deb70 Mon Sep 17 00:00:00 2001 From: Eh2406 Date: Tue, 6 Mar 2018 17:44:31 -0500 Subject: [PATCH 4/6] add unsafe --- src/cargo/core/interning.rs | 43 ++++++++++++++++++++++++---------- src/cargo/core/resolver/mod.rs | 2 +- 2 files changed, 31 insertions(+), 14 deletions(-) diff --git a/src/cargo/core/interning.rs b/src/cargo/core/interning.rs index c24b6e70f81..16f9c134742 100644 --- a/src/cargo/core/interning.rs +++ b/src/cargo/core/interning.rs @@ -1,34 +1,51 @@ use std::fmt; use std::sync::RwLock; -use std::collections::HashMap; +use std::collections::HashSet; +use std::slice; +use std::str; +use std::mem; + +pub fn leek(s: String) -> &'static str { + let ptr = s.as_ptr(); + let len = s.len(); + mem::forget(s); + unsafe { + let slice = slice::from_raw_parts(ptr, len); + str::from_utf8_unchecked(slice) + } +} lazy_static! { - static ref STRING_CASHE: RwLock<(Vec, HashMap)> = - RwLock::new((Vec::new(), HashMap::new())); + static ref STRING_CASHE: RwLock> = + RwLock::new(HashSet::new()); } #[derive(Eq, PartialEq, Hash, Clone, Copy)] pub struct InternedString { - id: usize + ptr: *const u8, + len: usize, } impl InternedString { pub fn new(str: &str) -> InternedString { - let (ref mut str_from_id, ref mut id_from_str) = *STRING_CASHE.write().unwrap(); - if let Some(&id) = id_from_str.get(str) { - return InternedString { id }; + let mut cache = STRING_CASHE.write().unwrap(); + if let Some(&s) = cache.get(str) { + return InternedString { ptr: s.as_ptr(), len: s.len() }; } - str_from_id.push(str.to_string()); - id_from_str.insert(str.to_string(), str_from_id.len() - 1); - return InternedString { id: str_from_id.len() - 1 } + let s = leek(str.to_string()); + cache.insert(s); + InternedString { ptr: s.as_ptr(), len: s.len() } } - pub fn to_inner(&self) -> String { - STRING_CASHE.read().unwrap().0[self.id].to_string() + pub fn to_inner(&self) -> &'static str { + unsafe { + let slice = slice::from_raw_parts(self.ptr, self.len); + str::from_utf8_unchecked(slice) + } } } impl fmt::Debug for InternedString { fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { - write!(f, "InternedString {{ {} }}", STRING_CASHE.read().unwrap().0[self.id]) + write!(f, "InternedString {{ {} }}", self.to_inner()) } } diff --git a/src/cargo/core/resolver/mod.rs b/src/cargo/core/resolver/mod.rs index 1232151afbf..011766d952b 100644 --- a/src/cargo/core/resolver/mod.rs +++ b/src/cargo/core/resolver/mod.rs @@ -371,7 +371,7 @@ pub fn resolve(summaries: &[(Summary, Method)], metadata: BTreeMap::new(), replacements: cx.resolve_replacements(), features: cx.resolve_features.iter().map(|(k, v)| { - (k.clone(), v.iter().map(|x| x.to_inner()).collect()) + (k.clone(), v.iter().map(|x| x.to_inner().to_string()).collect()) }).collect(), unused_patches: Vec::new(), }; From 827fdf84c8a85cdb02f4446739073a8bbf0e518d Mon Sep 17 00:00:00 2001 From: Eh2406 Date: Wed, 7 Mar 2018 17:10:55 -0500 Subject: [PATCH 5/6] use `into_boxed_str` to shrink before we leek --- src/cargo/core/interning.rs | 23 ++++++++++++++++++++--- 1 file changed, 20 insertions(+), 3 deletions(-) diff --git a/src/cargo/core/interning.rs b/src/cargo/core/interning.rs index 16f9c134742..f8d028b3455 100644 --- a/src/cargo/core/interning.rs +++ b/src/cargo/core/interning.rs @@ -4,11 +4,13 @@ use std::collections::HashSet; use std::slice; use std::str; use std::mem; +use std::cmp::Ordering; pub fn leek(s: String) -> &'static str { - let ptr = s.as_ptr(); - let len = s.len(); - mem::forget(s); + let boxed = s.into_boxed_str(); + let ptr = boxed.as_ptr(); + let len = boxed.len(); + mem::forget(boxed); unsafe { let slice = slice::from_raw_parts(ptr, len); str::from_utf8_unchecked(slice) @@ -49,3 +51,18 @@ impl fmt::Debug for InternedString { write!(f, "InternedString {{ {} }}", self.to_inner()) } } + +impl Ord for InternedString { + fn cmp(&self, other: &InternedString) -> Ordering { + self.to_inner().cmp(&other.to_inner()) + } +} + +impl PartialOrd for InternedString { + fn partial_cmp(&self, other: &InternedString) -> Option { + Some(self.cmp(other)) + } +} + +unsafe impl Send for InternedString {} +unsafe impl Sync for InternedString {} \ No newline at end of file From 98480e8591d30575daedf738d37aa4488050547a Mon Sep 17 00:00:00 2001 From: Eh2406 Date: Wed, 7 Mar 2018 17:19:53 -0500 Subject: [PATCH 6/6] use `Deref` instead of an explicit function --- src/cargo/core/interning.rs | 20 ++++++++++---------- src/cargo/core/resolver/mod.rs | 2 +- 2 files changed, 11 insertions(+), 11 deletions(-) diff --git a/src/cargo/core/interning.rs b/src/cargo/core/interning.rs index f8d028b3455..9fea6503d7f 100644 --- a/src/cargo/core/interning.rs +++ b/src/cargo/core/interning.rs @@ -1,10 +1,10 @@ -use std::fmt; use std::sync::RwLock; use std::collections::HashSet; use std::slice; use std::str; use std::mem; use std::cmp::Ordering; +use std::ops::Deref; pub fn leek(s: String) -> &'static str { let boxed = s.into_boxed_str(); @@ -38,23 +38,23 @@ impl InternedString { cache.insert(s); InternedString { ptr: s.as_ptr(), len: s.len() } } - pub fn to_inner(&self) -> &'static str { +} + +impl Deref for InternedString { + type Target = str; + + fn deref(&self) -> &'static str { unsafe { let slice = slice::from_raw_parts(self.ptr, self.len); - str::from_utf8_unchecked(slice) + &str::from_utf8_unchecked(slice) } } } -impl fmt::Debug for InternedString { - fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { - write!(f, "InternedString {{ {} }}", self.to_inner()) - } -} - impl Ord for InternedString { fn cmp(&self, other: &InternedString) -> Ordering { - self.to_inner().cmp(&other.to_inner()) + let str: &str = &*self; + str.cmp(&*other) } } diff --git a/src/cargo/core/resolver/mod.rs b/src/cargo/core/resolver/mod.rs index 011766d952b..f8b0b26c94f 100644 --- a/src/cargo/core/resolver/mod.rs +++ b/src/cargo/core/resolver/mod.rs @@ -371,7 +371,7 @@ pub fn resolve(summaries: &[(Summary, Method)], metadata: BTreeMap::new(), replacements: cx.resolve_replacements(), features: cx.resolve_features.iter().map(|(k, v)| { - (k.clone(), v.iter().map(|x| x.to_inner().to_string()).collect()) + (k.clone(), v.iter().map(|x| x.to_string()).collect()) }).collect(), unused_patches: Vec::new(), };