Skip to content

Commit

Permalink
Auto merge of #5157 - Eh2406:more_interning, r=alexcrichton
Browse files Browse the repository at this point in the history
More interning

This is a forward approach to interning. Specifically `Dependency` and `PackageId` store their names as `InternedString`s and leave that value interned as long as possible. The alternative is to make a new `interned_name` function. The advantage of this approach is that a number of places in the code are doing `deb.name() == pid.name()` and are now using the fast pointer compare instead of the string compare, without the code needing to change. The disadvantage is that lots of places need to call `deref` with `&*` to convert to an `&str` and sum need to use `.to_inner()` to get a `&'static str`.

In a test on #4810 (comment)
Before we got to 10000000 ticks in ~48 sec
After we got to 10000000 ticks in ~44 sec
  • Loading branch information
bors committed Mar 9, 2018
2 parents 3f7a426 + 1fd3496 commit 5f83bb4
Show file tree
Hide file tree
Showing 18 changed files with 72 additions and 54 deletions.
11 changes: 6 additions & 5 deletions src/cargo/core/dependency.rs
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@ use semver::ReqParseError;
use serde::ser;

use core::{SourceId, Summary, PackageId};
use core::interning::InternedString;
use util::{Cfg, CfgExpr, Config};
use util::errors::{CargoResult, CargoResultExt, CargoError};

Expand All @@ -20,7 +21,7 @@ pub struct Dependency {
/// The data underlying a Dependency.
#[derive(PartialEq, Eq, Hash, Ord, PartialOrd, Clone, Debug)]
struct Inner {
name: String,
name: InternedString,
source_id: SourceId,
registry_id: Option<SourceId>,
req: VersionReq,
Expand Down Expand Up @@ -63,7 +64,7 @@ impl ser::Serialize for Dependency {
where S: ser::Serializer,
{
SerializedDependency {
name: self.name(),
name: &*self.name(),
source: self.source_id(),
req: self.version_req().to_string(),
kind: self.kind(),
Expand Down Expand Up @@ -174,7 +175,7 @@ impl Dependency {
pub fn new_override(name: &str, source_id: &SourceId) -> Dependency {
Dependency {
inner: Rc::new(Inner {
name: name.to_string(),
name: InternedString::new(name),
source_id: source_id.clone(),
registry_id: None,
req: VersionReq::any(),
Expand All @@ -194,8 +195,8 @@ impl Dependency {
&self.inner.req
}

pub fn name(&self) -> &str {
&self.inner.name
pub fn name(&self) -> InternedString {
self.inner.name
}

pub fn source_id(&self) -> &SourceId {
Expand Down
32 changes: 23 additions & 9 deletions src/cargo/core/interning.rs
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@ use std::str;
use std::mem;
use std::cmp::Ordering;
use std::ops::Deref;
use std::hash::{Hash, Hasher};

pub fn leek(s: String) -> &'static str {
let boxed = s.into_boxed_str();
Expand All @@ -23,7 +24,7 @@ lazy_static! {
RwLock::new(HashSet::new());
}

#[derive(Eq, PartialEq, Hash, Clone, Copy)]
#[derive(Eq, PartialEq, Clone, Copy)]
pub struct InternedString {
ptr: *const u8,
len: usize,
Expand All @@ -39,30 +40,43 @@ impl InternedString {
cache.insert(s);
InternedString { ptr: s.as_ptr(), len: s.len() }
}
pub fn to_inner(&self) -> &'static str {
unsafe {
let slice = slice::from_raw_parts(self.ptr, self.len);
&str::from_utf8_unchecked(slice)
}
}
}

impl Deref for InternedString {
type Target = str;

fn deref(&self) -> &'static str {
unsafe {
let slice = slice::from_raw_parts(self.ptr, self.len);
&str::from_utf8_unchecked(slice)
}
self.to_inner()
}
}

impl Hash for InternedString {
fn hash<H: Hasher>(&self, state: &mut H) {
self.to_inner().hash(state);
}
}

impl fmt::Debug for InternedString {
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
let str: &str = &*self;
write!(f, "InternedString {{ {} }}", str)
write!(f, "InternedString {{ {} }}", self.to_inner())
}
}

impl fmt::Display for InternedString {
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
write!(f, "{}", self.to_inner())
}
}

impl Ord for InternedString {
fn cmp(&self, other: &InternedString) -> Ordering {
let str: &str = &*self;
str.cmp(&*other)
self.to_inner().cmp(&*other)
}
}

Expand Down
3 changes: 2 additions & 1 deletion src/cargo/core/manifest.rs
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@ use url::Url;

use core::{Dependency, PackageId, Summary, SourceId, PackageIdSpec};
use core::{WorkspaceConfig, Epoch, Features, Feature};
use core::interning::InternedString;
use util::Config;
use util::toml::TomlManifest;
use util::errors::*;
Expand Down Expand Up @@ -301,7 +302,7 @@ impl Manifest {
pub fn exclude(&self) -> &[String] { &self.exclude }
pub fn include(&self) -> &[String] { &self.include }
pub fn metadata(&self) -> &ManifestMetadata { &self.metadata }
pub fn name(&self) -> &str { self.package_id().name() }
pub fn name(&self) -> InternedString { self.package_id().name() }
pub fn package_id(&self) -> &PackageId { self.summary.package_id() }
pub fn summary(&self) -> &Summary { &self.summary }
pub fn targets(&self) -> &[Target] { &self.targets }
Expand Down
5 changes: 3 additions & 2 deletions src/cargo/core/package.rs
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@ use lazycell::LazyCell;

use core::{Dependency, Manifest, PackageId, SourceId, Target};
use core::{Summary, SourceMap};
use core::interning::InternedString;
use ops;
use util::{Config, internal, lev_distance};
use util::errors::{CargoResult, CargoResultExt};
Expand Down Expand Up @@ -55,7 +56,7 @@ impl ser::Serialize for Package {
let description = manmeta.description.as_ref().map(String::as_ref);

SerializedPackage {
name: package_id.name(),
name: &*package_id.name(),
version: &package_id.version().to_string(),
id: package_id,
license,
Expand Down Expand Up @@ -95,7 +96,7 @@ impl Package {
/// Get the path to the manifest
pub fn manifest_path(&self) -> &Path { &self.manifest_path }
/// Get the name of the package
pub fn name(&self) -> &str { self.package_id().name() }
pub fn name(&self) -> InternedString { self.package_id().name() }
/// Get the PackageId object for the package (fully defines a package)
pub fn package_id(&self) -> &PackageId { self.manifest.package_id() }
/// Get the root folder of the package
Expand Down
13 changes: 7 additions & 6 deletions src/cargo/core/package_id.rs
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@ use serde::ser;

use util::{CargoResult, ToSemver};
use core::source::SourceId;
use core::interning::InternedString;

/// Identifier for a specific version of a package in a specific source.
#[derive(Clone)]
Expand All @@ -20,7 +21,7 @@ pub struct PackageId {

#[derive(PartialEq, PartialOrd, Eq, Ord)]
struct PackageIdInner {
name: String,
name: InternedString,
version: semver::Version,
source_id: SourceId,
}
Expand Down Expand Up @@ -63,7 +64,7 @@ impl<'de> de::Deserialize<'de> for PackageId {

Ok(PackageId {
inner: Arc::new(PackageIdInner {
name: name.to_string(),
name: InternedString::new(name),
version,
source_id,
}),
Expand Down Expand Up @@ -102,21 +103,21 @@ impl PackageId {
let v = version.to_semver()?;
Ok(PackageId {
inner: Arc::new(PackageIdInner {
name: name.to_string(),
name: InternedString::new(name),
version: v,
source_id: sid.clone(),
}),
})
}

pub fn name(&self) -> &str { &self.inner.name }
pub fn name(&self) -> InternedString { self.inner.name }
pub fn version(&self) -> &semver::Version { &self.inner.version }
pub fn source_id(&self) -> &SourceId { &self.inner.source_id }

pub fn with_precise(&self, precise: Option<String>) -> PackageId {
PackageId {
inner: Arc::new(PackageIdInner {
name: self.inner.name.to_string(),
name: self.inner.name,
version: self.inner.version.clone(),
source_id: self.inner.source_id.with_precise(precise),
}),
Expand All @@ -126,7 +127,7 @@ impl PackageId {
pub fn with_source_id(&self, source: &SourceId) -> PackageId {
PackageId {
inner: Arc::new(PackageIdInner {
name: self.inner.name.to_string(),
name: self.inner.name,
version: self.inner.version.clone(),
source_id: source.clone(),
}),
Expand Down
2 changes: 1 addition & 1 deletion src/cargo/core/package_id_spec.rs
Original file line number Diff line number Diff line change
Expand Up @@ -115,7 +115,7 @@ impl PackageIdSpec {
}

pub fn matches(&self, package_id: &PackageId) -> bool {
if self.name() != package_id.name() { return false }
if self.name() != &*package_id.name() { return false }

if let Some(ref v) = self.version {
if v != package_id.version() {
Expand Down
8 changes: 4 additions & 4 deletions src/cargo/core/registry.rs
Original file line number Diff line number Diff line change
Expand Up @@ -317,7 +317,7 @@ impl<'cfg> PackageRegistry<'cfg> {
-> CargoResult<Option<Summary>> {
for s in self.overrides.iter() {
let src = self.sources.get_mut(s).unwrap();
let dep = Dependency::new_override(dep.name(), s);
let dep = Dependency::new_override(&*dep.name(), s);
let mut results = src.query_vec(&dep)?;
if !results.is_empty() {
return Ok(Some(results.remove(0)))
Expand Down Expand Up @@ -519,7 +519,7 @@ fn lock(locked: &LockedMap,
patches: &HashMap<Url, Vec<PackageId>>,
summary: Summary) -> Summary {
let pair = locked.get(summary.source_id()).and_then(|map| {
map.get(summary.name())
map.get(&*summary.name())
}).and_then(|vec| {
vec.iter().find(|&&(ref id, _)| id == summary.package_id())
});
Expand Down Expand Up @@ -568,7 +568,7 @@ fn lock(locked: &LockedMap,
// all known locked packages to see if they match this dependency.
// If anything does then we lock it to that and move on.
let v = locked.get(dep.source_id()).and_then(|map| {
map.get(dep.name())
map.get(&*dep.name())
}).and_then(|vec| {
vec.iter().find(|&&(ref id, _)| dep.matches_id(id))
});
Expand All @@ -593,7 +593,7 @@ fn lock(locked: &LockedMap,
assert!(remaining.next().is_none());
let patch_source = patch_id.source_id();
let patch_locked = locked.get(patch_source).and_then(|m| {
m.get(patch_id.name())
m.get(&*patch_id.name())
}).map(|list| {
list.iter().any(|&(ref id, _)| id == patch_id)
}).unwrap_or(false);
Expand Down
22 changes: 11 additions & 11 deletions src/cargo/core/resolver/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -1044,7 +1044,7 @@ fn activation_error(cx: &Context,
for &(p, r) in links_errors.iter() {
if let ConflictReason::Links(ref link) = *r {
msg.push_str("\n\nthe package `");
msg.push_str(dep.name());
msg.push_str(&*dep.name());
msg.push_str("` links to the native library `");
msg.push_str(link);
msg.push_str("`, but it conflicts with a previous package which links to `");
Expand All @@ -1059,13 +1059,13 @@ fn activation_error(cx: &Context,
for &(p, r) in features_errors.iter() {
if let ConflictReason::MissingFeatures(ref features) = *r {
msg.push_str("\n\nthe package `");
msg.push_str(p.name());
msg.push_str(&*p.name());
msg.push_str("` depends on `");
msg.push_str(dep.name());
msg.push_str(&*dep.name());
msg.push_str("`, with features: `");
msg.push_str(features);
msg.push_str("` but `");
msg.push_str(dep.name());
msg.push_str(&*dep.name());
msg.push_str("` does not have these features.\n");
}
// p == parent so the full path is redundant.
Expand All @@ -1082,7 +1082,7 @@ fn activation_error(cx: &Context,
}

msg.push_str("\n\nfailed to select a version for `");
msg.push_str(dep.name());
msg.push_str(&*dep.name());
msg.push_str("` which could resolve this conflict");

return format_err!("{}", msg)
Expand Down Expand Up @@ -1274,7 +1274,7 @@ fn build_requirements<'a, 'b: 'a>(s: &'a Summary, method: &'b Method)
reqs.require_feature(key)?;
}
for dep in s.dependencies().iter().filter(|d| d.is_optional()) {
reqs.require_dependency(dep.name());
reqs.require_dependency(dep.name().to_inner());
}
}
Method::Required { features: requested_features, .. } => {
Expand Down Expand Up @@ -1304,7 +1304,7 @@ impl Context {
method: &Method) -> CargoResult<bool> {
let id = summary.package_id();
let prev = self.activations
.entry((InternedString::new(id.name()), id.source_id().clone()))
.entry((id.name(), id.source_id().clone()))
.or_insert_with(||Rc::new(Vec::new()));
if !prev.iter().any(|c| c == summary) {
self.resolve_graph.push(GraphNode::Add(id.clone()));
Expand Down Expand Up @@ -1365,13 +1365,13 @@ impl Context {
}

fn prev_active(&self, dep: &Dependency) -> &[Summary] {
self.activations.get(&(InternedString::new(dep.name()), dep.source_id().clone()))
self.activations.get(&(dep.name(), dep.source_id().clone()))
.map(|v| &v[..])
.unwrap_or(&[])
}

fn is_active(&self, id: &PackageId) -> bool {
self.activations.get(&(InternedString::new(id.name()), id.source_id().clone()))
self.activations.get(&(id.name(), id.source_id().clone()))
.map(|v| v.iter().any(|s| s.package_id() == id))
.unwrap_or(false)
}
Expand All @@ -1397,12 +1397,12 @@ impl Context {
// Next, collect all actually enabled dependencies and their features.
for dep in deps {
// Skip optional dependencies, but not those enabled through a feature
if dep.is_optional() && !reqs.deps.contains_key(dep.name()) {
if dep.is_optional() && !reqs.deps.contains_key(&*dep.name()) {
continue
}
// So we want this dependency. Move the features we want from `feature_deps`
// to `ret`.
let base = reqs.deps.remove(dep.name()).unwrap_or((false, vec![]));
let base = reqs.deps.remove(&*dep.name()).unwrap_or((false, vec![]));
if !dep.is_optional() && base.0 {
self.warnings.push(
format!("Package `{}` does not have feature `{}`. It has a required dependency \
Expand Down
6 changes: 3 additions & 3 deletions src/cargo/core/summary.rs
Original file line number Diff line number Diff line change
Expand Up @@ -32,7 +32,7 @@ impl Summary {
features: BTreeMap<String, Vec<String>>,
links: Option<String>) -> CargoResult<Summary> {
for dep in dependencies.iter() {
if features.get(dep.name()).is_some() {
if features.get(&*dep.name()).is_some() {
bail!("Features and dependencies cannot have the \
same name: `{}`", dep.name())
}
Expand All @@ -47,7 +47,7 @@ impl Summary {
let dep = parts.next().unwrap();
let is_reexport = parts.next().is_some();
if !is_reexport && features.get(dep).is_some() { continue }
match dependencies.iter().find(|d| d.name() == dep) {
match dependencies.iter().find(|d| &*d.name() == dep) {
Some(d) => {
if d.is_optional() || is_reexport { continue }
bail!("Feature `{}` depends on `{}` which is not an \
Expand Down Expand Up @@ -78,7 +78,7 @@ impl Summary {
}

pub fn package_id(&self) -> &PackageId { &self.inner.package_id }
pub fn name(&self) -> &str { self.package_id().name() }
pub fn name(&self) -> InternedString { self.package_id().name() }
pub fn version(&self) -> &Version { self.package_id().version() }
pub fn source_id(&self) -> &SourceId { self.package_id().source_id() }
pub fn dependencies(&self) -> &[Dependency] { &self.inner.dependencies }
Expand Down
2 changes: 1 addition & 1 deletion src/cargo/ops/cargo_doc.rs
Original file line number Diff line number Diff line change
Expand Up @@ -54,7 +54,7 @@ pub fn doc(ws: &Workspace, options: &DocOptions) -> CargoResult<()> {
bail!("Passing multiple packages and `open` is not supported.\n\
Please re-run this command with `-p <spec>` where `<spec>` \
is one of the following:\n {}",
pkgs.iter().map(|p| p.name()).collect::<Vec<_>>().join("\n "));
pkgs.iter().map(|p| p.name().to_inner()).collect::<Vec<_>>().join("\n "));
} else if pkgs.len() == 1 {
pkgs[0].name().replace("-", "_")
} else {
Expand Down
2 changes: 1 addition & 1 deletion src/cargo/ops/cargo_generate_lockfile.rs
Original file line number Diff line number Diff line change
Expand Up @@ -136,7 +136,7 @@ pub fn update_lockfile(ws: &Workspace, opts: &UpdateOptions)
resolve: &'a Resolve) ->
Vec<(Vec<&'a PackageId>, Vec<&'a PackageId>)> {
fn key(dep: &PackageId) -> (&str, &SourceId) {
(dep.name(), dep.source_id())
(dep.name().to_inner(), dep.source_id())
}

// Removes all package ids in `b` from `a`. Note that this is somewhat
Expand Down
Loading

0 comments on commit 5f83bb4

Please sign in to comment.