Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Reuse phf hash and remove phf::OrderedSet indirection #103

Merged
merged 3 commits into from
Sep 1, 2015
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 4 additions & 4 deletions Cargo.toml
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
[package]

name = "string_cache"
version = "0.1.10"
version = "0.1.11"
authors = [ "The Servo Project Developers" ]
description = "A string interning library for Rust, developed as part of the Servo project."
license = "MIT / Apache-2.0"
Expand Down Expand Up @@ -37,13 +37,13 @@ optional = true

[dependencies.string_cache_plugin]
path = "plugin"
version = "0.1.5"
version = "0.1.7"
optional = true

[dependencies.string_cache_shared]
path = "shared"
version = "0.1.4"
version = "0.1.6"

[build-dependencies.string_cache_shared]
path = "shared"
version = "0.1.4"
version = "0.1.6"
2 changes: 1 addition & 1 deletion build.rs
Original file line number Diff line number Diff line change
Expand Up @@ -44,7 +44,7 @@ fn generate_combination(prefix1: String, suffix: &str, url: &str, file: &mut Buf
}

fn atom(s: &str) -> String {
let data = pack_static(STATIC_ATOM_SET.get_index(s).unwrap() as u32);
let data = pack_static(STATIC_ATOM_SET.get_index_or_hash(s).unwrap() as u32);
format!("$crate::Atom {{ data: 0x{:x} }}", data)
}

Expand Down
4 changes: 2 additions & 2 deletions plugin/Cargo.toml
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
[package]

name = "string_cache_plugin"
version = "0.1.6"
version = "0.1.7"
authors = [ "The Servo Project Developers" ]
description = "A string interning library for Rust, developed as part of the Servo project − compiler plugin."
license = "MIT / Apache-2.0"
Expand All @@ -14,7 +14,7 @@ plugin = true

[dependencies.string_cache_shared]
path = "../shared"
version = "0.1.0"
version = "0.1.6"

[dependencies]
lazy_static = "0.1.10"
Expand Down
6 changes: 3 additions & 3 deletions plugin/src/atom/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -44,9 +44,9 @@ impl MacResult for AtomResult {
}

fn make_atom_result(cx: &mut ExtCtxt, name: &str) -> Option<AtomResult> {
let i = match ::string_cache_shared::STATIC_ATOM_SET.get_index(name) {
Some(i) => i,
None => return None,
let i = match ::string_cache_shared::STATIC_ATOM_SET.get_index_or_hash(name) {
Ok(i) => i,
Err(_hash) => return None,
};

let data = ::string_cache_shared::pack_static(i as u32);
Expand Down
6 changes: 3 additions & 3 deletions shared/Cargo.toml
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
[package]

name = "string_cache_shared"
version = "0.1.5"
version = "0.1.6"
authors = [ "The Servo Project Developers" ]
description = "A string interning library for Rust, developed as part of the Servo project − shared code between the compiler plugin and main crate."
license = "MIT / Apache-2.0"
Expand All @@ -15,7 +15,7 @@ path = "lib.rs"

[dependencies]
debug_unreachable = "0.0.6"
phf = "0.7.3"
phf_shared = "0.7.4"

[build-dependencies]
phf_codegen = "0.7.3"
phf_generator = "0.7.4"
31 changes: 24 additions & 7 deletions shared/build.rs
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
extern crate phf_codegen;
extern crate phf_generator;

mod static_atom_list;

Expand All @@ -7,14 +7,31 @@ use std::io::{BufWriter, Write};
use std::path::Path;

fn main() {
let mut set = phf_codegen::OrderedSet::new();
for &atom in static_atom_list::ATOMS {
set.entry(atom);
let mut set = std::collections::HashSet::new();
for atom in static_atom_list::ATOMS {
if !set.insert(atom) {
panic!("duplicate static atom `{:?}`", atom);
}
}

let state = phf_generator::generate_hash(static_atom_list::ATOMS);

let path = Path::new(&std::env::var("OUT_DIR").unwrap()).join("static_atom_set.rs");
let mut file = BufWriter::new(File::create(&path).unwrap());
write!(&mut file, "pub static STATIC_ATOM_SET: phf::OrderedSet<&'static str> = ").unwrap();
set.build(&mut file).unwrap();
write!(&mut file, ";\n").unwrap();
macro_rules! w {
($($arg: expr),+) => { (writeln!(&mut file, $($arg),+).unwrap()) }
}
w!("pub static STATIC_ATOM_SET: StaticAtomSet = StaticAtomSet {{");
w!(" key: {},", state.key);
w!(" disps: &[");
for &(d1, d2) in &state.disps {
w!(" ({}, {}),", d1, d2);
}
w!(" ],");
w!(" atoms: &[");
for &idx in &state.map {
w!(" {:?},", static_atom_list::ATOMS[idx]);
}
w!(" ],");
w!("}};");
}
31 changes: 30 additions & 1 deletion shared/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@
#![cfg_attr(test, deny(warnings))]

#[macro_use] extern crate debug_unreachable;
extern crate phf;
extern crate phf_shared;

use std::ptr;
use std::slice;
Expand All @@ -32,6 +32,35 @@ pub const ENTRY_ALIGNMENT: usize = 4; // Multiples have TAG_MASK bits unset, av

pub const MAX_INLINE_LEN: usize = 7;

pub struct StaticAtomSet {
key: u64,
disps: &'static [(u32, u32)],
atoms: &'static [&'static str],
}

impl StaticAtomSet {
#[inline]
pub fn get_index_or_hash(&self, s: &str) -> Result<u32, u64> {
let hash = phf_shared::hash(s, self.key);
let index = phf_shared::get_index(hash, self.disps, self.atoms.len());
if self.atoms[index as usize] == s {
Ok(index)
} else {
Err(hash)
}
}

#[inline]
pub fn index(&self, i: u32) -> Option<&'static str> {
self.atoms.get(i as usize).map(|&s| s)
}

#[inline]
pub fn iter(&self) -> slice::Iter<&'static str> {
self.atoms.iter()
}
}

// Atoms use a compact representation which fits this enum in a single u64.
// Inlining avoids actually constructing the unpacked representation in memory.
#[allow(missing_copy_implementations)]
Expand Down
9 changes: 1 addition & 8 deletions shared/static_atom_list.rs
Original file line number Diff line number Diff line change
Expand Up @@ -9,12 +9,7 @@

pub static ATOMS: &'static [&'static str] = &[

// The first 64 atoms are special: we can quickly check membership
// in sets of these, using a bitmask. This includes every tag that
// appears in more than one set in the tree builder spec, plus a
// few others (arbitrarily chosen).
//
// FIXME(kmc): check if this is really true with the packed tag bits
// The order is not preserved by phf.

"a",
"address",
Expand Down Expand Up @@ -81,8 +76,6 @@ pub static ATOMS: &'static [&'static str] = &[
"track",
"xmp",

// End of first 64 atoms.

"",

// XML namespaces known to the HTML syntax spec
Expand Down
33 changes: 15 additions & 18 deletions src/atom/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,6 @@ use std::mem;
use std::ops;
use std::str;
use std::cmp::Ordering::{self, Equal};
use std::hash::{Hash, SipHasher, Hasher};
use std::sync::Mutex;
use std::sync::atomic::AtomicIsize;
use std::sync::atomic::Ordering::SeqCst;
Expand Down Expand Up @@ -66,12 +65,7 @@ impl StringCache {
}
}

fn add(&mut self, string_to_add: &str) -> *mut StringCacheEntry {
let hash = {
let mut hasher = SipHasher::default();
string_to_add.hash(&mut hasher);
hasher.finish()
};
fn add(&mut self, string_to_add: &str, hash: u64) -> *mut StringCacheEntry {
let bucket_index = (hash & BUCKET_MASK) as usize;
{
let mut ptr: Option<&mut Box<StringCacheEntry>> =
Expand Down Expand Up @@ -148,16 +142,16 @@ impl Atom {

#[inline]
pub fn from_slice(string_to_add: &str) -> Atom {
let unpacked = match STATIC_ATOM_SET.get_index(string_to_add) {
Some(id) => Static(id as u32),
None => {
let unpacked = match STATIC_ATOM_SET.get_index_or_hash(string_to_add) {
Ok(id) => Static(id as u32),
Err(hash) => {
let len = string_to_add.len();
if len <= string_cache_shared::MAX_INLINE_LEN {
let mut buf: [u8; 7] = [0; 7];
copy_memory(string_to_add.as_bytes(), &mut buf);
Inline(len as u8, buf)
} else {
Dynamic(STRING_CACHE.lock().unwrap().add(string_to_add) as *mut ())
Dynamic(STRING_CACHE.lock().unwrap().add(string_to_add, hash) as *mut ())
}
}
};
Expand All @@ -175,7 +169,7 @@ impl Atom {
let buf = string_cache_shared::inline_orig_bytes(&self.data);
str::from_utf8(buf).unwrap()
},
Static(idx) => *STATIC_ATOM_SET.index(idx as usize).expect("bad static atom"),
Static(idx) => STATIC_ATOM_SET.index(idx).expect("bad static atom"),
Dynamic(entry) => {
let entry = entry as *mut StringCacheEntry;
&(*entry).string
Expand Down Expand Up @@ -442,19 +436,22 @@ mod tests {
assert_eq_fmt!("0x{:016X}", Atom::from_slice(s).data, data);
}

fn check_static(s: &str, x: Atom, data: u64) {
check(s, data);
assert_eq_fmt!("0x{:016X}", x.data, data);
fn check_static(s: &str, x: Atom) {
use string_cache_shared::STATIC_ATOM_SET;
assert_eq_fmt!("0x{:016X}", x.data, Atom::from_slice(s).data);
assert_eq!(0x2, x.data & 0xFFFF_FFFF);
// The index is unspecified by phf.
assert!((x.data >> 32) <= STATIC_ATOM_SET.iter().len() as u64);
}

// This test is here to make sure we don't change atom representation
// by accident. It may need adjusting if there are changes to the
// static atom table, the tag values, etc.

// Static atoms
check_static("a", atom!(a), 0x0000_0000_0000_0002);
check_static("address", atom!(address), 0x0000_0001_0000_0002);
check_static("area", atom!(area), 0x0000_0003_0000_0002);
check_static("a", atom!(a));
check_static("address", atom!(address));
check_static("area", atom!(area));

// Inline atoms
check("e", 0x0000_0000_0000_6511);
Expand Down