diff --git a/Cargo.toml b/Cargo.toml index 8c67da8..0301a7b 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,7 +1,7 @@ [package] name = "string_cache" -version = "0.1.10" +version = "0.1.11" authors = [ "The Servo Project Developers" ] description = "A string interning library for Rust, developed as part of the Servo project." license = "MIT / Apache-2.0" @@ -37,13 +37,13 @@ optional = true [dependencies.string_cache_plugin] path = "plugin" -version = "0.1.5" +version = "0.1.7" optional = true [dependencies.string_cache_shared] path = "shared" -version = "0.1.4" +version = "0.1.6" [build-dependencies.string_cache_shared] path = "shared" -version = "0.1.4" +version = "0.1.6" diff --git a/build.rs b/build.rs index 88ce1f1..87c226e 100644 --- a/build.rs +++ b/build.rs @@ -44,7 +44,7 @@ fn generate_combination(prefix1: String, suffix: &str, url: &str, file: &mut Buf } fn atom(s: &str) -> String { - let data = pack_static(STATIC_ATOM_SET.get_index(s).unwrap() as u32); + let data = pack_static(STATIC_ATOM_SET.get_index_or_hash(s).unwrap() as u32); format!("$crate::Atom {{ data: 0x{:x} }}", data) } diff --git a/plugin/Cargo.toml b/plugin/Cargo.toml index 089082d..25f0155 100644 --- a/plugin/Cargo.toml +++ b/plugin/Cargo.toml @@ -1,7 +1,7 @@ [package] name = "string_cache_plugin" -version = "0.1.6" +version = "0.1.7" authors = [ "The Servo Project Developers" ] description = "A string interning library for Rust, developed as part of the Servo project − compiler plugin." license = "MIT / Apache-2.0" @@ -14,7 +14,7 @@ plugin = true [dependencies.string_cache_shared] path = "../shared" -version = "0.1.0" +version = "0.1.6" [dependencies] lazy_static = "0.1.10" diff --git a/plugin/src/atom/mod.rs b/plugin/src/atom/mod.rs index a9568ef..bff6189 100644 --- a/plugin/src/atom/mod.rs +++ b/plugin/src/atom/mod.rs @@ -44,9 +44,9 @@ impl MacResult for AtomResult { } fn make_atom_result(cx: &mut ExtCtxt, name: &str) -> Option { - let i = match ::string_cache_shared::STATIC_ATOM_SET.get_index(name) { - Some(i) => i, - None => return None, + let i = match ::string_cache_shared::STATIC_ATOM_SET.get_index_or_hash(name) { + Ok(i) => i, + Err(_hash) => return None, }; let data = ::string_cache_shared::pack_static(i as u32); diff --git a/shared/Cargo.toml b/shared/Cargo.toml index 6667df4..c5c0bf4 100644 --- a/shared/Cargo.toml +++ b/shared/Cargo.toml @@ -1,7 +1,7 @@ [package] name = "string_cache_shared" -version = "0.1.5" +version = "0.1.6" authors = [ "The Servo Project Developers" ] description = "A string interning library for Rust, developed as part of the Servo project − shared code between the compiler plugin and main crate." license = "MIT / Apache-2.0" @@ -15,7 +15,7 @@ path = "lib.rs" [dependencies] debug_unreachable = "0.0.6" -phf = "0.7.3" +phf_shared = "0.7.4" [build-dependencies] -phf_codegen = "0.7.3" +phf_generator = "0.7.4" diff --git a/shared/build.rs b/shared/build.rs index 157dc77..9a0cbeb 100644 --- a/shared/build.rs +++ b/shared/build.rs @@ -1,4 +1,4 @@ -extern crate phf_codegen; +extern crate phf_generator; mod static_atom_list; @@ -7,14 +7,31 @@ use std::io::{BufWriter, Write}; use std::path::Path; fn main() { - let mut set = phf_codegen::OrderedSet::new(); - for &atom in static_atom_list::ATOMS { - set.entry(atom); + let mut set = std::collections::HashSet::new(); + for atom in static_atom_list::ATOMS { + if !set.insert(atom) { + panic!("duplicate static atom `{:?}`", atom); + } } + let state = phf_generator::generate_hash(static_atom_list::ATOMS); + let path = Path::new(&std::env::var("OUT_DIR").unwrap()).join("static_atom_set.rs"); let mut file = BufWriter::new(File::create(&path).unwrap()); - write!(&mut file, "pub static STATIC_ATOM_SET: phf::OrderedSet<&'static str> = ").unwrap(); - set.build(&mut file).unwrap(); - write!(&mut file, ";\n").unwrap(); + macro_rules! w { + ($($arg: expr),+) => { (writeln!(&mut file, $($arg),+).unwrap()) } + } + w!("pub static STATIC_ATOM_SET: StaticAtomSet = StaticAtomSet {{"); + w!(" key: {},", state.key); + w!(" disps: &["); + for &(d1, d2) in &state.disps { + w!(" ({}, {}),", d1, d2); + } + w!(" ],"); + w!(" atoms: &["); + for &idx in &state.map { + w!(" {:?},", static_atom_list::ATOMS[idx]); + } + w!(" ],"); + w!("}};"); } diff --git a/shared/lib.rs b/shared/lib.rs index a8226b7..9475bd6 100644 --- a/shared/lib.rs +++ b/shared/lib.rs @@ -14,7 +14,7 @@ #![cfg_attr(test, deny(warnings))] #[macro_use] extern crate debug_unreachable; -extern crate phf; +extern crate phf_shared; use std::ptr; use std::slice; @@ -32,6 +32,35 @@ pub const ENTRY_ALIGNMENT: usize = 4; // Multiples have TAG_MASK bits unset, av pub const MAX_INLINE_LEN: usize = 7; +pub struct StaticAtomSet { + key: u64, + disps: &'static [(u32, u32)], + atoms: &'static [&'static str], +} + +impl StaticAtomSet { + #[inline] + pub fn get_index_or_hash(&self, s: &str) -> Result { + let hash = phf_shared::hash(s, self.key); + let index = phf_shared::get_index(hash, self.disps, self.atoms.len()); + if self.atoms[index as usize] == s { + Ok(index) + } else { + Err(hash) + } + } + + #[inline] + pub fn index(&self, i: u32) -> Option<&'static str> { + self.atoms.get(i as usize).map(|&s| s) + } + + #[inline] + pub fn iter(&self) -> slice::Iter<&'static str> { + self.atoms.iter() + } +} + // Atoms use a compact representation which fits this enum in a single u64. // Inlining avoids actually constructing the unpacked representation in memory. #[allow(missing_copy_implementations)] diff --git a/shared/static_atom_list.rs b/shared/static_atom_list.rs index 231c22a..855718b 100644 --- a/shared/static_atom_list.rs +++ b/shared/static_atom_list.rs @@ -9,12 +9,7 @@ pub static ATOMS: &'static [&'static str] = &[ - // The first 64 atoms are special: we can quickly check membership - // in sets of these, using a bitmask. This includes every tag that - // appears in more than one set in the tree builder spec, plus a - // few others (arbitrarily chosen). - // - // FIXME(kmc): check if this is really true with the packed tag bits + // The order is not preserved by phf. "a", "address", @@ -81,8 +76,6 @@ pub static ATOMS: &'static [&'static str] = &[ "track", "xmp", - // End of first 64 atoms. - "", // XML namespaces known to the HTML syntax spec diff --git a/src/atom/mod.rs b/src/atom/mod.rs index 82589ce..ec9eef1 100644 --- a/src/atom/mod.rs +++ b/src/atom/mod.rs @@ -16,7 +16,6 @@ use std::mem; use std::ops; use std::str; use std::cmp::Ordering::{self, Equal}; -use std::hash::{Hash, SipHasher, Hasher}; use std::sync::Mutex; use std::sync::atomic::AtomicIsize; use std::sync::atomic::Ordering::SeqCst; @@ -66,12 +65,7 @@ impl StringCache { } } - fn add(&mut self, string_to_add: &str) -> *mut StringCacheEntry { - let hash = { - let mut hasher = SipHasher::default(); - string_to_add.hash(&mut hasher); - hasher.finish() - }; + fn add(&mut self, string_to_add: &str, hash: u64) -> *mut StringCacheEntry { let bucket_index = (hash & BUCKET_MASK) as usize; { let mut ptr: Option<&mut Box> = @@ -148,16 +142,16 @@ impl Atom { #[inline] pub fn from_slice(string_to_add: &str) -> Atom { - let unpacked = match STATIC_ATOM_SET.get_index(string_to_add) { - Some(id) => Static(id as u32), - None => { + let unpacked = match STATIC_ATOM_SET.get_index_or_hash(string_to_add) { + Ok(id) => Static(id as u32), + Err(hash) => { let len = string_to_add.len(); if len <= string_cache_shared::MAX_INLINE_LEN { let mut buf: [u8; 7] = [0; 7]; copy_memory(string_to_add.as_bytes(), &mut buf); Inline(len as u8, buf) } else { - Dynamic(STRING_CACHE.lock().unwrap().add(string_to_add) as *mut ()) + Dynamic(STRING_CACHE.lock().unwrap().add(string_to_add, hash) as *mut ()) } } }; @@ -175,7 +169,7 @@ impl Atom { let buf = string_cache_shared::inline_orig_bytes(&self.data); str::from_utf8(buf).unwrap() }, - Static(idx) => *STATIC_ATOM_SET.index(idx as usize).expect("bad static atom"), + Static(idx) => STATIC_ATOM_SET.index(idx).expect("bad static atom"), Dynamic(entry) => { let entry = entry as *mut StringCacheEntry; &(*entry).string @@ -442,9 +436,12 @@ mod tests { assert_eq_fmt!("0x{:016X}", Atom::from_slice(s).data, data); } - fn check_static(s: &str, x: Atom, data: u64) { - check(s, data); - assert_eq_fmt!("0x{:016X}", x.data, data); + fn check_static(s: &str, x: Atom) { + use string_cache_shared::STATIC_ATOM_SET; + assert_eq_fmt!("0x{:016X}", x.data, Atom::from_slice(s).data); + assert_eq!(0x2, x.data & 0xFFFF_FFFF); + // The index is unspecified by phf. + assert!((x.data >> 32) <= STATIC_ATOM_SET.iter().len() as u64); } // This test is here to make sure we don't change atom representation @@ -452,9 +449,9 @@ mod tests { // static atom table, the tag values, etc. // Static atoms - check_static("a", atom!(a), 0x0000_0000_0000_0002); - check_static("address", atom!(address), 0x0000_0001_0000_0002); - check_static("area", atom!(area), 0x0000_0003_0000_0002); + check_static("a", atom!(a)); + check_static("address", atom!(address)); + check_static("area", atom!(area)); // Inline atoms check("e", 0x0000_0000_0000_6511);