Skip to content

Commit 3a97af6

Browse files
committed
Use new string_cache_codegen crate for static atoms
1 parent 23730c1 commit 3a97af6

File tree

5 files changed

+92
-133
lines changed

5 files changed

+92
-133
lines changed

Cargo.toml

+3-3
Original file line numberDiff line numberDiff line change
@@ -30,6 +30,7 @@ heap_size = ["heapsize", "heapsize_plugin"]
3030
[dependencies]
3131
lazy_static = "0.1.10"
3232
serde = ">=0.6, <0.8"
33+
phf = "0.7.4"
3334
phf_shared = "0.7.4"
3435
debug_unreachable = "0.0.6"
3536

@@ -48,6 +49,5 @@ optional = true
4849
version = "0.1.4"
4950
optional = true
5051

51-
[build-dependencies]
52-
phf_generator = "0.7.4"
53-
phf_shared = "0.7.4"
52+
[build-dependencies.string_cache_codegen]
53+
version = "0.2.11"

build.rs

+7-61
Original file line numberDiff line numberDiff line change
@@ -1,73 +1,19 @@
1-
extern crate phf_shared;
2-
extern crate phf_generator;
1+
extern crate string_cache_codegen;
32

4-
#[path = "src/shared.rs"] #[allow(dead_code)] mod shared;
53
#[path = "src/static_atom_list.rs"] mod static_atom_list;
64

75
use std::env;
86
use std::fs::File;
9-
use std::io::{BufWriter, Write};
10-
use std::mem;
7+
use std::io::BufWriter;
118
use std::path::Path;
12-
use std::slice;
139

1410
fn main() {
15-
let hash_state = generate();
16-
write_static_atom_set(&hash_state);
17-
write_atom_macro(&hash_state);
18-
}
19-
20-
fn generate() -> phf_generator::HashState {
21-
let mut set = std::collections::HashSet::new();
22-
for atom in static_atom_list::ATOMS {
23-
if !set.insert(atom) {
24-
panic!("duplicate static atom `{:?}`", atom);
25-
}
26-
}
27-
phf_generator::generate_hash(static_atom_list::ATOMS)
28-
}
29-
30-
fn write_static_atom_set(hash_state: &phf_generator::HashState) {
31-
let path = Path::new(&std::env::var("OUT_DIR").unwrap()).join("static_atom_set.rs");
11+
let path = Path::new(&env::var("OUT_DIR").unwrap()).join("static_atoms.rs");
3212
let mut file = BufWriter::new(File::create(&path).unwrap());
33-
macro_rules! w {
34-
($($arg: expr),+) => { (writeln!(&mut file, $($arg),+).unwrap()) }
35-
}
36-
w!("pub static STATIC_ATOM_SET: StaticAtomSet = StaticAtomSet {{");
37-
w!(" key: {},", hash_state.key);
38-
w!(" disps: &[");
39-
for &(d1, d2) in &hash_state.disps {
40-
w!(" ({}, {}),", d1, d2);
41-
}
42-
w!(" ],");
43-
w!(" atoms: &[");
44-
for &idx in &hash_state.map {
45-
w!(" {:?},", static_atom_list::ATOMS[idx]);
46-
}
47-
w!(" ],");
48-
w!("}};");
49-
}
5013

51-
fn write_atom_macro(hash_state: &phf_generator::HashState) {
52-
let set = shared::StaticAtomSet {
53-
key: hash_state.key,
54-
disps: leak(hash_state.disps.clone()),
55-
atoms: leak(hash_state.map.iter().map(|&idx| static_atom_list::ATOMS[idx]).collect()),
56-
};
57-
58-
let path = Path::new(&env::var("OUT_DIR").unwrap()).join("atom_macro.rs");
59-
let mut file = BufWriter::new(File::create(&path).unwrap());
60-
writeln!(file, r"#[macro_export]").unwrap();
61-
writeln!(file, r"macro_rules! atom {{").unwrap();
62-
for &s in set.iter() {
63-
let data = shared::pack_static(set.get_index_or_hash(s).unwrap() as u32);
64-
writeln!(file, r"({:?}) => {{ $crate::Atom {{ data: 0x{:x} }} }};", s, data).unwrap();
14+
let mut builder = string_cache_codegen::AtomSetBuilder::new();
15+
for atom in static_atom_list::ATOMS {
16+
builder.atom(atom);
6517
}
66-
writeln!(file, r"}}").unwrap();
67-
}
68-
69-
fn leak<T>(v: Vec<T>) -> &'static [T] {
70-
let slice = unsafe { slice::from_raw_parts(v.as_ptr(), v.len()) };
71-
mem::forget(v);
72-
slice
18+
builder.build(&mut file, "ServoAtom", "STATIC_ATOM_SET", "atom");
7319
}

src/atom/mod.rs

+73-42
Original file line numberDiff line numberDiff line change
@@ -23,19 +23,19 @@ use std::ops;
2323
use std::ptr;
2424
use std::slice;
2525
use std::str;
26+
use std::hash::Hash;
27+
use std::marker::PhantomData;
2628
use std::sync::Mutex;
2729
use std::sync::atomic::AtomicIsize;
2830
use std::sync::atomic::Ordering::SeqCst;
2931

3032
use shared::{STATIC_TAG, INLINE_TAG, DYNAMIC_TAG, TAG_MASK, MAX_INLINE_LEN, STATIC_SHIFT_BITS,
31-
ENTRY_ALIGNMENT, pack_static, StaticAtomSet};
33+
ENTRY_ALIGNMENT, pack_static, dynamic_hash};
3234
use self::UnpackedAtom::{Dynamic, Inline, Static};
3335

3436
#[cfg(feature = "log-events")]
3537
use event::Event;
3638

37-
include!(concat!(env!("OUT_DIR"), "/static_atom_set.rs"));
38-
3939
#[cfg(not(feature = "log-events"))]
4040
macro_rules! log (($e:expr) => (()));
4141

@@ -160,29 +160,58 @@ impl StringCache {
160160
}
161161
}
162162

163+
pub trait Kind: Eq + Hash + Ord + PartialEq + PartialOrd {
164+
#[inline]
165+
fn get_index_or_hash(s: &str) -> Result<u32, u64>;
166+
167+
#[inline]
168+
fn index(i: u32) -> Option<&'static str>;
169+
}
170+
171+
// Although DefaultKind isn't used right now, it will be when ServoAtomKind is
172+
// removed. Note it will only be used for dynamic atoms.
173+
#[derive(Eq, Hash, Ord, PartialEq, PartialOrd)]
174+
pub enum DefaultKind {}
175+
impl Kind for DefaultKind {
176+
// There are no static atoms for DefaultKind so there's never an index
177+
#[inline]
178+
fn get_index_or_hash(s: &str) -> Result<u32, u64> {
179+
Err(dynamic_hash(s))
180+
}
181+
182+
#[inline]
183+
fn index(_: u32) -> Option<&'static str> {
184+
None
185+
}
186+
}
187+
188+
pub type Atom = BaseAtom<super::ServoAtomKind>;
189+
163190
// NOTE: Deriving Eq here implies that a given string must always
164-
// be interned the same way.
191+
// be interned the same way for the same kind.
165192
#[cfg_attr(feature = "unstable", unsafe_no_drop_flag)] // See tests::atom_drop_is_idempotent
166193
#[cfg_attr(feature = "heap_size", derive(HeapSizeOf))]
167194
#[derive(Eq, Hash, PartialEq)]
168-
pub struct Atom {
195+
// After RFC 213, rename `BaseAtom` to `Atom` and use `K=super::ServoAtomKind`.
196+
pub struct BaseAtom<K> where K: Kind {
169197
/// This field is public so that the `atom!()` macro can use it.
170198
/// You should not otherwise access this field.
171199
pub data: u64,
200+
pub kind: PhantomData<K>,
172201
}
173202

174-
impl Atom {
203+
impl<K> BaseAtom<K> where K: Kind {
175204
#[inline(always)]
176205
unsafe fn unpack(&self) -> UnpackedAtom {
177206
UnpackedAtom::from_packed(self.data)
178207
}
179208
}
180209

181-
impl<'a> From<Cow<'a, str>> for Atom {
210+
impl<'a, K> From<Cow<'a, str>> for BaseAtom<K> where K: Kind {
182211
#[inline]
183-
fn from(string_to_add: Cow<'a, str>) -> Atom {
184-
let unpacked = match STATIC_ATOM_SET.get_index_or_hash(&*string_to_add) {
185-
Ok(id) => Static(id as u32),
212+
fn from(string_to_add: Cow<'a, str>) -> BaseAtom<K> {
213+
let unpacked = match K::get_index_or_hash(&*string_to_add) {
214+
Ok(id) => Static(id),
186215
Err(hash) => {
187216
let len = string_to_add.len();
188217
if len <= MAX_INLINE_LEN {
@@ -197,27 +226,27 @@ impl<'a> From<Cow<'a, str>> for Atom {
197226

198227
let data = unsafe { unpacked.pack() };
199228
log!(Event::Intern(data));
200-
Atom { data: data }
229+
BaseAtom { data: data, kind: PhantomData }
201230
}
202231
}
203232

204-
impl<'a> From<&'a str> for Atom {
233+
impl<'a, K> From<&'a str> for BaseAtom<K> where K: Kind {
205234
#[inline]
206-
fn from(string_to_add: &str) -> Atom {
207-
Atom::from(Cow::Borrowed(string_to_add))
235+
fn from(string_to_add: &str) -> BaseAtom<K> {
236+
BaseAtom::from(Cow::Borrowed(string_to_add))
208237
}
209238
}
210239

211-
impl From<String> for Atom {
240+
impl<K> From<String> for BaseAtom<K> where K: Kind {
212241
#[inline]
213-
fn from(string_to_add: String) -> Atom {
214-
Atom::from(Cow::Owned(string_to_add))
242+
fn from(string_to_add: String) -> BaseAtom<K> {
243+
BaseAtom::from(Cow::Owned(string_to_add))
215244
}
216245
}
217246

218-
impl Clone for Atom {
247+
impl<K> Clone for BaseAtom<K> where K: Kind {
219248
#[inline(always)]
220-
fn clone(&self) -> Atom {
249+
fn clone(&self) -> BaseAtom<K> {
221250
unsafe {
222251
match from_packed_dynamic(self.data) {
223252
Some(entry) => {
@@ -227,17 +256,18 @@ impl Clone for Atom {
227256
None => (),
228257
}
229258
}
230-
Atom {
231-
data: self.data
259+
BaseAtom {
260+
data: self.data,
261+
kind: PhantomData,
232262
}
233263
}
234264
}
235265

236-
impl Drop for Atom {
266+
impl<K> Drop for BaseAtom<K> where K: Kind {
237267
#[inline]
238268
fn drop(&mut self) {
239269
// Out of line to guide inlining.
240-
fn drop_slow(this: &mut Atom) {
270+
fn drop_slow<K>(this: &mut BaseAtom<K>) where K: Kind {
241271
STRING_CACHE.lock().unwrap().remove(this.data);
242272
}
243273

@@ -256,7 +286,7 @@ impl Drop for Atom {
256286
}
257287

258288

259-
impl ops::Deref for Atom {
289+
impl<K> ops::Deref for BaseAtom<K> where K: Kind {
260290
type Target = str;
261291

262292
#[inline]
@@ -267,7 +297,7 @@ impl ops::Deref for Atom {
267297
let buf = inline_orig_bytes(&self.data);
268298
str::from_utf8(buf).unwrap()
269299
},
270-
Static(idx) => STATIC_ATOM_SET.index(idx).expect("bad static atom"),
300+
Static(idx) => K::index(idx).expect("bad static atom"),
271301
Dynamic(entry) => {
272302
let entry = entry as *mut StringCacheEntry;
273303
&(*entry).string
@@ -277,14 +307,14 @@ impl ops::Deref for Atom {
277307
}
278308
}
279309

280-
impl fmt::Display for Atom {
310+
impl<K> fmt::Display for BaseAtom<K> where K: Kind {
281311
#[inline]
282312
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
283313
<str as fmt::Display>::fmt(self, f)
284314
}
285315
}
286316

287-
impl fmt::Debug for Atom {
317+
impl<K> fmt::Debug for BaseAtom<K> where K: Kind {
288318
#[inline]
289319
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
290320
let ty_str = unsafe {
@@ -299,63 +329,63 @@ impl fmt::Debug for Atom {
299329
}
300330
}
301331

302-
impl PartialOrd for Atom {
332+
impl<K> PartialOrd for BaseAtom<K> where K: Kind {
303333
#[inline]
304-
fn partial_cmp(&self, other: &Atom) -> Option<Ordering> {
334+
fn partial_cmp(&self, other: &BaseAtom<K>) -> Option<Ordering> {
305335
if self.data == other.data {
306336
return Some(Equal);
307337
}
308338
self.as_ref().partial_cmp(other.as_ref())
309339
}
310340
}
311341

312-
impl Ord for Atom {
342+
impl<K> Ord for BaseAtom<K> where K: Kind {
313343
#[inline]
314-
fn cmp(&self, other: &Atom) -> Ordering {
344+
fn cmp(&self, other: &BaseAtom<K>) -> Ordering {
315345
if self.data == other.data {
316346
return Equal;
317347
}
318348
self.as_ref().cmp(other.as_ref())
319349
}
320350
}
321351

322-
impl AsRef<str> for Atom {
352+
impl<K> AsRef<str> for BaseAtom<K> where K: Kind {
323353
fn as_ref(&self) -> &str {
324354
&self
325355
}
326356
}
327357

328-
impl Serialize for Atom {
358+
impl<K> Serialize for BaseAtom<K> where K: Kind {
329359
fn serialize<S>(&self, serializer: &mut S) -> Result<(),S::Error> where S: Serializer {
330360
let string: &str = self.as_ref();
331361
string.serialize(serializer)
332362
}
333363
}
334364

335-
impl Deserialize for Atom {
336-
fn deserialize<D>(deserializer: &mut D) -> Result<Atom,D::Error> where D: Deserializer {
365+
impl<K> Deserialize for BaseAtom<K> where K: Kind {
366+
fn deserialize<D>(deserializer: &mut D) -> Result<BaseAtom<K>,D::Error> where D: Deserializer {
337367
let string: String = try!(Deserialize::deserialize(deserializer));
338-
Ok(Atom::from(&*string))
368+
Ok(BaseAtom::from(&*string))
339369
}
340370
}
341371

342372
// AsciiExt requires mutating methods, so we just implement the non-mutating ones.
343373
// We don't need to implement is_ascii because there's no performance improvement
344374
// over the one from &str.
345-
impl Atom {
346-
pub fn to_ascii_uppercase(&self) -> Atom {
375+
impl<K> BaseAtom<K> where K: Kind {
376+
pub fn to_ascii_uppercase(&self) -> BaseAtom<K> {
347377
if self.chars().all(char::is_uppercase) {
348378
self.clone()
349379
} else {
350-
Atom::from(&*((&**self).to_ascii_uppercase()))
380+
BaseAtom::from(&*((&**self).to_ascii_uppercase()))
351381
}
352382
}
353383

354-
pub fn to_ascii_lowercase(&self) -> Atom {
384+
pub fn to_ascii_lowercase(&self) -> BaseAtom<K> {
355385
if self.chars().all(char::is_lowercase) {
356386
self.clone()
357387
} else {
358-
Atom::from(&*((&**self).to_ascii_lowercase()))
388+
BaseAtom::from(&*((&**self).to_ascii_lowercase()))
359389
}
360390
}
361391

@@ -486,7 +516,8 @@ mod bench;
486516
mod tests {
487517
use std::mem;
488518
use std::thread;
489-
use super::{Atom, StringCacheEntry, STATIC_ATOM_SET};
519+
use super::{Atom, StringCacheEntry};
520+
use super::super::STATIC_ATOM_SET;
490521
use super::UnpackedAtom::{Dynamic, Inline, Static};
491522
use shared::ENTRY_ALIGNMENT;
492523

src/lib.rs

+3-1
Original file line numberDiff line numberDiff line change
@@ -23,6 +23,7 @@
2323
#[macro_use] extern crate lazy_static;
2424
#[macro_use] extern crate debug_unreachable;
2525
extern crate serde;
26+
extern crate phf;
2627
extern crate phf_shared;
2728

2829
pub use atom::Atom;
@@ -55,7 +56,7 @@ macro_rules! ns {
5556
(mathml) => { $crate::Namespace(atom!("http://www.w3.org/1998/Math/MathML")) };
5657
}
5758

58-
include!(concat!(env!("OUT_DIR"), "/atom_macro.rs"));
59+
include!(concat!(env!("OUT_DIR"), "/static_atoms.rs"));
5960

6061
#[cfg(feature = "log-events")]
6162
#[macro_use]
@@ -73,4 +74,5 @@ pub mod shared;
7374
mod string_cache {
7475
pub use atom;
7576
pub use namespace;
77+
pub use shared;
7678
}

0 commit comments

Comments
 (0)