From eb85a9a38003b018a282abcfb7118399fc28c23a Mon Sep 17 00:00:00 2001 From: Simon Sapin Date: Thu, 19 Nov 2015 20:12:25 +0100 Subject: [PATCH 1/2] Merge into a single crate. Use macros even on unstable. MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Breaking changes: * `ns!("")` should be written `ns!()` * Other `ns!(…)` macros should be lowercase, unquoted. --- Cargo.toml | 19 +-- build.rs | 83 +++++++---- examples/summarize-events/Cargo.toml | 4 +- examples/summarize-events/src/main.rs | 22 +-- plugin/Cargo.toml | 21 --- plugin/src/atom/mod.rs | 101 -------------- plugin/src/lib.rs | 38 ----- shared/Cargo.toml | 21 --- shared/build.rs | 37 ----- shared/lib.rs | 193 -------------------------- src/atom/bench.rs | 2 +- src/atom/mod.rs | 140 +++++++++++++++++-- src/lib.rs | 60 ++++---- src/namespace.rs | 21 +-- src/shared.rs | 54 +++++++ {shared => src}/static_atom_list.rs | 0 16 files changed, 303 insertions(+), 513 deletions(-) delete mode 100644 plugin/Cargo.toml delete mode 100644 plugin/src/atom/mod.rs delete mode 100644 plugin/src/lib.rs delete mode 100644 shared/Cargo.toml delete mode 100644 shared/build.rs delete mode 100644 shared/lib.rs create mode 100644 src/shared.rs rename {shared => src}/static_atom_list.rs (100%) diff --git a/Cargo.toml b/Cargo.toml index f85e6a6..3345041 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -22,7 +22,7 @@ doctest = false log-events = ["rustc-serialize"] # Use unstable features to optimize space and time (memory and CPU usage). 
-unstable = ["string_cache_plugin"] +unstable = [] # HeapSizeOf support heap_size = ["heapsize", "heapsize_plugin"] @@ -30,6 +30,8 @@ heap_size = ["heapsize", "heapsize_plugin"] [dependencies] lazy_static = "0.1.10" serde = "0.6" +phf_shared = "0.7.4" +debug_unreachable = "0.0.6" [dev-dependencies] rand = "0" @@ -38,15 +40,6 @@ rand = "0" version = "0" optional = true -[dependencies.string_cache_plugin] -path = "plugin" -version = "0.1.9" -optional = true - -[dependencies.string_cache_shared] -path = "shared" -version = "0.1.9" - [dependencies.heapsize] version = "0.1.1" optional = true @@ -55,6 +48,6 @@ optional = true version = "0.1.0" optional = true -[build-dependencies.string_cache_shared] -path = "shared" -version = "0.1.9" +[build-dependencies] +phf_generator = "0.7.4" +phf_shared = "0.7.4" diff --git a/build.rs b/build.rs index 87c226e..b3f870c 100644 --- a/build.rs +++ b/build.rs @@ -1,50 +1,81 @@ -extern crate string_cache_shared; +extern crate phf_shared; +extern crate phf_generator; -use string_cache_shared::{STATIC_ATOM_SET, ALL_NS, pack_static}; +#[path = "src/shared.rs"] #[allow(dead_code)] mod shared; +#[path = "src/static_atom_list.rs"] mod static_atom_list; use std::env; -use std::ascii::AsciiExt; use std::fs::File; use std::io::{BufWriter, Write}; +use std::mem; use std::path::Path; +use std::slice; fn main() { - let path = Path::new(&env::var("OUT_DIR").unwrap()).join("ns_atom_macros_without_plugin.rs"); - let mut file = BufWriter::new(File::create(&path).unwrap()); - writeln!(file, r"#[macro_export]").unwrap(); - writeln!(file, r"macro_rules! 
ns {{").unwrap(); - writeln!(file, "(\"\") => {{ $crate::Namespace({}) }};", atom("")).unwrap(); - for &(prefix, url) in ALL_NS { - if !prefix.is_empty() { - generate_combination("".to_owned(), prefix, url, &mut file); + let hash_state = generate(); + write_static_atom_set(&hash_state); + write_atom_macro(&hash_state); +} + +fn generate() -> phf_generator::HashState { + let mut set = std::collections::HashSet::new(); + for atom in static_atom_list::ATOMS { + if !set.insert(atom) { + panic!("duplicate static atom `{:?}`", atom); } } - writeln!(file, r"}}").unwrap(); + phf_generator::generate_hash(static_atom_list::ATOMS) +} +fn write_static_atom_set(hash_state: &phf_generator::HashState) { + let path = Path::new(&std::env::var("OUT_DIR").unwrap()).join("static_atom_set.rs"); + let mut file = BufWriter::new(File::create(&path).unwrap()); + macro_rules! w { + ($($arg: expr),+) => { (writeln!(&mut file, $($arg),+).unwrap()) } + } + w!("pub static STATIC_ATOM_SET: StaticAtomSet = StaticAtomSet {{"); + w!(" key: {},", hash_state.key); + w!(" disps: &["); + for &(d1, d2) in &hash_state.disps { + w!(" ({}, {}),", d1, d2); + } + w!(" ],"); + w!(" atoms: &["); + for &idx in &hash_state.map { + w!(" {:?},", static_atom_list::ATOMS[idx]); + } + w!(" ],"); + w!("}};"); +} + +fn write_atom_macro(hash_state: &phf_generator::HashState) { + let set = shared::StaticAtomSet { + key: hash_state.key, + disps: leak(hash_state.disps.clone()), + atoms: leak(hash_state.map.iter().map(|&idx| static_atom_list::ATOMS[idx]).collect()), + }; + + let path = Path::new(&env::var("OUT_DIR").unwrap()).join("atom_macro.rs"); + let mut file = BufWriter::new(File::create(&path).unwrap()); writeln!(file, r"#[macro_export]").unwrap(); writeln!(file, r"macro_rules! 
atom {{").unwrap(); - for &s in STATIC_ATOM_SET.iter() { + for &s in set.iter() { if is_ident(s) { - writeln!(file, r"( {} ) => {{ {} }};", s, atom(s)).unwrap(); + writeln!(file, r"( {} ) => {{ {} }};", s, atom(&set, s)).unwrap(); } - writeln!(file, r"({:?}) => {{ {} }};", s, atom(s)).unwrap(); + writeln!(file, r"({:?}) => {{ {} }};", s, atom(&set, s)).unwrap(); } writeln!(file, r"}}").unwrap(); } -fn generate_combination(prefix1: String, suffix: &str, url: &str, file: &mut BufWriter) { - if suffix.is_empty() { - writeln!(file, r"({:?}) => {{ $crate::Namespace({}) }};", prefix1, atom(url)).unwrap(); - writeln!(file, r"( {} ) => {{ $crate::Namespace({}) }};", prefix1, atom(url)).unwrap(); - } else { - let prefix2 = prefix1.clone(); - generate_combination(prefix1 + &*suffix[..1].to_ascii_lowercase(), &suffix[1..], url, file); - generate_combination(prefix2 + &*suffix[..1].to_ascii_uppercase(), &suffix[1..], url, file); - } +fn leak(v: Vec) -> &'static [T] { + let slice = unsafe { slice::from_raw_parts(v.as_ptr(), v.len()) }; + mem::forget(v); + slice } -fn atom(s: &str) -> String { - let data = pack_static(STATIC_ATOM_SET.get_index_or_hash(s).unwrap() as u32); +fn atom(set: &shared::StaticAtomSet, s: &str) -> String { + let data = shared::pack_static(set.get_index_or_hash(s).unwrap() as u32); format!("$crate::Atom {{ data: 0x{:x} }}", data) } diff --git a/examples/summarize-events/Cargo.toml b/examples/summarize-events/Cargo.toml index 27d173f..7d2e7ba 100644 --- a/examples/summarize-events/Cargo.toml +++ b/examples/summarize-events/Cargo.toml @@ -7,9 +7,7 @@ authors = [ "The Servo Project Developers" ] [dependencies] csv = "0" rustc-serialize = "0" +phf_shared = "0.7.4" [dependencies.string_cache] path = "../.." 
- -[dependencies.string_cache_shared] -path = "../../shared" diff --git a/examples/summarize-events/src/main.rs b/examples/summarize-events/src/main.rs index b5f08fe..1b1aa64 100644 --- a/examples/summarize-events/src/main.rs +++ b/examples/summarize-events/src/main.rs @@ -9,8 +9,12 @@ extern crate csv; extern crate string_cache; -extern crate string_cache_shared; extern crate rustc_serialize; +extern crate phf_shared; + +#[path = "../../../src/shared.rs"] +#[allow(dead_code)] +mod shared; use string_cache::Atom; @@ -35,18 +39,18 @@ enum Kind { impl Kind { fn from_tag(tag: u8) -> Kind { match tag { - string_cache_shared::DYNAMIC_TAG => Kind::Dynamic, - string_cache_shared::INLINE_TAG => Kind::Inline, - string_cache_shared::STATIC_TAG => Kind::Static, + shared::DYNAMIC_TAG => Kind::Dynamic, + shared::INLINE_TAG => Kind::Inline, + shared::STATIC_TAG => Kind::Static, _ => panic!() } } fn to_tag(self) -> u8 { match self { - Kind::Dynamic => string_cache_shared::DYNAMIC_TAG, - Kind::Inline => string_cache_shared::INLINE_TAG, - Kind::Static => string_cache_shared::STATIC_TAG, + Kind::Dynamic => shared::DYNAMIC_TAG, + Kind::Inline => shared::INLINE_TAG, + Kind::Static => shared::STATIC_TAG, } } } @@ -77,10 +81,10 @@ fn main() { match &ev.event[..] { "intern" => { let tag = (ev.id & 0xf) as u8; - assert!(tag <= string_cache_shared::STATIC_TAG); + assert!(tag <= shared::STATIC_TAG); let string = match tag { - string_cache_shared::DYNAMIC_TAG => dynamic[&ev.id].clone(), + shared::DYNAMIC_TAG => dynamic[&ev.id].clone(), // FIXME: We really shouldn't be allowed to do this. It's a memory-safety // hazard; the field is only public for the atom!() macro. 
diff --git a/plugin/Cargo.toml b/plugin/Cargo.toml deleted file mode 100644 index 9ba2dc1..0000000 --- a/plugin/Cargo.toml +++ /dev/null @@ -1,21 +0,0 @@ -[package] - -name = "string_cache_plugin" -version = "0.1.10" -authors = [ "The Servo Project Developers" ] -description = "A string interning library for Rust, developed as part of the Servo project − compiler plugin." -license = "MIT / Apache-2.0" -repository = "https://github.com/servo/string-cache" - -[lib] - -name = "string_cache_plugin" -plugin = true - -[dependencies.string_cache_shared] -path = "../shared" -version = "0.1.9" - -[dependencies] -lazy_static = "0.1.10" -mac = "0.0.2" diff --git a/plugin/src/atom/mod.rs b/plugin/src/atom/mod.rs deleted file mode 100644 index 3176508..0000000 --- a/plugin/src/atom/mod.rs +++ /dev/null @@ -1,101 +0,0 @@ -// Copyright 2014 The Servo Project Developers. See the -// COPYRIGHT file at the top-level directory of this distribution. -// -// Licensed under the Apache License, Version 2.0 or the MIT license -// , at your -// option. This file may not be copied, modified, or distributed -// except according to those terms. 
- -use syntax::ptr::P; -use syntax::codemap::Span; -use syntax::ast::TokenTree; -use syntax::ast; -use syntax::ext::base::{ExtCtxt, MacResult, MacEager}; -use syntax::parse::token::{InternedString, Ident, Literal, Lit}; - -use std::iter::Chain; -use std::collections::HashMap; -use std::ascii::AsciiExt; - - -fn atom_tok_to_str(t: &TokenTree) -> Option { - Some(match *t { - TokenTree::Token(_, Ident(s, _)) => s.name.as_str(), - TokenTree::Token(_, Literal(Lit::Str_(s), _)) => s.as_str(), - _ => return None, - }) -} - -// FIXME: libsyntax should provide this (rust-lang/rust#17637) -struct AtomResult { - expr: P, - pat: P, -} - -impl MacResult for AtomResult { - fn make_expr(self: Box) -> Option> { - Some(self.expr) - } - - fn make_pat(self: Box) -> Option> { - Some(self.pat) - } -} - -fn make_atom_result(cx: &mut ExtCtxt, name: &str) -> Option { - let i = match ::string_cache_shared::STATIC_ATOM_SET.get_index_or_hash(name) { - Ok(i) => i, - Err(_hash) => return None, - }; - - let data = ::string_cache_shared::pack_static(i as u32); - - Some(AtomResult { - expr: quote_expr!(&mut *cx, ::string_cache::atom::Atom { data: $data }), - pat: quote_pat!(&mut *cx, ::string_cache::atom::Atom { data: $data }), - }) -} - -// Translate `atom!(title)` or `atom!("font-weight")` into an `Atom` constant or pattern. -pub fn expand_atom(cx: &mut ExtCtxt, sp: Span, tt: &[TokenTree]) -> Box { - let usage = "Usage: atom!(html) or atom!(\"font-weight\")"; - let name = match tt { - [ref t] => ext_expect!(cx, sp, atom_tok_to_str(t), usage), - _ => ext_bail!(cx, sp, usage), - }; - box ext_expect!(cx, sp, make_atom_result(cx, &*name), - &format!("Unknown static atom {}", &*name)) -} - -// Translate `ns!(HTML)` into `Namespace { atom: atom!("http://www.w3.org/1999/xhtml") }`. -// The argument is ASCII-case-insensitive. 
-pub fn expand_ns(cx: &mut ExtCtxt, sp: Span, tt: &[TokenTree]) -> Box { - use string_cache_shared::ALL_NS; - - fn usage() -> String { - let ns_names: Vec<&'static str> = ALL_NS[1..].iter() - .map(|&(x, _)| x).collect(); - format!("Usage: ns!(HTML), case-insensitive. \ - Known namespaces: {}", - ns_names.join(" ")) - } - - let name = ext_expect!(cx, sp, match tt { - [ref t] => atom_tok_to_str(t), - _ => None, - }, &usage()); - - let &(_, url) = ext_expect!(cx, sp, - ALL_NS.iter().find(|&&(short, _)| short.eq_ignore_ascii_case(&*name)), - &usage()); - - // All of the URLs should be in the static atom table. - let AtomResult { expr, pat } = ext_expect!(cx, sp, make_atom_result(cx, url), - &format!("internal plugin error: can't find namespace url {}", url)); - - box AtomResult { - expr: quote_expr!(&mut *cx, ::string_cache::namespace::Namespace($expr)), - pat: quote_pat!(&mut *cx, ::string_cache::namespace::Namespace($pat)), - } -} diff --git a/plugin/src/lib.rs b/plugin/src/lib.rs deleted file mode 100644 index ac4f6f3..0000000 --- a/plugin/src/lib.rs +++ /dev/null @@ -1,38 +0,0 @@ -// Copyright 2014 The Servo Project Developers. See the -// COPYRIGHT file at the top-level directory of this distribution. -// -// Licensed under the Apache License, Version 2.0 or the MIT license -// , at your -// option. This file may not be copied, modified, or distributed -// except according to those terms. - -#![crate_name="string_cache_plugin"] -#![crate_type="dylib"] - -#![feature(plugin_registrar, quote, box_syntax)] -#![feature(rustc_private, slice_patterns)] -#![cfg_attr(test, deny(warnings))] -#![allow(unused_imports)] // for quotes - -extern crate syntax; -extern crate rustc; - -#[macro_use] -extern crate lazy_static; - -#[macro_use] -extern crate mac; - -extern crate string_cache_shared; - -use rustc::plugin::Registry; - -mod atom; - -// NB: This needs to be public or we get a linker error. 
-#[plugin_registrar] -pub fn plugin_registrar(reg: &mut Registry) { - reg.register_macro("atom", atom::expand_atom); - reg.register_macro("ns", atom::expand_ns); -} diff --git a/shared/Cargo.toml b/shared/Cargo.toml deleted file mode 100644 index 60ab570..0000000 --- a/shared/Cargo.toml +++ /dev/null @@ -1,21 +0,0 @@ -[package] - -name = "string_cache_shared" -version = "0.1.12" -authors = [ "The Servo Project Developers" ] -description = "A string interning library for Rust, developed as part of the Servo project − shared code between the compiler plugin and main crate." -license = "MIT / Apache-2.0" -repository = "https://github.com/servo/string-cache" -build = "build.rs" - -[lib] - -name = "string_cache_shared" -path = "lib.rs" - -[dependencies] -debug_unreachable = "0.0.6" -phf_shared = "0.7.4" - -[build-dependencies] -phf_generator = "0.7.4" diff --git a/shared/build.rs b/shared/build.rs deleted file mode 100644 index 9a0cbeb..0000000 --- a/shared/build.rs +++ /dev/null @@ -1,37 +0,0 @@ -extern crate phf_generator; - -mod static_atom_list; - -use std::fs::File; -use std::io::{BufWriter, Write}; -use std::path::Path; - -fn main() { - let mut set = std::collections::HashSet::new(); - for atom in static_atom_list::ATOMS { - if !set.insert(atom) { - panic!("duplicate static atom `{:?}`", atom); - } - } - - let state = phf_generator::generate_hash(static_atom_list::ATOMS); - - let path = Path::new(&std::env::var("OUT_DIR").unwrap()).join("static_atom_set.rs"); - let mut file = BufWriter::new(File::create(&path).unwrap()); - macro_rules! 
w { - ($($arg: expr),+) => { (writeln!(&mut file, $($arg),+).unwrap()) } - } - w!("pub static STATIC_ATOM_SET: StaticAtomSet = StaticAtomSet {{"); - w!(" key: {},", state.key); - w!(" disps: &["); - for &(d1, d2) in &state.disps { - w!(" ({}, {}),", d1, d2); - } - w!(" ],"); - w!(" atoms: &["); - for &idx in &state.map { - w!(" {:?},", static_atom_list::ATOMS[idx]); - } - w!(" ],"); - w!("}};"); -} diff --git a/shared/lib.rs b/shared/lib.rs deleted file mode 100644 index 9475bd6..0000000 --- a/shared/lib.rs +++ /dev/null @@ -1,193 +0,0 @@ -// Copyright 2014 The Servo Project Developers. See the COPYRIGHT -// file at the top-level directory of this distribution. -// -// Licensed under the Apache License, Version 2.0 or the MIT license -// , at your -// option. This file may not be copied, modified, or distributed -// except according to those terms. - -//! Details of the atom representation that need to be shared between -//! the macros crate and the run-time library, in order to guarantee -//! consistency. - -#![cfg_attr(test, deny(warnings))] - -#[macro_use] extern crate debug_unreachable; -extern crate phf_shared; - -use std::ptr; -use std::slice; - -pub use self::UnpackedAtom::{Dynamic, Inline, Static}; - -include!(concat!(env!("OUT_DIR"), "/static_atom_set.rs")); - -// FIXME(rust-lang/rust#18153): generate these from an enum -pub const DYNAMIC_TAG: u8 = 0b_00; -pub const INLINE_TAG: u8 = 0b_01; // len in upper nybble -pub const STATIC_TAG: u8 = 0b_10; -pub const TAG_MASK: u64 = 0b_11; -pub const ENTRY_ALIGNMENT: usize = 4; // Multiples have TAG_MASK bits unset, available for tagging. 
- -pub const MAX_INLINE_LEN: usize = 7; - -pub struct StaticAtomSet { - key: u64, - disps: &'static [(u32, u32)], - atoms: &'static [&'static str], -} - -impl StaticAtomSet { - #[inline] - pub fn get_index_or_hash(&self, s: &str) -> Result { - let hash = phf_shared::hash(s, self.key); - let index = phf_shared::get_index(hash, self.disps, self.atoms.len()); - if self.atoms[index as usize] == s { - Ok(index) - } else { - Err(hash) - } - } - - #[inline] - pub fn index(&self, i: u32) -> Option<&'static str> { - self.atoms.get(i as usize).map(|&s| s) - } - - #[inline] - pub fn iter(&self) -> slice::Iter<&'static str> { - self.atoms.iter() - } -} - -// Atoms use a compact representation which fits this enum in a single u64. -// Inlining avoids actually constructing the unpacked representation in memory. -#[allow(missing_copy_implementations)] -pub enum UnpackedAtom { - /// Pointer to a dynamic table entry. Must be 16-byte aligned! - Dynamic(*mut ()), - - /// Length + bytes of string. - Inline(u8, [u8; 7]), - - /// Index in static interning table. 
- Static(u32), -} - -const STATIC_SHIFT_BITS: usize = 32; - -pub static ALL_NS: &'static [(&'static str, &'static str)] = &[ - ("", ""), - ("html", "http://www.w3.org/1999/xhtml"), - ("xml", "http://www.w3.org/XML/1998/namespace"), - ("xmlns", "http://www.w3.org/2000/xmlns/"), - ("xlink", "http://www.w3.org/1999/xlink"), - ("svg", "http://www.w3.org/2000/svg"), - ("mathml", "http://www.w3.org/1998/Math/MathML"), -]; - -struct RawSlice { - data: *const u8, - len: usize, -} - -#[cfg(target_endian = "little")] // Not implemented yet for big-endian -#[inline(always)] -unsafe fn inline_atom_slice(x: &u64) -> RawSlice { - let x: *const u64 = x; - RawSlice { - data: (x as *const u8).offset(1), - len: 7, - } -} - -pub fn pack_static(n: u32) -> u64 { - (STATIC_TAG as u64) | ((n as u64) << STATIC_SHIFT_BITS) -} - -impl UnpackedAtom { - #[inline(always)] - pub unsafe fn pack(self) -> u64 { - match self { - Static(n) => pack_static(n), - Dynamic(p) => { - let n = p as u64; - debug_assert!(0 == n & TAG_MASK); - n - } - Inline(len, buf) => { - debug_assert!((len as usize) <= MAX_INLINE_LEN); - let mut data: u64 = (INLINE_TAG as u64) | ((len as u64) << 4); - { - let raw_slice = inline_atom_slice(&mut data); - let dest: &mut [u8] = slice::from_raw_parts_mut( - raw_slice.data as *mut u8, raw_slice.len); - copy_memory(&buf[..], dest); - } - data - } - } - } - - #[inline(always)] - pub unsafe fn from_packed(data: u64) -> UnpackedAtom { - debug_assert!(DYNAMIC_TAG == 0); // Dynamic is untagged - - match (data & TAG_MASK) as u8 { - DYNAMIC_TAG => Dynamic(data as *mut ()), - STATIC_TAG => Static((data >> STATIC_SHIFT_BITS) as u32), - INLINE_TAG => { - let len = ((data & 0xf0) >> 4) as usize; - debug_assert!(len <= MAX_INLINE_LEN); - let mut buf: [u8; 7] = [0; 7]; - let raw_slice = inline_atom_slice(&data); - let src: &[u8] = slice::from_raw_parts(raw_slice.data, raw_slice.len); - copy_memory(src, &mut buf[..]); - Inline(len as u8, buf) - }, - _ => debug_unreachable!(), - } - } -} - -/// 
Used for a fast path in Clone and Drop. -#[inline(always)] -pub unsafe fn from_packed_dynamic(data: u64) -> Option<*mut ()> { - if (DYNAMIC_TAG as u64) == (data & TAG_MASK) { - Some(data as *mut ()) - } else { - None - } -} - -/// For as_slice on inline atoms, we need a pointer into the original -/// string contents. -/// -/// It's undefined behavior to call this on a non-inline atom!! -#[inline(always)] -pub unsafe fn inline_orig_bytes<'a>(data: &'a u64) -> &'a [u8] { - match UnpackedAtom::from_packed(*data) { - Inline(len, _) => { - let raw_slice = inline_atom_slice(&data); - let src: &[u8] = slice::from_raw_parts(raw_slice.data, raw_slice.len); - &src[..(len as usize)] - } - _ => debug_unreachable!(), - } -} - - -/// Copy of std::slice::bytes::copy_memory, which is unstable. -#[inline] -pub fn copy_memory(src: &[u8], dst: &mut [u8]) { - let len_src = src.len(); - assert!(dst.len() >= len_src); - // `dst` is unaliasable, so we know statically it doesn't overlap - // with `src`. - unsafe { - ptr::copy_nonoverlapping(src.as_ptr(), - dst.as_mut_ptr(), - len_src); - } -} diff --git a/src/atom/bench.rs b/src/atom/bench.rs index d7b18b9..864945c 100644 --- a/src/atom/bench.rs +++ b/src/atom/bench.rs @@ -135,7 +135,7 @@ macro_rules! 
bench_all ( use std::iter::repeat; use atom::Atom; - use string_cache_shared::{Static, Inline, Dynamic}; + use atom::UnpackedAtom::{Static, Inline, Dynamic}; use super::mk; diff --git a/src/atom/mod.rs b/src/atom/mod.rs index a6e361d..1b143e3 100644 --- a/src/atom/mod.rs +++ b/src/atom/mod.rs @@ -14,18 +14,23 @@ use serde::{Deserialize, Deserializer, Serialize, Serializer}; use std::fmt; use std::mem; use std::ops; +use std::ptr; +use std::slice; use std::str; use std::cmp::Ordering::{self, Equal}; use std::sync::Mutex; use std::sync::atomic::AtomicIsize; use std::sync::atomic::Ordering::SeqCst; -use string_cache_shared::{self, UnpackedAtom, Static, Inline, Dynamic, STATIC_ATOM_SET, - ENTRY_ALIGNMENT, copy_memory}; +use shared::{STATIC_TAG, INLINE_TAG, DYNAMIC_TAG, TAG_MASK, MAX_INLINE_LEN, STATIC_SHIFT_BITS, + ENTRY_ALIGNMENT, pack_static, StaticAtomSet}; +use self::UnpackedAtom::{Dynamic, Inline, Static}; #[cfg(feature = "log-events")] use event::Event; +include!(concat!(env!("OUT_DIR"), "/static_atom_set.rs")); + #[cfg(not(feature = "log-events"))] macro_rules! 
log (($e:expr) => (())); @@ -149,7 +154,7 @@ impl<'a> From<&'a str> for Atom { Ok(id) => Static(id as u32), Err(hash) => { let len = string_to_add.len(); - if len <= string_cache_shared::MAX_INLINE_LEN { + if len <= MAX_INLINE_LEN { let mut buf: [u8; 7] = [0; 7]; copy_memory(string_to_add.as_bytes(), &mut buf); Inline(len as u8, buf) @@ -169,7 +174,7 @@ impl Clone for Atom { #[inline(always)] fn clone(&self) -> Atom { unsafe { - match string_cache_shared::from_packed_dynamic(self.data) { + match from_packed_dynamic(self.data) { Some(entry) => { let entry = entry as *mut StringCacheEntry; (*entry).ref_count.fetch_add(1, SeqCst); @@ -192,7 +197,7 @@ impl Drop for Atom { } unsafe { - match string_cache_shared::from_packed_dynamic(self.data) { + match from_packed_dynamic(self.data) { Some(entry) => { let entry = entry as *mut StringCacheEntry; if (*entry).ref_count.fetch_sub(1, SeqCst) == 1 { @@ -214,7 +219,7 @@ impl ops::Deref for Atom { unsafe { match self.unpack() { Inline(..) => { - let buf = string_cache_shared::inline_orig_bytes(&self.data); + let buf = inline_orig_bytes(&self.data); str::from_utf8(buf).unwrap() }, Static(idx) => STATIC_ATOM_SET.index(idx).expect("bad static atom"), @@ -289,6 +294,121 @@ impl Deserialize for Atom { } } +// Atoms use a compact representation which fits this enum in a single u64. +// Inlining avoids actually constructing the unpacked representation in memory. +#[allow(missing_copy_implementations)] +enum UnpackedAtom { + /// Pointer to a dynamic table entry. Must be 16-byte aligned! + Dynamic(*mut ()), + + /// Length + bytes of string. + Inline(u8, [u8; 7]), + + /// Index in static interning table. 
+ Static(u32), +} + +struct RawSlice { + data: *const u8, + len: usize, +} + +#[cfg(target_endian = "little")] // Not implemented yet for big-endian +#[inline(always)] +unsafe fn inline_atom_slice(x: &u64) -> RawSlice { + let x: *const u64 = x; + RawSlice { + data: (x as *const u8).offset(1), + len: 7, + } +} + +impl UnpackedAtom { + #[inline(always)] + unsafe fn pack(self) -> u64 { + match self { + Static(n) => pack_static(n), + Dynamic(p) => { + let n = p as u64; + debug_assert!(0 == n & TAG_MASK); + n + } + Inline(len, buf) => { + debug_assert!((len as usize) <= MAX_INLINE_LEN); + let mut data: u64 = (INLINE_TAG as u64) | ((len as u64) << 4); + { + let raw_slice = inline_atom_slice(&mut data); + let dest: &mut [u8] = slice::from_raw_parts_mut( + raw_slice.data as *mut u8, raw_slice.len); + copy_memory(&buf[..], dest); + } + data + } + } + } + + #[inline(always)] + unsafe fn from_packed(data: u64) -> UnpackedAtom { + debug_assert!(DYNAMIC_TAG == 0); // Dynamic is untagged + + match (data & TAG_MASK) as u8 { + DYNAMIC_TAG => Dynamic(data as *mut ()), + STATIC_TAG => Static((data >> STATIC_SHIFT_BITS) as u32), + INLINE_TAG => { + let len = ((data & 0xf0) >> 4) as usize; + debug_assert!(len <= MAX_INLINE_LEN); + let mut buf: [u8; 7] = [0; 7]; + let raw_slice = inline_atom_slice(&data); + let src: &[u8] = slice::from_raw_parts(raw_slice.data, raw_slice.len); + copy_memory(src, &mut buf[..]); + Inline(len as u8, buf) + }, + _ => debug_unreachable!(), + } + } +} + +/// Used for a fast path in Clone and Drop. +#[inline(always)] +unsafe fn from_packed_dynamic(data: u64) -> Option<*mut ()> { + if (DYNAMIC_TAG as u64) == (data & TAG_MASK) { + Some(data as *mut ()) + } else { + None + } +} + +/// For as_slice on inline atoms, we need a pointer into the original +/// string contents. +/// +/// It's undefined behavior to call this on a non-inline atom!! 
+#[inline(always)] +unsafe fn inline_orig_bytes<'a>(data: &'a u64) -> &'a [u8] { + match UnpackedAtom::from_packed(*data) { + Inline(len, _) => { + let raw_slice = inline_atom_slice(&data); + let src: &[u8] = slice::from_raw_parts(raw_slice.data, raw_slice.len); + &src[..(len as usize)] + } + _ => debug_unreachable!(), + } +} + + +/// Copy of std::slice::bytes::copy_memory, which is unstable. +#[inline] +fn copy_memory(src: &[u8], dst: &mut [u8]) { + let len_src = src.len(); + assert!(dst.len() >= len_src); + // `dst` is unaliasable, so we know statically it doesn't overlap + // with `src`. + unsafe { + ptr::copy_nonoverlapping(src.as_ptr(), + dst.as_mut_ptr(), + len_src); + } +} + #[cfg(all(test, feature = "unstable"))] mod bench; @@ -296,8 +416,9 @@ mod bench; mod tests { use std::mem; use std::thread; - use super::{Atom, StringCacheEntry}; - use string_cache_shared::{Static, Inline, Dynamic, ENTRY_ALIGNMENT}; + use super::{Atom, StringCacheEntry, STATIC_ATOM_SET}; + use super::UnpackedAtom::{Dynamic, Inline, Static}; + use shared::ENTRY_ALIGNMENT; #[test] fn test_as_slice() { @@ -435,7 +556,6 @@ mod tests { } fn check_static(s: &str, x: Atom) { - use string_cache_shared::STATIC_ATOM_SET; assert_eq_fmt!("0x{:016X}", x.data, Atom::from(s).data); assert_eq!(0x2, x.data & 0xFFFF_FFFF); // The index is unspecified by phf. 
@@ -526,7 +646,7 @@ mod tests { #[cfg(feature = "unstable")] #[test] fn atom_drop_is_idempotent() { - use string_cache_shared::from_packed_dynamic; + use super::from_packed_dynamic; unsafe { assert_eq!(from_packed_dynamic(mem::POST_DROP_U64), None); } diff --git a/src/lib.rs b/src/lib.rs index 1f9c4b2..65ad039 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -12,43 +12,50 @@ #![cfg_attr(test, deny(warnings))] #![cfg_attr(all(test, feature = "unstable"), feature(test, filling_drop))] -#![cfg_attr(feature = "unstable", feature(unsafe_no_drop_flag, plugin))] +#![cfg_attr(feature = "unstable", feature(unsafe_no_drop_flag))] #![cfg_attr(feature = "heap_size", feature(plugin, custom_derive))] -#![cfg_attr(feature = "unstable", plugin(string_cache_plugin))] #![cfg_attr(feature = "heap_size", plugin(heapsize_plugin))] -#[cfg(all(test, feature = "unstable"))] -extern crate test; - -#[macro_use] -extern crate lazy_static; - -#[cfg(test)] -extern crate rand; - -#[cfg(feature = "log-events")] -extern crate rustc_serialize; - -#[cfg(feature = "heap_size")] -extern crate heapsize; - +#[cfg(all(test, feature = "unstable"))] extern crate test; +#[cfg(feature = "log-events")] extern crate rustc_serialize; +#[cfg(feature = "heap_size")] extern crate heapsize; +#[cfg(test)] extern crate rand; +#[macro_use] extern crate lazy_static; +#[macro_use] extern crate debug_unreachable; extern crate serde; - -extern crate string_cache_shared; +extern crate phf_shared; pub use atom::Atom; pub use namespace::{Namespace, QualName}; #[macro_export] -macro_rules! qualname (($ns:tt, $local:tt) => ( - ::string_cache::namespace::QualName { - ns: ns!($ns), - local: atom!($local), +macro_rules! qualname { + ("", $local:tt) => { + $crate::namespace::QualName { + ns: ns!(), + local: atom!($local), + } + }; + ($ns:tt, $local:tt) => { + $crate::namespace::QualName { + ns: ns!($ns), + local: atom!($local), + } } -)); +} + +#[macro_export] +macro_rules! 
ns { + () => { $crate::Namespace(atom!("")) }; + (html) => { $crate::Namespace(atom!("http://www.w3.org/1999/xhtml")) }; + (xml) => { $crate::Namespace(atom!("http://www.w3.org/XML/1998/namespace")) }; + (xmlns) => { $crate::Namespace(atom!("http://www.w3.org/2000/xmlns/")) }; + (xlink) => { $crate::Namespace(atom!("http://www.w3.org/1999/xlink")) }; + (svg) => { $crate::Namespace(atom!("http://www.w3.org/2000/svg")) }; + (mathml) => { $crate::Namespace(atom!("http://www.w3.org/1998/Math/MathML")) }; +} -#[cfg(not(feature = "unstable"))] -include!(concat!(env!("OUT_DIR"), "/ns_atom_macros_without_plugin.rs")); +include!(concat!(env!("OUT_DIR"), "/atom_macro.rs")); #[cfg(feature = "log-events")] #[macro_use] @@ -56,6 +63,7 @@ pub mod event; pub mod atom; pub mod namespace; +pub mod shared; // A private module so that macro-expanded idents like // `::string_cache::atom::Atom` will also work in this crate. diff --git a/src/namespace.rs b/src/namespace.rs index e69eda3..b80dbb0 100644 --- a/src/namespace.rs +++ b/src/namespace.rs @@ -42,7 +42,7 @@ mod tests { #[test] fn ns_macro() { - assert_eq!(ns!(""), Namespace(Atom::from(""))); + assert_eq!(ns!(), Namespace(Atom::from(""))); assert_eq!(ns!(html), Namespace(Atom::from("http://www.w3.org/1999/xhtml"))); assert_eq!(ns!(xml), Namespace(Atom::from("http://www.w3.org/XML/1998/namespace"))); @@ -50,26 +50,19 @@ mod tests { assert_eq!(ns!(xlink), Namespace(Atom::from("http://www.w3.org/1999/xlink"))); assert_eq!(ns!(svg), Namespace(Atom::from("http://www.w3.org/2000/svg"))); assert_eq!(ns!(mathml), Namespace(Atom::from("http://www.w3.org/1998/Math/MathML"))); - - assert_eq!(ns!(HtMl), Namespace(Atom::from("http://www.w3.org/1999/xhtml"))); - assert_eq!(ns!(xMl), Namespace(Atom::from("http://www.w3.org/XML/1998/namespace"))); - assert_eq!(ns!(XmLnS), Namespace(Atom::from("http://www.w3.org/2000/xmlns/"))); - assert_eq!(ns!(xLiNk), Namespace(Atom::from("http://www.w3.org/1999/xlink"))); - assert_eq!(ns!(SvG), 
Namespace(Atom::from("http://www.w3.org/2000/svg"))); - assert_eq!(ns!(mAtHmL), Namespace(Atom::from("http://www.w3.org/1998/Math/MathML"))); } #[test] fn qualname() { - assert_eq!(QualName::new(ns!(""), atom!("")), - QualName { ns: ns!(""), local: Atom::from("") }); - assert_eq!(QualName::new(ns!(XML), atom!(base)), - QualName { ns: ns!(XML), local: atom!(base) }); + assert_eq!(QualName::new(ns!(), atom!("")), + QualName { ns: ns!(), local: Atom::from("") }); + assert_eq!(QualName::new(ns!(xml), atom!(base)), + QualName { ns: ns!(xml), local: atom!(base) }); } #[test] fn qualname_macro() { - assert_eq!(qualname!("", ""), QualName { ns: ns!(""), local: atom!("") }); - assert_eq!(qualname!(XML, base), QualName { ns: ns!(XML), local: atom!(base) }); + assert_eq!(qualname!("", ""), QualName { ns: ns!(), local: atom!("") }); + assert_eq!(qualname!(xml, base), QualName { ns: ns!(xml), local: atom!(base) }); } } diff --git a/src/shared.rs b/src/shared.rs new file mode 100644 index 0000000..a653872 --- /dev/null +++ b/src/shared.rs @@ -0,0 +1,54 @@ +// Copyright 2015 The Servo Project Developers. See the COPYRIGHT +// file at the top-level directory of this distribution. +// +// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or +// http://www.apache.org/licenses/LICENSE-2.0> or the MIT license +// <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your +// option. This file may not be copied, modified, or distributed +// except according to those terms. + +use phf_shared; + +// FIXME(rust-lang/rust#18153): generate these from an enum +pub const DYNAMIC_TAG: u8 = 0b_00; +pub const INLINE_TAG: u8 = 0b_01; // len in upper nybble +pub const STATIC_TAG: u8 = 0b_10; +pub const TAG_MASK: u64 = 0b_11; +pub const ENTRY_ALIGNMENT: usize = 4; // Multiples have TAG_MASK bits unset, available for tagging.
+ +pub const MAX_INLINE_LEN: usize = 7; + +pub const STATIC_SHIFT_BITS: usize = 32; + +pub fn pack_static(n: u32) -> u64 { + (STATIC_TAG as u64) | ((n as u64) << STATIC_SHIFT_BITS) +} + +pub struct StaticAtomSet { + pub key: u64, + pub disps: &'static [(u32, u32)], + pub atoms: &'static [&'static str], +} + +impl StaticAtomSet { + #[inline] + pub fn get_index_or_hash(&self, s: &str) -> Result<u32, u64> { + let hash = phf_shared::hash(s, self.key); + let index = phf_shared::get_index(hash, self.disps, self.atoms.len()); + if self.atoms[index as usize] == s { + Ok(index) + } else { + Err(hash) + } + } + + #[inline] + pub fn index(&self, i: u32) -> Option<&'static str> { + self.atoms.get(i as usize).map(|&s| s) + } + + #[inline] + pub fn iter(&self) -> ::std::slice::Iter<&'static str> { + self.atoms.iter() + } +} diff --git a/shared/static_atom_list.rs b/src/static_atom_list.rs similarity index 100% rename from shared/static_atom_list.rs rename to src/static_atom_list.rs From 4d505d679f3a1ba8ca243907093f30a71c9dccb3 Mon Sep 17 00:00:00 2001 From: Simon Sapin Date: Fri, 20 Nov 2015 00:39:21 +0100 Subject: [PATCH 2/2] Drop support for unquoted identifiers in atom!() --- build.rs | 22 ++-------------------- src/atom/bench.rs | 6 +++--- src/atom/mod.rs | 19 +++++++++---------- src/namespace.rs | 6 +++--- 4 files changed, 17 insertions(+), 36 deletions(-) diff --git a/build.rs b/build.rs index b3f870c..7571868 100644 --- a/build.rs +++ b/build.rs @@ -60,10 +60,8 @@ fn write_atom_macro(hash_state: &phf_generator::HashState) { writeln!(file, r"#[macro_export]").unwrap(); writeln!(file, r"macro_rules! 
atom {{").unwrap(); for &s in set.iter() { - if is_ident(s) { - writeln!(file, r"( {} ) => {{ {} }};", s, atom(&set, s)).unwrap(); - } - writeln!(file, r"({:?}) => {{ {} }};", s, atom(&set, s)).unwrap(); + let data = shared::pack_static(set.get_index_or_hash(s).unwrap() as u32); + writeln!(file, r"({:?}) => {{ $crate::Atom {{ data: 0x{:x} }} }};", s, data).unwrap(); } writeln!(file, r"}}").unwrap(); } @@ -73,19 +71,3 @@ fn leak(v: Vec) -> &'static [T] { mem::forget(v); slice } - -fn atom(set: &shared::StaticAtomSet, s: &str) -> String { - let data = shared::pack_static(set.get_index_or_hash(s).unwrap() as u32); - format!("$crate::Atom {{ data: 0x{:x} }}", data) -} - -fn is_ident(s: &str) -> bool { - let mut chars = s.chars(); - !s.is_empty() && match chars.next().unwrap() { - 'a'...'z' | 'A'...'Z' | '_' => true, - _ => false - } && chars.all(|c| match c { - 'a'...'z' | 'A'...'Z' | '_' | '0'...'9' => true, - _ => false - }) -} diff --git a/src/atom/bench.rs b/src/atom/bench.rs index 864945c..96b0790 100644 --- a/src/atom/bench.rs +++ b/src/atom/bench.rs @@ -157,7 +157,7 @@ bench_all!([eq ne lt clone_string] for longer_string = super::longer_dynamic_a, super::longer_dynamic_b); bench_all!([eq ne intern as_ref clone is_static lt] - for static_atom = atom!(a), atom!(b)); + for static_atom = atom!("a"), atom!("b")); bench_all!([intern as_ref clone is_inline] for short_inline_atom = mk("e"), mk("f")); @@ -175,10 +175,10 @@ bench_all!([intern as_ref clone is_static] for static_at_runtime = mk("a"), mk("b")); bench_all!([ne lt x_static y_inline] - for static_vs_inline = atom!(a), mk("f")); + for static_vs_inline = atom!("a"), mk("f")); bench_all!([ne lt x_static y_dynamic] - for static_vs_dynamic = atom!(a), mk(super::longer_dynamic_b)); + for static_vs_dynamic = atom!("a"), mk(super::longer_dynamic_b)); bench_all!([ne lt x_inline y_dynamic] for inline_vs_dynamic = mk("e"), mk(super::longer_dynamic_b)); diff --git a/src/atom/mod.rs b/src/atom/mod.rs index 1b143e3..cc0a85e 
100644 --- a/src/atom/mod.rs +++ b/src/atom/mod.rs @@ -567,9 +567,9 @@ mod tests { // static atom table, the tag values, etc. // Static atoms - check_static("a", atom!(a)); - check_static("address", atom!(address)); - check_static("area", atom!(area)); + check_static("a", atom!("a")); + check_static("address", atom!("address")); + check_static("area", atom!("area")); // Inline atoms check("e", 0x0000_0000_0000_6511); @@ -600,7 +600,6 @@ mod tests { #[test] fn atom_macro() { - assert_eq!(atom!(body), Atom::from("body")); assert_eq!(atom!("body"), Atom::from("body")); assert_eq!(atom!("font-weight"), Atom::from("font-weight")); } @@ -608,20 +607,20 @@ mod tests { #[test] fn match_atom() { assert_eq!(2, match Atom::from("head") { - atom!(br) => 1, - atom!(html) | atom!(head) => 2, + atom!("br") => 1, + atom!("html") | atom!("head") => 2, _ => 3, }); assert_eq!(3, match Atom::from("body") { - atom!(br) => 1, - atom!(html) | atom!(head) => 2, + atom!("br") => 1, + atom!("html") | atom!("head") => 2, _ => 3, }); assert_eq!(3, match Atom::from("zzzzzz") { - atom!(br) => 1, - atom!(html) | atom!(head) => 2, + atom!("br") => 1, + atom!("html") | atom!("head") => 2, _ => 3, }); } diff --git a/src/namespace.rs b/src/namespace.rs index b80dbb0..6fe0564 100644 --- a/src/namespace.rs +++ b/src/namespace.rs @@ -56,13 +56,13 @@ mod tests { fn qualname() { assert_eq!(QualName::new(ns!(), atom!("")), QualName { ns: ns!(), local: Atom::from("") }); - assert_eq!(QualName::new(ns!(xml), atom!(base)), - QualName { ns: ns!(xml), local: atom!(base) }); + assert_eq!(QualName::new(ns!(xml), atom!("base")), + QualName { ns: ns!(xml), local: atom!("base") }); } #[test] fn qualname_macro() { assert_eq!(qualname!("", ""), QualName { ns: ns!(), local: atom!("") }); - assert_eq!(qualname!(xml, base), QualName { ns: ns!(xml), local: atom!(base) }); + assert_eq!(qualname!(xml, "base"), QualName { ns: ns!(xml), local: atom!("base") }); } }