From 5169a5b02a3211c26d7204c9829576d40d9be31c Mon Sep 17 00:00:00 2001 From: Simon Sapin Date: Sat, 18 Jul 2015 18:22:57 +0200 Subject: [PATCH 01/13] Remove usage of unstable features in string_cache_shared --- shared/Cargo.toml | 5 ++++- shared/lib.rs | 52 +++++++++++++++++++++++++++++++++-------------- 2 files changed, 41 insertions(+), 16 deletions(-) diff --git a/shared/Cargo.toml b/shared/Cargo.toml index d0bc2ef..f669443 100644 --- a/shared/Cargo.toml +++ b/shared/Cargo.toml @@ -1,7 +1,7 @@ [package] name = "string_cache_shared" -version = "0.1.3" +version = "0.1.2" authors = [ "The Servo Project Developers" ] description = "A string interning library for Rust, developed as part of the Servo project − shared code between the compiler plugin and main crate." license = "MIT / Apache-2.0" @@ -11,3 +11,6 @@ repository = "https://github.com/servo/string-cache" name = "string_cache_shared" path = "lib.rs" + +[dependencies] +debug_unreachable = "0.0.5" diff --git a/shared/lib.rs b/shared/lib.rs index 896015d..c48ed70 100644 --- a/shared/lib.rs +++ b/shared/lib.rs @@ -11,11 +11,12 @@ //! the macros crate and the run-time library, in order to guarantee //! consistency. -#![feature(raw, slice_bytes, core_intrinsics)] #![deny(warnings)] -use std::{mem, raw, intrinsics}; -use std::slice::bytes; +#[macro_use] extern crate debug_unreachable; + +use std::ptr; +use std::slice; pub use self::UnpackedAtom::{Dynamic, Inline, Static}; @@ -42,11 +43,16 @@ pub enum UnpackedAtom { const STATIC_SHIFT_BITS: usize = 32; +struct RawSlice { + data: *const u8, + len: usize, +} + #[cfg(target_endian = "little")] // Not implemented yet for big-endian #[inline(always)] -unsafe fn inline_atom_slice(x: &u64) -> raw::Slice { +unsafe fn inline_atom_slice(x: &u64) -> RawSlice { let x: *const u64 = x; - raw::Slice { + RawSlice { data: (x as *const u8).offset(1), len: 7, } @@ -70,8 +76,10 @@ impl UnpackedAtom { debug_assert!((len as usize) <= MAX_INLINE_LEN); let mut data: u64 = (INLINE_TAG as u64) | ((len as u64) << 4); { - let dest: &mut [u8] = mem::transmute(inline_atom_slice(&mut data)); - bytes::copy_memory(&buf[..], dest); + let raw_slice = inline_atom_slice(&mut data); + let dest: &mut [u8] = slice::from_raw_parts_mut( + raw_slice.data as *mut u8, raw_slice.len); + copy_memory(&buf[..], dest); } data } @@ -89,14 +97,12 @@ impl UnpackedAtom { let len = ((data & 0xf0) >> 4) as usize; debug_assert!(len <= MAX_INLINE_LEN); let mut buf: [u8; 7] = [0; 7]; - let src: &[u8] = mem::transmute(inline_atom_slice(&data)); - bytes::copy_memory(src, &mut buf[..]); + let raw_slice = inline_atom_slice(&data); + let src: &[u8] = slice::from_raw_parts(raw_slice.data, raw_slice.len); + copy_memory(src, &mut buf[..]); Inline(len as u8, buf) }, - - // intrinsics::unreachable() in release builds? - // See rust-lang/rust#18152. - _ => panic!("impossible"), + _ => debug_unreachable!(), } } } @@ -119,9 +125,25 @@ pub unsafe fn from_packed_dynamic(data: u64) -> Option<*mut ()> { pub unsafe fn inline_orig_bytes<'a>(data: &'a u64) -> &'a [u8] { match UnpackedAtom::from_packed(*data) { Inline(len, _) => { - let src: &[u8] = mem::transmute(inline_atom_slice(data)); + let raw_slice = inline_atom_slice(&data); + let src: &[u8] = slice::from_raw_parts(raw_slice.data, raw_slice.len); &src[..(len as usize)] } - _ => intrinsics::unreachable(), + _ => debug_unreachable!(), + } +} + + +/// Copy of std::slice::bytes::copy_memory, which is unstable. +#[inline] +pub fn copy_memory(src: &[u8], dst: &mut [u8]) { + let len_src = src.len(); + assert!(dst.len() >= len_src); + // `dst` is unaliasable, so we know statically it doesn't overlap + // with `src`. + unsafe { + ptr::copy_nonoverlapping(src.as_ptr(), + dst.as_mut_ptr(), + len_src); } } From 917dd80098ba44462321ce721576c798f276d6d8 Mon Sep 17 00:00:00 2001 From: Simon Sapin Date: Sat, 18 Jul 2015 18:23:41 +0200 Subject: [PATCH 02/13] Use phf_codegen instead of phf_macros --- Cargo.toml | 4 +-- plugin/src/atom/mod.rs | 26 ++----------------- plugin/src/lib.rs | 1 - shared/Cargo.toml | 5 ++++ shared/build.rs | 20 ++++++++++++++ shared/lib.rs | 3 +++ .../data.rs => shared/static_atom_list.rs | 0 src/atom/mod.rs | 9 +++---- src/lib.rs | 4 +-- 9 files changed, 35 insertions(+), 37 deletions(-) create mode 100644 shared/build.rs rename plugin/src/atom/data.rs => shared/static_atom_list.rs (100%) diff --git a/Cargo.toml b/Cargo.toml index e2b28c4..f47b41b 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,7 +1,7 @@ [package] name = "string_cache" -version = "0.1.6" +version = "0.1.7" authors = [ "The Servo Project Developers" ] description = "A string interning library for Rust, developed as part of the Servo project." license = "MIT / Apache-2.0" @@ -22,8 +22,6 @@ log-events = ["rustc-serialize"] [dependencies] rand = "0" -phf = "0.7" -phf_macros = "0.7" lazy_static = "0.1.10" serde = "0.4.2" diff --git a/plugin/src/atom/mod.rs b/plugin/src/atom/mod.rs index 91474b4..93cdeec 100644 --- a/plugin/src/atom/mod.rs +++ b/plugin/src/atom/mod.rs @@ -18,17 +18,6 @@ use std::iter::Chain; use std::collections::HashMap; use std::ascii::AsciiExt; -mod data; - -// Build a PhfOrderedSet of static atoms. -// Takes no arguments. -pub fn expand_static_atom_set(cx: &mut ExtCtxt, sp: Span, tt: &[TokenTree]) -> Box { - ext_bail_if!(tt.len() != 0, cx, sp, "Usage: static_atom_map!()"); - let tts: Vec = data::ATOMS.iter().flat_map(|k| { - (quote_tokens!(&mut *cx, $k,)).into_iter() - }).collect(); - MacEager::expr(quote_expr!(&mut *cx, phf_ordered_set!($tts))) -} fn atom_tok_to_str(t: &TokenTree) -> Option { Some(get_ident(match *t { @@ -38,17 +27,6 @@ fn atom_tok_to_str(t: &TokenTree) -> Option { })) } -// Build a map from atoms to IDs for use in implementing the atom!() macro. -lazy_static! { - static ref STATIC_ATOM_MAP: HashMap<&'static str, usize> = { - let mut m = HashMap::new(); - for (i, x) in data::ATOMS.iter().enumerate() { - m.insert(*x, i); - } - m - }; -} - // FIXME: libsyntax should provide this (rust-lang/rust#17637) struct AtomResult { expr: P, @@ -66,12 +44,12 @@ impl MacResult for AtomResult { } fn make_atom_result(cx: &mut ExtCtxt, name: &str) -> Option { - let i = match STATIC_ATOM_MAP.get(name) { + let i = match ::string_cache_shared::STATIC_ATOM_SET.get_index(name) { Some(i) => i, None => return None, }; - let data = ::string_cache_shared::pack_static(*i as u32); + let data = ::string_cache_shared::pack_static(i as u32); Some(AtomResult { expr: quote_expr!(&mut *cx, ::string_cache::atom::Atom { data: $data }), diff --git a/plugin/src/lib.rs b/plugin/src/lib.rs index b5136dd..138a0f9 100644 --- a/plugin/src/lib.rs +++ b/plugin/src/lib.rs @@ -33,7 +33,6 @@ mod atom; // NB: This needs to be public or we get a linker error. #[plugin_registrar] pub fn plugin_registrar(reg: &mut Registry) { - reg.register_macro("static_atom_set", atom::expand_static_atom_set); reg.register_macro("atom", atom::expand_atom); reg.register_macro("ns", atom::expand_ns); } diff --git a/shared/Cargo.toml b/shared/Cargo.toml index f669443..60437a8 100644 --- a/shared/Cargo.toml +++ b/shared/Cargo.toml @@ -6,6 +6,7 @@ authors = [ "The Servo Project Developers" ] description = "A string interning library for Rust, developed as part of the Servo project − shared code between the compiler plugin and main crate." license = "MIT / Apache-2.0" repository = "https://github.com/servo/string-cache" +build = "build.rs" [lib] @@ -14,3 +15,7 @@ path = "lib.rs" [dependencies] debug_unreachable = "0.0.5" +phf = "0.7.3" + +[build-dependencies] +phf_codegen = "0.7.3" diff --git a/shared/build.rs b/shared/build.rs new file mode 100644 index 0000000..0aafd27 --- /dev/null +++ b/shared/build.rs @@ -0,0 +1,20 @@ +extern crate phf_codegen; + +mod static_atom_list; + +use std::fs::File; +use std::io::{BufWriter, Write}; +use std::path::Path; + +fn main() { + let mut set = phf_codegen::OrderedSet::new(); + for &atom in static_atom_list::ATOMS { + set.entry(atom); + } + + let path = Path::new(env!("OUT_DIR")).join("static_atom_set.rs"); + let mut file = BufWriter::new(File::create(&path).unwrap()); + write!(&mut file, "pub static STATIC_ATOM_SET: phf::OrderedSet<&'static str> = ").unwrap(); + set.build(&mut file).unwrap(); + write!(&mut file, ";\n").unwrap(); +} diff --git a/shared/lib.rs b/shared/lib.rs index c48ed70..8ff9db6 100644 --- a/shared/lib.rs +++ b/shared/lib.rs @@ -14,12 +14,15 @@ #![deny(warnings)] #[macro_use] extern crate debug_unreachable; +extern crate phf; use std::ptr; use std::slice; pub use self::UnpackedAtom::{Dynamic, Inline, Static}; +include!(concat!(env!("OUT_DIR"), "/static_atom_set.rs")); + // FIXME(rust-lang/rust#18153): generate these from an enum pub const DYNAMIC_TAG: u8 = 0u8; pub const INLINE_TAG: u8 = 1u8; // len in upper nybble diff --git a/plugin/src/atom/data.rs b/shared/static_atom_list.rs similarity index 100% rename from plugin/src/atom/data.rs rename to shared/static_atom_list.rs diff --git a/src/atom/mod.rs b/src/atom/mod.rs index 9763ed2..8b101a3 100644 --- a/src/atom/mod.rs +++ b/src/atom/mod.rs @@ -9,7 +9,6 @@ #![allow(non_upper_case_globals)] -use phf::OrderedSet; use serde::{Deserialize, Deserializer, Serialize, Serializer}; use std::fmt; @@ -25,7 +24,7 @@ use std::sync::Mutex; use std::sync::atomic::AtomicIsize; use std::sync::atomic::Ordering::SeqCst; -use string_cache_shared::{self, UnpackedAtom, Static, Inline, Dynamic}; +use string_cache_shared::{self, UnpackedAtom, Static, Inline, Dynamic, STATIC_ATOM_SET}; #[cfg(feature = "log-events")] use event::Event; @@ -37,8 +36,6 @@ macro_rules! log (($e:expr) => (())); // Needed for memory safety of the tagging scheme! const ENTRY_ALIGNMENT: usize = 16; -// Macro-generated table for static atoms. -static static_atom_set: OrderedSet<&'static str> = static_atom_set!(); struct StringCache { buckets: [*mut StringCacheEntry; 4096], @@ -173,7 +170,7 @@ impl Atom { #[inline] pub fn from_slice(string_to_add: &str) -> Atom { - let unpacked = match static_atom_set.get_index(string_to_add) { + let unpacked = match STATIC_ATOM_SET.get_index(string_to_add) { Some(id) => Static(id as u32), None => { let len = string_to_add.len(); @@ -200,7 +197,7 @@ impl Atom { let buf = string_cache_shared::inline_orig_bytes(&self.data); str::from_utf8(buf).unwrap() }, - Static(idx) => *static_atom_set.index(idx as usize).expect("bad static atom"), + Static(idx) => *STATIC_ATOM_SET.index(idx as usize).expect("bad static atom"), Dynamic(entry) => { let entry = entry as *mut StringCacheEntry; &(*entry).string diff --git a/src/lib.rs b/src/lib.rs index e52992a..f568312 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -15,13 +15,11 @@ #![deny(warnings)] #![cfg_attr(test, feature(test))] #![cfg_attr(bench, feature(rand))] -#![plugin(phf_macros, string_cache_plugin)] +#![plugin(string_cache_plugin)] #[cfg(test)] extern crate test; -extern crate phf; - #[macro_use] extern crate lazy_static; From 747bf2479f32c75069956e7bd2fd25de68cc00c5 Mon Sep 17 00:00:00 2001 From: Simon Sapin Date: Sat, 18 Jul 2015 19:34:36 +0200 Subject: [PATCH 03/13] Lower ENTRY_ALIGNMENT from 16 to 4 We only need 2 bits to store the tag (which is 0b_00, 0b_01, or 0b_10). This will enable using `Box::new` instead of `heap::allocate`. @gw, was there another reason to have it at 16? --- shared/lib.rs | 14 ++++++++------ src/atom/mod.rs | 17 +++++++++-------- 2 files changed, 17 insertions(+), 14 deletions(-) diff --git a/shared/lib.rs b/shared/lib.rs index 8ff9db6..466ab18 100644 --- a/shared/lib.rs +++ b/shared/lib.rs @@ -24,9 +24,11 @@ pub use self::UnpackedAtom::{Dynamic, Inline, Static}; include!(concat!(env!("OUT_DIR"), "/static_atom_set.rs")); // FIXME(rust-lang/rust#18153): generate these from an enum -pub const DYNAMIC_TAG: u8 = 0u8; -pub const INLINE_TAG: u8 = 1u8; // len in upper nybble -pub const STATIC_TAG: u8 = 2u8; +pub const DYNAMIC_TAG: u8 = 0b_00; +pub const INLINE_TAG: u8 = 0b_01; // len in upper nybble +pub const STATIC_TAG: u8 = 0b_10; +pub const TAG_MASK: u64 = 0b_11; +pub const ENTRY_ALIGNMENT: usize = 4; // Multiples have TAG_MASK bits unset, available for tagging. pub const MAX_INLINE_LEN: usize = 7; @@ -72,7 +74,7 @@ impl UnpackedAtom { Static(n) => pack_static(n), Dynamic(p) => { let n = p as u64; - debug_assert!(0 == n & 0xf); + debug_assert!(0 == n & TAG_MASK); n } Inline(len, buf) => { @@ -93,7 +95,7 @@ impl UnpackedAtom { pub unsafe fn from_packed(data: u64) -> UnpackedAtom { debug_assert!(DYNAMIC_TAG == 0); // Dynamic is untagged - match (data & 0xf) as u8 { + match (data & TAG_MASK) as u8 { DYNAMIC_TAG => Dynamic(data as *mut ()), STATIC_TAG => Static((data >> STATIC_SHIFT_BITS) as u32), INLINE_TAG => { @@ -113,7 +115,7 @@ impl UnpackedAtom { /// Used for a fast path in Clone and Drop. #[inline(always)] pub unsafe fn from_packed_dynamic(data: u64) -> Option<*mut ()> { - if (DYNAMIC_TAG as u64) == (data & 0xf) { + if (DYNAMIC_TAG as u64) == (data & TAG_MASK) { Some(data as *mut ()) } else { None diff --git a/src/atom/mod.rs b/src/atom/mod.rs index 8b101a3..ac198e3 100644 --- a/src/atom/mod.rs +++ b/src/atom/mod.rs @@ -11,6 +11,7 @@ use serde::{Deserialize, Deserializer, Serialize, Serializer}; +use std::cmp::max; use std::fmt; use std::mem; use std::ops; @@ -24,7 +25,8 @@ use std::sync::Mutex; use std::sync::atomic::AtomicIsize; use std::sync::atomic::Ordering::SeqCst; -use string_cache_shared::{self, UnpackedAtom, Static, Inline, Dynamic, STATIC_ATOM_SET}; +use string_cache_shared::{self, UnpackedAtom, Static, Inline, Dynamic, STATIC_ATOM_SET, + ENTRY_ALIGNMENT}; #[cfg(feature = "log-events")] use event::Event; @@ -33,10 +35,6 @@ use event::Event; macro_rules! log (($e:expr) => (())); -// Needed for memory safety of the tagging scheme! -const ENTRY_ALIGNMENT: usize = 16; - - struct StringCache { buckets: [*mut StringCacheEntry; 4096], } @@ -104,8 +102,10 @@ impl StringCache { if should_add { unsafe { - ptr = heap::allocate(mem::size_of::(), ENTRY_ALIGNMENT) - as *mut StringCacheEntry; + ptr = heap::allocate( + mem::size_of::(), + max(mem::align_of::(), ENTRY_ALIGNMENT) + ) as *mut StringCacheEntry; ptr::write(ptr, StringCacheEntry::new(self.buckets[bucket_index], hash, string_to_add)); } @@ -145,7 +145,8 @@ impl StringCache { unsafe { ptr::read(ptr); heap::deallocate(ptr as *mut u8, - mem::size_of::(), ENTRY_ALIGNMENT); + mem::size_of::(), + max(mem::align_of::(), ENTRY_ALIGNMENT)); } log!(Event::Remove(key)); From 36fb3113f2731b6c8b445b7a7e7446d7c4ed7ac9 Mon Sep 17 00:00:00 2001 From: Simon Sapin Date: Sat, 18 Jul 2015 19:35:28 +0200 Subject: [PATCH 04/13] Update to filling drop --- src/atom/mod.rs | 19 ++++++++++++++----- src/lib.rs | 2 +- 2 files changed, 15 insertions(+), 6 deletions(-) diff --git a/src/atom/mod.rs b/src/atom/mod.rs index ac198e3..b0de01e 100644 --- a/src/atom/mod.rs +++ b/src/atom/mod.rs @@ -155,7 +155,7 @@ impl StringCache { // NOTE: Deriving Eq here implies that a given string must always // be interned the same way. -#[unsafe_no_drop_flag] +#[unsafe_no_drop_flag] // See tests::atom_drop_is_idempotent #[derive(Eq, Hash, PartialEq)] pub struct Atom { /// This field is public so that the `atom!()` macro can use it. @@ -236,10 +236,7 @@ impl Drop for Atom { unsafe { match string_cache_shared::from_packed_dynamic(self.data) { - // We use #[unsafe_no_drop_flag] so that Atom will be only 64 - // bits. That means we need to ignore a NULL pointer here, - // which represents a value that was moved out. - Some(entry) if !entry.is_null() => { + Some(entry) => { let entry = entry as *mut StringCacheEntry; if (*entry).ref_count.fetch_sub(1, SeqCst) == 1 { drop_slow(self); @@ -251,6 +248,7 @@ impl Drop for Atom { } } + impl ops::Deref for Atom { type Target = str; @@ -548,4 +546,15 @@ mod tests { let atom = Atom::from_slice("foobar"); let _: &str = atom.as_ref(); } + + /// Atom uses #[unsafe_no_drop_flag] to stay small, so drop() may be called more than once. + /// In calls after the first one, the atom will be filled with a POST_DROP value. + /// drop() must be a no-op in this case. + #[test] + fn atom_drop_is_idempotent() { + unsafe { + assert_eq!(::string_cache_shared::from_packed_dynamic(::std::mem::POST_DROP_U64), None); + } + } + } diff --git a/src/lib.rs b/src/lib.rs index f568312..27a588a 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -13,7 +13,7 @@ #![feature(plugin, unsafe_no_drop_flag)] #![feature(slice_bytes, heap_api, hash_default)] #![deny(warnings)] -#![cfg_attr(test, feature(test))] +#![cfg_attr(test, feature(test, filling_drop))] #![cfg_attr(bench, feature(rand))] #![plugin(string_cache_plugin)] From 8a3f35c11e59a02f1380aaf51eaa177b3a2a4f57 Mon Sep 17 00:00:00 2001 From: Simon Sapin Date: Sat, 18 Jul 2015 22:53:17 +0200 Subject: [PATCH 05/13] Move remaining usage of unstable features behind a `unstable` Cargo feature flag. Without `--features unstable`: * `Atom` is 16 bytes instead of 8. (It has a drop flag.) * `ns!` and `atom!` are giant generated macros instead of plugins, and so may increase compile time. --- Cargo.toml | 9 +++++++ build.rs | 59 ++++++++++++++++++++++++++++++++++++++++++ plugin/src/atom/mod.rs | 10 +------ shared/lib.rs | 10 +++++++ src/atom/mod.rs | 53 ++++++++++++++++++++----------------- src/lib.rs | 8 +++--- src/namespace.rs | 29 +++++++++++---------- 7 files changed, 128 insertions(+), 50 deletions(-) create mode 100644 build.rs diff --git a/Cargo.toml b/Cargo.toml index f47b41b..2af53cc 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -7,6 +7,7 @@ description = "A string interning library for Rust, developed as part of the Ser license = "MIT / Apache-2.0" repository = "https://github.com/servo/string-cache" documentation = "http://doc.servo.org/string_cache/" +build = "build.rs" [lib] name = "string_cache" @@ -20,6 +21,9 @@ doctest = false # See examples/event-log. log-events = ["rustc-serialize"] +# Use unstable features to optimize space and time (memory and CPU usage). +unstable = ["string_cache_plugin"] + [dependencies] rand = "0" lazy_static = "0.1.10" @@ -32,7 +36,12 @@ optional = true [dependencies.string_cache_plugin] path = "plugin" version = "0.1.1" +optional = true [dependencies.string_cache_shared] path = "shared" version = "0.1.0" + +[build-dependencies.string_cache_shared] +path = "shared" +version = "0.1.0" diff --git a/build.rs b/build.rs new file mode 100644 index 0000000..7434cfc --- /dev/null +++ b/build.rs @@ -0,0 +1,59 @@ +extern crate string_cache_shared; + +use string_cache_shared::{STATIC_ATOM_SET, ALL_NS, pack_static}; + +use std::ascii::AsciiExt; +use std::fs::File; +use std::io::{BufWriter, Write}; +use std::path::Path; + +fn main() { + let path = Path::new(env!("OUT_DIR")).join("ns_macro_without_plugin.rs"); + let mut file = BufWriter::new(File::create(&path).unwrap()); + writeln!(file, r"#[macro_export]").unwrap(); + writeln!(file, r"macro_rules! ns {{").unwrap(); + writeln!(file, "(\"\") => {{ $crate::Namespace({}) }};", atom("")).unwrap(); + for &(prefix, url) in ALL_NS { + if !prefix.is_empty() { + generate_combination("".to_owned(), prefix, url, &mut file); + } + } + writeln!(file, r"}}").unwrap(); + + writeln!(file, r"#[macro_export]").unwrap(); + writeln!(file, r"macro_rules! atom {{").unwrap(); + for &s in STATIC_ATOM_SET.iter() { + if is_ident(s) { + writeln!(file, r"( {} ) => {{ {} }};", s, atom(s)).unwrap(); + } + writeln!(file, r"({:?}) => {{ {} }};", s, atom(s)).unwrap(); + } + writeln!(file, r"}}").unwrap(); +} + +fn generate_combination(prefix1: String, suffix: &str, url: &str, file: &mut BufWriter) { + if suffix.is_empty() { + writeln!(file, r"({:?}) => {{ $crate::Namespace({}) }};", prefix1, atom(url)).unwrap(); + writeln!(file, r"( {} ) => {{ $crate::Namespace({}) }};", prefix1, atom(url)).unwrap(); + } else { + let prefix2 = prefix1.clone(); + generate_combination(prefix1 + &*suffix[..1].to_ascii_lowercase(), &suffix[1..], url, file); + generate_combination(prefix2 + &*suffix[..1].to_ascii_uppercase(), &suffix[1..], url, file); + } +} + +fn atom(s: &str) -> String { + let data = pack_static(STATIC_ATOM_SET.get_index(s).unwrap() as u32); + format!("$crate::Atom {{ data: 0x{:x} }}", data) +} + +fn is_ident(s: &str) -> bool { + let mut chars = s.chars(); + !s.is_empty() && match chars.next().unwrap() { + 'a'...'z' | 'A'...'Z' | '_' => true, + _ => false + } && chars.all(|c| match c { + 'a'...'z' | 'A'...'Z' | '_' | '0'...'9' => true, + _ => false + }) +} diff --git a/plugin/src/atom/mod.rs b/plugin/src/atom/mod.rs index 93cdeec..ee0a89e 100644 --- a/plugin/src/atom/mod.rs +++ b/plugin/src/atom/mod.rs @@ -71,15 +71,7 @@ pub fn expand_atom(cx: &mut ExtCtxt, sp: Span, tt: &[TokenTree]) -> Box Box { - static ALL_NS: &'static [(&'static str, &'static str)] = &[ - ("", ""), - ("html", "http://www.w3.org/1999/xhtml"), - ("xml", "http://www.w3.org/XML/1998/namespace"), - ("xmlns", "http://www.w3.org/2000/xmlns/"), - ("xlink", "http://www.w3.org/1999/xlink"), - ("svg", "http://www.w3.org/2000/svg"), - ("mathml", "http://www.w3.org/1998/Math/MathML"), - ]; + use string_cache_shared::ALL_NS; fn usage() -> String { let ns_names: Vec<&'static str> = ALL_NS[1..].iter() diff --git a/shared/lib.rs b/shared/lib.rs index 466ab18..d0e3f09 100644 --- a/shared/lib.rs +++ b/shared/lib.rs @@ -48,6 +48,16 @@ pub enum UnpackedAtom { const STATIC_SHIFT_BITS: usize = 32; +pub static ALL_NS: &'static [(&'static str, &'static str)] = &[ + ("", ""), + ("html", "http://www.w3.org/1999/xhtml"), + ("xml", "http://www.w3.org/XML/1998/namespace"), + ("xmlns", "http://www.w3.org/2000/xmlns/"), + ("xlink", "http://www.w3.org/1999/xlink"), + ("svg", "http://www.w3.org/2000/svg"), + ("mathml", "http://www.w3.org/1998/Math/MathML"), +]; + struct RawSlice { data: *const u8, len: usize, diff --git a/src/atom/mod.rs b/src/atom/mod.rs index b0de01e..9df47be 100644 --- a/src/atom/mod.rs +++ b/src/atom/mod.rs @@ -11,22 +11,19 @@ use serde::{Deserialize, Deserializer, Serialize, Serializer}; -use std::cmp::max; use std::fmt; use std::mem; use std::ops; use std::ptr; -use std::slice::bytes; use std::str; -use std::rt::heap; use std::cmp::Ordering::{self, Equal}; -use std::hash::{self, Hash, SipHasher}; +use std::hash::{Hash, SipHasher, Hasher}; use std::sync::Mutex; use std::sync::atomic::AtomicIsize; use std::sync::atomic::Ordering::SeqCst; use string_cache_shared::{self, UnpackedAtom, Static, Inline, Dynamic, STATIC_ATOM_SET, - ENTRY_ALIGNMENT}; + ENTRY_ALIGNMENT, copy_memory}; #[cfg(feature = "log-events")] use event::Event; @@ -71,7 +68,11 @@ impl StringCache { } fn add(&mut self, string_to_add: &str) -> *mut StringCacheEntry { - let hash = hash::hash::<_, SipHasher>(&string_to_add); + let hash = { + let mut hasher = SipHasher::default(); + string_to_add.hash(&mut hasher); + hasher.finish() + }; let bucket_index = (hash & (self.buckets.len()-1) as u64) as usize; let mut ptr = self.buckets[bucket_index]; @@ -101,14 +102,11 @@ impl StringCache { } if should_add { - unsafe { - ptr = heap::allocate( - mem::size_of::(), - max(mem::align_of::(), ENTRY_ALIGNMENT) - ) as *mut StringCacheEntry; - ptr::write(ptr, - StringCacheEntry::new(self.buckets[bucket_index], hash, string_to_add)); - } + debug_assert!(mem::align_of::() >= ENTRY_ALIGNMENT); + let mut entry = Box::new(StringCacheEntry::new( + self.buckets[bucket_index], hash, string_to_add)); + ptr = &mut *entry; + mem::forget(entry); self.buckets[bucket_index] = ptr; log!(Event::Insert(ptr as u64, String::from(string_to_add))); } @@ -143,19 +141,21 @@ impl StringCache { debug_assert!(current != ptr::null_mut()); unsafe { - ptr::read(ptr); - heap::deallocate(ptr as *mut u8, - mem::size_of::(), - max(mem::align_of::(), ENTRY_ALIGNMENT)); + box_from_raw(ptr); } log!(Event::Remove(key)); } } +// Box::from_raw is not stable yet +unsafe fn box_from_raw(raw: *mut T) -> Box { + mem::transmute(raw) +} + // NOTE: Deriving Eq here implies that a given string must always // be interned the same way. -#[unsafe_no_drop_flag] // See tests::atom_drop_is_idempotent +#[cfg_attr(unstable, unsafe_no_drop_flag)] // See tests::atom_drop_is_idempotent #[derive(Eq, Hash, PartialEq)] pub struct Atom { /// This field is public so that the `atom!()` macro can use it. @@ -177,7 +177,7 @@ impl Atom { let len = string_to_add.len(); if len <= string_cache_shared::MAX_INLINE_LEN { let mut buf: [u8; 7] = [0; 7]; - bytes::copy_memory(string_to_add.as_bytes(), &mut buf); + copy_memory(string_to_add.as_bytes(), &mut buf); Inline(len as u8, buf) } else { Dynamic(STRING_CACHE.lock().unwrap().add(string_to_add) as *mut ()) @@ -325,9 +325,10 @@ mod bench; #[cfg(test)] mod tests { + use std::mem; use std::thread; - use super::Atom; - use string_cache_shared::{Static, Inline, Dynamic}; + use super::{Atom, StringCacheEntry}; + use string_cache_shared::{Static, Inline, Dynamic, ENTRY_ALIGNMENT, from_packed_dynamic}; #[test] fn test_as_slice() { @@ -491,7 +492,7 @@ mod tests { fn assert_sizes() { // Guard against accidental changes to the sizes of things. use std::mem; - assert_eq!(8, mem::size_of::()); + assert_eq!(if cfg!(unstable) { 8 } else { 16 }, mem::size_of::()); assert_eq!(48, mem::size_of::()); } @@ -553,8 +554,12 @@ mod tests { #[test] fn atom_drop_is_idempotent() { unsafe { - assert_eq!(::string_cache_shared::from_packed_dynamic(::std::mem::POST_DROP_U64), None); + assert_eq!(from_packed_dynamic(mem::POST_DROP_U64), None); } } + #[test] + fn string_cache_entry_alignment_is_sufficient() { + assert!(mem::align_of::() >= ENTRY_ALIGNMENT); + } } diff --git a/src/lib.rs b/src/lib.rs index 27a588a..82c6128 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -10,12 +10,11 @@ #![crate_name = "string_cache"] #![crate_type = "rlib"] -#![feature(plugin, unsafe_no_drop_flag)] -#![feature(slice_bytes, heap_api, hash_default)] #![deny(warnings)] #![cfg_attr(test, feature(test, filling_drop))] #![cfg_attr(bench, feature(rand))] -#![plugin(string_cache_plugin)] +#![cfg_attr(feature = "unstable", feature(unsafe_no_drop_flag, plugin))] +#![cfg_attr(feature = "unstable", plugin(string_cache_plugin))] #[cfg(test)] extern crate test; @@ -43,6 +42,9 @@ macro_rules! qualname (($ns:tt, $local:tt) => ( } )); +#[cfg(not(feature = "unstable"))] +include!(concat!(env!("OUT_DIR"), "/ns_macro_without_plugin.rs")); + #[cfg(feature = "log-events")] #[macro_use] pub mod event; diff --git a/src/namespace.rs b/src/namespace.rs index 6ae5904..cd5543b 100644 --- a/src/namespace.rs +++ b/src/namespace.rs @@ -37,30 +37,31 @@ impl QualName { #[cfg(test)] mod tests { use super::{Namespace, QualName}; + use Atom; #[test] fn ns_macro() { - assert_eq!(ns!(""), Namespace(atom!(""))); + assert_eq!(ns!(""), Namespace(Atom::from_slice(""))); - assert_eq!(ns!(html), Namespace(atom!("http://www.w3.org/1999/xhtml"))); - assert_eq!(ns!(xml), Namespace(atom!("http://www.w3.org/XML/1998/namespace"))); - assert_eq!(ns!(xmlns), Namespace(atom!("http://www.w3.org/2000/xmlns/"))); - assert_eq!(ns!(xlink), Namespace(atom!("http://www.w3.org/1999/xlink"))); - assert_eq!(ns!(svg), Namespace(atom!("http://www.w3.org/2000/svg"))); - assert_eq!(ns!(mathml), Namespace(atom!("http://www.w3.org/1998/Math/MathML"))); + assert_eq!(ns!(html), Namespace(Atom::from_slice("http://www.w3.org/1999/xhtml"))); + assert_eq!(ns!(xml), Namespace(Atom::from_slice("http://www.w3.org/XML/1998/namespace"))); + assert_eq!(ns!(xmlns), Namespace(Atom::from_slice("http://www.w3.org/2000/xmlns/"))); + assert_eq!(ns!(xlink), Namespace(Atom::from_slice("http://www.w3.org/1999/xlink"))); + assert_eq!(ns!(svg), Namespace(Atom::from_slice("http://www.w3.org/2000/svg"))); + assert_eq!(ns!(mathml), Namespace(Atom::from_slice("http://www.w3.org/1998/Math/MathML"))); - assert_eq!(ns!(HtMl), Namespace(atom!("http://www.w3.org/1999/xhtml"))); - assert_eq!(ns!(xMl), Namespace(atom!("http://www.w3.org/XML/1998/namespace"))); - assert_eq!(ns!(XmLnS), Namespace(atom!("http://www.w3.org/2000/xmlns/"))); - assert_eq!(ns!(xLiNk), Namespace(atom!("http://www.w3.org/1999/xlink"))); - assert_eq!(ns!(SvG), Namespace(atom!("http://www.w3.org/2000/svg"))); - assert_eq!(ns!(mAtHmL), Namespace(atom!("http://www.w3.org/1998/Math/MathML"))); + assert_eq!(ns!(HtMl), Namespace(Atom::from_slice("http://www.w3.org/1999/xhtml"))); + assert_eq!(ns!(xMl), Namespace(Atom::from_slice("http://www.w3.org/XML/1998/namespace"))); + assert_eq!(ns!(XmLnS), Namespace(Atom::from_slice("http://www.w3.org/2000/xmlns/"))); + assert_eq!(ns!(xLiNk), Namespace(Atom::from_slice("http://www.w3.org/1999/xlink"))); + assert_eq!(ns!(SvG), Namespace(Atom::from_slice("http://www.w3.org/2000/svg"))); + assert_eq!(ns!(mAtHmL), Namespace(Atom::from_slice("http://www.w3.org/1998/Math/MathML"))); } #[test] fn qualname() { assert_eq!(QualName::new(ns!(""), atom!("")), - QualName { ns: ns!(""), local: atom!("") }); + QualName { ns: ns!(""), local: Atom::from_slice("") }); assert_eq!(QualName::new(ns!(XML), atom!(base)), QualName { ns: ns!(XML), local: atom!(base) }); } From d75004fe6f15a769e94dbaa016cac897f51e704d Mon Sep 17 00:00:00 2001 From: Simon Sapin Date: Sun, 19 Jul 2015 00:11:28 +0200 Subject: [PATCH 06/13] Have Travis run `cargo build` on Rust stable and beta --- .travis.yml | 15 +++++++++------ 1 file changed, 9 insertions(+), 6 deletions(-) diff --git a/.travis.yml b/.travis.yml index 3c6e71e..a666452 100644 --- a/.travis.yml +++ b/.travis.yml @@ -1,11 +1,14 @@ sudo: false language: rust -rust: nightly +rust: + - nightly + - beta + - stable script: - - cargo test - - cargo clean - - cargo test --features log-events - - cd examples/summarize-events/ - - cargo build + - "if [ $TRAVIS_RUST_VERSION != nightly ]; then cargo build; fi" + - "if [ $TRAVIS_RUST_VERSION = nightly ]; then cargo test; fi" + - "if [ $TRAVIS_RUST_VERSION = nightly ]; then cargo clean; fi" + - "if [ $TRAVIS_RUST_VERSION = nightly ]; then cargo test --features log-events; fi" + - "if [ $TRAVIS_RUST_VERSION = nightly ]; then cd examples/summarize-events/ && cargo build; fi" notifications: webhooks: http://build.servo.org:54856/travis From ba28f5173f5a82410e5ca723185b304ab0641bbf Mon Sep 17 00:00:00 2001 From: Simon Sapin Date: Sun, 19 Jul 2015 00:12:52 +0200 Subject: [PATCH 07/13] Only make warnings fatal when testing. MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit That way, a deprecation warning won’t break dependants. --- plugin/src/lib.rs | 2 +- shared/lib.rs | 2 +- src/lib.rs | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/plugin/src/lib.rs b/plugin/src/lib.rs index 138a0f9..ac4f6f3 100644 --- a/plugin/src/lib.rs +++ b/plugin/src/lib.rs @@ -12,7 +12,7 @@ #![feature(plugin_registrar, quote, box_syntax)] #![feature(rustc_private, slice_patterns)] -#![deny(warnings)] +#![cfg_attr(test, deny(warnings))] #![allow(unused_imports)] // for quotes extern crate syntax; diff --git a/shared/lib.rs b/shared/lib.rs index d0e3f09..a8226b7 100644 --- a/shared/lib.rs +++ b/shared/lib.rs @@ -11,7 +11,7 @@ //! the macros crate and the run-time library, in order to guarantee //! consistency. -#![deny(warnings)] +#![cfg_attr(test, deny(warnings))] #[macro_use] extern crate debug_unreachable; extern crate phf; diff --git a/src/lib.rs b/src/lib.rs index 82c6128..af35aed 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -10,7 +10,7 @@ #![crate_name = "string_cache"] #![crate_type = "rlib"] -#![deny(warnings)] +#![cfg_attr(test, deny(warnings))] #![cfg_attr(test, feature(test, filling_drop))] #![cfg_attr(bench, feature(rand))] #![cfg_attr(feature = "unstable", feature(unsafe_no_drop_flag, plugin))] From 4706e16f50b7e19638d23e7c87b1add94ebda503 Mon Sep 17 00:00:00 2001 From: Simon Sapin Date: Sun, 19 Jul 2015 00:52:29 +0200 Subject: [PATCH 08/13] Also test with unstable features on Travis. --- .travis.yml | 1 + 1 file changed, 1 insertion(+) diff --git a/.travis.yml b/.travis.yml index a666452..5aefa64 100644 --- a/.travis.yml +++ b/.travis.yml @@ -9,6 +9,7 @@ script: - "if [ $TRAVIS_RUST_VERSION = nightly ]; then cargo test; fi" - "if [ $TRAVIS_RUST_VERSION = nightly ]; then cargo clean; fi" - "if [ $TRAVIS_RUST_VERSION = nightly ]; then cargo test --features log-events; fi" + - "if [ $TRAVIS_RUST_VERSION = nightly ]; then cargo test --features unstable; fi" - "if [ $TRAVIS_RUST_VERSION = nightly ]; then cd examples/summarize-events/ && cargo build; fi" notifications: webhooks: http://build.servo.org:54856/travis From 2d146b55b9a3e3e3ae598ce7830d78ef85c0bb48 Mon Sep 17 00:00:00 2001 From: Simon Sapin Date: Sun, 19 Jul 2015 08:34:26 +0200 Subject: [PATCH 09/13] =?UTF-8?q?Make=20rand=20a=20dev-dependency.=20(It?= =?UTF-8?q?=E2=80=99s=20only=20used=20in=20tests.)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- Cargo.toml | 4 +++- src/lib.rs | 2 +- 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/Cargo.toml b/Cargo.toml index 2af53cc..5831418 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -25,10 +25,12 @@ log-events = ["rustc-serialize"] unstable = ["string_cache_plugin"] [dependencies] -rand = "0" lazy_static = "0.1.10" serde = "0.4.2" +[dev-dependencies] +rand = "0" + [dependencies.rustc-serialize] version = "0" optional = true diff --git a/src/lib.rs b/src/lib.rs index af35aed..7ae017b 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -12,7 +12,6 @@ #![cfg_attr(test, deny(warnings))] #![cfg_attr(test, feature(test, filling_drop))] -#![cfg_attr(bench, feature(rand))] #![cfg_attr(feature = "unstable", feature(unsafe_no_drop_flag, plugin))] #![cfg_attr(feature = "unstable", plugin(string_cache_plugin))] @@ -22,6 +21,7 @@ extern crate test; #[macro_use] extern crate lazy_static; +#[cfg(test)] extern crate rand; #[cfg(feature = "log-events")] From 9de43cc0d792f27c0e848312e625a91da7fb307c Mon Sep 17 00:00:00 2001 From: Simon Sapin Date: Sun, 19 Jul 2015 09:39:44 +0200 Subject: [PATCH 10/13] Explicitly drop the result of box_from_raw() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit It’s not necessary, but makes it clearer to readers what’s going on. --- src/atom/mod.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/atom/mod.rs b/src/atom/mod.rs index 9df47be..a3ea4b8 100644 --- a/src/atom/mod.rs +++ b/src/atom/mod.rs @@ -141,7 +141,7 @@ impl StringCache { debug_assert!(current != ptr::null_mut()); unsafe { - box_from_raw(ptr); + mem::drop(box_from_raw(ptr)); } log!(Event::Remove(key)); From 7452b763fc827642e7f2b46fcf5b5954e3c5f4af Mon Sep 17 00:00:00 2001 From: Simon Sapin Date: Mon, 20 Jul 2015 15:08:32 +0200 Subject: [PATCH 11/13] Gratuitious refactoring. --- build.rs | 2 +- src/atom/mod.rs | 61 +++++++++++++++++++++++-------------------------- src/lib.rs | 2 +- 3 files changed, 30 insertions(+), 35 deletions(-) diff --git a/build.rs b/build.rs index 7434cfc..0efc595 100644 --- a/build.rs +++ b/build.rs @@ -8,7 +8,7 @@ use std::io::{BufWriter, Write}; use std::path::Path; fn main() { - let path = Path::new(env!("OUT_DIR")).join("ns_macro_without_plugin.rs"); + let path = Path::new(env!("OUT_DIR")).join("ns_atom_macros_without_plugin.rs"); let mut file = BufWriter::new(File::create(&path).unwrap()); writeln!(file, r"#[macro_export]").unwrap(); writeln!(file, r"macro_rules! ns {{").unwrap(); diff --git a/src/atom/mod.rs b/src/atom/mod.rs index a3ea4b8..f759ad8 100644 --- a/src/atom/mod.rs +++ b/src/atom/mod.rs @@ -31,9 +31,10 @@ use event::Event; #[cfg(not(feature = "log-events"))] macro_rules! log (($e:expr) => (())); - +const NB_BUCKETS: usize = 1 << 12; // 4096 +const BUCKET_MASK: u64 = (1 << 12) - 1; struct StringCache { - buckets: [*mut StringCacheEntry; 4096], + buckets: [*mut StringCacheEntry; NB_BUCKETS], } lazy_static! { @@ -63,7 +64,7 @@ impl StringCacheEntry { impl StringCache { fn new() -> StringCache { StringCache { - buckets: unsafe { mem::zeroed() }, + buckets: [ptr::null_mut(); NB_BUCKETS], } } @@ -73,10 +74,10 @@ impl StringCache { string_to_add.hash(&mut hasher); hasher.finish() }; - let bucket_index = (hash & (self.buckets.len()-1) as u64) as usize; + let bucket_index = (hash & BUCKET_MASK) as usize; let mut ptr = self.buckets[bucket_index]; - while ptr != ptr::null_mut() { + while !ptr.is_null() { let value = unsafe { &*ptr }; if value.hash == hash && value.string == string_to_add { break; @@ -84,34 +85,28 @@ impl StringCache { ptr = value.next_in_bucket; } - let mut should_add = false; - if ptr != ptr::null_mut() { + if !ptr.is_null() { unsafe { - if (*ptr).ref_count.fetch_add(1, SeqCst) == 0 { - // Uh-oh. The pointer's reference count was zero, which means someone may try - // to free it. (Naive attempts to defend against this, for example having the - // destructor check to see whether the reference count is indeed zero, don't - // work due to ABA.) Thus we need to temporarily add a duplicate string to the - // list. - should_add = true; - (*ptr).ref_count.fetch_sub(1, SeqCst); + if (*ptr).ref_count.fetch_add(1, SeqCst) > 0 { + return ptr; } + // Uh-oh. The pointer's reference count was zero, which means someone may try + // to free it. (Naive attempts to defend against this, for example having the + // destructor check to see whether the reference count is indeed zero, don't + // work due to ABA.) Thus we need to temporarily add a duplicate string to the + // list. + (*ptr).ref_count.fetch_sub(1, SeqCst); } - } else { - should_add = true } - if should_add { - debug_assert!(mem::align_of::() >= ENTRY_ALIGNMENT); - let mut entry = Box::new(StringCacheEntry::new( - self.buckets[bucket_index], hash, string_to_add)); - ptr = &mut *entry; - mem::forget(entry); - self.buckets[bucket_index] = ptr; - log!(Event::Insert(ptr as u64, String::from(string_to_add))); - } + debug_assert!(mem::align_of::() >= ENTRY_ALIGNMENT); + let mut entry = Box::new(StringCacheEntry::new( + self.buckets[bucket_index], hash, string_to_add)); + ptr = &mut *entry; + mem::forget(entry); + self.buckets[bucket_index] = ptr; + log!(Event::Insert(ptr as u64, String::from(string_to_add))); - debug_assert!(ptr != ptr::null_mut()); ptr } @@ -121,24 +116,24 @@ impl StringCache { debug_assert!(value.ref_count.load(SeqCst) == 0); - let bucket_index = (value.hash & (self.buckets.len()-1) as u64) as usize; + let bucket_index = (value.hash & BUCKET_MASK) as usize; let mut current = self.buckets[bucket_index]; let mut prev: *mut StringCacheEntry = ptr::null_mut(); - while current != ptr::null_mut() { + while !current.is_null() { if current == ptr { - if prev != ptr::null_mut() { - unsafe { (*prev).next_in_bucket = (*current).next_in_bucket }; - } else { + if prev.is_null() { unsafe { self.buckets[bucket_index] = (*current).next_in_bucket }; + } else { + unsafe { (*prev).next_in_bucket = (*current).next_in_bucket }; } break; } prev = current; unsafe { current = (*current).next_in_bucket }; } - debug_assert!(current != ptr::null_mut()); + debug_assert!(!current.is_null()); unsafe { mem::drop(box_from_raw(ptr)); diff --git a/src/lib.rs b/src/lib.rs index 7ae017b..446584a 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -43,7 +43,7 @@ macro_rules! qualname (($ns:tt, $local:tt) => ( )); #[cfg(not(feature = "unstable"))] -include!(concat!(env!("OUT_DIR"), "/ns_macro_without_plugin.rs")); +include!(concat!(env!("OUT_DIR"), "/ns_atom_macros_without_plugin.rs")); #[cfg(feature = "log-events")] #[macro_use] From 66ad520292e0f9eef38730357bb4d6ca87300114 Mon Sep 17 00:00:00 2001 From: Simon Sapin Date: Tue, 21 Jul 2015 01:07:31 +0200 Subject: [PATCH 12/13] Have Travis run (some) tests on stable Rust. --- .travis.yml | 9 ++++----- src/atom/mod.rs | 6 ++++-- src/lib.rs | 4 ++-- 3 files changed, 10 insertions(+), 9 deletions(-) diff --git a/.travis.yml b/.travis.yml index 5aefa64..c8e34eb 100644 --- a/.travis.yml +++ b/.travis.yml @@ -5,11 +5,10 @@ rust: - beta - stable script: - - "if [ $TRAVIS_RUST_VERSION != nightly ]; then cargo build; fi" - - "if [ $TRAVIS_RUST_VERSION = nightly ]; then cargo test; fi" - - "if [ $TRAVIS_RUST_VERSION = nightly ]; then cargo clean; fi" - - "if [ $TRAVIS_RUST_VERSION = nightly ]; then cargo test --features log-events; fi" + - cargo build + - cargo test + - cargo test --features log-events - "if [ $TRAVIS_RUST_VERSION = nightly ]; then cargo test --features unstable; fi" - - "if [ $TRAVIS_RUST_VERSION = nightly ]; then cd examples/summarize-events/ && cargo build; fi" + - "cd examples/summarize-events/ && cargo build" notifications: webhooks: http://build.servo.org:54856/travis diff --git a/src/atom/mod.rs b/src/atom/mod.rs index f759ad8..58ad41f 100644 --- a/src/atom/mod.rs +++ b/src/atom/mod.rs @@ -315,7 +315,7 @@ impl Deserialize for Atom { } } -#[cfg(test)] +#[cfg(all(test, feature = "unstable"))] mod bench; #[cfg(test)] @@ -323,7 +323,7 @@ mod tests { use std::mem; use std::thread; use super::{Atom, StringCacheEntry}; - use string_cache_shared::{Static, Inline, Dynamic, ENTRY_ALIGNMENT, from_packed_dynamic}; + use string_cache_shared::{Static, Inline, Dynamic, ENTRY_ALIGNMENT}; #[test] fn test_as_slice() { @@ -546,8 +546,10 @@ mod tests { /// Atom uses #[unsafe_no_drop_flag] to stay small, so drop() may be called more than once. /// In calls after the first one, the atom will be filled with a POST_DROP value. /// drop() must be a no-op in this case. + #[cfg(feature = "unstable")] #[test] fn atom_drop_is_idempotent() { + use string_cache_shared::from_packed_dynamic; unsafe { assert_eq!(from_packed_dynamic(mem::POST_DROP_U64), None); } diff --git a/src/lib.rs b/src/lib.rs index 446584a..10f4b26 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -11,11 +11,11 @@ #![crate_type = "rlib"] #![cfg_attr(test, deny(warnings))] -#![cfg_attr(test, feature(test, filling_drop))] +#![cfg_attr(all(test, feature = "unstable"), feature(test, filling_drop))] #![cfg_attr(feature = "unstable", feature(unsafe_no_drop_flag, plugin))] #![cfg_attr(feature = "unstable", plugin(string_cache_plugin))] -#[cfg(test)] +#[cfg(all(test, feature = "unstable"))] extern crate test; #[macro_use] From f7fd9640988f9963189631eaaf433caa21c38541 Mon Sep 17 00:00:00 2001 From: Simon Sapin Date: Sat, 25 Jul 2015 09:44:07 +0200 Subject: [PATCH 13/13] Fix #70 using Option> instead of *mut StringCacheEntry --- src/atom/mod.rs | 96 ++++++++++++++++++++----------------------------- 1 file changed, 39 insertions(+), 57 deletions(-) diff --git a/src/atom/mod.rs b/src/atom/mod.rs index 58ad41f..eb19793 100644 --- a/src/atom/mod.rs +++ b/src/atom/mod.rs @@ -14,7 +14,6 @@ use serde::{Deserialize, Deserializer, Serialize, Serializer}; use std::fmt; use std::mem; use std::ops; -use std::ptr; use std::str; use std::cmp::Ordering::{self, Equal}; use std::hash::{Hash, SipHasher, Hasher}; @@ -34,7 +33,7 @@ macro_rules! log (($e:expr) => (())); const NB_BUCKETS: usize = 1 << 12; // 4096 const BUCKET_MASK: u64 = (1 << 12) - 1; struct StringCache { - buckets: [*mut StringCacheEntry; NB_BUCKETS], + buckets: [Option>; NB_BUCKETS], } lazy_static! { @@ -42,16 +41,15 @@ lazy_static! { } struct StringCacheEntry { - next_in_bucket: *mut StringCacheEntry, + next_in_bucket: Option>, hash: u64, ref_count: AtomicIsize, string: String, } -unsafe impl Send for StringCache { } - impl StringCacheEntry { - fn new(next: *mut StringCacheEntry, hash: u64, string_to_add: &str) -> StringCacheEntry { + fn new(next: Option>, hash: u64, string_to_add: &str) + -> StringCacheEntry { StringCacheEntry { next_in_bucket: next, hash: hash, @@ -64,7 +62,7 @@ impl StringCacheEntry { impl StringCache { fn new() -> StringCache { StringCache { - buckets: [ptr::null_mut(); NB_BUCKETS], + buckets: unsafe { mem::zeroed() }, } } @@ -75,36 +73,31 @@ impl StringCache { hasher.finish() }; let bucket_index = (hash & BUCKET_MASK) as usize; - let mut ptr = self.buckets[bucket_index]; - - while !ptr.is_null() { - let value = unsafe { &*ptr }; - if value.hash == hash && value.string == string_to_add { - break; - } - ptr = value.next_in_bucket; - } - - if !ptr.is_null() { - unsafe { - if (*ptr).ref_count.fetch_add(1, SeqCst) > 0 { - return ptr; + { + let mut ptr: Option<&mut Box> = + self.buckets[bucket_index].as_mut(); + + while let Some(entry) = ptr.take() { + if entry.hash == hash && entry.string == string_to_add { + if entry.ref_count.fetch_add(1, SeqCst) > 0 { + return &mut **entry; + } + // Uh-oh. The pointer's reference count was zero, which means someone may try + // to free it. (Naive attempts to defend against this, for example having the + // destructor check to see whether the reference count is indeed zero, don't + // work due to ABA.) Thus we need to temporarily add a duplicate string to the + // list. + entry.ref_count.fetch_sub(1, SeqCst); + break; } - // Uh-oh. The pointer's reference count was zero, which means someone may try - // to free it. (Naive attempts to defend against this, for example having the - // destructor check to see whether the reference count is indeed zero, don't - // work due to ABA.) Thus we need to temporarily add a duplicate string to the - // list. - (*ptr).ref_count.fetch_sub(1, SeqCst); + ptr = entry.next_in_bucket.as_mut(); } } - debug_assert!(mem::align_of::() >= ENTRY_ALIGNMENT); let mut entry = Box::new(StringCacheEntry::new( - self.buckets[bucket_index], hash, string_to_add)); - ptr = &mut *entry; - mem::forget(entry); - self.buckets[bucket_index] = ptr; + self.buckets[bucket_index].take(), hash, string_to_add)); + let ptr: *mut StringCacheEntry = &mut *entry; + self.buckets[bucket_index] = Some(entry); log!(Event::Insert(ptr as u64, String::from(string_to_add))); ptr @@ -112,42 +105,31 @@ impl StringCache { fn remove(&mut self, key: u64) { let ptr = key as *mut StringCacheEntry; - let value: &mut StringCacheEntry = unsafe { mem::transmute(ptr) }; - - debug_assert!(value.ref_count.load(SeqCst) == 0); + let bucket_index = { + let value: &StringCacheEntry = unsafe { &*ptr }; + debug_assert!(value.ref_count.load(SeqCst) == 0); + (value.hash & BUCKET_MASK) as usize + }; - let bucket_index = (value.hash & BUCKET_MASK) as usize; - let mut current = self.buckets[bucket_index]; - let mut prev: *mut StringCacheEntry = ptr::null_mut(); + let mut current: &mut Option> = &mut self.buckets[bucket_index]; - while !current.is_null() { - if current == ptr { - if prev.is_null() { - unsafe { self.buckets[bucket_index] = (*current).next_in_bucket }; - } else { - unsafe { (*prev).next_in_bucket = (*current).next_in_bucket }; - } + loop { + let entry_ptr: *mut StringCacheEntry = match current.as_mut() { + Some(entry) => &mut **entry, + None => break, + }; + if entry_ptr == ptr { + mem::drop(mem::replace(current, unsafe { (*entry_ptr).next_in_bucket.take() })); break; } - prev = current; - unsafe { current = (*current).next_in_bucket }; - } - debug_assert!(!current.is_null()); - - unsafe { - mem::drop(box_from_raw(ptr)); + current = unsafe { &mut (*entry_ptr).next_in_bucket }; } log!(Event::Remove(key)); } } -// Box::from_raw is not stable yet -unsafe fn box_from_raw(raw: *mut T) -> Box { - mem::transmute(raw) -} - // NOTE: Deriving Eq here implies that a given string must always // be interned the same way. #[cfg_attr(unstable, unsafe_no_drop_flag)] // See tests::atom_drop_is_idempotent