From 42af92b9da367fee2171704f9c062d12706c53cd Mon Sep 17 00:00:00 2001 From: Russell Johnston Date: Sun, 12 Apr 2020 10:19:09 -0700 Subject: [PATCH] Use direct pointers for `Symbol`s This relies on the extensions to Rust's `const`s described in issue rust-lang/const-eval#11, and will not compile today. --- gml/Cargo.toml | 1 + gml/src/symbol.rs | 418 +++++++++++++++++++++++++------------------- gml/src/vm/value.rs | 4 +- 3 files changed, 241 insertions(+), 182 deletions(-) diff --git a/gml/Cargo.toml b/gml/Cargo.toml index 91595a7..d1a97eb 100644 --- a/gml/Cargo.toml +++ b/gml/Cargo.toml @@ -7,6 +7,7 @@ edition = "2018" [dependencies] gml-meta = { path = "meta" } project = { path = "../project" } +quickdry = "0.1" [target.wasm32-unknown-unknown.dependencies] wasm-host = { path = "../wasm-host" } diff --git a/gml/src/symbol.rs b/gml/src/symbol.rs index e4c7dc4..2a71907 100644 --- a/gml/src/symbol.rs +++ b/gml/src/symbol.rs @@ -1,267 +1,325 @@ -use std::{mem, ops, cmp, fmt}; -use std::marker::PhantomData; +use std::{ptr, slice, str}; +use std::ops::Deref; +use std::cmp::{self, Eq, PartialEq, Ord, PartialOrd}; +use std::fmt::{self, Debug, Display}; use std::hash::{Hash, Hasher}; use std::borrow::Borrow; +use std::alloc::Layout; use std::cell::RefCell; use std::collections::HashSet; -/// A symbol is an index into a thread-local interner. +/// A string in a thread-local interner. +/// +/// Equality and hash are based on pointer identity. #[derive(Copy, Clone, PartialEq, Eq, Hash)] -pub struct Symbol { - index: u32, - _marker: PhantomData<*const str>, -} +pub struct Symbol { entry: *const Entry } -impl Default for Symbol { - fn default() -> Self { - Symbol::intern("") - } +/// A set of unique strings. +#[derive(Default)] +struct Interner { + /// Pointers to entries allocated in `arena`. + /// + /// These references point into `self.arena`, and are not really `'static`, but this lets + /// `HashSet` pick up the right `Hash` and `Eq` impls. + entries: RefCell>, + + /// Actual storage for `entries`. + arena: quickdry::Arena, } +/// An interned string and its metadata. +/// +/// The length is stored at the start of the allocation, to keep `Entry: Sized`. +/// Equality and hash are based only on string content. +#[repr(C)] +struct Entry { len: usize, kind: Kind, data: [u8; 0] } + +/// A symbol equivalence class. +#[repr(u32)] +#[derive(Copy, Clone)] +enum Kind { None, Keyword, Argument(u32) } + impl Symbol { - /// Map a string to its interned symbol - pub fn intern(string: &str) -> Self { - Interner::with(|interner| interner.intern(string)) - } + /// Intern a string in the current thread's interner. + pub fn intern(string: &str) -> Self { Self::with_kind(string, Kind::None) } - pub fn into_index(self) -> u32 { - self.index + fn with_kind(string: &str, kind: Kind) -> Self { + thread_local! { static INTERNER: Interner = Interner::with_keywords(); } + INTERNER.with(|interner| Symbol { entry: interner.intern(string, kind) }) } - pub fn from_index(index: u32) -> Symbol { - Symbol { index, _marker: PhantomData } - } -} + /// Return the wrapped raw pointer. + pub fn into_raw(self) -> *const u8 { self.entry as *const _ } -impl ops::Deref for Symbol { - type Target = str; + /// Construct a `Symbol` from a raw pointer, obtained from `Symbol::into_raw`. + pub unsafe fn from_raw(raw: *const u8) -> Self { Symbol { entry: raw as *const _ } } - fn deref(&self) -> &str { - Interner::with(|interner| unsafe { mem::transmute(interner.get(*self)) }) + fn entry(&self) -> &Entry { + // Safety: `Symbol` is not `Send` or `Sync`, and is always allocated from a thread-local + // `Interner`. This ensures the associated `Entry` will not be freed until the thread dies + // and takes all associated `Symbol`s with it. + unsafe { &*self.entry } } } -impl Borrow for Symbol { - fn borrow(&self) -> &str { - self - } +impl Default for Symbol { + fn default() -> Self { EMPTY } } -impl cmp::PartialOrd for Symbol { - fn partial_cmp(&self, other: &Self) -> Option { - Some(Symbol::cmp(self, other)) - } +impl Deref for Symbol { + type Target = str; + fn deref(&self) -> &str { self.entry().borrow() } } -impl cmp::Ord for Symbol { - fn cmp(&self, other: &Self) -> cmp::Ordering { - let a: &str = self; - let b: &str = other; - str::cmp(a, b) - } +impl Borrow for Symbol { + fn borrow(&self) -> &str { self } } -impl fmt::Debug for Symbol { - fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { - write!(f, "{}({})", self, self.into_index()) - } +impl Ord for Symbol { + fn cmp(&self, other: &Self) -> cmp::Ordering { str::cmp(self, other) } +} + +impl PartialOrd for Symbol { + fn partial_cmp(&self, other: &Self) -> Option { Some(Self::cmp(self, other)) } } -impl fmt::Display for Symbol { +impl Debug for Symbol { fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { - let string: &str = self; - write!(f, "{}", string) + ::fmt(self, f)?; + write!(f, "@")?; + <*const Entry as Debug>::fmt(&self.entry, f)?; + Ok(()) } } -#[derive(Default)] -struct Interner { - strings: HashSet, - ids: Vec<*const str>, +impl Display for Symbol { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { ::fmt(self, f) } } -struct Entry { - string: Box, - id: u32, -} +impl Interner { + /// Look up a string and insert it if it's new. + fn intern<'a>(&'a self, string: &str, kind: Kind) -> &'a Entry { + if let Some(&entry) = self.entries.borrow_mut().get(string) { + return entry; + } -impl cmp::PartialEq for Entry { - fn eq(&self, other: &Self) -> bool { - let a: &str = self.borrow(); - let b: &str = other.borrow(); - a == b + let len = string.len(); + let layout = Layout::new::(); + + // Safety: + // * `Entry::data` is carefully aligned to match the end of `Entry`, with no subsequent + // padding, so we can use it elsewhere to compute the offset of the string. + // * The entry is allocated in `self.arena` and only escapes into client code as + // `&'a Entry`, so it can go into `self.entries` as `&'static Entry`. + let entry = unsafe { + let layout = Layout::from_size_align_unchecked(layout.size() + len, layout.align()); + let entry = self.arena.alloc(layout) as *mut Entry; + ptr::write(entry, Entry { len, kind, data: [] }); + ptr::copy_nonoverlapping(string.as_ptr(), entry.add(1) as *mut u8, len); + &*entry + }; + + self.entries.borrow_mut().insert(entry); + entry } -} -impl cmp::Eq for Entry {} - -impl Hash for Entry { - fn hash(&self, state: &mut H) where H: Hasher { - let a: &str = self.borrow(); - a.hash(state) + /// Insert a statically-allocated `Entry` into the interner. + fn insert(&self, entry: &'static Entry) { + assert_eq!(self.entries.borrow_mut().insert(entry), true); } } impl Borrow for Entry { fn borrow(&self) -> &str { - &self.string - } -} - -impl Interner { - fn fill(strings: &[&str]) -> Self { - let mut interner = Interner::default(); - for &string in strings { - interner.intern(string); + // Safety: `Entry` is always allocated with a following `str` of length `self.len`. + unsafe { + let slice = slice::from_raw_parts(self.data.as_ptr(), self.len); + str::from_utf8_unchecked(slice) } - interner } +} - fn intern(&mut self, string: &str) -> Symbol { - if let Some(entry) = self.strings.get(string) { - return Symbol::from_index(entry.id); - } - - let string = String::from(string).into_boxed_str(); - let data = &*string as *const str; - let id = self.ids.len() as u32; - self.strings.insert(Entry { string, id }); - self.ids.push(data); +// Shim impl to let `Interner::intern` call `HashSet<&Entry>::get(&str)`. +impl Borrow for &'_ Entry { + fn borrow(&self) -> &str { >::borrow(*self) } +} - Symbol::from_index(id) - } +impl Eq for Entry {} - fn get(&self, symbol: Symbol) -> &str { - let index = symbol.into_index(); - unsafe { &*self.ids[index as usize] } +impl PartialEq for Entry { + fn eq(&self, other: &Self) -> bool { + >::eq(self.borrow(), other.borrow()) } +} - fn with T>(f: F) -> T { - thread_local!(static INTERNER: RefCell = { - RefCell::new(Interner::new()) - }); - INTERNER.with(|interner| f(&mut *interner.borrow_mut())) - } +impl Hash for Entry { + fn hash(&self, state: &mut H) { str::hash(self.borrow(), state) } } +/// An `Entry` wrapper to be allocated statically. +struct StaticEntry { entry: Entry, _data: T } + +macro_rules! static_entry { ($name: ident, $string: expr, $kind: expr) => { + static $name: &StaticEntry<[u8]> = &StaticEntry { + entry: Entry { len: $string.len(), kind: $kind, data: [] }, + _data: *$string, + }; +}} + +const EMPTY: Symbol = { + static_entry! { EMPTY, b"", Kind::None } + Symbol { entry: &EMPTY.entry } +}; + macro_rules! declare_symbols {( - keywords: $(($index: expr, $name: ident, $string: expr))* - arguments: $(($symbol_index: expr, $argument_index: expr))* + keywords: $(($name: ident, $string: expr))* + arguments: $(($index: expr, $argument: expr))* ) => { #[allow(non_upper_case_globals)] pub mod keyword { - use std::marker::PhantomData; - use super::Symbol; + use super::{Symbol, Entry, Kind, StaticEntry}; + + // Safety: See `Interner::intern`; this time the `&'static Entry` is not even a lie. - $(pub const $name: Symbol = Symbol { index: $index, _marker: PhantomData };)* + $(pub const $name: Symbol = { + static_entry! { ENTRY, $string, Kind::Keyword } + Symbol { entry: &ENTRY.entry } + };)* + + pub const ARGUMENT: [Symbol; 16] = [ + $({ + static_entry! { ENTRY, $argument, Kind::Argument($index) } + Symbol { entry: &ENTRY.entry } + },)* + ]; } impl Interner { - fn new() -> Self { - Interner::fill(&[ - $($string,)* - $(concat!("argument", $argument_index),)* - ]) + fn with_keywords() -> Self { + let interner = Self::default(); + + interner.insert(EMPTY.entry()); + $(interner.insert(keyword::$name.entry());)* + for argument in &keyword::ARGUMENT { + interner.insert(argument.entry()); + } + + interner } } }} declare_symbols! { keywords: - (0, True, "true") - (1, False, "false") - - (2, Self_, "self") - (3, Other, "other") - (4, All, "all") - (5, NoOne, "noone") - (6, Global, "global") - (7, Local, "local") - - (8, Var, "var") - (9, GlobalVar, "globalvar") - - (10, If, "if") - (11, Then, "then") - (12, Else, "else") - (13, Repeat, "repeat") - (14, While, "while") - (15, Do, "do") - (16, Until, "until") - (17, For, "for") - (18, With, "with") - (19, Switch, "switch") - (20, Case, "case") - (21, Default, "default") - (22, Break, "break") - (23, Continue, "continue") - (24, Exit, "exit") - (25, Return, "return") - - (26, Begin, "begin") - (27, End, "end") - - (28, Not, "not") - (29, Div, "div") - (30, Mod, "mod") - (31, And, "and") - (32, Or, "or") - (33, Xor, "xor") + (True, b"true") + (False, b"false") + + (Self_, b"self") + (Other, b"other") + (All, b"all") + (NoOne, b"noone") + (Global, b"global") + (Local, b"local") + + (Var, b"var") + (GlobalVar, b"globalvar") + + (If, b"if") + (Then, b"then") + (Else, b"else") + (Repeat, b"repeat") + (While, b"while") + (Do, b"do") + (Until, b"until") + (For, b"for") + (With, b"with") + (Switch, b"switch") + (Case, b"case") + (Default, b"default") + (Break, b"break") + (Continue, b"continue") + (Exit, b"exit") + (Return, b"return") + + (Begin, b"begin") + (End, b"end") + + (Not, b"not") + (Div, b"div") + (Mod, b"mod") + (And, b"and") + (Or, b"or") + (Xor, b"xor") arguments: - (34, 0) - (35, 1) - (36, 2) - (37, 3) - (38, 4) - (39, 5) - (40, 6) - (41, 7) - (42, 8) - (43, 9) - (44, 10) - (45, 11) - (46, 12) - (47, 13) - (48, 14) - (49, 15) + (0, b"argument0") + (1, b"argument1") + (2, b"argument2") + (3, b"argument3") + (4, b"argument4") + (5, b"argument5") + (6, b"argument6") + (7, b"argument7") + (8, b"argument8") + (9, b"argument9") + (10, b"argument10") + (11, b"argument11") + (12, b"argument12") + (13, b"argument13") + (14, b"argument14") + (15, b"argument15") } impl Symbol { pub fn is_keyword(&self) -> bool { - self.index < 34 + match self.entry().kind { Kind::Keyword => true, _ => false, } } pub fn is_argument(&self) -> bool { - 34 <= self.index && self.index < 50 + match self.entry().kind { Kind::Argument(_) => true, _ => false } } pub fn as_argument(&self) -> Option { - if self.is_argument() { - Some(self.index - 34) - } else { - None - } + match self.entry().kind { Kind::Argument(index) => Some(index), _ => None } } pub fn from_argument(argument: u32) -> Symbol { - assert!(argument < 16); - Symbol::from_index(34 + argument) + keyword::ARGUMENT[argument as usize] } } #[cfg(test)] mod tests { - use super::*; + use super::Symbol; #[test] - fn intern() { - let mut i = Interner::default(); - - assert_eq!(i.intern("dog"), Symbol::from_index(0)); - assert_eq!(i.intern("dog"), Symbol::from_index(0)); - assert_eq!(i.intern("cat"), Symbol::from_index(1)); - assert_eq!(i.intern("cat"), Symbol::from_index(1)); - assert_eq!(i.intern("dog"), Symbol::from_index(0)); + fn keywords() { + let empty = Symbol::default(); + assert_eq!(empty, super::EMPTY); + + let keyword = Symbol::intern("other"); + assert_eq!(keyword, super::keyword::Other); + + let arg = Symbol::intern("argument3"); + assert_eq!(arg, Symbol::from_argument(3)); + } + + #[test] + fn alloc() { + let dog1 = Symbol::intern("dog"); + assert_eq!(&*dog1, "dog"); + + let dog2 = Symbol::intern("dog"); + assert_eq!(&*dog2, "dog"); + assert_eq!(dog1, dog2); + + let cat1 = Symbol::intern("cat"); + assert_eq!(&*cat1, "cat"); + + let cat2 = Symbol::intern("cat"); + assert_eq!(&*cat2, "cat"); + assert_eq!(cat1, cat2); + + assert_ne!(cat1, dog1); } } diff --git a/gml/src/vm/value.rs b/gml/src/vm/value.rs index 607e3ac..b3ea2ee 100644 --- a/gml/src/vm/value.rs +++ b/gml/src/vm/value.rs @@ -50,7 +50,7 @@ impl Value { } match tag & 0xf { - 0x0 => Data::String(Symbol::from_index(payload as u32)), + 0x0 => Data::String(unsafe { Symbol::from_raw(payload as *mut _) }), 0x1 => Data::Array(unsafe { vm::Array::clone_from_raw(payload as *const _) }), _ => unreachable!("corrupt value"), } @@ -95,7 +95,7 @@ impl From for Value { impl From for Value { fn from(value: Symbol) -> Value { let tag = 0xfff0 | 0x0; - let value = value.into_index() as u64; + let value = value.into_raw() as u64; Value((tag << 48) | value) }