Auto merge of #98189 - mystor:fast_ident_literal, r=eddyb

proc_macro/bridge: stop using a remote object handle for proc_macro Ident and Literal. This is the fourth part of rust-lang/rust#86822, split off as requested in rust-lang/rust#86822 (review). This patch transforms the `Ident` and `Literal` types into structs serialized over IPC rather than handles. Symbol values are interned on both the client and server when deserializing, to avoid unnecessary string copies and keep the size of `TokenTree` down. To do the interning efficiently on the client, the proc-macro crate is given a vendored version of the fxhash hasher, as `SipHash` appeared to cause performance issues. This was done rather than depending on `rustc_hash` as it is unfortunately difficult to depend on crates from within `proc_macro` due to it being built at the same time as `std`. In addition, a custom arena allocator and symbol store were also added, inspired by those in `rustc_arena` and `rustc_span`. To prevent symbol re-use across multiple invocations of a macro on the same thread, a new range of `Symbol` names is used for each invocation of the macro, and symbols from previous invocations are cleaned up. In order to keep `Ident` creation efficient, a special ASCII-only case was added to perform ident validation without using RPC for simple identifiers. Full identifier validation couldn't be easily added, as it would require depending on the `rustc_lexer` and `unicode-normalization` crates from within `proc_macro`. Unicode identifiers are validated and normalized using RPC. See the individual commit messages for more details on trade-offs and design decisions behind these patches.
tcdi · Jul 19, 2022 · e27d77c · e27d77c
2 parents 87b1013 + 0b5795f
commit e27d77c
Show file tree

Hide file tree

Showing 8 changed files with 672 additions and 122 deletions.
diff --git a/proc_macro/src/bridge/arena.rs b/proc_macro/src/bridge/arena.rs
@@ -0,0 +1,113 @@
+//! A minimal arena allocator inspired by `rustc_arena::DroplessArena`.
+//!
+//! This is unfortunately a minimal re-implementation rather than a dependency
+//! as it is difficult to depend on crates from within `proc_macro`, due to it
+//! being built at the same time as `std`.
+
+use std::cell::{Cell, RefCell};
+use std::cmp;
+use std::mem::MaybeUninit;
+use std::ops::Range;
+use std::ptr;
+use std::slice;
+use std::str;
+
+// The arenas start with PAGE-sized chunks, and then each new chunk is twice as
+// big as its predecessor, up until we reach HUGE_PAGE-sized chunks, whereupon
+// we stop growing. This scales well, from arenas that are barely used up to
+// arenas that are used for 100s of MiBs. Note also that the chosen sizes match
+// the usual sizes of pages and huge pages on Linux.
+const PAGE: usize = 4096;
+const HUGE_PAGE: usize = 2 * 1024 * 1024;
+
+/// A minimal arena allocator inspired by `rustc_arena::DroplessArena`.
+///
+/// This is unfortunately a complete re-implementation rather than a dependency
+/// as it is difficult to depend on crates from within `proc_macro`, due to it
+/// being built at the same time as `std`.
+///
+/// This arena doesn't have support for allocating anything other than byte
+/// slices, as that is all that is necessary.
+pub(crate) struct Arena {
+    start: Cell<*mut MaybeUninit<u8>>,
+    end: Cell<*mut MaybeUninit<u8>>,
+    chunks: RefCell<Vec<Box<[MaybeUninit<u8>]>>>,
+}
+
+impl Arena {
+    pub(crate) fn new() -> Self {
+        Arena {
+            start: Cell::new(ptr::null_mut()),
+            end: Cell::new(ptr::null_mut()),
+            chunks: RefCell::new(Vec::new()),
+        }
+    }
+
+    /// Add a new chunk with at least `additional` free bytes.
+    #[inline(never)]
+    #[cold]
+    fn grow(&self, additional: usize) {
+        let mut chunks = self.chunks.borrow_mut();
+        let mut new_cap;
+        if let Some(last_chunk) = chunks.last_mut() {
+            // If the previous chunk's len is less than HUGE_PAGE
+            // bytes, then this chunk will be at least double the previous
+            // chunk's size.
+            new_cap = last_chunk.len().min(HUGE_PAGE / 2);
+            new_cap *= 2;
+        } else {
+            new_cap = PAGE;
+        }
+        // Also ensure that this chunk can fit `additional`.
+        new_cap = cmp::max(additional, new_cap);
+
+        let mut chunk = Box::new_uninit_slice(new_cap);
+        let Range { start, end } = chunk.as_mut_ptr_range();
+        self.start.set(start);
+        self.end.set(end);
+        chunks.push(chunk);
+    }
+
+    /// Allocates a byte slice with specified size from the current memory
+    /// chunk. Returns `None` if there is no free space left to satisfy the
+    /// request.
+    fn alloc_raw_without_grow(&self, bytes: usize) -> Option<&mut [MaybeUninit<u8>]> {
+        let start = self.start.get().addr();
+        let old_end = self.end.get();
+        let end = old_end.addr();
+
+        let new_end = end.checked_sub(bytes)?;
+        if start <= new_end {
+            let new_end = old_end.with_addr(new_end);
+            self.end.set(new_end);
+            // SAFETY: `bytes` bytes starting at `new_end` were just reserved.
+            Some(unsafe { slice::from_raw_parts_mut(new_end, bytes) })
+        } else {
+            None
+        }
+    }
+
+    fn alloc_raw(&self, bytes: usize) -> &mut [MaybeUninit<u8>] {
+        if bytes == 0 {
+            return &mut [];
+        }
+
+        loop {
+            if let Some(a) = self.alloc_raw_without_grow(bytes) {
+                break a;
+            }
+            // No free space left. Allocate a new chunk to satisfy the request.
+            // On failure the grow will panic or abort.
+            self.grow(bytes);
+        }
+    }
+
+    pub(crate) fn alloc_str<'a>(&'a self, string: &str) -> &'a mut str {
+        let alloc = self.alloc_raw(string.len());
+        let bytes = MaybeUninit::write_slice(alloc, string.as_bytes());
+
+        // SAFETY: we convert from `&str` to `&[u8]`, clone it into the arena,
+        // and immediately convert the clone back to `&str`.
+        unsafe { str::from_utf8_unchecked_mut(bytes) }
+    }
+}
diff --git a/proc_macro/src/bridge/client.rs b/proc_macro/src/bridge/client.rs
@@ -175,13 +175,11 @@ define_handles! {
     'owned:
     FreeFunctions,
     TokenStream,
-    Literal,
     SourceFile,
     MultiSpan,
     Diagnostic,
 
     'interned:
-    Ident,
     Span,
 }
 
@@ -197,25 +195,6 @@ impl Clone for TokenStream {
     }
 }
 
-impl Clone for Literal {
-    fn clone(&self) -> Self {
-        self.clone()
-    }
-}
-
-impl fmt::Debug for Literal {
-    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
-        f.debug_struct("Literal")
-            // format the kind without quotes, as in `kind: Float`
-            .field("kind", &format_args!("{}", &self.debug_kind()))
-            .field("symbol", &self.symbol())
-            // format `Some("...")` on one line even in {:#?} mode
-            .field("suffix", &format_args!("{:?}", &self.suffix()))
-            .field("span", &self.span())
-            .finish()
-    }
-}
-
 impl Clone for SourceFile {
     fn clone(&self) -> Self {
         self.clone()
@@ -242,6 +221,8 @@ impl fmt::Debug for Span {
     }
 }
 
+pub(crate) use super::symbol::Symbol;
+
 macro_rules! define_client_side {
     ($($name:ident {
         $(fn $method:ident($($arg:ident: $arg_ty:ty),* $(,)?) $(-> $ret_ty:ty)*;)*
@@ -405,6 +386,9 @@ fn run_client<A: for<'a, 's> DecodeMut<'a, 's, ()>, R: Encode<()>>(
     panic::catch_unwind(panic::AssertUnwindSafe(|| {
         maybe_install_panic_hook(force_show_panics);
 
+        // Make sure the symbol store is empty before decoding inputs.
+        Symbol::invalidate_all();
+
         let reader = &mut &buf[..];
         let (globals, input) = <(ExpnGlobals<Span>, A)>::decode(reader, &mut ());
 
@@ -438,6 +422,10 @@ fn run_client<A: for<'a, 's> DecodeMut<'a, 's, ()>, R: Encode<()>>(
         buf.clear();
         Err::<(), _>(e).encode(&mut buf, &mut ());
     });
+
+    // Now that a response has been serialized, invalidate all symbols
+    // registered with the interner.
+    Symbol::invalidate_all();
     buf
 }
 

diff --git a/proc_macro/src/bridge/fxhash.rs b/proc_macro/src/bridge/fxhash.rs
@@ -0,0 +1,117 @@
+//! This is a copy of the `rustc_hash` crate, adapted to work as a module.
+//!
+//! If in the future it becomes more reasonable to add dependencies to
+//! `proc_macro`, this module should be removed and replaced with a dependency
+//! on the `rustc_hash` crate.
+
+use std::collections::HashMap;
+use std::convert::TryInto;
+use std::default::Default;
+use std::hash::BuildHasherDefault;
+use std::hash::Hasher;
+use std::mem::size_of;
+use std::ops::BitXor;
+
+/// Type alias for a hashmap using the `fx` hash algorithm.
+pub type FxHashMap<K, V> = HashMap<K, V, BuildHasherDefault<FxHasher>>;
+
+/// A speedy hash algorithm for use within rustc. The hashmap in libstd
+/// by default uses SipHash which isn't quite as speedy as we want. In the
+/// compiler we're not really worried about DoS attempts, so we use a fast
+/// non-cryptographic hash.
+///
+/// This is the same as the algorithm used by Firefox -- which is a homespun
+/// one not based on any widely-known algorithm -- though modified to produce
+/// 64-bit hash values instead of 32-bit hash values. It consistently
+/// out-performs an FNV-based hash within rustc itself -- the collision rate is
+/// similar or slightly worse than FNV, but the speed of the hash function
+/// itself is much higher because it works on up to 8 bytes at a time.
+pub struct FxHasher {
+    hash: usize,
+}
+
+#[cfg(target_pointer_width = "32")]
+const K: usize = 0x9e3779b9;
+#[cfg(target_pointer_width = "64")]
+const K: usize = 0x517cc1b727220a95;
+
+impl Default for FxHasher {
+    #[inline]
+    fn default() -> FxHasher {
+        FxHasher { hash: 0 }
+    }
+}
+
+impl FxHasher {
+    #[inline]
+    fn add_to_hash(&mut self, i: usize) {
+        self.hash = self.hash.rotate_left(5).bitxor(i).wrapping_mul(K);
+    }
+}
+
+impl Hasher for FxHasher {
+    #[inline]
+    fn write(&mut self, mut bytes: &[u8]) {
+        #[cfg(target_pointer_width = "32")]
+        let read_usize = |bytes: &[u8]| u32::from_ne_bytes(bytes[..4].try_into().unwrap());
+        #[cfg(target_pointer_width = "64")]
+        let read_usize = |bytes: &[u8]| u64::from_ne_bytes(bytes[..8].try_into().unwrap());
+
+        let mut hash = FxHasher { hash: self.hash };
+        assert!(size_of::<usize>() <= 8);
+        while bytes.len() >= size_of::<usize>() {
+            hash.add_to_hash(read_usize(bytes) as usize);
+            bytes = &bytes[size_of::<usize>()..];
+        }
+        if (size_of::<usize>() > 4) && (bytes.len() >= 4) {
+            hash.add_to_hash(u32::from_ne_bytes(bytes[..4].try_into().unwrap()) as usize);
+            bytes = &bytes[4..];
+        }
+        if (size_of::<usize>() > 2) && bytes.len() >= 2 {
+            hash.add_to_hash(u16::from_ne_bytes(bytes[..2].try_into().unwrap()) as usize);
+            bytes = &bytes[2..];
+        }
+        if (size_of::<usize>() > 1) && bytes.len() >= 1 {
+            hash.add_to_hash(bytes[0] as usize);
+        }
+        self.hash = hash.hash;
+    }
+
+    #[inline]
+    fn write_u8(&mut self, i: u8) {
+        self.add_to_hash(i as usize);
+    }
+
+    #[inline]
+    fn write_u16(&mut self, i: u16) {
+        self.add_to_hash(i as usize);
+    }
+
+    #[inline]
+    fn write_u32(&mut self, i: u32) {
+        self.add_to_hash(i as usize);
+    }
+
+    #[cfg(target_pointer_width = "32")]
+    #[inline]
+    fn write_u64(&mut self, i: u64) {
+        self.add_to_hash(i as usize);
+        self.add_to_hash((i >> 32) as usize);
+    }
+
+    #[cfg(target_pointer_width = "64")]
+    #[inline]
+    fn write_u64(&mut self, i: u64) {
+        self.add_to_hash(i as usize);
+    }
+
+    #[inline]
+    fn write_usize(&mut self, i: usize) {
+        self.add_to_hash(i);
+    }
+
+    #[inline]
+    fn finish(&self) -> u64 {
+        self.hash as u64
+    }
+}
diff --git a/proc_macro/src/bridge/handle.rs b/proc_macro/src/bridge/handle.rs
@@ -1,11 +1,13 @@
 //! Server-side handles and storage for per-handle data.
 
-use std::collections::{BTreeMap, HashMap};
-use std::hash::{BuildHasher, Hash};
+use std::collections::BTreeMap;
+use std::hash::Hash;
 use std::num::NonZeroU32;
 use std::ops::{Index, IndexMut};
 use std::sync::atomic::{AtomicUsize, Ordering};
 
+use super::fxhash::FxHashMap;
+
 pub(super) type Handle = NonZeroU32;
 
 /// A store that associates values of type `T` with numeric handles. A value can
@@ -51,31 +53,15 @@ impl<T> IndexMut<Handle> for OwnedStore<T> {
     }
 }
 
-// HACK(eddyb) deterministic `std::collections::hash_map::RandomState` replacement
-// that doesn't require adding any dependencies to `proc_macro` (like `rustc-hash`).
-#[derive(Clone)]
-struct NonRandomState;
-
-impl BuildHasher for NonRandomState {
-    type Hasher = std::collections::hash_map::DefaultHasher;
-    #[inline]
-    fn build_hasher(&self) -> Self::Hasher {
-        Self::Hasher::new()
-    }
-}
-
 /// Like `OwnedStore`, but avoids storing any value more than once.
 pub(super) struct InternedStore<T: 'static> {
     owned: OwnedStore<T>,
-    interner: HashMap<T, Handle, NonRandomState>,
+    interner: FxHashMap<T, Handle>,
 }
 
 impl<T: Copy + Eq + Hash> InternedStore<T> {
     pub(super) fn new(counter: &'static AtomicUsize) -> Self {
-        InternedStore {
-            owned: OwnedStore::new(counter),
-            interner: HashMap::with_hasher(NonRandomState),
-        }
+        InternedStore { owned: OwnedStore::new(counter), interner: FxHashMap::default() }
     }
 
     pub(super) fn alloc(&mut self, x: T) -> Handle {