-
Notifications
You must be signed in to change notification settings - Fork 4
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Auto merge of #98189 - mystor:fast_ident_literal, r=eddyb
proc_macro/bridge: stop using a remote object handle for proc_macro Ident and Literal
This is the fourth part of rust-lang/rust#86822, split off as requested in rust-lang/rust#86822 (review). This patch transforms the `Ident` and `Literal` types into structs serialized over IPC rather than handles. Symbol values are interned on both the client and server when deserializing, to avoid unnecessary string copies and keep the size of `TokenTree` down. To do the interning efficiently on the client, the proc-macro crate is given a vendored version of the fxhash hasher, as `SipHash` appeared to cause performance issues. This was done rather than depending on `rustc_hash` as it is unfortunately difficult to depend on crates from within `proc_macro` due to it being built at the same time as `std`. In addition, a custom arena allocator and symbol store were also added, inspired by those in `rustc_arena` and `rustc_span`. To prevent symbol re-use across multiple invocations of a macro on the same thread, a new range of `Symbol` names is used for each invocation of the macro, and symbols from previous invocations are cleaned up. In order to keep `Ident` creation efficient, a special ASCII-only case was added to perform ident validation without using RPC for simple identifiers. Full identifier validation couldn't be easily added, as it would require depending on the `rustc_lexer` and `unicode-normalization` crates from within `proc_macro`. Unicode identifiers are validated and normalized using RPC. See the individual commit messages for more details on trade-offs and design decisions behind these patches.
- Loading branch information
Showing
8 changed files
with
672 additions
and
122 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,113 @@ | ||
//! A minimal arena allocator inspired by `rustc_arena::DroplessArena`. | ||
//! | ||
//! This is unfortunately a minimal re-implementation rather than a dependency | ||
//! as it is difficult to depend on crates from within `proc_macro`, due to it | ||
//! being built at the same time as `std`. | ||
|
||
use std::cell::{Cell, RefCell}; | ||
use std::cmp; | ||
use std::mem::MaybeUninit; | ||
use std::ops::Range; | ||
use std::ptr; | ||
use std::slice; | ||
use std::str; | ||
|
||
// The arenas start with PAGE-sized chunks, and then each new chunk is twice as
// big as its predecessor, up until we reach HUGE_PAGE-sized chunks, whereupon
// we stop growing. This scales well, from arenas that are barely used up to
// arenas that are used for 100s of MiBs. Note also that the chosen sizes match
// the usual sizes of pages and huge pages on Linux.
const PAGE: usize = 4096;
const HUGE_PAGE: usize = 2 * 1024 * 1024;

/// A minimal arena allocator inspired by `rustc_arena::DroplessArena`.
///
/// This is unfortunately a minimal re-implementation rather than a dependency
/// as it is difficult to depend on crates from within `proc_macro`, due to it
/// being built at the same time as `std`.
///
/// This arena doesn't have support for allocating anything other than byte
/// slices, as that is all that is necessary.
pub(crate) struct Arena {
    /// Lowest address of the most recently added chunk (null before the first
    /// chunk exists).
    start: Cell<*mut MaybeUninit<u8>>,
    /// Upper bound of the still-free region of the most recently added chunk.
    /// Allocations are carved off the top, moving this pointer down towards
    /// `start` (null before the first chunk exists).
    end: Cell<*mut MaybeUninit<u8>>,
    /// Every chunk allocated so far; owning them here keeps the memory behind
    /// `start`/`end` and all handed-out slices alive.
    chunks: RefCell<Vec<Box<[MaybeUninit<u8>]>>>,
}

impl Arena {
    /// Creates an empty arena. No memory is reserved until the first
    /// non-empty allocation.
    pub(crate) fn new() -> Self {
        Arena {
            start: Cell::new(ptr::null_mut()),
            end: Cell::new(ptr::null_mut()),
            chunks: RefCell::new(Vec::new()),
        }
    }

    /// Add a new chunk with at least `additional` free bytes.
    ///
    /// The new chunk becomes the current one; whatever was still free in the
    /// previous chunk is no longer handed out.
    #[inline(never)]
    #[cold]
    fn grow(&self, additional: usize) {
        let mut chunks = self.chunks.borrow_mut();

        // If the previous chunk's len is less than HUGE_PAGE bytes, then this
        // chunk will be at least double the previous chunk's size. The very
        // first chunk is PAGE bytes.
        let doubled = match chunks.last() {
            Some(last_chunk) => last_chunk.len().min(HUGE_PAGE / 2) * 2,
            None => PAGE,
        };
        // Also ensure that this chunk can fit `additional`.
        let new_cap = cmp::max(additional, doubled);

        let mut chunk = Box::new_uninit_slice(new_cap);
        let Range { start, end } = chunk.as_mut_ptr_range();
        self.start.set(start);
        self.end.set(end);
        chunks.push(chunk);
    }

    /// Allocates a byte slice with specified size from the current memory
    /// chunk. Returns `None` if there is no free space left to satisfy the
    /// request.
    ///
    /// Before the first chunk exists both bounds are null (address 0), so any
    /// non-zero request fails the subtraction and yields `None`. Zero-sized
    /// requests are filtered out by `alloc_raw` and never reach this function.
    fn alloc_raw_without_grow(&self, bytes: usize) -> Option<&mut [MaybeUninit<u8>]> {
        let start = self.start.get().addr();
        let old_end = self.end.get();
        let end = old_end.addr();

        let new_end = end.checked_sub(bytes)?;
        if start <= new_end {
            let new_end = old_end.with_addr(new_end);
            self.end.set(new_end);
            // SAFETY: `bytes` bytes starting at `new_end` were just reserved:
            // they lie inside the current chunk (`start <= new_end`), and
            // `end` has been moved below them so they are never handed out
            // again.
            Some(unsafe { slice::from_raw_parts_mut(new_end, bytes) })
        } else {
            None
        }
    }

    /// Allocates `bytes` uninitialized bytes, adding a new chunk when the
    /// current one cannot satisfy the request.
    fn alloc_raw(&self, bytes: usize) -> &mut [MaybeUninit<u8>] {
        if bytes == 0 {
            return &mut [];
        }

        loop {
            if let Some(a) = self.alloc_raw_without_grow(bytes) {
                break a;
            }
            // No free space left. Allocate a new chunk to satisfy the request.
            // On failure the grow will panic or abort.
            self.grow(bytes);
        }
    }

    /// Copies `string` into the arena and returns the arena-owned copy, which
    /// lives as long as the borrow of the arena.
    pub(crate) fn alloc_str<'a>(&'a self, string: &str) -> &'a mut str {
        let src = string.as_bytes();
        let dst = self.alloc_raw(src.len()).as_mut_ptr().cast::<u8>();

        // SAFETY: `dst` points at `src.len()` freshly reserved bytes that
        // nothing else references, so it cannot overlap `src`; after the copy
        // all of those bytes are initialized. For an empty string the pointer
        // is the dangling-but-aligned pointer of an empty slice, which is
        // valid for zero-length copies and slices.
        let bytes = unsafe {
            ptr::copy_nonoverlapping(src.as_ptr(), dst, src.len());
            slice::from_raw_parts_mut(dst, src.len())
        };

        // SAFETY: we convert from `&str` to `&[u8]`, clone it into the arena,
        // and immediately convert the clone back to `&str`.
        unsafe { str::from_utf8_unchecked_mut(bytes) }
    }
}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,117 @@ | ||
//! This is a copy of the `rustc_hash` crate, adapted to work as a module. | ||
//! | ||
//! If in the future it becomes more reasonable to add dependencies to | ||
//! `proc_macro`, this module should be removed and replaced with a dependency | ||
//! on the `rustc_hash` crate. | ||
|
||
use std::collections::HashMap; | ||
use std::convert::TryInto; | ||
use std::default::Default; | ||
use std::hash::BuildHasherDefault; | ||
use std::hash::Hasher; | ||
use std::mem::size_of; | ||
use std::ops::BitXor; | ||
|
||
/// Type alias for a hashmap using the `fx` hash algorithm.
pub type FxHashMap<K, V> = HashMap<K, V, BuildHasherDefault<FxHasher>>;

/// A speedy hash algorithm for use within rustc. The hashmap in liballoc
/// by default uses SipHash which isn't quite as speedy as we want. In the
/// compiler we're not really worried about DOS attempts, so we use a fast
/// non-cryptographic hash.
///
/// This is the same as the algorithm used by Firefox -- which is a homespun
/// one not based on any widely-known algorithm -- though modified to produce
/// 64-bit hash values instead of 32-bit hash values. It consistently
/// out-performs an FNV-based hash within rustc itself -- the collision rate is
/// similar or slightly worse than FNV, but the speed of the hash function
/// itself is much higher because it works on up to 8 bytes at a time.
pub struct FxHasher {
    /// Running hash state, one machine word wide.
    hash: usize,
}

// Multiplier applied after every mixed-in word; one value per pointer width.
#[cfg(target_pointer_width = "32")]
const K: usize = 0x9e3779b9;
#[cfg(target_pointer_width = "64")]
const K: usize = 0x517cc1b727220a95;

impl Default for FxHasher {
    /// Creates a hasher whose state starts at zero.
    #[inline]
    fn default() -> FxHasher {
        FxHasher { hash: 0 }
    }
}

impl FxHasher {
    /// Mixes one word into the state: rotate left by 5, xor with the word,
    /// then multiply (wrapping) by `K`.
    #[inline]
    fn add_to_hash(&mut self, i: usize) {
        self.hash = self.hash.rotate_left(5).bitxor(i).wrapping_mul(K);
    }
}

impl Hasher for FxHasher {
    /// Hashes `bytes` one native-endian machine word at a time, then mixes in
    /// the leftover tail as at most one 4-byte, one 2-byte and one 1-byte
    /// piece.
    #[inline]
    fn write(&mut self, bytes: &[u8]) {
        const WORD: usize = size_of::<usize>();
        // The tail handling below covers at most 7 leftover bytes.
        assert!(WORD <= 8);

        // Accumulate in a local copy and store the result back once at the end.
        let mut hash = FxHasher { hash: self.hash };

        let mut words = bytes.chunks_exact(WORD);
        for word in &mut words {
            // `chunks_exact` only yields slices of exactly WORD bytes, so the
            // conversion to `[u8; WORD]` cannot fail.
            hash.add_to_hash(usize::from_ne_bytes(word.try_into().unwrap()));
        }

        let mut bytes = words.remainder();
        if (WORD > 4) && (bytes.len() >= 4) {
            hash.add_to_hash(u32::from_ne_bytes(bytes[..4].try_into().unwrap()) as usize);
            bytes = &bytes[4..];
        }
        if (WORD > 2) && (bytes.len() >= 2) {
            hash.add_to_hash(u16::from_ne_bytes(bytes[..2].try_into().unwrap()) as usize);
            bytes = &bytes[2..];
        }
        if (WORD > 1) && !bytes.is_empty() {
            hash.add_to_hash(bytes[0] as usize);
        }
        self.hash = hash.hash;
    }

    #[inline]
    fn write_u8(&mut self, i: u8) {
        self.add_to_hash(i as usize);
    }

    #[inline]
    fn write_u16(&mut self, i: u16) {
        self.add_to_hash(i as usize);
    }

    #[inline]
    fn write_u32(&mut self, i: u32) {
        self.add_to_hash(i as usize);
    }

    /// On 32-bit targets a `u64` is mixed in as two words: the low half
    /// (truncating cast) followed by the high half.
    #[cfg(target_pointer_width = "32")]
    #[inline]
    fn write_u64(&mut self, i: u64) {
        self.add_to_hash(i as usize);
        self.add_to_hash((i >> 32) as usize);
    }

    #[cfg(target_pointer_width = "64")]
    #[inline]
    fn write_u64(&mut self, i: u64) {
        self.add_to_hash(i as usize);
    }

    #[inline]
    fn write_usize(&mut self, i: usize) {
        self.add_to_hash(i);
    }

    /// Returns the current state widened to `u64`.
    #[inline]
    fn finish(&self) -> u64 {
        self.hash as u64
    }
}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Oops, something went wrong.