Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

perf: replace custom string interner with lasso #22

Merged
merged 2 commits into from
Jun 30, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
34 changes: 34 additions & 0 deletions Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

4 changes: 4 additions & 0 deletions Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -128,6 +128,7 @@ hex = { package = "const-hex", version = "1.10" }
index_vec = "0.1.3"
indexmap = "2.2"
itertools = "0.13"
itoa = "1.0"
libc = "0.2"
md-5 = "0.10"
memchr = "2.7"
Expand All @@ -139,3 +140,6 @@ tikv-jemallocator = "0.5.4"
unicode-width = "0.1.11"
vergen = "8.3"
yansi = "1.0"

# https://github.com/Kixiron/lasso/pull/49
lasso = { git = "https://github.com/conradludgate/lasso", branch = "update-deps" }
3 changes: 2 additions & 1 deletion crates/interface/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,8 @@ sulk-macros.workspace = true

bumpalo.workspace = true
itertools.workspace = true
itoa = "1.0"
itoa.workspace = true
lasso = { workspace = true, features = ["multi-threaded", "inline-more"] }
match_cfg.workspace = true
md-5.workspace = true
normalize-path = "0.2.1"
Expand Down
88 changes: 50 additions & 38 deletions crates/interface/src/symbol.rs
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
use crate::{SessionGlobals, Span};
use lasso::Resolver;
use std::{cmp, fmt, hash, str};
use sulk_data_structures::{index::BaseIndex32, map::FxIndexSet, sync::Lock};
use sulk_data_structures::index::BaseIndex32;
use sulk_macros::symbols;

/// An identifier.
Expand Down Expand Up @@ -331,64 +332,75 @@ impl ToString for Symbol {
}
}

type InternerInner = LassoInterner;

/// Symbol interner.
///
/// Initialized in `SessionGlobals` with the `symbols!` macro's initial symbols.
pub(crate) struct Interner(Lock<InternerInner>);

// The `&'static str`s in this type actually point into the arena.
//
// This type is private to prevent accidentally constructing more than one
// `Interner` on the same thread, which makes it easy to mix up `Symbol`s
// between `Interner`s.
struct InternerInner {
arena: bumpalo::Bump,
strings: FxIndexSet<&'static str>,
}

impl InternerInner {
fn prefill(init: &[&'static str]) -> Self {
Self { arena: bumpalo::Bump::new(), strings: init.iter().copied().collect() }
}
}
pub(crate) struct Interner(InternerInner);

impl Interner {
pub(crate) fn fresh() -> Self {
Self(Lock::new(InternerInner::fresh()))
Self(InternerInner::fresh())
}

#[cfg(test)]
pub(crate) fn prefill(init: &[&'static str]) -> Self {
Self(Lock::new(InternerInner::prefill(init)))
Self(InternerInner::prefill(init))
}

#[inline]
fn intern(&self, string: &str) -> Symbol {
let mut inner = self.0.lock();
if let Some(idx) = inner.strings.get_index_of(string) {
return Symbol::new(idx as u32);
}
self.0.intern(string)
}

let string: &str = inner.arena.alloc_str(string);
#[inline]
fn get(&self, symbol: Symbol) -> &str {
self.0.get(symbol)
}
}

// SAFETY: we can extend the arena allocation to `'static` because we
// only access these while the arena is still alive.
let string: &'static str = unsafe { &*(string as *const str) };
struct LassoInterner(lasso::ThreadedRodeo<Symbol, sulk_data_structures::map::FxBuildHasher>);

// This second hash table lookup can be avoided by using `RawEntryMut`,
// but this code path isn't hot enough for it to be worth it. See
// #91445 for details.
let (idx, is_new) = inner.strings.insert_full(string);
debug_assert!(is_new); // due to the get_index_of check above
impl LassoInterner {
fn prefill(init: &[&'static str]) -> Self {
let capacity = if init.is_empty() {
Default::default()
} else {
let actual_string = init.len();
let strings = actual_string.next_power_of_two();
let actual_bytes = PREINTERNED_SYMBOLS_BYTES as usize;
let bytes = actual_bytes.next_power_of_two().max(4096);
trace!(strings, bytes, "prefill capacity");
lasso::Capacity::new(strings, std::num::NonZeroUsize::new(bytes).unwrap())
};
let rodeo = lasso::ThreadedRodeo::with_capacity_and_hasher(capacity, Default::default());
for &s in init {
rodeo.get_or_intern_static(s);
}
Self(rodeo)
}

Symbol::new(idx as u32)
#[inline]
fn intern(&self, string: &str) -> Symbol {
self.0.get_or_intern(string)
}

/// Get the symbol as a string.
///
/// [`Symbol::as_str()`] should be used in preference to this function.
#[inline]
fn get(&self, symbol: Symbol) -> &str {
self.0.lock().strings.get_index(symbol.as_u32() as usize).unwrap()
unsafe { self.0.resolve_unchecked(&symbol) }
}
}

unsafe impl lasso::Key for Symbol {
#[inline]
fn into_usize(self) -> usize {
self.as_u32() as usize
}

#[inline]
fn try_from_usize(int: usize) -> Option<Self> {
int.try_into().ok().map(Self::new)
}
}

Expand Down