Skip to content

Commit

Permalink
perf: replace custom string interner with lasso (#22)
Browse files Browse the repository at this point in the history
* perf: replace custom string interner with lasso

* chore: patch lasso
  • Loading branch information
DaniPopes authored Jun 30, 2024
1 parent f2af3d8 commit e3cf771
Show file tree
Hide file tree
Showing 4 changed files with 90 additions and 39 deletions.
34 changes: 34 additions & 0 deletions Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

4 changes: 4 additions & 0 deletions Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -128,6 +128,7 @@ hex = { package = "const-hex", version = "1.10" }
index_vec = "0.1.3"
indexmap = "2.2"
itertools = "0.13"
itoa = "1.0"
libc = "0.2"
md-5 = "0.10"
memchr = "2.7"
Expand All @@ -139,3 +140,6 @@ tikv-jemallocator = "0.5.4"
unicode-width = "0.1.11"
vergen = "8.3"
yansi = "1.0"

# https://github.com/Kixiron/lasso/pull/49
lasso = { git = "https://github.com/conradludgate/lasso", branch = "update-deps" }
3 changes: 2 additions & 1 deletion crates/interface/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,8 @@ sulk-macros.workspace = true

bumpalo.workspace = true
itertools.workspace = true
itoa = "1.0"
itoa.workspace = true
lasso = { workspace = true, features = ["multi-threaded", "inline-more"] }
match_cfg.workspace = true
md-5.workspace = true
normalize-path = "0.2.1"
Expand Down
88 changes: 50 additions & 38 deletions crates/interface/src/symbol.rs
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
use crate::{SessionGlobals, Span};
use lasso::Resolver;
use std::{cmp, fmt, hash, str};
use sulk_data_structures::{index::BaseIndex32, map::FxIndexSet, sync::Lock};
use sulk_data_structures::index::BaseIndex32;
use sulk_macros::symbols;

/// An identifier.
Expand Down Expand Up @@ -331,64 +332,75 @@ impl ToString for Symbol {
}
}

type InternerInner = LassoInterner;

/// Symbol interner.
///
/// Initialized in `SessionGlobals` with the `symbols!` macro's initial symbols.
pub(crate) struct Interner(Lock<InternerInner>);

// The `&'static str`s in this type actually point into the arena.
//
// This type is private to prevent accidentally constructing more than one
// `Interner` on the same thread, which makes it easy to mix up `Symbol`s
// between `Interner`s.
struct InternerInner {
arena: bumpalo::Bump,
strings: FxIndexSet<&'static str>,
}

impl InternerInner {
fn prefill(init: &[&'static str]) -> Self {
Self { arena: bumpalo::Bump::new(), strings: init.iter().copied().collect() }
}
}
pub(crate) struct Interner(InternerInner);

impl Interner {
pub(crate) fn fresh() -> Self {
Self(Lock::new(InternerInner::fresh()))
Self(InternerInner::fresh())
}

#[cfg(test)]
pub(crate) fn prefill(init: &[&'static str]) -> Self {
Self(Lock::new(InternerInner::prefill(init)))
Self(InternerInner::prefill(init))
}

#[inline]
fn intern(&self, string: &str) -> Symbol {
let mut inner = self.0.lock();
if let Some(idx) = inner.strings.get_index_of(string) {
return Symbol::new(idx as u32);
}
self.0.intern(string)
}

let string: &str = inner.arena.alloc_str(string);
#[inline]
fn get(&self, symbol: Symbol) -> &str {
self.0.get(symbol)
}
}

// SAFETY: we can extend the arena allocation to `'static` because we
// only access these while the arena is still alive.
let string: &'static str = unsafe { &*(string as *const str) };
struct LassoInterner(lasso::ThreadedRodeo<Symbol, sulk_data_structures::map::FxBuildHasher>);

// This second hash table lookup can be avoided by using `RawEntryMut`,
// but this code path isn't hot enough for it to be worth it. See
// #91445 for details.
let (idx, is_new) = inner.strings.insert_full(string);
debug_assert!(is_new); // due to the get_index_of check above
impl LassoInterner {
fn prefill(init: &[&'static str]) -> Self {
let capacity = if init.is_empty() {
Default::default()
} else {
let actual_string = init.len();
let strings = actual_string.next_power_of_two();
let actual_bytes = PREINTERNED_SYMBOLS_BYTES as usize;
let bytes = actual_bytes.next_power_of_two().max(4096);
trace!(strings, bytes, "prefill capacity");
lasso::Capacity::new(strings, std::num::NonZeroUsize::new(bytes).unwrap())
};
let rodeo = lasso::ThreadedRodeo::with_capacity_and_hasher(capacity, Default::default());
for &s in init {
rodeo.get_or_intern_static(s);
}
Self(rodeo)
}

Symbol::new(idx as u32)
#[inline]
fn intern(&self, string: &str) -> Symbol {
self.0.get_or_intern(string)
}

/// Get the symbol as a string.
///
/// [`Symbol::as_str()`] should be used in preference to this function.
#[inline]
fn get(&self, symbol: Symbol) -> &str {
self.0.lock().strings.get_index(symbol.as_u32() as usize).unwrap()
unsafe { self.0.resolve_unchecked(&symbol) }
}
}

unsafe impl lasso::Key for Symbol {
#[inline]
fn into_usize(self) -> usize {
self.as_u32() as usize
}

#[inline]
fn try_from_usize(int: usize) -> Option<Self> {
int.try_into().ok().map(Self::new)
}
}

Expand Down

0 comments on commit e3cf771

Please sign in to comment.