Skip to content

Introduce Symbol::with_interner. #144287

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Open
wants to merge 1 commit into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
40 changes: 21 additions & 19 deletions compiler/rustc_ast/src/ast.rs
Original file line number Diff line number Diff line change
Expand Up @@ -167,25 +167,27 @@ impl Path {
///
/// Panics if `path` is empty or a segment after the first is `kw::PathRoot`.
pub fn join_path_syms(path: impl IntoIterator<Item = impl Borrow<Symbol>>) -> String {
// This is a guess at the needed capacity that works well in practice. It is slightly faster
// than (a) starting with an empty string, or (b) computing the exact capacity required.
// `8` works well because it's about the right size and jemalloc's size classes are all
// multiples of 8.
let mut iter = path.into_iter();
let len_hint = iter.size_hint().1.unwrap_or(1);
let mut s = String::with_capacity(len_hint * 8);

let first_sym = *iter.next().unwrap().borrow();
if first_sym != kw::PathRoot {
s.push_str(first_sym.as_str());
}
for sym in iter {
let sym = *sym.borrow();
debug_assert_ne!(sym, kw::PathRoot);
s.push_str("::");
s.push_str(sym.as_str());
}
s
Symbol::with_interner(|interner| {
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Nit: the interner is locked for slightly longer than necessary.

// This is a guess at the needed capacity that works well in practice. It is slightly
// faster than (a) starting with an empty string, or (b) computing the exact capacity
// required. `8` works well because it's about the right size and jemalloc's size classes
// are all multiples of 8.
let mut iter = path.into_iter();
let len_hint = iter.size_hint().1.unwrap_or(1);

let mut s = String::with_capacity(len_hint * 8);
let first_sym = *iter.next().unwrap().borrow();
if first_sym != kw::PathRoot {
s.push_str(interner.get_str(first_sym));
}
for sym in iter {
let sym = *sym.borrow();
debug_assert_ne!(sym, kw::PathRoot);
s.push_str("::");
s.push_str(interner.get_str(sym));
}
s
})
}

/// Like `join_path_syms`, but for `Ident`s. This function is necessary because
Expand Down
2 changes: 1 addition & 1 deletion compiler/rustc_macros/src/symbols.rs
Original file line number Diff line number Diff line change
Expand Up @@ -288,7 +288,7 @@ fn symbols_with_errors(input: TokenStream) -> (TokenStream, Vec<syn::Error>) {
const SYMBOL_DIGITS_BASE: u32 = #symbol_digits_base;

/// The number of predefined symbols; this is the first index for
/// extra pre-interned symbols in an Interner created via
/// extra pre-interned symbols in an interner created via
/// [`Interner::with_extra_symbols`].
pub const PREDEFINED_SYMBOLS_COUNT: u32 = #predefined_symbols_count;

Expand Down
40 changes: 23 additions & 17 deletions compiler/rustc_resolve/src/rustdoc.rs
Original file line number Diff line number Diff line change
Expand Up @@ -141,26 +141,32 @@ pub fn unindent_doc_fragments(docs: &mut [DocFragment]) {
// In here, the `min_indent` is 1 (because non-sugared fragment are always counted with minimum
// 1 whitespace), meaning that "hello!" will be considered a codeblock because it starts with 4
// (5 - 1) whitespaces.
let Some(min_indent) = docs
.iter()
.map(|fragment| {
fragment
.doc
.as_str()
.lines()
.filter(|line| line.chars().any(|c| !c.is_whitespace()))
.map(|line| {
// Compare against either space or tab, ignoring whether they are
// mixed or not.
let whitespace = line.chars().take_while(|c| *c == ' ' || *c == '\t').count();
whitespace
+ (if fragment.kind == DocFragmentKind::SugaredDoc { 0 } else { add })
let Some(min_indent) = ({
Symbol::with_interner(|interner| {
docs.iter()
.map(|fragment| {
interner
.get_str(fragment.doc)
.lines()
.filter(|line| line.chars().any(|c| !c.is_whitespace()))
.map(|line| {
// Compare against either space or tab, ignoring whether they are
// mixed or not.
let whitespace =
line.chars().take_while(|c| *c == ' ' || *c == '\t').count();
whitespace
+ (if fragment.kind == DocFragmentKind::SugaredDoc {
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Nit: the formatting is bad, could you move the if expression to a local variable or something like that?

0
} else {
add
})
})
.min()
.unwrap_or(usize::MAX)
})
.min()
.unwrap_or(usize::MAX)
})
.min()
else {
}) else {
return;
};

Expand Down
46 changes: 37 additions & 9 deletions compiler/rustc_span/src/symbol.rs
Original file line number Diff line number Diff line change
Expand Up @@ -2632,6 +2632,19 @@ impl Symbol {
})
}

/// Runs `f` with access to the symbol interner, so you can call
/// `interner.get_str(sym)` instead of `sym.as_str()`.
///
/// This is for performance: it lets you get the contents of multiple
/// symbols with a single TLS lookup and interner lock operation, instead
/// of doing those operations once per symbol.
pub fn with_interner<R>(f: impl FnOnce(&InternerInner) -> R) -> R {
with_session_globals(|session_globals| {
let inner = session_globals.symbol_interner.0.lock();
f(&inner)
})
}

pub fn as_u32(self) -> u32 {
self.0.as_u32()
}
Expand Down Expand Up @@ -2733,14 +2746,13 @@ impl<CTX> HashStable<CTX> for ByteSymbol {
// string with identical contents (e.g. "foo" and b"foo") are both interned,
// only one copy will be stored and the resulting `Symbol` and `ByteSymbol`
// will have the same index.
//
// There must only be one of these, otherwise its easy to mix up symbols
// between interners.
pub(crate) struct Interner(Lock<InternerInner>);

// The `&'static [u8]`s in this type actually point into the arena.
//
// This type is private to prevent accidentally constructing more than one
// `Interner` on the same thread, which makes it easy to mix up `Symbol`s
// between `Interner`s.
struct InternerInner {
pub struct InternerInner {
arena: DroplessArena,
byte_strs: FxIndexSet<&'static [u8]>,
}
Expand Down Expand Up @@ -2794,21 +2806,37 @@ impl Interner {
/// Get the symbol as a string.
///
/// [`Symbol::as_str()`] should be used in preference to this function.
/// (Or [`Symbol::with_interner()`] + [`InternerInner::get_str()`]).
fn get_str(&self, symbol: Symbol) -> &str {
let byte_str = self.get_inner(symbol.0.as_usize());
let inner = self.0.lock();
let byte_str = inner.byte_strs.get_index(symbol.0.as_usize()).unwrap();
// SAFETY: known to be a UTF8 string because it's a `Symbol`.
unsafe { str::from_utf8_unchecked(byte_str) }
}

/// Get the symbol as a string.
///
/// [`ByteSymbol::as_byte_str()`] should be used in preference to this function.
/// (Or [`Symbol::with_interner()`] + [`InternerInner::get_byte_str()`]).
fn get_byte_str(&self, symbol: ByteSymbol) -> &[u8] {
self.get_inner(symbol.0.as_usize())
let inner = self.0.lock();
inner.byte_strs.get_index(symbol.0.as_usize()).unwrap()
}
}

fn get_inner(&self, index: usize) -> &[u8] {
self.0.lock().byte_strs.get_index(index).unwrap()
impl InternerInner {
/// Get the symbol as a string. Used with `with_interner`.
#[inline]
pub fn get_str(&self, symbol: Symbol) -> &str {
let byte_str = self.byte_strs.get_index(symbol.0.as_usize()).unwrap();
// SAFETY: known to be a UTF8 string because it's a `Symbol`.
unsafe { str::from_utf8_unchecked(byte_str) }
}

/// Get the symbol as a string. Used with `with_interner`.
#[inline]
pub fn get_byte_str(&self, symbol: ByteSymbol) -> &[u8] {
self.byte_strs.get_index(symbol.0.as_usize()).unwrap()
}
}

Expand Down
17 changes: 13 additions & 4 deletions src/librustdoc/html/render/print_item.rs
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@ use rustc_hir::def_id::DefId;
use rustc_index::IndexVec;
use rustc_middle::ty::{self, TyCtxt};
use rustc_span::hygiene::MacroKind;
use rustc_span::symbol::{Symbol, sym};
use rustc_span::symbol::{InternerInner, Symbol, sym};
use tracing::{debug, info};

use super::type_layout::document_type_layout;
Expand Down Expand Up @@ -333,7 +333,12 @@ fn item_module(cx: &Context<'_>, item: &clean::Item, items: &[clean::Item]) -> i
}
}

fn cmp(i1: &clean::Item, i2: &clean::Item, tcx: TyCtxt<'_>) -> Ordering {
fn cmp(
i1: &clean::Item,
i2: &clean::Item,
interner: &InternerInner,
tcx: TyCtxt<'_>,
) -> Ordering {
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Again, the interner is locked for very long time.
I'm actually surprised that something like i1.stability(tcx) inside doesn't try to lock the interner again.

let rty1 = reorder(i1.type_());
let rty2 = reorder(i2.type_());
if rty1 != rty2 {
Expand All @@ -349,7 +354,9 @@ fn item_module(cx: &Context<'_>, item: &clean::Item, items: &[clean::Item]) -> i
return is_stable2.cmp(&is_stable1);
}
match (i1.name, i2.name) {
(Some(name1), Some(name2)) => compare_names(name1.as_str(), name2.as_str()),
(Some(name1), Some(name2)) => {
compare_names(interner.get_str(name1), interner.get_str(name2))
}
(Some(_), None) => Ordering::Greater,
(None, Some(_)) => Ordering::Less,
(None, None) => Ordering::Equal,
Expand All @@ -360,7 +367,9 @@ fn item_module(cx: &Context<'_>, item: &clean::Item, items: &[clean::Item]) -> i

match cx.shared.module_sorting {
ModuleSorting::Alphabetical => {
not_stripped_items.sort_by(|(_, i1), (_, i2)| cmp(i1, i2, tcx));
Symbol::with_interner(|interner| {
not_stripped_items.sort_by(|(_, i1), (_, i2)| cmp(i1, i2, interner, tcx));
});
}
ModuleSorting::DeclarationOrder => {}
}
Expand Down
16 changes: 10 additions & 6 deletions src/librustdoc/html/render/search_index.rs
Original file line number Diff line number Diff line change
Expand Up @@ -105,12 +105,16 @@ pub(crate) fn build_index(
let mut aliases: BTreeMap<String, Vec<usize>> = BTreeMap::new();

// Sort search index items. This improves the compressibility of the search index.
cache.search_index.sort_unstable_by(|k1, k2| {
// `sort_unstable_by_key` produces lifetime errors
// HACK(rustdoc): should not be sorting `CrateNum` or `DefIndex`, this will soon go away, too
let k1 = (&k1.path, k1.name.as_str(), &k1.ty, k1.parent.map(|id| (id.index, id.krate)));
let k2 = (&k2.path, k2.name.as_str(), &k2.ty, k2.parent.map(|id| (id.index, id.krate)));
Ord::cmp(&k1, &k2)
Symbol::with_interner(|inner| {
cache.search_index.sort_unstable_by(|k1, k2| {
// `sort_unstable_by_key` produces lifetime errors
// HACK(rustdoc): should not be sorting `CrateNum` or `DefIndex`, this will soon go
// away, too
let pair = |k: &IndexItem| k.parent.map(|id| (id.index, id.krate));
let k1 = (&k1.path, inner.get_str(k1.name), &k1.ty, pair(k1));
let k2 = (&k2.path, inner.get_str(k2.name), &k2.ty, pair(k2));
Ord::cmp(&k1, &k2)
});
});

// Set up alias indexes.
Expand Down
17 changes: 6 additions & 11 deletions src/librustdoc/html/url_parts_builder.rs
Original file line number Diff line number Diff line change
Expand Up @@ -146,22 +146,17 @@ impl<'a> Extend<&'a str> for UrlPartsBuilder {

impl FromIterator<Symbol> for UrlPartsBuilder {
fn from_iter<T: IntoIterator<Item = Symbol>>(iter: T) -> Self {
// This code has to be duplicated from the `&str` impl because of
// `Symbol::as_str`'s lifetimes.
let iter = iter.into_iter();
let mut builder = Self::with_capacity_bytes(AVG_PART_LENGTH * iter.size_hint().0);
iter.for_each(|part| builder.push(part.as_str()));
builder
Symbol::with_interner(|interner| {
UrlPartsBuilder::from_iter(iter.into_iter().map(|sym| interner.get_str(sym)))
})
}
}

impl Extend<Symbol> for UrlPartsBuilder {
fn extend<T: IntoIterator<Item = Symbol>>(&mut self, iter: T) {
// This code has to be duplicated from the `&str` impl because of
// `Symbol::as_str`'s lifetimes.
let iter = iter.into_iter();
self.buf.reserve(AVG_PART_LENGTH * iter.size_hint().0);
iter.for_each(|part| self.push(part.as_str()));
Symbol::with_interner(|interner| {
self.extend(iter.into_iter().map(|sym| interner.get_str(sym)))
})
}
}

Expand Down
Loading