Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Fix BufferBackend soundness issue and add StringInterner::resolve_unchecked #68

Merged
merged 2 commits into from
May 1, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
35 changes: 34 additions & 1 deletion benches/bench.rs
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,11 @@ use criterion::{
};
use string_interner::backend::Backend;

criterion_group!(bench_resolve, bench_resolve_already_filled);
criterion_group!(
bench_resolve,
bench_resolve_already_filled,
bench_resolve_unchecked_already_filled
);
criterion_group!(bench_get, bench_get_already_filled);
criterion_group!(bench_iter, bench_iter_already_filled);
criterion_group!(
Expand Down Expand Up @@ -184,6 +188,35 @@ fn bench_resolve_already_filled(c: &mut Criterion) {
bench_for_backend::<BenchBuffer>(&mut g);
}

fn bench_resolve_unchecked_already_filled(c: &mut Criterion) {
let mut g = c.benchmark_group("resolve_unchecked/already-filled");
g.throughput(Throughput::Elements(BENCH_LEN_STRINGS as u64));
fn bench_for_backend<BB: BackendBenchmark>(g: &mut BenchmarkGroup<WallTime>) {
g.bench_with_input(
BB::NAME,
&(BENCH_LEN_STRINGS, BENCH_STRING_LEN),
|bencher, &(len_words, word_len)| {
let words = generate_test_strings(len_words, word_len);
bencher.iter_batched_ref(
|| BB::setup_filled_with_ids(&words),
|(interner, word_ids)| {
for &word_id in &*word_ids {
black_box(
// SAFETY: We provide only valid symbols to the tested interners.
unsafe { interner.resolve_unchecked(word_id) },
);
}
},
BatchSize::SmallInput,
)
},
);
}
bench_for_backend::<BenchBucket>(&mut g);
bench_for_backend::<BenchString>(&mut g);
bench_for_backend::<BenchBuffer>(&mut g);
}

fn bench_get_already_filled(c: &mut Criterion) {
let mut g = c.benchmark_group("get/already-filled");
g.throughput(Throughput::Elements(BENCH_LEN_STRINGS as u64));
Expand Down
18 changes: 10 additions & 8 deletions src/backend/buffer.rs
Original file line number Diff line number Diff line change
Expand Up @@ -88,15 +88,12 @@ where
///
/// Returns the string from the given index if any as well
/// as the index of the next string in the buffer.
fn resolve_index_to_str(&self, index: usize) -> Option<(&str, usize)> {
fn resolve_index_to_str(&self, index: usize) -> Option<(&[u8], usize)> {
let bytes = self.buffer.get(index..)?;
let (str_len, str_len_bytes) = decode_var_usize(bytes)?;
let index_str = index + str_len_bytes;
let str_bytes = self.buffer.get(index_str..index_str + str_len)?;
// SAFETY: It is guaranteed by the backend that only valid strings
// are stored in this portion of the buffer.
let string = unsafe { str::from_utf8_unchecked(str_bytes) };
Some((string, index_str + str_len))
Some((str_bytes, index_str + str_len))
}

/// Resolves the string for the given symbol.
Expand Down Expand Up @@ -180,8 +177,10 @@ where

#[inline]
fn resolve(&self, symbol: Self::Symbol) -> Option<&str> {
self.resolve_index_to_str(symbol.to_usize())
.map(|(string, _next_str_index)| string)
match self.resolve_index_to_str(symbol.to_usize()) {
None => None,
Some((bytes, _)) => str::from_utf8(bytes).ok(),
}
}

fn shrink_to_fit(&mut self) {
Expand Down Expand Up @@ -481,7 +480,10 @@ where
fn next(&mut self) -> Option<Self::Item> {
self.backend
.resolve_index_to_str(self.next)
.and_then(|(string, next)| {
.and_then(|(bytes, next)| {
// SAFETY: Within the iterator all indices given to `resolv_index_to_str`
// are properly pointing to the start of each interned string.
let string = unsafe { str::from_utf8_unchecked(bytes) };
let symbol = S::try_from_usize(self.next)?;
self.next = next;
self.remaining -= 1;
Expand Down
13 changes: 12 additions & 1 deletion src/interner.rs
Original file line number Diff line number Diff line change
Expand Up @@ -264,12 +264,23 @@ where
self.backend.shrink_to_fit()
}

/// Returns the string for the given symbol if any.
/// Returns the string for the given `symbol`` if any.
#[inline]
pub fn resolve(&self, symbol: <B as Backend>::Symbol) -> Option<&str> {
self.backend.resolve(symbol)
}

/// Returns the string for the given `symbol` without performing any checks.
///
/// # Safety
///
/// It is the caller's responsibility to provide this method with `symbol`s
/// that are valid for the [`StringInterner`].
#[inline]
pub unsafe fn resolve_unchecked(&self, symbol: <B as Backend>::Symbol) -> &str {
unsafe { self.backend.resolve_unchecked(symbol) }
}

/// Returns an iterator that yields all interned strings and their symbols.
#[inline]
pub fn iter(&self) -> <B as Backend>::Iter<'_> {
Expand Down
20 changes: 20 additions & 0 deletions tests/tests.rs
Original file line number Diff line number Diff line change
Expand Up @@ -298,6 +298,26 @@ macro_rules! gen_tests_for_backend {
assert_eq!(interner.resolve(dd), None);
}

#[test]
fn resolve_unchecked_works() {
let mut interner = StringInterner::new();
// Insert 3 unique strings:
let aa = interner.get_or_intern("aa");
let bb = interner.get_or_intern("bb");
let cc = interner.get_or_intern("cc");
assert_eq!(interner.len(), 3);
// Resolve valid symbols:
assert_eq!(unsafe { interner.resolve_unchecked(aa) }, "aa");
assert_eq!(unsafe { interner.resolve_unchecked(bb) }, "bb");
assert_eq!(unsafe { interner.resolve_unchecked(cc) }, "cc");
assert_eq!(interner.len(), 3);
// Resolve invalid symbols:
let dd = expect_valid_symbol(1000);
assert_ne!(aa, dd);
assert_ne!(bb, dd);
assert_ne!(cc, dd);
}

#[test]
fn get_works() {
let mut interner = StringInterner::new();
Expand Down
Loading