Skip to content

add docs and ut for bootstrap util cache #136848

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
46 changes: 46 additions & 0 deletions src/bootstrap/src/utils/cache.rs
Original file line number Diff line number Diff line change
@@ -1,3 +1,17 @@
//! This module helps you efficiently store and retrieve values using interning.
//!
//! Interning keeps only one copy of each distinct value, which saves memory and makes
//! comparisons fast. This module provides the `Interned<T>` handle type and the `Internable`
//! trait so that interning works the same way across different data types.
//!
//! The `Interner` struct handles caching for common types like `String`, `PathBuf`, and `Vec<String>`,
//! while the `Cache` struct acts as a write-once storage for linking computation steps with their results.
//!
//! # Thread Safety
//!
//! Interning and retrieval go through a `Mutex`, which makes them thread-safe. Note that an
//! interned value is never released: once interned, it stays alive for the rest of the program.

use std::any::{Any, TypeId};
use std::borrow::Borrow;
use std::cell::RefCell;
Expand All @@ -12,6 +26,9 @@ use std::{fmt, mem};

use crate::core::builder::Step;

/// A lightweight handle to an interned value of type `T`.
///
/// The first field is the index of the value inside the interning table's item list (the
/// `TyIntern<T>` lookup in this file resolves a handle with `items[handle.0]`). The
/// `PhantomData<*const T>` marker records which type the handle refers to without storing a
/// `T` in the handle itself.
pub struct Interned<T>(usize, PhantomData<*const T>);

impl<T: Internable + Default> Default for Interned<T> {
Expand Down Expand Up @@ -111,6 +128,10 @@ impl<T: Internable + Ord> Ord for Interned<T> {
}
}

/// A structure for managing the interning of values of type `T`.
///
/// `TyIntern<T>` maintains a mapping between values and their interned representations,
/// ensuring that duplicate values are not stored multiple times.
struct TyIntern<T: Clone + Eq> {
items: Vec<T>,
set: HashMap<T, Interned<T>>,
Expand All @@ -123,6 +144,9 @@ impl<T: Hash + Clone + Eq> Default for TyIntern<T> {
}

impl<T: Hash + Clone + Eq> TyIntern<T> {
/// Interns a borrowed value, ensuring it is stored uniquely.
///
/// If the value has been previously interned, the same `Interned<T>` instance is returned.
fn intern_borrow<B>(&mut self, item: &B) -> Interned<T>
where
B: Eq + Hash + ToOwned<Owned = T> + ?Sized,
Expand All @@ -138,6 +162,9 @@ impl<T: Hash + Clone + Eq> TyIntern<T> {
interned
}

/// Interns an owned value, storing it uniquely.
///
/// If the value has been previously interned, the existing `Interned<T>` is returned.
fn intern(&mut self, item: T) -> Interned<T> {
if let Some(i) = self.set.get(&item) {
return *i;
Expand All @@ -148,18 +175,27 @@ impl<T: Hash + Clone + Eq> TyIntern<T> {
interned
}

/// Looks up the stored value that the handle `i` refers to.
///
/// # Panics
///
/// Panics if the index carried by `i` is out of bounds for this table's item list.
fn get(&self, i: Interned<T>) -> &T {
    let index = i.0;
    &self.items[index]
}
}

/// A collection of interning tables for commonly used types.
///
/// Holds one `TyIntern` table each for `String`, `PathBuf`, and `Vec<String>`. Every table sits
/// behind its own `Mutex`, so the tables can be used through a shared `&Interner`.
#[derive(Default)]
pub struct Interner {
/// Table of interned `String` values; `intern_str` adds to this table.
strs: Mutex<TyIntern<String>>,
/// Table of interned `PathBuf` values.
paths: Mutex<TyIntern<PathBuf>>,
/// Table of interned `Vec<String>` values.
lists: Mutex<TyIntern<Vec<String>>>,
}

/// Defines the behavior required for a type to be internable.
///
/// Types implementing this trait must provide access to a static cache and define an `intern` method
/// that ensures values are stored uniquely.
trait Internable: Clone + Eq + Hash + 'static {
fn intern_cache() -> &'static Mutex<TyIntern<Self>>;

Expand Down Expand Up @@ -187,11 +223,15 @@ impl Internable for Vec<String> {
}

impl Interner {
    /// Returns the interned handle for `s`, going through the string table.
    ///
    /// Calling this again with an equal string yields the same `Interned<String>` handle.
    ///
    /// # Panics
    ///
    /// Panics if the mutex guarding the string table is poisoned.
    pub fn intern_str(&self, s: &str) -> Interned<String> {
        let mut strings = self.strs.lock().unwrap();
        strings.intern_borrow(s)
    }
}

/// The program-wide `Interner` instance.
///
/// It is built with `Interner::default` the first time it is accessed, courtesy of `LazyLock`.
pub static INTERNER: LazyLock<Interner> = LazyLock::new(Interner::default);

/// This is essentially a `HashMap` which allows storing any type in its input and
Expand All @@ -209,10 +249,12 @@ pub struct Cache(
);

impl Cache {
/// Builds a `Cache` that holds no entries yet.
pub fn new() -> Cache {
    let entries = HashMap::new();
    Cache(RefCell::new(entries))
}

/// Stores the result of a computation step in the cache.
pub fn put<S: Step>(&self, step: S, value: S::Output) {
let mut cache = self.0.borrow_mut();
let type_id = TypeId::of::<S>();
Expand All @@ -225,6 +267,7 @@ impl Cache {
stepcache.insert(step, value);
}

/// Retrieves a cached result for the given step, if available.
pub fn get<S: Step>(&self, step: &S) -> Option<S::Output> {
let mut cache = self.0.borrow_mut();
let type_id = TypeId::of::<S>();
Expand Down Expand Up @@ -255,3 +298,6 @@ impl Cache {
self.0.borrow().contains_key(&TypeId::of::<S>())
}
}

// Unit tests for this module; they live in `cache/tests.rs` and are only compiled for test builds.
#[cfg(test)]
mod tests;
52 changes: 52 additions & 0 deletions src/bootstrap/src/utils/cache/tests.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,52 @@
use std::path::PathBuf;

use crate::utils::cache::{INTERNER, Internable, TyIntern};

/// Interning equal strings yields equal handles; a different string yields a different handle.
#[test]
fn test_string_interning() {
    let hello_first = INTERNER.intern_str("Hello");
    let hello_again = INTERNER.intern_str("Hello");
    let world = INTERNER.intern_str("world");

    assert_eq!(hello_first, hello_again, "Same strings should be interned to the same instance");
    assert_ne!(hello_first, world, "Different strings should have different interned values");
}

/// Interning equal paths yields equal handles; a different path yields a different handle.
#[test]
fn test_path_interning() {
    let file_first = PathBuf::from("/tmp/file").intern();
    let file_again = PathBuf::from("/tmp/file").intern();
    let other_file = PathBuf::from("/tmp/other").intern();

    assert_eq!(file_first, file_again);
    assert_ne!(file_first, other_file);
}

/// Interning equal string lists yields equal handles; a different list yields a different handle.
#[test]
fn test_vec_interning() {
    let ab_first = vec![String::from("a"), String::from("b")].intern();
    let ab_again = vec![String::from("a"), String::from("b")].intern();
    let only_c = vec![String::from("c")].intern();

    assert_eq!(ab_first, ab_again);
    assert_ne!(ab_first, only_c);
}

/// An interned string compares equal both to another handle for the same text and to the
/// plain string slice itself.
#[test]
fn test_interned_equality() {
    let first = INTERNER.intern_str("test");
    let second = INTERNER.intern_str("test");

    assert_eq!(first, second);
    assert_eq!(first, "test");
}

/// Interning through a borrow and interning the equivalent owned value produce the same handle,
/// and that handle resolves back to the original text.
#[test]
fn test_ty_intern_intern_borrow() {
    let mut table: TyIntern<String> = TyIntern::default();
    let from_borrow = table.intern_borrow("borrowed");
    let from_owned = table.intern(String::from("borrowed"));

    assert_eq!(from_borrow, from_owned);
    assert_eq!(table.get(from_borrow), "borrowed");
}
Loading