From 8a0a4df0c6725be431157ce84ffd7595c4bd7780 Mon Sep 17 00:00:00 2001 From: bit-aloo Date: Tue, 11 Feb 2025 09:37:59 +0530 Subject: [PATCH 1/2] Add docs to bootstrap:src:utils:cache --- src/bootstrap/src/utils/cache.rs | 43 ++++++++++++++++++++++++++++++++ 1 file changed, 43 insertions(+) diff --git a/src/bootstrap/src/utils/cache.rs b/src/bootstrap/src/utils/cache.rs index 29342cc5a2c5b..650316eebd59b 100644 --- a/src/bootstrap/src/utils/cache.rs +++ b/src/bootstrap/src/utils/cache.rs @@ -1,3 +1,17 @@ +//! This module helps you efficiently store and retrieve values using interning. +//! +//! Interning is a neat trick that keeps only one copy of identical values, saving memory +//! and making comparisons super fast. Here, we provide the `Interned` struct and the `Internable` trait +//! to make interning easy for different data types. +//! +//! The `Interner` struct handles caching for common types like `String`, `PathBuf`, and `Vec`, +//! while the `Cache` struct acts as a write-once storage for linking computation steps with their results. +//! +//! # Thread Safety +//! +//! We use `Mutex` to make sure interning and retrieval are thread-safe. But keep in mind—once a value is +//! interned, it sticks around for the entire lifetime of the program. + use std::any::{Any, TypeId}; use std::borrow::Borrow; use std::cell::RefCell; @@ -12,6 +26,9 @@ use std::{fmt, mem}; use crate::core::builder::Step; +/// Represents an interned value of type `T`, allowing for efficient comparisons and retrieval. +/// +/// This struct stores a unique index referencing the interned value within an internal cache. pub struct Interned(usize, PhantomData<*const T>); impl Default for Interned { @@ -111,6 +128,10 @@ impl Ord for Interned { } } +/// A structure for managing the interning of values of type `T`. +/// +/// `TyIntern` maintains a mapping between values and their interned representations, +/// ensuring that duplicate values are not stored multiple times. struct TyIntern { items: Vec, set: HashMap>, @@ -123,6 +144,9 @@ impl Default for TyIntern { } impl TyIntern { + /// Interns a borrowed value, ensuring it is stored uniquely. + /// + /// If the value has been previously interned, the same `Interned` instance is returned. fn intern_borrow(&mut self, item: &B) -> Interned where B: Eq + Hash + ToOwned + ?Sized, @@ -138,6 +162,9 @@ impl TyIntern { interned } + /// Interns an owned value, storing it uniquely. + /// + /// If the value has been previously interned, the existing `Interned` is returned. fn intern(&mut self, item: T) -> Interned { if let Some(i) = self.set.get(&item) { return *i; @@ -148,11 +175,16 @@ impl TyIntern { interned } + /// Retrieves a reference to the interned value associated with the given `Interned` instance. fn get(&self, i: Interned) -> &T { &self.items[i.0] } } +/// A global interner for managing interned values of common types. +/// +/// This structure maintains caches for `String`, `PathBuf`, and `Vec`, ensuring efficient storage +/// and retrieval of frequently used values. #[derive(Default)] pub struct Interner { strs: Mutex>, @@ -160,6 +192,10 @@ pub struct Interner { lists: Mutex>>, } +/// Defines the behavior required for a type to be internable. +/// +/// Types implementing this trait must provide access to a static cache and define an `intern` method +/// that ensures values are stored uniquely. trait Internable: Clone + Eq + Hash + 'static { fn intern_cache() -> &'static Mutex>; @@ -187,11 +223,15 @@ impl Internable for Vec { } impl Interner { + /// Interns a string reference, ensuring it is stored uniquely. + /// + /// If the string has been previously interned, the same `Interned` instance is returned. pub fn intern_str(&self, s: &str) -> Interned { self.strs.lock().unwrap().intern_borrow(s) } } +/// A global instance of `Interner` that caches common interned values. pub static INTERNER: LazyLock = LazyLock::new(Interner::default); /// This is essentially a `HashMap` which allows storing any type in its input and @@ -209,10 +249,12 @@ pub struct Cache( ); impl Cache { + /// Creates a new empty cache. pub fn new() -> Cache { Cache(RefCell::new(HashMap::new())) } + /// Stores the result of a computation step in the cache. pub fn put(&self, step: S, value: S::Output) { let mut cache = self.0.borrow_mut(); let type_id = TypeId::of::(); @@ -225,6 +267,7 @@ impl Cache { stepcache.insert(step, value); } + /// Retrieves a cached result for the given step, if available. pub fn get(&self, step: &S) -> Option { let mut cache = self.0.borrow_mut(); let type_id = TypeId::of::(); From 24150eb3c073e8f00ebd6b7d02ffac7ddd0f398e Mon Sep 17 00:00:00 2001 From: bit-aloo Date: Tue, 11 Feb 2025 10:33:05 +0530 Subject: [PATCH 2/2] add unit test to bootstrap:util:cache:tests --- src/bootstrap/src/utils/cache.rs | 3 ++ src/bootstrap/src/utils/cache/tests.rs | 52 ++++++++++++++++++++++++++ 2 files changed, 55 insertions(+) create mode 100644 src/bootstrap/src/utils/cache/tests.rs diff --git a/src/bootstrap/src/utils/cache.rs b/src/bootstrap/src/utils/cache.rs index 650316eebd59b..1c8cc4025df11 100644 --- a/src/bootstrap/src/utils/cache.rs +++ b/src/bootstrap/src/utils/cache.rs @@ -298,3 +298,6 @@ impl Cache { self.0.borrow().contains_key(&TypeId::of::()) } } + +#[cfg(test)] +mod tests; diff --git a/src/bootstrap/src/utils/cache/tests.rs b/src/bootstrap/src/utils/cache/tests.rs new file mode 100644 index 0000000000000..28f5563a589b1 --- /dev/null +++ b/src/bootstrap/src/utils/cache/tests.rs @@ -0,0 +1,52 @@ +use std::path::PathBuf; + +use crate::utils::cache::{INTERNER, Internable, TyIntern}; + +#[test] +fn test_string_interning() { + let s1 = INTERNER.intern_str("Hello"); + let s2 = INTERNER.intern_str("Hello"); + let s3 = INTERNER.intern_str("world"); + + assert_eq!(s1, s2, "Same strings should be interned to the same instance"); + assert_ne!(s1, s3, "Different strings should have different interned values"); +} + +#[test] +fn test_path_interning() { + let p1 = PathBuf::from("/tmp/file").intern(); + let p2 = PathBuf::from("/tmp/file").intern(); + let p3 = PathBuf::from("/tmp/other").intern(); + + assert_eq!(p1, p2); + assert_ne!(p1, p3); +} + +#[test] +fn test_vec_interning() { + let v1 = vec!["a".to_string(), "b".to_string()].intern(); + let v2 = vec!["a".to_string(), "b".to_string()].intern(); + let v3 = vec!["c".to_string()].intern(); + + assert_eq!(v1, v2); + assert_ne!(v1, v3); +} + +#[test] +fn test_interned_equality() { + let s1 = INTERNER.intern_str("test"); + let s2 = INTERNER.intern_str("test"); + + assert_eq!(s1, s2); + assert_eq!(s1, "test"); +} + +#[test] +fn test_ty_intern_intern_borrow() { + let mut interner = TyIntern::default(); + let s1 = interner.intern_borrow("borrowed"); + let s2 = interner.intern("borrowed".to_string()); + + assert_eq!(s1, s2); + assert_eq!(interner.get(s1), "borrowed"); +}