Skip to content

Commit

Permalink
fix(hstr): Prevent memory leak for global stores (#10047)
Browse files Browse the repository at this point in the history
**Related issue:**

 - Closes #10032
  • Loading branch information
kdy1 authored Feb 19, 2025
1 parent c28d494 commit 4718bc0
Show file tree
Hide file tree
Showing 3 changed files with 94 additions and 32 deletions.
6 changes: 6 additions & 0 deletions .changeset/chatty-horses-invent.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
---
hstr: patch
swc_core: patch
---

fix(hstr): Prevent memory leak for global stores
100 changes: 84 additions & 16 deletions crates/hstr/src/dynamic.rs
Original file line number Diff line number Diff line change
@@ -1,10 +1,11 @@
use std::{
borrow::Cow,
cell::RefCell,
ffi::c_void,
hash::{BuildHasherDefault, Hash, Hasher},
mem::ManuallyDrop,
mem::{forget, ManuallyDrop},
ops::Deref,
ptr::NonNull,
ptr::{self, NonNull},
};

use rustc_hash::FxHasher;
Expand All @@ -15,13 +16,26 @@ use crate::{
Atom, INLINE_TAG_INIT, LEN_OFFSET, TAG_MASK,
};

#[derive(PartialEq, Eq)]
pub(crate) struct Metadata {
pub hash: u64,
pub is_global: bool,
}

#[derive(Clone)]
#[derive(Clone, PartialEq, Eq)]
pub(crate) struct Item(pub ThinArc<HeaderWithLength<Metadata>, u8>);

impl Item {
/// https://users.rust-lang.org/t/what-is-the-ideal-way-to-destruct-a-guard-type/25974/8
fn into_inner(self) -> ThinArc<HeaderWithLength<Metadata>, u8> {
unsafe {
let inner = ptr::read(&self.0);
forget(self);
inner
}
}
}

impl Deref for Item {
type Target = <ThinArc<HeaderWithLength<Metadata>, u8> as Deref>::Target;

Expand All @@ -30,9 +44,6 @@ impl Deref for Item {
}
}

/// TODO: Use real weak pointer
type WeakItem = Item;

impl Hash for Item {
fn hash<H: Hasher>(&self, state: &mut H) {
state.write_u64(self.0.header.header.header.hash);
Expand All @@ -56,7 +67,7 @@ pub(crate) unsafe fn restore_arc(v: TaggedValue) -> Item {
///
/// # Merging [AtomStore]
pub struct AtomStore {
pub(crate) data: hashbrown::HashMap<WeakItem, (), BuildEntryHasher>,
pub(crate) data: hashbrown::HashMap<Item, (), BuildEntryHasher>,
}

impl Default for AtomStore {
Expand All @@ -70,13 +81,42 @@ impl Default for AtomStore {
impl AtomStore {
#[inline(always)]
pub fn atom<'a>(&mut self, text: impl Into<Cow<'a, str>>) -> Atom {
new_atom(self, text.into())
atom_in(self, &text.into(), false)
}
}

impl Drop for Item {
fn drop(&mut self) {
// If we are going to drop the last reference, we need to remove the
// entry from the global store if it is a global atom
if self.0.header.header.header.is_global && ThinArc::strong_count(&self.0) == 2 {
let v = GLOBAL_DATA.try_with(|global| {
let mut store = global.borrow_mut();
store.data.remove_entry(self)
});

if let Ok(Some((v, _))) = v {
v.into_inner();
}
}
}
}

thread_local! {
static GLOBAL_DATA: RefCell<AtomStore> = Default::default();
}

pub(crate) fn global_atom(text: &str) -> Atom {
GLOBAL_DATA.with(|global| {
let mut store = global.borrow_mut();

atom_in(&mut *store, text, true)
})
}

/// This can create any kind of [Atom], although this lives in the `dynamic`
/// module.
pub(crate) fn new_atom<S>(storage: S, text: Cow<str>) -> Atom
pub(crate) fn atom_in<S>(storage: S, text: &str, is_global: bool) -> Atom
where
S: Storage,
{
Expand All @@ -92,9 +132,9 @@ where
return Atom { unsafe_data };
}

let hash = calc_hash(&text);
let entry = storage.insert_entry(text, hash);
let entry = ThinArc::into_raw(entry.0) as *mut c_void;
let hash = calc_hash(text);
let entry = storage.insert_entry(text, hash, is_global);
let entry = ThinArc::into_raw(entry.into_inner()) as *mut c_void;

let ptr: NonNull<c_void> = unsafe {
// Safety: Arc::into_raw returns a non-null pointer
Expand All @@ -107,16 +147,16 @@ where
}

pub(crate) trait Storage {
fn insert_entry(self, text: Cow<str>, hash: u64) -> Item;
fn insert_entry(self, text: &str, hash: u64, is_global: bool) -> Item;
}

impl Storage for &'_ mut AtomStore {
#[inline(never)]
fn insert_entry(self, text: Cow<str>, hash: u64) -> Item {
fn insert_entry(self, text: &str, hash: u64, is_global: bool) -> Item {
// If the text is too long, interning is not worth it.
if text.len() > 512 {
return Item(ThinArc::from_header_and_slice(
HeaderWithLength::new(Metadata { hash }, text.len()),
HeaderWithLength::new(Metadata { hash, is_global }, text.len()),
text.as_bytes(),
));
}
Expand All @@ -130,7 +170,7 @@ impl Storage for &'_ mut AtomStore {
.or_insert_with(move || {
(
Item(ThinArc::from_header_and_slice(
HeaderWithLength::new(Metadata { hash }, text.len()),
HeaderWithLength::new(Metadata { hash, is_global }, text.len()),
text.as_bytes(),
)),
(),
Expand Down Expand Up @@ -187,3 +227,31 @@ impl Hasher for EntryHasher {
self.hash = val;
}
}

#[cfg(test)]
mod tests {
use crate::{dynamic::GLOBAL_DATA, Atom};

#[test]
fn global_ref_count_dynamic() {
let atom1 = Atom::new("Hello, world!");

let atom2 = Atom::new("Hello, world!");

// 2 for the two atoms, 1 for the global store
assert_eq!(atom1.ref_count(), 3);
assert_eq!(atom2.ref_count(), 3);

drop(atom1);

// 1 for the atom2, 1 for the global store
assert_eq!(atom2.ref_count(), 2);

drop(atom2);

GLOBAL_DATA.with(|global| {
let store = global.borrow();
assert_eq!(store.data.len(), 0);
});
}
}
20 changes: 4 additions & 16 deletions crates/hstr/src/global_store.rs
Original file line number Diff line number Diff line change
@@ -1,24 +1,12 @@
use std::{borrow::Cow, cell::RefCell};
use std::borrow::Cow;

use crate::{Atom, AtomStore};

fn atom(text: Cow<str>) -> Atom {
thread_local! {
static GLOBAL_DATA: RefCell<AtomStore> = Default::default();
}

GLOBAL_DATA.with(|global| {
let mut store = global.borrow_mut();

store.atom(text)
})
}
use crate::{dynamic::global_atom, Atom};

macro_rules! direct_from_impl {
($T:ty) => {
impl From<$T> for Atom {
fn from(s: $T) -> Self {
atom(s.into())
global_atom(&s)
}
}
};
Expand All @@ -30,6 +18,6 @@ direct_from_impl!(String);

impl From<Box<str>> for crate::Atom {
fn from(s: Box<str>) -> Self {
atom(Cow::Owned(String::from(s)))
global_atom(&s)
}
}

0 comments on commit 4718bc0

Please sign in to comment.