From 4718bc0df9dd3285442f0dcf3b9709d8440703e5 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Donny/=EA=B0=95=EB=8F=99=EC=9C=A4?= Date: Wed, 19 Feb 2025 12:02:50 +0900 Subject: [PATCH] fix(hstr): Prevent memory leak for global stores (#10047) **Related issue:** - Closes https://github.com/swc-project/swc/issues/10032 --- .changeset/chatty-horses-invent.md | 6 ++ crates/hstr/src/dynamic.rs | 100 ++++++++++++++++++++++++----- crates/hstr/src/global_store.rs | 20 ++---- 3 files changed, 94 insertions(+), 32 deletions(-) create mode 100644 .changeset/chatty-horses-invent.md diff --git a/.changeset/chatty-horses-invent.md b/.changeset/chatty-horses-invent.md new file mode 100644 index 000000000000..818934ee85e8 --- /dev/null +++ b/.changeset/chatty-horses-invent.md @@ -0,0 +1,6 @@ +--- +hstr: patch +swc_core: patch +--- + +fix(hstr): Prevent memory leak for global stores diff --git a/crates/hstr/src/dynamic.rs b/crates/hstr/src/dynamic.rs index 1bbe29c50dbe..2b127101c15c 100644 --- a/crates/hstr/src/dynamic.rs +++ b/crates/hstr/src/dynamic.rs @@ -1,10 +1,11 @@ use std::{ borrow::Cow, + cell::RefCell, ffi::c_void, hash::{BuildHasherDefault, Hash, Hasher}, - mem::ManuallyDrop, + mem::{forget, ManuallyDrop}, ops::Deref, - ptr::NonNull, + ptr::{self, NonNull}, }; use rustc_hash::FxHasher; @@ -15,13 +16,26 @@ use crate::{ Atom, INLINE_TAG_INIT, LEN_OFFSET, TAG_MASK, }; +#[derive(PartialEq, Eq)] pub(crate) struct Metadata { pub hash: u64, + pub is_global: bool, } -#[derive(Clone)] +#[derive(Clone, PartialEq, Eq)] pub(crate) struct Item(pub ThinArc, u8>); +impl Item { + /// https://users.rust-lang.org/t/what-is-the-ideal-way-to-destruct-a-guard-type/25974/8 + fn into_inner(self) -> ThinArc, u8> { + unsafe { + let inner = ptr::read(&self.0); + forget(self); + inner + } + } +} + impl Deref for Item { type Target = , u8> as Deref>::Target; @@ -30,9 +44,6 @@ impl Deref for Item { } } -/// TODO: Use real weak pointer -type WeakItem = Item; - impl Hash for Item { fn hash(&self, state: &mut H) { state.write_u64(self.0.header.header.header.hash); @@ -56,7 +67,7 @@ pub(crate) unsafe fn restore_arc(v: TaggedValue) -> Item { /// /// # Merging [AtomStore] pub struct AtomStore { - pub(crate) data: hashbrown::HashMap, + pub(crate) data: hashbrown::HashMap, } impl Default for AtomStore { @@ -70,13 +81,42 @@ impl Default for AtomStore { impl AtomStore { #[inline(always)] pub fn atom<'a>(&mut self, text: impl Into>) -> Atom { - new_atom(self, text.into()) + atom_in(self, &text.into(), false) + } +} + +impl Drop for Item { + fn drop(&mut self) { + // If we are going to drop the last reference, we need to remove the + // entry from the global store if it is a global atom + if self.0.header.header.header.is_global && ThinArc::strong_count(&self.0) == 2 { + let v = GLOBAL_DATA.try_with(|global| { + let mut store = global.borrow_mut(); + store.data.remove_entry(self) + }); + + if let Ok(Some((v, _))) = v { + v.into_inner(); + } + } } } +thread_local! { + static GLOBAL_DATA: RefCell = Default::default(); +} + +pub(crate) fn global_atom(text: &str) -> Atom { + GLOBAL_DATA.with(|global| { + let mut store = global.borrow_mut(); + + atom_in(&mut *store, text, true) + }) +} + /// This can create any kind of [Atom], although this lives in the `dynamic` /// module. -pub(crate) fn new_atom(storage: S, text: Cow) -> Atom +pub(crate) fn atom_in(storage: S, text: &str, is_global: bool) -> Atom where S: Storage, { @@ -92,9 +132,9 @@ where return Atom { unsafe_data }; } - let hash = calc_hash(&text); - let entry = storage.insert_entry(text, hash); - let entry = ThinArc::into_raw(entry.0) as *mut c_void; + let hash = calc_hash(text); + let entry = storage.insert_entry(text, hash, is_global); + let entry = ThinArc::into_raw(entry.into_inner()) as *mut c_void; let ptr: NonNull = unsafe { // Safety: Arc::into_raw returns a non-null pointer @@ -107,16 +147,16 @@ where } pub(crate) trait Storage { - fn insert_entry(self, text: Cow, hash: u64) -> Item; + fn insert_entry(self, text: &str, hash: u64, is_global: bool) -> Item; } impl Storage for &'_ mut AtomStore { #[inline(never)] - fn insert_entry(self, text: Cow, hash: u64) -> Item { + fn insert_entry(self, text: &str, hash: u64, is_global: bool) -> Item { // If the text is too long, interning is not worth it. if text.len() > 512 { return Item(ThinArc::from_header_and_slice( - HeaderWithLength::new(Metadata { hash }, text.len()), + HeaderWithLength::new(Metadata { hash, is_global }, text.len()), text.as_bytes(), )); } @@ -130,7 +170,7 @@ impl Storage for &'_ mut AtomStore { .or_insert_with(move || { ( Item(ThinArc::from_header_and_slice( - HeaderWithLength::new(Metadata { hash }, text.len()), + HeaderWithLength::new(Metadata { hash, is_global }, text.len()), text.as_bytes(), )), (), @@ -187,3 +227,31 @@ impl Hasher for EntryHasher { self.hash = val; } } + +#[cfg(test)] +mod tests { + use crate::{dynamic::GLOBAL_DATA, Atom}; + + #[test] + fn global_ref_count_dynamic() { + let atom1 = Atom::new("Hello, world!"); + + let atom2 = Atom::new("Hello, world!"); + + // 2 for the two atoms, 1 for the global store + assert_eq!(atom1.ref_count(), 3); + assert_eq!(atom2.ref_count(), 3); + + drop(atom1); + + // 1 for the atom2, 1 for the global store + assert_eq!(atom2.ref_count(), 2); + + drop(atom2); + + GLOBAL_DATA.with(|global| { + let store = global.borrow(); + assert_eq!(store.data.len(), 0); + }); + } +} diff --git a/crates/hstr/src/global_store.rs b/crates/hstr/src/global_store.rs index af08ad1e0b44..6136c747b6ee 100644 --- a/crates/hstr/src/global_store.rs +++ b/crates/hstr/src/global_store.rs @@ -1,24 +1,12 @@ -use std::{borrow::Cow, cell::RefCell}; +use std::borrow::Cow; -use crate::{Atom, AtomStore}; - -fn atom(text: Cow) -> Atom { - thread_local! { - static GLOBAL_DATA: RefCell = Default::default(); - } - - GLOBAL_DATA.with(|global| { - let mut store = global.borrow_mut(); - - store.atom(text) - }) -} +use crate::{dynamic::global_atom, Atom}; macro_rules! direct_from_impl { ($T:ty) => { impl From<$T> for Atom { fn from(s: $T) -> Self { - atom(s.into()) + global_atom(&s) } } }; @@ -30,6 +18,6 @@ direct_from_impl!(String); impl From> for crate::Atom { fn from(s: Box) -> Self { - atom(Cow::Owned(String::from(s))) + global_atom(&s) } }