Skip to content

Commit f37b211

Browse files
committed
feat(allocator): add HashSet (#14212)
Adds the arena-allocated `HashSet`.
1 parent 0c14e50 commit f37b211

File tree

3 files changed

+215
-12
lines changed

3 files changed

+215
-12
lines changed

crates/oxc_allocator/src/hash_map.rs

Lines changed: 1 addition & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -49,7 +49,7 @@ type FxHashMap<'alloc, K, V> = hashbrown::HashMap<K, V, FxBuildHasher, &'alloc B
4949
///
5050
/// [`FxHasher`]: rustc_hash::FxHasher
5151
#[derive(Debug)]
52-
pub struct HashMap<'alloc, K, V>(ManuallyDrop<FxHashMap<'alloc, K, V>>);
52+
pub struct HashMap<'alloc, K, V>(pub(crate) ManuallyDrop<FxHashMap<'alloc, K, V>>);
5353

5454
/// SAFETY: Even though `Bump` is not `Sync`, we can make `HashMap<K, V>` `Sync` if both `K` and `V`
5555
/// are `Sync` because:
@@ -285,14 +285,3 @@ where
285285
}
286286

287287
// Note: `Index` and `Extend` are implemented via `Deref`
288-
289-
/*
290-
// Uncomment once we also provide `oxc_allocator::HashSet`
291-
impl<'alloc, T> From<HashMap<'alloc, T, ()>> for HashSet<'alloc, T> {
292-
fn from(map: HashMap<'alloc, T, ()>) -> Self {
293-
let inner_map = ManuallyDrop::into_inner(map.0);
294-
let inner_set = FxHashSet::from(inner_map);
295-
Self(ManuallyDrop::new(inner_set))
296-
}
297-
}
298-
*/
Lines changed: 211 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,211 @@
1+
//! A hash set without `Drop`, that uses [`FxHasher`] to hash keys, and stores data in arena allocator.
2+
//!
3+
//! See [`HashSet`] for more details.
4+
//!
5+
//! [`FxHasher`]: rustc_hash::FxHasher
6+
7+
// All methods which just delegate to `hashbrown::HashSet` methods are marked `#[inline(always)]`
8+
#![expect(clippy::inline_always)]
9+
10+
use std::{
11+
hash::Hash,
12+
mem::ManuallyDrop,
13+
ops::{Deref, DerefMut},
14+
};
15+
16+
use bumpalo::Bump;
17+
use rustc_hash::FxBuildHasher;
18+
19+
// Re-export additional types from `hashbrown`
20+
pub use hashbrown::hash_set::{
21+
Difference, Drain, Entry, ExtractIf, Intersection, IntoIter, Iter, SymmetricDifference, Union,
22+
};
23+
24+
use crate::{Allocator, HashMap};
25+
26+
type FxHashSet<'alloc, T> = hashbrown::HashSet<T, FxBuildHasher, &'alloc Bump>;
27+
28+
/// A hash set without `Drop`, that uses [`FxHasher`] to hash keys, and stores data in arena allocator.
29+
///
30+
/// Just a thin wrapper around [`hashbrown::HashSet`], which disables the `Drop` implementation.
31+
///
32+
/// All APIs are the same, except that you create a [`HashSet`] with
33+
/// either [`new_in`](HashSet::new_in) or [`with_capacity_in`](HashSet::with_capacity_in).
34+
///
35+
/// # No `Drop`s
36+
///
37+
/// Objects allocated into Oxc memory arenas are never [`Dropped`](Drop). Memory is released in bulk
38+
/// when the allocator is dropped, without dropping the individual objects in the arena.
39+
///
40+
/// Therefore, it would produce a memory leak if you allocated [`Drop`] types into the arena
41+
/// which own memory allocations outside the arena.
42+
///
43+
/// Static checks make this impossible to do. [`HashSet::new_in`] and all other methods which create
44+
/// a [`HashSet`] will refuse to compile if the element type is a [`Drop`] type.
45+
///
46+
/// [`FxHasher`]: rustc_hash::FxHasher
47+
#[derive(Debug)]
48+
pub struct HashSet<'alloc, T>(ManuallyDrop<FxHashSet<'alloc, T>>);
49+
50+
/// SAFETY: Same as `HashMap`. See `HashMap`'s doc comment for details.
51+
unsafe impl<T: Sync> Sync for HashSet<'_, T> {}
52+
53+
// TODO: `IntoIter` and other consuming iterators provided by `hashbrown` are `Drop`.
54+
// Wrap them in `ManuallyDrop` to prevent that.
55+
56+
impl<'alloc, T> HashSet<'alloc, T> {
57+
/// Const assertion that `T` is not `Drop`.
58+
/// Must be referenced in all methods which create a `HashSet`.
59+
const ASSERT_T_IS_NOT_DROP: () = {
60+
assert!(!std::mem::needs_drop::<T>(), "Cannot create a HashSet<T> where T is a Drop type");
61+
};
62+
63+
/// Creates an empty [`HashSet`]. It will be allocated with the given allocator.
64+
///
65+
/// The hash set is initially created with a capacity of 0, so it will not allocate
66+
/// until it is first inserted into.
67+
#[inline(always)]
68+
pub fn new_in(allocator: &'alloc Allocator) -> Self {
69+
const { Self::ASSERT_T_IS_NOT_DROP };
70+
71+
let inner = FxHashSet::with_hasher_in(FxBuildHasher, allocator.bump());
72+
Self(ManuallyDrop::new(inner))
73+
}
74+
75+
/// Creates an empty [`HashSet`] with the specified capacity. It will be allocated with the given allocator.
76+
///
77+
/// The hash set will be able to hold at least `capacity` elements without reallocating.
78+
/// If `capacity` is 0, the hash set will not allocate.
79+
#[inline(always)]
80+
pub fn with_capacity_in(capacity: usize, allocator: &'alloc Allocator) -> Self {
81+
const { Self::ASSERT_T_IS_NOT_DROP };
82+
83+
let inner =
84+
FxHashSet::with_capacity_and_hasher_in(capacity, FxBuildHasher, allocator.bump());
85+
Self(ManuallyDrop::new(inner))
86+
}
87+
88+
/// Create a new [`HashSet`] whose elements are taken from an iterator and allocated in the given `allocator`.
89+
///
90+
/// This is behaviorally identical to [`FromIterator::from_iter`].
91+
#[inline]
92+
pub fn from_iter_in<I: IntoIterator<Item = T>>(iter: I, allocator: &'alloc Allocator) -> Self
93+
where
94+
T: Eq + Hash,
95+
{
96+
const { Self::ASSERT_T_IS_NOT_DROP };
97+
98+
let iter = iter.into_iter();
99+
100+
// Use the iterator's lower size bound.
101+
// This follows `hashbrown::HashSet`'s `from_iter` implementation.
102+
//
103+
// This is a trade-off:
104+
// * Negative: If lower bound is too low, the `HashSet` may have to grow and reallocate during `for_each` loop.
105+
// * Positive: Avoids potential large over-allocation for iterators where upper bound may be a large over-estimate
106+
// e.g. filter iterators.
107+
let capacity = iter.size_hint().0;
108+
let set = FxHashSet::with_capacity_and_hasher_in(capacity, FxBuildHasher, allocator.bump());
109+
// Wrap in `ManuallyDrop` *before* calling `for_each`, so compiler doesn't insert unnecessary code
110+
// to drop the `FxHashSet` in case of a panic in iterator's `next` method
111+
let mut set = ManuallyDrop::new(set);
112+
113+
iter.for_each(|v| {
114+
set.insert(v);
115+
});
116+
117+
Self(set)
118+
}
119+
120+
/// Calling this method produces a compile-time panic.
121+
///
122+
/// This method would be unsound, because [`HashSet`] is `Sync`, and the underlying allocator
123+
/// (`bumpalo::Bump`) is not `Sync`.
124+
///
125+
/// This method exists only to block access as much as possible to the underlying
126+
/// `hashbrown::HashSet::allocator` method. That method can still be accessed via explicit `Deref`
127+
/// (`hash_set.deref().allocator()`), but that's unsound.
128+
///
129+
/// We'll prevent access to it completely and remove this method as soon as we can.
130+
// TODO: Do that!
131+
#[expect(clippy::unused_self)]
132+
pub fn allocator(&self) -> &'alloc Bump {
133+
const { panic!("This method cannot be called") };
134+
unreachable!();
135+
}
136+
}
137+
138+
// Provide access to all `hashbrown::HashSet`'s methods via deref
139+
impl<'alloc, T> Deref for HashSet<'alloc, T> {
140+
type Target = FxHashSet<'alloc, T>;
141+
142+
#[inline]
143+
fn deref(&self) -> &Self::Target {
144+
&self.0
145+
}
146+
}
147+
148+
impl<'alloc, T> DerefMut for HashSet<'alloc, T> {
149+
#[inline]
150+
fn deref_mut(&mut self) -> &mut FxHashSet<'alloc, T> {
151+
&mut self.0
152+
}
153+
}
154+
155+
impl<'alloc, T> IntoIterator for HashSet<'alloc, T> {
156+
type IntoIter = IntoIter<T, &'alloc Bump>;
157+
type Item = T;
158+
159+
/// Creates a consuming iterator, that is, one that moves each value out of the set
160+
/// in arbitrary order.
161+
///
162+
/// The set cannot be used after calling this.
163+
#[inline(always)]
164+
fn into_iter(self) -> Self::IntoIter {
165+
let inner = ManuallyDrop::into_inner(self.0);
166+
// TODO: `hashbrown::hash_set::IntoIter` is `Drop`.
167+
// Wrap it in `ManuallyDrop` to prevent that.
168+
inner.into_iter()
169+
}
170+
}
171+
172+
impl<'alloc, 'i, T> IntoIterator for &'i HashSet<'alloc, T> {
173+
type IntoIter = <&'i FxHashSet<'alloc, T> as IntoIterator>::IntoIter;
174+
type Item = &'i T;
175+
176+
/// Creates an iterator over the values of a `HashSet` in arbitrary order.
177+
///
178+
/// The iterator element type is `&'i T`.
179+
///
180+
/// Returns the same [`Iter`] struct as the `iter` method on [`HashSet`].
181+
#[inline(always)]
182+
fn into_iter(self) -> Self::IntoIter {
183+
self.0.iter()
184+
}
185+
}
186+
187+
impl<T> PartialEq for HashSet<'_, T>
188+
where
189+
T: Eq + Hash,
190+
{
191+
#[inline(always)]
192+
fn eq(&self, other: &Self) -> bool {
193+
self.0.eq(&other.0)
194+
}
195+
}
196+
197+
impl<T> Eq for HashSet<'_, T> where T: Eq + Hash {}
198+
199+
// Note: `Index` and `Extend` are implemented via `Deref`
200+
201+
/// Convert `HashMap<T, ()>` to `HashSet<T>`.
202+
///
203+
/// This conversion is zero cost, as `HashSet<T>` is just a wrapper around `HashMap<T, ()>`.
204+
impl<'alloc, T> From<HashMap<'alloc, T, ()>> for HashSet<'alloc, T> {
205+
#[inline(always)]
206+
fn from(map: HashMap<'alloc, T, ()>) -> Self {
207+
let inner_map = ManuallyDrop::into_inner(map.0);
208+
let inner_set = hashbrown::HashSet::from(inner_map);
209+
Self(ManuallyDrop::new(inner_set))
210+
}
211+
}

crates/oxc_allocator/src/lib.rs

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,7 @@
99
//! * [`Vec`]
1010
//! * [`String`]
1111
//! * [`HashMap`]
12+
//! * [`HashSet`]
1213
//!
1314
//! See [`Allocator`] docs for information on efficient use of [`Allocator`].
1415
//!
@@ -50,6 +51,7 @@ mod convert;
5051
#[cfg(feature = "from_raw_parts")]
5152
mod from_raw_parts;
5253
pub mod hash_map;
54+
pub mod hash_set;
5355
#[cfg(feature = "pool")]
5456
mod pool;
5557
mod string_builder;
@@ -68,6 +70,7 @@ pub use boxed::Box;
6870
pub use clone_in::CloneIn;
6971
pub use convert::{FromIn, IntoIn};
7072
pub use hash_map::HashMap;
73+
pub use hash_set::HashSet;
7174
#[cfg(feature = "pool")]
7275
pub use pool::*;
7376
pub use string_builder::StringBuilder;

0 commit comments

Comments
 (0)