Skip to content

Commit fa56cf5

Browse files
authored
Rollup merge of rust-lang#76458 - mbrubeck:hash_drain_filter, r=Amanieu
Add drain_filter method to HashMap and HashSet Add `HashMap::drain_filter` and `HashSet::drain_filter`, implementing part of rust-lang/rfcs#2140. These new methods are unstable. The tracking issue is rust-lang#59618. The added iterators behave the same as `BTreeMap::drain_filter` and `BTreeSet::drain_filter`, except their iteration order is arbitrary. The unit tests are adapted from `alloc::collections::btree`. This branch rewrites `HashSet` to be a wrapper around `hashbrown::HashSet` rather than `std::collections::HashMap`. (Both are themselves wrappers around `hashbrown::HashMap`, so the in-memory representation is the same either way.) This lets `std` re-use more iterator code from `hashbrown`. Without this change, we would need to duplicate much more code to implement `HashSet::drain_filter`. This branch also updates the `hashbrown` crate to version 0.9.0. Aside from changes related to the `DrainFilter` iterators, this version only changes features that are not used in libstd or rustc. And it updates `indexmap` to version 1.6.0, whose only change is compatibility with `hashbrown` 0.9.0.
2 parents f09372a + fb1fab5 commit fa56cf5

File tree

11 files changed

+503
-83
lines changed

11 files changed

+503
-83
lines changed

Cargo.lock

+4-5
Original file line numberDiff line numberDiff line change
@@ -1259,11 +1259,10 @@ dependencies = [
12591259

12601260
[[package]]
12611261
name = "hashbrown"
1262-
version = "0.8.2"
1262+
version = "0.9.0"
12631263
source = "registry+https://github.com/rust-lang/crates.io-index"
1264-
checksum = "e91b62f79061a0bc2e046024cb7ba44b08419ed238ecbd9adbd787434b9e8c25"
1264+
checksum = "00d63df3d41950fb462ed38308eea019113ad1508da725bbedcd0fa5a85ef5f7"
12651265
dependencies = [
1266-
"autocfg",
12671266
"compiler_builtins",
12681267
"rustc-std-workspace-alloc",
12691268
"rustc-std-workspace-core",
@@ -1401,9 +1400,9 @@ dependencies = [
14011400

14021401
[[package]]
14031402
name = "indexmap"
1404-
version = "1.5.1"
1403+
version = "1.6.0"
14051404
source = "registry+https://github.com/rust-lang/crates.io-index"
1406-
checksum = "86b45e59b16c76b11bf9738fd5d38879d3bd28ad292d7b313608becb17ae2df9"
1405+
checksum = "55e2e4c765aa53a0424761bf9f41aa7a6ac1efa87238f59560640e27fca028f2"
14071406
dependencies = [
14081407
"autocfg",
14091408
"hashbrown",

library/std/Cargo.toml

+1-1
Original file line numberDiff line numberDiff line change
@@ -20,7 +20,7 @@ libc = { version = "0.2.74", default-features = false, features = ['rustc-dep-of
2020
compiler_builtins = { version = "0.1.35" }
2121
profiler_builtins = { path = "../profiler_builtins", optional = true }
2222
unwind = { path = "../unwind" }
23-
hashbrown = { version = "0.8.1", default-features = false, features = ['rustc-dep-of-std'] }
23+
hashbrown = { version = "0.9.0", default-features = false, features = ['rustc-dep-of-std'] }
2424

2525
# Dependencies of the `backtrace` crate
2626
addr2line = { version = "0.13.0", optional = true, default-features = false }

library/std/src/collections/hash/map.rs

+93-6
Original file line numberDiff line numberDiff line change
@@ -497,6 +497,50 @@ impl<K, V, S> HashMap<K, V, S> {
497497
Drain { base: self.base.drain() }
498498
}
499499

500+
/// Creates an iterator which uses a closure to determine if an element should be removed.
501+
///
502+
/// If the closure returns true, the element is removed from the map and yielded.
503+
/// If the closure returns false, or panics, the element remains in the map and will not be
504+
/// yielded.
505+
///
506+
/// Note that `drain_filter` lets you mutate every value in the filter closure, regardless of
507+
/// whether you choose to keep or remove it.
508+
///
509+
/// If the iterator is only partially consumed or not consumed at all, each of the remaining
510+
/// elements will still be subjected to the closure and removed and dropped if it returns true.
511+
///
512+
/// It is unspecified how many more elements will be subjected to the closure
513+
/// if a panic occurs in the closure, or a panic occurs while dropping an element,
514+
/// or if the `DrainFilter` value is leaked.
515+
///
516+
/// # Examples
517+
///
518+
/// Splitting a map into even and odd keys, reusing the original map:
519+
///
520+
/// ```
521+
/// #![feature(hash_drain_filter)]
522+
/// use std::collections::HashMap;
523+
///
524+
/// let mut map: HashMap<i32, i32> = (0..8).map(|x| (x, x)).collect();
525+
/// let drained: HashMap<i32, i32> = map.drain_filter(|k, _v| k % 2 == 0).collect();
526+
///
527+
/// let mut evens = drained.keys().copied().collect::<Vec<_>>();
528+
/// let mut odds = map.keys().copied().collect::<Vec<_>>();
529+
/// evens.sort();
530+
/// odds.sort();
531+
///
532+
/// assert_eq!(evens, vec![0, 2, 4, 6]);
533+
/// assert_eq!(odds, vec![1, 3, 5, 7]);
534+
/// ```
535+
#[inline]
536+
#[unstable(feature = "hash_drain_filter", issue = "59618")]
537+
pub fn drain_filter<F>(&mut self, pred: F) -> DrainFilter<'_, K, V, F>
538+
where
539+
F: FnMut(&K, &mut V) -> bool,
540+
{
541+
DrainFilter { base: self.base.drain_filter(pred) }
542+
}
543+
500544
/// Clears the map, removing all key-value pairs. Keeps the allocated memory
501545
/// for reuse.
502546
///
@@ -1190,6 +1234,19 @@ impl<'a, K, V> Drain<'a, K, V> {
11901234
}
11911235
}
11921236

1237+
/// A draining, filtering iterator over the entries of a `HashMap`.
1238+
///
1239+
/// This `struct` is created by the [`drain_filter`] method on [`HashMap`].
1240+
///
1241+
/// [`drain_filter`]: HashMap::drain_filter
1242+
#[unstable(feature = "hash_drain_filter", issue = "59618")]
1243+
pub struct DrainFilter<'a, K, V, F>
1244+
where
1245+
F: FnMut(&K, &mut V) -> bool,
1246+
{
1247+
base: base::DrainFilter<'a, K, V, F>,
1248+
}
1249+
11931250
/// A mutable iterator over the values of a `HashMap`.
11941251
///
11951252
/// This `struct` is created by the [`values_mut`] method on [`HashMap`]. See its
@@ -1247,16 +1304,16 @@ pub struct RawEntryBuilderMut<'a, K: 'a, V: 'a, S: 'a> {
12471304
#[unstable(feature = "hash_raw_entry", issue = "56167")]
12481305
pub enum RawEntryMut<'a, K: 'a, V: 'a, S: 'a> {
12491306
/// An occupied entry.
1250-
Occupied(RawOccupiedEntryMut<'a, K, V>),
1307+
Occupied(RawOccupiedEntryMut<'a, K, V, S>),
12511308
/// A vacant entry.
12521309
Vacant(RawVacantEntryMut<'a, K, V, S>),
12531310
}
12541311

12551312
/// A view into an occupied entry in a `HashMap`.
12561313
/// It is part of the [`RawEntryMut`] enum.
12571314
#[unstable(feature = "hash_raw_entry", issue = "56167")]
1258-
pub struct RawOccupiedEntryMut<'a, K: 'a, V: 'a> {
1259-
base: base::RawOccupiedEntryMut<'a, K, V>,
1315+
pub struct RawOccupiedEntryMut<'a, K: 'a, V: 'a, S: 'a> {
1316+
base: base::RawOccupiedEntryMut<'a, K, V, S>,
12601317
}
12611318

12621319
/// A view into a vacant entry in a `HashMap`.
@@ -1457,7 +1514,7 @@ impl<'a, K, V, S> RawEntryMut<'a, K, V, S> {
14571514
}
14581515
}
14591516

1460-
impl<'a, K, V> RawOccupiedEntryMut<'a, K, V> {
1517+
impl<'a, K, V, S> RawOccupiedEntryMut<'a, K, V, S> {
14611518
/// Gets a reference to the key in the entry.
14621519
#[inline]
14631520
#[unstable(feature = "hash_raw_entry", issue = "56167")]
@@ -1597,7 +1654,7 @@ impl<K: Debug, V: Debug, S> Debug for RawEntryMut<'_, K, V, S> {
15971654
}
15981655

15991656
#[unstable(feature = "hash_raw_entry", issue = "56167")]
1600-
impl<K: Debug, V: Debug> Debug for RawOccupiedEntryMut<'_, K, V> {
1657+
impl<K: Debug, V: Debug, S> Debug for RawOccupiedEntryMut<'_, K, V, S> {
16011658
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
16021659
f.debug_struct("RawOccupiedEntryMut")
16031660
.field("key", self.key())
@@ -1990,6 +2047,36 @@ where
19902047
}
19912048
}
19922049

2050+
#[unstable(feature = "hash_drain_filter", issue = "59618")]
2051+
impl<K, V, F> Iterator for DrainFilter<'_, K, V, F>
2052+
where
2053+
F: FnMut(&K, &mut V) -> bool,
2054+
{
2055+
type Item = (K, V);
2056+
2057+
#[inline]
2058+
fn next(&mut self) -> Option<(K, V)> {
2059+
self.base.next()
2060+
}
2061+
#[inline]
2062+
fn size_hint(&self) -> (usize, Option<usize>) {
2063+
self.base.size_hint()
2064+
}
2065+
}
2066+
2067+
#[unstable(feature = "hash_drain_filter", issue = "59618")]
2068+
impl<K, V, F> FusedIterator for DrainFilter<'_, K, V, F> where F: FnMut(&K, &mut V) -> bool {}
2069+
2070+
#[unstable(feature = "hash_drain_filter", issue = "59618")]
2071+
impl<'a, K, V, F> fmt::Debug for DrainFilter<'a, K, V, F>
2072+
where
2073+
F: FnMut(&K, &mut V) -> bool,
2074+
{
2075+
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
2076+
f.pad("DrainFilter { .. }")
2077+
}
2078+
}
2079+
19932080
impl<'a, K, V> Entry<'a, K, V> {
19942081
#[stable(feature = "rust1", since = "1.0.0")]
19952082
/// Ensures a value is in the entry by inserting the default if empty, and returns
@@ -2698,7 +2785,7 @@ fn map_entry<'a, K: 'a, V: 'a>(raw: base::RustcEntry<'a, K, V>) -> Entry<'a, K,
26982785
}
26992786

27002787
#[inline]
2701-
fn map_try_reserve_error(err: hashbrown::TryReserveError) -> TryReserveError {
2788+
pub(super) fn map_try_reserve_error(err: hashbrown::TryReserveError) -> TryReserveError {
27022789
match err {
27032790
hashbrown::TryReserveError::CapacityOverflow => TryReserveError::CapacityOverflow,
27042791
hashbrown::TryReserveError::AllocError { layout } => {

library/std/src/collections/hash/map/tests.rs

+161
Original file line numberDiff line numberDiff line change
@@ -924,3 +924,164 @@ fn test_raw_entry() {
924924
}
925925
}
926926
}
927+
928+
mod test_drain_filter {
929+
use super::*;
930+
931+
use crate::panic::{catch_unwind, AssertUnwindSafe};
932+
use crate::sync::atomic::{AtomicUsize, Ordering};
933+
934+
trait EqSorted: Iterator {
935+
fn eq_sorted<I: IntoIterator<Item = Self::Item>>(self, other: I) -> bool;
936+
}
937+
938+
impl<T: Iterator> EqSorted for T
939+
where
940+
T::Item: Eq + Ord,
941+
{
942+
fn eq_sorted<I: IntoIterator<Item = Self::Item>>(self, other: I) -> bool {
943+
let mut v: Vec<_> = self.collect();
944+
v.sort_unstable();
945+
v.into_iter().eq(other)
946+
}
947+
}
948+
949+
#[test]
950+
fn empty() {
951+
let mut map: HashMap<i32, i32> = HashMap::new();
952+
map.drain_filter(|_, _| unreachable!("there's nothing to decide on"));
953+
assert!(map.is_empty());
954+
}
955+
956+
#[test]
957+
fn consuming_nothing() {
958+
let pairs = (0..3).map(|i| (i, i));
959+
let mut map: HashMap<_, _> = pairs.collect();
960+
assert!(map.drain_filter(|_, _| false).eq_sorted(crate::iter::empty()));
961+
assert_eq!(map.len(), 3);
962+
}
963+
964+
#[test]
965+
fn consuming_all() {
966+
let pairs = (0..3).map(|i| (i, i));
967+
let mut map: HashMap<_, _> = pairs.clone().collect();
968+
assert!(map.drain_filter(|_, _| true).eq_sorted(pairs));
969+
assert!(map.is_empty());
970+
}
971+
972+
#[test]
973+
fn mutating_and_keeping() {
974+
let pairs = (0..3).map(|i| (i, i));
975+
let mut map: HashMap<_, _> = pairs.collect();
976+
assert!(
977+
map.drain_filter(|_, v| {
978+
*v += 6;
979+
false
980+
})
981+
.eq_sorted(crate::iter::empty())
982+
);
983+
assert!(map.keys().copied().eq_sorted(0..3));
984+
assert!(map.values().copied().eq_sorted(6..9));
985+
}
986+
987+
#[test]
988+
fn mutating_and_removing() {
989+
let pairs = (0..3).map(|i| (i, i));
990+
let mut map: HashMap<_, _> = pairs.collect();
991+
assert!(
992+
map.drain_filter(|_, v| {
993+
*v += 6;
994+
true
995+
})
996+
.eq_sorted((0..3).map(|i| (i, i + 6)))
997+
);
998+
assert!(map.is_empty());
999+
}
1000+
1001+
#[test]
1002+
fn drop_panic_leak() {
1003+
static PREDS: AtomicUsize = AtomicUsize::new(0);
1004+
static DROPS: AtomicUsize = AtomicUsize::new(0);
1005+
1006+
struct D;
1007+
impl Drop for D {
1008+
fn drop(&mut self) {
1009+
if DROPS.fetch_add(1, Ordering::SeqCst) == 1 {
1010+
panic!("panic in `drop`");
1011+
}
1012+
}
1013+
}
1014+
1015+
let mut map = (0..3).map(|i| (i, D)).collect::<HashMap<_, _>>();
1016+
1017+
catch_unwind(move || {
1018+
drop(map.drain_filter(|_, _| {
1019+
PREDS.fetch_add(1, Ordering::SeqCst);
1020+
true
1021+
}))
1022+
})
1023+
.unwrap_err();
1024+
1025+
assert_eq!(PREDS.load(Ordering::SeqCst), 3);
1026+
assert_eq!(DROPS.load(Ordering::SeqCst), 3);
1027+
}
1028+
1029+
#[test]
1030+
fn pred_panic_leak() {
1031+
static PREDS: AtomicUsize = AtomicUsize::new(0);
1032+
static DROPS: AtomicUsize = AtomicUsize::new(0);
1033+
1034+
struct D;
1035+
impl Drop for D {
1036+
fn drop(&mut self) {
1037+
DROPS.fetch_add(1, Ordering::SeqCst);
1038+
}
1039+
}
1040+
1041+
let mut map = (0..3).map(|i| (i, D)).collect::<HashMap<_, _>>();
1042+
1043+
catch_unwind(AssertUnwindSafe(|| {
1044+
drop(map.drain_filter(|_, _| match PREDS.fetch_add(1, Ordering::SeqCst) {
1045+
0 => true,
1046+
_ => panic!(),
1047+
}))
1048+
}))
1049+
.unwrap_err();
1050+
1051+
assert_eq!(PREDS.load(Ordering::SeqCst), 2);
1052+
assert_eq!(DROPS.load(Ordering::SeqCst), 1);
1053+
assert_eq!(map.len(), 2);
1054+
}
1055+
1056+
// Same as above, but attempt to use the iterator again after the panic in the predicate
1057+
#[test]
1058+
fn pred_panic_reuse() {
1059+
static PREDS: AtomicUsize = AtomicUsize::new(0);
1060+
static DROPS: AtomicUsize = AtomicUsize::new(0);
1061+
1062+
struct D;
1063+
impl Drop for D {
1064+
fn drop(&mut self) {
1065+
DROPS.fetch_add(1, Ordering::SeqCst);
1066+
}
1067+
}
1068+
1069+
let mut map = (0..3).map(|i| (i, D)).collect::<HashMap<_, _>>();
1070+
1071+
{
1072+
let mut it = map.drain_filter(|_, _| match PREDS.fetch_add(1, Ordering::SeqCst) {
1073+
0 => true,
1074+
_ => panic!(),
1075+
});
1076+
catch_unwind(AssertUnwindSafe(|| while it.next().is_some() {})).unwrap_err();
1077+
// Iterator behaviour after a panic is explicitly unspecified,
1078+
// so this is just the current implementation:
1079+
let result = catch_unwind(AssertUnwindSafe(|| it.next()));
1080+
assert!(result.is_err());
1081+
}
1082+
1083+
assert_eq!(PREDS.load(Ordering::SeqCst), 3);
1084+
assert_eq!(DROPS.load(Ordering::SeqCst), 1);
1085+
assert_eq!(map.len(), 2);
1086+
}
1087+
}

0 commit comments

Comments
 (0)