Skip to content

Commit ea8c629

Browse files
committed
Auto merge of #39560 - F001:retainHashMap, r=alexcrichton
std: Add retain method for HashMap and HashSet. Fix #36648. r? @bluss
2 parents 025c328 + d90a7b3 commit ea8c629

File tree

3 files changed

+182
-46
lines changed

3 files changed

+182
-46
lines changed

src/libstd/collections/hash/map.rs

+75-39
Original file line numberDiff line numberDiff line change
@@ -416,22 +416,26 @@ fn search_hashed<K, V, M, F>(table: M, hash: SafeHash, mut is_match: F) -> Inter
416416
}
417417
}
418418

419-
fn pop_internal<K, V>(starting_bucket: FullBucketMut<K, V>) -> (K, V) {
419+
fn pop_internal<K, V>(starting_bucket: FullBucketMut<K, V>)
420+
-> (K, V, &mut RawTable<K, V>)
421+
{
420422
let (empty, retkey, retval) = starting_bucket.take();
421423
let mut gap = match empty.gap_peek() {
422-
Some(b) => b,
423-
None => return (retkey, retval),
424+
Ok(b) => b,
425+
Err(b) => return (retkey, retval, b.into_table()),
424426
};
425427

426428
while gap.full().displacement() != 0 {
427429
gap = match gap.shift() {
428-
Some(b) => b,
429-
None => break,
430+
Ok(b) => b,
431+
Err(b) => {
432+
return (retkey, retval, b.into_table());
433+
},
430434
};
431435
}
432436

433437
// Now we've done all our shifting. Return the value we grabbed earlier.
434-
(retkey, retval)
438+
(retkey, retval, gap.into_bucket().into_table())
435439
}
436440

437441
/// Perform robin hood bucket stealing at the given `bucket`. You must
@@ -721,38 +725,7 @@ impl<K, V, S> HashMap<K, V, S>
721725
return;
722726
}
723727

724-
// Grow the table.
725-
// Specialization of the other branch.
726-
let mut bucket = Bucket::first(&mut old_table);
727-
728-
// "So a few of the first shall be last: for many be called,
729-
// but few chosen."
730-
//
731-
// We'll most likely encounter a few buckets at the beginning that
732-
// have their initial buckets near the end of the table. They were
733-
// placed at the beginning as the probe wrapped around the table
734-
// during insertion. We must skip forward to a bucket that won't
735-
// get reinserted too early and won't unfairly steal others' spot.
736-
// This eliminates the need for robin hood.
737-
loop {
738-
bucket = match bucket.peek() {
739-
Full(full) => {
740-
if full.displacement() == 0 {
741-
// This bucket occupies its ideal spot.
742-
// It indicates the start of another "cluster".
743-
bucket = full.into_bucket();
744-
break;
745-
}
746-
// Leaving this bucket in the last cluster for later.
747-
full.into_bucket()
748-
}
749-
Empty(b) => {
750-
// Encountered a hole between clusters.
751-
b.into_bucket()
752-
}
753-
};
754-
bucket.next();
755-
}
728+
let mut bucket = Bucket::head_bucket(&mut old_table);
756729

757730
// This is how the buckets might be laid out in memory:
758731
// ($ marks an initialized bucket)
@@ -1208,6 +1181,57 @@ impl<K, V, S> HashMap<K, V, S>
12081181

12091182
self.search_mut(k).into_occupied_bucket().map(|bucket| pop_internal(bucket).1)
12101183
}
1184+
1185+
/// Retains only the elements specified by the predicate.
1186+
///
1187+
/// In other words, remove all pairs `(k, v)` such that `f(&k, &mut v)` returns `false`.
1188+
///
1189+
/// # Examples
1190+
///
1191+
/// ```
1192+
/// #![feature(retain_hash_collection)]
1193+
/// use std::collections::HashMap;
1194+
///
1195+
/// let mut map: HashMap<isize, isize> = (0..8).map(|x|(x, x*10)).collect();
1196+
/// map.retain(|&k, _| k % 2 == 0);
1197+
/// assert_eq!(map.len(), 4);
1198+
/// ```
1199+
#[unstable(feature = "retain_hash_collection", issue = "36648")]
1200+
pub fn retain<F>(&mut self, mut f: F)
1201+
where F: FnMut(&K, &mut V) -> bool
1202+
{
1203+
if self.table.capacity() == 0 || self.table.size() == 0 {
1204+
return;
1205+
}
1206+
let mut bucket = Bucket::head_bucket(&mut self.table);
1207+
bucket.prev();
1208+
let tail = bucket.index();
1209+
loop {
1210+
bucket = match bucket.peek() {
1211+
Full(mut full) => {
1212+
let should_remove = {
1213+
let (k, v) = full.read_mut();
1214+
!f(k, v)
1215+
};
1216+
if should_remove {
1217+
let prev_idx = full.index();
1218+
let prev_raw = full.raw();
1219+
let (_, _, t) = pop_internal(full);
1220+
Bucket::new_from(prev_raw, prev_idx, t)
1221+
} else {
1222+
full.into_bucket()
1223+
}
1224+
},
1225+
Empty(b) => {
1226+
b.into_bucket()
1227+
}
1228+
};
1229+
bucket.prev(); // reverse iteration
1230+
if bucket.index() == tail {
1231+
break;
1232+
}
1233+
}
1234+
}
12111235
}
12121236

12131237
#[stable(feature = "rust1", since = "1.0.0")]
@@ -1862,7 +1886,8 @@ impl<'a, K, V> OccupiedEntry<'a, K, V> {
18621886
/// ```
18631887
#[stable(feature = "map_entry_recover_keys2", since = "1.12.0")]
18641888
pub fn remove_entry(self) -> (K, V) {
1865-
pop_internal(self.elem)
1889+
let (k, v, _) = pop_internal(self.elem);
1890+
(k, v)
18661891
}
18671892

18681893
/// Gets a reference to the value in the entry.
@@ -3156,4 +3181,15 @@ mod test_map {
31563181
assert_eq!(a.len(), 1);
31573182
assert_eq!(a[key], value);
31583183
}
3184+
3185+
#[test]
3186+
fn test_retain() {
3187+
let mut map: HashMap<isize, isize> = (0..100).map(|x|(x, x*10)).collect();
3188+
3189+
map.retain(|&k, _| k % 2 == 0);
3190+
assert_eq!(map.len(), 50);
3191+
assert_eq!(map[&2], 20);
3192+
assert_eq!(map[&4], 40);
3193+
assert_eq!(map[&6], 60);
3194+
}
31593195
}

src/libstd/collections/hash/set.rs

+33
Original file line numberDiff line numberDiff line change
@@ -630,6 +630,28 @@ impl<T, S> HashSet<T, S>
630630
{
631631
Recover::take(&mut self.map, value)
632632
}
633+
634+
/// Retains only the elements specified by the predicate.
635+
///
636+
/// In other words, remove all elements `e` such that `f(&e)` returns `false`.
637+
///
638+
/// # Examples
639+
///
640+
/// ```
641+
/// #![feature(retain_hash_collection)]
642+
/// use std::collections::HashSet;
643+
///
644+
/// let xs = [1,2,3,4,5,6];
645+
/// let mut set: HashSet<isize> = xs.iter().cloned().collect();
646+
/// set.retain(|&k| k % 2 == 0);
647+
/// assert_eq!(set.len(), 3);
648+
/// ```
649+
#[unstable(feature = "retain_hash_collection", issue = "36648")]
650+
pub fn retain<F>(&mut self, mut f: F)
651+
where F: FnMut(&T) -> bool
652+
{
653+
self.map.retain(|k, _| f(k));
654+
}
633655
}
634656

635657
#[stable(feature = "rust1", since = "1.0.0")]
@@ -1611,4 +1633,15 @@ mod test_set {
16111633
assert!(a.contains(&5));
16121634
assert!(a.contains(&6));
16131635
}
1636+
1637+
#[test]
1638+
fn test_retain() {
1639+
let xs = [1,2,3,4,5,6];
1640+
let mut set: HashSet<isize> = xs.iter().cloned().collect();
1641+
set.retain(|&k| k % 2 == 0);
1642+
assert_eq!(set.len(), 3);
1643+
assert!(set.contains(&2));
1644+
assert!(set.contains(&4));
1645+
assert!(set.contains(&6));
1646+
}
16141647
}

src/libstd/collections/hash/table.rs

+74-7
Original file line numberDiff line numberDiff line change
@@ -85,7 +85,7 @@ pub struct RawTable<K, V> {
8585
unsafe impl<K: Send, V: Send> Send for RawTable<K, V> {}
8686
unsafe impl<K: Sync, V: Sync> Sync for RawTable<K, V> {}
8787

88-
struct RawBucket<K, V> {
88+
pub struct RawBucket<K, V> {
8989
hash: *mut HashUint,
9090
// We use *const to ensure covariance with respect to K and V
9191
pair: *const (K, V),
@@ -216,6 +216,10 @@ impl<K, V, M> FullBucket<K, V, M> {
216216
pub fn index(&self) -> usize {
217217
self.idx
218218
}
219+
/// Get the raw bucket.
220+
pub fn raw(&self) -> RawBucket<K, V> {
221+
self.raw
222+
}
219223
}
220224

221225
impl<K, V, M> EmptyBucket<K, V, M> {
@@ -230,6 +234,10 @@ impl<K, V, M> Bucket<K, V, M> {
230234
pub fn index(&self) -> usize {
231235
self.idx
232236
}
237+
/// Consumes the bucket and returns the table it holds.
238+
pub fn into_table(self) -> M {
239+
self.table
240+
}
233241
}
234242

235243
impl<K, V, M> Deref for FullBucket<K, V, M>
@@ -275,6 +283,16 @@ impl<K, V, M: Deref<Target = RawTable<K, V>>> Bucket<K, V, M> {
275283
Bucket::at_index(table, hash.inspect() as usize)
276284
}
277285

286+
pub fn new_from(r: RawBucket<K, V>, i: usize, t: M)
287+
-> Bucket<K, V, M>
288+
{
289+
Bucket {
290+
raw: r,
291+
idx: i,
292+
table: t,
293+
}
294+
}
295+
278296
pub fn at_index(table: M, ib_index: usize) -> Bucket<K, V, M> {
279297
// if capacity is 0, then the RawBucket will be populated with bogus pointers.
280298
// This is an uncommon case though, so avoid it in release builds.
@@ -296,6 +314,40 @@ impl<K, V, M: Deref<Target = RawTable<K, V>>> Bucket<K, V, M> {
296314
}
297315
}
298316

317+
// "So a few of the first shall be last: for many be called,
318+
// but few chosen."
319+
//
320+
// We'll most likely encounter a few buckets at the beginning that
321+
// have their initial buckets near the end of the table. They were
322+
// placed at the beginning as the probe wrapped around the table
323+
// during insertion. We must skip forward to a bucket that won't
324+
// get reinserted too early and won't unfairly steal others' spot.
325+
// This eliminates the need for robin hood.
326+
pub fn head_bucket(table: M) -> Bucket<K, V, M> {
327+
let mut bucket = Bucket::first(table);
328+
329+
loop {
330+
bucket = match bucket.peek() {
331+
Full(full) => {
332+
if full.displacement() == 0 {
333+
// This bucket occupies its ideal spot.
334+
// It indicates the start of another "cluster".
335+
bucket = full.into_bucket();
336+
break;
337+
}
338+
// Leaving this bucket in the last cluster for later.
339+
full.into_bucket()
340+
}
341+
Empty(b) => {
342+
// Encountered a hole between clusters.
343+
b.into_bucket()
344+
}
345+
};
346+
bucket.next();
347+
}
348+
bucket
349+
}
350+
299351
/// Reads a bucket at a given index, returning an enum indicating whether
300352
/// it's initialized or not. You need to match on this enum to get
301353
/// the appropriate types to call most of the other functions in
@@ -333,6 +385,17 @@ impl<K, V, M: Deref<Target = RawTable<K, V>>> Bucket<K, V, M> {
333385
self.raw = self.raw.offset(dist);
334386
}
335387
}
388+
389+
/// Modifies the bucket pointer in place to make it point to the previous slot.
390+
pub fn prev(&mut self) {
391+
let range = self.table.capacity();
392+
let new_idx = self.idx.wrapping_sub(1) & (range - 1);
393+
let dist = (new_idx as isize).wrapping_sub(self.idx as isize);
394+
self.idx = new_idx;
395+
unsafe {
396+
self.raw = self.raw.offset(dist);
397+
}
398+
}
336399
}
337400

338401
impl<K, V, M: Deref<Target = RawTable<K, V>>> EmptyBucket<K, V, M> {
@@ -352,7 +415,7 @@ impl<K, V, M: Deref<Target = RawTable<K, V>>> EmptyBucket<K, V, M> {
352415
}
353416
}
354417

355-
pub fn gap_peek(self) -> Option<GapThenFull<K, V, M>> {
418+
pub fn gap_peek(self) -> Result<GapThenFull<K, V, M>, Bucket<K, V, M>> {
356419
let gap = EmptyBucket {
357420
raw: self.raw,
358421
idx: self.idx,
@@ -361,12 +424,12 @@ impl<K, V, M: Deref<Target = RawTable<K, V>>> EmptyBucket<K, V, M> {
361424

362425
match self.next().peek() {
363426
Full(bucket) => {
364-
Some(GapThenFull {
427+
Ok(GapThenFull {
365428
gap: gap,
366429
full: bucket,
367430
})
368431
}
369-
Empty(..) => None,
432+
Empty(e) => Err(e.into_bucket()),
370433
}
371434
}
372435
}
@@ -529,7 +592,11 @@ impl<K, V, M> GapThenFull<K, V, M>
529592
&self.full
530593
}
531594

532-
pub fn shift(mut self) -> Option<GapThenFull<K, V, M>> {
595+
pub fn into_bucket(self) -> Bucket<K, V, M> {
596+
self.full.into_bucket()
597+
}
598+
599+
pub fn shift(mut self) -> Result<GapThenFull<K, V, M>, Bucket<K, V, M>> {
533600
unsafe {
534601
*self.gap.raw.hash = mem::replace(&mut *self.full.raw.hash, EMPTY_BUCKET);
535602
ptr::copy_nonoverlapping(self.full.raw.pair, self.gap.raw.pair as *mut (K, V), 1);
@@ -544,9 +611,9 @@ impl<K, V, M> GapThenFull<K, V, M>
544611

545612
self.full = bucket;
546613

547-
Some(self)
614+
Ok(self)
548615
}
549-
Empty(..) => None,
616+
Empty(b) => Err(b.into_bucket()),
550617
}
551618
}
552619
}

0 commit comments

Comments
 (0)