From dc6bc2d3e88f2a157d296d5481963e32e47a3c3e Mon Sep 17 00:00:00 2001 From: Piotr Czarnecki Date: Thu, 5 Feb 2015 19:06:23 +0100 Subject: [PATCH] In-place growth of HashMap. Cleanup. * modified internal layout * use of NonZero hashes * refactoring * correct explanation of the load factor * better nomenclature * 'probe distance' -> 'displacement' --- src/libstd/collections/hash/map.rs | 660 +++++++++++--------- src/libstd/collections/hash/table.rs | 885 +++++++++------------------ 2 files changed, 659 insertions(+), 886 deletions(-) diff --git a/src/libstd/collections/hash/map.rs b/src/libstd/collections/hash/map.rs index 6327061047248..6c3a3595e9f79 100644 --- a/src/libstd/collections/hash/map.rs +++ b/src/libstd/collections/hash/map.rs @@ -11,7 +11,6 @@ // ignore-lexer-test FIXME #15883 use self::Entry::*; -use self::SearchResult::*; use self::VacantEntryState::*; use borrow::BorrowFrom; @@ -24,7 +23,7 @@ use iter::{self, Iterator, ExactSizeIterator, IntoIterator, IteratorExt, FromIte use marker::Sized; use mem::{self, replace}; use num::{Int, UnsignedInt}; -use ops::{Deref, FnMut, Index, IndexMut}; +use ops::{Deref, DerefMut, Drop, FnMut, Index, IndexMut}; use option::Option::{self, Some, None}; use rand::{self, Rng}; use result::Result::{self, Ok, Err}; @@ -96,28 +95,53 @@ fn test_resize_policy() { } } -// The main performance trick in this hashmap is called Robin Hood Hashing. -// It gains its excellent performance from one essential operation: +// The main performance trick in this hashmap is called Robin Hood linear +// probing. It gains its excellent performance from one essential operation: // // If an insertion collides with an existing element, and that element's -// "probe distance" (how far away the element is from its ideal location) -// is higher than how far we've already probed, swap the elements. +// "displacement" (how far away the element is from its ideal location) +// is lower than how far we've already probed, swap the elements. // -// This massively lowers variance in probe distance, and allows us to get very +// This massively lowers variance in displacement, and allows us to get very // high load factors with good performance. The 90% load factor I use is rather // conservative. // // > Why a load factor of approximately 90%? // // In general, all the distances to initial buckets will converge on the mean. -// At a load factor of α, the odds of finding the target bucket after k -// probes is approximately 1-α^k. If we set this equal to 50% (since we converge -// on the mean) and set k=8 (64-byte cache line / 8-byte hash), α=0.92. I round -// this down to make the math easier on the CPU and avoid its FPU. -// Since on average we start the probing in the middle of a cache line, this -// strategy pulls in two cache lines of hashes on every lookup. I think that's -// pretty good, but if you want to trade off some space, it could go down to one -// cache line on average with an α of 0.84. +// At a load factor of α, the odds of finding the target bucket after exactly n +// unsuccesful probes[3] are +// +// Pr{displacement = n} = +// (1 - α) / a * ∑_{k≥1} e^(-kα) * (kα)^(k+n) / (k + n)! 
* (1 - kα / (k + n + 1)) +// +// These odds can be approximated with this code: +// +// ``` +// use std::num::Float; +// use std::iter::AdditiveIterator; +// +// fn factorial(value: f64) -> f64 { +// if value == 0.0 { 1.0 } else { value * factorial(value - 1.0) } +// } +// +// fn psi(a: f64, displacement: u32) -> f64 { +// let n = displacement as f64; +// (1.0 / a - 1.0) * (0..130).map(|k_| { +// let k = k_ as f64; +// (-k * a).exp() * (1.0 - k * a / (k + n + 1.0)) * +// (k * a).powi((k_ + n_) as i32) / factorial(k + n) as f64 +// }).sum() +// } +// ``` +// +// If we set α=0.909, then Pr{displacement < 4} = 0.51 and Pr{displacement < +// 8} = 0.77. The exact value of 0.909 is chosen to make the math easier on +// the CPU and avoid its FPU. Since on average we start the probing in the +// middle of a cache line, this strategy pulls in one cache line of hashes on +// most lookups (64-byte cache line with 8-byte hash). I think this choice is +// pretty good, but α could go up to 0.95, or down to 0.84 to trade off some +// space. // // > Wait, what? Where did you get 1-α^k from? // @@ -126,23 +150,10 @@ fn test_resize_policy() { // α^3, etc. Therefore, the odds of colliding k times is α^k. The odds of NOT // colliding after k tries is 1-α^k. // -// The paper from 1986 cited below mentions an implementation which keeps track -// of the distance-to-initial-bucket histogram. This approach is not suitable -// for modern architectures because it requires maintaining an internal data -// structure. This allows very good first guesses, but we are most concerned -// with guessing entire cache lines, not individual indexes. Furthermore, array -// accesses are no longer linear and in one direction, as we have now. There -// is also memory and cache pressure that this would entail that would be very -// difficult to properly see in a microbenchmark. -// // ## Future Improvements (FIXME!) // // Allow the load factor to be changed dynamically and/or at initialization. // -// Also, would it be possible for us to reuse storage when growing the -// underlying table? This is exactly the use case for 'realloc', and may -// be worth exploring. -// // ## Future Optimizations (FIXME!) // // Another possible design choice that I made without any real reason is @@ -155,9 +166,9 @@ fn test_resize_policy() { // This would definitely be an avenue worth exploring if people start complaining // about the size of rust executables. // -// Annotate exceedingly likely branches in `table::make_hash` -// and `search_hashed` to reduce instruction cache pressure -// and mispredictions once it becomes possible (blocked on issue #11092). +// Annotate the exceedingly likely branch in `search_hashed` to reduce +// instruction cache pressure and mispredictions once it becomes possible +// (blocked on issue #11092). // // Shrinking the table could simply reallocate in place after moving buckets // to the first half. @@ -220,11 +231,12 @@ fn test_resize_policy() { /// /// Relevant papers/articles: /// -/// 1. Pedro Celis. ["Robin Hood Hashing"](https://cs.uwaterloo.ca/research/tr/1986/CS-86-14.pdf) -/// 2. Emmanuel Goossaert. ["Robin Hood +/// 1. Emmanuel Goossaert. ["Robin Hood /// hashing"](http://codecapsule.com/2013/11/11/robin-hood-hashing/) -/// 3. Emmanuel Goossaert. ["Robin Hood hashing: backward shift +/// 2. Emmanuel Goossaert. ["Robin Hood hashing: backward shift /// deletion"](http://codecapsule.com/2013/11/17/robin-hood-hashing-backward-shift-deletion/) +/// 3. Alfredo Viola (2005). 
Distributional analysis of Robin Hood linear probing +/// hashing with buckets. /// /// # Example /// @@ -296,8 +308,8 @@ fn test_resize_policy() { /// println!("{:?} has {} hp", viking, health); /// } /// ``` -#[derive(Clone)] #[stable(feature = "rust1", since = "1.0.0")] +#[derive(Clone)] pub struct HashMap { // All hashes are keyed on these values, to prevent hash collision attacks. hash_state: S, @@ -311,38 +323,49 @@ pub struct HashMap { fn search_hashed(table: M, hash: SafeHash, mut is_match: F) - -> SearchResult where + -> InternalEntry where M: Deref>, F: FnMut(&K) -> bool, { + // Worst case, we'll find one empty bucket among `size + 1` buckets. let size = table.size(); - let mut probe = Bucket::new(table, hash); + let mut probe = if let Some(probe) = Bucket::new(table, hash) { + probe + } else { + return InternalEntry::TableIsEmpty; + }; let ib = probe.index(); - while probe.index() != ib + size { - let full = match probe.peek() { - Empty(b) => return TableRef(b.into_table()), // hit an empty bucket - Full(b) => b + loop { + let bucket = match probe.peek() { + Empty(empty) => { + // Found a hole! + return InternalEntry::Vacant(NoElem(empty)); + } + Full(bucket) => bucket }; - if full.distance() + ib < full.index() { + let robin_ib = bucket.index() as int - bucket.displacement() as int; + + if (ib as isize) < robin_ib { // We can finish the search early if we hit any bucket // with a lower distance to initial bucket than we've probed. - return TableRef(full.into_table()); + return InternalEntry::Vacant(NeqElem(bucket, robin_ib as usize)); } // If the hash doesn't match, it can't be this one.. - if hash == full.hash() { + if hash == *bucket.read().0 { // If the key doesn't match, it can't be this one.. - if is_match(full.read().0) { - return FoundExisting(full); + if is_match(bucket.read().1) { + return InternalEntry::Occupied(OccupiedEntryState { + elem: bucket, + }); } } - probe = full.next(); + probe = bucket.into_next(); + assert!(probe.index() != ib + size + 1); } - - TableRef(probe.into_table()) } fn pop_internal(starting_bucket: FullBucketMut) -> (K, V) { @@ -352,7 +375,7 @@ fn pop_internal(starting_bucket: FullBucketMut) -> (K, V) { None => return (retkey, retval) }; - while gap.full().distance() != 0 { + while gap.full().displacement() != 0 { gap = match gap.shift() { Some(b) => b, None => break @@ -382,12 +405,15 @@ fn robin_hood<'a, K: 'a, V: 'a>(mut bucket: FullBucketMut<'a, K, V>, // There can be at most `size - dib` buckets to displace, because // in the worst case, there are `size` elements and we already are // `distance` buckets away from the initial one. - let idx_end = starting_index + size - bucket.distance(); + let idx_end = starting_index + size - bucket.displacement(); loop { - let (old_hash, old_key, old_val) = bucket.replace(hash, k, v); + let (old_hash, old_key, old_val) = { + let (h_ref, k_ref, v_ref) = bucket.read_mut(); + (replace(h_ref, hash), replace(k_ref, k), replace(v_ref, v)) + }; loop { - let probe = bucket.next(); + let probe = bucket.into_next(); assert!(probe.index() != idx_end); let full_bucket = match probe.peek() { @@ -396,16 +422,18 @@ fn robin_hood<'a, K: 'a, V: 'a>(mut bucket: FullBucketMut<'a, K, V>, let b = bucket.put(old_hash, old_key, old_val); // Now that it's stolen, just read the value's pointer // right out of the table! 
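The approximation snippet quoted in the header comment above targets the pre-1.0 standard library (`std::num::Float`, `AdditiveIterator`) and is meant for illustration only. For anyone who wants to re-derive the quoted probabilities, a standalone version in current Rust might look as follows; the function names, the 130-term cutoff, and α = 0.909 come from the comment, and everything else is an assumption of this sketch rather than code from the patch.

```rust
fn factorial(value: f64) -> f64 {
    if value <= 0.0 { 1.0 } else { value * factorial(value - 1.0) }
}

/// Estimate of Pr{displacement = n} at load factor `a`, following the
/// series given in the header comment of map.rs.
fn psi(a: f64, displacement: u32) -> f64 {
    let n = displacement as f64;
    (1.0 / a - 1.0)
        * (0..130u32)
            .map(|k_| {
                let k = k_ as f64;
                (-k * a).exp()
                    * (1.0 - k * a / (k + n + 1.0))
                    * (k * a).powi((k_ + displacement) as i32)
                    / factorial(k + n)
            })
            .sum::<f64>()
}

fn main() {
    let a = 0.909;
    // The header comment quotes Pr{displacement < 4} ≈ 0.51 and
    // Pr{displacement < 8} ≈ 0.77 for this load factor.
    let p4: f64 = (0..4).map(|n| psi(a, n)).sum();
    let p8: f64 = (0..8).map(|n| psi(a, n)).sum();
    println!("Pr{{displacement < 4}} ≈ {:.2}", p4);
    println!("Pr{{displacement < 8}} ≈ {:.2}", p8);
}
```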
- return Bucket::at_index(b.into_table(), starting_index) - .peek() - .expect_full() - .into_mut_refs() - .1; + let starting_bucket = Bucket::at_index(b.into_table(), starting_index).unwrap() + .peek(); + if let Full(starting_bucket) = starting_bucket { + return starting_bucket.into_mut_refs().1; + } else { + panic!("Expected full bucket"); + } }, Full(bucket) => bucket }; - let probe_ib = full_bucket.index() - full_bucket.distance(); + let probe_ib = full_bucket.index() - full_bucket.displacement(); bucket = full_bucket; @@ -421,23 +449,28 @@ fn robin_hood<'a, K: 'a, V: 'a>(mut bucket: FullBucketMut<'a, K, V>, } } -/// A result that works like Option> but preserves -/// the reference that grants us access to the table in any case. -enum SearchResult { - // This is an entry that holds the given key: - FoundExisting(FullBucket), - - // There was no such entry. The reference is given back: - TableRef(M) -} - -impl SearchResult { - fn into_option(self) -> Option> { - match self { - FoundExisting(bucket) => Some(bucket), - TableRef(_) => None - } +// Performs insertion with relaxed requirements. +// The caller should ensure that invariants of Robin Hood linear probing hold. +fn insert_hashed_ordered(arg: M, h: SafeHash, k: K, v: V) -> M + where M: Deref> + DerefMut +{ + let cap = arg.capacity(); + let mut buckets = Bucket::new(arg, h).unwrap(); + let ib = buckets.index(); + + while buckets.index() != ib + cap { + // We don't need to compare hashes for value swap. + // Not even DIBs for Robin Hood. + buckets = match buckets.peek() { + Empty(empty) => { + return empty.put(h, k, v).into_table(); + } + Full(full) => full.into_bucket() + }; + buckets.next(); } + + panic!("Internal HashMap error: Out of space."); } impl HashMap @@ -456,37 +489,20 @@ impl HashMap where Q: BorrowFrom + Eq + Hash { let hash = self.make_hash(q); - search_hashed(&self.table, hash, |k| q.eq(BorrowFrom::borrow_from(k))) - .into_option() + match search_hashed(&self.table, hash, |k| q.eq(BorrowFrom::borrow_from(k))) { + InternalEntry::Occupied(bucket) => Some(bucket.elem), + _ => None, + } } fn search_mut<'a, Q: ?Sized>(&'a mut self, q: &Q) -> Option> where Q: BorrowFrom + Eq + Hash { let hash = self.make_hash(q); - search_hashed(&mut self.table, hash, |k| q.eq(BorrowFrom::borrow_from(k))) - .into_option() - } - - // The caller should ensure that invariants by Robin Hood Hashing hold. - fn insert_hashed_ordered(&mut self, hash: SafeHash, k: K, v: V) { - let cap = self.table.capacity(); - let mut buckets = Bucket::new(&mut self.table, hash); - let ib = buckets.index(); - - while buckets.index() != ib + cap { - // We don't need to compare hashes for value swap. - // Not even DIBs for Robin Hood. 
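To make the `insert_hashed_ordered` contract above concrete: when entries arrive in non-decreasing order of ideal bucket (which is what the resize path arranges by walking the old table from a cluster boundary), dropping each one into the first free slot at or after its ideal bucket already yields a valid Robin Hood layout, so no hash or displacement comparisons are needed. A safe model of that idea, with slots as `Option<(ideal_bucket, value)>` and wrap-around ignored; the names are illustrative, not the types used by this patch.

```rust
fn insert_ordered<'a>(slots: &mut Vec<Option<(usize, &'a str)>>, ideal: usize, val: &'a str) {
    let mut idx = ideal;
    // Entries arrive with non-decreasing ideal buckets, so a plain scan for
    // the first hole never has to displace anything.
    while slots[idx].is_some() {
        idx += 1; // the real code wraps around; the sketch assumes enough room
    }
    slots[idx] = Some((ideal, val));
}

fn main() {
    let mut slots: Vec<Option<(usize, &str)>> = vec![None; 8];
    // Ideal buckets already sorted, as during reinsertion on resize.
    for &(ideal, val) in &[(1, "a"), (1, "b"), (2, "c"), (5, "d")] {
        insert_ordered(&mut slots, ideal, val);
    }
    assert_eq!(slots[1], Some((1, "a")));
    assert_eq!(slots[2], Some((1, "b")));
    assert_eq!(slots[3], Some((2, "c")));
    assert_eq!(slots[5], Some((5, "d")));
}
```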
- buckets = match buckets.peek() { - Empty(empty) => { - empty.put(hash, k, v); - return; - } - Full(b) => b.into_bucket() - }; - buckets.next(); + match search_hashed(&mut self.table, hash, |k| q.eq(BorrowFrom::borrow_from(k))) { + InternalEntry::Occupied(bucket) => Some(bucket.elem), + _ => None, } - panic!("Internal HashMap error: Out of space."); } } @@ -635,75 +651,100 @@ impl HashMap assert!(self.table.size() <= new_capacity); assert!(new_capacity.is_power_of_two() || new_capacity == 0); - let mut old_table = replace(&mut self.table, RawTable::new(new_capacity)); - let old_size = old_table.size(); + let old_size = self.table.size(); + let old_capacity = self.table.capacity(); - if old_table.capacity() == 0 || old_table.size() == 0 { + if self.table.capacity() == 0 || self.table.size() == 0 { + self.table = RawTable::new(new_capacity); return; } // Grow the table. - // Specialization of the other branch. - let mut bucket = Bucket::first(&mut old_table); + let is_inplace = self.table.grow_inplace(new_capacity); - // "So a few of the first shall be last: for many be called, - // but few chosen." - // - // We'll most likely encounter a few buckets at the beginning that - // have their initial buckets near the end of the table. They were - // placed at the beginning as the probe wrapped around the table - // during insertion. We must skip forward to a bucket that won't - // get reinserted too early and won't unfairly steal others spot. - // This eliminates the need for robin hood. - loop { - bucket = match bucket.peek() { - Full(full) => { - if full.distance() == 0 { - // This bucket occupies its ideal spot. - // It indicates the start of another "cluster". - bucket = full.into_bucket(); - break; - } - // Leaving this bucket in the last cluster for later. - full.into_bucket() - } - Empty(b) => { - // Encountered a hole between clusters. - b.into_bucket() - } - }; - bucket.next(); - } + let mut old_table = if is_inplace { + None + } else { + Some(replace(&mut self.table, RawTable::new(new_capacity))) + }; - // This is how the buckets might be laid out in memory: - // ($ marks an initialized bucket) - // ________________ - // |$$$_$$$$$$_$$$$$| - // - // But we've skipped the entire initial cluster of buckets - // and will continue iteration in this order: - // ________________ - // |$$$$$$_$$$$$ - // ^ wrap around once end is reached - // ________________ - // $$$_____________| - // ^ exit once table.size == 0 - loop { - bucket = match bucket.peek() { - Full(bucket) => { - let h = bucket.hash(); - let (b, k, v) = bucket.take(); - self.insert_hashed_ordered(h, k, v); - { - let t = b.table(); // FIXME "lifetime too short". - if t.size() == 0 { break } - }; - b.into_bucket() - } - Empty(b) => b.into_bucket() + { + let (source, mut destination) = if let Some(ref mut old_table) = old_table { + // Borrow self.table in both branches to satisfy the checker. + (old_table, Some(&mut self.table)) + } else { + // Resizing in-place. + (&mut self.table, None) }; - bucket.next(); - } + + // Iterate over `old_capacity` buckets, which constitute half of + // the table which was resized in-place, or the entire + // `old_table`. + let mut bucket = Bucket::at_index(source, 0).unwrap().iter_to(old_capacity); + + // "So a few of the first shall be last: for many be called, + // but few chosen." + // + // We'll most likely encounter a few buckets at the beginning that + // have their initial buckets near the end of the table. 
They were + // placed at the beginning as the probe wrapped around the table + // during insertion. We must skip forward to a bucket that won't + // get reinserted too early and won't unfairly steal others spot. + // This eliminates the need for robin hood. + loop { + bucket = match bucket.peek() { + Full(full) => { + if full.displacement() == 0 { + // This bucket occupies its ideal spot. + // It indicates the start of another "cluster". + bucket = full.into_bucket(); + break; + } + // Leaving this bucket in the last cluster for later. + full.into_bucket() + } + Empty(b) => { + // Encountered a hole between clusters. + b.into_bucket() + } + }; + bucket.next(); + } + + // This is how the buckets might be laid out in memory: + // ($ marks an initialized bucket) + // ________________ + // |$$$_$$$$$$_$$$$$| + // + // But we've skipped the entire initial cluster of buckets + // and will continue iteration in this order: + // ________________ + // |$$$$$$_$$$$$ + // ^ wrap around once end is reached + // ________________ + // $$$_____________| + // ^ exit once table.size == 0 + let idx_end = bucket.index() + old_capacity; + + while bucket.index() != idx_end { + bucket = match bucket.peek() { + Full(bucket) => { + let h = *bucket.read().0; + let (b, k, v) = bucket.take(); + + if let Some(ref mut dest) = destination { + insert_hashed_ordered(&mut **dest, h, k, v); + b.into_bucket() + } else { + // Resizing in-place. + insert_hashed_ordered(b.into_bucket(), h, k, v) + } + } + Empty(b) => b.into_bucket() + }; + bucket.next(); // wraps at old_capacity + } + }; assert_eq!(self.table.size(), old_size); } @@ -751,53 +792,16 @@ impl HashMap /// /// If the key already exists, the hashtable will be returned untouched /// and a reference to the existing element will be returned. - fn insert_hashed_nocheck(&mut self, hash: SafeHash, k: K, v: V) -> &mut V { - self.insert_or_replace_with(hash, k, v, |_, _, _| ()) - } - - fn insert_or_replace_with<'a, F>(&'a mut self, - hash: SafeHash, - k: K, - v: V, - mut found_existing: F) - -> &'a mut V where - F: FnMut(&mut K, &mut V, V), - { - // Worst case, we'll find one empty bucket among `size + 1` buckets. - let size = self.table.size(); - let mut probe = Bucket::new(&mut self.table, hash); - let ib = probe.index(); - - loop { - let mut bucket = match probe.peek() { - Empty(bucket) => { - // Found a hole! - return bucket.put(hash, k, v).into_mut_refs().1; - } - Full(bucket) => bucket - }; - - // hash matches? - if bucket.hash() == hash { - // key matches? - if k == *bucket.read_mut().0 { - let (bucket_k, bucket_v) = bucket.into_mut_refs(); - debug_assert!(k == *bucket_k); - // Key already exists. Get its reference. - found_existing(bucket_k, bucket_v, v); - return bucket_v; - } + fn insert_hashed_nocheck(&mut self, hash: SafeHash, k: K, v: V) -> Option { + match search_hashed(&mut self.table, hash, |key| *key == k) { + InternalEntry::Vacant(entry) => { + entry.insert(hash, k, v); + return None; } - - let robin_ib = bucket.index() as int - bucket.distance() as int; - - if (ib as int) < robin_ib { - // Found a luckier bucket than me. Better steal his spot. 
- return robin_hood(bucket, robin_ib as uint, hash, k, v); + InternalEntry::Occupied(mut entry) => { + return Some(entry.insert(v)); } - - probe = bucket.next(); - assert!(probe.index() != ib + size + 1); + InternalEntry::TableIsEmpty => unreachable!() } } @@ -870,7 +874,10 @@ impl HashMap /// ``` #[stable(feature = "rust1", since = "1.0.0")] pub fn iter(&self) -> Iter { - Iter { inner: self.table.iter() } + Iter { + elems_left: self.table.size(), + inner: self.table.raw_full_buckets(), + } } /// An iterator visiting all key-value pairs in arbitrary order, @@ -898,7 +905,10 @@ impl HashMap /// ``` #[stable(feature = "rust1", since = "1.0.0")] pub fn iter_mut(&mut self) -> IterMut { - IterMut { inner: self.table.iter_mut() } + IterMut { + elems_left: self.table.size(), + inner: self.table.raw_full_buckets_mut(), + } } /// Creates a consuming iterator, that is, one that moves each key-value @@ -937,7 +947,15 @@ impl HashMap self.reserve(1); let hash = self.make_hash(&key); - search_entry_hashed(&mut self.table, hash, key) + match search_hashed(&mut self.table, hash, |k| *k == key) { + InternalEntry::Occupied(state) => Occupied(state), + InternalEntry::Vacant(bucket) => Vacant(VacantEntry { + elem: bucket, + hash: hash, + key: key, + }), + InternalEntry::TableIsEmpty => unreachable!() + } } /// Returns the number of elements in the map. @@ -994,11 +1012,8 @@ impl HashMap #[unstable(feature = "std_misc", reason = "matches collection reform specification, waiting for dust to settle")] pub fn drain(&mut self) -> Drain { - fn last_two((_, b, c): (A, B, C)) -> (B, C) { (b, c) } - let last_two: fn((SafeHash, K, V)) -> (K, V) = last_two; // coerce to fn pointer - Drain { - inner: self.table.drain().map(last_two), + inner: Bucket::at_index(&mut self.table, 0).unwrap() } } @@ -1113,12 +1128,7 @@ impl HashMap pub fn insert(&mut self, k: K, v: V) -> Option { let hash = self.make_hash(&k); self.reserve(1); - - let mut retval = None; - self.insert_or_replace_with(hash, k, v, |_, val_ref, val| { - retval = Some(replace(val_ref, val)); - }); - retval + self.insert_hashed_nocheck(hash, k, v) } /// Removes a key from the map, returning the value at the key if the key @@ -1150,53 +1160,6 @@ impl HashMap } } -fn search_entry_hashed<'a, K: Eq, V>(table: &'a mut RawTable, hash: SafeHash, k: K) - -> Entry<'a, K, V> -{ - // Worst case, we'll find one empty bucket among `size + 1` buckets. - let size = table.size(); - let mut probe = Bucket::new(table, hash); - let ib = probe.index(); - - loop { - let bucket = match probe.peek() { - Empty(bucket) => { - // Found a hole! - return Vacant(VacantEntry { - hash: hash, - key: k, - elem: NoElem(bucket), - }); - }, - Full(bucket) => bucket - }; - - // hash matches? - if bucket.hash() == hash { - // key matches? - if k == *bucket.read().0 { - return Occupied(OccupiedEntry{ - elem: bucket, - }); - } - } - - let robin_ib = bucket.index() as int - bucket.distance() as int; - - if (ib as int) < robin_ib { - // Found a luckier bucket than me. Better steal his spot. - return Vacant(VacantEntry { - hash: hash, - key: k, - elem: NeqElem(bucket, robin_ib as uint), - }); - } - - probe = bucket.next(); - assert!(probe.index() != ib + size + 1); - } -} - impl PartialEq for HashMap where K: Eq + Hash, V: PartialEq, S: HashState, @@ -1280,14 +1243,16 @@ impl IndexMut for HashMap /// HashMap iterator. 
#[stable(feature = "rust1", since = "1.0.0")] pub struct Iter<'a, K: 'a, V: 'a> { - inner: table::Iter<'a, K, V> + inner: table::RawFullBuckets>, + elems_left: usize, } // FIXME(#19839) Remove in favor of `#[derive(Clone)]` impl<'a, K, V> Clone for Iter<'a, K, V> { fn clone(&self) -> Iter<'a, K, V> { Iter { - inner: self.inner.clone() + inner: self.inner.clone(), + elems_left: self.elems_left, } } } @@ -1295,7 +1260,8 @@ impl<'a, K, V> Clone for Iter<'a, K, V> { /// HashMap mutable values iterator. #[stable(feature = "rust1", since = "1.0.0")] pub struct IterMut<'a, K: 'a, V: 'a> { - inner: table::IterMut<'a, K, V> + inner: table::RawFullBuckets>, + elems_left: usize, } /// HashMap move iterator. @@ -1338,15 +1304,24 @@ impl<'a, K, V> Clone for Values<'a, K, V> { #[unstable(feature = "std_misc", reason = "matches collection reform specification, waiting for dust to settle")] pub struct Drain<'a, K: 'a, V: 'a> { - inner: iter::Map, fn((SafeHash, K, V)) -> (K, V)> + // inner: iter::Map< + // (SafeHash, K, V), + // (K, V), + // table::Drain<'a, K, V>, + // fn((SafeHash, K, V)) -> (K, V), + // > + inner: table::Bucket>, + + // /// Iterator over the entries in a table, clearing the table. + // pub struct Drain<'a, K: 'a, V: 'a> { + // table: &'a mut RawTable, + // } } /// A view into a single occupied location in a HashMap. #[unstable(feature = "std_misc", reason = "precise API still being fleshed out")] -pub struct OccupiedEntry<'a, K: 'a, V: 'a> { - elem: FullBucket>, -} +pub type OccupiedEntry<'a, K: 'a, V: 'a> = OccupiedEntryState>; /// A view into a single empty location in a HashMap. #[unstable(feature = "std_misc", @@ -1367,6 +1342,11 @@ pub enum Entry<'a, K: 'a, V: 'a> { Vacant(VacantEntry<'a, K, V>), } +/// A view into a single occupied location in a HashMap. +struct OccupiedEntryState { + elem: FullBucket, +} + /// Possible states of a VacantEntry. enum VacantEntryState { /// The index is occupied, but the key to insert has precedence, @@ -1412,36 +1392,68 @@ impl IntoIterator for HashMap } } + +enum InternalEntry { + Occupied(OccupiedEntryState), + Vacant(VacantEntryState), + /// The table is empty. Cannot create `EmptyBucket`. 
+ TableIsEmpty, +} + #[stable(feature = "rust1", since = "1.0.0")] impl<'a, K, V> Iterator for Iter<'a, K, V> { type Item = (&'a K, &'a V); - #[inline] fn next(&mut self) -> Option<(&'a K, &'a V)> { self.inner.next() } - #[inline] fn size_hint(&self) -> (usize, Option) { self.inner.size_hint() } + fn next(&mut self) -> Option<(&'a K, &'a V)> { + self.inner.next().map(|bucket| { + self.elems_left -= 1; + bucket.into_refs() + }) + } + + fn size_hint(&self) -> (usize, Option) { + (self.elems_left, Some(self.elems_left)) + } } #[stable(feature = "rust1", since = "1.0.0")] impl<'a, K, V> ExactSizeIterator for Iter<'a, K, V> { - #[inline] fn len(&self) -> usize { self.inner.len() } + #[inline] fn len(&self) -> usize { self.elems_left } } #[stable(feature = "rust1", since = "1.0.0")] impl<'a, K, V> Iterator for IterMut<'a, K, V> { type Item = (&'a K, &'a mut V); - #[inline] fn next(&mut self) -> Option<(&'a K, &'a mut V)> { self.inner.next() } - #[inline] fn size_hint(&self) -> (usize, Option) { self.inner.size_hint() } + fn next(&mut self) -> Option<(&'a K, &'a mut V)> { + self.inner.next().map(|bucket| { + self.elems_left -= 1; + let (k, v) = bucket.into_mut_refs(); + (&*k, v) + }) + } + + fn size_hint(&self) -> (usize, Option) { + (self.elems_left, Some(self.elems_left)) + } } #[stable(feature = "rust1", since = "1.0.0")] impl<'a, K, V> ExactSizeIterator for IterMut<'a, K, V> { - #[inline] fn len(&self) -> usize { self.inner.len() } + #[inline] fn len(&self) -> usize { self.elems_left } } #[stable(feature = "rust1", since = "1.0.0")] impl Iterator for IntoIter { type Item = (K, V); - #[inline] fn next(&mut self) -> Option<(K, V)> { self.inner.next() } - #[inline] fn size_hint(&self) -> (usize, Option) { self.inner.size_hint() } + #[inline] + fn next(&mut self) -> Option<(K, V)> { + self.inner.next() + } + + #[inline] + fn size_hint(&self) -> (usize, Option) { + self.inner.size_hint() + } } #[stable(feature = "rust1", since = "1.0.0")] impl ExactSizeIterator for IntoIter { @@ -1472,16 +1484,37 @@ impl<'a, K, V> ExactSizeIterator for Values<'a, K, V> { #[inline] fn len(&self) -> usize { self.inner.len() } } -#[stable(feature = "rust1", since = "1.0.0")] + impl<'a, K, V> Iterator for Drain<'a, K, V> { type Item = (K, V); - #[inline] fn next(&mut self) -> Option<(K, V)> { self.inner.next() } - #[inline] fn size_hint(&self) -> (usize, Option) { self.inner.size_hint() } + fn next(&mut self) -> Option<(K, V)> { + let cap = self.inner.table().capacity(); + while self.inner.index() < cap { + if let Some(kv_pair) = self.inner.peek_take() { + return Some(kv_pair); + } + self.inner.next(); + } + + None + } + + fn size_hint(&self) -> (usize, Option) { + let size = self.inner.table().size(); + (size, Some(size)) + } } -#[stable(feature = "rust1", since = "1.0.0")] + impl<'a, K, V> ExactSizeIterator for Drain<'a, K, V> { - #[inline] fn len(&self) -> usize { self.inner.len() } + fn len(&self) -> usize { self.inner.table().size() } +} + +#[unsafe_destructor] +impl<'a, K: 'a, V: 'a> Drop for Drain<'a, K, V> { + fn drop(&mut self) { + for _ in self {} + } } #[unstable(feature = "std_misc", @@ -1498,21 +1531,19 @@ impl<'a, K, V> Entry<'a, K, V> { #[unstable(feature = "std_misc", reason = "matches collection reform v2 specification, waiting for dust to settle")] -impl<'a, K, V> OccupiedEntry<'a, K, V> { +impl>> OccupiedEntryState { /// Gets a reference to the value in the entry. 
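A usage-level sketch of the entry API that these occupied/vacant states back, written against the stable `std::collections::HashMap` surface; it shows why one hashed search can serve both the "already present" and "not yet present" cases.

```rust
use std::collections::HashMap;

fn main() {
    let mut counts: HashMap<&str, u32> = HashMap::new();
    for word in ["robin", "hood", "robin"] {
        // `entry` performs the hashed search once and hands back either an
        // occupied or a vacant view, so each count needs a single lookup.
        *counts.entry(word).or_insert(0) += 1;
    }
    assert_eq!(counts["robin"], 2);
    assert_eq!(counts["hood"], 1);
}
```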
pub fn get(&self) -> &V { - self.elem.read().1 + self.elem.read().2 } +} +#[unstable(feature = "std_misc", + reason = "matches collection reform v2 specification, waiting for dust to settle")] +impl>> OccupiedEntryState { /// Gets a mutable reference to the value in the entry. pub fn get_mut(&mut self) -> &mut V { - self.elem.read_mut().1 - } - - /// Converts the OccupiedEntry into a mutable reference to the value in the entry - /// with a lifetime bound to the map itself - pub fn into_mut(self) -> &'a mut V { - self.elem.into_mut_refs().1 + self.elem.read_mut().2 } /// Sets the value of the entry, and returns the entry's old value @@ -1521,6 +1552,16 @@ impl<'a, K, V> OccupiedEntry<'a, K, V> { mem::swap(&mut value, old_value); value } +} + +#[unstable(feature = "std_misc", + reason = "matches collection reform v2 specification, waiting for dust to settle")] +impl<'a, K: 'a, V: 'a> OccupiedEntryState> { + /// Converts the OccupiedEntry into a mutable reference to the value in the entry + /// with a lifetime bound to the map itself + pub fn into_mut(self) -> &'a mut V { + self.elem.into_mut_refs().1 + } /// Takes the value out of the entry, and returns it pub fn remove(self) -> V { @@ -1533,13 +1574,22 @@ impl<'a, K, V> OccupiedEntry<'a, K, V> { impl<'a, K: 'a, V: 'a> VacantEntry<'a, K, V> { /// Sets the value of the entry with the VacantEntry's key, /// and returns a mutable reference to it + #[inline] pub fn insert(self, value: V) -> &'a mut V { - match self.elem { + self.elem.insert(self.hash, self.key, value) + } +} + +impl<'a, K: 'a, V: 'a> VacantEntryState> { + /// Sets the value of the entry with the provided key, + /// and returns a mutable reference to it + fn insert(self, hash: SafeHash, key: K, value: V) -> &'a mut V { + match self { NeqElem(bucket, ib) => { - robin_hood(bucket, ib, self.hash, self.key, value) + robin_hood(bucket, ib, hash, key, value) } NoElem(bucket) => { - bucket.put(self.hash, self.key, value).into_mut_refs().1 + bucket.put(hash, key, value).into_mut_refs().1 } } } @@ -1670,6 +1720,20 @@ mod test_map { assert_eq!(*m.get(&2).unwrap(), 4); } + #[test] + fn test_clone() { + let mut m = HashMap::new(); + assert_eq!(m.len(), 0); + assert!(m.insert(1i, 2i).is_none()); + assert_eq!(m.len(), 1); + assert!(m.insert(2i, 4i).is_none()); + assert_eq!(m.len(), 2); + let m2 = m.clone(); + assert_eq!(*m2.get(&1).unwrap(), 2); + assert_eq!(*m2.get(&2).unwrap(), 4); + assert_eq!(m2.len(), 2); + } + thread_local! 
{ static DROP_VECTOR: RefCell> = RefCell::new(Vec::new()) } #[derive(Hash, PartialEq, Eq)] @@ -1761,7 +1825,7 @@ mod test_map { } #[test] - fn test_move_iter_drops() { + fn test_into_iter_drops() { DROP_VECTOR.with(|v| { *v.borrow_mut() = repeat(0).take(200).collect(); }); diff --git a/src/libstd/collections/hash/table.rs b/src/libstd/collections/hash/table.rs index 8952b81690186..26623b86ebbb6 100644 --- a/src/libstd/collections/hash/table.rs +++ b/src/libstd/collections/hash/table.rs @@ -16,18 +16,17 @@ use clone::Clone; use cmp; use hash::{Hash, Hasher}; use iter::{Iterator, IteratorExt, ExactSizeIterator, count}; -use marker::{Copy, Send, Sync, Sized, self}; +use marker::{Copy, Sized}; use mem::{min_align_of, size_of}; use mem; use num::{Int, UnsignedInt}; use ops::{Deref, DerefMut, Drop}; use option::Option; use option::Option::{Some, None}; -use ptr::{self, PtrExt, copy_nonoverlapping_memory, zero_memory}; -use rt::heap::{allocate, deallocate}; +use ptr::{self, Unique, PtrExt, copy_memory, copy_nonoverlapping_memory, zero_memory}; +use rt::heap::{allocate, deallocate, reallocate_inplace}; use collections::hash_state::HashState; - -const EMPTY_BUCKET: u64 = 0u64; +use core::nonzero::NonZero; /// The raw hashtable, providing safe-ish access to the unzipped and highly /// optimized arrays of hashes, keys, and values. @@ -38,23 +37,22 @@ const EMPTY_BUCKET: u64 = 0u64; /// /// Essential invariants of this structure: /// -/// - if t.hashes[i] == EMPTY_BUCKET, then `Bucket::at_index(&t, i).raw` +/// - if t.hashes[i] == None, then `Bucket::at_index(&t, i).raw` /// points to 'undefined' contents. Don't read from it. This invariant is /// enforced outside this module with the `EmptyBucket`, `FullBucket`, /// and `SafeHash` types. /// /// - An `EmptyBucket` is only constructed at an index with -/// a hash of EMPTY_BUCKET. +/// a hash of None. /// -/// - A `FullBucket` is only constructed at an index with a -/// non-EMPTY_BUCKET hash. +/// - A `FullBucket` is only constructed at an index with a hash. /// -/// - A `SafeHash` is only constructed for non-`EMPTY_BUCKET` hash. We get +/// - A `SafeHash` is only constructed for non-zero hash. We get /// around hashes of zero by changing them to 0x8000_0000_0000_0000, /// which will likely map to the same bucket, while not being confused /// with "empty". /// -/// - All three "arrays represented by pointers" are the same length: +/// - Both "arrays represented by pointers" have the same length: /// `capacity`. This is set at creation and never changes. The arrays /// are unzipped to save space (we don't have to pay for the padding /// between odd sized elements, such as in a map from u64 to u8), and @@ -67,50 +65,45 @@ const EMPTY_BUCKET: u64 = 0u64; /// but in general is just a tricked out `Vec>`. #[unsafe_no_drop_flag] pub struct RawTable { - capacity: uint, - size: uint, - hashes: *mut u64, - // Because K/V do not appear directly in any of the types in the struct, - // inform rustc that in fact instances of K and V are reachable from here. - marker: marker::CovariantType<(K,V)>, + capacity: usize, + size: usize, + // NB. The table will probably need manual impls of Send and Sync if this + // field ever changes. 
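The space argument behind the `NonZero` hashes can be checked directly. This sketch uses the stable `std::num::NonZeroU64` rather than the internal `core::nonzero` type the patch uses; the constant is the same set-the-MSB trick described above, so the all-zero word is free to mean "empty bucket".

```rust
use std::mem::size_of;
use std::num::NonZeroU64;

fn main() {
    // The niche optimisation: wrapping the hash in Option costs no space.
    assert_eq!(size_of::<Option<NonZeroU64>>(), size_of::<u64>());

    // A raw hash of zero is remapped by setting the MSB, so every stored
    // hash fits in NonZeroU64.
    let raw_hash: u64 = 0;
    let safe = NonZeroU64::new(0x8000_0000_0000_0000 | raw_hash).unwrap();
    assert_eq!(safe.get(), 0x8000_0000_0000_0000);
    assert_eq!(NonZeroU64::new(0), None); // the zero pattern is the niche
}
```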
+ middle: Unique<(K, V)>, } -unsafe impl Send for RawTable {} -unsafe impl Sync for RawTable {} - -struct RawBucket { - hash: *mut u64, - key: *mut K, - val: *mut V +struct RawBucket { + hash: *mut Option, + kval: *mut (K, V), } -impl Copy for RawBucket {} +impl Copy for RawBucket {} -pub struct Bucket { - raw: RawBucket, - idx: uint, - table: M +struct BareBucket { + raw: RawBucket, + idx: usize, + capacity: usize, + table: M, } -impl Copy for Bucket {} +impl Copy for BareBucket {} -pub struct EmptyBucket { - raw: RawBucket, - idx: uint, - table: M -} +pub struct Bucket(BareBucket); + +impl Copy for Bucket {} -pub struct FullBucket { - raw: RawBucket, - idx: uint, - table: M +mod bucket { + pub enum Empty {} + pub enum Full {} } -pub type EmptyBucketImm<'table, K, V> = EmptyBucket>; -pub type FullBucketImm<'table, K, V> = FullBucket>; +pub type EmptyBucket = Bucket; +pub type EmptyBucketImm<'t, K, V> = EmptyBucket>; +pub type EmptyBucketMut<'t, K, V> = EmptyBucket>; -pub type EmptyBucketMut<'table, K, V> = EmptyBucket>; -pub type FullBucketMut<'table, K, V> = FullBucket>; +pub type FullBucket = Bucket; +pub type FullBucketImm<'t, K, V> = FullBucket>; +pub type FullBucketMut<'t, K, V> = FullBucket>; pub enum BucketState { Empty(EmptyBucket), @@ -127,16 +120,7 @@ struct GapThenFull { /// A hash that is not zero, since we use a hash of zero to represent empty /// buckets. -#[derive(PartialEq, Copy)] -pub struct SafeHash { - hash: u64, -} - -impl SafeHash { - /// Peek at the hash value, which is guaranteed to be non-zero. - #[inline(always)] - pub fn inspect(&self) -> u64 { self.hash } -} +pub type SafeHash = NonZero; /// We need to remove hashes of 0. That's reserved for empty buckets. /// This function wraps up `hash_keyed` to be the only way outside this @@ -152,93 +136,109 @@ pub fn make_hash(hash_state: &S, t: &T) -> SafeHash // EMPTY_HASH. We can maintain our precious uniform distribution // of initial indexes by unconditionally setting the MSB, // effectively reducing 64-bits hashes to 63 bits. - SafeHash { hash: 0x8000_0000_0000_0000 | state.finish() } + unsafe { NonZero::new(0x8000_0000_0000_0000 | state.finish()) } } -// `replace` casts a `*u64` to a `*SafeHash`. Since we statically -// ensure that a `FullBucket` points to an index with a non-zero hash, -// and a `SafeHash` is just a `u64` with a different name, this is -// safe. +// `read` casts a `*mut SafeHash` to a `*mut Option`. Since we +// statically ensure that a `FullBucket` points to an index with a non-zero +// hash, and a `SafeHash` is Zeroable, this is safe. // // This test ensures that a `SafeHash` really IS the same size as a -// `u64`. If you need to change the size of `SafeHash` (and -// consequently made this test fail), `replace` needs to be -// modified to no longer assume this. +// `Option`. If you need to change the nullability of `SafeHash`, +// some functions need to be modified to no longer assume this. #[test] -fn can_alias_safehash_as_u64() { - assert_eq!(size_of::(), size_of::()) +fn can_alias_safehash_as_option() { + assert_eq!(size_of::(), size_of::>()) } -impl RawBucket { - unsafe fn offset(self, count: int) -> RawBucket { +impl RawBucket { + unsafe fn offset(self, count: int) -> RawBucket { RawBucket { hash: self.hash.offset(count), - key: self.key.offset(count), - val: self.val.offset(count), + kval: self.kval.offset(count), } } } -// Buckets hold references to the table. -impl FullBucket { - /// Borrow a reference to the table. 
- pub fn table(&self) -> &M { - &self.table - } - /// Move out the reference to the table. - pub fn into_table(self) -> M { - self.table +impl<'t, K, V, M: Deref> + 't> RawBucket { + pub fn into_refs(self) -> (&'t K, &'t V) { + let &(ref k, ref v) = unsafe { + &*self.kval + }; + (k, v) } - /// Get the raw index. - pub fn index(&self) -> uint { - self.idx +} + +impl<'t, K, V, M> RawBucket + where M: Deref> + DerefMut + 't +{ + pub fn into_mut_refs(self) -> (&'t mut K, &'t mut V) { + let &mut (ref mut k, ref mut v) = unsafe { + &mut *self.kval + }; + (k, v) } } -impl EmptyBucket { - /// Borrow a reference to the table. - pub fn table(&self) -> &M { - &self.table +// Chain buckets! This is perfectly safe as long as operations on one +// `Bucket` can't invalidate other `Bucket`s. +impl>> Deref for Bucket { + type Target = RawTable; + + fn deref(&self) -> &RawTable { + &*self.0.table } - /// Move out the reference to the table. - pub fn into_table(self) -> M { - self.table +} + +impl>> DerefMut for Bucket { + fn deref_mut(&mut self) -> &mut RawTable { + &mut *self.0.table } } -impl Bucket { +// Buckets hold references to the table. +impl Bucket { + /// Borrow a reference to the table. + pub fn table(&self) -> &M { + &self.0.table + } /// Move out the reference to the table. pub fn into_table(self) -> M { - self.table + self.0.table } /// Get the raw index. pub fn index(&self) -> uint { - self.idx + self.0.idx } } impl>> Bucket { - pub fn new(table: M, hash: SafeHash) -> Bucket { - Bucket::at_index(table, hash.inspect() as uint) - } - - pub fn at_index(table: M, ib_index: uint) -> Bucket { - let ib_index = ib_index & (table.capacity() - 1); - Bucket { - raw: unsafe { - table.first_bucket_raw().offset(ib_index as int) - }, - idx: ib_index, - table: table + pub fn new(table: M, hash: SafeHash) -> Option> { + Bucket::at_index(table, *hash as usize) + } + + pub fn at_index(table: M, ib_index: uint) -> Option> { + if table.capacity() == 0 { + None + } else { + let ib_index = ib_index & (table.capacity() - 1); + Some(Bucket(BareBucket { + raw: unsafe { + table.first_bucket_raw().offset(ib_index as int) + }, + idx: ib_index, + capacity: table.capacity(), + table: table + })) } } - pub fn first(table: M) -> Bucket { - Bucket { - raw: table.first_bucket_raw(), - idx: 0, - table: table - } + /// Narrows down the range of iteration, which must be a power of 2. + pub fn iter_to(mut self, limit: usize) -> Bucket { + assert!(limit <= self.capacity); + assert!(limit.is_power_of_two()); + self.0.capacity = limit; + self } /// Reads a bucket at a given index, returning an enum indicating whether @@ -246,49 +246,47 @@ impl>> Bucket { /// the appropriate types to call most of the other functions in /// this module. pub fn peek(self) -> BucketState { - match unsafe { *self.raw.hash } { - EMPTY_BUCKET => - Empty(EmptyBucket { - raw: self.raw, - idx: self.idx, - table: self.table - }), - _ => - Full(FullBucket { - raw: self.raw, - idx: self.idx, - table: self.table - }) + match unsafe { *self.0.raw.hash } { + None => Empty(Bucket(self.0)), + _ => Full(Bucket(self.0)), } } /// Modifies the bucket pointer in place to make it point to the next slot. pub fn next(&mut self) { - // Branchless bucket iteration step. - // As we reach the end of the table... - // We take the current idx: 0111111b - // Xor it by its increment: ^ 1000000b - // ------------ - // 1111111b - // Then AND with the capacity: & 1000000b - // ------------ - // to get the backwards offset: 1000000b - // ... and it's zero at all other times. 
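For reference, the branchless wrap-around step described in the removed comment above, next to the straightforward branching version that replaces it; the sketch only checks that the two agree for a power-of-two capacity (the function names are illustrative).

```rust
// Offset to add to the raw bucket pointer when moving from `idx` to the
// next bucket: +1 everywhere except at the end of the table, where it is
// 1 - capacity so the pointer wraps back to the first bucket.
fn branchless_step(idx: usize, capacity: usize) -> isize {
    let maybe_wraparound_dist = (idx ^ (idx + 1)) & capacity;
    1 - maybe_wraparound_dist as isize
}

fn branching_step(idx: usize, capacity: usize) -> isize {
    if (idx + 1) & (capacity - 1) == 0 {
        1 - capacity as isize
    } else {
        1
    }
}

fn main() {
    let capacity = 8;
    for idx in 0..2 * capacity {
        assert_eq!(branchless_step(idx, capacity), branching_step(idx, capacity));
    }
}
```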
- let maybe_wraparound_dist = (self.idx ^ (self.idx + 1)) & self.table.capacity(); - // Finally, we obtain the offset 1 or the offset -cap + 1. - let dist = 1 - (maybe_wraparound_dist as int); - - self.idx += 1; + self.0.idx += 1; + + let dist = if self.0.idx & (self.0.capacity - 1) == 0 { + 1i - self.0.capacity as int + } else { + 1 + }; unsafe { - self.raw = self.raw.offset(dist); + self.0.raw = self.0.raw.offset(dist); } } } -impl>> EmptyBucket { +impl> + DerefMut> Bucket { + /// A combination of `peek` and `take` which doesn't consume the bucket. + pub fn peek_take(&mut self) -> Option<(K, V)> { + unsafe { + if *self.0.raw.hash != None { + *self.0.raw.hash = None; + self.0.table.size -= 1; + let (k, v) = ptr::read(self.0.raw.kval); + Some((k, v)) + } else { + None + } + } + } +} + +impl>, S> Bucket { #[inline] - pub fn next(self) -> Bucket { + pub fn into_next(self) -> Bucket { let mut bucket = self.into_bucket(); bucket.next(); bucket @@ -296,21 +294,23 @@ impl>> EmptyBucket { #[inline] pub fn into_bucket(self) -> Bucket { - Bucket { - raw: self.raw, - idx: self.idx, - table: self.table - } + Bucket(self.0) } +} +impl>> EmptyBucket { pub fn gap_peek(self) -> Option> { - let gap = EmptyBucket { - raw: self.raw, - idx: self.idx, - table: () - }; + let gap = Bucket(BareBucket { + table: (), + idx: self.0.idx, + capacity: self.0.capacity, + raw: RawBucket { + hash: self.0.raw.hash, + kval: self.0.raw.kval, + }, + }); - match self.next().peek() { + match self.into_next().peek() { Full(bucket) => { Some(GapThenFull { gap: gap, @@ -333,61 +333,35 @@ impl> + DerefMut> EmptyBucket { pub fn put(mut self, hash: SafeHash, key: K, value: V) -> FullBucket { unsafe { - *self.raw.hash = hash.inspect(); - ptr::write(self.raw.key, key); - ptr::write(self.raw.val, value); + *self.0.raw.hash = Some(hash); + ptr::write(self.0.raw.kval, (key, value)); } - self.table.size += 1; + self.0.table.size += 1; - FullBucket { raw: self.raw, idx: self.idx, table: self.table } + Bucket(self.0) } } impl>> FullBucket { - #[inline] - pub fn next(self) -> Bucket { - let mut bucket = self.into_bucket(); - bucket.next(); - bucket - } - - #[inline] - pub fn into_bucket(self) -> Bucket { - Bucket { - raw: self.raw, - idx: self.idx, - table: self.table - } - } - /// Get the distance between this bucket and the 'ideal' location /// as determined by the key's hash stored in it. /// /// In the cited blog posts above, this is called the "distance to /// initial bucket", or DIB. Also known as "probe count". - pub fn distance(&self) -> uint { + pub fn displacement(&self) -> uint { // Calculates the distance one has to travel when going from // `hash mod capacity` onwards to `idx mod capacity`, wrapping around // if the destination is not reached before the end of the table. - (self.idx - self.hash().inspect() as uint) & (self.table.capacity() - 1) - } - - #[inline] - pub fn hash(&self) -> SafeHash { - unsafe { - SafeHash { - hash: *self.raw.hash - } - } + (self.0.idx - **self.read().0 as usize) & (self.0.capacity - 1) } /// Gets references to the key and value at a given index. - pub fn read(&self) -> (&K, &V) { - unsafe { - (&*self.raw.key, - &*self.raw.val) - } + pub fn read(&self) -> (&SafeHash, &K, &V) { + let (&ref h, &(ref k, ref v)) = unsafe { + (&*(self.0.raw.hash as *mut SafeHash), &*self.0.raw.kval) + }; + (h, k, v) } } @@ -397,38 +371,21 @@ impl> + DerefMut> FullBucket { /// This works similarly to `put`, building an `EmptyBucket` out of the /// taken bucket. 
pub fn take(mut self) -> (EmptyBucket, K, V) { - self.table.size -= 1; - - unsafe { - *self.raw.hash = EMPTY_BUCKET; - ( - EmptyBucket { - raw: self.raw, - idx: self.idx, - table: self.table - }, - ptr::read(self.raw.key), - ptr::read(self.raw.val) - ) - } - } + self.0.table.size -= 1; - pub fn replace(&mut self, h: SafeHash, k: K, v: V) -> (SafeHash, K, V) { unsafe { - let old_hash = ptr::replace(self.raw.hash as *mut SafeHash, h); - let old_key = ptr::replace(self.raw.key, k); - let old_val = ptr::replace(self.raw.val, v); - - (old_hash, old_key, old_val) + *self.0.raw.hash = None; + let (k, v) = ptr::read(self.0.raw.kval); + (Bucket(self.0), k, v) } } /// Gets mutable references to the key and value at a given index. - pub fn read_mut(&mut self) -> (&mut K, &mut V) { - unsafe { - (&mut *self.raw.key, - &mut *self.raw.val) - } + pub fn read_mut(&mut self) -> (&mut SafeHash, &mut K, &mut V) { + let (&mut ref mut h, &mut (ref mut k, ref mut v)) = unsafe { + (&mut *(self.0.raw.hash as *mut SafeHash), &mut *self.0.raw.kval) + }; + (h, k, v) } } @@ -439,10 +396,7 @@ impl<'t, K, V, M: Deref> + 't> FullBucket { /// in exchange for this, the returned references have a longer lifetime /// than the references returned by `read()`. pub fn into_refs(self) -> (&'t K, &'t V) { - unsafe { - (&*self.raw.key, - &*self.raw.val) - } + self.0.raw.into_refs() } } @@ -450,20 +404,7 @@ impl<'t, K, V, M: Deref> + DerefMut + 't> FullBucket (&'t mut K, &'t mut V) { - unsafe { - (&mut *self.raw.key, - &mut *self.raw.val) - } - } -} - -impl BucketState { - // For convenience. - pub fn expect_full(self) -> FullBucket { - match self { - Full(full) => full, - Empty(..) => panic!("Expected full bucket") - } + self.0.raw.into_mut_refs() } } @@ -475,17 +416,19 @@ impl>> GapThenFull { pub fn shift(mut self) -> Option> { unsafe { - *self.gap.raw.hash = mem::replace(&mut *self.full.raw.hash, EMPTY_BUCKET); - copy_nonoverlapping_memory(self.gap.raw.key, self.full.raw.key, 1); - copy_nonoverlapping_memory(self.gap.raw.val, self.full.raw.val, 1); + *self.gap.0.raw.hash = mem::replace(&mut *self.full.0.raw.hash, None); + copy_nonoverlapping_memory(self.gap.0.raw.kval, self.full.0.raw.kval, 1); } - let FullBucket { raw: prev_raw, idx: prev_idx, .. } = self.full; + let Bucket(BareBucket { raw: prev_raw, idx: prev_idx, .. }) = self.full; - match self.full.next().peek() { + match self.full.into_next().peek() { Full(bucket) => { - self.gap.raw = prev_raw; - self.gap.idx = prev_idx; + self.gap.0.raw = RawBucket { + hash: prev_raw.hash, + kval: prev_raw.kval, + }; + self.gap.0.idx = prev_idx; self.full = bucket; @@ -496,136 +439,36 @@ impl>> GapThenFull { } } - -/// Rounds up to a multiple of a power of two. Returns the closest multiple -/// of `target_alignment` that is higher or equal to `unrounded`. -/// -/// # Panics -/// -/// Panics if `target_alignment` is not a power of two. -fn round_up_to_next(unrounded: uint, target_alignment: uint) -> uint { - assert!(target_alignment.is_power_of_two()); - (unrounded + target_alignment - 1) & !(target_alignment - 1) -} - -#[test] -fn test_rounding() { - assert_eq!(round_up_to_next(0, 4), 0); - assert_eq!(round_up_to_next(1, 4), 4); - assert_eq!(round_up_to_next(2, 4), 4); - assert_eq!(round_up_to_next(3, 4), 4); - assert_eq!(round_up_to_next(4, 4), 4); - assert_eq!(round_up_to_next(5, 4), 8); -} - -// Returns a tuple of (key_offset, val_offset), -// from the start of a mallocated array. 
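The backward-shift deletion performed by `gap_peek`/`shift` above (and driven by `pop_internal` in map.rs) can be modelled safely: after a removal, following entries slide back one slot until an empty slot or an entry with displacement 0 is reached, so the table never needs tombstones. Slots are modelled as `Option<(ideal_bucket, value)>` and wrap-around is ignored; both are simplifications of this sketch.

```rust
fn backward_shift_delete(slots: &mut Vec<Option<(usize, &str)>>, idx: usize) {
    slots[idx] = None;
    let mut gap = idx;
    loop {
        let next = gap + 1;
        match slots.get(next).copied().flatten() {
            // Stop at a hole, or at an entry already in its ideal slot
            // (displacement 0): shifting it would move it before home.
            None => break,
            Some((ideal, _)) if ideal == next => break,
            Some(entry) => {
                slots[gap] = Some(entry);
                slots[next] = None;
                gap = next;
            }
        }
    }
}

fn main() {
    // Three entries hashing to bucket 1, one to bucket 3.
    let mut slots = vec![
        None,
        Some((1, "a")),
        Some((1, "b")),
        Some((1, "c")),
        Some((3, "d")),
        None,
    ];
    backward_shift_delete(&mut slots, 1);
    // "b", "c" and "d" each slide one slot closer to their ideal bucket.
    assert_eq!(slots, vec![None, Some((1, "b")), Some((1, "c")), Some((3, "d")), None, None]);
}
```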
-fn calculate_offsets(hashes_size: uint, - keys_size: uint, keys_align: uint, - vals_align: uint) - -> (uint, uint) { - let keys_offset = round_up_to_next(hashes_size, keys_align); - let end_of_keys = keys_offset + keys_size; - - let vals_offset = round_up_to_next(end_of_keys, vals_align); - - (keys_offset, vals_offset) -} - -// Returns a tuple of (minimum required malloc alignment, hash_offset, -// array_size), from the start of a mallocated array. -fn calculate_allocation(hash_size: uint, hash_align: uint, - keys_size: uint, keys_align: uint, - vals_size: uint, vals_align: uint) - -> (uint, uint, uint) { - let hash_offset = 0; - let (_, vals_offset) = calculate_offsets(hash_size, - keys_size, keys_align, - vals_align); - let end_of_vals = vals_offset + vals_size; - - let min_align = cmp::max(hash_align, cmp::max(keys_align, vals_align)); - - (min_align, hash_offset, end_of_vals) -} - -#[test] -fn test_offset_calculation() { - assert_eq!(calculate_allocation(128, 8, 15, 1, 4, 4), (8, 0, 148)); - assert_eq!(calculate_allocation(3, 1, 2, 1, 1, 1), (1, 0, 6)); - assert_eq!(calculate_allocation(6, 2, 12, 4, 24, 8), (8, 0, 48)); - assert_eq!(calculate_offsets(128, 15, 1, 4), (128, 144)); - assert_eq!(calculate_offsets(3, 2, 1, 1), (3, 5)); - assert_eq!(calculate_offsets(6, 12, 4, 8), (8, 24)); -} - impl RawTable { - /// Does not initialize the buckets. The caller should ensure they, - /// at the very least, set every hash to EMPTY_BUCKET. + /// Does not initialize the buckets. unsafe fn new_uninitialized(capacity: uint) -> RawTable { if capacity == 0 { return RawTable { size: 0, capacity: 0, - hashes: ptr::null_mut(), - marker: marker::CovariantType, + middle: Unique::null(), }; } - // No need for `checked_mul` before a more restrictive check performed - // later in this method. - let hashes_size = capacity * size_of::(); - let keys_size = capacity * size_of::< K >(); - let vals_size = capacity * size_of::< V >(); - - // Allocating hashmaps is a little tricky. We need to allocate three - // arrays, but since we know their sizes and alignments up front, - // we just allocate a single array, and then have the subarrays - // point into it. - // - // This is great in theory, but in practice getting the alignment - // right is a little subtle. Therefore, calculating offsets has been - // factored out into a different function. - let (malloc_alignment, hash_offset, size) = - calculate_allocation( - hashes_size, min_align_of::(), - keys_size, min_align_of::< K >(), - vals_size, min_align_of::< V >()); - - // One check for overflow that covers calculation and rounding of size. 
- let size_of_bucket = size_of::().checked_add(size_of::()).unwrap() - .checked_add(size_of::()).unwrap(); - assert!(size >= capacity.checked_mul(size_of_bucket) - .expect("capacity overflow"), - "capacity overflow"); - - let buffer = allocate(size, malloc_alignment); - if buffer.is_null() { ::alloc::oom() } - - let hashes = buffer.offset(hash_offset as int) as *mut u64; + + let elem_size = size_of::<(K, V)>() + size_of::(); + let size = capacity.checked_mul(elem_size).expect("capacity overflow"); + let hashes = allocate(size, align::()); + if hashes.is_null() { ::alloc::oom() } RawTable { capacity: capacity, size: 0, - hashes: hashes, - marker: marker::CovariantType, + middle: Unique((hashes as *mut (K, V)).offset(capacity as isize)), } } - fn first_bucket_raw(&self) -> RawBucket { - let hashes_size = self.capacity * size_of::(); - let keys_size = self.capacity * size_of::(); - - let buffer = self.hashes as *mut u8; - let (keys_offset, vals_offset) = calculate_offsets(hashes_size, - keys_size, min_align_of::(), - min_align_of::()); - - unsafe { - RawBucket { - hash: self.hashes, - key: buffer.offset(keys_offset as int) as *mut K, - val: buffer.offset(vals_offset as int) as *mut V - } + #[inline] + fn first_bucket_raw(&self) -> RawBucket { + RawBucket { + hash: self.middle.0 as *mut Option, + kval: unsafe { + self.middle.0.offset(-(self.capacity as isize)) + }, } } @@ -633,9 +476,36 @@ impl RawTable { /// initially empty. pub fn new(capacity: uint) -> RawTable { unsafe { - let ret = RawTable::new_uninitialized(capacity); - zero_memory(ret.hashes, capacity); - ret + let table = RawTable::new_uninitialized(capacity); + zero_memory(table.middle.0 as *mut Option, capacity); + table + } + } + + pub fn grow_inplace(&mut self, capacity: uint) -> bool { + if self.middle.0.is_null() || capacity < self.capacity { + return false; + } + + let size = self.capacity * (size_of::() + size_of::<(K, V)>()); + let new_size = (size_of::() + size_of::<(K, V)>()).checked_mul(capacity) + .expect("capacity overflow"); + unsafe { + let ptr = self.middle.0.offset(-(self.capacity as isize)) as *mut u8; + let is_inplace = reallocate_inplace(ptr, + size, + new_size, + align::()) >= new_size; + + if is_inplace { + let hashes = self.middle.0.offset((capacity - self.capacity) as isize) as *mut Option; + copy_memory(hashes, self.middle.0 as *const Option, self.capacity); + zero_memory(hashes.offset(self.capacity as int), capacity - self.capacity); + self.middle = Unique(self.middle.0.offset((capacity - self.capacity) as isize)); + self.capacity = capacity; + } + + return is_inplace; } } @@ -650,100 +520,81 @@ impl RawTable { self.size } - fn raw_buckets(&self) -> RawBuckets { - RawBuckets { - raw: self.first_bucket_raw(), - hashes_end: unsafe { - self.hashes.offset(self.capacity as int) - }, - marker: marker::ContravariantLifetime, - } - } - - pub fn iter(&self) -> Iter { - Iter { - iter: self.raw_buckets(), - elems_left: self.size(), - } + pub fn raw_full_buckets(&self) -> RawFullBuckets> { + full_buckets(self.first_bucket_raw(), self.capacity, self) } - pub fn iter_mut(&mut self) -> IterMut { - IterMut { - iter: self.raw_buckets(), - elems_left: self.size(), - } + pub fn raw_full_buckets_mut(&mut self) -> RawFullBuckets> { + full_buckets(self.first_bucket_raw(), self.capacity, self) } pub fn into_iter(self) -> IntoIter { - let RawBuckets { raw, hashes_end, .. } = self.raw_buckets(); // Replace the marker regardless of lifetime bounds on parameters. 
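A reduced sketch of what `grow_inplace` does to the buffer once the allocator has extended it: the (K, V) pairs stay where they are, the block of hash words slides up from offset `old_capacity * size_of::<(K, V)>()` to `new_capacity * size_of::<(K, V)>()`, and the hash words of the new buckets are zeroed so they read as empty. Stable Rust has no grow-in-place call, so this version uses `std::alloc::realloc` (which may move the block) and fixes K and V to `u64` to keep the offsets readable; it illustrates the copy/zero pattern, not the patch's exact code.

```rust
use std::alloc::{alloc_zeroed, dealloc, realloc, Layout};
use std::ptr;

unsafe fn grow(buf: *mut u8, old_cap: usize, new_cap: usize) -> *mut u8 {
    let pair = 16; // size_of::<(u64, u64)>()
    let hash = 8;  // size_of::<u64>()
    let old_layout = Layout::from_size_align(old_cap * (pair + hash), 8).unwrap();
    let new_buf = realloc(buf, old_layout, new_cap * (pair + hash));
    assert!(!new_buf.is_null());

    // The hash words sit above the pairs, so the hash block has to slide up
    // from offset old_cap * pair to new_cap * pair...
    ptr::copy(new_buf.add(old_cap * pair), new_buf.add(new_cap * pair), old_cap * hash);
    // ...and the hash words of the brand-new buckets are zeroed (= empty).
    ptr::write_bytes(new_buf.add(new_cap * pair + old_cap * hash), 0, (new_cap - old_cap) * hash);
    new_buf
}

fn main() {
    unsafe {
        let buf = alloc_zeroed(Layout::from_size_align(4 * 24, 8).unwrap());
        assert!(!buf.is_null());
        // Pretend bucket 1 is full: store its pair and a non-zero hash word.
        ptr::write(buf.add(1 * 16) as *mut (u64, u64), (7, 42));
        ptr::write(buf.add(4 * 16 + 1 * 8) as *mut u64, 0x8000_0000_0000_0001);

        let buf = grow(buf, 4, 8);

        // After growth the pair has not moved, its hash sits at the new
        // hash-block offset, and buckets 4..8 read as empty.
        assert_eq!(ptr::read(buf.add(1 * 16) as *const (u64, u64)), (7, 42));
        assert_eq!(ptr::read(buf.add(8 * 16 + 1 * 8) as *const u64), 0x8000_0000_0000_0001);
        assert_eq!(ptr::read(buf.add(8 * 16 + 5 * 8) as *const u64), 0);

        dealloc(buf, Layout::from_size_align(8 * 24, 8).unwrap());
    }
}
```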
IntoIter { - iter: RawBuckets { - raw: raw, - hashes_end: hashes_end, - marker: marker::ContravariantLifetime, - }, - table: self, + iter: full_buckets(self.first_bucket_raw(), self.capacity, self) } } +} - pub fn drain(&mut self) -> Drain { - let RawBuckets { raw, hashes_end, .. } = self.raw_buckets(); - // Replace the marker regardless of lifetime bounds on parameters. - Drain { - iter: RawBuckets { - raw: raw, - hashes_end: hashes_end, - marker: marker::ContravariantLifetime::<'static>, - }, - table: self, - } +fn align() -> usize { + cmp::max(mem::min_align_of::<(K, V)>(), mem::min_align_of::()) +} + +pub fn full_buckets(raw: RawBucket, cap: usize, t: M) -> RawFullBuckets { + RawFullBuckets { + raw: raw, + hashes_end: unsafe { raw.hash.offset(cap as int) }, + table: t, + } +} + +pub struct RawFullBucket(RawBucket); + +impl Deref for RawFullBucket { + type Target = RawBucket; + + fn deref(&self) -> &RawBucket { + &self.0 } +} - /// Returns an iterator that copies out each entry. Used while the table - /// is being dropped. - unsafe fn rev_move_buckets(&mut self) -> RevMoveBuckets { - let raw_bucket = self.first_bucket_raw(); - RevMoveBuckets { - raw: raw_bucket.offset(self.capacity as int), - hashes_end: raw_bucket.hash, - elems_left: self.size, - marker: marker::ContravariantLifetime, - } +impl DerefMut for RawFullBucket { + fn deref_mut(&mut self) -> &mut RawBucket { + &mut self.0 } } /// A raw iterator. The basis for some other iterators in this module. Although /// this interface is safe, it's not used outside this module. -struct RawBuckets<'a, K, V> { - raw: RawBucket, - hashes_end: *mut u64, - marker: marker::ContravariantLifetime<'a>, +pub struct RawFullBuckets { + raw: RawBucket, + hashes_end: *mut Option, + table: M, } // FIXME(#19839) Remove in favor of `#[derive(Clone)]` -impl<'a, K, V> Clone for RawBuckets<'a, K, V> { - fn clone(&self) -> RawBuckets<'a, K, V> { - RawBuckets { +impl Clone for RawFullBuckets { + fn clone(&self) -> RawFullBuckets { + RawFullBuckets { raw: self.raw, hashes_end: self.hashes_end, - marker: marker::ContravariantLifetime, + table: self.table.clone(), + // marker: marker::ContravariantLifetime, } } } +impl Iterator for RawFullBuckets { + type Item = RawFullBucket; -impl<'a, K, V> Iterator for RawBuckets<'a, K, V> { - type Item = RawBucket; - - fn next(&mut self) -> Option> { + fn next(&mut self) -> Option> { while self.raw.hash != self.hashes_end { unsafe { // We are swapping out the pointer to a bucket and replacing // it with the pointer to the next one. let prev = ptr::replace(&mut self.raw, self.raw.offset(1)); - if *prev.hash != EMPTY_BUCKET { - return Some(prev); + if *prev.hash != None { + return Some(RawFullBucket(prev)); } } } @@ -752,117 +603,9 @@ impl<'a, K, V> Iterator for RawBuckets<'a, K, V> { } } -/// An iterator that moves out buckets in reverse order. It leaves the table -/// in an inconsistent state and should only be used for dropping -/// the table's remaining entries. It's used in the implementation of Drop. 
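What `RawFullBuckets` filters on, reduced to safe code over a slice of hash words: walk the hash array and yield only the positions whose word is non-zero, i.e. the full buckets. Purely illustrative; the real iterator walks raw pointers and yields bucket pointers rather than indexes.

```rust
fn full_indexes(hashes: &[u64]) -> impl Iterator<Item = usize> + '_ {
    hashes.iter()
          .enumerate()
          .filter(|&(_, &h)| h != 0) // zero means "empty bucket"
          .map(|(i, _)| i)
}

fn main() {
    let hashes = [0, 0x8000_0000_0000_0003, 0, 0x8000_0000_0000_0007];
    assert_eq!(full_indexes(&hashes).collect::<Vec<_>>(), vec![1, 3]);
}
```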
-struct RevMoveBuckets<'a, K, V> { - raw: RawBucket, - hashes_end: *mut u64, - elems_left: uint, - marker: marker::ContravariantLifetime<'a>, -} - -impl<'a, K, V> Iterator for RevMoveBuckets<'a, K, V> { - type Item = (K, V); - - fn next(&mut self) -> Option<(K, V)> { - if self.elems_left == 0 { - return None; - } - - loop { - debug_assert!(self.raw.hash != self.hashes_end); - - unsafe { - self.raw = self.raw.offset(-1); - - if *self.raw.hash != EMPTY_BUCKET { - self.elems_left -= 1; - return Some(( - ptr::read(self.raw.key), - ptr::read(self.raw.val) - )); - } - } - } - } -} - -/// Iterator over shared references to entries in a table. -pub struct Iter<'a, K: 'a, V: 'a> { - iter: RawBuckets<'a, K, V>, - elems_left: uint, -} - -// FIXME(#19839) Remove in favor of `#[derive(Clone)]` -impl<'a, K, V> Clone for Iter<'a, K, V> { - fn clone(&self) -> Iter<'a, K, V> { - Iter { - iter: self.iter.clone(), - elems_left: self.elems_left - } - } -} - - -/// Iterator over mutable references to entries in a table. -pub struct IterMut<'a, K: 'a, V: 'a> { - iter: RawBuckets<'a, K, V>, - elems_left: uint, -} - /// Iterator over the entries in a table, consuming the table. pub struct IntoIter { - table: RawTable, - iter: RawBuckets<'static, K, V> -} - -/// Iterator over the entries in a table, clearing the table. -pub struct Drain<'a, K: 'a, V: 'a> { - table: &'a mut RawTable, - iter: RawBuckets<'static, K, V>, -} - -impl<'a, K, V> Iterator for Iter<'a, K, V> { - type Item = (&'a K, &'a V); - - fn next(&mut self) -> Option<(&'a K, &'a V)> { - self.iter.next().map(|bucket| { - self.elems_left -= 1; - unsafe { - (&*bucket.key, - &*bucket.val) - } - }) - } - - fn size_hint(&self) -> (usize, Option) { - (self.elems_left, Some(self.elems_left)) - } -} -impl<'a, K, V> ExactSizeIterator for Iter<'a, K, V> { - fn len(&self) -> usize { self.elems_left } -} - -impl<'a, K, V> Iterator for IterMut<'a, K, V> { - type Item = (&'a K, &'a mut V); - - fn next(&mut self) -> Option<(&'a K, &'a mut V)> { - self.iter.next().map(|bucket| { - self.elems_left -= 1; - unsafe { - (&*bucket.key, - &mut *bucket.val) - } - }) - } - - fn size_hint(&self) -> (usize, Option) { - (self.elems_left, Some(self.elems_left)) - } -} -impl<'a, K, V> ExactSizeIterator for IterMut<'a, K, V> { - fn len(&self) -> usize { self.elems_left } + iter: RawFullBuckets> } impl Iterator for IntoIter { @@ -870,60 +613,27 @@ impl Iterator for IntoIter { fn next(&mut self) -> Option<(SafeHash, K, V)> { self.iter.next().map(|bucket| { - self.table.size -= 1; + self.iter.table.size -= 1; unsafe { - ( - SafeHash { - hash: *bucket.hash, - }, - ptr::read(bucket.key), - ptr::read(bucket.val) - ) + let (k, v) = ptr::read(bucket.kval as *const (K, V)); + (*(bucket.hash as *mut SafeHash), k, v) } }) } fn size_hint(&self) -> (usize, Option) { - let size = self.table.size(); + let size = self.iter.table.size(); (size, Some(size)) } } impl ExactSizeIterator for IntoIter { - fn len(&self) -> usize { self.table.size() } -} - -impl<'a, K, V> Iterator for Drain<'a, K, V> { - type Item = (SafeHash, K, V); - - #[inline] - fn next(&mut self) -> Option<(SafeHash, K, V)> { - self.iter.next().map(|bucket| { - self.table.size -= 1; - unsafe { - ( - SafeHash { - hash: ptr::replace(bucket.hash, EMPTY_BUCKET), - }, - ptr::read(bucket.key), - ptr::read(bucket.val) - ) - } - }) - } - - fn size_hint(&self) -> (usize, Option) { - let size = self.table.size(); - (size, Some(size)) - } -} -impl<'a, K, V> ExactSizeIterator for Drain<'a, K, V> { - fn len(&self) -> usize { self.table.size() } + fn 
len(&self) -> usize { self.iter.table.size() } } #[unsafe_destructor] -impl<'a, K: 'a, V: 'a> Drop for Drain<'a, K, V> { +impl Drop for IntoIter { fn drop(&mut self) { - for _ in self.by_ref() {} + for _ in self {} } } @@ -934,21 +644,24 @@ impl Clone for RawTable { { let cap = self.capacity(); - let mut new_buckets = Bucket::first(&mut new_ht); - let mut buckets = Bucket::first(self); + let mut buckets = if let Some(buckets) = Bucket::at_index(self, 0) { + buckets + } else { + return new_ht; + }; + let mut new_buckets = Bucket::at_index(&mut new_ht, 0).unwrap(); while buckets.index() != cap { match buckets.peek() { Full(full) => { let (h, k, v) = { - let (k, v) = full.read(); - (full.hash(), k.clone(), v.clone()) + let (h, k, v) = full.read(); + (*h, k.clone(), v.clone()) }; - *new_buckets.raw.hash = h.inspect(); - ptr::write(new_buckets.raw.key, k); - ptr::write(new_buckets.raw.val, v); + *new_buckets.0.raw.hash = Some(h); + ptr::write(new_buckets.0.raw.kval, (k, v)); } Empty(..) => { - *new_buckets.raw.hash = EMPTY_BUCKET; + *new_buckets.0.raw.hash = None; } } new_buckets.next(); @@ -966,29 +679,25 @@ impl Clone for RawTable { #[unsafe_destructor] impl Drop for RawTable { fn drop(&mut self) { - if self.hashes.is_null() { + if self.middle.0.is_null() { return; } - // This is done in reverse because we've likely partially taken - // some elements out with `.into_iter()` from the front. // Check if the size is 0, so we don't do a useless scan when // dropping empty tables such as on resize. - // Also avoid double drop of elements that have been already moved out. + // Avoid double drop of elements that have been already moved out. unsafe { - for _ in self.rev_move_buckets() {} + if self.size != 0 { + for bucket in self.raw_full_buckets_mut() { + ptr::read(bucket.kval as *const (K, V)); + } + } } - let hashes_size = self.capacity * size_of::(); - let keys_size = self.capacity * size_of::(); - let vals_size = self.capacity * size_of::(); - let (align, _, size) = calculate_allocation(hashes_size, min_align_of::(), - keys_size, min_align_of::(), - vals_size, min_align_of::()); + let size = self.capacity * (size_of::() + size_of::<(K, V)>()); unsafe { - deallocate(self.hashes as *mut u8, size, align); - // Remember how everything was allocated out of one buffer - // during initialization? We only need one call to free here. + let ptr = self.middle.0.offset(-(self.capacity as isize)) as *mut u8; + deallocate(ptr, size, align::()); } } }
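The "finish the iteration on drop" pattern used by `Drain` and `IntoIter` above, in a standalone form: if the caller stops early, `Drop` walks the remaining elements so each one is still moved out (and dropped) exactly once. The `Draining` type is an illustrative stand-in, not the map's iterator.

```rust
struct Draining<T> {
    items: Vec<T>,
}

impl<T> Iterator for Draining<T> {
    type Item = T;
    fn next(&mut self) -> Option<T> {
        self.items.pop()
    }
}

impl<T> Drop for Draining<T> {
    fn drop(&mut self) {
        // Same shape as `for _ in self {}` in the patch: consume what's left.
        for _ in self.by_ref() {}
        debug_assert!(self.items.is_empty());
    }
}

fn main() {
    let mut d = Draining { items: vec![String::from("a"), String::from("b")] };
    assert_eq!(d.next(), Some(String::from("b")));
    // Dropping the half-finished iterator still moves out (and frees) "a".
    drop(d);
}
```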