diff --git a/src/libstd/collections/hash/bench.rs b/src/libstd/collections/hash/bench.rs index ac21ae0f0aa14..636df75e699da 100644 --- a/src/libstd/collections/hash/bench.rs +++ b/src/libstd/collections/hash/bench.rs @@ -55,6 +55,24 @@ fn grow_by_insertion(b: &mut Bencher) { }); } +#[bench] +fn grow_by_insertion_large(b: &mut Bencher) { + use super::map::HashMap; + + let mut m = HashMap::new(); + + for i in range_inclusive(1, 1000) { + m.insert(i, (String::new(), String::new())); + } + + let mut k = 1001; + + b.iter(|| { + m.insert(k, (String::new(), String::new())); + k += 1; + }); +} + #[bench] fn find_existing(b: &mut Bencher) { use super::map::HashMap; diff --git a/src/libstd/collections/hash/map.rs b/src/libstd/collections/hash/map.rs index 9ba90c470f8ae..e9058e5c74290 100644 --- a/src/libstd/collections/hash/map.rs +++ b/src/libstd/collections/hash/map.rs @@ -9,19 +9,18 @@ // except according to those terms. use self::Entry::*; -use self::SearchResult::*; use self::VacantEntryState::*; -use borrow::Borrow; +use borrow::{Borrow, BorrowMut}; use clone::Clone; use cmp::{max, Eq, PartialEq}; use default::Default; use fmt::{self, Debug}; use hash::{Hash, SipHasher}; -use iter::{self, Iterator, ExactSizeIterator, IntoIterator, FromIterator, Extend, Map}; +use iter::{Iterator, ExactSizeIterator, IntoIterator, FromIterator, Extend, Map}; use marker::Sized; -use mem::{self, replace}; -use ops::{Deref, FnMut, FnOnce, Index}; +use mem::{self, swap, replace}; +use ops::{Drop, FnMut, FnOnce, Index}; use option::Option::{self, Some, None}; use rand::{self, Rng}; use result::Result::{self, Ok, Err}; @@ -31,10 +30,11 @@ use super::table::{ Bucket, EmptyBucket, FullBucket, - FullBucketImm, FullBucketMut, RawTable, - SafeHash + SafeHash, + PartialRawTable, + Put, }; use super::table::BucketState::{ Empty, @@ -92,53 +92,59 @@ fn test_resize_policy() { } } -// The main performance trick in this hashmap is called Robin Hood Hashing. -// It gains its excellent performance from one essential operation: +// The main performance trick in this hashmap is called Robin Hood hashing with +// linear probing. It gains its excellent performance from one essential +// operation: // // If an insertion collides with an existing element, and that element's -// "probe distance" (how far away the element is from its ideal location) -// is higher than how far we've already probed, swap the elements. +// "displacement" (how far away the element is from its ideal location) +// is lower than how far we've already probed, swap the elements. // -// This massively lowers variance in probe distance, and allows us to get very +// This massively lowers variance in displacement and allows us to get very // high load factors with good performance. The 90% load factor I use is rather // conservative. // // > Why a load factor of approximately 90%? // // In general, all the distances to initial buckets will converge on the mean. -// At a load factor of α, the odds of finding the target bucket after k -// probes is approximately 1-α^k. If we set this equal to 50% (since we converge -// on the mean) and set k=8 (64-byte cache line / 8-byte hash), α=0.92. I round -// this down to make the math easier on the CPU and avoid its FPU. -// Since on average we start the probing in the middle of a cache line, this -// strategy pulls in two cache lines of hashes on every lookup. I think that's -// pretty good, but if you want to trade off some space, it could go down to one -// cache line on average with an α of 0.84. 
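The removed back-of-the-envelope reasoning above (odds of success within k probes ≈ 1 - α^k) is easy to sanity-check numerically before the hunk below replaces it with Viola's exact distribution. A minimal standalone sketch, not part of the patch, evaluating the old estimate at α = 0.92, k = 8:

```rust
// Sanity check for the removed heuristic: at load factor `a`, the odds of
// NOT finding the target within k probes were estimated as a^k, so the
// odds of success within k probes are 1 - a^k.
fn main() {
    let a: f64 = 0.92;
    let k = 8;
    println!("1 - a^k = {:.3}", 1.0 - a.powi(k)); // ~0.487, i.e. roughly 50%
}
```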
+// At a load factor of α, the odds of finding the target bucket after exactly n +// unsuccessful probes[4] are // -// > Wait, what? Where did you get 1-α^k from? +// Pr{displacement = n} = +// (1 - α) / α * ∑_{k≥1} e^(-kα) * (kα)^(k+n) / (k + n)! * (1 - kα / (k + n + 1)) // -// On the first probe, your odds of a collision with an existing element is α. -// The odds of doing this twice in a row is approximately α^2. For three times, -// α^3, etc. Therefore, the odds of colliding k times is α^k. The odds of NOT -// colliding after k tries is 1-α^k. +// These odds can be approximated with this code: // -// The paper from 1986 cited below mentions an implementation which keeps track -// of the distance-to-initial-bucket histogram. This approach is not suitable -// for modern architectures because it requires maintaining an internal data -// structure. This allows very good first guesses, but we are most concerned -// with guessing entire cache lines, not individual indexes. Furthermore, array -// accesses are no longer linear and in one direction, as we have now. There -// is also memory and cache pressure that this would entail that would be very -// difficult to properly see in a microbenchmark. +// ``` +// use std::num::Float; +// use std::iter::AdditiveIterator; +// +// fn factorial(value: f64) -> f64 { +// if value == 0.0 { 1.0 } else { value * factorial(value - 1.0) } +// } +// +// fn psi(a: f64, displacement: u32) -> f64 { +// let n = displacement as f64; +// (1.0 / a - 1.0) * (0..130).map(|k_| { +// let k = k_ as f64; +// (-k * a).exp() * (1.0 - k * a / (k + n + 1.0)) * +// (k * a).powi((k + n) as i32) / factorial(k + n) as f64 +// }).sum() +// } +// ``` +// +// If we set α=0.909, then Pr{displacement < 4} = 0.51 and Pr{displacement < +// 8} = 0.77. The exact value of 0.909 is chosen to make the math easier on +// the CPU and avoid its FPU. Since on average we start the probing in the +// middle of a cache line, this strategy pulls in one cache line of hashes on +// most lookups (64-byte cache line with 8-byte hash). I think this choice is +// pretty good, but α could go up to 0.95, or down to 0.84 to trade off some +// space. // // ## Future Improvements (FIXME!) // // Allow the load factor to be changed dynamically and/or at initialization. // -// Also, would it be possible for us to reuse storage when growing the -// underlying table? This is exactly the use case for 'realloc', and may -// be worth exploring. -// // ## Future Optimizations (FIXME!) // // Another possible design choice that I made without any real reason is @@ -151,9 +157,9 @@ fn test_resize_policy() { // This would definitely be an avenue worth exploring if people start complaining // about the size of rust executables. // -// Annotate exceedingly likely branches in `table::make_hash` -// and `search_hashed` to reduce instruction cache pressure -// and mispredictions once it becomes possible (blocked on issue #11092). +// Annotate the exceedingly likely branch in `search_hashed` to reduce +// instruction cache pressure and mispredictions once it becomes possible +// (blocked on issue #11092). // // Shrinking the table could simply reallocate in place after moving buckets // to the first half. @@ -233,6 +239,8 @@ fn test_resize_policy() { /// hashing"](http://codecapsule.com/2013/11/11/robin-hood-hashing/) /// 3. Emmanuel Goossaert. ["Robin Hood hashing: backward shift /// deletion"](http://codecapsule.com/2013/11/17/robin-hood-hashing-backward-shift-deletion/) +/// 4. Alfredo Viola (2005).
Distributional analysis of Robin Hood linear probing +/// hashing with buckets. /// /// # Examples /// @@ -304,7 +312,6 @@ fn test_resize_policy() { /// println!("{:?} has {} hp", viking, health); /// } /// ``` -#[derive(Clone)] #[stable(feature = "rust1", since = "1.0.0")] pub struct HashMap { // All hashes are keyed on these values, to prevent hash collision attacks. @@ -319,45 +326,49 @@ pub struct HashMap { fn search_hashed(table: M, hash: SafeHash, mut is_match: F) - -> SearchResult where - M: Deref>, + -> InternalEntry where + M: Borrow>, F: FnMut(&K) -> bool, { - // This is the only function where capacity can be zero. To avoid - // undefined behaviour when Bucket::new gets the raw bucket in this - // case, immediately return the appropriate search result. - if table.capacity() == 0 { - return TableRef(table); - } - - let size = table.size(); - let mut probe = Bucket::new(table, hash); + // Worst case, we'll find one empty bucket among `size + 1` buckets. + let size = table.borrow().size(); + let mut probe = match Bucket::new(table, *hash as usize) { + Some(probe) => probe, + // This is the only function where capacity can be zero. + None => return InternalEntry::TableIsEmpty, + }; let ib = probe.index(); - while probe.index() != ib + size { - let full = match probe.peek() { - Empty(b) => return TableRef(b.into_table()), // hit an empty bucket - Full(b) => b + loop { + let bucket = match probe.peek() { + Empty(empty) => { + // Found a hole! + return InternalEntry::Vacant(NoElem(empty)); + } + Full(bucket) => bucket }; - if full.distance() + ib < full.index() { + let robin_ib = bucket.index() as isize - bucket.displacement() as isize; + + if (ib as isize) < robin_ib { // We can finish the search early if we hit any bucket // with a lower distance to initial bucket than we've probed. - return TableRef(full.into_table()); + return InternalEntry::Vacant(NeqElem(bucket, robin_ib as usize)); } // If the hash doesn't match, it can't be this one.. - if hash == full.hash() { + if hash == *bucket.read().0 { // If the key doesn't match, it can't be this one.. - if is_match(full.read().0) { - return FoundExisting(full); + if is_match(bucket.read().1) { + return InternalEntry::Occupied(OccupiedEntryState { + elem: bucket, + }); } } - probe = full.next(); + probe = bucket.into_next(); + debug_assert!(probe.index() != ib + size + 1); } - - TableRef(probe.into_table()) } fn pop_internal(starting_bucket: FullBucketMut) -> (K, V) { @@ -367,7 +378,7 @@ fn pop_internal(starting_bucket: FullBucketMut) -> (K, V) { None => return (retkey, retval) }; - while gap.full().distance() != 0 { + while gap.full().displacement() != 0 { gap = match gap.shift() { Some(b) => b, None => break @@ -383,76 +394,80 @@ fn pop_internal(starting_bucket: FullBucketMut) -> (K, V) { /// to recalculate it. /// /// `hash`, `k`, and `v` are the elements to "robin hood" into the hashtable. -fn robin_hood<'a, K: 'a, V: 'a>(mut bucket: FullBucketMut<'a, K, V>, +fn robin_hood<'a, K: 'a, V: 'a>(bucket: FullBucketMut<'a, K, V>, mut ib: usize, mut hash: SafeHash, - mut k: K, - mut v: V) + mut key: K, + mut val: V) -> &'a mut V { let starting_index = bucket.index(); let size = { let table = bucket.table(); // FIXME "lifetime too short". 
table.size() }; - // There can be at most `size - dib` buckets to displace, because + let mut bucket = bucket.stash(); + // There can be at most `size - displacement` buckets to displace, because // in the worst case, there are `size` elements and we already are - // `distance` buckets away from the initial one. - let idx_end = starting_index + size - bucket.distance(); + // `displacement` buckets away from the initial one. + let idx_end = starting_index + size - bucket.displacement(); loop { - let (old_hash, old_key, old_val) = bucket.replace(hash, k, v); + { + let (h_ref, k_ref, v_ref) = bucket.read_mut(); + swap(h_ref, &mut hash); + swap(k_ref, &mut key); + swap(v_ref, &mut val); + }; loop { - let probe = bucket.next(); + let probe = bucket.into_next(); assert!(probe.index() != idx_end); let full_bucket = match probe.peek() { Empty(bucket) => { // Found a hole! - let b = bucket.put(old_hash, old_key, old_val); + let b = bucket.put(hash, key, val); // Now that it's stolen, just read the value's pointer // right out of the table! - return Bucket::at_index(b.into_table(), starting_index) - .peek() - .expect_full() - .into_mut_refs() - .1; + return b.into_table().into_mut_refs().1; }, Full(bucket) => bucket }; - let probe_ib = full_bucket.index() - full_bucket.distance(); + let probe_ib = full_bucket.index() - full_bucket.displacement(); bucket = full_bucket; // Robin hood! Steal the spot. if ib < probe_ib { ib = probe_ib; - hash = old_hash; - k = old_key; - v = old_val; break; } } } } -/// A result that works like Option> but preserves -/// the reference that grants us access to the table in any case. -enum SearchResult { - // This is an entry that holds the given key: - FoundExisting(FullBucket), - - // There was no such entry. The reference is given back: - TableRef(M) -} - -impl SearchResult { - fn into_option(self) -> Option> { - match self { - FoundExisting(bucket) => Some(bucket), - TableRef(_) => None - } +// Performs insertion with relaxed requirements. +// The caller should ensure that invariants of Robin Hood linear probing hold. +fn insert_hashed_ordered(table: M, hash: SafeHash, key: K, val: V) -> M + where M: BorrowMut> +{ + let cap = table.borrow().capacity(); + let mut buckets = Bucket::new(table, *hash as usize).unwrap(); + let ib = buckets.index(); + + while buckets.index() != ib + cap { + // We don't need to compare hashes for value swap. + // Not even DIBs for Robin Hood. + buckets = match buckets.peek() { + Empty(empty) => { + return empty.put(hash, key, val).into_table(); + } + Full(full) => full.into_bucket() + }; + buckets.next(); } + + panic!("Internal HashMap error: Out of space."); } impl HashMap @@ -465,41 +480,20 @@ impl HashMap /// Search for a key, yielding the index if it's found in the hashtable. /// If you already have the hash for the key lying around, use /// search_hashed. - fn search<'a, Q: ?Sized>(&'a self, q: &Q) -> Option> + fn search<'a, Q: ?Sized>(&'a self, q: &Q) + -> InternalEntry> where K: Borrow, Q: Eq + Hash { let hash = self.make_hash(q); search_hashed(&self.table, hash, |k| q.eq(k.borrow())) - .into_option() } - fn search_mut<'a, Q: ?Sized>(&'a mut self, q: &Q) -> Option> + fn search_mut<'a, Q: ?Sized>(&'a mut self, q: &Q) + -> InternalEntry> where K: Borrow, Q: Eq + Hash { let hash = self.make_hash(q); search_hashed(&mut self.table, hash, |k| q.eq(k.borrow())) - .into_option() - } - - // The caller should ensure that invariants by Robin Hood Hashing hold. 
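The probability snippet quoted in the comment block earlier in this hunk uses pre-1.0 traits (`std::num::Float`, `std::iter::AdditiveIterator`) that no longer exist. A self-contained port that compiles on current Rust, keeping the same series and the same truncation at k < 130, for anyone who wants to reproduce the quoted Pr{displacement < 4} ≈ 0.51 and Pr{displacement < 8} ≈ 0.77 at α = 0.909:

```rust
// Approximates Pr{displacement = n} for Robin Hood linear probing at load
// factor `a`, following the truncated series from the comment above.
fn factorial(value: f64) -> f64 {
    if value == 0.0 { 1.0 } else { value * factorial(value - 1.0) }
}

fn psi(a: f64, displacement: u32) -> f64 {
    let n = displacement as f64;
    (1.0 / a - 1.0) * (0..130).map(|k| {
        let k = k as f64;
        (-k * a).exp() * (1.0 - k * a / (k + n + 1.0))
            * (k * a).powi((k + n) as i32) / factorial(k + n)
    }).sum::<f64>()
}

fn main() {
    let a = 0.909;
    let p4: f64 = (0..4).map(|n| psi(a, n)).sum();
    let p8: f64 = (0..8).map(|n| psi(a, n)).sum();
    println!("Pr{{displacement < 4}} = {:.2}", p4); // ~0.51
    println!("Pr{{displacement < 8}} = {:.2}", p8); // ~0.77
}
```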
- fn insert_hashed_ordered(&mut self, hash: SafeHash, k: K, v: V) { - let cap = self.table.capacity(); - let mut buckets = Bucket::new(&mut self.table, hash); - let ib = buckets.index(); - - while buckets.index() != ib + cap { - // We don't need to compare hashes for value swap. - // Not even DIBs for Robin Hood. - buckets = match buckets.peek() { - Empty(empty) => { - empty.put(hash, k, v); - return; - } - Full(b) => b.into_bucket() - }; - buckets.next(); - } - panic!("Internal HashMap error: Out of space."); } } @@ -596,6 +590,8 @@ impl HashMap } /// Returns the number of elements the map can hold without reallocating. + /// This value may be lower than the real number of elements the map will + /// hold before reallocating. /// /// # Examples /// @@ -636,7 +632,9 @@ impl HashMap if self.table.capacity() < min_cap { let new_capacity = max(min_cap.next_power_of_two(), INITIAL_CAPACITY); + let old_size = self.table.size(); self.resize(new_capacity); + assert_eq!(self.table.size(), old_size); } } @@ -648,16 +646,19 @@ impl HashMap assert!(self.table.size() <= new_capacity); assert!(new_capacity.is_power_of_two() || new_capacity == 0); - let mut old_table = replace(&mut self.table, RawTable::new(new_capacity)); - let old_size = old_table.size(); + let old_capacity = self.table.capacity(); - if old_table.capacity() == 0 || old_table.size() == 0 { + if self.table.size() == 0 { + self.table = RawTable::new(new_capacity); return; } - // Grow the table. - // Specialization of the other branch. - let mut bucket = Bucket::first(&mut old_table); + let mut destination = RawTable::new(new_capacity); + + // Iterate over `old_capacity` buckets, which constitute half of + // the table which was resized in-place, or the entire + // `old_table`. + let mut bucket = Bucket::new(&mut self.table, 0).unwrap(); // "So a few of the first shall be last: for many be called, // but few chosen." @@ -671,7 +672,7 @@ impl HashMap loop { bucket = match bucket.peek() { Full(full) => { - if full.distance() == 0 { + if full.displacement() == 0 { // This bucket occupies its ideal spot. // It indicates the start of another "cluster". bucket = full.into_bucket(); @@ -701,24 +702,22 @@ impl HashMap // ________________ // $$$_____________| // ^ exit once table.size == 0 - loop { + let idx_end = bucket.index() + old_capacity; + + while bucket.index() != idx_end { bucket = match bucket.peek() { Full(bucket) => { - let h = bucket.hash(); + let h = *bucket.read().0; let (b, k, v) = bucket.take(); - self.insert_hashed_ordered(h, k, v); - { - let t = b.table(); // FIXME "lifetime too short". - if t.size() == 0 { break } - }; + insert_hashed_ordered(&mut destination, h, k, v); b.into_bucket() } Empty(b) => b.into_bucket() }; - bucket.next(); + bucket.next(); // wraps at old_capacity } - assert_eq!(self.table.size(), old_size); + replace(bucket.into_table(), destination); } /// Shrinks the capacity of the map as much as possible. It will drop @@ -746,11 +745,12 @@ impl HashMap debug_assert!(self.len() <= min_capacity); if self.table.capacity() != min_capacity { - let old_table = replace(&mut self.table, RawTable::new(min_capacity)); + let mut old_table = PartialRawTable::new(replace(&mut self.table, + RawTable::new(min_capacity))); let old_size = old_table.size(); // Shrink the table. 
Naive algorithm for resizing: - for (h, k, v) in old_table.into_iter() { + while let Some((h, k, v)) = old_table.take_front() { self.insert_hashed_nocheck(h, k, v); } @@ -764,53 +764,16 @@ impl HashMap /// /// If the key already exists, the hashtable will be returned untouched /// and a reference to the existing element will be returned. - fn insert_hashed_nocheck(&mut self, hash: SafeHash, k: K, v: V) -> &mut V { - self.insert_or_replace_with(hash, k, v, |_, _, _| ()) - } - - fn insert_or_replace_with<'a, F>(&'a mut self, - hash: SafeHash, - k: K, - v: V, - mut found_existing: F) - -> &'a mut V where - F: FnMut(&mut K, &mut V, V), - { - // Worst case, we'll find one empty bucket among `size + 1` buckets. - let size = self.table.size(); - let mut probe = Bucket::new(&mut self.table, hash); - let ib = probe.index(); - - loop { - let mut bucket = match probe.peek() { - Empty(bucket) => { - // Found a hole! - return bucket.put(hash, k, v).into_mut_refs().1; - } - Full(bucket) => bucket - }; - - // hash matches? - if bucket.hash() == hash { - // key matches? - if k == *bucket.read_mut().0 { - let (bucket_k, bucket_v) = bucket.into_mut_refs(); - debug_assert!(k == *bucket_k); - // Key already exists. Get its reference. - found_existing(bucket_k, bucket_v, v); - return bucket_v; - } + fn insert_hashed_nocheck(&mut self, hash: SafeHash, k: K, v: V) -> Option { + match search_hashed(&mut self.table, hash, |key| *key == k) { + InternalEntry::Vacant(entry) => { + entry.insert(hash, k, v); + return None; } - - let robin_ib = bucket.index() as isize - bucket.distance() as isize; - - if (ib as isize) < robin_ib { - // Found a luckier bucket than me. Better steal his spot. - return robin_hood(bucket, robin_ib as usize, hash, k, v); + InternalEntry::Occupied(mut entry) => { + return Some(entry.insert(v)); } - - probe = bucket.next(); - assert!(probe.index() != ib + size + 1); + InternalEntry::TableIsEmpty => unreachable!() } } @@ -883,7 +846,10 @@ impl HashMap /// ``` #[stable(feature = "rust1", since = "1.0.0")] pub fn iter(&self) -> Iter { - Iter { inner: self.table.iter() } + Iter { + elems_left: self.table.size(), + inner: Bucket::raw_full_buckets(&self.table), + } } /// An iterator visiting all key-value pairs in arbitrary order, @@ -911,7 +877,10 @@ impl HashMap /// ``` #[stable(feature = "rust1", since = "1.0.0")] pub fn iter_mut(&mut self) -> IterMut { - IterMut { inner: self.table.iter_mut() } + IterMut { + elems_left: self.table.size(), + inner: Bucket::raw_full_buckets(&mut self.table), + } } /// Gets the given key's corresponding entry in the map for in-place manipulation. @@ -919,9 +888,16 @@ impl HashMap pub fn entry(&mut self, key: K) -> Entry { // Gotta resize now. self.reserve(1); - let hash = self.make_hash(&key); - search_entry_hashed(&mut self.table, hash, key) + match search_hashed(&mut self.table, hash, |k| key.eq(k.borrow())) { + InternalEntry::Occupied(state) => Occupied(state), + InternalEntry::Vacant(bucket) => Vacant(VacantEntry { + key: key, + hash: hash, + elem: bucket, + }), + InternalEntry::TableIsEmpty => unreachable!() + } } /// Returns the number of elements in the map. 
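For readers less familiar with the `entry` API that this hunk rewires onto `search_hashed`, a short usage example (stable public API, unchanged by this patch): the point of the rewrite is that the single probe sequence below finds either the occupied bucket or the vacant slot, including the Robin Hood displacement target, in one pass.

```rust
use std::collections::HashMap;

fn main() {
    // Count word frequencies with one hash lookup per word.
    let text = "to be or not to be";
    let mut counts: HashMap<&str, u32> = HashMap::new();
    for word in text.split_whitespace() {
        *counts.entry(word).or_insert(0) += 1;
    }
    assert_eq!(counts["to"], 2);
}
```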
@@ -979,11 +955,8 @@ impl HashMap #[unstable(feature = "std_misc", reason = "matches collection reform specification, waiting for dust to settle")] pub fn drain(&mut self) -> Drain { - fn last_two((_, b, c): (A, B, C)) -> (B, C) { (b, c) } - let last_two: fn((SafeHash, K, V)) -> (K, V) = last_two; // coerce to fn pointer - Drain { - inner: self.table.drain().map(last_two), + inner: Bucket::new(&mut self.table, 0) } } @@ -1026,7 +999,7 @@ impl HashMap pub fn get(&self, k: &Q) -> Option<&V> where K: Borrow, Q: Hash + Eq { - self.search(k).map(|bucket| bucket.into_refs().1) + self.search(k).into_option().map(|bucket| bucket.into_refs().1) } /// Returns true if the map contains a value for the specified key. @@ -1049,7 +1022,7 @@ impl HashMap pub fn contains_key(&self, k: &Q) -> bool where K: Borrow, Q: Hash + Eq { - self.search(k).is_some() + self.search(k).into_option().is_some() } /// Returns a mutable reference to the value corresponding to the key. @@ -1074,7 +1047,7 @@ impl HashMap pub fn get_mut(&mut self, k: &Q) -> Option<&mut V> where K: Borrow, Q: Hash + Eq { - self.search_mut(k).map(|bucket| bucket.into_mut_refs().1) + self.search_mut(k).into_option().map(|bucket| bucket.into_mut_refs().1) } /// Inserts a key-value pair into the map. If the key already had a value @@ -1094,15 +1067,10 @@ impl HashMap /// assert_eq!(map[&37], "c"); /// ``` #[stable(feature = "rust1", since = "1.0.0")] - pub fn insert(&mut self, k: K, v: V) -> Option { - let hash = self.make_hash(&k); + pub fn insert(&mut self, key: K, value: V) -> Option { self.reserve(1); - - let mut retval = None; - self.insert_or_replace_with(hash, k, v, |_, val_ref, val| { - retval = Some(replace(val_ref, val)); - }); - retval + let hash = self.make_hash(&key); + self.insert_hashed_nocheck(hash, key, value) } /// Removes a key from the map, returning the value at the key if the key @@ -1130,54 +1098,7 @@ impl HashMap return None } - self.search_mut(k).map(|bucket| pop_internal(bucket).1) - } -} - -fn search_entry_hashed<'a, K: Eq, V>(table: &'a mut RawTable, hash: SafeHash, k: K) - -> Entry<'a, K, V> -{ - // Worst case, we'll find one empty bucket among `size + 1` buckets. - let size = table.size(); - let mut probe = Bucket::new(table, hash); - let ib = probe.index(); - - loop { - let bucket = match probe.peek() { - Empty(bucket) => { - // Found a hole! - return Vacant(VacantEntry { - hash: hash, - key: k, - elem: NoElem(bucket), - }); - }, - Full(bucket) => bucket - }; - - // hash matches? - if bucket.hash() == hash { - // key matches? - if k == *bucket.read().0 { - return Occupied(OccupiedEntry{ - elem: bucket, - }); - } - } - - let robin_ib = bucket.index() as isize - bucket.distance() as isize; - - if (ib as isize) < robin_ib { - // Found a luckier bucket than me. Better steal his spot. 
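The "found a luckier bucket, steal his spot" logic that `search_hashed` now performs for both lookup and insertion is easier to follow in a toy setting. A minimal flat-array sketch of Robin Hood insertion (illustrative only; assumes a power-of-two table that is never full, and stores each entry's ideal index in place of a hash):

```rust
// Toy Robin Hood insertion over Option<(usize, &str)>, where the first
// field is the entry's ideal (already-moduloed) index.
fn robin_hood_insert(table: &mut Vec<Option<(usize, &str)>>, mut elem: (usize, &str)) {
    let cap = table.len();
    let mut idx = elem.0; // start probing at the ideal bucket
    loop {
        match table[idx] {
            None => {
                table[idx] = Some(elem);
                return;
            }
            Some(occupant) => {
                let occ_disp = idx.wrapping_sub(occupant.0) & (cap - 1);
                let new_disp = idx.wrapping_sub(elem.0) & (cap - 1);
                // "Robin Hood": evict an occupant that is closer to its
                // ideal bucket than we are, then keep probing with it.
                if occ_disp < new_disp {
                    table[idx] = Some(elem);
                    elem = occupant;
                }
            }
        }
        idx = (idx + 1) & (cap - 1);
    }
}

fn main() {
    let mut t = vec![None; 4];
    for e in [(0, "a"), (0, "b"), (1, "c"), (0, "d")] {
        robin_hood_insert(&mut t, e);
    }
    // "d" steals slot 2 from "c" (displacement 2 vs 1); "c" probes onward.
    assert_eq!(t, vec![Some((0, "a")), Some((0, "b")), Some((0, "d")), Some((1, "c"))]);
}
```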
- return Vacant(VacantEntry { - hash: hash, - key: k, - elem: NeqElem(bucket, robin_ib as usize), - }); - } - - probe = bucket.next(); - assert!(probe.index() != ib + size + 1); + self.search_mut(k).into_option().map(|bucket| pop_internal(bucket).1) } } @@ -1231,17 +1152,50 @@ impl<'a, K, Q: ?Sized, V, S> Index<&'a Q> for HashMap } } +impl Clone for HashMap { + fn clone(&self) -> HashMap { + let mut new_ht = RawTable::new_uninitialized(self.table.capacity()); + + let cap = self.table.capacity(); + if let Some(mut buckets) = Bucket::new(&self.table, 0) { + while buckets.index() != cap { + match buckets.peek() { + Full(full) => { + let (h, k, v) = { + let (h, k, v) = full.read(); + (*h, k.clone(), v.clone()) + }; + new_ht.push_back(Some((h, k, v))); + } + Empty(..) => { + new_ht.push_back(None); + } + } + buckets.next(); + } + } + + HashMap { + table: new_ht.unwrap(), + hash_state: self.hash_state.clone(), + resize_policy: self.resize_policy.clone(), + } + } +} + /// HashMap iterator. #[stable(feature = "rust1", since = "1.0.0")] pub struct Iter<'a, K: 'a, V: 'a> { - inner: table::Iter<'a, K, V> + inner: table::RawFullBuckets>, + elems_left: usize, } // FIXME(#19839) Remove in favor of `#[derive(Clone)]` impl<'a, K, V> Clone for Iter<'a, K, V> { fn clone(&self) -> Iter<'a, K, V> { Iter { - inner: self.inner.clone() + inner: self.inner.clone(), + elems_left: self.elems_left, } } } @@ -1249,13 +1203,14 @@ impl<'a, K, V> Clone for Iter<'a, K, V> { /// HashMap mutable values iterator. #[stable(feature = "rust1", since = "1.0.0")] pub struct IterMut<'a, K: 'a, V: 'a> { - inner: table::IterMut<'a, K, V> + inner: table::RawFullBuckets>, + elems_left: usize, } /// HashMap move iterator. #[stable(feature = "rust1", since = "1.0.0")] pub struct IntoIter { - inner: iter::Map, fn((SafeHash, K, V)) -> (K, V)> + table: PartialRawTable, } /// HashMap keys iterator. @@ -1292,14 +1247,12 @@ impl<'a, K, V> Clone for Values<'a, K, V> { #[unstable(feature = "std_misc", reason = "matches collection reform specification, waiting for dust to settle")] pub struct Drain<'a, K: 'a, V: 'a> { - inner: iter::Map, fn((SafeHash, K, V)) -> (K, V)> + inner: Option>>, } /// A view into a single occupied location in a HashMap. #[stable(feature = "rust1", since = "1.0.0")] -pub struct OccupiedEntry<'a, K: 'a, V: 'a> { - elem: FullBucket>, -} +pub type OccupiedEntry<'a, K: 'a, V: 'a> = OccupiedEntryState>; /// A view into a single empty location in a HashMap. #[stable(feature = "rust1", since = "1.0.0")] @@ -1321,6 +1274,11 @@ pub enum Entry<'a, K: 'a, V: 'a> { Vacant(VacantEntry<'a, K, V>), } +/// A view into a single occupied location in a HashMap. +struct OccupiedEntryState { + elem: FullBucket, +} + /// Possible states of a VacantEntry. enum VacantEntryState { /// The index is occupied, but the key to insert has precedence, @@ -1379,11 +1337,25 @@ impl IntoIterator for HashMap /// let vec: Vec<(&str, isize)> = map.into_iter().collect(); /// ``` fn into_iter(self) -> IntoIter { - fn last_two((_, b, c): (A, B, C)) -> (B, C) { (b, c) } - let last_two: fn((SafeHash, K, V)) -> (K, V) = last_two; - IntoIter { - inner: self.table.into_iter().map(last_two) + table: PartialRawTable::new(self.table) + } + } +} + + +enum InternalEntry { + Occupied(OccupiedEntryState), + Vacant(VacantEntryState), + /// The table is empty. Cannot create `EmptyBucket`. 
+ TableIsEmpty, +} + +impl InternalEntry { + fn into_option(self) -> Option> { + match self { + InternalEntry::Occupied(bucket) => Some(bucket.elem), + _ => None, } } } @@ -1392,36 +1364,60 @@ impl IntoIterator for HashMap impl<'a, K, V> Iterator for Iter<'a, K, V> { type Item = (&'a K, &'a V); - #[inline] fn next(&mut self) -> Option<(&'a K, &'a V)> { self.inner.next() } - #[inline] fn size_hint(&self) -> (usize, Option) { self.inner.size_hint() } + fn next(&mut self) -> Option<(&'a K, &'a V)> { + self.inner.next().map(|bucket| { + self.elems_left -= 1; + bucket.into_refs() + }) + } + + fn size_hint(&self) -> (usize, Option) { + (self.elems_left, Some(self.elems_left)) + } } #[stable(feature = "rust1", since = "1.0.0")] impl<'a, K, V> ExactSizeIterator for Iter<'a, K, V> { - #[inline] fn len(&self) -> usize { self.inner.len() } + #[inline] fn len(&self) -> usize { self.elems_left } } #[stable(feature = "rust1", since = "1.0.0")] impl<'a, K, V> Iterator for IterMut<'a, K, V> { type Item = (&'a K, &'a mut V); - #[inline] fn next(&mut self) -> Option<(&'a K, &'a mut V)> { self.inner.next() } - #[inline] fn size_hint(&self) -> (usize, Option) { self.inner.size_hint() } + fn next(&mut self) -> Option<(&'a K, &'a mut V)> { + self.inner.next().map(|bucket| { + self.elems_left -= 1; + let (k, v) = bucket.into_mut_refs(); + (&*k, v) + }) + } + + fn size_hint(&self) -> (usize, Option) { + (self.elems_left, Some(self.elems_left)) + } } #[stable(feature = "rust1", since = "1.0.0")] impl<'a, K, V> ExactSizeIterator for IterMut<'a, K, V> { - #[inline] fn len(&self) -> usize { self.inner.len() } + #[inline] fn len(&self) -> usize { self.elems_left } } #[stable(feature = "rust1", since = "1.0.0")] impl Iterator for IntoIter { type Item = (K, V); - #[inline] fn next(&mut self) -> Option<(K, V)> { self.inner.next() } - #[inline] fn size_hint(&self) -> (usize, Option) { self.inner.size_hint() } + #[inline] + fn next(&mut self) -> Option<(K, V)> { + self.table.take_front().map(|(_, k, v)| (k, v)) + } + + #[inline] + fn size_hint(&self) -> (usize, Option) { + (self.table.size(), Some(self.table.size())) + } } #[stable(feature = "rust1", since = "1.0.0")] impl ExactSizeIterator for IntoIter { - #[inline] fn len(&self) -> usize { self.inner.len() } + #[inline] fn len(&self) -> usize { self.table.size() } } #[stable(feature = "rust1", since = "1.0.0")] @@ -1452,20 +1448,51 @@ impl<'a, K, V> ExactSizeIterator for Values<'a, K, V> { impl<'a, K, V> Iterator for Drain<'a, K, V> { type Item = (K, V); - #[inline] fn next(&mut self) -> Option<(K, V)> { self.inner.next() } - #[inline] fn size_hint(&self) -> (usize, Option) { self.inner.size_hint() } + fn next(&mut self) -> Option<(K, V)> { + if let Some(mut bucket) = self.inner.take() { + let cap = bucket.table().capacity(); + while bucket.index() < cap { + match bucket.peek() { + Full(bucket) => { + let (empty, k, v) = bucket.take(); + self.inner = Some(empty.into_bucket()); + return Some((k, v)); + } + Empty(empty) => { + bucket = empty.into_next(); + } + } + } + } + + None + } + + fn size_hint(&self) -> (usize, Option) { + self.inner.as_ref().map(|bucket| { + let size = bucket.table().size(); + (size, Some(size)) + }).unwrap_or((0, Some(0))) + } } + #[stable(feature = "rust1", since = "1.0.0")] impl<'a, K, V> ExactSizeIterator for Drain<'a, K, V> { - #[inline] fn len(&self) -> usize { self.inner.len() } + fn len(&self) -> usize { self.inner.as_ref().map(|bucket| bucket.table().size()).unwrap_or(0) } +} + +impl<'a, K: 'a, V: 'a> Drop for Drain<'a, K, V> { + fn 
drop(&mut self) { + for _ in self {} + } } impl<'a, K, V> Entry<'a, K, V> { + /// Returns a mutable reference to the entry if occupied, or the VacantEntry if vacant #[unstable(feature = "std_misc", reason = "will soon be replaced by or_insert")] #[deprecated(since = "1.0", reason = "replaced with more ergonomic `or_insert` and `or_insert_with`")] - /// Returns a mutable reference to the entry if occupied, or the VacantEntry if vacant pub fn get(self) -> Result<&'a mut V, VacantEntry<'a, K, V>> { match self { Occupied(entry) => Ok(entry.into_mut()), @@ -1473,9 +1500,9 @@ impl<'a, K, V> Entry<'a, K, V> { } } - #[stable(feature = "rust1", since = "1.0.0")] /// Ensures a value is in the entry by inserting the default if empty, and returns /// a mutable reference to the value in the entry. + #[stable(feature = "rust1", since = "1.0.0")] pub fn or_insert(self, default: V) -> &'a mut V { match self { Occupied(entry) => entry.into_mut(), @@ -1483,9 +1510,9 @@ impl<'a, K, V> Entry<'a, K, V> { } } - #[stable(feature = "rust1", since = "1.0.0")] /// Ensures a value is in the entry by inserting the result of the default function if empty, /// and returns a mutable reference to the value in the entry. + #[stable(feature = "rust1", since = "1.0.0")] pub fn or_insert_with V>(self, default: F) -> &'a mut V { match self { Occupied(entry) => entry.into_mut(), @@ -1498,13 +1525,17 @@ impl<'a, K, V> OccupiedEntry<'a, K, V> { /// Gets a reference to the value in the entry. #[stable(feature = "rust1", since = "1.0.0")] pub fn get(&self) -> &V { - self.elem.read().1 + self.elem.read().2 } +} +#[unstable(feature = "std_misc", + reason = "matches collection reform v2 specification, waiting for dust to settle")] +impl<'a, K: 'a, V: 'a, M: 'a> OccupiedEntryState where M: BorrowMut> { /// Gets a mutable reference to the value in the entry. 
#[stable(feature = "rust1", since = "1.0.0")] pub fn get_mut(&mut self) -> &mut V { - self.elem.read_mut().1 + self.elem.read_mut().2 } /// Converts the OccupiedEntry into a mutable reference to the value in the entry @@ -1521,7 +1552,11 @@ impl<'a, K, V> OccupiedEntry<'a, K, V> { mem::swap(&mut value, old_value); value } +} +#[unstable(feature = "std_misc", + reason = "matches collection reform v2 specification, waiting for dust to settle")] +impl<'a, K: 'a, V: 'a> OccupiedEntryState> { /// Takes the value out of the entry, and returns it #[stable(feature = "rust1", since = "1.0.0")] pub fn remove(self) -> V { @@ -1534,12 +1569,20 @@ impl<'a, K: 'a, V: 'a> VacantEntry<'a, K, V> { /// and returns a mutable reference to it #[stable(feature = "rust1", since = "1.0.0")] pub fn insert(self, value: V) -> &'a mut V { - match self.elem { + self.elem.insert(self.hash, self.key, value) + } +} + +impl<'a, K: 'a, V: 'a> VacantEntryState> { + /// Sets the value of the entry with the provided key, + /// and returns a mutable reference to it + fn insert(self, hash: SafeHash, key: K, value: V) -> &'a mut V { + match self { NeqElem(bucket, ib) => { - robin_hood(bucket, ib, self.hash, self.key, value) + robin_hood(bucket, ib, hash, key, value) } NoElem(bucket) => { - bucket.put(self.hash, self.key, value).into_mut_refs().1 + bucket.put(hash, key, value).into_mut_refs().1 } } } @@ -1646,6 +1689,20 @@ mod test_map { assert_eq!(*m.get(&2).unwrap(), 4); } + #[test] + fn test_clone() { + let mut m = HashMap::new(); + assert_eq!(m.len(), 0); + assert!(m.insert(1, 2).is_none()); + assert_eq!(m.len(), 1); + assert!(m.insert(2, 4).is_none()); + assert_eq!(m.len(), 2); + let m2 = m.clone(); + assert_eq!(*m2.get(&1).unwrap(), 2); + assert_eq!(*m2.get(&2).unwrap(), 4); + assert_eq!(m2.len(), 2); + } + thread_local! 
{ static DROP_VECTOR: RefCell> = RefCell::new(Vec::new()) } #[derive(Hash, PartialEq, Eq)] @@ -1737,7 +1794,7 @@ mod test_map { } #[test] - fn test_move_iter_drops() { + fn test_into_iter_drops() { DROP_VECTOR.with(|v| { *v.borrow_mut() = repeat(0).take(200).collect(); }); @@ -1802,11 +1859,35 @@ mod test_map { } #[test] - fn test_empty_pop() { + fn test_empty_remove() { let mut m: HashMap = HashMap::new(); assert_eq!(m.remove(&0), None); } + #[test] + fn test_empty_entry() { + let mut m: HashMap = HashMap::new(); + match m.entry(0) { + Occupied(_) => panic!(), + Vacant(_) => {} + } + assert!(*m.entry(0).or_insert(true)); + assert_eq!(m.len(), 1); + } + + #[test] + fn test_empty_iter() { + let mut m: HashMap = HashMap::new(); + assert_eq!(m.drain().next(), None); + assert_eq!(m.keys().next(), None); + assert_eq!(m.values().next(), None); + assert_eq!(m.iter().next(), None); + assert_eq!(m.iter_mut().next(), None); + assert_eq!(m.len(), 0); + assert!(m.is_empty()); + assert_eq!(m.into_iter().next(), None); + } + #[test] fn test_lots_of_insertions() { let mut m = HashMap::new(); diff --git a/src/libstd/collections/hash/table.rs b/src/libstd/collections/hash/table.rs index 4841f36c7f747..a4a92881fb8a1 100644 --- a/src/libstd/collections/hash/table.rs +++ b/src/libstd/collections/hash/table.rs @@ -10,22 +10,21 @@ use self::BucketState::*; +use borrow::{Borrow, BorrowMut}; use clone::Clone; use cmp; use hash::{Hash, Hasher}; -use iter::{Iterator, ExactSizeIterator}; -use marker::{Copy, Send, Sync, Sized, self}; -use mem::{min_align_of, size_of}; -use mem; -use num::wrapping::OverflowingOps; -use ops::{Deref, DerefMut, Drop}; -use option::Option; -use option::Option::{Some, None}; +use iter::Iterator; +use marker::{Copy, Sized, PhantomData}; +use mem::{self, min_align_of, size_of}; +use ops::Drop; +use option::Option::{self, Some, None}; use ptr::{self, Unique}; -use rt::heap::{allocate, deallocate, EMPTY}; +use rt::heap::{allocate, deallocate}; use collections::hash_state::HashState; +use core::nonzero::NonZero; -const EMPTY_BUCKET: u64 = 0; +static EMPTY: u8 = 0; /// The raw hashtable, providing safe-ish access to the unzipped and highly /// optimized arrays of hashes, keys, and values. @@ -36,28 +35,25 @@ const EMPTY_BUCKET: u64 = 0; /// /// Essential invariants of this structure: /// -/// - if t.hashes[i] == EMPTY_BUCKET, then `Bucket::at_index(&t, i).raw` +/// - if t.hashes[i] == None, then `Bucket::new(&t, i).raw` /// points to 'undefined' contents. Don't read from it. This invariant is /// enforced outside this module with the `EmptyBucket`, `FullBucket`, /// and `SafeHash` types. /// /// - An `EmptyBucket` is only constructed at an index with -/// a hash of EMPTY_BUCKET. +/// a hash of None. /// -/// - A `FullBucket` is only constructed at an index with a -/// non-EMPTY_BUCKET hash. +/// - A `FullBucket` is only constructed at an index with a hash. /// -/// - A `SafeHash` is only constructed for non-`EMPTY_BUCKET` hash. We get +/// - A `SafeHash` is only constructed for non-zero hash. We get /// around hashes of zero by changing them to 0x8000_0000_0000_0000, /// which will likely map to the same bucket, while not being confused /// with "empty". /// -/// - All three "arrays represented by pointers" are the same length: +/// - Both "arrays represented by pointers" have the same length: /// `capacity`. This is set at creation and never changes. 
The arrays -/// are unzipped to save space (we don't have to pay for the padding -/// between odd sized elements, such as in a map from u64 to u8), and -/// be more cache aware (scanning through 8 hashes brings in at most -/// 2 cache lines, since they're all right beside each other). +/// are unzipped to be more cache aware (scanning through 8 hashes brings +/// in at most 2 cache lines, since they're all right beside each other). /// /// You can kind of think of this module/data structure as a safe wrapper /// around just the "table" part of the hashtable. It enforces some @@ -67,56 +63,48 @@ const EMPTY_BUCKET: u64 = 0; pub struct RawTable { capacity: usize, size: usize, - hashes: Unique, - - // Because K/V do not appear directly in any of the types in the struct, - // inform rustc that in fact instances of K and V are reachable from here. - marker: marker::PhantomData<(K,V)>, + // NB. The table will probably need manual impls of Send and Sync if this + // field ever changes. + middle: Unique<(K, V)>, } -unsafe impl Send for RawTable {} -unsafe impl Sync for RawTable {} - struct RawBucket { - hash: *mut u64, - key: *mut K, - val: *mut V, - _marker: marker::PhantomData<(K,V)>, + hash: *mut Option, + kval: *mut (K, V), } -impl Copy for RawBucket {} -impl Clone for RawBucket { - fn clone(&self) -> RawBucket { *self } -} +pub struct Bucket { + raw: RawBucket, + idx: usize, + range: usize, + table: M, + marker: PhantomData, -pub struct Bucket { - raw: RawBucket, - idx: usize, - table: M } -impl Copy for Bucket {} -impl Clone for Bucket { - fn clone(&self) -> Bucket { *self } +impl Copy for RawBucket {} +impl Clone for RawBucket { + fn clone(&self) -> RawBucket { *self } } -pub struct EmptyBucket { - raw: RawBucket, - idx: usize, - table: M +impl Copy for Bucket where M: Borrow> {} +impl Clone for Bucket where M: Borrow> { + fn clone(&self) -> Bucket { *self } } -pub struct FullBucket { - raw: RawBucket, - idx: usize, - table: M +mod bucket { + pub enum Empty {} + pub enum Full {} + pub enum EmptyOrFull {} } -pub type EmptyBucketImm<'table, K, V> = EmptyBucket>; -pub type FullBucketImm<'table, K, V> = FullBucket>; +pub type EmptyBucket = Bucket; +pub type EmptyBucketImm<'t, K, V> = EmptyBucket>; +pub type EmptyBucketMut<'t, K, V> = EmptyBucket>; -pub type EmptyBucketMut<'table, K, V> = EmptyBucket>; -pub type FullBucketMut<'table, K, V> = FullBucket>; +pub type FullBucket = Bucket; +pub type FullBucketImm<'t, K, V> = FullBucket>; +pub type FullBucketMut<'t, K, V> = FullBucket>; pub enum BucketState { Empty(EmptyBucket), @@ -133,16 +121,7 @@ struct GapThenFull { /// A hash that is not zero, since we use a hash of zero to represent empty /// buckets. -#[derive(PartialEq, Copy, Clone)] -pub struct SafeHash { - hash: u64, -} - -impl SafeHash { - /// Peek at the hash value, which is guaranteed to be non-zero. - #[inline(always)] - pub fn inspect(&self) -> u64 { self.hash } -} +pub type SafeHash = NonZero; /// We need to remove hashes of 0. That's reserved for empty buckets. /// This function wraps up `hash_keyed` to be the only way outside this @@ -156,51 +135,58 @@ pub fn make_hash(hash_state: &S, t: &T) -> SafeHash // EMPTY_HASH. We can maintain our precious uniform distribution // of initial indexes by unconditionally setting the MSB, // effectively reducing 64-bits hashes to 63 bits. - SafeHash { hash: 0x8000_0000_0000_0000 | state.finish() } + unsafe { NonZero::new(0x8000_0000_0000_0000 | state.finish()) } } -// `replace` casts a `*u64` to a `*SafeHash`. 
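The `SafeHash = NonZero<u64>` change relies on the niche (null-pointer-style) enum layout optimization: `Option<SafeHash>` occupies exactly 64 bits, so the hash array doubles as the empty/full map with no separate discriminant. The in-tree test below checks that size equality; the same property can be demonstrated on modern Rust with `std::num::NonZeroU64` as a stand-in for the unstable `core::nonzero::NonZero` used here:

```rust
use std::mem::size_of;
use std::num::NonZeroU64; // modern stand-in for core::nonzero::NonZero<u64>

fn main() {
    // `None` is represented by the all-zero bit pattern, so no
    // discriminant word is needed.
    assert_eq!(size_of::<Option<NonZeroU64>>(), size_of::<u64>());
    // Zero is reserved for "empty bucket"; stored hashes set the MSB.
    let h = NonZeroU64::new(0x8000_0000_0000_0000 | 42).unwrap();
    assert_eq!(h.get() & (1 << 63), 1 << 63);
}
```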
Since we statically -// ensure that a `FullBucket` points to an index with a non-zero hash, -// and a `SafeHash` is just a `u64` with a different name, this is -// safe. +// `read` casts a `*mut SafeHash` to a `*mut Option`. Since we +// statically ensure that a `FullBucket` points to an index with a non-zero +// hash, and a `SafeHash` is NonZero, this is safe. // // This test ensures that a `SafeHash` really IS the same size as a -// `u64`. If you need to change the size of `SafeHash` (and -// consequently made this test fail), `replace` needs to be -// modified to no longer assume this. +// `Option`. If you need to change the nullability of `SafeHash`, +// some functions need to be modified to no longer assume this. #[test] -fn can_alias_safehash_as_u64() { - assert_eq!(size_of::(), size_of::()) +fn can_alias_safehash_as_option() { + assert_eq!(size_of::(), size_of::>()) } impl RawBucket { unsafe fn offset(self, count: isize) -> RawBucket { RawBucket { hash: self.hash.offset(count), - key: self.key.offset(count), - val: self.val.offset(count), - _marker: marker::PhantomData, + kval: self.kval.offset(count), } } } -// Buckets hold references to the table. -impl FullBucket { - /// Borrow a reference to the table. - pub fn table(&self) -> &M { - &self.table - } - /// Move out the reference to the table. - pub fn into_table(self) -> M { - self.table +// It is safe to access the table through any number of buckets as long +// as operations on the outer bucket `Bucket` can't invalidate inner `Bucket`s. +impl Borrow> for Bucket + where M: Borrow> +{ + fn borrow(&self) -> &RawTable { + self.table.borrow().borrow() } - /// Get the raw index. - pub fn index(&self) -> usize { - self.idx +} + +impl BorrowMut> for Bucket + where M: BorrowMut> +{ + fn borrow_mut(&mut self) -> &mut RawTable { + self.table.borrow_mut().borrow_mut() } } -impl EmptyBucket { +/// `Put` is implemented for types which provide access to a table and cannot be invalidated +/// by filling a bucket. A similar implementation for `Take` is possible. +pub trait Put {} +impl Put for RawTable {} +impl<'t, K, V> Put for &'t mut RawTable {} +impl Put for Bucket {} +impl Put for FullBucket {} + +// Buckets hold references to the table. +impl Bucket { /// Borrow a reference to the table. pub fn table(&self) -> &M { &self.table @@ -209,43 +195,40 @@ impl EmptyBucket { pub fn into_table(self) -> M { self.table } -} - -impl Bucket { - /// Move out the reference to the table. - pub fn into_table(self) -> M { - self.table - } /// Get the raw index. pub fn index(&self) -> usize { self.idx } } -impl>> Bucket { - pub fn new(table: M, hash: SafeHash) -> Bucket { - Bucket::at_index(table, hash.inspect() as usize) - } - - pub fn at_index(table: M, ib_index: usize) -> Bucket { - // if capacity is 0, then the RawBucket will be populated with bogus pointers. - // This is an uncommon case though, so avoid it in release builds. 
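The switch from `Deref`-based bounds to `Borrow`/`BorrowMut`, plus the `Put` marker trait, is what lets one `Bucket` type work whether it owns the table, borrows it, or wraps another bucket (as `stash` does below). A minimal sketch of the pattern with hypothetical names, not the patch's actual types:

```rust
use std::borrow::Borrow;

struct Table { len: usize }

// One accessor works for Table, &Table, &mut Table, or any wrapper that
// can lend out a &Table, thanks to the blanket Borrow impls in std.
fn len_of<M: Borrow<Table>>(m: &M) -> usize {
    m.borrow().len
}

fn main() {
    let t = Table { len: 3 };
    assert_eq!(len_of(&t), 3);                     // M = Table
    assert_eq!(len_of(&&t), 3);                    // M = &Table
    assert_eq!(len_of(&&mut Table { len: 4 }), 4); // M = &mut Table
}
```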
- debug_assert!(table.capacity() > 0, "Table should have capacity at this point"); - let ib_index = ib_index & (table.capacity() - 1); - Bucket { - raw: unsafe { - table.first_bucket_raw().offset(ib_index as isize) - }, - idx: ib_index, - table: table +impl Bucket where M: Borrow> { + pub fn new(table: M, ib_index: usize) -> Option> { + unsafe { + let capacity = table.borrow().capacity(); + if capacity == 0 { + None + } else { + let idx = ib_index & (capacity - 1); + let bucket: Bucket = Bucket { + raw: table.borrow().first_bucket_raw().offset(idx as isize), + idx: idx, + range: capacity, + table: table, + marker: PhantomData, + }; + Some(bucket.state_cast()) + } } } - pub fn first(table: M) -> Bucket { - Bucket { - raw: table.first_bucket_raw(), - idx: 0, - table: table + pub fn raw_full_buckets(table: M) -> RawFullBuckets { + let first_bucket_raw = table.borrow().first_bucket_raw(); + RawFullBuckets { + raw: first_bucket_raw, + hashes_end: unsafe { + first_bucket_raw.hash.offset(table.borrow().capacity as isize) + }, + table: table, } } @@ -255,70 +238,75 @@ impl>> Bucket { /// this module. pub fn peek(self) -> BucketState { match unsafe { *self.raw.hash } { - EMPTY_BUCKET => - Empty(EmptyBucket { - raw: self.raw, - idx: self.idx, - table: self.table - }), - _ => - Full(FullBucket { - raw: self.raw, - idx: self.idx, - table: self.table - }) + None => Empty(self.state_cast()), + _ => Full(self.state_cast()), } } /// Modifies the bucket pointer in place to make it point to the next slot. pub fn next(&mut self) { - // Branchless bucket iteration step. - // As we reach the end of the table... - // We take the current idx: 0111111b - // Xor it by its increment: ^ 1000000b - // ------------ - // 1111111b - // Then AND with the capacity: & 1000000b - // ------------ - // to get the backwards offset: 1000000b - // ... and it's zero at all other times. - let maybe_wraparound_dist = (self.idx ^ (self.idx + 1)) & self.table.capacity(); - // Finally, we obtain the offset 1 or the offset -cap + 1. - let dist = 1 - (maybe_wraparound_dist as isize); - self.idx += 1; + let dist = if self.idx & (self.range - 1) == 0 { + 1 - self.range as isize + } else { + 1 + }; + unsafe { self.raw = self.raw.offset(dist); } } } -impl>> EmptyBucket { - #[inline] - pub fn next(self) -> Bucket { +impl Bucket where M: Borrow> { + /// Transmutes the state of a bucket. This method can't be public. + fn state_cast(self) -> Bucket { + Bucket { + raw: self.raw, + idx: self.idx, + range: self.range, + table: self.table, + marker: PhantomData, + } + } + + /// Erases information about the state of a bucket. + pub fn into_bucket(self) -> Bucket { + self.state_cast() + } + + /// Erases information about the state of a bucket and advance it. + pub fn into_next(self) -> Bucket { let mut bucket = self.into_bucket(); bucket.next(); bucket } - #[inline] - pub fn into_bucket(self) -> Bucket { + /// Duplicates the current position. This can be useful for operations + /// on two or more buckets. 
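The removed branchless iteration step deserves a concrete trace: it computes the wraparound offset with pure bit arithmetic instead of a conditional. A standalone check of that arithmetic, assuming a power-of-two capacity as the table guarantees:

```rust
fn main() {
    let cap: usize = 8; // must be a power of two
    for idx in 0..2 * cap {
        // (idx ^ (idx + 1)) sets every bit up to and including the lowest
        // zero bit of idx; AND-ing with cap leaves `cap` exactly when
        // idx + 1 is a multiple of cap, and 0 otherwise.
        let maybe_wraparound_dist = (idx ^ (idx + 1)) & cap;
        let dist = 1 - (maybe_wraparound_dist as isize);
        if (idx + 1) % cap == 0 {
            assert_eq!(dist, 1 - cap as isize); // jump back to the start
        } else {
            assert_eq!(dist, 1); // advance one bucket
        }
    }
}
```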
+ pub fn stash(self) -> Bucket, S> { Bucket { raw: self.raw, idx: self.idx, - table: self.table + range: self.range, + table: self, + marker: PhantomData, } } +} +impl EmptyBucket where M: Borrow> { pub fn gap_peek(self) -> Option> { - let gap = EmptyBucket { - raw: self.raw, + let gap = Bucket { + table: (), idx: self.idx, - table: () + range: self.range, + raw: self.raw, + marker: PhantomData, }; - match self.next().peek() { + match self.into_next().peek() { Full(bucket) => { Some(GapThenFull { gap: gap, @@ -330,7 +318,7 @@ impl>> EmptyBucket { } } -impl> + DerefMut> EmptyBucket { +impl EmptyBucket where M: BorrowMut>, M: Put { /// Puts given key and value pair, along with the key's hash, /// into this bucket in the hashtable. Note how `self` is 'moved' into /// this function, because this slot will no longer be empty when @@ -341,156 +329,93 @@ impl> + DerefMut> EmptyBucket { pub fn put(mut self, hash: SafeHash, key: K, value: V) -> FullBucket { unsafe { - *self.raw.hash = hash.inspect(); - ptr::write(self.raw.key, key); - ptr::write(self.raw.val, value); + *self.raw.hash = Some(hash); + ptr::write(self.raw.kval, (key, value)); } - self.table.size += 1; + self.table.borrow_mut().size += 1; - FullBucket { raw: self.raw, idx: self.idx, table: self.table } + self.state_cast() } } -impl>> FullBucket { - #[inline] - pub fn next(self) -> Bucket { - let mut bucket = self.into_bucket(); - bucket.next(); - bucket - } - - #[inline] - pub fn into_bucket(self) -> Bucket { - Bucket { - raw: self.raw, - idx: self.idx, - table: self.table - } - } - +impl<'t, K, V, M: 't> FullBucket where M: Borrow> { /// Get the distance between this bucket and the 'ideal' location /// as determined by the key's hash stored in it. /// - /// In the cited blog posts above, this is called the "distance to + /// Blog posts cited in the other module call the "distance to /// initial bucket", or DIB. Also known as "probe count". - pub fn distance(&self) -> usize { + pub fn displacement(&self) -> usize { // Calculates the distance one has to travel when going from // `hash mod capacity` onwards to `idx mod capacity`, wrapping around // if the destination is not reached before the end of the table. - (self.idx.wrapping_sub(self.hash().inspect() as usize)) & (self.table.capacity() - 1) - } - - #[inline] - pub fn hash(&self) -> SafeHash { - unsafe { - SafeHash { - hash: *self.raw.hash - } - } + (self.idx.wrapping_sub(**self.read().0 as usize)) & (self.table.borrow().capacity() - 1) } /// Gets references to the key and value at a given index. - pub fn read(&self) -> (&K, &V) { - unsafe { - (&*self.raw.key, - &*self.raw.val) - } - } -} - -impl> + DerefMut> FullBucket { - /// Removes this bucket's key and value from the hashtable. - /// - /// This works similarly to `put`, building an `EmptyBucket` out of the - /// taken bucket. 
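`displacement` (the renamed `distance`) reduces to a wrapping subtraction plus a power-of-two mask. A standalone check of the arithmetic under assumed toy values, where `hash` stands in for the already-reduced ideal index:

```rust
fn displacement(idx: usize, hash: u64, capacity: usize) -> usize {
    // Distance travelled from the ideal bucket `hash mod capacity` to
    // `idx`, wrapping around the end of the table. Requires a
    // power-of-two capacity.
    idx.wrapping_sub(hash as usize) & (capacity - 1)
}

fn main() {
    let cap = 8;
    assert_eq!(displacement(5, 5, cap), 0); // sits in its ideal bucket
    assert_eq!(displacement(7, 5, cap), 2); // probed two slots forward
    assert_eq!(displacement(1, 6, cap), 3); // wrapped: 6 -> 7 -> 0 -> 1
}
```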
- pub fn take(mut self) -> (EmptyBucket, K, V) { - self.table.size -= 1; - - unsafe { - *self.raw.hash = EMPTY_BUCKET; - ( - EmptyBucket { - raw: self.raw, - idx: self.idx, - table: self.table - }, - ptr::read(self.raw.key), - ptr::read(self.raw.val) - ) - } - } - - pub fn replace(&mut self, h: SafeHash, k: K, v: V) -> (SafeHash, K, V) { - unsafe { - let old_hash = ptr::replace(self.raw.hash as *mut SafeHash, h); - let old_key = ptr::replace(self.raw.key, k); - let old_val = ptr::replace(self.raw.val, v); - - (old_hash, old_key, old_val) - } + pub fn read(&self) -> (&SafeHash, &K, &V) { + let (&ref h, &(ref k, ref v)) = unsafe { + (&*(self.raw.hash as *mut SafeHash), &*self.raw.kval) + }; + (h, k, v) } - /// Gets mutable references to the key and value at a given index. - pub fn read_mut(&mut self) -> (&mut K, &mut V) { - unsafe { - (&mut *self.raw.key, - &mut *self.raw.val) - } - } -} - -impl<'t, K, V, M: Deref> + 't> FullBucket { /// Exchange a bucket state for immutable references into the table. /// Because the underlying reference to the table is also consumed, /// no further changes to the structure of the table are possible; /// in exchange for this, the returned references have a longer lifetime /// than the references returned by `read()`. pub fn into_refs(self) -> (&'t K, &'t V) { + unsafe { (&(*self.raw.kval).0, &(*self.raw.kval).1) } + } +} + +impl<'t, K, V, M: 't> FullBucket where M: BorrowMut> { + /// Gets mutable references to the key and value at a given index. + pub fn read_mut(&mut self) -> (&mut SafeHash, &mut K, &mut V) { unsafe { - (&*self.raw.key, - &*self.raw.val) + let &mut (ref mut k, ref mut v) = &mut *self.raw.kval; + (&mut *(self.raw.hash as *mut SafeHash), k, v) } } -} -impl<'t, K, V, M: Deref> + DerefMut + 't> FullBucket { /// This works similarly to `into_refs`, exchanging a bucket state /// for mutable references into the table. pub fn into_mut_refs(self) -> (&'t mut K, &'t mut V) { - unsafe { - (&mut *self.raw.key, - &mut *self.raw.val) - } + unsafe { (&mut (*self.raw.kval).0, &mut (*self.raw.kval).1) } } -} -impl BucketState { - // For convenience. - pub fn expect_full(self) -> FullBucket { - match self { - Full(full) => full, - Empty(..) => panic!("Expected full bucket") + /// Removes this bucket's key and value from the hashtable. + /// + /// This works similarly to `put`, building an `EmptyBucket` out of the + /// taken bucket. + pub fn take(mut self) -> (EmptyBucket, K, V) { + self.table.borrow_mut().size -= 1; + + unsafe { + *self.raw.hash = None; + let (k, v) = ptr::read(self.raw.kval); + (self.state_cast(), k, v) } } } -impl>> GapThenFull { +impl GapThenFull where M: Borrow> { #[inline] pub fn full(&self) -> &FullBucket { &self.full } + /// Advances `GapThenFull` by one bucket. pub fn shift(mut self) -> Option> { unsafe { - *self.gap.raw.hash = mem::replace(&mut *self.full.raw.hash, EMPTY_BUCKET); - ptr::copy_nonoverlapping(self.full.raw.key, self.gap.raw.key, 1); - ptr::copy_nonoverlapping(self.full.raw.val, self.gap.raw.val, 1); + *self.gap.raw.hash = mem::replace(&mut *self.full.raw.hash, None); + ptr::copy_nonoverlapping(self.full.raw.kval, self.gap.raw.kval, 1); } - let FullBucket { raw: prev_raw, idx: prev_idx, .. } = self.full; + let Bucket { raw: prev_raw, idx: prev_idx, .. } = self.full; - match self.full.next().peek() { + match self.full.into_next().peek() { Full(bucket) => { self.gap.raw = prev_raw; self.gap.idx = prev_idx; @@ -504,140 +429,33 @@ impl>> GapThenFull { } } - -/// Rounds up to a multiple of a power of two. 
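`gap_peek`/`shift` above implement the backward-shift deletion from reference [3]: after an entry is removed, the following entries slide back one slot until a displacement-0 entry or an empty slot is reached. A toy flat-array version of the same idea (illustrative only; the real code does this over raw buckets via `GapThenFull`):

```rust
// Backward-shift deletion over Option<(usize, &str)>, where the first
// field is the entry's ideal (already-moduloed) index.
fn backward_shift(table: &mut Vec<Option<(usize, &str)>>, removed: usize) {
    let cap = table.len();
    table[removed] = None;
    let mut gap = removed;
    loop {
        let next = (gap + 1) % cap;
        match table[next] {
            // Stop at an empty slot or an entry already in its ideal
            // bucket: shifting it back would strand it before its hash.
            None => break,
            Some((ideal, _)) if ideal == next => break,
            _ => {
                table[gap] = table[next].take();
                gap = next;
            }
        }
    }
}

fn main() {
    // Capacity 4; entries annotated with their ideal index.
    let mut t = vec![
        Some((0, "a")), // displacement 0
        Some((0, "b")), // displacement 1
        Some((1, "c")), // displacement 1
        None,
    ];
    backward_shift(&mut t, 0); // remove "a"; "b" and "c" slide back
    assert_eq!(t, vec![Some((0, "b")), Some((1, "c")), None, None]);
}
```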
Returns the closest multiple -/// of `target_alignment` that is higher or equal to `unrounded`. -/// -/// # Panics -/// -/// Panics if `target_alignment` is not a power of two. -fn round_up_to_next(unrounded: usize, target_alignment: usize) -> usize { - assert!(target_alignment.is_power_of_two()); - (unrounded + target_alignment - 1) & !(target_alignment - 1) -} - -#[test] -fn test_rounding() { - assert_eq!(round_up_to_next(0, 4), 0); - assert_eq!(round_up_to_next(1, 4), 4); - assert_eq!(round_up_to_next(2, 4), 4); - assert_eq!(round_up_to_next(3, 4), 4); - assert_eq!(round_up_to_next(4, 4), 4); - assert_eq!(round_up_to_next(5, 4), 8); -} - -// Returns a tuple of (key_offset, val_offset), -// from the start of a mallocated array. -fn calculate_offsets(hashes_size: usize, - keys_size: usize, keys_align: usize, - vals_align: usize) - -> (usize, usize, bool) { - let keys_offset = round_up_to_next(hashes_size, keys_align); - let (end_of_keys, oflo) = keys_offset.overflowing_add(keys_size); - - let vals_offset = round_up_to_next(end_of_keys, vals_align); - - (keys_offset, vals_offset, oflo) -} - -// Returns a tuple of (minimum required malloc alignment, hash_offset, -// array_size), from the start of a mallocated array. -fn calculate_allocation(hash_size: usize, hash_align: usize, - keys_size: usize, keys_align: usize, - vals_size: usize, vals_align: usize) - -> (usize, usize, usize, bool) { - let hash_offset = 0; - let (_, vals_offset, oflo) = calculate_offsets(hash_size, - keys_size, keys_align, - vals_align); - let (end_of_vals, oflo2) = vals_offset.overflowing_add(vals_size); - - let min_align = cmp::max(hash_align, cmp::max(keys_align, vals_align)); - - (min_align, hash_offset, end_of_vals, oflo || oflo2) -} - -#[test] -fn test_offset_calculation() { - assert_eq!(calculate_allocation(128, 8, 15, 1, 4, 4), (8, 0, 148, false)); - assert_eq!(calculate_allocation(3, 1, 2, 1, 1, 1), (1, 0, 6, false)); - assert_eq!(calculate_allocation(6, 2, 12, 4, 24, 8), (8, 0, 48, false)); - assert_eq!(calculate_offsets(128, 15, 1, 4), (128, 144, false)); - assert_eq!(calculate_offsets(3, 2, 1, 1), (3, 5, false)); - assert_eq!(calculate_offsets(6, 12, 4, 8), (8, 24, false)); -} - impl RawTable { - /// Does not initialize the buckets. The caller should ensure they, - /// at the very least, set every hash to EMPTY_BUCKET. - unsafe fn new_uninitialized(capacity: usize) -> RawTable { - if capacity == 0 { - return RawTable { - size: 0, - capacity: 0, - hashes: Unique::new(EMPTY as *mut u64), - marker: marker::PhantomData, + /// Does not initialize the buckets. + pub fn new_uninitialized(capacity: usize) -> PartialRawTable { + unsafe { + let table = if capacity == 0 { + RawTable { + capacity: 0, + size: 0, + middle: Unique::new(&EMPTY as *const _ as *mut (K, V)), + } + } else { + let alloc = allocate(checked_size_generic::(capacity), align::()); + if alloc.is_null() { ::alloc::oom() } + + RawTable { + capacity: capacity, + size: 0, + middle: Unique::new((alloc as *mut (K, V)).offset(capacity as isize)), + } }; - } - // No need for `checked_mul` before a more restrictive check performed - // later in this method. - let hashes_size = capacity * size_of::(); - let keys_size = capacity * size_of::< K >(); - let vals_size = capacity * size_of::< V >(); - - // Allocating hashmaps is a little tricky. We need to allocate three - // arrays, but since we know their sizes and alignments up front, - // we just allocate a single array, and then have the subarrays - // point into it. 
- // - // This is great in theory, but in practice getting the alignment - // right is a little subtle. Therefore, calculating offsets has been - // factored out into a different function. - let (malloc_alignment, hash_offset, size, oflo) = - calculate_allocation( - hashes_size, min_align_of::(), - keys_size, min_align_of::< K >(), - vals_size, min_align_of::< V >()); - - assert!(!oflo, "capacity overflow"); - - // One check for overflow that covers calculation and rounding of size. - let size_of_bucket = size_of::().checked_add(size_of::()).unwrap() - .checked_add(size_of::()).unwrap(); - assert!(size >= capacity.checked_mul(size_of_bucket) - .expect("capacity overflow"), - "capacity overflow"); - - let buffer = allocate(size, malloc_alignment); - if buffer.is_null() { ::alloc::oom() } - - let hashes = buffer.offset(hash_offset as isize) as *mut u64; - - RawTable { - capacity: capacity, - size: 0, - hashes: Unique::new(hashes), - marker: marker::PhantomData, - } - } - - fn first_bucket_raw(&self) -> RawBucket { - let hashes_size = self.capacity * size_of::(); - let keys_size = self.capacity * size_of::(); - - let buffer = *self.hashes as *mut u8; - let (keys_offset, vals_offset, oflo) = - calculate_offsets(hashes_size, - keys_size, min_align_of::(), - min_align_of::()); - debug_assert!(!oflo, "capacity overflow"); - unsafe { - RawBucket { - hash: *self.hashes, - key: buffer.offset(keys_offset as isize) as *mut K, - val: buffer.offset(vals_offset as isize) as *mut V, - _marker: marker::PhantomData, + PartialRawTable { + front: table.first_bucket_raw(), + back: table.first_bucket_raw(), + front_num: 0, + back_num: capacity, + table: table, } } } @@ -645,10 +463,16 @@ impl RawTable { /// Creates a new raw table from a given capacity. All buckets are /// initially empty. pub fn new(capacity: usize) -> RawTable { + RawTable::new_uninitialized(capacity).unwrap() + } + + #[inline] + fn first_bucket_raw(&self) -> RawBucket { unsafe { - let ret = RawTable::new_uninitialized(capacity); - ptr::write_bytes(*ret.hashes, 0, capacity); - ret + RawBucket { + hash: self.as_mut_ptr() as *mut Option, + kval: self.as_mut_ptr().offset(-(self.capacity as isize)), + } } } @@ -663,106 +487,98 @@ impl RawTable { self.size } - fn raw_buckets(&self) -> RawBuckets { - RawBuckets { - raw: self.first_bucket_raw(), - hashes_end: unsafe { - self.hashes.offset(self.capacity as isize) - }, - marker: marker::PhantomData, - } + /// Pointer to one-past-the-last key-value pair. + pub fn as_mut_ptr(&self) -> *mut (K, V) { + unsafe { self.middle.get() as *const _ as *mut _ } } +} - pub fn iter(&self) -> Iter { - Iter { - iter: self.raw_buckets(), - elems_left: self.size(), - } - } +/// Rounds up to a multiple of a power of two. Returns the closest multiple +/// of `target_alignment` that is higher or equal to `unrounded`. +/// +/// # Panics +/// +/// Panics if `target_alignment` is not a power of two. 
+
+/// A newtyped RawBucket. Not copyable.
+pub struct RawFullBucket<K, V, M>(RawBucket<K, V>, PhantomData<M>);
+
+impl<'t, K, V, M: 't> RawFullBucket<K, V, M> where M: Borrow<RawTable<K, V>> {
+    pub fn into_refs(self) -> (&'t K, &'t V) {
+        unsafe { (&(*self.0.kval).0, &(*self.0.kval).1) }
+    }
+}
 
-    /// Returns an iterator that copies out each entry. Used while the table
-    /// is being dropped.
-    unsafe fn rev_move_buckets(&mut self) -> RevMoveBuckets<K, V> {
-        let raw_bucket = self.first_bucket_raw();
-        RevMoveBuckets {
-            raw: raw_bucket.offset(self.capacity as isize),
-            hashes_end: raw_bucket.hash,
-            elems_left: self.size,
-            marker: marker::PhantomData,
-        }
-    }
-}
+impl<'t, K, V, M: 't> RawFullBucket<K, V, M> where M: BorrowMut<RawTable<K, V>> {
+    pub fn into_mut_refs(self) -> (&'t mut K, &'t mut V) {
+        unsafe { (&mut (*self.0.kval).0, &mut (*self.0.kval).1) }
+    }
+}
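+
+// The `M` parameter decides what a full bucket can hand out: a shared borrow
+// of the table yields `into_refs`, a mutable borrow `into_mut_refs`.
+// A sketch (the `full` and `full_mut` bindings are hypothetical):
+//
+// ```
+// let (k, v): (&K, &V) = full.into_refs();
+// let (k, v): (&mut K, &mut V) = full_mut.into_mut_refs();
+// ```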
 
 /// A raw iterator. The basis for some other iterators in this module. Although
 /// this interface is safe, it's not used outside this module.
-struct RawBuckets<'a, K, V> {
+pub struct RawFullBuckets<K, V, M> {
     raw: RawBucket<K, V>,
-    hashes_end: *mut u64,
-
-    // Strictly speaking, this should be &'a (K,V), but that would
-    // require that K:'a, and we often use RawBuckets<'static...> for
-    // move iterations, so that messes up a lot of other things. So
-    // just use `&'a (K,V)` as this is not a publicly exposed type
-    // anyway.
-    marker: marker::PhantomData<&'a ()>,
+    hashes_end: *mut Option<SafeHash>,
+    table: M,
 }
 
 // FIXME(#19839) Remove in favor of `#[derive(Clone)]`
-impl<'a, K, V> Clone for RawBuckets<'a, K, V> {
-    fn clone(&self) -> RawBuckets<'a, K, V> {
-        RawBuckets {
+impl<K, V, M: Clone> Clone for RawFullBuckets<K, V, M> {
+    fn clone(&self) -> RawFullBuckets<K, V, M> {
+        RawFullBuckets {
             raw: self.raw,
             hashes_end: self.hashes_end,
-            marker: marker::PhantomData,
+            table: self.table.clone(),
         }
     }
 }
 
+impl<K, V, M> Iterator for RawFullBuckets<K, V, M> {
+    type Item = RawFullBucket<K, V, M>;
-impl<'a, K, V> Iterator for RawBuckets<'a, K, V> {
-    type Item = RawBucket<K, V>;
-
-    fn next(&mut self) -> Option<RawBucket<K, V>> {
+
+    fn next(&mut self) -> Option<RawFullBucket<K, V, M>> {
         while self.raw.hash != self.hashes_end {
             unsafe {
                 // We are swapping out the pointer to a bucket and replacing
                 // it with the pointer to the next one.
                 let prev = ptr::replace(&mut self.raw, self.raw.offset(1));
-                if *prev.hash != EMPTY_BUCKET {
-                    return Some(prev);
+                if *prev.hash != None {
+                    return Some(RawFullBucket(prev, PhantomData));
                 }
             }
         }
@@ -771,249 +587,104 @@
     }
 }
 
-/// An iterator that moves out buckets in reverse order. It leaves the table
-/// in an inconsistent state and should only be used for dropping
-/// the table's remaining entries. It's used in the implementation of Drop.
-struct RevMoveBuckets<'a, K, V> {
-    raw: RawBucket<K, V>,
-    hashes_end: *mut u64,
-    elems_left: usize,
-
-    // As above, `&'a (K,V)` would seem better, but we often use
-    // 'static for the lifetime, and this is not a publicly exposed
-    // type.
-    marker: marker::PhantomData<&'a ()>,
-}
-
-impl<'a, K, V> Iterator for RevMoveBuckets<'a, K, V> {
-    type Item = (K, V);
-
-    fn next(&mut self) -> Option<(K, V)> {
-        if self.elems_left == 0 {
-            return None;
-        }
-
-        loop {
-            debug_assert!(self.raw.hash != self.hashes_end);
-
-            unsafe {
-                self.raw = self.raw.offset(-1);
-
-                if *self.raw.hash != EMPTY_BUCKET {
-                    self.elems_left -= 1;
-                    return Some((
-                        ptr::read(self.raw.key),
-                        ptr::read(self.raw.val)
-                    ));
-                }
-            }
-        }
-    }
-}
-
+impl<K, V> Drop for RawTable<K, V> {
+    fn drop(&mut self) {
+        if self.capacity == 0 || self.capacity == mem::POST_DROP_USIZE {
+            return;
+        }
+        // Check if the size is 0, so we don't do a useless scan when
+        // dropping empty tables such as on resize.
+        // Avoid double drop of elements that have been already moved out.
+        unsafe {
+            if self.size != 0 {
+                for bucket in Bucket::raw_full_buckets(&mut *self) {
+                    ptr::read(bucket.0.kval);
+                }
+            }
+            let ptr = self.as_mut_ptr().offset(-(self.capacity as isize)) as *mut u8;
+            deallocate(ptr, size_generic::<K, V>(self.capacity), align::<K, V>());
+        }
+    }
+}
-/// Iterator over shared references to entries in a table.
-pub struct Iter<'a, K: 'a, V: 'a> {
-    iter: RawBuckets<'a, K, V>,
-    elems_left: usize,
-}
-// FIXME(#19839) Remove in favor of `#[derive(Clone)]`
-impl<'a, K, V> Clone for Iter<'a, K, V> {
-    fn clone(&self) -> Iter<'a, K, V> {
-        Iter {
-            iter: self.iter.clone(),
-            elems_left: self.elems_left
-        }
-    }
-}
-
-/// Iterator over mutable references to entries in a table.
-pub struct IterMut<'a, K: 'a, V: 'a> {
-    iter: RawBuckets<'a, K, V>,
-    elems_left: usize,
-}
-
-/// Iterator over the entries in a table, consuming the table.
-pub struct IntoIter<K, V> {
+/// A partial table provides safe and cheap draining and incremental construction.
+pub struct PartialRawTable<K, V> {
     table: RawTable<K, V>,
-    iter: RawBuckets<'static, K, V>
-}
-
-/// Iterator over the entries in a table, clearing the table.
-pub struct Drain<'a, K: 'a, V: 'a> {
-    table: &'a mut RawTable<K, V>,
-    iter: RawBuckets<'static, K, V>,
-}
-
-impl<'a, K, V> Iterator for Iter<'a, K, V> {
-    type Item = (&'a K, &'a V);
-
-    fn next(&mut self) -> Option<(&'a K, &'a V)> {
-        self.iter.next().map(|bucket| {
-            self.elems_left -= 1;
-            unsafe {
-                (&*bucket.key,
-                 &*bucket.val)
-            }
-        })
-    }
-
-    fn size_hint(&self) -> (usize, Option<usize>) {
-        (self.elems_left, Some(self.elems_left))
-    }
+    front: RawBucket<K, V>,
+    back: RawBucket<K, V>,
+    front_num: usize,
+    back_num: usize,
 }
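+
+// For example, draining a whole map reduces to a sketch like the following
+// (`table` stands for a hypothetical fully initialized RawTable; entries
+// remaining when the loop stops early are dropped with the partial table):
+//
+// ```
+// let mut partial = PartialRawTable::new(table);
+// while let Some((_, k, v)) = partial.take_front() {
+//     // yield (k, v) to the caller
+// }
+// ```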
 
-impl<'a, K, V> ExactSizeIterator for Iter<'a, K, V> {
-    fn len(&self) -> usize { self.elems_left }
-}
-
-impl<'a, K, V> Iterator for IterMut<'a, K, V> {
-    type Item = (&'a K, &'a mut V);
-
-    fn next(&mut self) -> Option<(&'a K, &'a mut V)> {
-        self.iter.next().map(|bucket| {
-            self.elems_left -= 1;
-            unsafe {
-                (&*bucket.key,
-                 &mut *bucket.val)
-            }
-        })
-    }
-
-    fn size_hint(&self) -> (usize, Option<usize>) {
-        (self.elems_left, Some(self.elems_left))
-    }
-}
-impl<'a, K, V> ExactSizeIterator for IterMut<'a, K, V> {
-    fn len(&self) -> usize { self.elems_left }
-}
-
-impl<K, V> Iterator for IntoIter<K, V> {
-    type Item = (SafeHash, K, V);
-
-    fn next(&mut self) -> Option<(SafeHash, K, V)> {
-        self.iter.next().map(|bucket| {
-            self.table.size -= 1;
-            unsafe {
-                (
-                    SafeHash {
-                        hash: *bucket.hash,
-                    },
-                    ptr::read(bucket.key),
-                    ptr::read(bucket.val)
-                )
-            }
-        })
-    }
-
-    fn size_hint(&self) -> (usize, Option<usize>) {
-        let size = self.table.size();
-        (size, Some(size))
-    }
-}
-impl<K, V> ExactSizeIterator for IntoIter<K, V> {
-    fn len(&self) -> usize { self.table.size() }
-}
-
-impl<'a, K, V> Iterator for Drain<'a, K, V> {
-    type Item = (SafeHash, K, V);
-
-    #[inline]
-    fn next(&mut self) -> Option<(SafeHash, K, V)> {
-        self.iter.next().map(|bucket| {
-            self.table.size -= 1;
-            unsafe {
-                (
-                    SafeHash {
-                        hash: ptr::replace(bucket.hash, EMPTY_BUCKET),
-                    },
-                    ptr::read(bucket.key),
-                    ptr::read(bucket.val)
-                )
-            }
-        })
-    }
-
-    fn size_hint(&self) -> (usize, Option<usize>) {
-        let size = self.table.size();
-        (size, Some(size))
-    }
-}
-impl<'a, K, V> ExactSizeIterator for Drain<'a, K, V> {
-    fn len(&self) -> usize { self.table.size() }
-}
-
-impl<'a, K: 'a, V: 'a> Drop for Drain<'a, K, V> {
-    fn drop(&mut self) {
-        for _ in self.by_ref() {}
-    }
-}
+impl<K, V> PartialRawTable<K, V> {
+    /// Turns a table into a partial table. All of its buckets are already
+    /// initialized.
+    pub fn new(table: RawTable<K, V>) -> PartialRawTable<K, V> {
+        unsafe {
+            PartialRawTable {
+                front: table.first_bucket_raw(),
+                back: table.first_bucket_raw().offset(table.capacity() as isize),
+                front_num: 0,
+                back_num: 0,
+                table: table,
+            }
+        }
+    }
+
+    /// Initializes a bucket. Has no effect if there are no uninitialized
+    /// buckets at the back.
+    pub fn push_back(&mut self, bucket: Option<(SafeHash, K, V)>) {
+        unsafe {
+            if self.back_num != 0 {
+                self.back_num -= 1;
+                let back = ptr::replace(&mut self.back, self.back.offset(1));
+                if let Some((h, k, v)) = bucket {
+                    *back.hash = Some(h);
+                    ptr::write(back.kval, (k, v));
+                    self.table.size += 1;
+                } else {
+                    *back.hash = None;
+                }
+            }
+        }
+    }
+
+    /// Takes out an initialized bucket. Returns `None` once no initialized
+    /// buckets remain.
+    pub fn take_front(&mut self) -> Option<(SafeHash, K, V)> {
+        unsafe {
+            while self.front.hash != self.back.hash {
+                self.front_num += 1;
+                let front = ptr::replace(&mut self.front, self.front.offset(1));
+                if let Some(h) = *front.hash {
+                    self.table.size -= 1;
+                    let (k, v) = ptr::read(front.kval);
+                    return Some((h, k, v));
+                }
+            }
+        }
+
+        None
+    }
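+
+    // Note: `front` only ever chases `back`. `front_num` counts buckets
+    // already consumed from the front and `back_num` counts buckets still
+    // uninitialized at the back; both ranges hold stale hashes that
+    // `unwrap` below zeroes before handing back a plain RawTable.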
 
-impl<K: Clone, V: Clone> Clone for RawTable<K, V> {
-    fn clone(&self) -> RawTable<K, V> {
+    /// Unwraps the table, zeroing any uninitialized ranges.
+    pub fn unwrap(self) -> RawTable<K, V> {
         unsafe {
-            let mut new_ht = RawTable::new_uninitialized(self.capacity());
-
-            {
-                let cap = self.capacity();
-                let mut new_buckets = Bucket::first(&mut new_ht);
-                let mut buckets = Bucket::first(self);
-                while buckets.index() != cap {
-                    match buckets.peek() {
-                        Full(full) => {
-                            let (h, k, v) = {
-                                let (k, v) = full.read();
-                                (full.hash(), k.clone(), v.clone())
-                            };
-                            *new_buckets.raw.hash = h.inspect();
-                            ptr::write(new_buckets.raw.key, k);
-                            ptr::write(new_buckets.raw.val, v);
-                        }
-                        Empty(..) => {
-                            *new_buckets.raw.hash = EMPTY_BUCKET;
-                        }
-                    }
-                    new_buckets.next();
-                    buckets.next();
-                }
-            };
-
-            new_ht.size = self.size();
-
-            new_ht
+            ptr::write_bytes(self.table.first_bucket_raw().hash, 0, self.front_num);
+            ptr::write_bytes(self.back.hash, 0, self.back_num);
+            let table = ptr::read(&self.table);
+            mem::forget(self);
+            table
         }
     }
+
+    pub fn size(&self) -> usize {
+        self.table.size()
+    }
 }
 
-impl<K, V> Drop for RawTable<K, V> {
+/// Drops all initialized buckets in the partial table.
+impl<K, V> Drop for PartialRawTable<K, V> {
     fn drop(&mut self) {
-        if self.capacity == 0 || self.capacity == mem::POST_DROP_USIZE {
-            return;
-        }
-
-        // This is done in reverse because we've likely partially taken
-        // some elements out with `.into_iter()` from the front.
-        // Check if the size is 0, so we don't do a useless scan when
-        // dropping empty tables such as on resize.
-        // Also avoid double drop of elements that have been already moved out.
-        unsafe {
-            for _ in self.rev_move_buckets() {}
-        }
-
-        let hashes_size = self.capacity * size_of::<u64>();
-        let keys_size = self.capacity * size_of::<K>();
-        let vals_size = self.capacity * size_of::<V>();
-        let (align, _, size, oflo) =
-            calculate_allocation(hashes_size, min_align_of::<u64>(),
-                                 keys_size, min_align_of::<K>(),
-                                 vals_size, min_align_of::<V>());
-
-        debug_assert!(!oflo, "should be impossible");
-
-        unsafe {
-            deallocate(*self.hashes as *mut u8, size, align);
-            // Remember how everything was allocated out of one buffer
-            // during initialization? We only need one call to free here.
-        }
+        while let Some(_) = self.take_front() {}
+        debug_assert_eq!(self.table.size, 0);
     }
 }
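+
+// Note on the two-stage teardown: PartialRawTable::drop drains and drops any
+// remaining (K, V) pairs via take_front, leaving size == 0, so the inner
+// RawTable::drop that runs afterwards only frees the allocation and skips
+// its scan over full buckets.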