From 873985af90cfd5e6d8ec41d5585f0cb947a73676 Mon Sep 17 00:00:00 2001 From: Markus Westerlind Date: Tue, 29 Sep 2020 11:18:17 +0200 Subject: [PATCH 01/33] chore: Add a benchmark for growing a map while inserting to it --- benches/bench.rs | 25 +++++++++++++++++++++++++ src/main.rs | 20 ++++++++++++++++++++ 2 files changed, 45 insertions(+) create mode 100644 src/main.rs diff --git a/benches/bench.rs b/benches/bench.rs index 729b53fe52..3accff741f 100644 --- a/benches/bench.rs +++ b/benches/bench.rs @@ -105,6 +105,31 @@ bench_suite!( insert_std_random ); +macro_rules! bench_grow_insert { + ($name:ident, $maptype:ident, $keydist:expr) => { + #[bench] + fn $name(b: &mut Bencher) { + b.iter(|| { + let mut m = $maptype::default(); + for i in ($keydist).take(SIZE) { + m.insert(i, i); + } + black_box(&mut m); + }) + } + }; +} + +bench_suite!( + bench_grow_insert, + grow_insert_ahash_serial, + grow_insert_std_serial, + grow_insert_ahash_highbits, + grow_insert_std_highbits, + grow_insert_ahash_random, + grow_insert_std_random +); + macro_rules! bench_insert_erase { ($name:ident, $maptype:ident, $keydist:expr) => { #[bench] diff --git a/src/main.rs b/src/main.rs new file mode 100644 index 0000000000..4cf12a3de2 --- /dev/null +++ b/src/main.rs @@ -0,0 +1,20 @@ +fn main() { + let mut map1 = hashbrown::HashMap::new(); + map1.insert(1u8, ""); + map1.reserve(1000); + let mut map2 = hashbrown::HashMap::new(); + map2.insert(1i16, ""); + map2.reserve(1000); + let mut map3 = hashbrown::HashMap::new(); + map3.insert(3u16, ""); + map3.reserve(1000); + let mut map4 = hashbrown::HashMap::new(); + map4.insert(3u64, ""); + map4.reserve(1000); + dbg!(( + map1.iter().next(), + map2.iter().next(), + map3.iter().next(), + map4.iter().next() + )); +} From 022f06ee99765dc8bed8bed71fe229032c8b5583 Mon Sep 17 00:00:00 2001 From: Markus Westerlind Date: Tue, 29 Sep 2020 11:18:06 +0200 Subject: [PATCH 02/33] refactor: Extract a non generic part of RawTable --- src/raw/mod.rs | 418 ++++++++++++++++++++++++++++--------------------- 1 file changed, 239 insertions(+), 179 deletions(-) diff --git a/src/raw/mod.rs b/src/raw/mod.rs index 79995e933a..785b1faa5b 100644 --- a/src/raw/mod.rs +++ b/src/raw/mod.rs @@ -368,6 +368,12 @@ impl Bucket { /// A raw hash table with an unsafe API. pub struct RawTable { + table: RawTableInner, + // Tell dropck that we own instances of T. + marker: PhantomData, +} + +struct RawTableInner { // Mask to get an index from a hash value. The value is one less than the // number of buckets in the table. bucket_mask: usize, @@ -382,9 +388,6 @@ pub struct RawTable { // Number of elements in the table, only really used by len() items: usize, - // Tell dropck that we own instances of T. - marker: PhantomData, - alloc: A, } @@ -397,13 +400,8 @@ impl RawTable { #[cfg_attr(feature = "inline-more", inline)] pub const fn new() -> Self { Self { - // Be careful to cast the entire slice to a raw pointer. - ctrl: unsafe { NonNull::new_unchecked(Group::static_empty() as *const _ as *mut u8) }, - bucket_mask: 0, - items: 0, - growth_left: 0, + table: RawTableInner::new_in(Global), marker: PhantomData, - alloc: Global, } } @@ -431,13 +429,8 @@ impl RawTable { #[cfg_attr(feature = "inline-more", inline)] pub fn new_in(alloc: A) -> Self { Self { - // Be careful to cast the entire slice to a raw pointer. 
- ctrl: unsafe { NonNull::new_unchecked(Group::static_empty() as *const _ as *mut u8) }, - bucket_mask: 0, - items: 0, - growth_left: 0, + table: RawTableInner::new_in(alloc), marker: PhantomData, - alloc, } } @@ -457,18 +450,15 @@ impl RawTable { Some(lco) => lco, None => return Err(fallibility.capacity_overflow()), }; - let ptr: NonNull = match do_alloc(&alloc, layout) { - Ok(block) => block.cast(), - Err(_) => return Err(fallibility.alloc_err(layout)), - }; - let ctrl = NonNull::new_unchecked(ptr.as_ptr().add(ctrl_offset)); Ok(Self { - ctrl, - bucket_mask: buckets - 1, - items: 0, - growth_left: bucket_mask_to_capacity(buckets - 1), + table: RawTableInner::new_uninitialized( + alloc, + buckets, + fallibility, + layout, + ctrl_offset, + )?, marker: PhantomData, - alloc, }) } @@ -489,7 +479,10 @@ impl RawTable { None => return Err(fallibility.capacity_overflow()), }; let result = Self::new_uninitialized(alloc, buckets, fallibility)?; - result.ctrl(0).write_bytes(EMPTY, result.num_ctrl_bytes()); + result + .table + .ctrl(0) + .write_bytes(EMPTY, result.table.num_ctrl_bytes()); Ok(result) } @@ -521,8 +514,8 @@ impl RawTable { Some(lco) => lco, None => hint::unreachable_unchecked(), }; - self.alloc.deallocate( - NonNull::new_unchecked(self.ctrl.as_ptr().sub(ctrl_offset)), + self.table.alloc.deallocate( + NonNull::new_unchecked(self.table.ctrl.as_ptr().sub(ctrl_offset)), layout, ); } @@ -530,7 +523,7 @@ impl RawTable { /// Returns pointer to one past last element of data table. #[cfg_attr(feature = "inline-more", inline)] pub unsafe fn data_end(&self) -> NonNull { - NonNull::new_unchecked(self.ctrl.as_ptr().cast()) + NonNull::new_unchecked(self.table.ctrl.as_ptr().cast()) } /// Returns pointer to start of data table. @@ -546,17 +539,10 @@ impl RawTable { bucket.to_base_index(self.data_end()) } - /// Returns a pointer to a control byte. - #[cfg_attr(feature = "inline-more", inline)] - unsafe fn ctrl(&self, index: usize) -> *mut u8 { - debug_assert!(index < self.num_ctrl_bytes()); - self.ctrl.as_ptr().add(index) - } - /// Returns a pointer to an element in the table. 
#[cfg_attr(feature = "inline-more", inline)] pub unsafe fn bucket(&self, index: usize) -> Bucket { - debug_assert_ne!(self.bucket_mask, 0); + debug_assert_ne!(self.table.bucket_mask, 0); debug_assert!(index < self.buckets()); Bucket::from_base_index(self.data_end(), index) } @@ -566,10 +552,10 @@ impl RawTable { #[deprecated(since = "0.8.1", note = "use erase or remove instead")] pub unsafe fn erase_no_drop(&mut self, item: &Bucket) { let index = self.bucket_index(item); - debug_assert!(is_full(*self.ctrl(index))); - let index_before = index.wrapping_sub(Group::WIDTH) & self.bucket_mask; - let empty_before = Group::load(self.ctrl(index_before)).match_empty(); - let empty_after = Group::load(self.ctrl(index)).match_empty(); + debug_assert!(is_full(*self.table.ctrl(index))); + let index_before = index.wrapping_sub(Group::WIDTH) & self.table.bucket_mask; + let empty_before = Group::load(self.table.ctrl(index_before)).match_empty(); + let empty_after = Group::load(self.table.ctrl(index)).match_empty(); // If we are inside a continuous block of Group::WIDTH full or deleted // cells then a probe window may have seen a full block when trying to @@ -582,11 +568,11 @@ impl RawTable { let ctrl = if empty_before.leading_zeros() + empty_after.trailing_zeros() >= Group::WIDTH { DELETED } else { - self.growth_left += 1; + self.table.growth_left += 1; EMPTY }; - self.set_ctrl(index, ctrl); - self.items -= 1; + self.table.set_ctrl(index, ctrl); + self.table.items -= 1; } /// Erases an element from the table, dropping it in place. @@ -632,59 +618,18 @@ impl RawTable { } } - /// Returns an iterator-like object for a probe sequence on the table. - /// - /// This iterator never terminates, but is guaranteed to visit each bucket - /// group exactly once. The loop using `probe_seq` must terminate upon - /// reaching a group containing an empty bucket. - #[cfg_attr(feature = "inline-more", inline)] - fn probe_seq(&self, hash: u64) -> ProbeSeq { - ProbeSeq { - pos: h1(hash) & self.bucket_mask, - stride: 0, - } - } - - /// Sets a control byte, and possibly also the replicated control byte at - /// the end of the array. - #[cfg_attr(feature = "inline-more", inline)] - unsafe fn set_ctrl(&self, index: usize, ctrl: u8) { - // Replicate the first Group::WIDTH control bytes at the end of - // the array without using a branch: - // - If index >= Group::WIDTH then index == index2. - // - Otherwise index2 == self.bucket_mask + 1 + index. - // - // The very last replicated control byte is never actually read because - // we mask the initial index for unaligned loads, but we write it - // anyways because it makes the set_ctrl implementation simpler. - // - // If there are fewer buckets than Group::WIDTH then this code will - // replicate the buckets at the end of the trailing group. For example - // with 2 buckets and a group size of 4, the control bytes will look - // like this: - // - // Real | Replicated - // --------------------------------------------- - // | [A] | [B] | [EMPTY] | [EMPTY] | [A] | [B] | - // --------------------------------------------- - let index2 = ((index.wrapping_sub(Group::WIDTH)) & self.bucket_mask) + Group::WIDTH; - - *self.ctrl(index) = ctrl; - *self.ctrl(index2) = ctrl; - } - /// Searches for an empty or deleted bucket which is suitable for inserting /// a new element. /// /// There must be at least 1 empty bucket in the table. 
#[cfg_attr(feature = "inline-more", inline)] fn find_insert_slot(&self, hash: u64) -> usize { - let mut probe_seq = self.probe_seq(hash); + let mut probe_seq = self.table.probe_seq(hash); loop { unsafe { - let group = Group::load(self.ctrl(probe_seq.pos)); + let group = Group::load(self.table.ctrl(probe_seq.pos)); if let Some(bit) = group.match_empty_or_deleted().lowest_set_bit() { - let result = (probe_seq.pos + bit) & self.bucket_mask; + let result = (probe_seq.pos + bit) & self.table.bucket_mask; // In tables smaller than the group width, trailing control // bytes outside the range of the table are filled with @@ -695,10 +640,10 @@ impl RawTable { // table. This second scan is guaranteed to find an empty // slot (due to the load factor) before hitting the trailing // control bytes (containing EMPTY). - if unlikely(is_full(*self.ctrl(result))) { - debug_assert!(self.bucket_mask < Group::WIDTH); + if unlikely(is_full(*self.table.ctrl(result))) { + debug_assert!(self.table.bucket_mask < Group::WIDTH); debug_assert_ne!(probe_seq.pos, 0); - return Group::load_aligned(self.ctrl(0)) + return Group::load_aligned(self.table.ctrl(0)) .match_empty_or_deleted() .lowest_set_bit_nonzero(); } @@ -706,7 +651,7 @@ impl RawTable { return result; } } - probe_seq.move_next(self.bucket_mask); + probe_seq.move_next(self.table.bucket_mask); } } @@ -715,11 +660,13 @@ impl RawTable { pub fn clear_no_drop(&mut self) { if !self.is_empty_singleton() { unsafe { - self.ctrl(0).write_bytes(EMPTY, self.num_ctrl_bytes()); + self.table + .ctrl(0) + .write_bytes(EMPTY, self.table.num_ctrl_bytes()); } } - self.items = 0; - self.growth_left = bucket_mask_to_capacity(self.bucket_mask); + self.table.items = 0; + self.table.growth_left = bucket_mask_to_capacity(self.table.bucket_mask); } /// Removes all elements from the table without freeing the backing memory. @@ -742,9 +689,9 @@ impl RawTable { pub fn shrink_to(&mut self, min_size: usize, hasher: impl Fn(&T) -> u64) { // Calculate the minimal number of elements that we need to reserve // space for. - let min_size = usize::max(self.items, min_size); + let min_size = usize::max(self.table.items, min_size); if min_size == 0 { - *self = Self::new_in(self.alloc.clone()); + *self = Self::new_in(self.table.alloc.clone()); return; } @@ -760,8 +707,8 @@ impl RawTable { // If we have more buckets than we need, shrink the table. if min_buckets < self.buckets() { // Fast path if the table is empty - if self.items == 0 { - *self = Self::with_capacity_in(min_size, self.alloc.clone()) + if self.table.items == 0 { + *self = Self::with_capacity_in(min_size, self.table.alloc.clone()) } else { // Avoid `Result::unwrap_or_else` because it bloats LLVM IR. if self @@ -778,7 +725,7 @@ impl RawTable { /// without reallocation. #[cfg_attr(feature = "inline-more", inline)] pub fn reserve(&mut self, additional: usize, hasher: impl Fn(&T) -> u64) { - if additional > self.growth_left { + if additional > self.table.growth_left { // Avoid `Result::unwrap_or_else` because it bloats LLVM IR. if self .reserve_rehash(additional, hasher, Fallibility::Infallible) @@ -797,7 +744,7 @@ impl RawTable { additional: usize, hasher: impl Fn(&T) -> u64, ) -> Result<(), TryReserveError> { - if additional > self.growth_left { + if additional > self.table.growth_left { self.reserve_rehash(additional, hasher, Fallibility::Fallible) } else { Ok(()) @@ -814,11 +761,11 @@ impl RawTable { fallibility: Fallibility, ) -> Result<(), TryReserveError> { // Avoid `Option::ok_or_else` because it bloats LLVM IR. 
- let new_items = match self.items.checked_add(additional) { + let new_items = match self.table.items.checked_add(additional) { Some(new_items) => new_items, None => return Err(fallibility.capacity_overflow()), }; - let full_capacity = bucket_mask_to_capacity(self.bucket_mask); + let full_capacity = bucket_mask_to_capacity(self.table.bucket_mask); if new_items <= full_capacity / 2 { // Rehash in-place without re-allocating if we have plenty of spare // capacity that is locked up due to DELETED entries. @@ -845,19 +792,21 @@ impl RawTable { // control bytes to EMPTY. This effectively frees up all buckets // containing a DELETED entry. for i in (0..self.buckets()).step_by(Group::WIDTH) { - let group = Group::load_aligned(self.ctrl(i)); + let group = Group::load_aligned(self.table.ctrl(i)); let group = group.convert_special_to_empty_and_full_to_deleted(); - group.store_aligned(self.ctrl(i)); + group.store_aligned(self.table.ctrl(i)); } // Fix up the trailing control bytes. See the comments in set_ctrl // for the handling of tables smaller than the group width. if self.buckets() < Group::WIDTH { - self.ctrl(0) - .copy_to(self.ctrl(Group::WIDTH), self.buckets()); + self.table + .ctrl(0) + .copy_to(self.table.ctrl(Group::WIDTH), self.buckets()); } else { - self.ctrl(0) - .copy_to(self.ctrl(self.buckets()), Group::WIDTH); + self.table + .ctrl(0) + .copy_to(self.table.ctrl(self.buckets()), Group::WIDTH); } // If the hash function panics then properly clean up any elements @@ -867,21 +816,22 @@ impl RawTable { let mut guard = guard(self, |self_| { if mem::needs_drop::() { for i in 0..self_.buckets() { - if *self_.ctrl(i) == DELETED { - self_.set_ctrl(i, EMPTY); + if *self_.table.ctrl(i) == DELETED { + self_.table.set_ctrl(i, EMPTY); self_.bucket(i).drop(); - self_.items -= 1; + self_.table.items -= 1; } } } - self_.growth_left = bucket_mask_to_capacity(self_.bucket_mask) - self_.items; + self_.table.growth_left = + bucket_mask_to_capacity(self_.table.bucket_mask) - self_.table.items; }); // At this point, DELETED elements are elements that we haven't // rehashed yet. Find them and re-insert them at their ideal // position. 'outer: for i in 0..guard.buckets() { - if *guard.ctrl(i) != DELETED { + if *guard.table.ctrl(i) != DELETED { continue; } 'inner: loop { @@ -898,24 +848,25 @@ impl RawTable { // same unaligned group, then there is no benefit in moving // it and we can just continue to the next item. let probe_index = |pos: usize| { - (pos.wrapping_sub(guard.probe_seq(hash).pos) & guard.bucket_mask) + (pos.wrapping_sub(guard.table.probe_seq(hash).pos) + & guard.table.bucket_mask) / Group::WIDTH }; if likely(probe_index(i) == probe_index(new_i)) { - guard.set_ctrl(i, h2(hash)); + guard.table.set_ctrl(i, h2(hash)); continue 'outer; } // We are moving the current item to a new position. Write // our H2 to the control byte of the new position. - let prev_ctrl = *guard.ctrl(new_i); - guard.set_ctrl(new_i, h2(hash)); + let prev_ctrl = *guard.table.ctrl(new_i); + guard.table.set_ctrl(new_i, h2(hash)); if prev_ctrl == EMPTY { // If the target slot is empty, simply move the current // element into the new slot and clear the old control // byte. 
- guard.set_ctrl(i, EMPTY); + guard.table.set_ctrl(i, EMPTY); guard.bucket(new_i).copy_from_nonoverlapping(&item); continue 'outer; } else { @@ -929,7 +880,8 @@ impl RawTable { } } - guard.growth_left = bucket_mask_to_capacity(guard.bucket_mask) - guard.items; + guard.table.growth_left = + bucket_mask_to_capacity(guard.table.bucket_mask) - guard.table.items; mem::forget(guard); } } @@ -943,13 +895,13 @@ impl RawTable { fallibility: Fallibility, ) -> Result<(), TryReserveError> { unsafe { - debug_assert!(self.items <= capacity); + debug_assert!(self.table.items <= capacity); // Allocate and initialize the new table. let mut new_table = - Self::fallible_with_capacity(self.alloc.clone(), capacity, fallibility)?; - new_table.growth_left -= self.items; - new_table.items = self.items; + Self::fallible_with_capacity(self.table.alloc.clone(), capacity, fallibility)?; + new_table.table.growth_left -= self.table.items; + new_table.table.items = self.table.items; // The hash function may panic, in which case we simply free the new // table without dropping any elements that may have been copied into @@ -973,7 +925,7 @@ impl RawTable { // - we know there is enough space in the table. // - all elements are unique. let index = new_table.find_insert_slot(hash); - new_table.set_ctrl(index, h2(hash)); + new_table.table.set_ctrl(index, h2(hash)); new_table.bucket(index).copy_from_nonoverlapping(&item); } @@ -998,17 +950,17 @@ impl RawTable { // We can avoid growing the table once we have reached our load // factor if we are replacing a tombstone. This works since the // number of EMPTY slots does not change in this case. - let old_ctrl = *self.ctrl(index); - if unlikely(self.growth_left == 0 && special_is_empty(old_ctrl)) { + let old_ctrl = *self.table.ctrl(index); + if unlikely(self.table.growth_left == 0 && special_is_empty(old_ctrl)) { self.reserve(1, hasher); index = self.find_insert_slot(hash); } let bucket = self.bucket(index); - self.growth_left -= special_is_empty(old_ctrl) as usize; - self.set_ctrl(index, h2(hash)); + self.table.growth_left -= special_is_empty(old_ctrl) as usize; + self.table.set_ctrl(index, h2(hash)); bucket.write(value); - self.items += 1; + self.table.items += 1; bucket } } @@ -1060,12 +1012,12 @@ impl RawTable { // If we are replacing a DELETED entry then we don't need to update // the load counter. - let old_ctrl = *self.ctrl(index); - self.growth_left -= special_is_empty(old_ctrl) as usize; + let old_ctrl = *self.table.ctrl(index); + self.table.growth_left -= special_is_empty(old_ctrl) as usize; - self.set_ctrl(index, h2(hash)); + self.table.set_ctrl(index, h2(hash)); bucket.write(value); - self.items += 1; + self.table.items += 1; bucket } } @@ -1082,14 +1034,14 @@ impl RawTable { F: FnOnce(T) -> Option, { let index = self.bucket_index(&bucket); - let old_ctrl = *self.ctrl(index); + let old_ctrl = *self.table.ctrl(index); debug_assert!(is_full(old_ctrl)); - let old_growth_left = self.growth_left; + let old_growth_left = self.table.growth_left; let item = self.remove(bucket); if let Some(new_item) = f(item) { - self.growth_left = old_growth_left; - self.set_ctrl(index, old_ctrl); - self.items += 1; + self.table.growth_left = old_growth_left; + self.table.set_ctrl(index, old_ctrl); + self.table.items += 1; self.bucket(index).write(new_item); true } else { @@ -1137,32 +1089,26 @@ impl RawTable { /// more, but is guaranteed to be able to hold at least this many. 
#[cfg_attr(feature = "inline-more", inline)] pub fn capacity(&self) -> usize { - self.items + self.growth_left + self.table.items + self.table.growth_left } /// Returns the number of elements in the table. #[cfg_attr(feature = "inline-more", inline)] pub fn len(&self) -> usize { - self.items + self.table.items } /// Returns the number of buckets in the table. #[cfg_attr(feature = "inline-more", inline)] pub fn buckets(&self) -> usize { - self.bucket_mask + 1 - } - - /// Returns the number of control bytes in the table. - #[cfg_attr(feature = "inline-more", inline)] - fn num_ctrl_bytes(&self) -> usize { - self.bucket_mask + 1 + Group::WIDTH + self.table.bucket_mask + 1 } /// Returns whether this table points to the empty singleton with a capacity /// of 0. #[cfg_attr(feature = "inline-more", inline)] fn is_empty_singleton(&self) -> bool { - self.bucket_mask == 0 + self.table.bucket_mask == 0 } /// Returns an iterator over every element in the table. It is up to @@ -1173,8 +1119,8 @@ impl RawTable { pub unsafe fn iter(&self) -> RawIter { let data = Bucket::from_base_index(self.data_end(), 0); RawIter { - iter: RawIterRange::new(self.ctrl.as_ptr(), data, self.buckets()), - items: self.items, + iter: RawIterRange::new(self.table.ctrl.as_ptr(), data, self.table.buckets()), + items: self.table.items, } } @@ -1212,7 +1158,7 @@ impl RawTable { debug_assert_eq!(iter.len(), self.len()); RawDrain { iter, - table: ManuallyDrop::new(mem::replace(self, Self::new_in(self.alloc.clone()))), + table: ManuallyDrop::new(mem::replace(self, Self::new_in(self.table.alloc.clone()))), orig_table: NonNull::from(self), marker: PhantomData, } @@ -1227,7 +1173,7 @@ impl RawTable { pub unsafe fn into_iter_from(self, iter: RawIter) -> RawIntoIter { debug_assert_eq!(iter.len(), self.len()); - let alloc = self.alloc.clone(); + let alloc = self.table.alloc.clone(); let allocation = self.into_allocation(); RawIntoIter { iter, @@ -1241,16 +1187,16 @@ impl RawTable { /// should be dropped using a `RawIter` before freeing the allocation. #[cfg_attr(feature = "inline-more", inline)] pub(crate) fn into_allocation(self) -> Option<(NonNull, Layout)> { - let alloc = if self.is_empty_singleton() { + let alloc = if self.table.is_empty_singleton() { None } else { // Avoid `Option::unwrap_or_else` because it bloats LLVM IR. - let (layout, ctrl_offset) = match calculate_layout::(self.buckets()) { + let (layout, ctrl_offset) = match calculate_layout::(self.table.buckets()) { Some(lco) => lco, None => unsafe { hint::unreachable_unchecked() }, }; Some(( - unsafe { NonNull::new_unchecked(self.ctrl.as_ptr().sub(ctrl_offset)) }, + unsafe { NonNull::new_unchecked(self.table.ctrl.as_ptr().sub(ctrl_offset)) }, layout, )) }; @@ -1262,17 +1208,129 @@ impl RawTable { unsafe impl Send for RawTable where T: Send {} unsafe impl Sync for RawTable where T: Sync {} +impl RawTableInner { + #[cfg_attr(feature = "inline-more", inline)] + const fn new_in(alloc: A) -> Self { + Self { + // Be careful to cast the entire slice to a raw pointer. 
+ ctrl: unsafe { NonNull::new_unchecked(Group::static_empty() as *const _ as *mut u8) }, + bucket_mask: 0, + items: 0, + growth_left: 0, + alloc, + } + } +} + +impl RawTableInner { + unsafe fn new_uninitialized( + alloc: A, + buckets: usize, + fallibility: Fallibility, + layout: Layout, + ctrl_offset: usize, + ) -> Result { + debug_assert!(buckets.is_power_of_two()); + + let ptr: NonNull = match do_alloc(&alloc, layout) { + Ok(block) => block.cast(), + Err(_) => return Err(fallibility.alloc_err(layout)), + }; + let ctrl = NonNull::new_unchecked(ptr.as_ptr().add(ctrl_offset)); + Ok(Self { + ctrl, + bucket_mask: buckets - 1, + items: 0, + growth_left: bucket_mask_to_capacity(buckets - 1), + alloc, + }) + } + + /// Returns an iterator-like object for a probe sequence on the table. + /// + /// This iterator never terminates, but is guaranteed to visit each bucket + /// group exactly once. The loop using `probe_seq` must terminate upon + /// reaching a group containing an empty bucket. + #[cfg_attr(feature = "inline-more", inline)] + fn probe_seq(&self, hash: u64) -> ProbeSeq { + ProbeSeq { + pos: h1(hash) & self.bucket_mask, + stride: 0, + } + } + + /// Sets a control byte, and possibly also the replicated control byte at + /// the end of the array. + #[cfg_attr(feature = "inline-more", inline)] + unsafe fn set_ctrl(&self, index: usize, ctrl: u8) { + // Replicate the first Group::WIDTH control bytes at the end of + // the array without using a branch: + // - If index >= Group::WIDTH then index == index2. + // - Otherwise index2 == self.bucket_mask + 1 + index. + // + // The very last replicated control byte is never actually read because + // we mask the initial index for unaligned loads, but we write it + // anyways because it makes the set_ctrl implementation simpler. + // + // If there are fewer buckets than Group::WIDTH then this code will + // replicate the buckets at the end of the trailing group. For example + // with 2 buckets and a group size of 4, the control bytes will look + // like this: + // + // Real | Replicated + // --------------------------------------------- + // | [A] | [B] | [EMPTY] | [EMPTY] | [A] | [B] | + // --------------------------------------------- + let index2 = ((index.wrapping_sub(Group::WIDTH)) & self.bucket_mask) + Group::WIDTH; + + *self.ctrl(index) = ctrl; + *self.ctrl(index2) = ctrl; + } + + /// Returns a pointer to a control byte. + #[cfg_attr(feature = "inline-more", inline)] + unsafe fn ctrl(&self, index: usize) -> *mut u8 { + debug_assert!(index < self.num_ctrl_bytes()); + self.ctrl.as_ptr().add(index) + } + + #[cfg_attr(feature = "inline-more", inline)] + fn capacity(&self) -> usize { + self.items + self.growth_left + } + + #[cfg_attr(feature = "inline-more", inline)] + fn len(&self) -> usize { + self.items + } + + #[cfg_attr(feature = "inline-more", inline)] + fn buckets(&self) -> usize { + self.bucket_mask + 1 + } + + #[cfg_attr(feature = "inline-more", inline)] + fn num_ctrl_bytes(&self) -> usize { + self.bucket_mask + 1 + Group::WIDTH + } + + #[cfg_attr(feature = "inline-more", inline)] + fn is_empty_singleton(&self) -> bool { + self.bucket_mask == 0 + } +} + impl Clone for RawTable { fn clone(&self) -> Self { - if self.is_empty_singleton() { - Self::new_in(self.alloc.clone()) + if self.table.is_empty_singleton() { + Self::new_in(self.table.alloc.clone()) } else { unsafe { let mut new_table = ManuallyDrop::new( // Avoid `Result::ok_or_else` because it bloats LLVM IR. 
match Self::new_uninitialized( - self.alloc.clone(), - self.buckets(), + self.table.alloc.clone(), + self.table.buckets(), Fallibility::Infallible, ) { Ok(table) => table, @@ -1292,8 +1350,8 @@ impl Clone for RawTable { } fn clone_from(&mut self, source: &Self) { - if source.is_empty_singleton() { - *self = Self::new_in(self.alloc.clone()); + if source.table.is_empty_singleton() { + *self = Self::new_in(self.table.alloc.clone()); } else { unsafe { // First, drop all our elements without clearing the control bytes. @@ -1312,7 +1370,7 @@ impl Clone for RawTable { (self as *mut Self).write( // Avoid `Result::unwrap_or_else` because it bloats LLVM IR. match Self::new_uninitialized( - self.alloc.clone(), + self.table.alloc.clone(), source.buckets(), Fallibility::Infallible, ) { @@ -1348,14 +1406,15 @@ impl RawTableClone for RawTable { #[cfg_attr(feature = "inline-more", inline)] unsafe fn clone_from_spec(&mut self, source: &Self, _on_panic: impl FnMut(&mut Self)) { source + .table .ctrl(0) - .copy_to_nonoverlapping(self.ctrl(0), self.num_ctrl_bytes()); + .copy_to_nonoverlapping(self.table.ctrl(0), self.table.num_ctrl_bytes()); source .data_start() .copy_to_nonoverlapping(self.data_start(), self.buckets()); - self.items = source.items; - self.growth_left = source.growth_left; + self.table.items = source.table.items; + self.table.growth_left = source.table.growth_left; } } @@ -1365,8 +1424,9 @@ impl RawTable { unsafe fn clone_from_impl(&mut self, source: &Self, mut on_panic: impl FnMut(&mut Self)) { // Copy the control bytes unchanged. We do this in a single pass source + .table .ctrl(0) - .copy_to_nonoverlapping(self.ctrl(0), self.num_ctrl_bytes()); + .copy_to_nonoverlapping(self.table.ctrl(0), self.table.num_ctrl_bytes()); // The cloning of elements may panic, in which case we need // to make sure we drop only the elements that have been @@ -1374,7 +1434,7 @@ impl RawTable { let mut guard = guard((0, &mut *self), |(index, self_)| { if mem::needs_drop::() && self_.len() != 0 { for i in 0..=*index { - if is_full(*self_.ctrl(i)) { + if is_full(*self_.table.ctrl(i)) { self_.bucket(i).drop(); } } @@ -1398,8 +1458,8 @@ impl RawTable { // Successfully cloned all items, no need to clean up. mem::forget(guard); - self.items = source.items; - self.growth_left = source.growth_left; + self.table.items = source.table.items; + self.table.growth_left = source.table.growth_left; } /// Variant of `clone_from` to use when a hasher is available. @@ -1410,7 +1470,7 @@ impl RawTable { // buckets as the source since we can just copy the contents directly // in that case. if self.buckets() != source.buckets() - && bucket_mask_to_capacity(self.bucket_mask) >= source.len() + && bucket_mask_to_capacity(self.table.bucket_mask) >= source.len() { self.clear(); @@ -1432,7 +1492,7 @@ impl RawTable { // - we know there is enough space in the table. // - all elements are unique. let index = guard_self.find_insert_slot(hash); - guard_self.set_ctrl(index, h2(hash)); + guard_self.table.set_ctrl(index, h2(hash)); guard_self.bucket(index).write(item); } } @@ -1440,8 +1500,8 @@ impl RawTable { // Successfully cloned all items, no need to clean up. 
mem::forget(guard_self); - self.items = source.items; - self.growth_left -= source.items; + self.table.items = source.table.items; + self.table.growth_left -= source.table.items; } else { self.clone_from(source); } @@ -1972,8 +2032,8 @@ impl<'a, T, A: Allocator + Clone> RawIterHash<'a, T, A> { fn new(table: &'a RawTable, hash: u64) -> Self { unsafe { let h2_hash = h2(hash); - let probe_seq = table.probe_seq(hash); - let group = Group::load(table.ctrl(probe_seq.pos)); + let probe_seq = table.table.probe_seq(hash); + let group = Group::load(table.table.ctrl(probe_seq.pos)); let bitmask = group.match_byte(h2_hash).into_iter(); RawIterHash { @@ -1994,15 +2054,15 @@ impl<'a, T, A: Allocator + Clone> Iterator for RawIterHash<'a, T, A> { unsafe { loop { if let Some(bit) = self.bitmask.next() { - let index = (self.probe_seq.pos + bit) & self.table.bucket_mask; + let index = (self.probe_seq.pos + bit) & self.table.table.bucket_mask; let bucket = self.table.bucket(index); return Some(bucket); } if likely(self.group.match_empty().any_bit_set()) { return None; } - self.probe_seq.move_next(self.table.bucket_mask); - self.group = Group::load(self.table.ctrl(self.probe_seq.pos)); + self.probe_seq.move_next(self.table.table.bucket_mask); + self.group = Group::load(self.table.table.ctrl(self.probe_seq.pos)); self.bitmask = self.group.match_byte(self.h2_hash).into_iter(); } } From 22558c10632c2f64bb6580436bba5d931c4af77b Mon Sep 17 00:00:00 2001 From: Markus Westerlind Date: Tue, 29 Sep 2020 11:24:12 +0200 Subject: [PATCH 03/33] perf(compile): Extract part of rehash_in_place --- src/raw/mod.rs | 45 +++++++++++++++++++++++++-------------------- 1 file changed, 25 insertions(+), 20 deletions(-) diff --git a/src/raw/mod.rs b/src/raw/mod.rs index 785b1faa5b..137700e0a2 100644 --- a/src/raw/mod.rs +++ b/src/raw/mod.rs @@ -788,26 +788,7 @@ impl RawTable { /// If `hasher` panics then some the table's contents may be lost. fn rehash_in_place(&mut self, hasher: impl Fn(&T) -> u64) { unsafe { - // Bulk convert all full control bytes to DELETED, and all DELETED - // control bytes to EMPTY. This effectively frees up all buckets - // containing a DELETED entry. - for i in (0..self.buckets()).step_by(Group::WIDTH) { - let group = Group::load_aligned(self.table.ctrl(i)); - let group = group.convert_special_to_empty_and_full_to_deleted(); - group.store_aligned(self.table.ctrl(i)); - } - - // Fix up the trailing control bytes. See the comments in set_ctrl - // for the handling of tables smaller than the group width. - if self.buckets() < Group::WIDTH { - self.table - .ctrl(0) - .copy_to(self.table.ctrl(Group::WIDTH), self.buckets()); - } else { - self.table - .ctrl(0) - .copy_to(self.table.ctrl(self.buckets()), Group::WIDTH); - } + self.table.prepare_rehash_in_place(); // If the hash function panics then properly clean up any elements // that we haven't rehashed yet. We unfortunately can't preserve the @@ -1223,6 +1204,7 @@ impl RawTableInner { } impl RawTableInner { + #[cfg_attr(feature = "inline-more", inline)] unsafe fn new_uninitialized( alloc: A, buckets: usize, @@ -1246,6 +1228,29 @@ impl RawTableInner { }) } + fn prepare_rehash_in_place(&mut self) { + unsafe { + // Bulk convert all full control bytes to DELETED, and all DELETED + // control bytes to EMPTY. This effectively frees up all buckets + // containing a DELETED entry. 
+ for i in (0..self.buckets()).step_by(Group::WIDTH) { + let group = Group::load_aligned(self.ctrl(i)); + let group = group.convert_special_to_empty_and_full_to_deleted(); + group.store_aligned(self.ctrl(i)); + } + + // Fix up the trailing control bytes. See the comments in set_ctrl + // for the handling of tables smaller than the group width. + if self.buckets() < Group::WIDTH { + self.ctrl(0) + .copy_to(self.ctrl(Group::WIDTH), self.buckets()); + } else { + self.ctrl(0) + .copy_to(self.ctrl(self.buckets()), Group::WIDTH); + } + } + } + /// Returns an iterator-like object for a probe sequence on the table. /// /// This iterator never terminates, but is guaranteed to visit each bucket From 3fb20c4fa4a0a3c0e159ecb9b0d037c28db9115f Mon Sep 17 00:00:00 2001 From: Markus Westerlind Date: Tue, 29 Sep 2020 11:28:46 +0200 Subject: [PATCH 04/33] perf(compile): find_insert_slot does not depend on T --- src/raw/mod.rs | 86 +++++++++++++++++++++++++------------------------- 1 file changed, 43 insertions(+), 43 deletions(-) diff --git a/src/raw/mod.rs b/src/raw/mod.rs index 137700e0a2..8bcd8e7401 100644 --- a/src/raw/mod.rs +++ b/src/raw/mod.rs @@ -618,43 +618,6 @@ impl RawTable { } } - /// Searches for an empty or deleted bucket which is suitable for inserting - /// a new element. - /// - /// There must be at least 1 empty bucket in the table. - #[cfg_attr(feature = "inline-more", inline)] - fn find_insert_slot(&self, hash: u64) -> usize { - let mut probe_seq = self.table.probe_seq(hash); - loop { - unsafe { - let group = Group::load(self.table.ctrl(probe_seq.pos)); - if let Some(bit) = group.match_empty_or_deleted().lowest_set_bit() { - let result = (probe_seq.pos + bit) & self.table.bucket_mask; - - // In tables smaller than the group width, trailing control - // bytes outside the range of the table are filled with - // EMPTY entries. These will unfortunately trigger a - // match, but once masked may point to a full bucket that - // is already occupied. We detect this situation here and - // perform a second scan starting at the begining of the - // table. This second scan is guaranteed to find an empty - // slot (due to the load factor) before hitting the trailing - // control bytes (containing EMPTY). - if unlikely(is_full(*self.table.ctrl(result))) { - debug_assert!(self.table.bucket_mask < Group::WIDTH); - debug_assert_ne!(probe_seq.pos, 0); - return Group::load_aligned(self.table.ctrl(0)) - .match_empty_or_deleted() - .lowest_set_bit_nonzero(); - } - - return result; - } - } - probe_seq.move_next(self.table.bucket_mask); - } - } - /// Marks all table buckets as empty without dropping their contents. #[cfg_attr(feature = "inline-more", inline)] pub fn clear_no_drop(&mut self) { @@ -821,7 +784,7 @@ impl RawTable { let hash = hasher(item.as_ref()); // Search for a suitable place to put it - let new_i = guard.find_insert_slot(hash); + let new_i = guard.table.find_insert_slot(hash); // Probing works by scanning through all of the control // bytes in groups, which may not be aligned to the group @@ -905,7 +868,7 @@ impl RawTable { // - there are no DELETED entries. // - we know there is enough space in the table. // - all elements are unique. 
- let index = new_table.find_insert_slot(hash); + let index = new_table.table.find_insert_slot(hash); new_table.table.set_ctrl(index, h2(hash)); new_table.bucket(index).copy_from_nonoverlapping(&item); } @@ -926,7 +889,7 @@ impl RawTable { #[cfg_attr(feature = "inline-more", inline)] pub fn insert(&mut self, hash: u64, value: T, hasher: impl Fn(&T) -> u64) -> Bucket { unsafe { - let mut index = self.find_insert_slot(hash); + let mut index = self.table.find_insert_slot(hash); // We can avoid growing the table once we have reached our load // factor if we are replacing a tombstone. This works since the @@ -934,7 +897,7 @@ impl RawTable { let old_ctrl = *self.table.ctrl(index); if unlikely(self.table.growth_left == 0 && special_is_empty(old_ctrl)) { self.reserve(1, hasher); - index = self.find_insert_slot(hash); + index = self.table.find_insert_slot(hash); } let bucket = self.bucket(index); @@ -988,7 +951,7 @@ impl RawTable { #[cfg(any(feature = "raw", feature = "rustc-internal-api"))] pub fn insert_no_grow(&mut self, hash: u64, value: T) -> Bucket { unsafe { - let index = self.find_insert_slot(hash); + let index = self.table.find_insert_slot(hash); let bucket = self.bucket(index); // If we are replacing a DELETED entry then we don't need to update @@ -1228,6 +1191,43 @@ impl RawTableInner { }) } + /// Searches for an empty or deleted bucket which is suitable for inserting + /// a new element. + /// + /// There must be at least 1 empty bucket in the table. + #[cfg_attr(feature = "inline-more", inline)] + fn find_insert_slot(&self, hash: u64) -> usize { + let mut probe_seq = self.table.probe_seq(hash); + loop { + unsafe { + let group = Group::load(self.table.ctrl(probe_seq.pos)); + if let Some(bit) = group.match_empty_or_deleted().lowest_set_bit() { + let result = (probe_seq.pos + bit) & self.table.bucket_mask; + + // In tables smaller than the group width, trailing control + // bytes outside the range of the table are filled with + // EMPTY entries. These will unfortunately trigger a + // match, but once masked may point to a full bucket that + // is already occupied. We detect this situation here and + // perform a second scan starting at the begining of the + // table. This second scan is guaranteed to find an empty + // slot (due to the load factor) before hitting the trailing + // control bytes (containing EMPTY). + if unlikely(is_full(*self.table.ctrl(result))) { + debug_assert!(self.table.bucket_mask < Group::WIDTH); + debug_assert_ne!(probe_seq.pos, 0); + return Group::load_aligned(self.table.ctrl(0)) + .match_empty_or_deleted() + .lowest_set_bit_nonzero(); + } + + return result; + } + } + probe_seq.move_next(self.table.bucket_mask); + } + } + fn prepare_rehash_in_place(&mut self) { unsafe { // Bulk convert all full control bytes to DELETED, and all DELETED @@ -1496,7 +1496,7 @@ impl RawTable { // - there are no DELETED entries. // - we know there is enough space in the table. // - all elements are unique. 
- let index = guard_self.find_insert_slot(hash); + let index = guard_self.table.find_insert_slot(hash); guard_self.table.set_ctrl(index, h2(hash)); guard_self.bucket(index).write(item); } From 4de49292c55f89b554edb39d0273ddb4a24274e0 Mon Sep 17 00:00:00 2001 From: Markus Westerlind Date: Tue, 29 Sep 2020 11:55:51 +0200 Subject: [PATCH 05/33] perf(compile): Extract more parts out of rehash_in_place --- src/raw/mod.rs | 99 +++++++++++++++++++++++++++++++------------------- 1 file changed, 62 insertions(+), 37 deletions(-) diff --git a/src/raw/mod.rs b/src/raw/mod.rs index 8bcd8e7401..02edb8c83e 100644 --- a/src/raw/mod.rs +++ b/src/raw/mod.rs @@ -783,43 +783,22 @@ impl RawTable { let item = guard.bucket(i); let hash = hasher(item.as_ref()); - // Search for a suitable place to put it - let new_i = guard.table.find_insert_slot(hash); - - // Probing works by scanning through all of the control - // bytes in groups, which may not be aligned to the group - // size. If both the new and old position fall within the - // same unaligned group, then there is no benefit in moving - // it and we can just continue to the next item. - let probe_index = |pos: usize| { - (pos.wrapping_sub(guard.table.probe_seq(hash).pos) - & guard.table.bucket_mask) - / Group::WIDTH - }; - if likely(probe_index(i) == probe_index(new_i)) { - guard.table.set_ctrl(i, h2(hash)); - continue 'outer; - } - - // We are moving the current item to a new position. Write - // our H2 to the control byte of the new position. - let prev_ctrl = *guard.table.ctrl(new_i); - guard.table.set_ctrl(new_i, h2(hash)); - - if prev_ctrl == EMPTY { - // If the target slot is empty, simply move the current - // element into the new slot and clear the old control - // byte. - guard.table.set_ctrl(i, EMPTY); - guard.bucket(new_i).copy_from_nonoverlapping(&item); - continue 'outer; - } else { - // If the target slot is occupied, swap the two elements - // and then continue processing the element that we just - // swapped into the old slot. - debug_assert_eq!(prev_ctrl, DELETED); - mem::swap(guard.bucket(new_i).as_mut(), item.as_mut()); - continue 'inner; + match guard.table.search_new_slot(i, hash) { + Slot::Skip => continue 'outer, + Slot::Empty(new_i) => { + // If the target slot is empty, simply move the current + // element into the new slot and clear the old control + // byte. + guard.bucket(new_i).copy_from_nonoverlapping(&item); + continue 'outer; + } + Slot::Occupied(new_i) => { + // If the target slot is occupied, swap the two elements + // and then continue processing the element that we just + // swapped into the old slot. + mem::swap(guard.bucket(new_i).as_mut(), item.as_mut()); + continue 'inner; + } } } } @@ -1251,6 +1230,46 @@ impl RawTableInner { } } + unsafe fn raw_bucket(&self, index: usize) -> Bucket { + debug_assert_ne!(self.bucket_mask, 0); + debug_assert!(index < self.buckets()); + Bucket::from_base_index(self.data_end(), index) + } + + unsafe fn data_end(&self) -> NonNull { + NonNull::new_unchecked(self.ctrl.as_ptr()) + } + + unsafe fn search_new_slot(&mut self, i: usize, hash: u64) -> Slot { + // Search for a suitable place to put it + let new_i = self.find_insert_slot(hash); + + // Probing works by scanning through all of the control + // bytes in groups, which may not be aligned to the group + // size. If both the new and old position fall within the + // same unaligned group, then there is no benefit in moving + // it and we can just continue to the next item. 
+ let probe_index = |pos: usize| { + (pos.wrapping_sub(self.probe_seq(hash).pos) & self.bucket_mask) / Group::WIDTH + }; + if likely(probe_index(i) == probe_index(new_i)) { + self.set_ctrl(i, h2(hash)); + return Slot::Skip; + } + + // We are moving the current item to a new position. Write + // our H2 to the control byte of the new position. + let prev_ctrl = *self.ctrl(new_i); + self.set_ctrl(new_i, h2(hash)); + if prev_ctrl == EMPTY { + self.set_ctrl(i, EMPTY); + Slot::Empty(new_i) + } else { + debug_assert_eq!(prev_ctrl, DELETED); + Slot::Occupied(new_i) + } + } + /// Returns an iterator-like object for a probe sequence on the table. /// /// This iterator never terminates, but is guaranteed to visit each bucket @@ -1325,6 +1344,12 @@ impl RawTableInner { } } +enum Slot { + Skip, + Empty(usize), + Occupied(usize), +} + impl Clone for RawTable { fn clone(&self) -> Self { if self.table.is_empty_singleton() { From be9ce391cbc1c44ff44ed8ff7733567f71498302 Mon Sep 17 00:00:00 2001 From: Markus Westerlind Date: Tue, 29 Sep 2020 12:10:37 +0200 Subject: [PATCH 06/33] perf(compile): Move part of fallible_with_capacity out --- src/raw/mod.rs | 37 ++++++++++++++++++++++++++++++------- 1 file changed, 30 insertions(+), 7 deletions(-) diff --git a/src/raw/mod.rs b/src/raw/mod.rs index 02edb8c83e..8a5e9b8f35 100644 --- a/src/raw/mod.rs +++ b/src/raw/mod.rs @@ -478,13 +478,21 @@ impl RawTable { Some(buckets) => buckets, None => return Err(fallibility.capacity_overflow()), }; - let result = Self::new_uninitialized(alloc, buckets, fallibility)?; - result - .table - .ctrl(0) - .write_bytes(EMPTY, result.table.num_ctrl_bytes()); - - Ok(result) + // Avoid `Option::ok_or_else` because it bloats LLVM IR. + let (layout, ctrl_offset) = match calculate_layout::(buckets) { + Some(lco) => lco, + None => return Err(fallibility.capacity_overflow()), + }; + Ok(Self { + table: RawTableInner::fallible_with_capacity( + alloc, + buckets, + fallibility, + layout, + ctrl_offset, + )?, + marker: PhantomData, + }) } } } @@ -1170,6 +1178,21 @@ impl RawTableInner { }) } + /// Attempts to allocate a new hash table with at least enough capacity + /// for inserting the given number of elements without reallocating. + unsafe fn fallible_with_capacity( + alloc: A, + buckets: usize, + fallibility: Fallibility, + layout: Layout, + ctrl_offset: usize, + ) -> Result { + let result = Self::new_uninitialized(alloc, buckets, fallibility, layout, ctrl_offset)?; + result.ctrl(0).write_bytes(EMPTY, result.num_ctrl_bytes()); + + Ok(result) + } + /// Searches for an empty or deleted bucket which is suitable for inserting /// a new element. 
/// From 5204be050b264b6a7b40c6e222a3c645bd1ed7ba Mon Sep 17 00:00:00 2001 From: Markus Westerlind Date: Tue, 29 Sep 2020 12:28:42 +0200 Subject: [PATCH 07/33] perf(compile): Make calculate_layout non-generic --- src/raw/mod.rs | 14 +++++++++----- 1 file changed, 9 insertions(+), 5 deletions(-) diff --git a/src/raw/mod.rs b/src/raw/mod.rs index 8a5e9b8f35..f530e2b3e1 100644 --- a/src/raw/mod.rs +++ b/src/raw/mod.rs @@ -259,14 +259,18 @@ fn calculate_layout(buckets: usize) -> Option<(Layout, usize)> { #[cfg_attr(feature = "inline-more", inline)] #[cfg(not(feature = "nightly"))] fn calculate_layout(buckets: usize) -> Option<(Layout, usize)> { + calculate_layout_(mem::align_of::(), mem::size_of::(), buckets) +} + +#[cfg_attr(feature = "inline-more", inline)] +#[cfg(not(feature = "nightly"))] +fn calculate_layout_(align_of: usize, size_of: usize, buckets: usize) -> Option<(Layout, usize)> { debug_assert!(buckets.is_power_of_two()); // Manual layout calculation since Layout methods are not yet stable. - let ctrl_align = usize::max(mem::align_of::(), Group::WIDTH); - let ctrl_offset = mem::size_of::() - .checked_mul(buckets)? - .checked_add(ctrl_align - 1)? - & !(ctrl_align - 1); + let ctrl_align = usize::max(align_of, Group::WIDTH); + let ctrl_offset = + size_of.checked_mul(buckets)?.checked_add(ctrl_align - 1)? & !(ctrl_align - 1); let len = ctrl_offset.checked_add(buckets + Group::WIDTH)?; Some(( From 82a6dad61e958f5fc7544f71733e576ad84333c8 Mon Sep 17 00:00:00 2001 From: Markus Westerlind Date: Tue, 29 Sep 2020 12:56:09 +0200 Subject: [PATCH 08/33] perf(compiler): Make the panic guard non-generic on rehash Since this in the cold path there should be no need to monomorphize this multiple times. --- src/raw/mod.rs | 54 ++++++++++++++++++++++++++++++++++---------------- 1 file changed, 37 insertions(+), 17 deletions(-) diff --git a/src/raw/mod.rs b/src/raw/mod.rs index f530e2b3e1..b9b833f8ae 100644 --- a/src/raw/mod.rs +++ b/src/raw/mod.rs @@ -368,6 +368,12 @@ impl Bucket { pub unsafe fn copy_from_nonoverlapping(&self, other: &Self) { self.as_ptr().copy_from_nonoverlapping(other.as_ptr(), 1); } + + unsafe fn cast(self) -> Bucket { + Bucket { + ptr: self.ptr.cast(), + } + } } /// A raw hash table with an unsafe API. @@ -769,25 +775,17 @@ impl RawTable { // that we haven't rehashed yet. We unfortunately can't preserve the // element since we lost their hash and have no way of recovering it // without risking another panic. - let mut guard = guard(self, |self_| { - if mem::needs_drop::() { - for i in 0..self_.buckets() { - if *self_.table.ctrl(i) == DELETED { - self_.table.set_ctrl(i, EMPTY); - self_.bucket(i).drop(); - self_.table.items -= 1; - } - } - } - self_.table.growth_left = - bucket_mask_to_capacity(self_.table.bucket_mask) - self_.table.items; - }); + let mut guard = + self.table + .rehash_panic_guard(mem::needs_drop::(), |bucket: Bucket| { + bucket.cast::().drop(); + }); // At this point, DELETED elements are elements that we haven't // rehashed yet. Find them and re-insert them at their ideal // position. 
'outer: for i in 0..guard.buckets() { - if *guard.table.ctrl(i) != DELETED { + if *guard.ctrl(i) != DELETED { continue; } 'inner: loop { @@ -795,7 +793,7 @@ impl RawTable { let item = guard.bucket(i); let hash = hasher(item.as_ref()); - match guard.table.search_new_slot(i, hash) { + match guard.search_new_slot(i, hash) { Slot::Skip => continue 'outer, Slot::Empty(new_i) => { // If the target slot is empty, simply move the current @@ -815,8 +813,7 @@ impl RawTable { } } - guard.table.growth_left = - bucket_mask_to_capacity(guard.table.bucket_mask) - guard.table.items; + guard.growth_left = bucket_mask_to_capacity(guard.bucket_mask) - guard.items; mem::forget(guard); } } @@ -1257,6 +1254,10 @@ impl RawTableInner { } } + unsafe fn bucket(&self, index: usize) -> Bucket { + self.raw_bucket(index).cast::() + } + unsafe fn raw_bucket(&self, index: usize) -> Bucket { debug_assert_ne!(self.bucket_mask, 0); debug_assert!(index < self.buckets()); @@ -1369,6 +1370,25 @@ impl RawTableInner { fn is_empty_singleton(&self) -> bool { self.bucket_mask == 0 } + + unsafe fn rehash_panic_guard<'s>( + &'s mut self, + needs_drop: bool, + drop: fn(Bucket), + ) -> crate::scopeguard::ScopeGuard<&mut Self, impl FnMut(&mut &'s mut Self) + 's> { + guard(self, move |self_| { + if needs_drop { + for i in 0..self_.buckets() { + if *self_.ctrl(i) == DELETED { + self_.set_ctrl(i, EMPTY); + drop(self_.raw_bucket(i)); + self_.items -= 1; + } + } + } + self_.growth_left = bucket_mask_to_capacity(self_.bucket_mask) - self_.items; + }) + } } enum Slot { From 57179c84227543733635dd80bfced1a98b6f7ecd Mon Sep 17 00:00:00 2001 From: Markus Westerlind Date: Tue, 29 Sep 2020 14:59:07 +0200 Subject: [PATCH 09/33] perf(compile): Make raw_iter_hash less generic --- src/raw/mod.rs | 73 +++++++++++++++++++++++++++++++------------------- 1 file changed, 45 insertions(+), 28 deletions(-) diff --git a/src/raw/mod.rs b/src/raw/mod.rs index b9b833f8ae..8c8c251790 100644 --- a/src/raw/mod.rs +++ b/src/raw/mod.rs @@ -775,11 +775,12 @@ impl RawTable { // that we haven't rehashed yet. We unfortunately can't preserve the // element since we lost their hash and have no way of recovering it // without risking another panic. - let mut guard = - self.table - .rehash_panic_guard(mem::needs_drop::(), |bucket: Bucket| { - bucket.cast::().drop(); - }); + let mut guard = self.table.rehash_panic_guard( + mem::needs_drop::(), + |self_: &mut RawTableInner, index| { + self_.bucket::(index).drop(); + }, + ); // At this point, DELETED elements are elements that we haven't // rehashed yet. 
Find them and re-insert them at their ideal @@ -1255,13 +1256,9 @@ impl RawTableInner { } unsafe fn bucket(&self, index: usize) -> Bucket { - self.raw_bucket(index).cast::() - } - - unsafe fn raw_bucket(&self, index: usize) -> Bucket { debug_assert_ne!(self.bucket_mask, 0); debug_assert!(index < self.buckets()); - Bucket::from_base_index(self.data_end(), index) + Bucket::from_base_index(self.data_end().cast::(), index) } unsafe fn data_end(&self) -> NonNull { @@ -1374,14 +1371,14 @@ impl RawTableInner { unsafe fn rehash_panic_guard<'s>( &'s mut self, needs_drop: bool, - drop: fn(Bucket), + drop: fn(&mut Self, usize), ) -> crate::scopeguard::ScopeGuard<&mut Self, impl FnMut(&mut &'s mut Self) + 's> { guard(self, move |self_| { if needs_drop { for i in 0..self_.buckets() { if *self_.ctrl(i) == DELETED { self_.set_ctrl(i, EMPTY); - drop(self_.raw_bucket(i)); + drop(self_, i); self_.items -= 1; } } @@ -2091,8 +2088,11 @@ impl FusedIterator for RawDrain<'_, T, A> {} /// /// In rare cases, the iterator may return a bucket with a different hash. pub struct RawIterHash<'a, T, A: Allocator + Clone = Global> { - table: &'a RawTable, + inner: RawIterHashInner<'a, A>, + marker: PhantomData<&'a T>, +} +struct RawIterHashInner<'a, A> { // The top 7 bits of the hash. h2_hash: u8, @@ -2107,13 +2107,22 @@ pub struct RawIterHash<'a, T, A: Allocator + Clone = Global> { impl<'a, T, A: Allocator + Clone> RawIterHash<'a, T, A> { fn new(table: &'a RawTable, hash: u64) -> Self { + RawIterHash { + inner: RawIterHashInner::new(&table.table, hash), + marker: PhantomData, + } + } +} + +impl<'a, A: Allocator + Clone> RawIterHashInner<'a, A> { + fn new(table: &'a RawTableInner, hash: u64) -> Self { unsafe { let h2_hash = h2(hash); - let probe_seq = table.table.probe_seq(hash); - let group = Group::load(table.table.ctrl(probe_seq.pos)); + let probe_seq = table.probe_seq(hash); + let group = Group::load(table.ctrl(probe_seq.pos)); let bitmask = group.match_byte(h2_hash).into_iter(); - RawIterHash { + RawIterHashInner { table, h2_hash, probe_seq, @@ -2129,19 +2138,27 @@ impl<'a, T, A: Allocator + Clone> Iterator for RawIterHash<'a, T, A> { fn next(&mut self) -> Option> { unsafe { - loop { - if let Some(bit) = self.bitmask.next() { - let index = (self.probe_seq.pos + bit) & self.table.table.bucket_mask; - let bucket = self.table.bucket(index); - return Some(bucket); - } - if likely(self.group.match_empty().any_bit_set()) { - return None; - } - self.probe_seq.move_next(self.table.table.bucket_mask); - self.group = Group::load(self.table.table.ctrl(self.probe_seq.pos)); - self.bitmask = self.group.match_byte(self.h2_hash).into_iter(); + match self.inner.next() { + Some(index) => Some(self.inner.table.bucket(index)), + None => None, + } + } + } +} + +impl<'a, A: Allocator + Clone> RawIterHashInner<'a, A> { + unsafe fn next(&mut self) -> Option { + loop { + if let Some(bit) = self.bitmask.next() { + let index = (self.probe_seq.pos + bit) & self.table.bucket_mask; + return Some(index); + } + if likely(self.group.match_empty().any_bit_set()) { + return None; } + self.probe_seq.move_next(self.table.bucket_mask); + self.group = Group::load(self.table.ctrl(self.probe_seq.pos)); + self.bitmask = self.group.match_byte(self.h2_hash).into_iter(); } } } From f64f332ae97634c326e9c1fba85e5af9b13fdd8a Mon Sep 17 00:00:00 2001 From: Markus Westerlind Date: Tue, 29 Sep 2020 13:20:45 +0200 Subject: [PATCH 10/33] perf(compile): Make resize less generic --- src/raw/mod.rs | 96 ++++++++++++++++++++++++++++---------------------- 1 file changed, 
54 insertions(+), 42 deletions(-) diff --git a/src/raw/mod.rs b/src/raw/mod.rs index 8c8c251790..9e8f2e254f 100644 --- a/src/raw/mod.rs +++ b/src/raw/mod.rs @@ -479,32 +479,10 @@ impl RawTable { capacity: usize, fallibility: Fallibility, ) -> Result { - if capacity == 0 { - Ok(Self::new_in(alloc)) - } else { - unsafe { - // Avoid `Option::ok_or_else` because it bloats LLVM IR. - let buckets = match capacity_to_buckets(capacity) { - Some(buckets) => buckets, - None => return Err(fallibility.capacity_overflow()), - }; - // Avoid `Option::ok_or_else` because it bloats LLVM IR. - let (layout, ctrl_offset) = match calculate_layout::(buckets) { - Some(lco) => lco, - None => return Err(fallibility.capacity_overflow()), - }; - Ok(Self { - table: RawTableInner::fallible_with_capacity( - alloc, - buckets, - fallibility, - layout, - ctrl_offset, - )?, - marker: PhantomData, - }) - } - } + Ok(Self { + table: RawTableInner::fallible_with_capacity::(alloc, capacity, fallibility)?, + marker: PhantomData, + }) } /// Attempts to allocate a new hash table using the given allocator, with at least enough @@ -532,10 +510,7 @@ impl RawTable { Some(lco) => lco, None => hint::unreachable_unchecked(), }; - self.table.alloc.deallocate( - NonNull::new_unchecked(self.table.ctrl.as_ptr().sub(ctrl_offset)), - layout, - ); + self.table.free_buckets(layout, ctrl_offset) } /// Returns pointer to one past last element of data table. @@ -831,10 +806,13 @@ impl RawTable { debug_assert!(self.table.items <= capacity); // Allocate and initialize the new table. - let mut new_table = - Self::fallible_with_capacity(self.table.alloc.clone(), capacity, fallibility)?; - new_table.table.growth_left -= self.table.items; - new_table.table.items = self.table.items; + let mut new_table = RawTableInner::fallible_with_capacity::( + self.table.alloc.clone(), + capacity, + fallibility, + )?; + new_table.growth_left -= self.table.items; + new_table.items = self.table.items; // The hash function may panic, in which case we simply free the new // table without dropping any elements that may have been copied into @@ -842,9 +820,13 @@ impl RawTable { // // This guard is also used to free the old table on success, see // the comment at the bottom of this function. - let mut new_table = guard(ManuallyDrop::new(new_table), |new_table| { + let mut new_table = guard(new_table, |new_table| { if !new_table.is_empty_singleton() { - new_table.free_buckets(); + let (layout, ctrl_offset) = match calculate_layout::(new_table.buckets()) { + Some(lco) => lco, + None => hint::unreachable_unchecked(), + }; + new_table.free_buckets(layout, ctrl_offset); } }); @@ -857,8 +839,8 @@ impl RawTable { // - there are no DELETED entries. // - we know there is enough space in the table. // - all elements are unique. - let index = new_table.table.find_insert_slot(hash); - new_table.table.set_ctrl(index, h2(hash)); + let index = new_table.find_insert_slot(hash); + new_table.set_ctrl(index, h2(hash)); new_table.bucket(index).copy_from_nonoverlapping(&item); } @@ -866,7 +848,7 @@ impl RawTable { // self with the new table. The old table will have its memory freed but // the items will not be dropped (since they have been moved into the // new table). - mem::swap(self, &mut new_table); + mem::swap(&mut self.table, &mut new_table); Ok(()) } @@ -1180,9 +1162,31 @@ impl RawTableInner { }) } - /// Attempts to allocate a new hash table with at least enough capacity - /// for inserting the given number of elements without reallocating. 
- unsafe fn fallible_with_capacity( + fn fallible_with_capacity( + alloc: A, + capacity: usize, + fallibility: Fallibility, + ) -> Result { + if capacity == 0 { + Ok(Self::new_in(alloc)) + } else { + unsafe { + // Avoid `Option::ok_or_else` because it bloats LLVM IR. + let buckets = match capacity_to_buckets(capacity) { + Some(buckets) => buckets, + None => return Err(fallibility.capacity_overflow()), + }; + // Avoid `Option::ok_or_else` because it bloats LLVM IR. + let (layout, ctrl_offset) = match calculate_layout::(buckets) { + Some(lco) => lco, + None => return Err(fallibility.capacity_overflow()), + }; + Self::fallible_with_capacity_inner(alloc, buckets, fallibility, layout, ctrl_offset) + } + } + } + + unsafe fn fallible_with_capacity_inner( alloc: A, buckets: usize, fallibility: Fallibility, @@ -1386,6 +1390,14 @@ impl RawTableInner { self_.growth_left = bucket_mask_to_capacity(self_.bucket_mask) - self_.items; }) } + + #[cfg_attr(feature = "inline-more", inline)] + unsafe fn free_buckets(&mut self, layout: Layout, ctrl_offset: usize) { + self.alloc.deallocate( + NonNull::new_unchecked(self.ctrl.as_ptr().sub(ctrl_offset)), + layout, + ); + } } enum Slot { From 933251a90e0820cc89bcfc25103562a499ce5196 Mon Sep 17 00:00:00 2001 From: Markus Westerlind Date: Tue, 29 Sep 2020 15:47:29 +0200 Subject: [PATCH 11/33] perf(compiler): Make RawIterRange less generic --- src/raw/mod.rs | 107 ++++++++++++++++++++++++++++++------------------- 1 file changed, 66 insertions(+), 41 deletions(-) diff --git a/src/raw/mod.rs b/src/raw/mod.rs index 9e8f2e254f..47826e1464 100644 --- a/src/raw/mod.rs +++ b/src/raw/mod.rs @@ -368,12 +368,6 @@ impl Bucket { pub unsafe fn copy_from_nonoverlapping(&self, other: &Self) { self.as_ptr().copy_from_nonoverlapping(other.as_ptr(), 1); } - - unsafe fn cast(self) -> Bucket { - Bucket { - ptr: self.ptr.cast(), - } - } } /// A raw hash table with an unsafe API. @@ -1643,13 +1637,18 @@ impl IntoIterator for RawTable { /// Iterator over a sub-range of a table. Unlike `RawIter` this iterator does /// not track an item count. pub(crate) struct RawIterRange { + // Pointer to the buckets for the current group. + data: Bucket, + + inner: RawIterRangeInner, +} + +#[derive(Clone)] +pub(crate) struct RawIterRangeInner { // Mask of full buckets in the current group. Bits are cleared from this // mask as each element is processed. current_group: BitMask, - // Pointer to the buckets for the current group. - data: Bucket, - // Pointer to the next group of control bytes, // Must be aligned to the group size. next_ctrl: *const u8, @@ -1664,19 +1663,9 @@ impl RawIterRange { /// The control byte address must be aligned to the group size. #[cfg_attr(feature = "inline-more", inline)] unsafe fn new(ctrl: *const u8, data: Bucket, len: usize) -> Self { - debug_assert_ne!(len, 0); - debug_assert_eq!(ctrl as usize % Group::WIDTH, 0); - let end = ctrl.add(len); - - // Load the first group and advance ctrl to point to the next group - let current_group = Group::load_aligned(ctrl).match_full(); - let next_ctrl = ctrl.add(Group::WIDTH); - Self { - current_group, data, - next_ctrl, - end, + inner: RawIterRangeInner::new(ctrl, len), } } @@ -1725,37 +1714,38 @@ impl RawIterRange { } } -// We make raw iterators unconditionally Send and Sync, and let the PhantomData -// in the actual iterator implementations determine the real Send/Sync bounds. 
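// A self-contained sketch of the split that "Make RawIterRange less generic" performs
// above: the scanning state lives in an inner iterator with no element type parameter,
// and a thin generic wrapper maps each yielded index to a typed item. The names below
// (`IndexIter`, `TypedIter`) are hypothetical, not hashbrown's.
struct IndexIter {
    pos: usize,
    len: usize,
}

impl IndexIter {
    // Compiled once, shared by every element type.
    fn next_index(&mut self) -> Option<usize> {
        if self.pos < self.len {
            let i = self.pos;
            self.pos += 1;
            Some(i)
        } else {
            None
        }
    }
}

struct TypedIter<'a, T> {
    inner: IndexIter,
    data: &'a [T],
}

impl<'a, T> Iterator for TypedIter<'a, T> {
    type Item = &'a T;

    // The only per-`T` code is this shim from index to reference.
    fn next(&mut self) -> Option<&'a T> {
        self.inner.next_index().map(|i| &self.data[i])
    }
}
// Usage: `TypedIter { inner: IndexIter { pos: 0, len: xs.len() }, data: &xs }` yields
// `&xs[0]`, `&xs[1]`, ...; two different element types share one `IndexIter::next_index`.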
-unsafe impl Send for RawIterRange {} -unsafe impl Sync for RawIterRange {} - -impl Clone for RawIterRange { +impl RawIterRangeInner { + /// Returns a `RawIterRange` covering a subset of a table. + /// + /// The control byte address must be aligned to the group size. #[cfg_attr(feature = "inline-more", inline)] - fn clone(&self) -> Self { + unsafe fn new(ctrl: *const u8, len: usize) -> Self { + debug_assert_ne!(len, 0); + debug_assert_eq!(ctrl as usize % Group::WIDTH, 0); + let end = ctrl.add(len); + + // Load the first group and advance ctrl to point to the next group + let current_group = Group::load_aligned(ctrl).match_full(); + let next_ctrl = ctrl.add(Group::WIDTH); + Self { - data: self.data.clone(), - next_ctrl: self.next_ctrl, - current_group: self.current_group, - end: self.end, + current_group, + next_ctrl, + end, } } -} - -impl Iterator for RawIterRange { - type Item = Bucket; - #[cfg_attr(feature = "inline-more", inline)] - fn next(&mut self) -> Option> { + fn next(&mut self) -> (usize, Option) { unsafe { + let mut offset = 0; loop { if let Some(index) = self.current_group.lowest_set_bit() { self.current_group = self.current_group.remove_lowest_bit(); - return Some(self.data.next_n(index)); + return (offset, Some(index)); } if self.next_ctrl >= self.end { - return None; + return (offset, None); } // We might read past self.end up to the next group boundary, @@ -1764,13 +1754,12 @@ impl Iterator for RawIterRange { // EMPTY. On larger tables self.end is guaranteed to be aligned // to the group size (since tables are power-of-two sized). self.current_group = Group::load_aligned(self.next_ctrl).match_full(); - self.data = self.data.next_n(Group::WIDTH); + offset += Group::WIDTH; self.next_ctrl = self.next_ctrl.add(Group::WIDTH); } } } - #[cfg_attr(feature = "inline-more", inline)] fn size_hint(&self) -> (usize, Option) { // We don't have an item count, so just guess based on the range size. ( @@ -1780,6 +1769,42 @@ impl Iterator for RawIterRange { } } +// We make raw iterators unconditionally Send and Sync, and let the PhantomData +// in the actual iterator implementations determine the real Send/Sync bounds. +unsafe impl Send for RawIterRange {} +unsafe impl Sync for RawIterRange {} + +impl Clone for RawIterRange { + #[cfg_attr(feature = "inline-more", inline)] + fn clone(&self) -> Self { + Self { + data: self.data.clone(), + inner: self.inner.clone(), + } + } +} + +impl Iterator for RawIterRange { + type Item = Bucket; + + #[cfg_attr(feature = "inline-more", inline)] + fn next(&mut self) -> Option> { + unsafe { + let (offset, index) = self.inner.next(); + self.data = self.data.next_n(offset); + match index { + Some(index) => Some(self.data.next_n(index)), + None => None, + } + } + } + + #[cfg_attr(feature = "inline-more", inline)] + fn size_hint(&self) -> (usize, Option) { + self.inner.size_hint() + } +} + impl FusedIterator for RawIterRange {} /// Iterator which returns a raw pointer to every full bucket in the table. From cccbeadcd51e34f3b78dee955761a30848624348 Mon Sep 17 00:00:00 2001 From: Markus Westerlind Date: Tue, 29 Sep 2020 16:10:00 +0200 Subject: [PATCH 12/33] clear_no_drop does not need to be generic --- src/raw/mod.rs | 22 +++++++++++++--------- 1 file changed, 13 insertions(+), 9 deletions(-) diff --git a/src/raw/mod.rs b/src/raw/mod.rs index 47826e1464..5441c3b740 100644 --- a/src/raw/mod.rs +++ b/src/raw/mod.rs @@ -608,15 +608,7 @@ impl RawTable { /// Marks all table buckets as empty without dropping their contents. 
#[cfg_attr(feature = "inline-more", inline)] pub fn clear_no_drop(&mut self) { - if !self.is_empty_singleton() { - unsafe { - self.table - .ctrl(0) - .write_bytes(EMPTY, self.table.num_ctrl_bytes()); - } - } - self.table.items = 0; - self.table.growth_left = bucket_mask_to_capacity(self.table.bucket_mask); + self.table.clear_no_drop() } /// Removes all elements from the table without freeing the backing memory. @@ -1392,6 +1384,18 @@ impl RawTableInner { layout, ); } + + /// Marks all table buckets as empty without dropping their contents. + #[cfg_attr(feature = "inline-more", inline)] + fn clear_no_drop(&mut self) { + if !self.is_empty_singleton() { + unsafe { + self.ctrl(0).write_bytes(EMPTY, self.num_ctrl_bytes()); + } + } + self.items = 0; + self.growth_left = bucket_mask_to_capacity(self.bucket_mask); + } } enum Slot { From 030bf080d5192ff4f9d0320abaa02d073a524deb Mon Sep 17 00:00:00 2001 From: Markus Westerlind Date: Tue, 29 Sep 2020 16:14:21 +0200 Subject: [PATCH 13/33] refactor: Merge the drop loop into a function --- src/raw/mod.rs | 33 ++++++++++++--------------------- 1 file changed, 12 insertions(+), 21 deletions(-) diff --git a/src/raw/mod.rs b/src/raw/mod.rs index 5441c3b740..a2e59903e2 100644 --- a/src/raw/mod.rs +++ b/src/raw/mod.rs @@ -615,13 +615,16 @@ impl RawTable { #[cfg_attr(feature = "inline-more", inline)] pub fn clear(&mut self) { // Ensure that the table is reset even if one of the drops panic - let self_ = guard(self, |self_| self_.clear_no_drop()); + let mut self_ = guard(self, |self_| self_.clear_no_drop()); + unsafe { + self_.drop_elements(); + } + } - if mem::needs_drop::() && self_.len() != 0 { - unsafe { - for item in self_.iter() { - item.drop(); - } + unsafe fn drop_elements(&mut self) { + if mem::needs_drop::() && self.len() != 0 { + for item in self.iter() { + item.drop(); } } } @@ -1439,11 +1442,7 @@ impl Clone for RawTable { } else { unsafe { // First, drop all our elements without clearing the control bytes. - if mem::needs_drop::() && self.len() != 0 { - for item in self.iter() { - item.drop(); - } - } + self.drop_elements(); // If necessary, resize our table to match the source. if self.buckets() != source.buckets() { @@ -1598,11 +1597,7 @@ unsafe impl<#[may_dangle] T, A: Allocator + Clone> Drop for RawTable { fn drop(&mut self) { if !self.is_empty_singleton() { unsafe { - if mem::needs_drop::() && self.len() != 0 { - for item in self.iter() { - item.drop(); - } - } + self.drop_elements(); self.free_buckets(); } } @@ -1614,11 +1609,7 @@ impl Drop for RawTable { fn drop(&mut self) { if !self.is_empty_singleton() { unsafe { - if mem::needs_drop::() && self.len() != 0 { - for item in self.iter() { - item.drop(); - } - } + self.drop_elements(); self.free_buckets(); } } From e7a9bd41d575ed2e26548796b8bf8fc1e9685c1d Mon Sep 17 00:00:00 2001 From: Markus Westerlind Date: Tue, 29 Sep 2020 16:49:54 +0200 Subject: [PATCH 14/33] perf(compile): Make the resize panic guard less generic --- src/raw/mod.rs | 25 ++++++++++++++++--------- 1 file changed, 16 insertions(+), 9 deletions(-) diff --git a/src/raw/mod.rs b/src/raw/mod.rs index a2e59903e2..823761bd3f 100644 --- a/src/raw/mod.rs +++ b/src/raw/mod.rs @@ -809,15 +809,7 @@ impl RawTable { // // This guard is also used to free the old table on success, see // the comment at the bottom of this function. 
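// The comment above describes the pattern: register the cleanup before running code
// that may panic (here, the user-supplied hash function), and reuse the same guard to
// free the old allocation once the copy succeeds. Hashbrown uses its own `scopeguard`
// module for this; the following is only a minimal, hypothetical illustration of such
// a drop-based guard, not the crate's API.
struct OnDrop<F: FnMut()> {
    on_drop: F,
}

impl<F: FnMut()> Drop for OnDrop<F> {
    fn drop(&mut self) {
        (self.on_drop)();
    }
}

fn fill<T: Clone>(src: &[T], dst: &mut Vec<T>, hash: impl Fn(&T) -> u64) {
    // If `hash` panics part-way through the loop, this guard still runs while the
    // stack unwinds, so the registered cleanup is not skipped.
    let _cleanup = OnDrop {
        on_drop: || eprintln!("releasing partially initialized storage"),
    };
    for item in src {
        let _ = hash(item);
        dst.push(item.clone());
    }
    // On success the guard also fires at scope exit, which is what the resize code
    // above relies on to free the table it no longer needs.
}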
- let mut new_table = guard(new_table, |new_table| { - if !new_table.is_empty_singleton() { - let (layout, ctrl_offset) = match calculate_layout::(new_table.buckets()) { - Some(lco) => lco, - None => hint::unreachable_unchecked(), - }; - new_table.free_buckets(layout, ctrl_offset); - } - }); + let mut new_table = new_table.resize_panic_guard(calculate_layout::); // Copy all elements to the new table. for item in self.iter() { @@ -1380,6 +1372,21 @@ impl RawTableInner { }) } + unsafe fn resize_panic_guard<'s>( + &'s mut self, + layout: fn(usize) -> Option<(Layout, usize)>, + ) -> crate::scopeguard::ScopeGuard<&mut Self, impl FnMut(&mut &'s mut Self) + 's> { + guard(self, move |self_| { + if !self_.is_empty_singleton() { + let (layout, ctrl_offset) = match layout(self_.buckets()) { + Some(lco) => lco, + None => hint::unreachable_unchecked(), + }; + self_.free_buckets(layout, ctrl_offset); + } + }) + } + #[cfg_attr(feature = "inline-more", inline)] unsafe fn free_buckets(&mut self, layout: Layout, ctrl_offset: usize) { self.alloc.deallocate( From aa7c1be3a93a8b449a90a6ed6fa395c1776295f7 Mon Sep 17 00:00:00 2001 From: Markus Westerlind Date: Tue, 29 Sep 2020 17:05:32 +0200 Subject: [PATCH 15/33] perf(compiler): Shrink rehash_in_place a bit more --- src/raw/mod.rs | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/src/raw/mod.rs b/src/raw/mod.rs index 823761bd3f..be754743e6 100644 --- a/src/raw/mod.rs +++ b/src/raw/mod.rs @@ -749,10 +749,9 @@ impl RawTable { // At this point, DELETED elements are elements that we haven't // rehashed yet. Find them and re-insert them at their ideal // position. - 'outer: for i in 0..guard.buckets() { - if *guard.ctrl(i) != DELETED { - continue; - } + let mut start = 0; + 'outer: while let Some(i) = guard.next_deleted(start) { + start = i + 1; 'inner: loop { // Hash the current item let item = guard.bucket(i); @@ -1353,6 +1352,10 @@ impl RawTableInner { self.bucket_mask == 0 } + unsafe fn next_deleted(&self, start: usize) -> Option { + (start..self.buckets()).find(|&i| *self.ctrl(i) == DELETED) + } + unsafe fn rehash_panic_guard<'s>( &'s mut self, needs_drop: bool, From 3eac16207f6d36773c3cc15328dd9e2edaa9fd72 Mon Sep 17 00:00:00 2001 From: Markus Westerlind Date: Tue, 29 Sep 2020 18:04:31 +0200 Subject: [PATCH 16/33] perf(compile): Make erase less generic --- src/raw/mod.rs | 70 +++++++++++++++++++++----------------------------- 1 file changed, 29 insertions(+), 41 deletions(-) diff --git a/src/raw/mod.rs b/src/raw/mod.rs index be754743e6..c285de47c6 100644 --- a/src/raw/mod.rs +++ b/src/raw/mod.rs @@ -539,27 +539,7 @@ impl RawTable { #[deprecated(since = "0.8.1", note = "use erase or remove instead")] pub unsafe fn erase_no_drop(&mut self, item: &Bucket) { let index = self.bucket_index(item); - debug_assert!(is_full(*self.table.ctrl(index))); - let index_before = index.wrapping_sub(Group::WIDTH) & self.table.bucket_mask; - let empty_before = Group::load(self.table.ctrl(index_before)).match_empty(); - let empty_after = Group::load(self.table.ctrl(index)).match_empty(); - - // If we are inside a continuous block of Group::WIDTH full or deleted - // cells then a probe window may have seen a full block when trying to - // insert. We therefore need to keep that block non-empty so that - // lookups will continue searching to the next probe window. - // - // Note that in this context `leading_zeros` refers to the bytes at the - // end of a group, while `trailing_zeros` refers to the bytes at the - // begining of a group. 
- let ctrl = if empty_before.leading_zeros() + empty_after.trailing_zeros() >= Group::WIDTH { - DELETED - } else { - self.table.growth_left += 1; - EMPTY - }; - self.table.set_ctrl(index, ctrl); - self.table.items -= 1; + self.table.erase(index) } /// Erases an element from the table, dropping it in place. @@ -999,13 +979,6 @@ impl RawTable { self.table.bucket_mask + 1 } - /// Returns whether this table points to the empty singleton with a capacity - /// of 0. - #[cfg_attr(feature = "inline-more", inline)] - fn is_empty_singleton(&self) -> bool { - self.table.bucket_mask == 0 - } - /// Returns an iterator over every element in the table. It is up to /// the caller to ensure that the `RawTable` outlives the `RawIter`. /// Because we cannot make the `next` method unsafe on the `RawIter` @@ -1327,16 +1300,6 @@ impl RawTableInner { self.ctrl.as_ptr().add(index) } - #[cfg_attr(feature = "inline-more", inline)] - fn capacity(&self) -> usize { - self.items + self.growth_left - } - - #[cfg_attr(feature = "inline-more", inline)] - fn len(&self) -> usize { - self.items - } - #[cfg_attr(feature = "inline-more", inline)] fn buckets(&self) -> usize { self.bucket_mask + 1 @@ -1409,6 +1372,31 @@ impl RawTableInner { self.items = 0; self.growth_left = bucket_mask_to_capacity(self.bucket_mask); } + + #[cfg_attr(feature = "inline-more", inline)] + unsafe fn erase(&mut self, index: usize) { + debug_assert!(is_full(*self.ctrl(index))); + let index_before = index.wrapping_sub(Group::WIDTH) & self.bucket_mask; + let empty_before = Group::load(self.ctrl(index_before)).match_empty(); + let empty_after = Group::load(self.ctrl(index)).match_empty(); + + // If we are inside a continuous block of Group::WIDTH full or deleted + // cells then a probe window may have seen a full block when trying to + // insert. We therefore need to keep that block non-empty so that + // lookups will continue searching to the next probe window. + // + // Note that in this context `leading_zeros` refers to the bytes at the + // end of a group, while `trailing_zeros` refers to the bytes at the + // begining of a group. + let ctrl = if empty_before.leading_zeros() + empty_after.trailing_zeros() >= Group::WIDTH { + DELETED + } else { + self.growth_left += 1; + EMPTY + }; + self.set_ctrl(index, ctrl); + self.items -= 1; + } } enum Slot { @@ -1457,7 +1445,7 @@ impl Clone for RawTable { // If necessary, resize our table to match the source. if self.buckets() != source.buckets() { // Skip our drop by using ptr::write. 
- if !self.is_empty_singleton() { + if !self.table.is_empty_singleton() { self.free_buckets(); } (self as *mut Self).write( @@ -1605,7 +1593,7 @@ impl RawTable { unsafe impl<#[may_dangle] T, A: Allocator + Clone> Drop for RawTable { #[cfg_attr(feature = "inline-more", inline)] fn drop(&mut self) { - if !self.is_empty_singleton() { + if !self.table.is_empty_singleton() { unsafe { self.drop_elements(); self.free_buckets(); @@ -1617,7 +1605,7 @@ unsafe impl<#[may_dangle] T, A: Allocator + Clone> Drop for RawTable { impl Drop for RawTable { #[cfg_attr(feature = "inline-more", inline)] fn drop(&mut self) { - if !self.is_empty_singleton() { + if !self.table.is_empty_singleton() { unsafe { self.drop_elements(); self.free_buckets(); From 5df48f6b7879aa0f1456b132986152116ca309a1 Mon Sep 17 00:00:00 2001 From: Markus Westerlind Date: Tue, 29 Sep 2020 18:52:10 +0200 Subject: [PATCH 17/33] cleanup --- src/raw/mod.rs | 75 +++++++++++++++++++++++++++++++------------------- 1 file changed, 46 insertions(+), 29 deletions(-) diff --git a/src/raw/mod.rs b/src/raw/mod.rs index c285de47c6..f16d463be1 100644 --- a/src/raw/mod.rs +++ b/src/raw/mod.rs @@ -262,7 +262,7 @@ fn calculate_layout(buckets: usize) -> Option<(Layout, usize)> { calculate_layout_(mem::align_of::(), mem::size_of::(), buckets) } -#[cfg_attr(feature = "inline-more", inline)] +#[inline] #[cfg(not(feature = "nightly"))] fn calculate_layout_(align_of: usize, size_of: usize, buckets: usize) -> Option<(Layout, usize)> { debug_assert!(buckets.is_power_of_two()); @@ -377,6 +377,8 @@ pub struct RawTable { marker: PhantomData, } +/// Non-generic part of `RawTable` which allows functions to be instantiated only once regardless +/// of how many different key-value types are used. struct RawTableInner { // Mask to get an index from a hash value. The value is one less than the // number of buckets in the table. @@ -883,7 +885,7 @@ impl RawTable { pub fn insert_no_grow(&mut self, hash: u64, value: T) -> Bucket { unsafe { let index = self.table.find_insert_slot(hash); - let bucket = self.bucket(index); + let bucket = self.table.bucket(index); // If we are replacing a DELETED entry then we don't need to update // the load counter. @@ -1139,6 +1141,7 @@ impl RawTableInner { } } + #[inline] unsafe fn fallible_with_capacity_inner( alloc: A, buckets: usize, @@ -1156,7 +1159,7 @@ impl RawTableInner { /// a new element. /// /// There must be at least 1 empty bucket in the table. 
- #[cfg_attr(feature = "inline-more", inline)] + #[inline] fn find_insert_slot(&self, hash: u64) -> usize { let mut probe_seq = self.table.probe_seq(hash); loop { @@ -1189,6 +1192,7 @@ impl RawTableInner { } } + #[inline] fn prepare_rehash_in_place(&mut self) { unsafe { // Bulk convert all full control bytes to DELETED, and all DELETED @@ -1212,16 +1216,19 @@ impl RawTableInner { } } + #[cfg_attr(feature = "inline-more", inline)] unsafe fn bucket(&self, index: usize) -> Bucket { debug_assert_ne!(self.bucket_mask, 0); debug_assert!(index < self.buckets()); - Bucket::from_base_index(self.data_end().cast::(), index) + Bucket::from_base_index(self.data_end(), index) } - unsafe fn data_end(&self) -> NonNull { - NonNull::new_unchecked(self.ctrl.as_ptr()) + #[cfg_attr(feature = "inline-more", inline)] + unsafe fn data_end(&self) -> NonNull { + NonNull::new_unchecked(self.ctrl.as_ptr() as *mut T) } + #[inline] unsafe fn search_new_slot(&mut self, i: usize, hash: u64) -> Slot { // Search for a suitable place to put it let new_i = self.find_insert_slot(hash); @@ -1257,7 +1264,7 @@ impl RawTableInner { /// This iterator never terminates, but is guaranteed to visit each bucket /// group exactly once. The loop using `probe_seq` must terminate upon /// reaching a group containing an empty bucket. - #[cfg_attr(feature = "inline-more", inline)] + #[inline] fn probe_seq(&self, hash: u64) -> ProbeSeq { ProbeSeq { pos: h1(hash) & self.bucket_mask, @@ -1267,7 +1274,7 @@ impl RawTableInner { /// Sets a control byte, and possibly also the replicated control byte at /// the end of the array. - #[cfg_attr(feature = "inline-more", inline)] + #[inline] unsafe fn set_ctrl(&self, index: usize, ctrl: u8) { // Replicate the first Group::WIDTH control bytes at the end of // the array without using a branch: @@ -1294,23 +1301,23 @@ impl RawTableInner { } /// Returns a pointer to a control byte. - #[cfg_attr(feature = "inline-more", inline)] + #[inline] unsafe fn ctrl(&self, index: usize) -> *mut u8 { debug_assert!(index < self.num_ctrl_bytes()); self.ctrl.as_ptr().add(index) } - #[cfg_attr(feature = "inline-more", inline)] + #[inline] fn buckets(&self) -> usize { self.bucket_mask + 1 } - #[cfg_attr(feature = "inline-more", inline)] + #[inline] fn num_ctrl_bytes(&self) -> usize { self.bucket_mask + 1 + Group::WIDTH } - #[cfg_attr(feature = "inline-more", inline)] + #[inline] fn is_empty_singleton(&self) -> bool { self.bucket_mask == 0 } @@ -1319,6 +1326,8 @@ impl RawTableInner { (start..self.buckets()).find(|&i| *self.ctrl(i) == DELETED) } + #[allow(clippy::mut_mut)] + #[inline] unsafe fn rehash_panic_guard<'s>( &'s mut self, needs_drop: bool, @@ -1338,6 +1347,8 @@ impl RawTableInner { }) } + #[allow(clippy::mut_mut)] + #[inline] unsafe fn resize_panic_guard<'s>( &'s mut self, layout: fn(usize) -> Option<(Layout, usize)>, @@ -1353,7 +1364,7 @@ impl RawTableInner { }) } - #[cfg_attr(feature = "inline-more", inline)] + #[inline] unsafe fn free_buckets(&mut self, layout: Layout, ctrl_offset: usize) { self.alloc.deallocate( NonNull::new_unchecked(self.ctrl.as_ptr().sub(ctrl_offset)), @@ -1362,7 +1373,7 @@ impl RawTableInner { } /// Marks all table buckets as empty without dropping their contents. 
- #[cfg_attr(feature = "inline-more", inline)] + #[inline] fn clear_no_drop(&mut self) { if !self.is_empty_singleton() { unsafe { @@ -1373,7 +1384,7 @@ impl RawTableInner { self.growth_left = bucket_mask_to_capacity(self.bucket_mask); } - #[cfg_attr(feature = "inline-more", inline)] + #[inline] unsafe fn erase(&mut self, index: usize) { debug_assert!(is_full(*self.ctrl(index))); let index_before = index.wrapping_sub(Group::WIDTH) & self.bucket_mask; @@ -1492,7 +1503,7 @@ impl RawTableClone for RawTable { .copy_to_nonoverlapping(self.table.ctrl(0), self.table.num_ctrl_bytes()); source .data_start() - .copy_to_nonoverlapping(self.data_start(), self.buckets()); + .copy_to_nonoverlapping(self.data_start(), self.table.buckets()); self.table.items = source.table.items; self.table.growth_left = source.table.growth_left; @@ -1550,7 +1561,7 @@ impl RawTable { // elements one by one. We don't do this if we have the same number of // buckets as the source since we can just copy the contents directly // in that case. - if self.buckets() != source.buckets() + if self.table.buckets() != source.table.buckets() && bucket_mask_to_capacity(self.table.bucket_mask) >= source.len() { self.clear(); @@ -1670,7 +1681,7 @@ impl RawIterRange { #[cfg(feature = "rayon")] pub(crate) fn split(mut self) -> (Self, Option>) { unsafe { - if self.end <= self.next_ctrl { + if self.inner.end <= self.inner.next_ctrl { // Nothing to split if the group that we are current processing // is the last one. (self, None) @@ -1678,7 +1689,7 @@ impl RawIterRange { // len is the remaining number of elements after the group that // we are currently processing. It must be a multiple of the // group size (small tables are caught by the check above). - let len = offset_from(self.end, self.next_ctrl); + let len = offset_from(self.inner.end, self.inner.next_ctrl); debug_assert_eq!(len % Group::WIDTH, 0); // Split the remaining elements into two halves, but round the @@ -1690,7 +1701,7 @@ impl RawIterRange { let mid = (len / 2) & !(Group::WIDTH - 1); let tail = Self::new( - self.next_ctrl.add(mid), + self.inner.next_ctrl.add(mid), self.data.next_n(Group::WIDTH).next_n(mid), len - mid, ); @@ -1698,9 +1709,9 @@ impl RawIterRange { self.data.next_n(Group::WIDTH).next_n(mid).ptr, tail.data.ptr ); - debug_assert_eq!(self.end, tail.end); - self.end = self.next_ctrl.add(mid); - debug_assert_eq!(self.end.add(Group::WIDTH), tail.next_ctrl); + debug_assert_eq!(self.inner.end, tail.inner.end); + self.inner.end = self.inner.next_ctrl.add(mid); + debug_assert_eq!(self.inner.end.add(Group::WIDTH), tail.inner.next_ctrl); (self, Some(tail)) } } @@ -1854,7 +1865,7 @@ impl RawIter { return; } - if self.iter.next_ctrl < self.iter.end + if self.iter.inner.next_ctrl < self.iter.inner.end && b.as_ptr() <= self.iter.data.next_n(Group::WIDTH).as_ptr() { // The iterator has not yet reached the bucket's group. @@ -1865,7 +1876,7 @@ impl RawIter { // To do that, we need to find its control byte. We know that self.iter.data is // at self.iter.next_ctrl - Group::WIDTH, so we work from there: let offset = offset_from(self.iter.data.as_ptr(), b.as_ptr()); - let ctrl = self.iter.next_ctrl.sub(Group::WIDTH).add(offset); + let ctrl = self.iter.inner.next_ctrl.sub(Group::WIDTH).add(offset); // This method should be called _before_ a removal, or _after_ an insert, // so in both cases the ctrl byte should indicate that the bucket is full. 
assert!(is_full(*ctrl)); @@ -1888,7 +1899,7 @@ impl RawIter { // - Otherwise, update the iterator cached group so that it won't // yield a to-be-removed bucket, or _will_ yield a to-be-added bucket. // We'll also need ot update the item count accordingly. - if let Some(index) = self.iter.current_group.lowest_set_bit() { + if let Some(index) = self.iter.inner.current_group.lowest_set_bit() { let next_bucket = self.iter.data.next_n(index); if b.as_ptr() > next_bucket.as_ptr() { // The toggled bucket is "before" the bucket the iterator would yield next. We @@ -1909,7 +1920,7 @@ impl RawIter { // Instead, we _just_ flip the bit for the particular bucket the caller asked // us to reflect. let our_bit = offset_from(self.iter.data.as_ptr(), b.as_ptr()); - let was_full = self.iter.current_group.flip(our_bit); + let was_full = self.iter.inner.current_group.flip(our_bit); debug_assert_ne!(was_full, is_insert); if is_insert { @@ -1921,10 +1932,16 @@ impl RawIter { if cfg!(debug_assertions) { if b.as_ptr() == next_bucket.as_ptr() { // The removed bucket should no longer be next - debug_assert_ne!(self.iter.current_group.lowest_set_bit(), Some(index)); + debug_assert_ne!( + self.iter.inner.current_group.lowest_set_bit(), + Some(index) + ); } else { // We should not have changed what bucket comes next. - debug_assert_eq!(self.iter.current_group.lowest_set_bit(), Some(index)); + debug_assert_eq!( + self.iter.inner.current_group.lowest_set_bit(), + Some(index) + ); } } } From c83bf9d6a0371763fd6f04fb68eecf221ce45cfe Mon Sep 17 00:00:00 2001 From: Markus Westerlind Date: Fri, 2 Oct 2020 10:31:58 +0200 Subject: [PATCH 18/33] Restore performance of the RawIterRange --- src/raw/mod.rs | 96 +++++++++++++++++++++++++++++++++++--------------- 1 file changed, 68 insertions(+), 28 deletions(-) diff --git a/src/raw/mod.rs b/src/raw/mod.rs index f16d463be1..75d58490c9 100644 --- a/src/raw/mod.rs +++ b/src/raw/mod.rs @@ -368,6 +368,13 @@ impl Bucket { pub unsafe fn copy_from_nonoverlapping(&self, other: &Self) { self.as_ptr().copy_from_nonoverlapping(other.as_ptr(), 1); } + + #[cfg_attr(feature = "inline-more", inline)] + fn cast(self) -> Bucket { + Bucket { + ptr: self.ptr.cast(), + } + } } /// A raw hash table with an unsafe API. @@ -1641,14 +1648,15 @@ impl IntoIterator for RawTable { /// Iterator over a sub-range of a table. Unlike `RawIter` this iterator does /// not track an item count. pub(crate) struct RawIterRange { - // Pointer to the buckets for the current group. - data: Bucket, - inner: RawIterRangeInner, + marker: PhantomData, } #[derive(Clone)] pub(crate) struct RawIterRangeInner { + // Pointer to the buckets for the current group. + data: Bucket, + // Mask of full buckets in the current group. Bits are cleared from this // mask as each element is processed. 
current_group: BitMask, @@ -1668,8 +1676,8 @@ impl RawIterRange { #[cfg_attr(feature = "inline-more", inline)] unsafe fn new(ctrl: *const u8, data: Bucket, len: usize) -> Self { Self { - data, - inner: RawIterRangeInner::new(ctrl, len), + inner: RawIterRangeInner::new(ctrl, data.cast(), len), + marker: PhantomData, } } @@ -1702,12 +1710,23 @@ impl RawIterRange { let tail = Self::new( self.inner.next_ctrl.add(mid), - self.data.next_n(Group::WIDTH).next_n(mid), + self.inner + .data + .clone() + .cast::() + .next_n(Group::WIDTH) + .next_n(mid), len - mid, ); debug_assert_eq!( - self.data.next_n(Group::WIDTH).next_n(mid).ptr, - tail.data.ptr + self.inner + .data + .clone() + .cast::() + .next_n(Group::WIDTH) + .next_n(mid) + .ptr, + tail.inner.data.clone().cast::().ptr ); debug_assert_eq!(self.inner.end, tail.inner.end); self.inner.end = self.inner.next_ctrl.add(mid); @@ -1718,12 +1737,20 @@ impl RawIterRange { } } +fn offset_multiplier() -> usize { + if mem::size_of::() == 0 { + 1 + } else { + mem::size_of::() + } +} + impl RawIterRangeInner { /// Returns a `RawIterRange` covering a subset of a table. /// /// The control byte address must be aligned to the group size. - #[cfg_attr(feature = "inline-more", inline)] - unsafe fn new(ctrl: *const u8, len: usize) -> Self { + #[inline] + unsafe fn new(ctrl: *const u8, data: Bucket, len: usize) -> Self { debug_assert_ne!(len, 0); debug_assert_eq!(ctrl as usize % Group::WIDTH, 0); let end = ctrl.add(len); @@ -1736,20 +1763,21 @@ impl RawIterRangeInner { current_group, next_ctrl, end, + data, } } - fn next(&mut self) -> (usize, Option) { + #[inline] + fn next(&mut self, offset_multiplier: usize) -> Option> { unsafe { - let mut offset = 0; loop { if let Some(index) = self.current_group.lowest_set_bit() { self.current_group = self.current_group.remove_lowest_bit(); - return (offset, Some(index)); + return Some(self.data.next_n(offset_multiplier * index)); } if self.next_ctrl >= self.end { - return (offset, None); + return None; } // We might read past self.end up to the next group boundary, @@ -1758,12 +1786,13 @@ impl RawIterRangeInner { // EMPTY. On larger tables self.end is guaranteed to be aligned // to the group size (since tables are power-of-two sized). self.current_group = Group::load_aligned(self.next_ctrl).match_full(); - offset += Group::WIDTH; + self.data = self.data.next_n(offset_multiplier * Group::WIDTH); self.next_ctrl = self.next_ctrl.add(Group::WIDTH); } } } + #[inline] fn size_hint(&self) -> (usize, Option) { // We don't have an item count, so just guess based on the range size. ( @@ -1782,8 +1811,8 @@ impl Clone for RawIterRange { #[cfg_attr(feature = "inline-more", inline)] fn clone(&self) -> Self { Self { - data: self.data.clone(), inner: self.inner.clone(), + marker: self.marker, } } } @@ -1793,13 +1822,10 @@ impl Iterator for RawIterRange { #[cfg_attr(feature = "inline-more", inline)] fn next(&mut self) -> Option> { - unsafe { - let (offset, index) = self.inner.next(); - self.data = self.data.next_n(offset); - match index { - Some(index) => Some(self.data.next_n(index)), - None => None, - } + let bucket = self.inner.next(offset_multiplier::()); + match bucket { + Some(bucket) => Some(bucket.cast()), + None => None, } } @@ -1859,14 +1885,22 @@ impl RawIter { #[cfg(feature = "raw")] fn reflect_toggle_full(&mut self, b: &Bucket, is_insert: bool) { unsafe { - if b.as_ptr() > self.iter.data.as_ptr() { + if b.as_ptr() as *mut u8 > self.iter.inner.data.as_ptr() { // The iterator has already passed the bucket's group. 
// So the toggle isn't relevant to this iterator. return; } if self.iter.inner.next_ctrl < self.iter.inner.end - && b.as_ptr() <= self.iter.data.next_n(Group::WIDTH).as_ptr() + && b.as_ptr() + <= self + .iter + .inner + .data + .clone() + .cast::() + .next_n(Group::WIDTH) + .as_ptr() { // The iterator has not yet reached the bucket's group. // We don't need to reload anything, but we do need to adjust the item count. @@ -1875,7 +1909,10 @@ impl RawIter { // Double-check that the user isn't lying to us by checking the bucket state. // To do that, we need to find its control byte. We know that self.iter.data is // at self.iter.next_ctrl - Group::WIDTH, so we work from there: - let offset = offset_from(self.iter.data.as_ptr(), b.as_ptr()); + let offset = offset_from( + self.iter.inner.data.clone().cast::().as_ptr(), + b.as_ptr(), + ); let ctrl = self.iter.inner.next_ctrl.sub(Group::WIDTH).add(offset); // This method should be called _before_ a removal, or _after_ an insert, // so in both cases the ctrl byte should indicate that the bucket is full. @@ -1900,7 +1937,7 @@ impl RawIter { // yield a to-be-removed bucket, or _will_ yield a to-be-added bucket. // We'll also need ot update the item count accordingly. if let Some(index) = self.iter.inner.current_group.lowest_set_bit() { - let next_bucket = self.iter.data.next_n(index); + let next_bucket = self.iter.inner.data.clone().cast::().next_n(index); if b.as_ptr() > next_bucket.as_ptr() { // The toggled bucket is "before" the bucket the iterator would yield next. We // therefore don't need to do anything --- the iterator has already passed the @@ -1919,7 +1956,10 @@ impl RawIter { // call to reflect for those buckets might _also_ decrement the item count. // Instead, we _just_ flip the bit for the particular bucket the caller asked // us to reflect. 
- let our_bit = offset_from(self.iter.data.as_ptr(), b.as_ptr()); + let our_bit = offset_from( + self.iter.inner.data.clone().cast::().as_ptr(), + b.as_ptr(), + ); let was_full = self.iter.inner.current_group.flip(our_bit); debug_assert_ne!(was_full, is_insert); From c2051201e5cfae57e9d6deec93f36fc06d1fe9f1 Mon Sep 17 00:00:00 2001 From: Markus Westerlind Date: Fri, 2 Oct 2020 10:40:03 +0200 Subject: [PATCH 19/33] Fix performance on raw_iter_hash --- src/raw/mod.rs | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/raw/mod.rs b/src/raw/mod.rs index 75d58490c9..5c76355201 100644 --- a/src/raw/mod.rs +++ b/src/raw/mod.rs @@ -2202,6 +2202,7 @@ impl<'a, T, A: Allocator + Clone> RawIterHash<'a, T, A> { } impl<'a, A: Allocator + Clone> RawIterHashInner<'a, A> { + #[inline] fn new(table: &'a RawTableInner, hash: u64) -> Self { unsafe { let h2_hash = h2(hash); @@ -2234,6 +2235,7 @@ impl<'a, T, A: Allocator + Clone> Iterator for RawIterHash<'a, T, A> { } impl<'a, A: Allocator + Clone> RawIterHashInner<'a, A> { + #[inline] unsafe fn next(&mut self) -> Option { loop { if let Some(bit) = self.bitmask.next() { From 8a229a5b1c943d1f7c63d5bd2f5b041294c903f2 Mon Sep 17 00:00:00 2001 From: Markus Westerlind Date: Mon, 5 Oct 2020 13:32:04 +0200 Subject: [PATCH 20/33] chore: Ensure the raw feature is compiled on CI --- ci/run.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ci/run.sh b/ci/run.sh index 9bf0b0b609..5b75fd1183 100644 --- a/ci/run.sh +++ b/ci/run.sh @@ -9,7 +9,7 @@ if [ "${NO_STD}" = "1" ]; then FEATURES="rustc-internal-api" OP="build" else - FEATURES="rustc-internal-api,serde,rayon" + FEATURES="rustc-internal-api,serde,rayon,raw" OP="test" fi if [ "${TRAVIS_RUST_VERSION}" = "nightly" ]; then From 8721c34a9655737a558b2df8bd89b6fe3d524d72 Mon Sep 17 00:00:00 2001 From: Markus Westerlind Date: Mon, 5 Oct 2020 17:37:33 +0200 Subject: [PATCH 21/33] Revert the iteration to be generic again --- src/raw/bitmask.rs | 10 ++++ src/raw/mod.rs | 119 ++++++++++++++------------------------------- 2 files changed, 47 insertions(+), 82 deletions(-) diff --git a/src/raw/bitmask.rs b/src/raw/bitmask.rs index 99b2d5341b..356edbf9de 100644 --- a/src/raw/bitmask.rs +++ b/src/raw/bitmask.rs @@ -61,6 +61,16 @@ impl BitMask { } } + #[inline] + pub fn take_next_bit(&mut self) -> Option { + if let Some(index) = self.lowest_set_bit() { + *self = self.remove_lowest_bit(); + Some(index) + } else { + None + } + } + /// Returns the first set bit in the `BitMask`, if there is one. The /// bitmask must not be empty. #[inline] diff --git a/src/raw/mod.rs b/src/raw/mod.rs index 5c76355201..747fe70137 100644 --- a/src/raw/mod.rs +++ b/src/raw/mod.rs @@ -368,13 +368,6 @@ impl Bucket { pub unsafe fn copy_from_nonoverlapping(&self, other: &Self) { self.as_ptr().copy_from_nonoverlapping(other.as_ptr(), 1); } - - #[cfg_attr(feature = "inline-more", inline)] - fn cast(self) -> Bucket { - Bucket { - ptr: self.ptr.cast(), - } - } } /// A raw hash table with an unsafe API. @@ -1649,14 +1642,12 @@ impl IntoIterator for RawTable { /// not track an item count. pub(crate) struct RawIterRange { inner: RawIterRangeInner, - marker: PhantomData, + // Pointer to the buckets for the current group. + data: Bucket, } #[derive(Clone)] pub(crate) struct RawIterRangeInner { - // Pointer to the buckets for the current group. - data: Bucket, - // Mask of full buckets in the current group. Bits are cleared from this // mask as each element is processed. 
current_group: BitMask, @@ -1676,8 +1667,8 @@ impl RawIterRange { #[cfg_attr(feature = "inline-more", inline)] unsafe fn new(ctrl: *const u8, data: Bucket, len: usize) -> Self { Self { - inner: RawIterRangeInner::new(ctrl, data.cast(), len), - marker: PhantomData, + inner: RawIterRangeInner::new(ctrl, len), + data, } } @@ -1710,23 +1701,12 @@ impl RawIterRange { let tail = Self::new( self.inner.next_ctrl.add(mid), - self.inner - .data - .clone() - .cast::() - .next_n(Group::WIDTH) - .next_n(mid), + self.inner.data.next_n(Group::WIDTH).next_n(mid), len - mid, ); debug_assert_eq!( - self.inner - .data - .clone() - .cast::() - .next_n(Group::WIDTH) - .next_n(mid) - .ptr, - tail.inner.data.clone().cast::().ptr + self.data.next_n(Group::WIDTH).next_n(mid).ptr, + tail.data.ptr ); debug_assert_eq!(self.inner.end, tail.inner.end); self.inner.end = self.inner.next_ctrl.add(mid); @@ -1737,20 +1717,12 @@ impl RawIterRange { } } -fn offset_multiplier() -> usize { - if mem::size_of::() == 0 { - 1 - } else { - mem::size_of::() - } -} - impl RawIterRangeInner { /// Returns a `RawIterRange` covering a subset of a table. /// /// The control byte address must be aligned to the group size. #[inline] - unsafe fn new(ctrl: *const u8, data: Bucket, len: usize) -> Self { + unsafe fn new(ctrl: *const u8, len: usize) -> Self { debug_assert_ne!(len, 0); debug_assert_eq!(ctrl as usize % Group::WIDTH, 0); let end = ctrl.add(len); @@ -1763,32 +1735,17 @@ impl RawIterRangeInner { current_group, next_ctrl, end, - data, } } #[inline] - fn next(&mut self, offset_multiplier: usize) -> Option> { - unsafe { - loop { - if let Some(index) = self.current_group.lowest_set_bit() { - self.current_group = self.current_group.remove_lowest_bit(); - return Some(self.data.next_n(offset_multiplier * index)); - } - - if self.next_ctrl >= self.end { - return None; - } - - // We might read past self.end up to the next group boundary, - // but this is fine because it only occurs on tables smaller - // than the group size where the trailing control bytes are all - // EMPTY. On larger tables self.end is guaranteed to be aligned - // to the group size (since tables are power-of-two sized). - self.current_group = Group::load_aligned(self.next_ctrl).match_full(); - self.data = self.data.next_n(offset_multiplier * Group::WIDTH); - self.next_ctrl = self.next_ctrl.add(Group::WIDTH); - } + unsafe fn next_group(&mut self) -> Option<()> { + if self.next_ctrl >= self.end { + None + } else { + self.current_group = Group::load_aligned(self.next_ctrl).match_full(); + self.next_ctrl = self.next_ctrl.add(Group::WIDTH); + Some(()) } } @@ -1812,7 +1769,7 @@ impl Clone for RawIterRange { fn clone(&self) -> Self { Self { inner: self.inner.clone(), - marker: self.marker, + data: self.data.clone(), } } } @@ -1822,10 +1779,22 @@ impl Iterator for RawIterRange { #[cfg_attr(feature = "inline-more", inline)] fn next(&mut self) -> Option> { - let bucket = self.inner.next(offset_multiplier::()); - match bucket { - Some(bucket) => Some(bucket.cast()), - None => None, + unsafe { + loop { + if let Some(index) = self.inner.current_group.take_next_bit() { + return Some(self.data.next_n(index)); + } + + // We might read past self.end up to the next group boundary, + // but this is fine because it only occurs on tables smaller + // than the group size where the trailing control bytes are all + // EMPTY. On larger tables self.end is guaranteed to be aligned + // to the group size (since tables are power-of-two sized). 
+ if let None = self.inner.next_group() { + return None; + } + self.data = self.data.next_n(Group::WIDTH); + } } } @@ -1885,22 +1854,14 @@ impl RawIter { #[cfg(feature = "raw")] fn reflect_toggle_full(&mut self, b: &Bucket, is_insert: bool) { unsafe { - if b.as_ptr() as *mut u8 > self.iter.inner.data.as_ptr() { + if b.as_ptr() > self.iter.data.as_ptr() { // The iterator has already passed the bucket's group. // So the toggle isn't relevant to this iterator. return; } if self.iter.inner.next_ctrl < self.iter.inner.end - && b.as_ptr() - <= self - .iter - .inner - .data - .clone() - .cast::() - .next_n(Group::WIDTH) - .as_ptr() + && b.as_ptr() <= self.iter.data.next_n(Group::WIDTH).as_ptr() { // The iterator has not yet reached the bucket's group. // We don't need to reload anything, but we do need to adjust the item count. @@ -1909,10 +1870,7 @@ impl RawIter { // Double-check that the user isn't lying to us by checking the bucket state. // To do that, we need to find its control byte. We know that self.iter.data is // at self.iter.next_ctrl - Group::WIDTH, so we work from there: - let offset = offset_from( - self.iter.inner.data.clone().cast::().as_ptr(), - b.as_ptr(), - ); + let offset = offset_from(self.iter.data.as_ptr(), b.as_ptr()); let ctrl = self.iter.inner.next_ctrl.sub(Group::WIDTH).add(offset); // This method should be called _before_ a removal, or _after_ an insert, // so in both cases the ctrl byte should indicate that the bucket is full. @@ -1937,7 +1895,7 @@ impl RawIter { // yield a to-be-removed bucket, or _will_ yield a to-be-added bucket. // We'll also need ot update the item count accordingly. if let Some(index) = self.iter.inner.current_group.lowest_set_bit() { - let next_bucket = self.iter.inner.data.clone().cast::().next_n(index); + let next_bucket = self.iter.data.next_n(index); if b.as_ptr() > next_bucket.as_ptr() { // The toggled bucket is "before" the bucket the iterator would yield next. We // therefore don't need to do anything --- the iterator has already passed the @@ -1956,10 +1914,7 @@ impl RawIter { // call to reflect for those buckets might _also_ decrement the item count. // Instead, we _just_ flip the bit for the particular bucket the caller asked // us to reflect. - let our_bit = offset_from( - self.iter.inner.data.clone().cast::().as_ptr(), - b.as_ptr(), - ); + let our_bit = offset_from(self.iter.data.as_ptr(), b.as_ptr()); let was_full = self.iter.inner.current_group.flip(our_bit); debug_assert_ne!(was_full, is_insert); From 4b3932fbf9ddfecfcf0793c9a6c0ae624581dc59 Mon Sep 17 00:00:00 2001 From: Markus Westerlind Date: Tue, 6 Oct 2020 10:23:36 +0200 Subject: [PATCH 22/33] perf(compile): Make rehash_in_place smaller --- src/raw/mod.rs | 49 +++++++++++++++++++++++++------------------------ 1 file changed, 25 insertions(+), 24 deletions(-) diff --git a/src/raw/mod.rs b/src/raw/mod.rs index 747fe70137..37f0fb781e 100644 --- a/src/raw/mod.rs +++ b/src/raw/mod.rs @@ -715,13 +715,11 @@ impl RawTable { /// If `hasher` panics then some the table's contents may be lost. fn rehash_in_place(&mut self, hasher: impl Fn(&T) -> u64) { unsafe { - self.table.prepare_rehash_in_place(); - // If the hash function panics then properly clean up any elements // that we haven't rehashed yet. We unfortunately can't preserve the // element since we lost their hash and have no way of recovering it // without risking another panic. 
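// One reason the cleanup hook used below is a plain `fn(&mut Self, usize)` pointer
// plus a `needs_drop` flag rather than a generic closure: the guard and its scan loop
// then stay non-generic, and each element type only contributes a tiny drop shim.
// A rough, hypothetical sketch of that shape (none of these names are hashbrown's):
struct Slots {
    marked: Vec<bool>,
}

// Non-generic: one copy of the cleanup loop for the whole crate.
fn drop_marked(slots: &mut Slots, needs_drop: bool, drop_slot: fn(&mut Slots, usize)) {
    if !needs_drop {
        return;
    }
    for i in 0..slots.marked.len() {
        if slots.marked[i] {
            slots.marked[i] = false;
            drop_slot(slots, i);
        }
    }
}

// Generic shim: the function item coerces to a plain fn pointer at the call site, so
// the machinery above never needs a type parameter.
fn drop_slot<T>(slots: &mut Slots, index: usize) {
    // A real table would read the `T` stored at `index` here and drop it.
    let _ = (slots, index);
}

fn cleanup_after_panic<T>(slots: &mut Slots) {
    drop_marked(slots, std::mem::needs_drop::<T>(), drop_slot::<T>);
}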
- let mut guard = self.table.rehash_panic_guard( + let mut guard = self.table.prepare_rehash_in_place( mem::needs_drop::(), |self_: &mut RawTableInner, index| { self_.bucket::(index).drop(); @@ -1192,28 +1190,31 @@ impl RawTableInner { } } - #[inline] - fn prepare_rehash_in_place(&mut self) { - unsafe { - // Bulk convert all full control bytes to DELETED, and all DELETED - // control bytes to EMPTY. This effectively frees up all buckets - // containing a DELETED entry. - for i in (0..self.buckets()).step_by(Group::WIDTH) { - let group = Group::load_aligned(self.ctrl(i)); - let group = group.convert_special_to_empty_and_full_to_deleted(); - group.store_aligned(self.ctrl(i)); - } - - // Fix up the trailing control bytes. See the comments in set_ctrl - // for the handling of tables smaller than the group width. - if self.buckets() < Group::WIDTH { - self.ctrl(0) - .copy_to(self.ctrl(Group::WIDTH), self.buckets()); - } else { - self.ctrl(0) - .copy_to(self.ctrl(self.buckets()), Group::WIDTH); - } + #[allow(clippy::mut_mut)] + unsafe fn prepare_rehash_in_place<'s>( + &'s mut self, + needs_drop: bool, + drop: fn(&mut Self, usize), + ) -> crate::scopeguard::ScopeGuard<&mut Self, impl FnMut(&mut &'s mut Self) + 's> { + // Bulk convert all full control bytes to DELETED, and all DELETED + // control bytes to EMPTY. This effectively frees up all buckets + // containing a DELETED entry. + for i in (0..self.buckets()).step_by(Group::WIDTH) { + let group = Group::load_aligned(self.ctrl(i)); + let group = group.convert_special_to_empty_and_full_to_deleted(); + group.store_aligned(self.ctrl(i)); + } + + // Fix up the trailing control bytes. See the comments in set_ctrl + // for the handling of tables smaller than the group width. + if self.buckets() < Group::WIDTH { + self.ctrl(0) + .copy_to(self.ctrl(Group::WIDTH), self.buckets()); + } else { + self.ctrl(0) + .copy_to(self.ctrl(self.buckets()), Group::WIDTH); } + self.rehash_panic_guard(needs_drop, drop) } #[cfg_attr(feature = "inline-more", inline)] From 34bab11f672c30b836a70426d06cc4dbedf64831 Mon Sep 17 00:00:00 2001 From: Markus Westerlind Date: Tue, 6 Oct 2020 10:28:24 +0200 Subject: [PATCH 23/33] fix rayon feature --- src/raw/mod.rs | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/src/raw/mod.rs b/src/raw/mod.rs index 37f0fb781e..70d47f0ca6 100644 --- a/src/raw/mod.rs +++ b/src/raw/mod.rs @@ -736,7 +736,6 @@ impl RawTable { // Hash the current item let item = guard.bucket(i); let hash = hasher(item.as_ref()); - match guard.search_new_slot(i, hash) { Slot::Skip => continue 'outer, Slot::Empty(new_i) => { @@ -1702,7 +1701,7 @@ impl RawIterRange { let tail = Self::new( self.inner.next_ctrl.add(mid), - self.inner.data.next_n(Group::WIDTH).next_n(mid), + self.data.next_n(Group::WIDTH).next_n(mid), len - mid, ); debug_assert_eq!( From ba25130f1b269a8e8fa2742d532a91c3624f48b1 Mon Sep 17 00:00:00 2001 From: Markus Westerlind Date: Tue, 6 Oct 2020 10:29:28 +0200 Subject: [PATCH 24/33] perf: Avoid some re-hashing in rehash_in_place --- src/raw/mod.rs | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/src/raw/mod.rs b/src/raw/mod.rs index 70d47f0ca6..05fa68ff66 100644 --- a/src/raw/mod.rs +++ b/src/raw/mod.rs @@ -732,10 +732,11 @@ impl RawTable { let mut start = 0; 'outer: while let Some(i) = guard.next_deleted(start) { start = i + 1; + // Hash the current item + let item = guard.bucket(i); + let hash = hasher(item.as_ref()); + 'inner: loop { - // Hash the current item - let item = guard.bucket(i); - let hash = 
hasher(item.as_ref()); match guard.search_new_slot(i, hash) { Slot::Skip => continue 'outer, Slot::Empty(new_i) => { From 52e63eef72c9b04ae635c7df5f686712d4944686 Mon Sep 17 00:00:00 2001 From: Markus Westerlind Date: Tue, 6 Oct 2020 11:47:07 +0200 Subject: [PATCH 25/33] perf(compile): Parameterize by Layout instead of T --- src/raw/mod.rs | 102 ++++++++++++++++++++++--------------------------- 1 file changed, 45 insertions(+), 57 deletions(-) diff --git a/src/raw/mod.rs b/src/raw/mod.rs index 05fa68ff66..1261ceda85 100644 --- a/src/raw/mod.rs +++ b/src/raw/mod.rs @@ -259,18 +259,20 @@ fn calculate_layout(buckets: usize) -> Option<(Layout, usize)> { #[cfg_attr(feature = "inline-more", inline)] #[cfg(not(feature = "nightly"))] fn calculate_layout(buckets: usize) -> Option<(Layout, usize)> { - calculate_layout_(mem::align_of::(), mem::size_of::(), buckets) + calculate_layout_for(Layout::new::(), buckets) } #[inline] -#[cfg(not(feature = "nightly"))] -fn calculate_layout_(align_of: usize, size_of: usize, buckets: usize) -> Option<(Layout, usize)> { +fn calculate_layout_for(layout: Layout, buckets: usize) -> Option<(Layout, usize)> { debug_assert!(buckets.is_power_of_two()); // Manual layout calculation since Layout methods are not yet stable. - let ctrl_align = usize::max(align_of, Group::WIDTH); - let ctrl_offset = - size_of.checked_mul(buckets)?.checked_add(ctrl_align - 1)? & !(ctrl_align - 1); + let ctrl_align = usize::max(layout.align(), Group::WIDTH); + let ctrl_offset = layout + .size() + .checked_mul(buckets)? + .checked_add(ctrl_align - 1)? + & !(ctrl_align - 1); let len = ctrl_offset.checked_add(buckets + Group::WIDTH)?; Some(( @@ -451,18 +453,12 @@ impl RawTable { ) -> Result { debug_assert!(buckets.is_power_of_two()); - // Avoid `Option::ok_or_else` because it bloats LLVM IR. - let (layout, ctrl_offset) = match calculate_layout::(buckets) { - Some(lco) => lco, - None => return Err(fallibility.capacity_overflow()), - }; Ok(Self { table: RawTableInner::new_uninitialized( alloc, + Layout::new::(), buckets, fallibility, - layout, - ctrl_offset, )?, marker: PhantomData, }) @@ -476,7 +472,12 @@ impl RawTable { fallibility: Fallibility, ) -> Result { Ok(Self { - table: RawTableInner::fallible_with_capacity::(alloc, capacity, fallibility)?, + table: RawTableInner::fallible_with_capacity( + alloc, + Layout::new::(), + capacity, + fallibility, + )?, marker: PhantomData, }) } @@ -501,12 +502,7 @@ impl RawTable { /// Deallocates the table without dropping any entries. #[cfg_attr(feature = "inline-more", inline)] unsafe fn free_buckets(&mut self) { - // Avoid `Option::unwrap_or_else` because it bloats LLVM IR. - let (layout, ctrl_offset) = match calculate_layout::(self.buckets()) { - Some(lco) => lco, - None => hint::unreachable_unchecked(), - }; - self.table.free_buckets(layout, ctrl_offset) + self.table.free_buckets(Layout::new::()) } /// Returns pointer to one past last element of data table. @@ -774,8 +770,9 @@ impl RawTable { debug_assert!(self.table.items <= capacity); // Allocate and initialize the new table. - let mut new_table = RawTableInner::fallible_with_capacity::( + let mut new_table = RawTableInner::fallible_with_capacity( self.table.alloc.clone(), + Layout::new::(), capacity, fallibility, )?; @@ -788,7 +785,7 @@ impl RawTable { // // This guard is also used to free the old table on success, see // the comment at the bottom of this function. 
- let mut new_table = new_table.resize_panic_guard(calculate_layout::); + let mut new_table = new_table.resize_panic_guard(Layout::new::()); // Copy all elements to the new table. for item in self.iter() { @@ -1094,17 +1091,23 @@ impl RawTableInner { #[cfg_attr(feature = "inline-more", inline)] unsafe fn new_uninitialized( alloc: A, + t_layout: Layout, buckets: usize, fallibility: Fallibility, - layout: Layout, - ctrl_offset: usize, ) -> Result { debug_assert!(buckets.is_power_of_two()); + // Avoid `Option::ok_or_else` because it bloats LLVM IR. + let (layout, ctrl_offset) = match calculate_layout_for(t_layout, buckets) { + Some(lco) => lco, + None => return Err(fallibility.capacity_overflow()), + }; + let ptr: NonNull = match do_alloc(&alloc, layout) { Ok(block) => block.cast(), Err(_) => return Err(fallibility.alloc_err(layout)), }; + let ctrl = NonNull::new_unchecked(ptr.as_ptr().add(ctrl_offset)); Ok(Self { ctrl, @@ -1115,8 +1118,10 @@ impl RawTableInner { }) } - fn fallible_with_capacity( + #[inline] + fn fallible_with_capacity( alloc: A, + t_layout: Layout, capacity: usize, fallibility: Fallibility, ) -> Result { @@ -1124,33 +1129,15 @@ impl RawTableInner { Ok(Self::new_in(alloc)) } else { unsafe { - // Avoid `Option::ok_or_else` because it bloats LLVM IR. - let buckets = match capacity_to_buckets(capacity) { - Some(buckets) => buckets, - None => return Err(fallibility.capacity_overflow()), - }; - // Avoid `Option::ok_or_else` because it bloats LLVM IR. - let (layout, ctrl_offset) = match calculate_layout::(buckets) { - Some(lco) => lco, - None => return Err(fallibility.capacity_overflow()), - }; - Self::fallible_with_capacity_inner(alloc, buckets, fallibility, layout, ctrl_offset) - } - } - } + let buckets = + capacity_to_buckets(capacity).ok_or_else(|| fallibility.capacity_overflow())?; - #[inline] - unsafe fn fallible_with_capacity_inner( - alloc: A, - buckets: usize, - fallibility: Fallibility, - layout: Layout, - ctrl_offset: usize, - ) -> Result { - let result = Self::new_uninitialized(alloc, buckets, fallibility, layout, ctrl_offset)?; - result.ctrl(0).write_bytes(EMPTY, result.num_ctrl_bytes()); + let result = Self::new_uninitialized(alloc, t_layout, buckets, fallibility)?; + result.ctrl(0).write_bytes(EMPTY, result.num_ctrl_bytes()); - Ok(result) + Ok(result) + } + } } /// Searches for an empty or deleted bucket which is suitable for inserting @@ -1352,21 +1339,22 @@ impl RawTableInner { #[inline] unsafe fn resize_panic_guard<'s>( &'s mut self, - layout: fn(usize) -> Option<(Layout, usize)>, + layout_t: Layout, ) -> crate::scopeguard::ScopeGuard<&mut Self, impl FnMut(&mut &'s mut Self) + 's> { guard(self, move |self_| { if !self_.is_empty_singleton() { - let (layout, ctrl_offset) = match layout(self_.buckets()) { - Some(lco) => lco, - None => hint::unreachable_unchecked(), - }; - self_.free_buckets(layout, ctrl_offset); + self_.free_buckets(layout_t); } }) } #[inline] - unsafe fn free_buckets(&mut self, layout: Layout, ctrl_offset: usize) { + unsafe fn free_buckets(&mut self, t_layout: Layout) { + // Avoid `Option::unwrap_or_else` because it bloats LLVM IR. 
+ let (layout, ctrl_offset) = match calculate_layout_for(t_layout, self.buckets()) { + Some(lco) => lco, + None => hint::unreachable_unchecked(), + }; self.alloc.deallocate( NonNull::new_unchecked(self.ctrl.as_ptr().sub(ctrl_offset)), layout, From f919e17ac504a8c4453f70a35e78675763207a58 Mon Sep 17 00:00:00 2001 From: Markus Westerlind Date: Tue, 6 Oct 2020 11:51:27 +0200 Subject: [PATCH 26/33] perf(compile): Shrink resize --- src/raw/mod.rs | 52 +++++++++++++++++++++++++++----------------------- 1 file changed, 28 insertions(+), 24 deletions(-) diff --git a/src/raw/mod.rs b/src/raw/mod.rs index 1261ceda85..563ad87d57 100644 --- a/src/raw/mod.rs +++ b/src/raw/mod.rs @@ -767,25 +767,9 @@ impl RawTable { fallibility: Fallibility, ) -> Result<(), TryReserveError> { unsafe { - debug_assert!(self.table.items <= capacity); - - // Allocate and initialize the new table. - let mut new_table = RawTableInner::fallible_with_capacity( - self.table.alloc.clone(), - Layout::new::(), - capacity, - fallibility, - )?; - new_table.growth_left -= self.table.items; - new_table.items = self.table.items; - - // The hash function may panic, in which case we simply free the new - // table without dropping any elements that may have been copied into - // it. - // - // This guard is also used to free the old table on success, see - // the comment at the bottom of this function. - let mut new_table = new_table.resize_panic_guard(Layout::new::()); + let mut new_table = + self.table + .prepare_resize(Layout::new::(), capacity, fallibility)?; // Copy all elements to the new table. for item in self.iter() { @@ -1337,15 +1321,35 @@ impl RawTableInner { #[allow(clippy::mut_mut)] #[inline] - unsafe fn resize_panic_guard<'s>( - &'s mut self, + unsafe fn prepare_resize( + &self, layout_t: Layout, - ) -> crate::scopeguard::ScopeGuard<&mut Self, impl FnMut(&mut &'s mut Self) + 's> { - guard(self, move |self_| { + capacity: usize, + fallibility: Fallibility, + ) -> Result, TryReserveError> { + debug_assert!(self.items <= capacity); + + // Allocate and initialize the new table. + let mut new_table = RawTableInner::fallible_with_capacity( + self.alloc.clone(), + layout_t, + capacity, + fallibility, + )?; + new_table.growth_left -= self.items; + new_table.items = self.items; + + // The hash function may panic, in which case we simply free the new + // table without dropping any elements that may have been copied into + // it. + // + // This guard is also used to free the old table on success, see + // the comment at the bottom of this function. + Ok(guard(new_table, move |self_| { if !self_.is_empty_singleton() { self_.free_buckets(layout_t); } - }) + })) } #[inline] From 179a942dbfca02822bfe488d12d87ead5071c11c Mon Sep 17 00:00:00 2001 From: Markus Westerlind Date: Tue, 6 Oct 2020 11:56:18 +0200 Subject: [PATCH 27/33] perf(compile): Move h2 calls into a dedicated set_ctrl_h2 function --- src/raw/mod.rs | 19 +++++++++++++------ 1 file changed, 13 insertions(+), 6 deletions(-) diff --git a/src/raw/mod.rs b/src/raw/mod.rs index 563ad87d57..01b776dda0 100644 --- a/src/raw/mod.rs +++ b/src/raw/mod.rs @@ -781,7 +781,7 @@ impl RawTable { // - we know there is enough space in the table. // - all elements are unique. 
let index = new_table.find_insert_slot(hash); - new_table.set_ctrl(index, h2(hash)); + new_table.set_ctrl_h2(index, hash); new_table.bucket(index).copy_from_nonoverlapping(&item); } @@ -814,7 +814,7 @@ impl RawTable { let bucket = self.bucket(index); self.table.growth_left -= special_is_empty(old_ctrl) as usize; - self.table.set_ctrl(index, h2(hash)); + self.table.set_ctrl_h2(index, hash); bucket.write(value); self.table.items += 1; bucket @@ -871,7 +871,7 @@ impl RawTable { let old_ctrl = *self.table.ctrl(index); self.table.growth_left -= special_is_empty(old_ctrl) as usize; - self.table.set_ctrl(index, h2(hash)); + self.table.set_ctrl_h2(index, hash); bucket.write(value); self.table.items += 1; bucket @@ -1214,14 +1214,14 @@ impl RawTableInner { (pos.wrapping_sub(self.probe_seq(hash).pos) & self.bucket_mask) / Group::WIDTH }; if likely(probe_index(i) == probe_index(new_i)) { - self.set_ctrl(i, h2(hash)); + self.set_ctrl_h2(i, hash); return Slot::Skip; } // We are moving the current item to a new position. Write // our H2 to the control byte of the new position. let prev_ctrl = *self.ctrl(new_i); - self.set_ctrl(new_i, h2(hash)); + self.set_ctrl_h2(new_i, hash); if prev_ctrl == EMPTY { self.set_ctrl(i, EMPTY); Slot::Empty(new_i) @@ -1244,6 +1244,13 @@ impl RawTableInner { } } + /// Sets a control byte to the hash, and possibly also the replicated control byte at + /// the end of the array. + #[inline] + unsafe fn set_ctrl_h2(&self, index: usize, hash: u64) { + self.set_ctrl(index, h2(hash)) + } + /// Sets a control byte, and possibly also the replicated control byte at /// the end of the array. #[inline] @@ -1577,7 +1584,7 @@ impl RawTable { // - we know there is enough space in the table. // - all elements are unique. let index = guard_self.table.find_insert_slot(hash); - guard_self.table.set_ctrl(index, h2(hash)); + guard_self.table.set_ctrl_h2(index, hash); guard_self.bucket(index).write(item); } } From e04c18d80549e8257f154a77126f5cff067c02bd Mon Sep 17 00:00:00 2001 From: Markus Westerlind Date: Tue, 6 Oct 2020 12:02:13 +0200 Subject: [PATCH 28/33] refactor: Merge set_ctrl_h2 into find_insert_slot when possible --- src/raw/mod.rs | 20 ++++++++++++++------ 1 file changed, 14 insertions(+), 6 deletions(-) diff --git a/src/raw/mod.rs b/src/raw/mod.rs index 01b776dda0..118c69272b 100644 --- a/src/raw/mod.rs +++ b/src/raw/mod.rs @@ -780,8 +780,7 @@ impl RawTable { // - there are no DELETED entries. // - we know there is enough space in the table. // - all elements are unique. - let index = new_table.find_insert_slot(hash); - new_table.set_ctrl_h2(index, hash); + let index = new_table.prepare_insert_slot(hash); new_table.bucket(index).copy_from_nonoverlapping(&item); } @@ -863,7 +862,7 @@ impl RawTable { #[cfg(any(feature = "raw", feature = "rustc-internal-api"))] pub fn insert_no_grow(&mut self, hash: u64, value: T) -> Bucket { unsafe { - let index = self.table.find_insert_slot(hash); + let index = self.table.prepare_insert_slot(hash); let bucket = self.table.bucket(index); // If we are replacing a DELETED entry then we don't need to update @@ -871,7 +870,6 @@ impl RawTable { let old_ctrl = *self.table.ctrl(index); self.table.growth_left -= special_is_empty(old_ctrl) as usize; - self.table.set_ctrl_h2(index, hash); bucket.write(value); self.table.items += 1; bucket @@ -1124,6 +1122,17 @@ impl RawTableInner { } } + /// Searches for an empty or deleted bucket which is suitable for inserting + /// a new element and sets the hash for that slot. 
+ /// + /// There must be at least 1 empty bucket in the table. + #[inline] + unsafe fn prepare_insert_slot(&self, hash: u64) -> usize { + let index = self.find_insert_slot(hash); + self.set_ctrl_h2(index, hash); + index + } + /// Searches for an empty or deleted bucket which is suitable for inserting /// a new element. /// @@ -1583,8 +1592,7 @@ impl RawTable { // - there are no DELETED entries. // - we know there is enough space in the table. // - all elements are unique. - let index = guard_self.table.find_insert_slot(hash); - guard_self.table.set_ctrl_h2(index, hash); + let index = guard_self.table.prepare_insert_slot(hash); guard_self.bucket(index).write(item); } } From 93ba7fea2f81a40269f8ced5f7dfc52d062ee6ae Mon Sep 17 00:00:00 2001 From: Markus Westerlind Date: Tue, 6 Oct 2020 12:19:58 +0200 Subject: [PATCH 29/33] Finish up into something that does not regress performance --- benches/bench.rs | 4 +- src/lib.rs | 5 + src/main.rs | 20 -- src/raw/bitmask.rs | 10 - src/raw/mod.rs | 488 ++++++++++++++++++++++----------------------- src/set.rs | 2 - 6 files changed, 242 insertions(+), 287 deletions(-) delete mode 100644 src/main.rs diff --git a/benches/bench.rs b/benches/bench.rs index 3accff741f..d1e4a2af06 100644 --- a/benches/bench.rs +++ b/benches/bench.rs @@ -86,7 +86,7 @@ macro_rules! bench_insert { b.iter(|| { m.clear(); for i in ($keydist).take(SIZE) { - m.insert(i, DropType(i)); + m.insert(i, (DropType(i), [i; 20])); } black_box(&mut m); }); @@ -112,7 +112,7 @@ macro_rules! bench_grow_insert { b.iter(|| { let mut m = $maptype::default(); for i in ($keydist).take(SIZE) { - m.insert(i, i); + m.insert(i, DropType(i)); } black_box(&mut m); }) diff --git a/src/lib.rs b/src/lib.rs index 3e930896d8..b8f2322bef 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -57,6 +57,11 @@ pub mod raw { pub use inner::*; #[cfg(feature = "rayon")] + /// [rayon]-based parallel iterator types for hash maps. + /// You will rarely need to interact with it directly unless you have need + /// to name one of the iterator types. + /// + /// [rayon]: https://docs.rs/rayon/1.0/rayon pub mod rayon { pub use crate::external_trait_impls::rayon::raw::*; } diff --git a/src/main.rs b/src/main.rs deleted file mode 100644 index 4cf12a3de2..0000000000 --- a/src/main.rs +++ /dev/null @@ -1,20 +0,0 @@ -fn main() { - let mut map1 = hashbrown::HashMap::new(); - map1.insert(1u8, ""); - map1.reserve(1000); - let mut map2 = hashbrown::HashMap::new(); - map2.insert(1i16, ""); - map2.reserve(1000); - let mut map3 = hashbrown::HashMap::new(); - map3.insert(3u16, ""); - map3.reserve(1000); - let mut map4 = hashbrown::HashMap::new(); - map4.insert(3u64, ""); - map4.reserve(1000); - dbg!(( - map1.iter().next(), - map2.iter().next(), - map3.iter().next(), - map4.iter().next() - )); -} diff --git a/src/raw/bitmask.rs b/src/raw/bitmask.rs index 356edbf9de..99b2d5341b 100644 --- a/src/raw/bitmask.rs +++ b/src/raw/bitmask.rs @@ -61,16 +61,6 @@ impl BitMask { } } - #[inline] - pub fn take_next_bit(&mut self) -> Option { - if let Some(index) = self.lowest_set_bit() { - *self = self.remove_lowest_bit(); - Some(index) - } else { - None - } - } - /// Returns the first set bit in the `BitMask`, if there is one. The /// bitmask must not be empty. 
#[inline] diff --git a/src/raw/mod.rs b/src/raw/mod.rs index 118c69272b..f57d90b4e8 100644 --- a/src/raw/mod.rs +++ b/src/raw/mod.rs @@ -251,6 +251,41 @@ fn calculate_layout(buckets: usize) -> Option<(Layout, usize)> { data.extend(ctrl).ok() } +/// Helper which allows the max calculation for ctrl_align to be statically computed for each T +/// while keeping the rest of `calculate_layout_for` independent of `T` +#[derive(Copy, Clone)] +struct TableLayout { + size: usize, + ctrl_align: usize, +} + +impl TableLayout { + #[inline] + fn new() -> Self { + let layout = Layout::new::(); + Self { + size: layout.size(), + ctrl_align: usize::max(layout.align(), Group::WIDTH), + } + } + + #[inline] + fn calculate_layout_for(self, buckets: usize) -> Option<(Layout, usize)> { + debug_assert!(buckets.is_power_of_two()); + + let TableLayout { size, ctrl_align } = self; + // Manual layout calculation since Layout methods are not yet stable. + let ctrl_offset = + size.checked_mul(buckets)?.checked_add(ctrl_align - 1)? & !(ctrl_align - 1); + let len = ctrl_offset.checked_add(buckets + Group::WIDTH)?; + + Some(( + unsafe { Layout::from_size_align_unchecked(len, ctrl_align) }, + ctrl_offset, + )) + } +} + /// Returns a Layout which describes the allocation required for a hash table, /// and the offset of the control bytes in the allocation. /// (the offset is also one past last element of buckets) @@ -259,26 +294,7 @@ fn calculate_layout(buckets: usize) -> Option<(Layout, usize)> { #[cfg_attr(feature = "inline-more", inline)] #[cfg(not(feature = "nightly"))] fn calculate_layout(buckets: usize) -> Option<(Layout, usize)> { - calculate_layout_for(Layout::new::(), buckets) -} - -#[inline] -fn calculate_layout_for(layout: Layout, buckets: usize) -> Option<(Layout, usize)> { - debug_assert!(buckets.is_power_of_two()); - - // Manual layout calculation since Layout methods are not yet stable. - let ctrl_align = usize::max(layout.align(), Group::WIDTH); - let ctrl_offset = layout - .size() - .checked_mul(buckets)? - .checked_add(ctrl_align - 1)? - & !(ctrl_align - 1); - let len = ctrl_offset.checked_add(buckets + Group::WIDTH)?; - - Some(( - unsafe { Layout::from_size_align_unchecked(len, ctrl_align) }, - ctrl_offset, - )) + TableLayout::new::().calculate_layout_for(buckets) } /// A reference to a hash table bucket containing a `T`. @@ -456,7 +472,7 @@ impl RawTable { Ok(Self { table: RawTableInner::new_uninitialized( alloc, - Layout::new::(), + TableLayout::new::(), buckets, fallibility, )?, @@ -474,7 +490,7 @@ impl RawTable { Ok(Self { table: RawTableInner::fallible_with_capacity( alloc, - Layout::new::(), + TableLayout::new::(), capacity, fallibility, )?, @@ -502,7 +518,7 @@ impl RawTable { /// Deallocates the table without dropping any entries. #[cfg_attr(feature = "inline-more", inline)] unsafe fn free_buckets(&mut self) { - self.table.free_buckets(Layout::new::()) + self.table.free_buckets(TableLayout::new::()) } /// Returns pointer to one past last element of data table. @@ -725,30 +741,46 @@ impl RawTable { // At this point, DELETED elements are elements that we haven't // rehashed yet. Find them and re-insert them at their ideal // position. 
- let mut start = 0; - 'outer: while let Some(i) = guard.next_deleted(start) { - start = i + 1; - // Hash the current item - let item = guard.bucket(i); - let hash = hasher(item.as_ref()); + 'outer: for i in 0..guard.buckets() { + if *guard.ctrl(i) != DELETED { + continue; + } 'inner: loop { - match guard.search_new_slot(i, hash) { - Slot::Skip => continue 'outer, - Slot::Empty(new_i) => { - // If the target slot is empty, simply move the current - // element into the new slot and clear the old control - // byte. - guard.bucket(new_i).copy_from_nonoverlapping(&item); - continue 'outer; - } - Slot::Occupied(new_i) => { - // If the target slot is occupied, swap the two elements - // and then continue processing the element that we just - // swapped into the old slot. - mem::swap(guard.bucket(new_i).as_mut(), item.as_mut()); - continue 'inner; - } + // Hash the current item + let item = guard.bucket(i); + let hash = hasher(item.as_ref()); + + // Search for a suitable place to put it + let new_i = guard.find_insert_slot(hash); + + // Probing works by scanning through all of the control + // bytes in groups, which may not be aligned to the group + // size. If both the new and old position fall within the + // same unaligned group, then there is no benefit in moving + // it and we can just continue to the next item. + if likely(guard.is_in_same_group(i, new_i, hash)) { + guard.set_ctrl_h2(i, hash); + continue 'outer; + } + + // We are moving the current item to a new position. Write + // our H2 to the control byte of the new position. + let prev_ctrl = guard.replace_ctrl_h2(new_i, hash); + if prev_ctrl == EMPTY { + guard.set_ctrl(i, EMPTY); + // If the target slot is empty, simply move the current + // element into the new slot and clear the old control + // byte. + guard.bucket(new_i).copy_from_nonoverlapping(&item); + continue 'outer; + } else { + // If the target slot is occupied, swap the two elements + // and then continue processing the element that we just + // swapped into the old slot. + debug_assert_eq!(prev_ctrl, DELETED); + mem::swap(guard.bucket(new_i).as_mut(), item.as_mut()); + continue 'inner; } } } @@ -769,7 +801,7 @@ impl RawTable { unsafe { let mut new_table = self.table - .prepare_resize(Layout::new::(), capacity, fallibility)?; + .prepare_resize(TableLayout::new::(), capacity, fallibility)?; // Copy all elements to the new table. for item in self.iter() { @@ -780,7 +812,7 @@ impl RawTable { // - there are no DELETED entries. // - we know there is enough space in the table. // - all elements are unique. 
- let index = new_table.prepare_insert_slot(hash); + let (index, _) = new_table.prepare_insert_slot(hash); new_table.bucket(index).copy_from_nonoverlapping(&item); } @@ -811,11 +843,10 @@ impl RawTable { index = self.table.find_insert_slot(hash); } + self.table.record_item_insert_at(index, old_ctrl, hash); + let bucket = self.bucket(index); - self.table.growth_left -= special_is_empty(old_ctrl) as usize; - self.table.set_ctrl_h2(index, hash); bucket.write(value); - self.table.items += 1; bucket } } @@ -830,17 +861,13 @@ impl RawTable { #[cfg_attr(feature = "inline-more", inline)] pub fn try_insert_no_grow(&mut self, hash: u64, value: T) -> Result, T> { unsafe { - let index = self.find_insert_slot(hash); - let old_ctrl = *self.ctrl(index); - if unlikely(self.growth_left == 0 && special_is_empty(old_ctrl)) { - Err(value) - } else { - let bucket = self.bucket(index); - self.growth_left -= special_is_empty(old_ctrl) as usize; - self.set_ctrl(index, h2(hash)); - bucket.write(value); - self.items += 1; - Ok(bucket) + match self.table.prepare_insert_no_grow(hash) { + Ok(index) => { + let bucket = self.bucket(index); + bucket.write(value); + Ok(bucket) + } + Err(()) => Err(value), } } } @@ -862,12 +889,11 @@ impl RawTable { #[cfg(any(feature = "raw", feature = "rustc-internal-api"))] pub fn insert_no_grow(&mut self, hash: u64, value: T) -> Bucket { unsafe { - let index = self.table.prepare_insert_slot(hash); + let (index, old_ctrl) = self.table.prepare_insert_slot(hash); let bucket = self.table.bucket(index); // If we are replacing a DELETED entry then we don't need to update // the load counter. - let old_ctrl = *self.table.ctrl(index); self.table.growth_left -= special_is_empty(old_ctrl) as usize; bucket.write(value); @@ -1073,14 +1099,14 @@ impl RawTableInner { #[cfg_attr(feature = "inline-more", inline)] unsafe fn new_uninitialized( alloc: A, - t_layout: Layout, + table_layout: TableLayout, buckets: usize, fallibility: Fallibility, ) -> Result { debug_assert!(buckets.is_power_of_two()); // Avoid `Option::ok_or_else` because it bloats LLVM IR. - let (layout, ctrl_offset) = match calculate_layout_for(t_layout, buckets) { + let (layout, ctrl_offset) = match table_layout.calculate_layout_for(buckets) { Some(lco) => lco, None => return Err(fallibility.capacity_overflow()), }; @@ -1103,7 +1129,7 @@ impl RawTableInner { #[inline] fn fallible_with_capacity( alloc: A, - t_layout: Layout, + table_layout: TableLayout, capacity: usize, fallibility: Fallibility, ) -> Result { @@ -1114,7 +1140,7 @@ impl RawTableInner { let buckets = capacity_to_buckets(capacity).ok_or_else(|| fallibility.capacity_overflow())?; - let result = Self::new_uninitialized(alloc, t_layout, buckets, fallibility)?; + let result = Self::new_uninitialized(alloc, table_layout, buckets, fallibility)?; result.ctrl(0).write_bytes(EMPTY, result.num_ctrl_bytes()); Ok(result) @@ -1127,10 +1153,11 @@ impl RawTableInner { /// /// There must be at least 1 empty bucket in the table. #[inline] - unsafe fn prepare_insert_slot(&self, hash: u64) -> usize { + unsafe fn prepare_insert_slot(&self, hash: u64) -> (usize, u8) { let index = self.find_insert_slot(hash); + let old_ctrl = *self.ctrl(index); self.set_ctrl_h2(index, hash); - index + (index, old_ctrl) } /// Searches for an empty or deleted bucket which is suitable for inserting @@ -1139,12 +1166,12 @@ impl RawTableInner { /// There must be at least 1 empty bucket in the table. 
#[inline] fn find_insert_slot(&self, hash: u64) -> usize { - let mut probe_seq = self.table.probe_seq(hash); + let mut probe_seq = self.probe_seq(hash); loop { unsafe { - let group = Group::load(self.table.ctrl(probe_seq.pos)); + let group = Group::load(self.ctrl(probe_seq.pos)); if let Some(bit) = group.match_empty_or_deleted().lowest_set_bit() { - let result = (probe_seq.pos + bit) & self.table.bucket_mask; + let result = (probe_seq.pos + bit) & self.bucket_mask; // In tables smaller than the group width, trailing control // bytes outside the range of the table are filled with @@ -1155,10 +1182,10 @@ impl RawTableInner { // table. This second scan is guaranteed to find an empty // slot (due to the load factor) before hitting the trailing // control bytes (containing EMPTY). - if unlikely(is_full(*self.table.ctrl(result))) { - debug_assert!(self.table.bucket_mask < Group::WIDTH); + if unlikely(is_full(*self.ctrl(result))) { + debug_assert!(self.bucket_mask < Group::WIDTH); debug_assert_ne!(probe_seq.pos, 0); - return Group::load_aligned(self.table.ctrl(0)) + return Group::load_aligned(self.ctrl(0)) .match_empty_or_deleted() .lowest_set_bit_nonzero(); } @@ -1166,7 +1193,7 @@ impl RawTableInner { return result; } } - probe_seq.move_next(self.table.bucket_mask); + probe_seq.move_next(self.bucket_mask); } } @@ -1194,7 +1221,18 @@ impl RawTableInner { self.ctrl(0) .copy_to(self.ctrl(self.buckets()), Group::WIDTH); } - self.rehash_panic_guard(needs_drop, drop) + guard(self, move |self_| { + if needs_drop { + for i in 0..self_.buckets() { + if *self_.ctrl(i) == DELETED { + self_.set_ctrl(i, EMPTY); + drop(self_, i); + self_.items -= 1; + } + } + } + self_.growth_left = bucket_mask_to_capacity(self_.bucket_mask) - self_.items; + }) } #[cfg_attr(feature = "inline-more", inline)] @@ -1206,38 +1244,7 @@ impl RawTableInner { #[cfg_attr(feature = "inline-more", inline)] unsafe fn data_end(&self) -> NonNull { - NonNull::new_unchecked(self.ctrl.as_ptr() as *mut T) - } - - #[inline] - unsafe fn search_new_slot(&mut self, i: usize, hash: u64) -> Slot { - // Search for a suitable place to put it - let new_i = self.find_insert_slot(hash); - - // Probing works by scanning through all of the control - // bytes in groups, which may not be aligned to the group - // size. If both the new and old position fall within the - // same unaligned group, then there is no benefit in moving - // it and we can just continue to the next item. - let probe_index = |pos: usize| { - (pos.wrapping_sub(self.probe_seq(hash).pos) & self.bucket_mask) / Group::WIDTH - }; - if likely(probe_index(i) == probe_index(new_i)) { - self.set_ctrl_h2(i, hash); - return Slot::Skip; - } - - // We are moving the current item to a new position. Write - // our H2 to the control byte of the new position. - let prev_ctrl = *self.ctrl(new_i); - self.set_ctrl_h2(new_i, hash); - if prev_ctrl == EMPTY { - self.set_ctrl(i, EMPTY); - Slot::Empty(new_i) - } else { - debug_assert_eq!(prev_ctrl, DELETED); - Slot::Occupied(new_i) - } + NonNull::new_unchecked(self.ctrl.as_ptr().cast()) } /// Returns an iterator-like object for a probe sequence on the table. 
@@ -1253,6 +1260,36 @@ impl RawTableInner { } } + /// Returns the index of a bucket for which a value must be inserted if there is enough rooom + /// in the table, otherwise returns error + #[cfg(feature = "raw")] + #[inline] + unsafe fn prepare_insert_no_grow(&mut self, hash: u64) -> Result { + let index = self.find_insert_slot(hash); + let old_ctrl = *self.ctrl(index); + if unlikely(self.growth_left == 0 && special_is_empty(old_ctrl)) { + Err(()) + } else { + self.record_item_insert_at(index, old_ctrl, hash); + Ok(index) + } + } + + #[inline] + unsafe fn record_item_insert_at(&mut self, index: usize, old_ctrl: u8, hash: u64) { + self.growth_left -= special_is_empty(old_ctrl) as usize; + self.set_ctrl_h2(index, hash); + self.items += 1; + } + + #[inline] + fn is_in_same_group(&self, i: usize, new_i: usize, hash: u64) -> bool { + let probe_seq_pos = self.probe_seq(hash).pos; + let probe_index = + |pos: usize| (pos.wrapping_sub(probe_seq_pos) & self.bucket_mask) / Group::WIDTH; + probe_index(i) == probe_index(new_i) + } + /// Sets a control byte to the hash, and possibly also the replicated control byte at /// the end of the array. #[inline] @@ -1260,6 +1297,13 @@ impl RawTableInner { self.set_ctrl(index, h2(hash)) } + #[inline] + unsafe fn replace_ctrl_h2(&self, index: usize, hash: u64) -> u8 { + let prev_ctrl = *self.ctrl(index); + self.set_ctrl_h2(index, hash); + prev_ctrl + } + /// Sets a control byte, and possibly also the replicated control byte at /// the end of the array. #[inline] @@ -1310,36 +1354,11 @@ impl RawTableInner { self.bucket_mask == 0 } - unsafe fn next_deleted(&self, start: usize) -> Option { - (start..self.buckets()).find(|&i| *self.ctrl(i) == DELETED) - } - - #[allow(clippy::mut_mut)] - #[inline] - unsafe fn rehash_panic_guard<'s>( - &'s mut self, - needs_drop: bool, - drop: fn(&mut Self, usize), - ) -> crate::scopeguard::ScopeGuard<&mut Self, impl FnMut(&mut &'s mut Self) + 's> { - guard(self, move |self_| { - if needs_drop { - for i in 0..self_.buckets() { - if *self_.ctrl(i) == DELETED { - self_.set_ctrl(i, EMPTY); - drop(self_, i); - self_.items -= 1; - } - } - } - self_.growth_left = bucket_mask_to_capacity(self_.bucket_mask) - self_.items; - }) - } - #[allow(clippy::mut_mut)] #[inline] unsafe fn prepare_resize( &self, - layout_t: Layout, + table_layout: TableLayout, capacity: usize, fallibility: Fallibility, ) -> Result, TryReserveError> { @@ -1348,7 +1367,7 @@ impl RawTableInner { // Allocate and initialize the new table. let mut new_table = RawTableInner::fallible_with_capacity( self.alloc.clone(), - layout_t, + table_layout, capacity, fallibility, )?; @@ -1363,15 +1382,15 @@ impl RawTableInner { // the comment at the bottom of this function. Ok(guard(new_table, move |self_| { if !self_.is_empty_singleton() { - self_.free_buckets(layout_t); + self_.free_buckets(table_layout); } })) } #[inline] - unsafe fn free_buckets(&mut self, t_layout: Layout) { + unsafe fn free_buckets(&mut self, table_layout: TableLayout) { // Avoid `Option::unwrap_or_else` because it bloats LLVM IR. 
- let (layout, ctrl_offset) = match calculate_layout_for(t_layout, self.buckets()) { + let (layout, ctrl_offset) = match table_layout.calculate_layout_for(self.buckets()) { Some(lco) => lco, None => hint::unreachable_unchecked(), }; @@ -1419,12 +1438,6 @@ impl RawTableInner { } } -enum Slot { - Skip, - Empty(usize), - Occupied(usize), -} - impl Clone for RawTable { fn clone(&self) -> Self { if self.table.is_empty_singleton() { @@ -1592,7 +1605,7 @@ impl RawTable { // - there are no DELETED entries. // - we know there is enough space in the table. // - all elements are unique. - let index = guard_self.table.prepare_insert_slot(hash); + let (index, _) = guard_self.table.prepare_insert_slot(hash); guard_self.bucket(index).write(item); } } @@ -1649,17 +1662,13 @@ impl IntoIterator for RawTable { /// Iterator over a sub-range of a table. Unlike `RawIter` this iterator does /// not track an item count. pub(crate) struct RawIterRange { - inner: RawIterRangeInner, - // Pointer to the buckets for the current group. - data: Bucket, -} - -#[derive(Clone)] -pub(crate) struct RawIterRangeInner { // Mask of full buckets in the current group. Bits are cleared from this // mask as each element is processed. current_group: BitMask, + // Pointer to the buckets for the current group. + data: Bucket, + // Pointer to the next group of control bytes, // Must be aligned to the group size. next_ctrl: *const u8, @@ -1674,9 +1683,19 @@ impl RawIterRange { /// The control byte address must be aligned to the group size. #[cfg_attr(feature = "inline-more", inline)] unsafe fn new(ctrl: *const u8, data: Bucket, len: usize) -> Self { + debug_assert_ne!(len, 0); + debug_assert_eq!(ctrl as usize % Group::WIDTH, 0); + let end = ctrl.add(len); + + // Load the first group and advance ctrl to point to the next group + let current_group = Group::load_aligned(ctrl).match_full(); + let next_ctrl = ctrl.add(Group::WIDTH); + Self { - inner: RawIterRangeInner::new(ctrl, len), + current_group, data, + next_ctrl, + end, } } @@ -1688,7 +1707,7 @@ impl RawIterRange { #[cfg(feature = "rayon")] pub(crate) fn split(mut self) -> (Self, Option>) { unsafe { - if self.inner.end <= self.inner.next_ctrl { + if self.end <= self.next_ctrl { // Nothing to split if the group that we are current processing // is the last one. (self, None) @@ -1696,7 +1715,7 @@ impl RawIterRange { // len is the remaining number of elements after the group that // we are currently processing. It must be a multiple of the // group size (small tables are caught by the check above). - let len = offset_from(self.inner.end, self.inner.next_ctrl); + let len = offset_from(self.end, self.next_ctrl); debug_assert_eq!(len % Group::WIDTH, 0); // Split the remaining elements into two halves, but round the @@ -1708,7 +1727,7 @@ impl RawIterRange { let mid = (len / 2) & !(Group::WIDTH - 1); let tail = Self::new( - self.inner.next_ctrl.add(mid), + self.next_ctrl.add(mid), self.data.next_n(Group::WIDTH).next_n(mid), len - mid, ); @@ -1716,57 +1735,15 @@ impl RawIterRange { self.data.next_n(Group::WIDTH).next_n(mid).ptr, tail.data.ptr ); - debug_assert_eq!(self.inner.end, tail.inner.end); - self.inner.end = self.inner.next_ctrl.add(mid); - debug_assert_eq!(self.inner.end.add(Group::WIDTH), tail.inner.next_ctrl); + debug_assert_eq!(self.end, tail.end); + self.end = self.next_ctrl.add(mid); + debug_assert_eq!(self.end.add(Group::WIDTH), tail.next_ctrl); (self, Some(tail)) } } } } -impl RawIterRangeInner { - /// Returns a `RawIterRange` covering a subset of a table. 
- /// - /// The control byte address must be aligned to the group size. - #[inline] - unsafe fn new(ctrl: *const u8, len: usize) -> Self { - debug_assert_ne!(len, 0); - debug_assert_eq!(ctrl as usize % Group::WIDTH, 0); - let end = ctrl.add(len); - - // Load the first group and advance ctrl to point to the next group - let current_group = Group::load_aligned(ctrl).match_full(); - let next_ctrl = ctrl.add(Group::WIDTH); - - Self { - current_group, - next_ctrl, - end, - } - } - - #[inline] - unsafe fn next_group(&mut self) -> Option<()> { - if self.next_ctrl >= self.end { - None - } else { - self.current_group = Group::load_aligned(self.next_ctrl).match_full(); - self.next_ctrl = self.next_ctrl.add(Group::WIDTH); - Some(()) - } - } - - #[inline] - fn size_hint(&self) -> (usize, Option) { - // We don't have an item count, so just guess based on the range size. - ( - 0, - Some(unsafe { offset_from(self.end, self.next_ctrl) + Group::WIDTH }), - ) - } -} - // We make raw iterators unconditionally Send and Sync, and let the PhantomData // in the actual iterator implementations determine the real Send/Sync bounds. unsafe impl Send for RawIterRange {} @@ -1776,8 +1753,10 @@ impl Clone for RawIterRange { #[cfg_attr(feature = "inline-more", inline)] fn clone(&self) -> Self { Self { - inner: self.inner.clone(), data: self.data.clone(), + next_ctrl: self.next_ctrl, + current_group: self.current_group, + end: self.end, } } } @@ -1789,26 +1768,34 @@ impl Iterator for RawIterRange { fn next(&mut self) -> Option> { unsafe { loop { - if let Some(index) = self.inner.current_group.take_next_bit() { + if let Some(index) = self.current_group.lowest_set_bit() { + self.current_group = self.current_group.remove_lowest_bit(); return Some(self.data.next_n(index)); } + if self.next_ctrl >= self.end { + return None; + } + // We might read past self.end up to the next group boundary, // but this is fine because it only occurs on tables smaller // than the group size where the trailing control bytes are all // EMPTY. On larger tables self.end is guaranteed to be aligned // to the group size (since tables are power-of-two sized). - if let None = self.inner.next_group() { - return None; - } + self.current_group = Group::load_aligned(self.next_ctrl).match_full(); self.data = self.data.next_n(Group::WIDTH); + self.next_ctrl = self.next_ctrl.add(Group::WIDTH); } } } #[cfg_attr(feature = "inline-more", inline)] fn size_hint(&self) -> (usize, Option) { - self.inner.size_hint() + // We don't have an item count, so just guess based on the range size. + ( + 0, + Some(unsafe { offset_from(self.end, self.next_ctrl) + Group::WIDTH }), + ) } } @@ -1868,7 +1855,7 @@ impl RawIter { return; } - if self.iter.inner.next_ctrl < self.iter.inner.end + if self.iter.next_ctrl < self.iter.end && b.as_ptr() <= self.iter.data.next_n(Group::WIDTH).as_ptr() { // The iterator has not yet reached the bucket's group. @@ -1879,7 +1866,7 @@ impl RawIter { // To do that, we need to find its control byte. We know that self.iter.data is // at self.iter.next_ctrl - Group::WIDTH, so we work from there: let offset = offset_from(self.iter.data.as_ptr(), b.as_ptr()); - let ctrl = self.iter.inner.next_ctrl.sub(Group::WIDTH).add(offset); + let ctrl = self.iter.next_ctrl.sub(Group::WIDTH).add(offset); // This method should be called _before_ a removal, or _after_ an insert, // so in both cases the ctrl byte should indicate that the bucket is full. 
assert!(is_full(*ctrl)); @@ -1902,7 +1889,7 @@ impl RawIter { // - Otherwise, update the iterator cached group so that it won't // yield a to-be-removed bucket, or _will_ yield a to-be-added bucket. // We'll also need ot update the item count accordingly. - if let Some(index) = self.iter.inner.current_group.lowest_set_bit() { + if let Some(index) = self.iter.current_group.lowest_set_bit() { let next_bucket = self.iter.data.next_n(index); if b.as_ptr() > next_bucket.as_ptr() { // The toggled bucket is "before" the bucket the iterator would yield next. We @@ -1923,7 +1910,7 @@ impl RawIter { // Instead, we _just_ flip the bit for the particular bucket the caller asked // us to reflect. let our_bit = offset_from(self.iter.data.as_ptr(), b.as_ptr()); - let was_full = self.iter.inner.current_group.flip(our_bit); + let was_full = self.iter.current_group.flip(our_bit); debug_assert_ne!(was_full, is_insert); if is_insert { @@ -1935,16 +1922,10 @@ impl RawIter { if cfg!(debug_assertions) { if b.as_ptr() == next_bucket.as_ptr() { // The removed bucket should no longer be next - debug_assert_ne!( - self.iter.inner.current_group.lowest_set_bit(), - Some(index) - ); + debug_assert_ne!(self.iter.current_group.lowest_set_bit(), Some(index)); } else { // We should not have changed what bucket comes next. - debug_assert_eq!( - self.iter.inner.current_group.lowest_set_bit(), - Some(index) - ); + debug_assert_eq!(self.iter.current_group.lowest_set_bit(), Some(index)); } } } @@ -1953,6 +1934,14 @@ impl RawIter { } } } + + unsafe fn drop_elements(&mut self) { + if mem::needs_drop::() && self.len() != 0 { + for item in self { + item.drop(); + } + } + } } impl Clone for RawIter { @@ -2015,11 +2004,7 @@ unsafe impl<#[may_dangle] T, A: Allocator + Clone> Drop for RawIntoIter { fn drop(&mut self) { unsafe { // Drop all remaining elements - if mem::needs_drop::() && self.iter.len() != 0 { - while let Some(item) = self.iter.next() { - item.drop(); - } - } + self.iter.drop_elements(); // Free the table if let Some((ptr, layout)) = self.allocation { @@ -2034,11 +2019,7 @@ impl Drop for RawIntoIter { fn drop(&mut self) { unsafe { // Drop all remaining elements - if mem::needs_drop::() && self.iter.len() != 0 { - while let Some(item) = self.iter.next() { - item.drop(); - } - } + self.iter.drop_elements(); // Free the table if let Some((ptr, layout)) = self.allocation { @@ -2096,11 +2077,7 @@ impl Drop for RawDrain<'_, T, A> { fn drop(&mut self) { unsafe { // Drop all remaining elements. Note that this may panic. - if mem::needs_drop::() && self.iter.len() != 0 { - while let Some(item) = self.iter.next() { - item.drop(); - } - } + self.iter.drop_elements(); // Reset the contents of the table now that all elements have been // dropped. @@ -2139,10 +2116,12 @@ impl FusedIterator for RawDrain<'_, T, A> {} /// In rare cases, the iterator may return a bucket with a different hash. pub struct RawIterHash<'a, T, A: Allocator + Clone = Global> { inner: RawIterHashInner<'a, A>, - marker: PhantomData<&'a T>, + _marker: PhantomData, } -struct RawIterHashInner<'a, A> { +struct RawIterHashInner<'a, A: Allocator + Clone> { + table: &'a RawTableInner, + // The top 7 bits of the hash. 
h2_hash: u8, @@ -2156,16 +2135,16 @@ struct RawIterHashInner<'a, A> { } impl<'a, T, A: Allocator + Clone> RawIterHash<'a, T, A> { + #[cfg_attr(feature = "inline-more", inline)] fn new(table: &'a RawTable, hash: u64) -> Self { RawIterHash { inner: RawIterHashInner::new(&table.table, hash), - marker: PhantomData, + _marker: PhantomData, } } } - impl<'a, A: Allocator + Clone> RawIterHashInner<'a, A> { - #[inline] + #[cfg_attr(feature = "inline-more", inline)] fn new(table: &'a RawTableInner, hash: u64) -> Self { unsafe { let h2_hash = h2(hash); @@ -2197,20 +2176,23 @@ impl<'a, T, A: Allocator + Clone> Iterator for RawIterHash<'a, T, A> { } } -impl<'a, A: Allocator + Clone> RawIterHashInner<'a, A> { - #[inline] - unsafe fn next(&mut self) -> Option { - loop { - if let Some(bit) = self.bitmask.next() { - let index = (self.probe_seq.pos + bit) & self.table.bucket_mask; - return Some(index); - } - if likely(self.group.match_empty().any_bit_set()) { - return None; +impl<'a, A: Allocator + Clone> Iterator for RawIterHashInner<'a, A> { + type Item = usize; + + fn next(&mut self) -> Option { + unsafe { + loop { + if let Some(bit) = self.bitmask.next() { + let index = (self.probe_seq.pos + bit) & self.table.bucket_mask; + return Some(index); + } + if likely(self.group.match_empty().any_bit_set()) { + return None; + } + self.probe_seq.move_next(self.table.bucket_mask); + self.group = Group::load(self.table.ctrl(self.probe_seq.pos)); + self.bitmask = self.group.match_byte(self.h2_hash).into_iter(); } - self.probe_seq.move_next(self.table.bucket_mask); - self.group = Group::load(self.table.ctrl(self.probe_seq.pos)); - self.bitmask = self.group.match_byte(self.h2_hash).into_iter(); } } } diff --git a/src/set.rs b/src/set.rs index a451bcc771..ee2749fa2d 100644 --- a/src/set.rs +++ b/src/set.rs @@ -452,8 +452,6 @@ impl HashSet { impl HashSet where - T: Eq + Hash, - S: BuildHasher, A: Allocator + Clone, { /// Creates a new empty hash set which will use the given hasher to hash From 6a52d22044a0cc08f2fb5d64b8ffcb1818cb7450 Mon Sep 17 00:00:00 2001 From: Markus Westerlind Date: Sun, 24 Jan 2021 20:03:22 +0100 Subject: [PATCH 30/33] refactor: Remove calcualuate_layout for the nightly feature It was only used in `into_allocation` now so it does not pull its weight. --- src/raw/mod.rs | 27 --------------------------- 1 file changed, 27 deletions(-) diff --git a/src/raw/mod.rs b/src/raw/mod.rs index f57d90b4e8..9c3f890039 100644 --- a/src/raw/mod.rs +++ b/src/raw/mod.rs @@ -225,32 +225,6 @@ fn bucket_mask_to_capacity(bucket_mask: usize) -> usize { } } -/// Returns a Layout which describes the allocation required for a hash table, -/// and the offset of the control bytes in the allocation. -/// (the offset is also one past last element of buckets) -/// -/// Returns `None` if an overflow occurs. -#[cfg_attr(feature = "inline-more", inline)] -#[cfg(feature = "nightly")] -fn calculate_layout(buckets: usize) -> Option<(Layout, usize)> { - debug_assert!(buckets.is_power_of_two()); - - // Array of buckets - let data = Layout::array::(buckets).ok()?; - - // Array of control bytes. This must be aligned to the group size. - // - // We add `Group::WIDTH` control bytes at the end of the array which - // replicate the bytes at the start of the array and thus avoids the need to - // perform bounds-checking while probing. - // - // There is no possible overflow here since buckets is a power of two and - // Group::WIDTH is a small number. 
- let ctrl = unsafe { Layout::from_size_align_unchecked(buckets + Group::WIDTH, Group::WIDTH) }; - - data.extend(ctrl).ok() -} - /// Helper which allows the max calculation for ctrl_align to be statically computed for each T /// while keeping the rest of `calculate_layout_for` independent of `T` #[derive(Copy, Clone)] @@ -292,7 +266,6 @@ impl TableLayout { /// /// Returns `None` if an overflow occurs. #[cfg_attr(feature = "inline-more", inline)] -#[cfg(not(feature = "nightly"))] fn calculate_layout(buckets: usize) -> Option<(Layout, usize)> { TableLayout::new::().calculate_layout_for(buckets) } From 425fe487b3342af099b919ba26401c9558ddc706 Mon Sep 17 00:00:00 2001 From: Markus Westerlind Date: Sun, 24 Jan 2021 20:04:22 +0100 Subject: [PATCH 31/33] Add inline on prepare_rehash_in_place --- src/raw/mod.rs | 1 + 1 file changed, 1 insertion(+) diff --git a/src/raw/mod.rs b/src/raw/mod.rs index 9c3f890039..4a392c9f7c 100644 --- a/src/raw/mod.rs +++ b/src/raw/mod.rs @@ -1171,6 +1171,7 @@ impl RawTableInner { } #[allow(clippy::mut_mut)] + #[inline] unsafe fn prepare_rehash_in_place<'s>( &'s mut self, needs_drop: bool, From cae5a3a893f1000bf355e1b563b84ba7d257e9da Mon Sep 17 00:00:00 2001 From: Markus Westerlind Date: Sun, 24 Jan 2021 20:07:54 +0100 Subject: [PATCH 32/33] Explain the fn argument --- src/raw/mod.rs | 3 +++ 1 file changed, 3 insertions(+) diff --git a/src/raw/mod.rs b/src/raw/mod.rs index 4a392c9f7c..8549f6b51d 100644 --- a/src/raw/mod.rs +++ b/src/raw/mod.rs @@ -1170,6 +1170,9 @@ impl RawTableInner { } } + // We use `fn` argument here for `drop` as the function will only be called if the `hasher` + // panics which should be exceptionally rare. In return we only instantiate a single + // `prepare_rehash_in_place` per allocator (instead of per type and allocator) #[allow(clippy::mut_mut)] #[inline] unsafe fn prepare_rehash_in_place<'s>( From 99a7e3ea026829b0a494ea6b80ea4a778153f050 Mon Sep 17 00:00:00 2001 From: Markus Westerlind Date: Mon, 25 Jan 2021 16:48:53 +0100 Subject: [PATCH 33/33] Try specializing the drop in rehash_in_place again --- src/raw/mod.rs | 41 +++++++++++++++-------------------------- 1 file changed, 15 insertions(+), 26 deletions(-) diff --git a/src/raw/mod.rs b/src/raw/mod.rs index 8549f6b51d..ca575a149c 100644 --- a/src/raw/mod.rs +++ b/src/raw/mod.rs @@ -704,12 +704,20 @@ impl RawTable { // that we haven't rehashed yet. We unfortunately can't preserve the // element since we lost their hash and have no way of recovering it // without risking another panic. - let mut guard = self.table.prepare_rehash_in_place( - mem::needs_drop::(), - |self_: &mut RawTableInner, index| { - self_.bucket::(index).drop(); - }, - ); + self.table.prepare_rehash_in_place(); + + let mut guard = guard(&mut self.table, move |self_| { + if mem::needs_drop::() { + for i in 0..self_.buckets() { + if *self_.ctrl(i) == DELETED { + self_.set_ctrl(i, EMPTY); + self_.bucket::(i).drop(); + self_.items -= 1; + } + } + } + self_.growth_left = bucket_mask_to_capacity(self_.bucket_mask) - self_.items; + }); // At this point, DELETED elements are elements that we haven't // rehashed yet. Find them and re-insert them at their ideal @@ -1170,16 +1178,9 @@ impl RawTableInner { } } - // We use `fn` argument here for `drop` as the function will only be called if the `hasher` - // panics which should be exceptionally rare. 
In return we only instantiate a single - // `prepare_rehash_in_place` per allocator (instead of per type and allocator) #[allow(clippy::mut_mut)] #[inline] - unsafe fn prepare_rehash_in_place<'s>( - &'s mut self, - needs_drop: bool, - drop: fn(&mut Self, usize), - ) -> crate::scopeguard::ScopeGuard<&mut Self, impl FnMut(&mut &'s mut Self) + 's> { + unsafe fn prepare_rehash_in_place(&mut self) { // Bulk convert all full control bytes to DELETED, and all DELETED // control bytes to EMPTY. This effectively frees up all buckets // containing a DELETED entry. @@ -1198,18 +1199,6 @@ impl RawTableInner { self.ctrl(0) .copy_to(self.ctrl(self.buckets()), Group::WIDTH); } - guard(self, move |self_| { - if needs_drop { - for i in 0..self_.buckets() { - if *self_.ctrl(i) == DELETED { - self_.set_ctrl(i, EMPTY); - drop(self_, i); - self_.items -= 1; - } - } - } - self_.growth_left = bucket_mask_to_capacity(self_.bucket_mask) - self_.items; - }) } #[cfg_attr(feature = "inline-more", inline)]
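
The series as a whole keeps converging on one compile-time trick: move everything that does not depend on `T` into a non-generic `RawTableInner`, and pass the per-type facts (size, alignment) through a small `TableLayout` value instead of a type parameter. The standalone sketch below uses hypothetical names (`BufLayout`, `RawBufInner`, `RawBuf`) and a deliberately simplified growable buffer instead of a hash table; it illustrates the shape of that pattern and is not code from the patches above.

// A minimal sketch of the "thin generic wrapper over a non-generic core"
// pattern: the core owns the pointer/layout bookkeeping, and the generic
// wrapper only supplies a per-type layout descriptor, so the allocation
// logic is compiled once rather than once per element type.
use std::alloc::{alloc, dealloc, Layout};
use std::marker::PhantomData;
use std::ptr::NonNull;

/// Per-type facts captured once, analogous in spirit to `TableLayout`.
#[derive(Copy, Clone)]
struct BufLayout {
    size: usize,
    align: usize,
}

impl BufLayout {
    fn new<T>() -> Self {
        let layout = Layout::new::<T>();
        Self {
            size: layout.size(),
            align: layout.align(),
        }
    }

    /// Layout for `n` elements, or `None` on overflow.
    fn array(self, n: usize) -> Option<Layout> {
        let bytes = self.size.checked_mul(n)?;
        Layout::from_size_align(bytes, self.align).ok()
    }
}

/// Non-generic core: compiled once, regardless of element type.
struct RawBufInner {
    ptr: Option<NonNull<u8>>,
    cap: usize,
}

impl RawBufInner {
    const fn new() -> Self {
        Self { ptr: None, cap: 0 }
    }

    fn allocate(&mut self, layout: BufLayout, cap: usize) {
        let l = layout.array(cap).expect("capacity overflow");
        self.cap = cap;
        if l.size() == 0 {
            // Zero-sized requests are never passed to the allocator.
            return;
        }
        // SAFETY: `l` has non-zero size.
        self.ptr = NonNull::new(unsafe { alloc(l) });
        assert!(self.ptr.is_some(), "allocation failure");
    }

    fn free(&mut self, layout: BufLayout) {
        if let (Some(ptr), Some(l)) = (self.ptr.take(), layout.array(self.cap)) {
            // SAFETY: `ptr` was allocated with exactly this layout.
            unsafe { dealloc(ptr.as_ptr(), l) };
        }
        self.cap = 0;
    }
}

/// Thin generic wrapper: only the `BufLayout::new::<T>()` calls are
/// instantiated per `T`.
struct RawBuf<T> {
    inner: RawBufInner,
    marker: PhantomData<T>,
}

impl<T> RawBuf<T> {
    fn with_capacity(cap: usize) -> Self {
        let mut inner = RawBufInner::new();
        inner.allocate(BufLayout::new::<T>(), cap);
        Self {
            inner,
            marker: PhantomData,
        }
    }
}

impl<T> Drop for RawBuf<T> {
    fn drop(&mut self) {
        self.inner.free(BufLayout::new::<T>());
    }
}

fn main() {
    // Two element types share one compiled copy of `RawBufInner::allocate`.
    let _a = RawBuf::<u64>::with_capacity(16);
    let _b = RawBuf::<(u32, u32)>::with_capacity(16);
}

The same reasoning explains the later patches: `prepare_resize`, `prepare_insert_slot`, and `prepare_rehash_in_place` are pushed into the non-generic inner type so that only the small `T`-dependent pieces (bucket reads, writes, and drops) are monomorphized per map type.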