Skip to content

Reduce size overhead of adaptative hashmap #40237

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 1 commit into from
Mar 9, 2017
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
24 changes: 8 additions & 16 deletions src/libstd/collections/hash/map.rs
Original file line number Diff line number Diff line change
Expand Up @@ -396,8 +396,6 @@ pub struct HashMap<K, V, S = RandomState> {
table: RawTable<K, V>,

resize_policy: DefaultResizePolicy,

long_probes: bool,
}

/// Search for a pre-hashed key.
Expand Down Expand Up @@ -655,7 +653,6 @@ impl<K, V, S> HashMap<K, V, S>
hash_builder: hash_builder,
resize_policy: DefaultResizePolicy::new(),
table: RawTable::new(0),
long_probes: false,
}
}

Expand Down Expand Up @@ -688,7 +685,6 @@ impl<K, V, S> HashMap<K, V, S>
hash_builder: hash_builder,
resize_policy: resize_policy,
table: RawTable::new(raw_cap),
long_probes: false,
}
}

Expand Down Expand Up @@ -746,7 +742,7 @@ impl<K, V, S> HashMap<K, V, S>
let min_cap = self.len().checked_add(additional).expect("reserve overflow");
let raw_cap = self.resize_policy.raw_capacity(min_cap);
self.resize(raw_cap);
} else if self.long_probes && remaining <= self.len() {
} else if self.table.tag() && remaining <= self.len() {
// Probe sequence is too long and table is half full,
// resize early to reduce probing length.
let new_capacity = self.table.capacity() * 2;
Expand All @@ -763,7 +759,6 @@ impl<K, V, S> HashMap<K, V, S>
assert!(self.table.size() <= new_raw_cap);
assert!(new_raw_cap.is_power_of_two() || new_raw_cap == 0);

self.long_probes = false;
let mut old_table = replace(&mut self.table, RawTable::new(new_raw_cap));
let old_size = old_table.size();

Expand Down Expand Up @@ -844,8 +839,7 @@ impl<K, V, S> HashMap<K, V, S>
/// If the key already exists, the hashtable will be returned untouched
/// and a reference to the existing element will be returned.
fn insert_hashed_nocheck(&mut self, hash: SafeHash, k: K, v: V) -> Option<V> {
let entry = search_hashed(&mut self.table, hash, |key| *key == k)
.into_entry(k, &mut self.long_probes);
let entry = search_hashed(&mut self.table, hash, |key| *key == k).into_entry(k);
match entry {
Some(Occupied(mut elem)) => Some(elem.insert(v)),
Some(Vacant(elem)) => {
Expand Down Expand Up @@ -1002,7 +996,7 @@ impl<K, V, S> HashMap<K, V, S>
self.reserve(1);
let hash = self.make_hash(&key);
search_hashed(&mut self.table, hash, |q| q.eq(&key))
.into_entry(key, &mut self.long_probes).expect("unreachable")
.into_entry(key).expect("unreachable")
}

/// Returns the number of elements in the map.
Expand Down Expand Up @@ -1456,7 +1450,7 @@ impl<K, V, M> InternalEntry<K, V, M> {

impl<'a, K, V> InternalEntry<K, V, &'a mut RawTable<K, V>> {
#[inline]
fn into_entry(self, key: K, long_probes: &'a mut bool) -> Option<Entry<'a, K, V>> {
fn into_entry(self, key: K) -> Option<Entry<'a, K, V>> {
match self {
InternalEntry::Occupied { elem } => {
Some(Occupied(OccupiedEntry {
Expand All @@ -1469,7 +1463,6 @@ impl<'a, K, V> InternalEntry<K, V, &'a mut RawTable<K, V>> {
hash: hash,
key: key,
elem: elem,
long_probes: long_probes,
}))
}
InternalEntry::TableIsEmpty => None,
Expand Down Expand Up @@ -1542,7 +1535,6 @@ pub struct VacantEntry<'a, K: 'a, V: 'a> {
hash: SafeHash,
key: K,
elem: VacantEntryState<K, V, &'a mut RawTable<K, V>>,
long_probes: &'a mut bool,
}

#[stable(feature= "debug_hash_map", since = "1.12.0")]
Expand Down Expand Up @@ -2117,15 +2109,15 @@ impl<'a, K: 'a, V: 'a> VacantEntry<'a, K, V> {
#[stable(feature = "rust1", since = "1.0.0")]
pub fn insert(self, value: V) -> &'a mut V {
match self.elem {
NeqElem(bucket, disp) => {
NeqElem(mut bucket, disp) => {
if disp >= DISPLACEMENT_THRESHOLD {
*self.long_probes = true;
bucket.table_mut().set_tag(true);
}
robin_hood(bucket, disp, self.hash, self.key, value)
},
NoElem(bucket, disp) => {
NoElem(mut bucket, disp) => {
if disp >= DISPLACEMENT_THRESHOLD {
*self.long_probes = true;
bucket.table_mut().set_tag(true);
}
bucket.put(self.hash, self.key, value).into_mut_refs().1
},
Expand Down
74 changes: 66 additions & 8 deletions src/libstd/collections/hash/table.rs
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,42 @@ type HashUint = usize;

const EMPTY_BUCKET: HashUint = 0;

/// Special `Unique<HashUint>` that uses the lower bit of the pointer
/// to expose a boolean tag.
///
/// The wrapped value is the raw address with the tag folded into bit 0:
/// `tag()` reads that bit, `set_tag()` sets or clears it, and `ptr()` returns
/// the address with the bit masked off. The constructor debug-asserts that the
/// incoming pointer either has bit 0 clear or is the `EMPTY` sentinel.
///
/// Note: when the pointer is initialized to `EMPTY`, `.ptr()` will return
/// null and the tag functions shouldn't be used.
struct TaggedHashUintPtr(Unique<HashUint>);

impl TaggedHashUintPtr {
    /// Wraps `ptr` without modifying it.
    ///
    /// In debug builds this asserts that `ptr` either has its lowest bit clear
    /// (so the bit is free to hold the tag) or is exactly the `EMPTY` sentinel.
    ///
    /// # Safety
    ///
    /// `ptr` is passed straight to `Unique::new`, so the caller must uphold
    /// whatever `Unique::new` requires of it.
    #[inline]
    unsafe fn new(ptr: *mut HashUint) -> Self {
        debug_assert!(ptr as usize & 1 == 0 || ptr as usize == EMPTY as usize);
        TaggedHashUintPtr(Unique::new(ptr))
    }

    /// Sets (`value == true`) or clears (`value == false`) the tag kept in the
    /// lowest bit of the stored address. All other bits are left untouched.
    ///
    /// Must not be used while the pointer holds the `EMPTY` sentinel.
    #[inline]
    fn set_tag(&mut self, value: bool) {
        // Work on the address as a plain integer and store a fresh `Unique`
        // through `&mut self`, instead of writing through a `*mut usize` that
        // was cast from a shared reborrow of the field.
        let addr = *self.0 as usize;
        let tagged = if value { addr | 1 } else { addr & !1 };
        // SAFETY: only bit 0 differs from the address already held in
        // `self.0`. For any pointer other than the `EMPTY` sentinel (which
        // callers must not tag) the remaining bits are unchanged, so the
        // result is exactly as valid for `Unique` as the old value was.
        self.0 = unsafe { Unique::new(tagged as *mut HashUint) };
    }

    /// Returns `true` when the lowest bit of the stored address is set.
    #[inline]
    fn tag(&self) -> bool {
        (*self.0 as usize) & 1 == 1
    }

    /// Returns the stored address with the tag bit masked off.
    #[inline]
    fn ptr(&self) -> *mut HashUint {
        (*self.0 as usize & !1) as *mut HashUint
    }
}

/// The raw hashtable, providing safe-ish access to the unzipped and highly
/// optimized arrays of hashes, and key-value pairs.
///
Expand Down Expand Up @@ -72,10 +108,14 @@ const EMPTY_BUCKET: HashUint = 0;
/// around just the "table" part of the hashtable. It enforces some
/// invariants at the type level and employs some performance trickery,
/// but in general is just a tricked out `Vec<Option<(u64, K, V)>>`.
///
/// The hashtable also exposes a special boolean tag. The tag defaults to false
/// when the RawTable is created and is accessible with the `tag` and `set_tag`
/// functions.
pub struct RawTable<K, V> {
capacity: usize,
size: usize,
hashes: Unique<HashUint>,
hashes: TaggedHashUintPtr,

// Because K/V do not appear directly in any of the types in the struct,
// inform rustc that in fact instances of K and V are reachable from here.
Expand Down Expand Up @@ -208,6 +248,10 @@ impl<K, V, M> FullBucket<K, V, M> {
pub fn table(&self) -> &M {
&self.table
}
/// Borrow a mutable reference to the table.
///
/// Returns `&mut M`, i.e. mutable access to the table handle stored in this
/// bucket's `table` field. What can be done through it depends on the
/// concrete `M` the bucket was created with.
pub fn table_mut(&mut self) -> &mut M {
&mut self.table
}
/// Move out the reference to the table.
pub fn into_table(self) -> M {
self.table
Expand All @@ -227,6 +271,10 @@ impl<K, V, M> EmptyBucket<K, V, M> {
pub fn table(&self) -> &M {
&self.table
}
/// Borrow a mutable reference to the table.
///
/// Returns `&mut M`, i.e. mutable access to the table handle stored in this
/// bucket's `table` field. What can be done through it depends on the
/// concrete `M` the bucket was created with.
pub fn table_mut(&mut self) -> &mut M {
&mut self.table
}
}

impl<K, V, M> Bucket<K, V, M> {
Expand Down Expand Up @@ -687,7 +735,7 @@ impl<K, V> RawTable<K, V> {
return RawTable {
size: 0,
capacity: 0,
hashes: Unique::new(EMPTY as *mut HashUint),
hashes: TaggedHashUintPtr::new(EMPTY as *mut HashUint),
marker: marker::PhantomData,
};
}
Expand Down Expand Up @@ -728,7 +776,7 @@ impl<K, V> RawTable<K, V> {
RawTable {
capacity: capacity,
size: 0,
hashes: Unique::new(hashes),
hashes: TaggedHashUintPtr::new(hashes),
marker: marker::PhantomData,
}
}
Expand All @@ -737,13 +785,13 @@ impl<K, V> RawTable<K, V> {
let hashes_size = self.capacity * size_of::<HashUint>();
let pairs_size = self.capacity * size_of::<(K, V)>();

let buffer = *self.hashes as *mut u8;
let buffer = self.hashes.ptr() as *mut u8;
let (pairs_offset, _, oflo) =
calculate_offsets(hashes_size, pairs_size, align_of::<(K, V)>());
debug_assert!(!oflo, "capacity overflow");
unsafe {
RawBucket {
hash: *self.hashes,
hash: self.hashes.ptr(),
pair: buffer.offset(pairs_offset as isize) as *const _,
_marker: marker::PhantomData,
}
Expand All @@ -755,7 +803,7 @@ impl<K, V> RawTable<K, V> {
pub fn new(capacity: usize) -> RawTable<K, V> {
unsafe {
let ret = RawTable::new_uninitialized(capacity);
ptr::write_bytes(*ret.hashes, 0, capacity);
ptr::write_bytes(ret.hashes.ptr(), 0, capacity);
ret
}
}
Expand All @@ -774,7 +822,7 @@ impl<K, V> RawTable<K, V> {
fn raw_buckets(&self) -> RawBuckets<K, V> {
RawBuckets {
raw: self.first_bucket_raw(),
hashes_end: unsafe { self.hashes.offset(self.capacity as isize) },
hashes_end: unsafe { self.hashes.ptr().offset(self.capacity as isize) },
marker: marker::PhantomData,
}
}
Expand Down Expand Up @@ -832,6 +880,16 @@ impl<K, V> RawTable<K, V> {
marker: marker::PhantomData,
}
}

/// Set the table tag.
///
/// Forwards `value` to `set_tag` on the `hashes` field; the table itself
/// stores no separate flag.
pub fn set_tag(&mut self, value: bool) {
self.hashes.set_tag(value)
}

/// Get the table tag.
///
/// Returns whatever `tag()` on the `hashes` field reports.
///
/// NOTE(review): a zero-capacity table stores the `EMPTY` sentinel in
/// `hashes`; if that sentinel has its lowest bit set, this would report
/// `true` for such a table. Confirm against the definition of `EMPTY`.
pub fn tag(&self) -> bool {
self.hashes.tag()
}
}

/// A raw iterator. The basis for some other iterators in this module. Although
Expand Down Expand Up @@ -1156,7 +1214,7 @@ unsafe impl<#[may_dangle] K, #[may_dangle] V> Drop for RawTable<K, V> {
debug_assert!(!oflo, "should be impossible");

unsafe {
deallocate(*self.hashes as *mut u8, size, align);
deallocate(self.hashes.ptr() as *mut u8, size, align);
// Remember how everything was allocated out of one buffer
// during initialization? We only need one call to free here.
}
Expand Down