@@ -21,7 +21,18 @@ use ptr::{self, Unique, Shared};
21
21
22
22
use self :: BucketState :: * ;
23
23
24
- const EMPTY_BUCKET : u64 = 0 ;
24
+ /// Integer type used for stored hash values.
25
+ ///
26
+ /// No more than bit_width(usize) bits are needed to select a bucket.
27
+ ///
28
+ /// The most significant bit is ours to use for tagging `SafeHash`.
29
+ ///
30
+ /// (Even if we could have usize::MAX bytes allocated for buckets,
31
+ /// each bucket stores at least a `HashUint`, so there can be no more than
32
+ /// usize::MAX / size_of(usize) buckets.)
33
+ type HashUint = usize ;
34
+
35
+ const EMPTY_BUCKET : HashUint = 0 ;
25
36
26
37
/// The raw hashtable, providing safe-ish access to the unzipped and highly
27
38
/// optimized arrays of hashes, and key-value pairs.
@@ -64,7 +75,7 @@ const EMPTY_BUCKET: u64 = 0;
64
75
pub struct RawTable < K , V > {
65
76
capacity : usize ,
66
77
size : usize ,
67
- hashes : Unique < u64 > ,
78
+ hashes : Unique < HashUint > ,
68
79
69
80
// Because K/V do not appear directly in any of the types in the struct,
70
81
// inform rustc that in fact instances of K and V are reachable from here.
@@ -75,7 +86,7 @@ unsafe impl<K: Send, V: Send> Send for RawTable<K, V> {}
75
86
unsafe impl < K : Sync , V : Sync > Sync for RawTable < K , V > { }
76
87
77
88
struct RawBucket < K , V > {
78
- hash : * mut u64 ,
89
+ hash : * mut HashUint ,
79
90
// We use *const to ensure covariance with respect to K and V
80
91
pair : * const ( K , V ) ,
81
92
_marker : marker:: PhantomData < ( K , V ) > ,
@@ -136,15 +147,27 @@ pub struct GapThenFull<K, V, M> {
136
147
/// buckets.
137
148
#[ derive( PartialEq , Copy , Clone ) ]
138
149
pub struct SafeHash {
139
- hash : u64 ,
150
+ hash : HashUint ,
140
151
}
141
152
142
153
impl SafeHash {
143
154
/// Peek at the hash value, which is guaranteed to be non-zero.
144
155
#[ inline( always) ]
145
- pub fn inspect ( & self ) -> u64 {
156
+ pub fn inspect ( & self ) -> HashUint {
146
157
self . hash
147
158
}
159
+
160
+ #[ inline( always) ]
161
+ pub fn new ( hash : u64 ) -> Self {
162
+ // We need to avoid 0 in order to prevent collisions with
163
+ // EMPTY_BUCKET. We can maintain our precious uniform distribution
164
+ // of initial indexes by unconditionally setting the MSB,
165
+ // effectively reducing the hashes by one bit.
166
+ //
167
+ // Truncate hash to fit in `HashUint`.
168
+ let hash_bits = size_of :: < HashUint > ( ) * 8 ;
169
+ SafeHash { hash : ( 1 << ( hash_bits - 1 ) ) | ( hash as HashUint ) }
170
+ }
148
171
}
149
172
150
173
/// We need to remove hashes of 0. That's reserved for empty buckets.
@@ -156,25 +179,21 @@ pub fn make_hash<T: ?Sized, S>(hash_state: &S, t: &T) -> SafeHash
156
179
{
157
180
let mut state = hash_state. build_hasher ( ) ;
158
181
t. hash ( & mut state) ;
159
- // We need to avoid 0 in order to prevent collisions with
160
- // EMPTY_HASH. We can maintain our precious uniform distribution
161
- // of initial indexes by unconditionally setting the MSB,
162
- // effectively reducing 64-bits hashes to 63 bits.
163
- SafeHash { hash : 0x8000_0000_0000_0000 | state. finish ( ) }
182
+ SafeHash :: new ( state. finish ( ) )
164
183
}
165
184
166
- // `replace` casts a `*u64 ` to a `*SafeHash`. Since we statically
185
+ // `replace` casts a `*HashUint ` to a `*SafeHash`. Since we statically
167
186
// ensure that a `FullBucket` points to an index with a non-zero hash,
168
- // and a `SafeHash` is just a `u64 ` with a different name, this is
187
+ // and a `SafeHash` is just a `HashUint ` with a different name, this is
169
188
// safe.
170
189
//
171
190
// This test ensures that a `SafeHash` really IS the same size as a
172
- // `u64 `. If you need to change the size of `SafeHash` (and
191
+ // `HashUint `. If you need to change the size of `SafeHash` (and
173
192
// consequently make this test fail), `replace` needs to be
174
193
// modified to no longer assume this.
175
194
#[ test]
176
- fn can_alias_safehash_as_u64 ( ) {
177
- assert_eq ! ( size_of:: <SafeHash >( ) , size_of:: <u64 >( ) )
195
+ fn can_alias_safehash_as_hash ( ) {
196
+ assert_eq ! ( size_of:: <SafeHash >( ) , size_of:: <HashUint >( ) )
178
197
}
179
198
180
199
impl < K , V > RawBucket < K , V > {
@@ -605,14 +624,14 @@ impl<K, V> RawTable<K, V> {
605
624
return RawTable {
606
625
size : 0 ,
607
626
capacity : 0 ,
608
- hashes : Unique :: new ( EMPTY as * mut u64 ) ,
627
+ hashes : Unique :: new ( EMPTY as * mut HashUint ) ,
609
628
marker : marker:: PhantomData ,
610
629
} ;
611
630
}
612
631
613
632
// No need for `checked_mul` before a more restrictive check performed
614
633
// later in this method.
615
- let hashes_size = capacity. wrapping_mul ( size_of :: < u64 > ( ) ) ;
634
+ let hashes_size = capacity. wrapping_mul ( size_of :: < HashUint > ( ) ) ;
616
635
let pairs_size = capacity. wrapping_mul ( size_of :: < ( K , V ) > ( ) ) ;
617
636
618
637
// Allocating hashmaps is a little tricky. We need to allocate two
@@ -624,13 +643,13 @@ impl<K, V> RawTable<K, V> {
624
643
// right is a little subtle. Therefore, calculating offsets has been
625
644
// factored out into a different function.
626
645
let ( alignment, hash_offset, size, oflo) = calculate_allocation ( hashes_size,
627
- align_of :: < u64 > ( ) ,
646
+ align_of :: < HashUint > ( ) ,
628
647
pairs_size,
629
648
align_of :: < ( K , V ) > ( ) ) ;
630
649
assert ! ( !oflo, "capacity overflow" ) ;
631
650
632
651
// One check for overflow that covers calculation and rounding of size.
633
- let size_of_bucket = size_of :: < u64 > ( ) . checked_add ( size_of :: < ( K , V ) > ( ) ) . unwrap ( ) ;
652
+ let size_of_bucket = size_of :: < HashUint > ( ) . checked_add ( size_of :: < ( K , V ) > ( ) ) . unwrap ( ) ;
634
653
assert ! ( size >=
635
654
capacity. checked_mul( size_of_bucket)
636
655
. expect( "capacity overflow" ) ,
@@ -641,7 +660,7 @@ impl<K, V> RawTable<K, V> {
641
660
:: alloc:: oom ( )
642
661
}
643
662
644
- let hashes = buffer. offset ( hash_offset as isize ) as * mut u64 ;
663
+ let hashes = buffer. offset ( hash_offset as isize ) as * mut HashUint ;
645
664
646
665
RawTable {
647
666
capacity : capacity,
@@ -652,7 +671,7 @@ impl<K, V> RawTable<K, V> {
652
671
}
653
672
654
673
fn first_bucket_raw ( & self ) -> RawBucket < K , V > {
655
- let hashes_size = self . capacity * size_of :: < u64 > ( ) ;
674
+ let hashes_size = self . capacity * size_of :: < HashUint > ( ) ;
656
675
let pairs_size = self . capacity * size_of :: < ( K , V ) > ( ) ;
657
676
658
677
let buffer = * self . hashes as * mut u8 ;
@@ -756,7 +775,7 @@ impl<K, V> RawTable<K, V> {
756
775
/// this interface is safe, it's not used outside this module.
757
776
struct RawBuckets < ' a , K , V > {
758
777
raw : RawBucket < K , V > ,
759
- hashes_end : * mut u64 ,
778
+ hashes_end : * mut HashUint ,
760
779
761
780
// Strictly speaking, this should be &'a (K,V), but that would
762
781
// require that K:'a, and we often use RawBuckets<'static...> for
@@ -802,7 +821,7 @@ impl<'a, K, V> Iterator for RawBuckets<'a, K, V> {
802
821
/// the table's remaining entries. It's used in the implementation of Drop.
803
822
struct RevMoveBuckets < ' a , K , V > {
804
823
raw : RawBucket < K , V > ,
805
- hashes_end : * mut u64 ,
824
+ hashes_end : * mut HashUint ,
806
825
elems_left : usize ,
807
826
808
827
// As above, `&'a (K,V)` would seem better, but we often use
@@ -1036,10 +1055,10 @@ impl<K, V> Drop for RawTable<K, V> {
1036
1055
}
1037
1056
}
1038
1057
1039
- let hashes_size = self . capacity * size_of :: < u64 > ( ) ;
1058
+ let hashes_size = self . capacity * size_of :: < HashUint > ( ) ;
1040
1059
let pairs_size = self . capacity * size_of :: < ( K , V ) > ( ) ;
1041
1060
let ( align, _, size, oflo) = calculate_allocation ( hashes_size,
1042
- align_of :: < u64 > ( ) ,
1061
+ align_of :: < HashUint > ( ) ,
1043
1062
pairs_size,
1044
1063
align_of :: < ( K , V ) > ( ) ) ;
1045
1064
0 commit comments