@@ -24,10 +24,10 @@ use self::BucketState::*;
2424const EMPTY_BUCKET : u64 = 0 ;
2525
2626/// The raw hashtable, providing safe-ish access to the unzipped and highly
27- /// optimized arrays of hashes, keys, and values .
27+ /// optimized arrays of hashes and key-value pairs.
2828///
29- /// This design uses less memory and is a lot faster than the naive
30- /// `Vec<Option<u64, K, V>>`, because we don't pay for the overhead of an
29+ /// This design is a lot faster than the naive
30+ /// `Vec<Option<( u64, K, V) >>`, because we don't pay for the overhead of an
3131/// option on every element, and we get a generally more cache-aware design.
3232///
3333/// Essential invariants of this structure:
@@ -48,17 +48,19 @@ const EMPTY_BUCKET: u64 = 0;
4848/// which will likely map to the same bucket, while not being confused
4949/// with "empty".
5050///
51- /// - All three "arrays represented by pointers" are the same length:
51+ /// - Both "arrays represented by pointers" are the same length:
5252/// `capacity`. This is set at creation and never changes. The arrays
53- /// are unzipped to save space (we don't have to pay for the padding
54- /// between odd sized elements, such as in a map from u64 to u8), and
55- /// be more cache aware (scanning through 8 hashes brings in at most
56- /// 2 cache lines, since they're all right beside each other).
53+ /// are unzipped, which makes them more cache aware (scanning through 8
54+ /// hashes brings in at most 2 cache lines, since they're all right beside
55+ /// each other). Storing keys and values as pairs may waste space on
56+ /// padding, such as in a map from u64 to u8, but it is a more cache
57+ /// conscious layout: the key-value pairs are probed only briefly, and the
58+ /// desired value will be in the same or next cache line.
5759///
5860/// You can kind of think of this module/data structure as a safe wrapper
5961/// around just the "table" part of the hashtable. It enforces some
6062/// invariants at the type level and employs some performance trickery,
61- /// but in general is just a tricked out `Vec<Option<u64, K, V>>`.
63+ /// but in general is just a tricked out `Vec<Option<( u64, K, V) >>`.
6264pub struct RawTable < K , V > {
6365 capacity : usize ,
6466 size : usize ,
@@ -74,10 +76,8 @@ unsafe impl<K: Sync, V: Sync> Sync for RawTable<K, V> {}
7476
7577struct RawBucket < K , V > {
7678 hash : * mut u64 ,
77-
7879 // We use *const to ensure covariance with respect to K and V
79- key : * const K ,
80- val : * const V ,
80+ pair : * const ( K , V ) ,
8181 _marker : marker:: PhantomData < ( K , V ) > ,
8282}
8383
@@ -181,8 +181,7 @@ impl<K, V> RawBucket<K, V> {
181181 unsafe fn offset ( self , count : isize ) -> RawBucket < K , V > {
182182 RawBucket {
183183 hash : self . hash . offset ( count) ,
184- key : self . key . offset ( count) ,
185- val : self . val . offset ( count) ,
184+ pair : self . pair . offset ( count) ,
186185 _marker : marker:: PhantomData ,
187186 }
188187 }
@@ -370,8 +369,7 @@ impl<K, V, M> EmptyBucket<K, V, M>
370369 pub fn put ( mut self , hash : SafeHash , key : K , value : V ) -> FullBucket < K , V , M > {
371370 unsafe {
372371 * self . raw . hash = hash. inspect ( ) ;
373- ptr:: write ( self . raw . key as * mut K , key) ;
374- ptr:: write ( self . raw . val as * mut V , value) ;
372+ ptr:: write ( self . raw . pair as * mut ( K , V ) , ( key, value) ) ;
375373
376374 self . table . borrow_table_mut ( ) . size += 1 ;
377375 }
@@ -430,7 +428,7 @@ impl<K, V, M: Deref<Target = RawTable<K, V>>> FullBucket<K, V, M> {
430428
431429 /// Gets references to the key and value at a given index.
432430 pub fn read ( & self ) -> ( & K , & V ) {
433- unsafe { ( & * self . raw . key , & * self . raw . val ) }
431+ unsafe { ( & ( * self . raw . pair ) . 0 , & ( * self . raw . pair ) . 1 ) }
434432 }
435433}
436434
@@ -447,13 +445,14 @@ impl<'t, K, V> FullBucket<K, V, &'t mut RawTable<K, V>> {
447445
448446 unsafe {
449447 * self . raw . hash = EMPTY_BUCKET ;
448+ let ( k, v) = ptr:: read ( self . raw . pair ) ;
450449 ( EmptyBucket {
451450 raw : self . raw ,
452451 idx : self . idx ,
453452 table : self . table ,
454453 } ,
455- ptr :: read ( self . raw . key ) ,
456- ptr :: read ( self . raw . val ) )
454+ k ,
455+ v )
457456 }
458457 }
459458}
@@ -466,8 +465,7 @@ impl<K, V, M> FullBucket<K, V, M>
466465 pub fn replace ( & mut self , h : SafeHash , k : K , v : V ) -> ( SafeHash , K , V ) {
467466 unsafe {
468467 let old_hash = ptr:: replace ( self . raw . hash as * mut SafeHash , h) ;
469- let old_key = ptr:: replace ( self . raw . key as * mut K , k) ;
470- let old_val = ptr:: replace ( self . raw . val as * mut V , v) ;
468+ let ( old_key, old_val) = ptr:: replace ( self . raw . pair as * mut ( K , V ) , ( k, v) ) ;
471469
472470 ( old_hash, old_key, old_val)
473471 }
@@ -479,7 +477,8 @@ impl<K, V, M> FullBucket<K, V, M>
479477{
480478 /// Gets mutable references to the key and value at a given index.
481479 pub fn read_mut ( & mut self ) -> ( & mut K , & mut V ) {
482- unsafe { ( & mut * ( self . raw . key as * mut K ) , & mut * ( self . raw . val as * mut V ) ) }
480+ let pair_mut = self . raw . pair as * mut ( K , V ) ;
481+ unsafe { ( & mut ( * pair_mut) . 0 , & mut ( * pair_mut) . 1 ) }
483482 }
484483}
485484
@@ -492,7 +491,7 @@ impl<'t, K, V, M> FullBucket<K, V, M>
492491 /// in exchange for this, the returned references have a longer lifetime
493492 /// than the references returned by `read()`.
494493 pub fn into_refs ( self ) -> ( & ' t K , & ' t V ) {
495- unsafe { ( & * self . raw . key , & * self . raw . val ) }
494+ unsafe { ( & ( * self . raw . pair ) . 0 , & ( * self . raw . pair ) . 1 ) }
496495 }
497496}
498497
@@ -502,7 +501,8 @@ impl<'t, K, V, M> FullBucket<K, V, M>
502501 /// This works similarly to `into_refs`, exchanging a bucket state
503502 /// for mutable references into the table.
504503 pub fn into_mut_refs ( self ) -> ( & ' t mut K , & ' t mut V ) {
505- unsafe { ( & mut * ( self . raw . key as * mut K ) , & mut * ( self . raw . val as * mut V ) ) }
504+ let pair_mut = self . raw . pair as * mut ( K , V ) ;
505+ unsafe { ( & mut ( * pair_mut) . 0 , & mut ( * pair_mut) . 1 ) }
506506 }
507507}
508508
@@ -517,8 +517,7 @@ impl<K, V, M> GapThenFull<K, V, M>
517517 pub fn shift ( mut self ) -> Option < GapThenFull < K , V , M > > {
518518 unsafe {
519519 * self . gap . raw . hash = mem:: replace ( & mut * self . full . raw . hash , EMPTY_BUCKET ) ;
520- ptr:: copy_nonoverlapping ( self . full . raw . key , self . gap . raw . key as * mut K , 1 ) ;
521- ptr:: copy_nonoverlapping ( self . full . raw . val , self . gap . raw . val as * mut V , 1 ) ;
520+ ptr:: copy_nonoverlapping ( self . full . raw . pair , self . gap . raw . pair as * mut ( K , V ) , 1 ) ;
522521 }
523522
524523 let FullBucket { raw : prev_raw, idx : prev_idx, .. } = self . full ;
@@ -560,49 +559,42 @@ fn test_rounding() {
560559 assert_eq ! ( round_up_to_next( 5 , 4 ) , 8 ) ;
561560}
562561
563- // Returns a tuple of (key_offset, val_offset ),
562+ // Returns a tuple of (pairs_offset, end_of_pairs_offset, oflo), with offsets
564563// from the start of a mallocated array.
565564#[ inline]
566565fn calculate_offsets ( hashes_size : usize ,
567- keys_size : usize ,
568- keys_align : usize ,
569- vals_align : usize )
566+ pairs_size : usize ,
567+ pairs_align : usize )
570568 -> ( usize , usize , bool ) {
571- let keys_offset = round_up_to_next ( hashes_size, keys_align) ;
572- let ( end_of_keys, oflo) = keys_offset. overflowing_add ( keys_size) ;
573-
574- let vals_offset = round_up_to_next ( end_of_keys, vals_align) ;
569+ let pairs_offset = round_up_to_next ( hashes_size, pairs_align) ;
570+ let ( end_of_pairs, oflo) = pairs_offset. overflowing_add ( pairs_size) ;
575571
576- ( keys_offset , vals_offset , oflo)
572+ ( pairs_offset , end_of_pairs , oflo)
577573}
578574
579575// Returns a tuple of (minimum required malloc alignment, hash_offset,
580576// array_size), from the start of a mallocated array.
581577fn calculate_allocation ( hash_size : usize ,
582578 hash_align : usize ,
583- keys_size : usize ,
584- keys_align : usize ,
585- vals_size : usize ,
586- vals_align : usize )
579+ pairs_size : usize ,
580+ pairs_align : usize )
587581 -> ( usize , usize , usize , bool ) {
588582 let hash_offset = 0 ;
589- let ( _, vals_offset, oflo) = calculate_offsets ( hash_size, keys_size, keys_align, vals_align) ;
590- let ( end_of_vals, oflo2) = vals_offset. overflowing_add ( vals_size) ;
583+ let ( _, end_of_pairs, oflo) = calculate_offsets ( hash_size, pairs_size, pairs_align) ;
591584
592- let align = cmp:: max ( hash_align, cmp :: max ( keys_align , vals_align ) ) ;
585+ let align = cmp:: max ( hash_align, pairs_align ) ;
593586
594- ( align, hash_offset, end_of_vals , oflo || oflo2 )
587+ ( align, hash_offset, end_of_pairs , oflo)
595588}
596589
597590#[ test]
598591fn test_offset_calculation ( ) {
599- assert_eq ! ( calculate_allocation( 128 , 8 , 15 , 1 , 4 , 4 ) ,
600- ( 8 , 0 , 148 , false ) ) ;
601- assert_eq ! ( calculate_allocation( 3 , 1 , 2 , 1 , 1 , 1 ) , ( 1 , 0 , 6 , false ) ) ;
602- assert_eq ! ( calculate_allocation( 6 , 2 , 12 , 4 , 24 , 8 ) , ( 8 , 0 , 48 , false ) ) ;
603- assert_eq ! ( calculate_offsets( 128 , 15 , 1 , 4 ) , ( 128 , 144 , false ) ) ;
604- assert_eq ! ( calculate_offsets( 3 , 2 , 1 , 1 ) , ( 3 , 5 , false ) ) ;
605- assert_eq ! ( calculate_offsets( 6 , 12 , 4 , 8 ) , ( 8 , 24 , false ) ) ;
592+ assert_eq ! ( calculate_allocation( 128 , 8 , 16 , 8 ) , ( 8 , 0 , 144 , false ) ) ;
593+ assert_eq ! ( calculate_allocation( 3 , 1 , 2 , 1 ) , ( 1 , 0 , 5 , false ) ) ;
594+ assert_eq ! ( calculate_allocation( 6 , 2 , 12 , 4 ) , ( 4 , 0 , 20 , false ) ) ;
595+ assert_eq ! ( calculate_offsets( 128 , 15 , 4 ) , ( 128 , 143 , false ) ) ;
596+ assert_eq ! ( calculate_offsets( 3 , 2 , 4 ) , ( 4 , 6 , false ) ) ;
597+ assert_eq ! ( calculate_offsets( 6 , 12 , 4 ) , ( 8 , 20 , false ) ) ;
606598}
607599
608600impl < K , V > RawTable < K , V > {
@@ -621,8 +613,7 @@ impl<K, V> RawTable<K, V> {
621613 // No need for `checked_mul` before a more restrictive check performed
622614 // later in this method.
623615 let hashes_size = capacity * size_of :: < u64 > ( ) ;
624- let keys_size = capacity * size_of :: < K > ( ) ;
625- let vals_size = capacity * size_of :: < V > ( ) ;
616+ let pairs_size = capacity * size_of :: < ( K , V ) > ( ) ;
626617
627618 // Allocating hashmaps is a little tricky. We need to allocate two
628619 // arrays, but since we know their sizes and alignments up front,
@@ -634,19 +625,13 @@ impl<K, V> RawTable<K, V> {
634625 // factored out into a different function.
635626 let ( malloc_alignment, hash_offset, size, oflo) = calculate_allocation ( hashes_size,
636627 align_of :: < u64 > ( ) ,
637- keys_size,
638- align_of :: < K > ( ) ,
639- vals_size,
640- align_of :: < V > ( ) ) ;
641-
628+ pairs_size,
629+ align_of :: < ( K ,
630+ V ) > ( ) ) ;
642631 assert ! ( !oflo, "capacity overflow" ) ;
643632
644633 // One check for overflow that covers calculation and rounding of size.
645- let size_of_bucket = size_of :: < u64 > ( )
646- . checked_add ( size_of :: < K > ( ) )
647- . unwrap ( )
648- . checked_add ( size_of :: < V > ( ) )
649- . unwrap ( ) ;
634+ let size_of_bucket = size_of :: < u64 > ( ) . checked_add ( size_of :: < ( K , V ) > ( ) ) . unwrap ( ) ;
650635 assert ! ( size >=
651636 capacity. checked_mul( size_of_bucket)
652637 . expect( "capacity overflow" ) ,
@@ -669,17 +654,16 @@ impl<K, V> RawTable<K, V> {
669654
670655 fn first_bucket_raw ( & self ) -> RawBucket < K , V > {
671656 let hashes_size = self . capacity * size_of :: < u64 > ( ) ;
672- let keys_size = self . capacity * size_of :: < K > ( ) ;
657+ let pairs_size = self . capacity * size_of :: < ( K , V ) > ( ) ;
673658
674- let buffer = * self . hashes as * const u8 ;
675- let ( keys_offset , vals_offset , oflo) =
676- calculate_offsets ( hashes_size, keys_size , align_of :: < K > ( ) , align_of :: < V > ( ) ) ;
659+ let buffer = * self . hashes as * mut u8 ;
660+ let ( pairs_offset , _ , oflo) =
661+ calculate_offsets ( hashes_size, pairs_size , align_of :: < ( K , V ) > ( ) ) ;
677662 debug_assert ! ( !oflo, "capacity overflow" ) ;
678663 unsafe {
679664 RawBucket {
680665 hash : * self . hashes ,
681- key : buffer. offset ( keys_offset as isize ) as * const K ,
682- val : buffer. offset ( vals_offset as isize ) as * const V ,
666+ pair : buffer. offset ( pairs_offset as isize ) as * const _ ,
683667 _marker : marker:: PhantomData ,
684668 }
685669 }
@@ -844,7 +828,7 @@ impl<'a, K, V> Iterator for RevMoveBuckets<'a, K, V> {
844828
845829 if * self . raw . hash != EMPTY_BUCKET {
846830 self . elems_left -= 1 ;
847- return Some ( ( ptr:: read ( self . raw . key ) , ptr :: read ( self . raw . val ) ) ) ;
831+ return Some ( ptr:: read ( self . raw . pair ) ) ;
848832 }
849833 }
850834 }
@@ -909,7 +893,7 @@ impl<'a, K, V> Iterator for Iter<'a, K, V> {
909893 fn next ( & mut self ) -> Option < ( & ' a K , & ' a V ) > {
910894 self . iter . next ( ) . map ( |bucket| {
911895 self . elems_left -= 1 ;
912- unsafe { ( & * bucket. key , & * bucket. val ) }
896+ unsafe { ( & ( * bucket. pair ) . 0 , & ( * bucket. pair ) . 1 ) }
913897 } )
914898 }
915899
@@ -929,7 +913,8 @@ impl<'a, K, V> Iterator for IterMut<'a, K, V> {
929913 fn next ( & mut self ) -> Option < ( & ' a K , & ' a mut V ) > {
930914 self . iter . next ( ) . map ( |bucket| {
931915 self . elems_left -= 1 ;
932- unsafe { ( & * bucket. key , & mut * ( bucket. val as * mut V ) ) }
916+ let pair_mut = bucket. pair as * mut ( K , V ) ;
917+ unsafe { ( & ( * pair_mut) . 0 , & mut ( * pair_mut) . 1 ) }
933918 } )
934919 }
935920
@@ -950,7 +935,8 @@ impl<K, V> Iterator for IntoIter<K, V> {
950935 self . iter . next ( ) . map ( |bucket| {
951936 self . table . size -= 1 ;
952937 unsafe {
953- ( SafeHash { hash : * bucket. hash } , ptr:: read ( bucket. key ) , ptr:: read ( bucket. val ) )
938+ let ( k, v) = ptr:: read ( bucket. pair ) ;
939+ ( SafeHash { hash : * bucket. hash } , k, v)
954940 }
955941 } )
956942 }
@@ -974,9 +960,8 @@ impl<'a, K, V> Iterator for Drain<'a, K, V> {
974960 self . iter . next ( ) . map ( |bucket| {
975961 unsafe {
976962 ( * * self . table ) . size -= 1 ;
977- ( SafeHash { hash : ptr:: replace ( bucket. hash , EMPTY_BUCKET ) } ,
978- ptr:: read ( bucket. key ) ,
979- ptr:: read ( bucket. val ) )
963+ let ( k, v) = ptr:: read ( bucket. pair ) ;
964+ ( SafeHash { hash : ptr:: replace ( bucket. hash , EMPTY_BUCKET ) } , k, v)
980965 }
981966 } )
982967 }
@@ -1015,8 +1000,7 @@ impl<K: Clone, V: Clone> Clone for RawTable<K, V> {
10151000 ( full. hash ( ) , k. clone ( ) , v. clone ( ) )
10161001 } ;
10171002 * new_buckets. raw . hash = h. inspect ( ) ;
1018- ptr:: write ( new_buckets. raw . key as * mut K , k) ;
1019- ptr:: write ( new_buckets. raw . val as * mut V , v) ;
1003+ ptr:: write ( new_buckets. raw . pair as * mut ( K , V ) , ( k, v) ) ;
10201004 }
10211005 Empty ( ..) => {
10221006 * new_buckets. raw . hash = EMPTY_BUCKET ;
@@ -1054,14 +1038,11 @@ impl<K, V> Drop for RawTable<K, V> {
10541038 }
10551039
10561040 let hashes_size = self . capacity * size_of :: < u64 > ( ) ;
1057- let keys_size = self . capacity * size_of :: < K > ( ) ;
1058- let vals_size = self . capacity * size_of :: < V > ( ) ;
1041+ let pairs_size = self . capacity * size_of :: < ( K , V ) > ( ) ;
10591042 let ( align, _, size, oflo) = calculate_allocation ( hashes_size,
10601043 align_of :: < u64 > ( ) ,
1061- keys_size,
1062- align_of :: < K > ( ) ,
1063- vals_size,
1064- align_of :: < V > ( ) ) ;
1044+ pairs_size,
1045+ align_of :: < ( K , V ) > ( ) ) ;
10651046
10661047 debug_assert ! ( !oflo, "should be impossible" ) ;
10671048
0 commit comments