From cd41f90b1c08a931e36c47917da38bd34c90a6a4 Mon Sep 17 00:00:00 2001 From: Tom Kaitchuck Date: Mon, 26 Feb 2024 20:54:44 -0800 Subject: [PATCH 1/3] Strengthen fastpath for u64 hashes Signed-off-by: Tom Kaitchuck --- Cargo.toml | 2 +- src/fallback_hash.rs | 2 +- src/lib.rs | 1 - src/operations.rs | 1 - tests/map_tests.rs | 40 ++++++++++++++++++++++++++++++++++++++++ 5 files changed, 42 insertions(+), 4 deletions(-) diff --git a/Cargo.toml b/Cargo.toml index 01faf1e..4a32846 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -98,7 +98,7 @@ fxhash = "0.2.1" hex = "0.4.2" rand = "0.8.5" serde_json = "1.0.59" -hashbrown = "0.12.3" +hashbrown = "0.14.3" [package.metadata.docs.rs] rustc-args = ["-C", "target-feature=+aes"] diff --git a/src/fallback_hash.rs b/src/fallback_hash.rs index f78074d..eb55479 100644 --- a/src/fallback_hash.rs +++ b/src/fallback_hash.rs @@ -237,6 +237,7 @@ impl Hasher for AHasherU64 { #[inline] fn write_u64(&mut self, i: u64) { self.buffer = folded_multiply(i ^ self.buffer, MULTIPLE); + self.pad = self.pad.wrapping_add(i); } #[inline] @@ -341,7 +342,6 @@ impl Hasher for AHasherStr { #[cfg(test)] mod tests { - use crate::convert::Convert; use crate::fallback_hash::*; #[test] diff --git a/src/lib.rs b/src/lib.rs index 653c3bc..69fb2ca 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -319,7 +319,6 @@ mod test { use crate::specialize::CallHasher; use crate::*; use std::collections::HashMap; - use std::hash::Hash; #[test] fn test_ahash_alias_map_construction() { diff --git a/src/operations.rs b/src/operations.rs index 5ec0337..4509c52 100644 --- a/src/operations.rs +++ b/src/operations.rs @@ -184,7 +184,6 @@ pub(crate) fn add_in_length(enc: &mut u128, len: u64) { #[cfg(test)] mod test { use super::*; - use crate::convert::Convert; // This is code to search for the shuffle constant // diff --git a/tests/map_tests.rs b/tests/map_tests.rs index bdf37d8..4ec2334 100644 --- a/tests/map_tests.rs +++ b/tests/map_tests.rs @@ -225,6 +225,46 @@ fn test_key_ref() { assert!(m.contains(&b"hello"[..])); } +#[cfg(feature = "std")] +#[test] +fn test_byte_dist() { + use rand::{rngs::StdRng, SeedableRng, Rng}; + + let mut r = StdRng::seed_from_u64(0xe786_c22b_119c_1479); + for _round in 0..100 { + let mut table: [bool; 256 * 8] = [false; 256 * 8]; + let hasher = RandomState::with_seeds(r.gen(), r.gen(), r.gen(), r.gen()); + for i in 0..128 { + let mut keys: [u8; 8] = hasher.hash_one(i as u64).to_ne_bytes(); + for idx in 0..8 { + while table[idx * 256 + keys[idx] as usize] { + keys[idx] = keys[idx].wrapping_add(1); + } + table[idx * 256 + keys[idx] as usize] = true; + } + } + + for idx in 0..8 { + let mut len = 0; + let mut total_len = 0; + let mut num_seq = 0; + for i in 0..256 { + if table[idx * 256 + i] { + len += 1; + } else if len != 0 { + num_seq += 1; + total_len += len; + len = 0; + } + } + let mean = total_len as f32 / num_seq as f32; + println!("Mean sequence length = {}", mean); + assert!(mean > 1.70); + assert!(mean < 3.375); + } + } +} + fn ahash_vec(b: &Vec) -> u64 { let mut total: u64 = 0; From 9d252030c1e23c192c24c9454ae3437e65f55080 Mon Sep 17 00:00:00 2001 From: Tom Kaitchuck Date: Tue, 27 Feb 2024 10:05:41 -0800 Subject: [PATCH 2/3] Calibrate sensitivity and make deterministic Signed-off-by: Tom Kaitchuck --- Cargo.toml | 1 + tests/map_tests.rs | 18 ++++++++++++++---- 2 files changed, 15 insertions(+), 4 deletions(-) diff --git a/Cargo.toml b/Cargo.toml index 4a32846..d80856b 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -97,6 +97,7 @@ fnv = "1.0.5" fxhash = "0.2.1" hex = "0.4.2" rand = "0.8.5" +pcg-mwc = "0.2.1" serde_json = "1.0.59" hashbrown = "0.14.3" diff --git a/tests/map_tests.rs b/tests/map_tests.rs index 4ec2334..7849f4a 100644 --- a/tests/map_tests.rs +++ b/tests/map_tests.rs @@ -228,14 +228,18 @@ fn test_key_ref() { #[cfg(feature = "std")] #[test] fn test_byte_dist() { - use rand::{rngs::StdRng, SeedableRng, Rng}; + use rand::{SeedableRng, Rng, RngCore}; + use pcg_mwc::Mwc256XXA64; - let mut r = StdRng::seed_from_u64(0xe786_c22b_119c_1479); + let mut r = Mwc256XXA64::seed_from_u64(0xe786_c22b_119c_1479); + let mut lowest = 2.541; + let mut highest = 2.541; for _round in 0..100 { let mut table: [bool; 256 * 8] = [false; 256 * 8]; let hasher = RandomState::with_seeds(r.gen(), r.gen(), r.gen(), r.gen()); for i in 0..128 { let mut keys: [u8; 8] = hasher.hash_one(i as u64).to_ne_bytes(); + //let mut keys = r.next_u64().to_ne_bytes(); //This is a control to test assert sensitivity. for idx in 0..8 { while table[idx * 256 + keys[idx] as usize] { keys[idx] = keys[idx].wrapping_add(1); @@ -259,10 +263,16 @@ fn test_byte_dist() { } let mean = total_len as f32 / num_seq as f32; println!("Mean sequence length = {}", mean); - assert!(mean > 1.70); - assert!(mean < 3.375); + if mean > highest { + highest = mean; + } + if mean < lowest { + lowest = mean; + } } } + assert!(lowest > 1.9, "Lowest = {}", lowest); + assert!(highest < 3.9, "Highest = {}", highest); } From e94f2fb72cd8c4e74ca2854a80399287f0b81eca Mon Sep 17 00:00:00 2001 From: Tom Kaitchuck Date: Tue, 27 Feb 2024 10:11:44 -0800 Subject: [PATCH 3/3] Apply change to both files Signed-off-by: Tom Kaitchuck --- src/aes_hash.rs | 1 + 1 file changed, 1 insertion(+) diff --git a/src/aes_hash.rs b/src/aes_hash.rs index 0b9a1d4..7ad9af7 100644 --- a/src/aes_hash.rs +++ b/src/aes_hash.rs @@ -252,6 +252,7 @@ impl Hasher for AHasherU64 { #[inline] fn write_u64(&mut self, i: u64) { self.buffer = folded_multiply(i ^ self.buffer, MULTIPLE); + self.pad = self.pad.wrapping_add(i); } #[inline]