From 24491f60c3feb7e06f76a72262bfb35fadb256ed Mon Sep 17 00:00:00 2001 From: Tom Kaitchuck Date: Tue, 9 Aug 2022 22:55:01 -0700 Subject: [PATCH] Simplify interface (#121) * Use a runtime-rng feature flag instead of using OS detection (#82) * Use cfg-if to simplify conditions * Require rng for RandomState::default and new. * Don't use runtime rng when fuzzing * Add support for hash_one Signed-off-by: Tom Kaitchuck Co-authored-by: Amanieu d'Antras Co-authored-by: bl-ue <54780737+bl-ue@users.noreply.github.com> --- .github/workflows/rust.yml | 2 +- Cargo.toml | 33 +-- build.rs | 21 -- smhasher/ahash-cbindings/src/lib.rs | 8 +- src/aes_hash.rs | 4 +- src/fallback_hash.rs | 7 +- src/hash_map.rs | 2 - src/lib.rs | 146 +++++------ src/operations.rs | 6 +- src/random_state.rs | 372 ++++++++++++++++------------ src/specialize.rs | 23 +- tests/bench.rs | 74 +++++- tests/map_tests.rs | 18 +- tests/nopanic.rs | 31 ++- 14 files changed, 417 insertions(+), 330 deletions(-) diff --git a/.github/workflows/rust.yml b/.github/workflows/rust.yml index ba596bf..e2c51ca 100644 --- a/.github/workflows/rust.yml +++ b/.github/workflows/rust.yml @@ -148,4 +148,4 @@ jobs: - uses: actions-rs/cargo@v1 with: command: check - args: --target wasm32-unknown-unknown + args: --target wasm32-unknown-unknown --no-default-features diff --git a/Cargo.toml b/Cargo.toml index 4b56472..6a0093a 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "ahash" -version = "0.7.6" +version = "0.8.0" authors = ["Tom Kaitchuck "] license = "MIT OR Apache-2.0" description = "A non-cryptographic hash function using AES-NI for high performance" @@ -22,15 +22,18 @@ bench = true doc = true [features] -default = ["std"] +default = ["std", "runtime-rng"] # Enabling this will enable `AHashMap` and `AHashSet`. std = [] -# This is an alternitive to runtime key generation which does compile time key generation if getrandom is not available. -# (If getrandom is available this does nothing.) -# If this is on (and getrandom is off) it implies the produced binary will not be identical. -# If this is disabled and gerrandom is unavailable constant keys are used. +# Runtime random key generation using getrandom. +runtime-rng = ["getrandom"] + +# This is an alternative to runtime key generation which does compile time key generation if runtime-rng is not available. +# (If runtime-rng is enabled this does nothing.) +# If this is on (and runtime-rng is off) it implies the produced binary will not be identical. +# If this is disabled and runtime-rng is unavailable constant keys are used. compile-time-rng = ["const-random"] [[bench]] @@ -62,29 +65,27 @@ debug-assertions = false codegen-units = 1 [build-dependencies] -version_check = "0.9" - -[target.'cfg(any(target_os = "linux", target_os = "android", target_os = "windows", target_os = "macos", target_os = "ios", target_os = "freebsd", target_os = "openbsd", target_os = "netbsd", target_os = "dragonfly", target_os = "solaris", target_os = "illumos", target_os = "fuchsia", target_os = "redox", target_os = "cloudabi", target_os = "haiku", target_os = "vxworks", target_os = "emscripten", target_os = "wasi"))'.dependencies] -getrandom = { version = "0.2.3" } -const-random = { version = "0.1.12", optional = true } -serde = { version = "1.0.117", optional = true } +version_check = "0.9.4" -[target.'cfg(not(any(target_os = "linux", target_os = "android", target_os = "windows", target_os = "macos", target_os = "ios", target_os = "freebsd", target_os = "openbsd", target_os = "netbsd", target_os = "dragonfly", target_os = "solaris", target_os = "illumos", target_os = "fuchsia", target_os = "redox", target_os = "cloudabi", target_os = "haiku", target_os = "vxworks", target_os = "emscripten", target_os = "wasi")))'.dependencies] +[dependencies] +getrandom = { version = "0.2.3", optional = true } const-random = { version = "0.1.12", optional = true } serde = { version = "1.0.117", optional = true } +cfg-if = "1.0" [target.'cfg(not(all(target_arch = "arm", target_os = "none")))'.dependencies] -once_cell = { version = "1.8", default-features = false, features = ["alloc"] } +once_cell = { version = "1.8", default-features = false, features = ["unstable", "alloc"] } [dev-dependencies] no-panic = "0.1.10" -criterion = {version = "0.3.2"} +criterion = {version = "0.3.2", features = ["html_reports"] } seahash = "4.0" fnv = "1.0.5" fxhash = "0.2.1" hex = "0.4.2" -rand = "0.7.3" +rand = "0.8.5" serde_json = "1.0.59" +hashbrown = "0.12.3" [package.metadata.docs.rs] rustc-args = ["-C", "target-feature=+aes"] diff --git a/build.rs b/build.rs index 0738275..58d71a9 100644 --- a/build.rs +++ b/build.rs @@ -10,27 +10,6 @@ fn main() { if let Some(true) = version_check::supports_feature("stdsimd") { println!("cargo:rustc-cfg=feature=\"stdsimd\""); } - let os = env::var("CARGO_CFG_TARGET_OS").expect("CARGO_CFG_TARGET_OS was not set"); - if os.eq_ignore_ascii_case("linux") - || os.eq_ignore_ascii_case("android") - || os.eq_ignore_ascii_case("windows") - || os.eq_ignore_ascii_case("macos") - || os.eq_ignore_ascii_case("ios") - || os.eq_ignore_ascii_case("freebsd") - || os.eq_ignore_ascii_case("openbsd") - || os.eq_ignore_ascii_case("dragonfly") - || os.eq_ignore_ascii_case("solaris") - || os.eq_ignore_ascii_case("illumos") - || os.eq_ignore_ascii_case("fuchsia") - || os.eq_ignore_ascii_case("redox") - || os.eq_ignore_ascii_case("cloudabi") - || os.eq_ignore_ascii_case("haiku") - || os.eq_ignore_ascii_case("vxworks") - || os.eq_ignore_ascii_case("emscripten") - || os.eq_ignore_ascii_case("wasi") - { - println!("cargo:rustc-cfg=feature=\"runtime-rng\""); - } let arch = env::var("CARGO_CFG_TARGET_ARCH").expect("CARGO_CFG_TARGET_ARCH was not set"); if arch.eq_ignore_ascii_case("x86_64") || arch.eq_ignore_ascii_case("aarch64") diff --git a/smhasher/ahash-cbindings/src/lib.rs b/smhasher/ahash-cbindings/src/lib.rs index 571ae87..e53c03e 100644 --- a/smhasher/ahash-cbindings/src/lib.rs +++ b/smhasher/ahash-cbindings/src/lib.rs @@ -1,10 +1,12 @@ +#![feature(build_hasher_simple_hash_one)] + use ahash::*; use core::slice; -use std::hash::{BuildHasher, Hasher}; +use std::hash::{BuildHasher}; #[no_mangle] pub extern "C" fn ahash64(buf: *const (), len: usize, seed: u64) -> u64 { let buf: &[u8] = unsafe { slice::from_raw_parts(buf as *const u8, len) }; - let build_hasher = RandomState::with_seeds(seed, 0, 0, 0); - <[u8]>::get_hash(&buf, &build_hasher) + let build_hasher = RandomState::with_seed(seed as usize); + build_hasher.hash_one(&buf) } diff --git a/src/aes_hash.rs b/src/aes_hash.rs index 085833f..df67bfd 100644 --- a/src/aes_hash.rs +++ b/src/aes_hash.rs @@ -1,6 +1,4 @@ use crate::convert::*; -#[cfg(feature = "specialize")] -use crate::fallback_hash::MULTIPLE; use crate::operations::*; use crate::RandomState; use core::hash::Hasher; @@ -50,7 +48,7 @@ impl AHasher { /// println!("Hash is {:x}!", hasher.finish()); /// ``` #[inline] - pub fn new_with_keys(key1: u128, key2: u128) -> Self { + pub(crate) fn new_with_keys(key1: u128, key2: u128) -> Self { let pi: [u128; 2] = PI.convert(); let key1 = key1 ^ pi[0]; let key2 = key2 ^ pi[1]; diff --git a/src/fallback_hash.rs b/src/fallback_hash.rs index aad9efc..efad74c 100644 --- a/src/fallback_hash.rs +++ b/src/fallback_hash.rs @@ -1,12 +1,13 @@ use crate::convert::*; use crate::operations::folded_multiply; use crate::operations::read_small; +use crate::operations::MULTIPLE; use crate::random_state::PI; use crate::RandomState; use core::hash::Hasher; -///This constant come from Kunth's prng (Empirically it works better than those from splitmix32). -pub(crate) const MULTIPLE: u64 = 6364136223846793005; + + const ROT: u32 = 23; //17 /// A `Hasher` for hashing an arbitrary stream of bytes. @@ -31,7 +32,7 @@ impl AHasher { /// Creates a new hasher keyed to the provided key. #[inline] #[allow(dead_code)] // Is not called if non-fallback hash is used. - pub fn new_with_keys(key1: u128, key2: u128) -> AHasher { + pub(crate) fn new_with_keys(key1: u128, key2: u128) -> AHasher { let pi: [u128; 2] = PI.convert(); let key1: [u64; 2] = (key1 ^ pi[0]).convert(); let key2: [u64; 2] = (key2 ^ pi[1]).convert(); diff --git a/src/hash_map.rs b/src/hash_map.rs index 4cce377..693ae4b 100644 --- a/src/hash_map.rs +++ b/src/hash_map.rs @@ -163,8 +163,6 @@ where /// types that can be `==` without being identical. See the [module-level /// documentation] for more. /// - /// [module-level documentation]: crate::collections#insert-and-complex-keys - /// /// # Examples /// /// ``` diff --git a/src/lib.rs b/src/lib.rs index a66e562..8c146bf 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -8,103 +8,92 @@ //! //! aHash uses the hardware AES instruction on x86 processors to provide a keyed hash function. //! aHash is not a cryptographically secure hash. -//! -//! # Example -//! ``` -//! use ahash::{AHasher, RandomState}; -//! use std::collections::HashMap; -//! -//! let mut map: HashMap = HashMap::default(); -//! map.insert(12, 34); -//! ``` -//! For convenience, both new-type wrappers and type aliases are provided. The new type wrappers are called called `AHashMap` and `AHashSet`. These do the same thing with slightly less typing. -//! The type aliases are called `ahash::HashMap`, `ahash::HashSet` are also provided and alias the -//! std::[HashMap] and std::[HashSet]. Why are there two options? The wrappers are convenient but -//! can't be used where a generic `std::collection::HashMap` is required. //! -//! ```ignore -//! use ahash::AHashMap; -//! -//! let mut map: AHashMap = AHashMap::with_capacity(4); -//! map.insert(12, 34); -//! map.insert(56, 78); -//! // There are also type aliases provieded together with some extension traits to make -//! // it more of a drop in replacement for the std::HashMap/HashSet -//! use ahash::{HashMapExt, HashSetExt}; // Used to get with_capacity() -//! let mut map = ahash::HashMap::with_capacity(10); -//! map.insert(12, 34); -//! let mut set = ahash::HashSet::with_capacity(10); -//! set.insert(10); -//! ``` +#![cfg_attr(any(feature = "compile-time-rng", feature = "runtime-rng"), doc = r##" +# Example +``` +use ahash::{AHasher, RandomState}; +use std::collections::HashMap; + +let mut map: HashMap = HashMap::default(); +map.insert(12, 34); +``` +"##)] +#![cfg_attr(feature = "std", doc = r##" +For convenience, both new-type wrappers and type aliases are provided. The new type wrappers are called called `AHashMap` and `AHashSet`. These do the same thing with slightly less typing. +The type aliases are called `ahash::HashMap`, `ahash::HashSet` are also provided and alias the +std::[HashMap] and std::[HashSet]. Why are there two options? The wrappers are convenient but +can't be used where a generic `std::collection::HashMap` is required. + +``` +use ahash::AHashMap; + +let mut map: AHashMap = AHashMap::with_capacity(4); +map.insert(12, 34); +map.insert(56, 78); +// There are also type aliases provieded together with some extension traits to make +// it more of a drop in replacement for the std::HashMap/HashSet +use ahash::{HashMapExt, HashSetExt}; // Used to get with_capacity() +let mut map = ahash::HashMap::with_capacity(10); +map.insert(12, 34); +let mut set = ahash::HashSet::with_capacity(10); +set.insert(10); +``` +"##)] #![deny(clippy::correctness, clippy::complexity, clippy::perf)] #![allow(clippy::pedantic, clippy::cast_lossless, clippy::unreadable_literal)] #![cfg_attr(all(not(test), not(feature = "std")), no_std)] #![cfg_attr(feature = "specialize", feature(min_specialization))] +#![cfg_attr(feature = "specialize", feature(build_hasher_simple_hash_one))] #![cfg_attr(feature = "stdsimd", feature(stdsimd))] #[macro_use] mod convert; -#[cfg(any( - all(any(target_arch = "x86", target_arch = "x86_64"), target_feature = "aes", not(miri)), - all( - any(target_arch = "arm", target_arch = "aarch64"), - any(target_feature = "aes", target_feature = "crypto"), - not(miri), - feature = "stdsimd" - ) -))] -mod aes_hash; mod fallback_hash; + +cfg_if::cfg_if! { + if #[cfg(any( + all(any(target_arch = "x86", target_arch = "x86_64"), target_feature = "aes", not(miri)), + all(any(target_arch = "arm", target_arch = "aarch64"), + any(target_feature = "aes", target_feature = "crypto"), + not(miri), + feature = "stdsimd") + ))] { + mod aes_hash; + pub use crate::aes_hash::AHasher; + } else { + pub use crate::fallback_hash::AHasher; + } +} + +cfg_if::cfg_if! { + if #[cfg(feature = "std")] { + mod hash_map; + mod hash_set; + + pub use crate::hash_map::AHashMap; + pub use crate::hash_set::AHashSet; + + /// [Hasher]: std::hash::Hasher + /// [HashMap]: std::collections::HashMap + /// Type alias for [HashMap] + pub type HashMap = std::collections::HashMap; + + /// Type alias for [HashSet] + pub type HashSet = std::collections::HashSet; + } +} + #[cfg(test)] mod hash_quality_test; -#[cfg(feature = "std")] -/// [Hasher]: std::hash::Hasher -/// [HashMap]: std::collections::HashMap -/// Type alias for [HashMap] -pub type HashMap = std::collections::HashMap; -#[cfg(feature = "std")] -/// Type alias for [HashSet] -pub type HashSet = std::collections::HashSet; - -#[cfg(feature = "std")] -mod hash_map; -#[cfg(feature = "std")] -mod hash_set; mod operations; -mod random_state; +pub mod random_state; mod specialize; -#[cfg(any( - all(any(target_arch = "x86", target_arch = "x86_64"), target_feature = "aes", not(miri)), - all( - any(target_arch = "arm", target_arch = "aarch64"), - any(target_feature = "aes", target_feature = "crypto"), - not(miri), - feature = "stdsimd" - ) -))] -pub use crate::aes_hash::AHasher; - -#[cfg(not(any( - all(any(target_arch = "x86", target_arch = "x86_64"), target_feature = "aes", not(miri)), - all( - any(target_arch = "arm", target_arch = "aarch64"), - any(target_feature = "aes", target_feature = "crypto"), - not(miri), - feature = "stdsimd" - ) -)))] -pub use crate::fallback_hash::AHasher; pub use crate::random_state::RandomState; -pub use crate::specialize::CallHasher; - -#[cfg(feature = "std")] -pub use crate::hash_map::AHashMap; -#[cfg(feature = "std")] -pub use crate::hash_set::AHashSet; use core::hash::BuildHasher; use core::hash::Hash; use core::hash::Hasher; @@ -279,6 +268,7 @@ mod test { use crate::*; use std::collections::HashMap; use std::hash::Hash; + use crate::specialize::CallHasher; #[test] fn test_ahash_alias_map_construction() { diff --git a/src/operations.rs b/src/operations.rs index ab8f725..0ee98b9 100644 --- a/src/operations.rs +++ b/src/operations.rs @@ -1,5 +1,8 @@ use crate::convert::*; +///This constant come from Kunth's prng (Empirically it works better than those from splitmix32). +pub(crate) const MULTIPLE: u64 = 6364136223846793005; + /// This is a constant with a lot of special properties found by automated search. /// See the unit tests below. (Below are alternative values) #[cfg(all(target_feature = "ssse3", not(miri)))] @@ -60,7 +63,7 @@ pub(crate) fn add_and_shuffle(a: u128, b: u128) -> u128 { shuffle(sum.convert()) } -#[allow(unused)] //not used by fallbac +#[allow(unused)] //not used by fallback #[inline(always)] pub(crate) fn shuffle_and_add(base: u128, to_add: u128) -> u128 { let shuffled: [u64; 2] = shuffle(base).convert(); @@ -146,6 +149,7 @@ pub(crate) fn aesdec(value: u128, xor: u128) -> u128 { } } +#[allow(unused)] #[inline(always)] pub(crate) fn add_in_length(enc: &mut u128, len: u64) { #[cfg(all(target_arch = "x86_64", target_feature = "sse2", not(miri)))] diff --git a/src/random_state.rs b/src/random_state.rs index bfcc8bd..b750bcd 100644 --- a/src/random_state.rs +++ b/src/random_state.rs @@ -1,62 +1,34 @@ -#[cfg(all(feature = "runtime-rng", not(all(feature = "compile-time-rng", test))))] -use crate::convert::Convert; -#[cfg(feature = "specialize")] -use crate::BuildHasherExt; - -#[cfg(any( - all(any(target_arch = "x86", target_arch = "x86_64"), target_feature = "aes", not(miri)), - all(any(target_arch = "arm", target_arch = "aarch64"), any(target_feature = "aes", target_feature = "crypto"), not(miri), feature = "stdsimd") -))] -pub use crate::aes_hash::*; - -#[cfg(not(any( - all(any(target_arch = "x86", target_arch = "x86_64"), target_feature = "aes", not(miri)), - all(any(target_arch = "arm", target_arch = "aarch64"), any(target_feature = "aes", target_feature = "crypto"), not(miri), feature = "stdsimd") -)))] -pub use crate::fallback_hash::*; - -#[cfg(all(feature = "compile-time-rng", any(not(feature = "runtime-rng"), test)))] -use const_random::const_random; -use core::any::{Any, TypeId}; -use core::fmt; -use core::hash::BuildHasher; -#[cfg(feature = "specialize")] -use core::hash::Hash; -use core::hash::Hasher; -#[cfg(not(feature = "std"))] -extern crate alloc; -#[cfg(feature = "std")] -extern crate std as alloc; +use core::hash::Hash; +cfg_if::cfg_if! { + if #[cfg(any( + all(any(target_arch = "x86", target_arch = "x86_64"), target_feature = "aes", not(miri)), + all(any(target_arch = "arm", target_arch = "aarch64"), any(target_feature = "aes", target_feature = "crypto"), not(miri), feature = "stdsimd") + ))] { + use crate::aes_hash::*; + } else { + use crate::fallback_hash::*; + } +} +cfg_if::cfg_if! { + if #[cfg(feature = "specialize")]{ + use crate::BuildHasherExt; + } +} +cfg_if::cfg_if! { + if #[cfg(feature = "std")] { + extern crate std as alloc; + } else { + extern crate alloc; + } +} use alloc::boxed::Box; use core::sync::atomic::{AtomicUsize, Ordering}; -#[cfg(not(all(target_arch = "arm", target_os = "none")))] -use once_cell::race::OnceBox; - -#[cfg(any( - all(any(target_arch = "x86", target_arch = "x86_64"), target_feature = "aes", not(miri)), - all(any(target_arch = "arm", target_arch = "aarch64"), any(target_feature = "aes", target_feature = "crypto"), not(miri), feature = "stdsimd") -))] -use crate::aes_hash::*; -#[cfg(not(any( - all(any(target_arch = "x86", target_arch = "x86_64"), target_feature = "aes", not(miri)), - all(any(target_arch = "arm", target_arch = "aarch64"), any(target_feature = "aes", target_feature = "crypto"), not(miri), feature = "stdsimd") -)))] -use crate::fallback_hash::*; - -#[cfg(not(all(target_arch = "arm", target_os = "none")))] -static RAND_SOURCE: OnceBox> = OnceBox::new(); - -/// A supplier of Randomness used for different hashers. -/// See [RandomState.set_random_source]. -pub trait RandomSource { - - fn get_fixed_seeds(&self) -> &'static [[u64; 4]; 2]; - - fn gen_hasher_seed(&self) -> usize; - -} +use core::any::{Any, TypeId}; +use core::fmt; +use core::hash::BuildHasher; +use core::hash::Hasher; pub(crate) const PI: [u64; 4] = [ 0x243f_6a88_85a3_08d3, @@ -72,6 +44,82 @@ pub(crate) const PI2: [u64; 4] = [ 0x3f84_d5b5_b547_0917, ]; +cfg_if::cfg_if! { + if #[cfg(all(feature = "compile-time-rng", any(test, fuzzing)))] { + #[inline] + fn get_fixed_seeds() -> &'static [[u64; 4]; 2] { + use const_random::const_random; + + const RAND: [[u64; 4]; 2] = [ + [ + const_random!(u64), + const_random!(u64), + const_random!(u64), + const_random!(u64), + ], [ + const_random!(u64), + const_random!(u64), + const_random!(u64), + const_random!(u64), + ] + ]; + &RAND + } + } else if #[cfg(all(feature = "runtime-rng", not(fuzzing)))] { + #[inline] + fn get_fixed_seeds() -> &'static [[u64; 4]; 2] { + use crate::convert::Convert; + + static SEEDS: OnceBox<[[u64; 4]; 2]> = OnceBox::new(); + + SEEDS.get_or_init(|| { + let mut result: [u8; 64] = [0; 64]; + getrandom::getrandom(&mut result).expect("getrandom::getrandom() failed."); + Box::new(result.convert()) + }) + } + } else if #[cfg(feature = "compile-time-rng")] { + #[inline] + fn get_fixed_seeds() -> &'static [[u64; 4]; 2] { + use const_random::const_random; + + const RAND: [[u64; 4]; 2] = [ + [ + const_random!(u64), + const_random!(u64), + const_random!(u64), + const_random!(u64), + ], [ + const_random!(u64), + const_random!(u64), + const_random!(u64), + const_random!(u64), + ] + ]; + &RAND + } + } else { + fn get_fixed_seeds() -> &'static [[u64; 4]; 2] { + &[PI, PI2] + } + } +} + +cfg_if::cfg_if! { + if #[cfg(not(all(target_arch = "arm", target_os = "none")))] { + use once_cell::race::OnceBox; + + static RAND_SOURCE: OnceBox> = OnceBox::new(); + } +} +/// A supplier of Randomness used for different hashers. +/// See [set_random_source]. +pub trait RandomSource { + + fn gen_hasher_seed(&self) -> usize; + +} + struct DefaultRandomSource { counter: AtomicUsize, } @@ -83,6 +131,7 @@ impl DefaultRandomSource { } } + #[cfg(all(target_arch = "arm", target_os = "none"))] const fn default() -> DefaultRandomSource { DefaultRandomSource { counter: AtomicUsize::new(PI[3] as usize), @@ -91,55 +140,50 @@ impl DefaultRandomSource { } impl RandomSource for DefaultRandomSource { - - #[cfg(all(feature = "runtime-rng", not(all(feature = "compile-time-rng", test))))] - fn get_fixed_seeds(&self) -> &'static [[u64; 4]; 2] { - static SEEDS: OnceBox<[[u64; 4]; 2]> = OnceBox::new(); - - SEEDS.get_or_init(|| { - let mut result: [u8; 64] = [0; 64]; - getrandom::getrandom(&mut result).expect("getrandom::getrandom() failed."); - Box::new(result.convert()) - }) - } - - #[cfg(all(feature = "compile-time-rng", any(not(feature = "runtime-rng"), test)))] - fn get_fixed_seeds(&self) -> &'static [[u64; 4]; 2] { - const RAND: [[u64; 4]; 2] = [ - [ - const_random!(u64), - const_random!(u64), - const_random!(u64), - const_random!(u64), - ], [ - const_random!(u64), - const_random!(u64), - const_random!(u64), - const_random!(u64), - ] - ]; - &RAND - } - - #[cfg(all(not(feature = "runtime-rng"), not(feature = "compile-time-rng")))] - fn get_fixed_seeds(&self) -> &'static [[u64; 4]; 2] { - &[PI, PI2] - } - - #[cfg(not(all(target_arch = "arm", target_os = "none")))] - fn gen_hasher_seed(&self) -> usize { - let stack = self as *const _ as usize; - self.counter.fetch_add(stack, Ordering::Relaxed) + cfg_if::cfg_if! { + if #[cfg(all(target_arch = "arm", target_os = "none"))] { + fn gen_hasher_seed(&self) -> usize { + let stack = self as *const _ as usize; + let previous = self.counter.load(Ordering::Relaxed); + let new = previous.wrapping_add(stack); + self.counter.store(new, Ordering::Relaxed); + new + } + } else { + fn gen_hasher_seed(&self) -> usize { + let stack = self as *const _ as usize; + self.counter.fetch_add(stack, Ordering::Relaxed) + } + } } +} - #[cfg(all(target_arch = "arm", target_os = "none"))] - fn gen_hasher_seed(&self) -> usize { - let stack = self as *const _ as usize; - let previous = self.counter.load(Ordering::Relaxed); - let new = previous.wrapping_add(stack); - self.counter.store(new, Ordering::Relaxed); - new - } +cfg_if::cfg_if! { + if #[cfg(all(target_arch = "arm", target_os = "none"))] { + #[inline] + fn get_src() -> &'static dyn RandomSource { + static RAND_SOURCE: DefaultRandomSource = DefaultRandomSource::default(); + &RAND_SOURCE + } + } else { + /// Provides an optional way to manually supply a source of randomness for Hasher keys. + /// + /// The provided [RandomSource] will be used to be used as a source of randomness by [RandomState] to generate new states. + /// If this method is not invoked the standard source of randomness is used as described in the Readme. + /// + /// The source of randomness can only be set once, and must be set before the first RandomState is created. + /// If the source has already been specified `Err` is returned with a `bool` indicating if the set failed because + /// method was previously invoked (true) or if the default source is already being used (false). + #[cfg(not(all(target_arch = "arm", target_os = "none")))] + pub fn set_random_source(source: impl RandomSource + Send + Sync + 'static) -> Result<(), bool> { + RAND_SOURCE.set(Box::new(Box::new(source))).map_err(|s| s.as_ref().type_id() != TypeId::of::<&DefaultRandomSource>()) + } + + #[inline] + fn get_src() -> &'static dyn RandomSource { + RAND_SOURCE.get_or_init(|| Box::new(Box::new(DefaultRandomSource::new()))).as_ref() + } + } } /// Provides a [Hasher] factory. This is typically used (e.g. by [HashMap]) to create @@ -164,38 +208,12 @@ impl fmt::Debug for RandomState { } impl RandomState { - - /// Provides an optional way to manually supply a source of randomness for Hasher keys. - /// - /// The provided [RandomSource] will be used to be used as a source of randomness by [RandomState] to generate new states. - /// If this method is not invoked the standard source of randomness is used as described in the Readme. - /// - /// The source of randomness can only be set once, and must be set before the first RandomState is created. - /// If the source has already been specified `Err` is returned with a `bool` indicating if the set failed because - /// method was previously invoked (true) or if the default source is already being used (false). - #[cfg(not(all(target_arch = "arm", target_os = "none")))] - pub fn set_random_source(source: impl RandomSource + Send + Sync + 'static) -> Result<(), bool> { - RAND_SOURCE.set(Box::new(Box::new(source))).map_err(|s| s.as_ref().type_id() != TypeId::of::<&DefaultRandomSource>()) - } - - #[inline] - #[cfg(not(all(target_arch = "arm", target_os = "none")))] - fn get_src() -> &'static dyn RandomSource { - RAND_SOURCE.get_or_init(|| Box::new(Box::new(DefaultRandomSource::new()))).as_ref() - } - - #[inline] - #[cfg(all(target_arch = "arm", target_os = "none"))] - fn get_src() -> &'static dyn RandomSource { - static RAND_SOURCE: DefaultRandomSource = DefaultRandomSource::default(); - &RAND_SOURCE - } - /// Use randomly generated keys #[inline] + #[cfg(any(feature = "compile-time-rng", feature = "runtime-rng"))] pub fn new() -> RandomState { - let src = Self::get_src(); - let fixed = src.get_fixed_seeds(); + let src = get_src(); + let fixed = get_fixed_seeds(); Self::from_keys(&fixed[0], &fixed[1], src.gen_hasher_seed()) } @@ -203,8 +221,8 @@ impl RandomState { /// This is done using a static counter, so it can safely be used with a fixed keys. #[inline] pub fn generate_with(k0: u64, k1: u64, k2: u64, k3: u64) -> RandomState { - let src = Self::get_src(); - let fixed = src.get_fixed_seeds(); + let src = get_src(); + let fixed = get_fixed_seeds(); RandomState::from_keys(&fixed[0], &[k0, k1, k2, k3], src.gen_hasher_seed()) } @@ -229,7 +247,7 @@ impl RandomState { /// Internal. Used by Default. #[inline] pub(crate) fn with_fixed_keys() -> RandomState { - let [k0, k1, k2, k3] = Self::get_src().get_fixed_seeds()[0]; + let [k0, k1, k2, k3] = get_fixed_seeds()[0]; RandomState { k0, k1, k2, k3 } } @@ -238,7 +256,7 @@ impl RandomState { /// Note: This method does not require the provided seed to be strong. #[inline] pub fn with_seed(key: usize) -> RandomState { - let fixed = Self::get_src().get_fixed_seeds(); + let fixed = get_fixed_seeds(); RandomState::from_keys(&fixed[0], &fixed[1], key) } @@ -250,8 +268,26 @@ impl RandomState { pub const fn with_seeds(k0: u64, k1: u64, k2: u64, k3: u64) -> RandomState { RandomState { k0: k0 ^ PI2[0], k1: k1 ^ PI2[1], k2: k2 ^ PI2[2], k3: k3 ^ PI2[3] } } + + /// Calculates the hash of a single value. + /// + /// This is intended as a convenience for code which *consumes* hashes, such + /// as the implementation of a hash table or in unit tests that check + /// whether a custom [`Hash`] implementation behaves as expected. + /// + /// This must not be used in any code which *creates* hashes, such as in an + /// implementation of [`Hash`]. The way to create a combined hash of + /// multiple values is to call [`Hash::hash`] multiple times using the same + /// [`Hasher`], not to call this method repeatedly and combine the results. + #[inline] + pub fn hash_one(&self, x: T) -> u64 + where Self: Sized { + use crate::specialize::CallHasher; + T::get_hash(&x, self) + } } +#[cfg(any(feature = "compile-time-rng", feature = "runtime-rng"))] impl Default for RandomState { #[inline] fn default() -> Self { @@ -267,26 +303,26 @@ impl BuildHasher for RandomState { /// [AHasher]s that will return different hashcodes, but [Hasher]s created from the same [BuildHasher] /// will generate the same hashes for the same input data. /// - /// # Examples - /// - /// ``` - /// use ahash::{AHasher, RandomState}; - /// use std::hash::{Hasher, BuildHasher}; - /// - /// let build_hasher = RandomState::new(); - /// let mut hasher_1 = build_hasher.build_hasher(); - /// let mut hasher_2 = build_hasher.build_hasher(); - /// - /// hasher_1.write_u32(1234); - /// hasher_2.write_u32(1234); - /// - /// assert_eq!(hasher_1.finish(), hasher_2.finish()); - /// - /// let other_build_hasher = RandomState::new(); - /// let mut different_hasher = other_build_hasher.build_hasher(); - /// different_hasher.write_u32(1234); - /// assert_ne!(different_hasher.finish(), hasher_1.finish()); - /// ``` + #[cfg_attr(any(feature = "compile-time-rng", feature = "runtime-rng"), doc = r##" # Examples +``` + use ahash::{AHasher, RandomState}; + use std::hash::{Hasher, BuildHasher}; + + let build_hasher = RandomState::new(); + let mut hasher_1 = build_hasher.build_hasher(); + let mut hasher_2 = build_hasher.build_hasher(); + + hasher_1.write_u32(1234); + hasher_2.write_u32(1234); + + assert_eq!(hasher_1.finish(), hasher_2.finish()); + + let other_build_hasher = RandomState::new(); + let mut different_hasher = other_build_hasher.build_hasher(); + different_hasher.write_u32(1234); + assert_ne!(different_hasher.finish(), hasher_1.finish()); +``` + "##)] /// [Hasher]: std::hash::Hasher /// [BuildHasher]: std::hash::BuildHasher /// [HashMap]: std::collections::HashMap @@ -294,6 +330,22 @@ impl BuildHasher for RandomState { fn build_hasher(&self) -> AHasher { AHasher::from_random_state(self) } + + /// Calculates the hash of a single value. + /// + /// This is intended as a convenience for code which *consumes* hashes, such + /// as the implementation of a hash table or in unit tests that check + /// whether a custom [`Hash`] implementation behaves as expected. + /// + /// This must not be used in any code which *creates* hashes, such as in an + /// implementation of [`Hash`]. The way to create a combined hash of + /// multiple values is to call [`Hash::hash`] multiple times using the same + /// [`Hasher`], not to call this method repeatedly and combine the results. + #[cfg(feature = "specialize")] + #[inline] + fn hash_one(&self, x: T) -> u64 { + RandomState::hash_one(self, x) + } } #[cfg(feature = "specialize")] @@ -329,27 +381,27 @@ mod test { #[test] fn test_unique() { - let a = RandomState::new(); - let b = RandomState::new(); + let a = RandomState::generate_with(1, 2, 3, 4); + let b = RandomState::generate_with(1, 2, 3, 4); assert_ne!(a.build_hasher().finish(), b.build_hasher().finish()); } #[cfg(all(feature = "runtime-rng", not(all(feature = "compile-time-rng", test))))] #[test] fn test_not_pi() { - assert_ne!(PI, RandomState::get_src().get_fixed_seeds()[0]); + assert_ne!(PI, get_fixed_seeds()[0]); } #[cfg(all(feature = "compile-time-rng", any(not(feature = "runtime-rng"), test)))] #[test] fn test_not_pi_const() { - assert_ne!(PI, RandomState::get_src().get_fixed_seeds()[0]); + assert_ne!(PI, get_fixed_seeds()[0]); } #[cfg(all(not(feature = "runtime-rng"), not(feature = "compile-time-rng")))] #[test] fn test_pi() { - assert_eq!(PI, RandomState::get_src().get_fixed_seeds()[0]); + assert_eq!(PI, get_fixed_seeds()[0]); } #[test] diff --git a/src/specialize.rs b/src/specialize.rs index d94a4ee..05d335b 100644 --- a/src/specialize.rs +++ b/src/specialize.rs @@ -17,28 +17,7 @@ use alloc::vec::Vec; /// Provides a way to get an optimized hasher for a given data type. /// Rather than using a Hasher generically which can hash any value, this provides a way to get a specialized hash /// for a specific type. So this may be faster for primitive types. -/// # Example -/// ``` -/// use std::hash::BuildHasher; -/// use ahash::RandomState; -/// use ahash::CallHasher; -/// -/// let hash_builder = RandomState::new(); -/// //... -/// let value: u32 = 17; -/// let hash = u32::get_hash(&value, &hash_builder); -/// ``` -/// Note that the type used to invoke `get_hash` must be the same a the type of value passed. -/// For example get a hasher specialized on `[u8]` can invoke: -/// ``` -/// /// use std::hash::BuildHasher; -/// # use ahash::RandomState; -/// # use ahash::CallHasher; -/// # let hash_builder = RandomState::new(); -/// let bytes: [u8; 4] = [1, 2, 3, 4]; -/// let hash = <[u8]>::get_hash(&bytes, &hash_builder); -/// ``` -pub trait CallHasher { +pub(crate) trait CallHasher { fn get_hash(value: &H, build_hasher: &B) -> u64; } diff --git a/tests/bench.rs b/tests/bench.rs index 0b3594d..251e134 100644 --- a/tests/bench.rs +++ b/tests/bench.rs @@ -1,8 +1,10 @@ -use ahash::{CallHasher, RandomState}; +#![cfg_attr(feature = "specialize", feature(build_hasher_simple_hash_one))] + +use ahash::{AHasher, RandomState}; use criterion::*; use fxhash::FxHasher; use std::collections::hash_map::DefaultHasher; -use std::hash::{Hash, Hasher}; +use std::hash::{BuildHasherDefault, Hash, Hasher}; #[cfg(any( all(any(target_arch = "x86", target_arch = "x86_64"), target_feature = "aes", not(miri)), @@ -10,7 +12,7 @@ use std::hash::{Hash, Hasher}; ))] fn aeshash(b: &H) -> u64 { let build_hasher = RandomState::with_seeds(1, 2, 3, 4); - H::get_hash(b, &build_hasher) + build_hasher.hash_one(b) } #[cfg(not(any( all(any(target_arch = "x86", target_arch = "x86_64"), target_feature = "aes", not(miri)), @@ -26,7 +28,7 @@ fn aeshash(_b: &H) -> u64 { )))] fn fallbackhash(b: &H) -> u64 { let build_hasher = RandomState::with_seeds(1, 2, 3, 4); - H::get_hash(b, &build_hasher) + build_hasher.hash_one(b) } #[cfg(any( all(any(target_arch = "x86", target_arch = "x86_64"), target_feature = "aes", not(miri)), @@ -144,6 +146,67 @@ fn bench_sip(c: &mut Criterion) { group.bench_with_input("string", &gen_strings(), |b, s| b.iter(|| black_box(siphash(s)))); } +fn bench_map(c: &mut Criterion) { + #[cfg(feature = "std")] + { + let mut group = c.benchmark_group("map"); + group.bench_function("aHash-alias", |b| b.iter(|| { + let hm: ahash::HashMap = (0..1_000_000).map(|i| (i, i)).collect(); + let mut sum = 0; + for i in 0..1_000_000 { + if let Some(x) = hm.get(&i) { + sum += x; + } + } + })); + group.bench_function("aHash-hashBrown", |b| b.iter(|| { + let hm: hashbrown::HashMap = (0..1_000_000).map(|i| (i, i)).collect(); + let mut sum = 0; + for i in 0..1_000_000 { + if let Some(x) = hm.get(&i) { + sum += x; + } + } + })); + group.bench_function("aHash-hashBrown-explicit", |b| b.iter(|| { + let hm: hashbrown::HashMap = (0..1_000_000).map(|i| (i, i)).collect(); + let mut sum = 0; + for i in 0..1_000_000 { + if let Some(x) = hm.get(&i) { + sum += x; + } + } + })); + group.bench_function("aHash-wrapper", |b| b.iter(|| { + let hm: ahash::AHashMap = (0..1_000_000).map(|i| (i, i)).collect(); + let mut sum = 0; + for i in 0..1_000_000 { + if let Some(x) = hm.get(&i) { + sum += x; + } + } + })); + group.bench_function("aHash-rand", |b| b.iter(|| { + let hm: std::collections::HashMap = (0..1_000_000).map(|i| (i, i)).collect(); + let mut sum = 0; + for i in 0..1_000_000 { + if let Some(x) = hm.get(&i) { + sum += x; + } + } + })); + group.bench_function("aHash-default", |b| b.iter(|| { + let hm: std::collections::HashMap> = (0..1_000_000).map(|i| (i, i)).collect(); + let mut sum = 0; + for i in 0..1_000_000 { + if let Some(x) = hm.get(&i) { + sum += x; + } + } + })); + } +} + criterion_main!(benches); #[cfg(any( @@ -169,5 +232,6 @@ criterion_group!( bench_fx, bench_fnv, bench_sea, - bench_sip + bench_sip, + bench_map, ); diff --git a/tests/map_tests.rs b/tests/map_tests.rs index 3647169..3f4a06a 100644 --- a/tests/map_tests.rs +++ b/tests/map_tests.rs @@ -1,9 +1,10 @@ +#![cfg_attr(feature = "specialize", feature(build_hasher_simple_hash_one))] + use std::hash::{BuildHasher, Hash, Hasher}; use criterion::*; use fxhash::FxHasher; - -use ahash::{AHasher, CallHasher, RandomState}; +use ahash::RandomState; fn gen_word_pairs() -> Vec { let words: Vec<_> = r#" @@ -150,9 +151,18 @@ fn check_for_collisions(build_hasher: &B, items: &[H], ); } +#[cfg(feature = "specialize")] +#[allow(unused)] // False positive +fn hash(b: &H, build_hasher: &B) -> u64 { + build_hasher.hash_one(b) +} + +#[cfg(not(feature = "specialize"))] #[allow(unused)] // False positive fn hash(b: &H, build_hasher: &B) -> u64 { - H::get_hash(b, build_hasher) + let mut hasher = build_hasher.build_hasher(); + b.hash(&mut hasher); + hasher.finish() } #[test] @@ -193,7 +203,7 @@ fn test_ahash_alias_set_construction() { fn ahash_vec(b: &Vec) -> u64 { let mut total: u64 = 0; for item in b { - let mut hasher = AHasher::new_with_keys(1234, 5678); + let mut hasher = RandomState::with_seeds(12, 34, 56, 78).build_hasher(); item.hash(&mut hasher); total = total.wrapping_add(hasher.finish()); } diff --git a/tests/nopanic.rs b/tests/nopanic.rs index d48ff55..4ad13bd 100644 --- a/tests/nopanic.rs +++ b/tests/nopanic.rs @@ -1,5 +1,5 @@ -use ahash::{AHasher, CallHasher, RandomState}; -use std::hash::BuildHasher; +use ahash::{AHasher, RandomState}; +use std::hash::{BuildHasher, Hash, Hasher}; #[macro_use] extern crate no_panic; @@ -8,8 +8,8 @@ extern crate no_panic; #[no_panic] fn hash_test_final(num: i32, string: &str) -> (u64, u64) { use core::hash::Hasher; - let mut hasher1 = AHasher::new_with_keys(1, 2); - let mut hasher2 = AHasher::new_with_keys(3, 4); + let mut hasher1 = RandomState::with_seeds(1, 2, 3, 4).build_hasher(); + let mut hasher2 = RandomState::with_seeds(3, 4, 5, 6).build_hasher(); hasher1.write_i32(num); hasher2.write(string.as_bytes()); (hasher1.finish(), hasher2.finish()) @@ -24,6 +24,14 @@ struct SimpleBuildHasher { hasher: AHasher, } +impl SimpleBuildHasher { + fn hash_one(&self, x: T) -> u64 where Self: Sized { + let mut hasher = self.build_hasher(); + x.hash(&mut hasher); + hasher.finish() + } +} + impl BuildHasher for SimpleBuildHasher { type Hasher = AHasher; @@ -35,11 +43,11 @@ impl BuildHasher for SimpleBuildHasher { #[inline(never)] #[no_panic] fn hash_test_specialize(num: i32, string: &str) -> (u64, u64) { - let hasher1 = AHasher::new_with_keys(1, 2); - let hasher2 = AHasher::new_with_keys(1, 2); + let hasher1 = RandomState::with_seeds(1, 2, 3, 4).build_hasher(); + let hasher2 = RandomState::with_seeds(1, 2, 3, 4).build_hasher(); ( - i32::get_hash(&num, &SimpleBuildHasher { hasher: hasher1 }), - <[u8]>::get_hash(string.as_bytes(), &SimpleBuildHasher { hasher: hasher2 }), + SimpleBuildHasher { hasher: hasher1 }.hash_one(num), + SimpleBuildHasher { hasher: hasher2 }.hash_one(string.as_bytes()), ) } @@ -54,8 +62,8 @@ fn hash_test_random(num: i32, string: &str) -> (u64, u64) { let build_hasher1 = RandomState::with_seeds(1, 2, 3, 4); let build_hasher2 = RandomState::with_seeds(1, 2, 3, 4); ( - i32::get_hash(&num, &build_hasher1), - <[u8]>::get_hash(string.as_bytes(), &build_hasher2), + build_hasher1.hash_one(&num), + build_hasher2.hash_one(string.as_bytes()) ) } @@ -68,5 +76,6 @@ fn hash_test_specialize_wrapper(num: i32, string: &str) { fn test_no_panic() { hash_test_final_wrapper(2, "Foo"); hash_test_specialize_wrapper(2, "Bar"); - hash_test_random_wrapper(2, "Baz"); + hash_test_random(2, "Baz"); + hash_test_random_wrapper(2, "Bat"); }