Skip to content

Commit c47f2ed

Browse files
committed
std: Stabilize the std::hash module
This commit aims to stabilize the `std::hash` module by standardizing on its hashing interface while rationalizing the current usage with the `HashMap` and `HashSet` types. The primary goal of this slight redesign is to separate the concepts of a hasher's state from a hashing algorithm itself. The primary change of this commit is to separate the `Hasher` trait into a `Hasher` and a `HashState` trait. Conceptually the old `Hasher` trait was actually just a factory for various states, but hashing had very little control over how these states were used. Additionally the old `Hasher` trait was actually fairly unrelated to hashing. This commit redesigns the existing `Hasher` trait to match what the notion of a `Hasher` normally implies with the following definition: trait Hasher: Writer { type Output; fn reset(&mut self); fn finish(&self) -> Output; } Note that the `Output` associated type is currently a type parameter due to associated types not being fully implemented yet. This new `Hasher` trait emphasizes that all hashers are sinks for bytes, and hashing algorithms may produce outputs other than a `u64`, so the output type is made generic. With this definition, the old `Hasher` trait is realized as a new `HashState` trait in the `collections::hash_state` module as an experimental addition for now. The current definition looks like: trait HashState { type H: Hasher; fn hasher(&self) -> H; } Note that the `H` associated type (along with its `O` output) are both type parameters on the `HashState` trait due to the current limitations of associated types. The purpose of this trait is to emphasize that the one piece of functionality for implementors is that new instances of `Hasher` can be created. This conceptually represents the two keys from which more instances of a `SipHasher` can be created, and a `HashState` is what's stored in a `HashMap`, not a `Hasher`. 
Implementors of custom hash algorithms should implement the `Hasher` trait, and only hash algorithms intended for use in hash maps need to implement or worry about the `HashState` trait. Some other stability decisions made for the `std::hash` module are: * The name of the module, hash, is `#![stable]` * The `Hash` and `Hasher` traits are `#[unstable]` due to type parameters that want to be associated types. * The `Writer` trait remains `#[experimental]` as it's intended to be replaced with an `io::Writer` (more details soon). * The top-level `hash` function is `#[unstable]` as it is intended to be generic over the hashing algorithm instead of hardwired to `SipHasher` * The inner `sip` module is now private as its one export, `SipHasher`, is reexported in the `hash` module. There are many breaking changes outlined above, and as a result this commit is a: [breaking-change]
1 parent a592124 commit c47f2ed

File tree

23 files changed

+503
-449
lines changed

23 files changed

+503
-449
lines changed

src/libcollections/hash/mod.rs

+75-52
Original file line numberDiff line numberDiff line change
@@ -38,7 +38,7 @@
3838
//! ```rust
3939
//! use std::hash;
4040
//! use std::hash::Hash;
41-
//! use std::hash::sip::SipState;
41+
//! use std::hash::SipHasher;
4242
//!
4343
//! struct Person {
4444
//! id: uint,
@@ -47,7 +47,7 @@
4747
//! }
4848
//!
4949
//! impl Hash for Person {
50-
//! fn hash(&self, state: &mut SipState) {
50+
//! fn hash(&self, state: &mut SipHasher) {
5151
//! self.id.hash(state);
5252
//! self.phone.hash(state);
5353
//! }
@@ -60,42 +60,63 @@
6060
//! ```
6161
6262
#![allow(unused_must_use)]
63+
#![stable]
6364

6465
use core::prelude::*;
6566

6667
use alloc::boxed::Box;
6768
use alloc::rc::Rc;
6869
use core::borrow::{Cow, ToOwned};
70+
use core::default::Default;
6971
use core::intrinsics::TypeId;
7072
use core::mem;
7173
use core::num::Int;
72-
7374
use vec::Vec;
7475

75-
/// Reexport the `sip::hash` function as our default hasher.
76-
pub use self::sip::hash as hash;
76+
pub use self::sip::SipHasher;
7777

78-
pub mod sip;
78+
mod sip;
7979

8080
/// A hashable type. The `S` type parameter is an abstract hash state that is
8181
/// used by the `Hash` to compute the hash. It defaults to
8282
/// `std::hash::SipHasher`.
83-
pub trait Hash<S = sip::SipState> for Sized? {
83+
#[unstable = "waiting for std::hash dust to settle"]
84+
pub trait Hash<S: Writer = SipHasher> for Sized? {
8485
/// Computes the hash of a value.
8586
fn hash(&self, state: &mut S);
8687
}
8788

8889
/// A trait that computes a hash for a value. The main users of this trait are
8990
/// containers like `HashMap`, which need a generic way to hash multiple types.
90-
pub trait Hasher<S> {
91-
/// Compute the hash of a value.
92-
fn hash<Sized? T: Hash<S>>(&self, value: &T) -> u64;
91+
// FIXME(#17307) Output should be an associated type
92+
#[unstable = "the Output type parameter should be an associated type"]
93+
pub trait Hasher<Output>: Writer {
94+
/// Resets this hasher back to its initial state (as if it were just
95+
/// created).
96+
#[stable]
97+
fn reset(&mut self);
98+
99+
/// Completes a round of hashing, producing the output hash generated.
100+
#[unstable = "may be renamed or may take some extra bytes"]
101+
fn finish(&self) -> Output;
93102
}
94103

104+
#[experimental = "this trait will likely be replaced by io::Writer"]
95105
pub trait Writer {
96106
fn write(&mut self, bytes: &[u8]);
97107
}
98108

109+
/// Hash a value with the default SipHasher algorithm (two initial keys of 0).
110+
///
111+
/// The specified value will be hashed with this hasher and then the resulting
112+
/// hash will be returned.
113+
#[unstable = "the hashing algorithm used will likely become generic soon"]
114+
pub fn hash<T: Hash<SipHasher>>(value: &T) -> u64 {
115+
let mut h: SipHasher = Default::default();
116+
value.hash(&mut h);
117+
h.finish()
118+
}
119+
99120
//////////////////////////////////////////////////////////////////////////////
100121

101122
macro_rules! impl_hash {
@@ -283,7 +304,8 @@ impl<S: Writer, T: Hash<S>, U: Hash<S>> Hash<S> for Result<T, U> {
283304
}
284305
}
285306

286-
impl<'a, T, Sized? B, S> Hash<S> for Cow<'a, T, B> where B: Hash<S> + ToOwned<T> {
307+
impl<'a, T, Sized? B, S> Hash<S> for Cow<'a, T, B>
308+
where B: Hash<S> + ToOwned<T>, S: Writer {
287309
#[inline]
288310
fn hash(&self, state: &mut S) {
289311
Hash::hash(&**self, state)
@@ -300,16 +322,6 @@ mod tests {
300322
use slice::SlicePrelude;
301323
use super::{Hash, Hasher, Writer};
302324

303-
struct MyWriterHasher;
304-
305-
impl Hasher<MyWriter> for MyWriterHasher {
306-
fn hash<Sized? T: Hash<MyWriter>>(&self, value: &T) -> u64 {
307-
let mut state = MyWriter { hash: 0 };
308-
value.hash(&mut state);
309-
state.hash
310-
}
311-
}
312-
313325
struct MyWriter {
314326
hash: u64,
315327
}
@@ -323,69 +335,80 @@ mod tests {
323335
}
324336
}
325337

338+
impl Hasher<u64> for MyWriter {
339+
fn reset(&mut self) { self.hash = 0; }
340+
fn finish(&self) -> u64 { self.hash }
341+
}
342+
326343
#[test]
327344
fn test_writer_hasher() {
328345
use alloc::boxed::Box;
329346

330-
let hasher = MyWriterHasher;
347+
fn hash<T: Hash<MyWriter>>(t: &T) -> u64 {
348+
let mut hasher = MyWriter { hash: 0 };
349+
t.hash(&mut hasher);
350+
hasher.finish()
351+
}
331352

332-
assert_eq!(hasher.hash(&()), 0);
353+
assert_eq!(hash(&()), 0);
333354

334-
assert_eq!(hasher.hash(&5u8), 5);
335-
assert_eq!(hasher.hash(&5u16), 5);
336-
assert_eq!(hasher.hash(&5u32), 5);
337-
assert_eq!(hasher.hash(&5u64), 5);
338-
assert_eq!(hasher.hash(&5u), 5);
355+
assert_eq!(hash(&5u8), 5);
356+
assert_eq!(hash(&5u16), 5);
357+
assert_eq!(hash(&5u32), 5);
358+
assert_eq!(hash(&5u64), 5);
359+
assert_eq!(hash(&5u), 5);
339360

340-
assert_eq!(hasher.hash(&5i8), 5);
341-
assert_eq!(hasher.hash(&5i16), 5);
342-
assert_eq!(hasher.hash(&5i32), 5);
343-
assert_eq!(hasher.hash(&5i64), 5);
344-
assert_eq!(hasher.hash(&5i), 5);
361+
assert_eq!(hash(&5i8), 5);
362+
assert_eq!(hash(&5i16), 5);
363+
assert_eq!(hash(&5i32), 5);
364+
assert_eq!(hash(&5i64), 5);
365+
assert_eq!(hash(&5i), 5);
345366

346-
assert_eq!(hasher.hash(&false), 0);
347-
assert_eq!(hasher.hash(&true), 1);
367+
assert_eq!(hash(&false), 0);
368+
assert_eq!(hash(&true), 1);
348369

349-
assert_eq!(hasher.hash(&'a'), 97);
370+
assert_eq!(hash(&'a'), 97);
350371

351372
let s: &str = "a";
352-
assert_eq!(hasher.hash(& s), 97 + 0xFF);
373+
assert_eq!(hash(& s), 97 + 0xFF);
353374
// FIXME (#18283) Enable test
354375
//let s: Box<str> = box "a";
355376
//assert_eq!(hash(& s), 97 + 0xFF);
356377
let cs: &[u8] = &[1u8, 2u8, 3u8];
357-
assert_eq!(hasher.hash(& cs), 9);
378+
assert_eq!(hash(& cs), 9);
358379
let cs: Box<[u8]> = box [1u8, 2u8, 3u8];
359-
assert_eq!(hasher.hash(& cs), 9);
380+
assert_eq!(hash(& cs), 9);
360381

361382
// FIXME (#18248) Add tests for hashing Rc<str> and Rc<[T]>
362383

363-
unsafe {
364-
let ptr: *const int = mem::transmute(5i);
365-
assert_eq!(hasher.hash(&ptr), 5);
366-
}
384+
let ptr = 5i as *const int;
385+
assert_eq!(hash(&ptr), 5);
367386

368-
unsafe {
369-
let ptr: *mut int = mem::transmute(5i);
370-
assert_eq!(hasher.hash(&ptr), 5);
371-
}
387+
let ptr = 5i as *mut int;
388+
assert_eq!(hash(&ptr), 5);
372389
}
373390

374391
struct Custom {
375392
hash: u64
376393
}
377394

378-
impl Hash<u64> for Custom {
379-
fn hash(&self, state: &mut u64) {
380-
*state = self.hash;
395+
struct CustomHasher { state: u64 }
396+
397+
impl Writer for CustomHasher {
398+
fn write(&mut self, _data: &[u8]) {}
399+
}
400+
401+
impl Hash<CustomHasher> for Custom {
402+
fn hash(&self, state: &mut CustomHasher) {
403+
state.state = self.hash;
381404
}
382405
}
383406

384407
#[test]
385408
fn test_custom_state() {
386409
let custom = Custom { hash: 5 };
387-
let mut state = 0;
410+
let mut state = CustomHasher { state: 0 };
388411
custom.hash(&mut state);
389-
assert_eq!(state, 5);
412+
assert_eq!(state.state, 5);
390413
}
391414
}

0 commit comments

Comments
 (0)