Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Encode hashes as bytes, not varint #110083

Merged
merged 3 commits into from
Apr 19, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 2 additions & 1 deletion compiler/rustc_abi/src/layout.rs
Original file line number Diff line number Diff line change
Expand Up @@ -79,7 +79,8 @@ pub trait LayoutCalculator {
{
// `ReprOptions.layout_seed` is a deterministic seed that we can use to
// randomize field ordering with
let mut rng = Xoshiro128StarStar::seed_from_u64(repr.field_shuffle_seed);
let mut rng =
Xoshiro128StarStar::seed_from_u64(repr.field_shuffle_seed.as_u64());

// Shuffle the ordering of the fields
optimizing.shuffle(&mut rng);
Expand Down
7 changes: 4 additions & 3 deletions compiler/rustc_abi/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@ use std::str::FromStr;

use bitflags::bitflags;
use rustc_data_structures::intern::Interned;
use rustc_data_structures::stable_hasher::Hash64;
#[cfg(feature = "nightly")]
use rustc_data_structures::stable_hasher::StableOrd;
use rustc_index::vec::{IndexSlice, IndexVec};
Expand Down Expand Up @@ -77,12 +78,12 @@ pub struct ReprOptions {
pub flags: ReprFlags,
/// The seed to be used for randomizing a type's layout
///
/// Note: This could technically be a `[u8; 16]` (a `u128`) which would
/// Note: This could technically be a `Hash128` which would
/// be the "most accurate" hash as it'd encompass the item and crate
/// hash without loss, but it does pay the price of being larger.
/// Everything's a tradeoff, a `u64` seed should be sufficient for our
/// Everything's a tradeoff, a 64-bit seed should be sufficient for our
/// purposes (primarily `-Z randomize-layout`)
pub field_shuffle_seed: u64,
pub field_shuffle_seed: Hash64,
}

impl ReprOptions {
Expand Down
4 changes: 2 additions & 2 deletions compiler/rustc_codegen_llvm/src/common.rs
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@ use crate::value::Value;
use rustc_ast::Mutability;
use rustc_codegen_ssa::mir::place::PlaceRef;
use rustc_codegen_ssa::traits::*;
use rustc_data_structures::stable_hasher::{HashStable, StableHasher};
use rustc_data_structures::stable_hasher::{Hash128, HashStable, StableHasher};
use rustc_hir::def_id::DefId;
use rustc_middle::bug;
use rustc_middle::mir::interpret::{ConstAllocation, GlobalAlloc, Scalar};
Expand Down Expand Up @@ -261,7 +261,7 @@ impl<'ll, 'tcx> ConstMethods<'tcx> for CodegenCx<'ll, 'tcx> {
let hash = self.tcx.with_stable_hashing_context(|mut hcx| {
let mut hasher = StableHasher::new();
alloc.hash_stable(&mut hcx, &mut hasher);
hasher.finish::<u128>()
hasher.finish::<Hash128>()
});
llvm::set_value_name(value, format!("alloc_{hash:032x}").as_bytes());
}
Expand Down
3 changes: 2 additions & 1 deletion compiler/rustc_codegen_llvm/src/debuginfo/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@ use rustc_codegen_ssa::debuginfo::type_names;
use rustc_codegen_ssa::mir::debuginfo::{DebugScope, FunctionDebugContext, VariableKind};
use rustc_codegen_ssa::traits::*;
use rustc_data_structures::fx::FxHashMap;
use rustc_data_structures::stable_hasher::Hash128;
use rustc_data_structures::sync::Lrc;
use rustc_hir::def_id::{DefId, DefIdMap};
use rustc_index::vec::IndexVec;
Expand Down Expand Up @@ -61,7 +62,7 @@ pub struct CodegenUnitDebugContext<'ll, 'tcx> {
llcontext: &'ll llvm::Context,
llmod: &'ll llvm::Module,
builder: &'ll mut DIBuilder<'ll>,
created_files: RefCell<FxHashMap<Option<(u128, SourceFileHash)>, &'ll DIFile>>,
created_files: RefCell<FxHashMap<Option<(Hash128, SourceFileHash)>, &'ll DIFile>>,

type_map: metadata::TypeMap<'ll, 'tcx>,
namespace_map: RefCell<DefIdMap<&'ll DIScope>>,
Expand Down
5 changes: 2 additions & 3 deletions compiler/rustc_codegen_ssa/src/debuginfo/type_names.rs
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@
// * `"` is treated as the start of a string.

use rustc_data_structures::fx::FxHashSet;
use rustc_data_structures::stable_hasher::{HashStable, StableHasher};
use rustc_data_structures::stable_hasher::{Hash64, HashStable, StableHasher};
use rustc_hir::def_id::DefId;
use rustc_hir::definitions::{DefPathData, DefPathDataName, DisambiguatedDefPathData};
use rustc_hir::{AsyncGeneratorKind, GeneratorKind, Mutability};
Expand Down Expand Up @@ -675,8 +675,7 @@ fn push_const_param<'tcx>(tcx: TyCtxt<'tcx>, ct: ty::Const<'tcx>, output: &mut S
hcx.while_hashing_spans(false, |hcx| {
ct.to_valtree().hash_stable(hcx, &mut hasher)
});
let hash: u64 = hasher.finish();
hash
hasher.finish::<Hash64>()
});

if cpp_like_debuginfo(tcx) {
Expand Down
2 changes: 1 addition & 1 deletion compiler/rustc_const_eval/src/interpret/intrinsics.rs
Original file line number Diff line number Diff line change
Expand Up @@ -75,7 +75,7 @@ pub(crate) fn eval_nullary_intrinsic<'tcx>(
}
sym::type_id => {
ensure_monomorphic_enough(tcx, tp_ty)?;
ConstValue::from_u64(tcx.type_id_hash(tp_ty))
ConstValue::from_u64(tcx.type_id_hash(tp_ty).as_u64())
}
sym::variant_count => match tp_ty.kind() {
// Correctly handles non-monomorphic calls, so there is no need for ensure_monomorphic_enough.
Expand Down
43 changes: 30 additions & 13 deletions compiler/rustc_data_structures/src/fingerprint.rs
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
use crate::stable_hasher;
use crate::stable_hasher::{Hash64, StableHasher, StableHasherResult};
use rustc_serialize::{Decodable, Decoder, Encodable, Encoder};
use std::hash::{Hash, Hasher};

Expand All @@ -9,32 +9,49 @@ mod tests;
#[repr(C)]
pub struct Fingerprint(u64, u64);

impl Fingerprint {
pub const ZERO: Fingerprint = Fingerprint(0, 0);
pub trait FingerprintComponent {
fn as_u64(&self) -> u64;
}

impl FingerprintComponent for Hash64 {
#[inline]
pub fn new(_0: u64, _1: u64) -> Fingerprint {
Fingerprint(_0, _1)
fn as_u64(&self) -> u64 {
Hash64::as_u64(*self)
}
}

impl FingerprintComponent for u64 {
#[inline]
fn as_u64(&self) -> u64 {
*self
}
}

impl Fingerprint {
pub const ZERO: Fingerprint = Fingerprint(0, 0);

#[inline]
pub fn from_smaller_hash(hash: u64) -> Fingerprint {
Fingerprint(hash, hash)
pub fn new<A, B>(_0: A, _1: B) -> Fingerprint
where
A: FingerprintComponent,
B: FingerprintComponent,
{
Fingerprint(_0.as_u64(), _1.as_u64())
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Why not directly store 2 Hash64? Or directly a Hash128?

Copy link
Member Author

@saethlin saethlin Apr 15, 2023

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I'm trying to do some encapsulation here (it's not really working, probably needs follow-up PRs to improve it). I added some documentation to Hash64 and Hash128 to explain what they represent.

Users of Fingerprint store separate 64-bit hashes in it and there is also this user which creates a Fingerprint which is only half hash:

Fingerprint::new(
// `owner` is local, so is completely defined by the local hash
def_path_hash.local_hash(),
local_id.as_u32().into(),
)

After looking through this a lot I think the Fingerprint type should probably be overhauled or discarded. Different users of Fingerprint seem to be getting very different things out of it which seem mostly conceptually disjoint.

So Fingerprint seems to be used as:

  • a 128-bit hash
  • two 64-bit hashes, which can be separated later
  • a 64-bit hash and a 32-bit rustc_index

That seems like 3 different types to me.

}

#[inline]
pub fn to_smaller_hash(&self) -> u64 {
pub fn to_smaller_hash(&self) -> Hash64 {
// Even though both halves of the fingerprint are expected to be good
// quality hash values, let's still combine the two values because the
// Fingerprints in DefPathHash have the StableCrateId portion which is
// the same for all DefPathHashes from the same crate. Combining the
// two halves makes sure we get a good quality hash in such cases too.
self.0.wrapping_mul(3).wrapping_add(self.1)
Hash64::new(self.0.wrapping_mul(3).wrapping_add(self.1))
}

#[inline]
pub fn as_value(&self) -> (u64, u64) {
(self.0, self.1)
pub fn split(&self) -> (Hash64, Hash64) {
(Hash64::new(self.0), Hash64::new(self.1))
}

#[inline]
Expand Down Expand Up @@ -131,9 +148,9 @@ impl FingerprintHasher for crate::unhash::Unhasher {
}
}

impl stable_hasher::StableHasherResult for Fingerprint {
impl StableHasherResult for Fingerprint {
#[inline]
fn finish(hasher: stable_hasher::StableHasher) -> Self {
fn finish(hasher: StableHasher) -> Self {
let (_0, _1) = hasher.finalize();
Fingerprint(_0, _1)
}
Expand Down
7 changes: 4 additions & 3 deletions compiler/rustc_data_structures/src/fingerprint/tests.rs
Original file line number Diff line number Diff line change
@@ -1,11 +1,12 @@
use super::*;
use crate::stable_hasher::Hash64;

// Check that `combine_commutative` is order independent.
#[test]
fn combine_commutative_is_order_independent() {
let a = Fingerprint::new(0xf6622fb349898b06, 0x70be9377b2f9c610);
let b = Fingerprint::new(0xa9562bf5a2a5303c, 0x67d9b6c82034f13d);
let c = Fingerprint::new(0x0d013a27811dbbc3, 0x9a3f7b3d9142ec43);
let a = Fingerprint::new(Hash64::new(0xf6622fb349898b06), Hash64::new(0x70be9377b2f9c610));
let b = Fingerprint::new(Hash64::new(0xa9562bf5a2a5303c), Hash64::new(0x67d9b6c82034f13d));
let c = Fingerprint::new(Hash64::new(0x0d013a27811dbbc3), Hash64::new(0x9a3f7b3d9142ec43));
let permutations = [(a, b, c), (a, c, b), (b, a, c), (b, c, a), (c, a, b), (c, b, a)];
let f = a.combine_commutative(b).combine_commutative(c);
for p in &permutations {
Expand Down
132 changes: 132 additions & 0 deletions compiler/rustc_data_structures/src/hashes.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,132 @@
//! rustc encodes a lot of hashes. If hashes are stored as `u64` or `u128`, a `derive(Encodable)`
//! will apply varint encoding to the hashes, which is less efficient than directly encoding the 8
//! or 16 bytes of the hash.
//!
//! The types in this module represent 64-bit or 128-bit hashes produced by a `StableHasher`.
//! `Hash64` and `Hash128` expose some utilty functions to encourage users to not extract the inner
//! hash value as an integer type and accidentally apply varint encoding to it.
//!
//! In contrast with `Fingerprint`, users of these types cannot and should not attempt to construct
//! and decompose these types into constitutent pieces. The point of these types is only to
//! connect the fact that they can only be produced by a `StableHasher` to their
//! `Encode`/`Decode` impls.

use crate::stable_hasher::{StableHasher, StableHasherResult};
use rustc_serialize::{Decodable, Decoder, Encodable, Encoder};
use std::fmt;
use std::ops::BitXorAssign;

#[derive(Clone, Copy, PartialEq, Eq, Hash, PartialOrd, Ord, Default)]
pub struct Hash64 {
inner: u64,
}

impl Hash64 {
pub const ZERO: Hash64 = Hash64 { inner: 0 };

#[inline]
pub(crate) fn new(n: u64) -> Self {
Self { inner: n }
}

#[inline]
pub fn as_u64(self) -> u64 {
self.inner
}
}

impl BitXorAssign<u64> for Hash64 {
#[inline]
fn bitxor_assign(&mut self, rhs: u64) {
self.inner ^= rhs;
}
}

impl<S: Encoder> Encodable<S> for Hash64 {
#[inline]
fn encode(&self, s: &mut S) {
s.emit_raw_bytes(&self.inner.to_le_bytes());
}
}

impl<D: Decoder> Decodable<D> for Hash64 {
#[inline]
fn decode(d: &mut D) -> Self {
Self { inner: u64::from_le_bytes(d.read_raw_bytes(8).try_into().unwrap()) }
}
}

impl StableHasherResult for Hash64 {
#[inline]
fn finish(hasher: StableHasher) -> Self {
Self { inner: hasher.finalize().0 }
}
}

impl fmt::Debug for Hash64 {
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
self.inner.fmt(f)
}
}

impl fmt::LowerHex for Hash64 {
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
fmt::LowerHex::fmt(&self.inner, f)
}
}

#[derive(Clone, Copy, PartialEq, Eq, Hash, PartialOrd, Ord, Default)]
pub struct Hash128 {
inner: u128,
}

impl Hash128 {
#[inline]
pub fn truncate(self) -> Hash64 {
Hash64 { inner: self.inner as u64 }
}

#[inline]
pub fn wrapping_add(self, other: Self) -> Self {
Self { inner: self.inner.wrapping_add(other.inner) }
}

#[inline]
pub fn as_u128(self) -> u128 {
self.inner
}
}

impl<S: Encoder> Encodable<S> for Hash128 {
#[inline]
fn encode(&self, s: &mut S) {
s.emit_raw_bytes(&self.inner.to_le_bytes());
}
}

impl<D: Decoder> Decodable<D> for Hash128 {
#[inline]
fn decode(d: &mut D) -> Self {
Self { inner: u128::from_le_bytes(d.read_raw_bytes(16).try_into().unwrap()) }
}
}

impl StableHasherResult for Hash128 {
#[inline]
fn finish(hasher: StableHasher) -> Self {
let (_0, _1) = hasher.finalize();
Self { inner: u128::from(_0) | (u128::from(_1) << 64) }
}
}

impl fmt::Debug for Hash128 {
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
self.inner.fmt(f)
}
}

impl fmt::LowerHex for Hash128 {
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
fmt::LowerHex::fmt(&self.inner, f)
}
}
1 change: 1 addition & 0 deletions compiler/rustc_data_structures/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -86,6 +86,7 @@ pub mod work_queue;
pub use atomic_ref::AtomicRef;
pub mod aligned;
pub mod frozen;
mod hashes;
pub mod owned_slice;
pub mod sso;
pub mod steal;
Expand Down
Loading