-
Notifications
You must be signed in to change notification settings - Fork 12.2k
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
[StableHash] Implement with xxh3_64bits #105849
Conversation
@llvm/pr-subscribers-llvm-adt Author: Kyungwoo Lee (kyulee-com) ChangesThis is a follow-up to address a suggestion from #105619. Full diff: https://github.com/llvm/llvm-project/pull/105849.diff 3 Files Affected:
diff --git a/llvm/include/llvm/ADT/StableHashing.h b/llvm/include/llvm/ADT/StableHashing.h
index f675f828f702e5..a5b655a10f6996 100644
--- a/llvm/include/llvm/ADT/StableHashing.h
+++ b/llvm/include/llvm/ADT/StableHashing.h
@@ -16,6 +16,7 @@
#define LLVM_ADT_STABLEHASHING_H
#include "llvm/ADT/StringRef.h"
+#include "llvm/Support/xxhash.h"
namespace llvm {
@@ -23,78 +24,29 @@ namespace llvm {
/// deserialized, and is stable across processes and executions.
using stable_hash = uint64_t;
-// Implementation details
-namespace hashing {
-namespace detail {
-
-// Stable hashes are based on the 64-bit FNV-1 hash:
-// https://en.wikipedia.org/wiki/Fowler-Noll-Vo_hash_function
-
-const uint64_t FNV_PRIME_64 = 1099511628211u;
-const uint64_t FNV_OFFSET_64 = 14695981039346656037u;
-
-inline void stable_hash_append(stable_hash &Hash, const char Value) {
- Hash = Hash ^ (Value & 0xFF);
- Hash = Hash * FNV_PRIME_64;
-}
-
-inline void stable_hash_append(stable_hash &Hash, stable_hash Value) {
- for (unsigned I = 0; I < 8; ++I) {
- stable_hash_append(Hash, static_cast<char>(Value));
- Value >>= 8;
- }
+inline stable_hash stable_hash_combine(ArrayRef<stable_hash> Buffer) {
+ const uint8_t *Ptr = reinterpret_cast<const uint8_t *>(Buffer.data());
+ size_t Size = Buffer.size() * sizeof(stable_hash);
+ return xxh3_64bits(ArrayRef<uint8_t>(Ptr, Size));
}
-} // namespace detail
-} // namespace hashing
-
inline stable_hash stable_hash_combine(stable_hash A, stable_hash B) {
- stable_hash Hash = hashing::detail::FNV_OFFSET_64;
- hashing::detail::stable_hash_append(Hash, A);
- hashing::detail::stable_hash_append(Hash, B);
- return Hash;
+ stable_hash Hashes[2] = {A, B};
+ return stable_hash_combine(Hashes);
}
inline stable_hash stable_hash_combine(stable_hash A, stable_hash B,
stable_hash C) {
- stable_hash Hash = hashing::detail::FNV_OFFSET_64;
- hashing::detail::stable_hash_append(Hash, A);
- hashing::detail::stable_hash_append(Hash, B);
- hashing::detail::stable_hash_append(Hash, C);
- return Hash;
+ stable_hash Hashes[3] = {A, B, C};
+ return stable_hash_combine(Hashes);
}
inline stable_hash stable_hash_combine(stable_hash A, stable_hash B,
stable_hash C, stable_hash D) {
- stable_hash Hash = hashing::detail::FNV_OFFSET_64;
- hashing::detail::stable_hash_append(Hash, A);
- hashing::detail::stable_hash_append(Hash, B);
- hashing::detail::stable_hash_append(Hash, C);
- hashing::detail::stable_hash_append(Hash, D);
- return Hash;
-}
-
-/// Compute a stable_hash for a sequence of values.
-///
-/// This hashes a sequence of values. It produces the same stable_hash as
-/// 'stable_hash_combine(a, b, c, ...)', but can run over arbitrary sized
-/// sequences and is significantly faster given pointers and types which
-/// can be hashed as a sequence of bytes.
-template <typename InputIteratorT>
-stable_hash stable_hash_combine_range(InputIteratorT First,
- InputIteratorT Last) {
- stable_hash Hash = hashing::detail::FNV_OFFSET_64;
- for (auto I = First; I != Last; ++I)
- hashing::detail::stable_hash_append(Hash, *I);
- return Hash;
+ stable_hash Hashes[4] = {A, B, C, D};
+ return stable_hash_combine(Hashes);
}
-inline stable_hash stable_hash_combine_array(const stable_hash *P, size_t C) {
- stable_hash Hash = hashing::detail::FNV_OFFSET_64;
- for (size_t I = 0; I < C; ++I)
- hashing::detail::stable_hash_append(Hash, P[I]);
- return Hash;
-}
} // namespace llvm
#endif
diff --git a/llvm/lib/CodeGen/MachineOperand.cpp b/llvm/lib/CodeGen/MachineOperand.cpp
index ace05902d5df79..a0726ca64910ea 100644
--- a/llvm/lib/CodeGen/MachineOperand.cpp
+++ b/llvm/lib/CodeGen/MachineOperand.cpp
@@ -424,8 +424,7 @@ hash_code llvm::hash_value(const MachineOperand &MO) {
const uint32_t *RegMask = MO.getRegMask();
std::vector<stable_hash> RegMaskHashes(RegMask, RegMask + RegMaskSize);
return hash_combine(MO.getType(), MO.getTargetFlags(),
- stable_hash_combine_array(RegMaskHashes.data(),
- RegMaskHashes.size()));
+ stable_hash_combine(RegMaskHashes));
}
assert(0 && "MachineOperand not associated with any MachineFunction");
diff --git a/llvm/lib/CodeGen/MachineStableHash.cpp b/llvm/lib/CodeGen/MachineStableHash.cpp
index fb5e9a37d9b997..916acbf2d2cbf9 100644
--- a/llvm/lib/CodeGen/MachineStableHash.cpp
+++ b/llvm/lib/CodeGen/MachineStableHash.cpp
@@ -66,7 +66,7 @@ stable_hash llvm::stableHashValue(const MachineOperand &MO) {
SmallVector<stable_hash> DefOpcodes;
for (auto &Def : MRI.def_instructions(MO.getReg()))
DefOpcodes.push_back(Def.getOpcode());
- return stable_hash_combine_range(DefOpcodes.begin(), DefOpcodes.end());
+ return stable_hash_combine(DefOpcodes);
}
// Register operands don't have target flags.
@@ -78,8 +78,8 @@ stable_hash llvm::stableHashValue(const MachineOperand &MO) {
case MachineOperand::MO_FPImmediate: {
auto Val = MO.isCImm() ? MO.getCImm()->getValue()
: MO.getFPImm()->getValueAPF().bitcastToAPInt();
- auto ValHash =
- stable_hash_combine_array(Val.getRawData(), Val.getNumWords());
+ auto ValHash = stable_hash_combine(
+ ArrayRef<stable_hash>(Val.getRawData(), Val.getNumWords()));
return stable_hash_combine(MO.getType(), MO.getTargetFlags(), ValHash);
}
@@ -126,10 +126,8 @@ stable_hash llvm::stableHashValue(const MachineOperand &MO) {
const uint32_t *RegMask = MO.getRegMask();
std::vector<llvm::stable_hash> RegMaskHashes(RegMask,
RegMask + RegMaskSize);
- return stable_hash_combine(
- MO.getType(), MO.getTargetFlags(),
- stable_hash_combine_array(RegMaskHashes.data(),
- RegMaskHashes.size()));
+ return stable_hash_combine(MO.getType(), MO.getTargetFlags(),
+ stable_hash_combine(RegMaskHashes));
}
}
}
@@ -145,10 +143,8 @@ stable_hash llvm::stableHashValue(const MachineOperand &MO) {
MO.getShuffleMask(), std::back_inserter(ShuffleMaskHashes),
[](int S) -> llvm::stable_hash { return llvm::stable_hash(S); });
- return stable_hash_combine(
- MO.getType(), MO.getTargetFlags(),
- stable_hash_combine_array(ShuffleMaskHashes.data(),
- ShuffleMaskHashes.size()));
+ return stable_hash_combine(MO.getType(), MO.getTargetFlags(),
+ stable_hash_combine(ShuffleMaskHashes));
}
case MachineOperand::MO_MCSymbol: {
auto SymbolName = MO.getMCSymbol()->getName();
@@ -212,8 +208,7 @@ stable_hash llvm::stableHashValue(const MachineInstr &MI, bool HashVRegs,
HashComponents.push_back(static_cast<unsigned>(Op->getFailureOrdering()));
}
- return stable_hash_combine_range(HashComponents.begin(),
- HashComponents.end());
+ return stable_hash_combine(HashComponents);
}
stable_hash llvm::stableHashValue(const MachineBasicBlock &MBB) {
@@ -221,8 +216,7 @@ stable_hash llvm::stableHashValue(const MachineBasicBlock &MBB) {
// TODO: Hash more stuff like block alignment and branch probabilities.
for (const auto &MI : MBB)
HashComponents.push_back(stableHashValue(MI));
- return stable_hash_combine_range(HashComponents.begin(),
- HashComponents.end());
+ return stable_hash_combine(HashComponents);
}
stable_hash llvm::stableHashValue(const MachineFunction &MF) {
@@ -230,6 +224,5 @@ stable_hash llvm::stableHashValue(const MachineFunction &MF) {
// TODO: Hash lots more stuff like function alignment and stack objects.
for (const auto &MBB : MF)
HashComponents.push_back(stableHashValue(MBB));
- return stable_hash_combine_range(HashComponents.begin(),
- HashComponents.end());
+ return stable_hash_combine(HashComponents);
}
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Thank you for doing this!
"stable" could mean the hash algorithm is deterministic for a specific compiler version, across different runs even on different architectures.
"stable" could also mean that the hash won't change across compiler versions (e.g. xxh3_64bits).
It might be worth clarifying it in the file-level comment. I believe we only intend this to be stable for a specific version of the compiler.
(I'm starting a 3-week vacation today and will have limited availability. Happy for other reviewers to approve any follow-ups)
There is behavior change in the API, so "NFC" is technically not appropriate. |
// processes, or compiler runs for a specific version. It currently employs | ||
// the xxh3_64bits hashing algorithm. Be aware that this implementation may be | ||
// adjusted or updated as improvements to the system are made. |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Should we also add that these are stable across machines? My other edits are to clarify we mean the hash could change if the compiler changes. Let me know if these changes make sense!
// processes, or compiler runs for a specific version. It currently employs | |
// the xxh3_64bits hashing algorithm. Be aware that this implementation may be | |
// adjusted or updated as improvements to the system are made. | |
// processes, machines, or compiler runs for a specific compiler version. It currently employs | |
// the xxh3_64bits hashing algorithm. Be aware that this implementation may be | |
// adjusted or updated as improvements to the compiler are made. |
7459ba6
to
9e657d7
Compare
This is a follow-up to address a suggestion from #105619. The main goal of this change is to efficiently implement stable hash functions using the xxh3 64bits API. `stable_hash_combine_range` and `stable_hash_combine_array` functions are removed and consolidated into a more general `stable_hash_combine` function that takes an `ArrayRef<stable_hash>` as input.
This is a follow-up to address a suggestion from llvm#105619. The main goal of this change is to efficiently implement stable hash functions using the xxh3 64bits API. `stable_hash_combine_range` and `stable_hash_combine_array` functions are removed and consolidated into a more general `stable_hash_combine` function that takes an `ArrayRef<stable_hash>` as input.
This is a follow-up to address a suggestion from llvm#105619. The main goal of this change is to efficiently implement stable hash functions using the xxh3 64bits API. `stable_hash_combine_range` and `stable_hash_combine_array` functions are removed and consolidated into a more general `stable_hash_combine` function that takes an `ArrayRef<stable_hash>` as input. (cherry picked from commit 7615c0b)
This is a follow-up to address a suggestion from #105619.
The main goal of this change is to efficiently implement stable hash functions using the xxh3 64bits API.
stable_hash_combine_range
andstable_hash_combine_array
functions are removed and consolidated into a more generalstable_hash_combine
function that takes anArrayRef<stable_hash>
as input.