Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[StableHash] Implement with xxh3_64bits #105849

Merged
merged 3 commits into from
Aug 24, 2024
Merged

Conversation

kyulee-com
Copy link
Contributor

This is a follow-up to address a suggestion from #105619.
The main goal of this change is to efficiently implement stable hash functions using the xxh3 64bits API.
stable_hash_combine_range and stable_hash_combine_array functions are removed and consolidated into a more general stable_hash_combine function that takes an ArrayRef<stable_hash> as input.

@kyulee-com kyulee-com marked this pull request as ready for review August 23, 2024 16:22
@llvmbot
Copy link
Member

llvmbot commented Aug 23, 2024

@llvm/pr-subscribers-llvm-adt

Author: Kyungwoo Lee (kyulee-com)

Changes

This is a follow-up to address a suggestion from #105619.
The main goal of this change is to efficiently implement stable hash functions using the xxh3 64bits API.
stable_hash_combine_range and stable_hash_combine_array functions are removed and consolidated into a more general stable_hash_combine function that takes an ArrayRef&lt;stable_hash&gt; as input.


Full diff: https://github.com/llvm/llvm-project/pull/105849.diff

3 Files Affected:

  • (modified) llvm/include/llvm/ADT/StableHashing.h (+11-59)
  • (modified) llvm/lib/CodeGen/MachineOperand.cpp (+1-2)
  • (modified) llvm/lib/CodeGen/MachineStableHash.cpp (+10-17)
diff --git a/llvm/include/llvm/ADT/StableHashing.h b/llvm/include/llvm/ADT/StableHashing.h
index f675f828f702e5..a5b655a10f6996 100644
--- a/llvm/include/llvm/ADT/StableHashing.h
+++ b/llvm/include/llvm/ADT/StableHashing.h
@@ -16,6 +16,7 @@
 #define LLVM_ADT_STABLEHASHING_H
 
 #include "llvm/ADT/StringRef.h"
+#include "llvm/Support/xxhash.h"
 
 namespace llvm {
 
@@ -23,78 +24,29 @@ namespace llvm {
 /// deserialized, and is stable across processes and executions.
 using stable_hash = uint64_t;
 
-// Implementation details
-namespace hashing {
-namespace detail {
-
-// Stable hashes are based on the 64-bit FNV-1 hash:
-// https://en.wikipedia.org/wiki/Fowler-Noll-Vo_hash_function
-
-const uint64_t FNV_PRIME_64 = 1099511628211u;
-const uint64_t FNV_OFFSET_64 = 14695981039346656037u;
-
-inline void stable_hash_append(stable_hash &Hash, const char Value) {
-  Hash = Hash ^ (Value & 0xFF);
-  Hash = Hash * FNV_PRIME_64;
-}
-
-inline void stable_hash_append(stable_hash &Hash, stable_hash Value) {
-  for (unsigned I = 0; I < 8; ++I) {
-    stable_hash_append(Hash, static_cast<char>(Value));
-    Value >>= 8;
-  }
+inline stable_hash stable_hash_combine(ArrayRef<stable_hash> Buffer) {
+  const uint8_t *Ptr = reinterpret_cast<const uint8_t *>(Buffer.data());
+  size_t Size = Buffer.size() * sizeof(stable_hash);
+  return xxh3_64bits(ArrayRef<uint8_t>(Ptr, Size));
 }
 
-} // namespace detail
-} // namespace hashing
-
 inline stable_hash stable_hash_combine(stable_hash A, stable_hash B) {
-  stable_hash Hash = hashing::detail::FNV_OFFSET_64;
-  hashing::detail::stable_hash_append(Hash, A);
-  hashing::detail::stable_hash_append(Hash, B);
-  return Hash;
+  stable_hash Hashes[2] = {A, B};
+  return stable_hash_combine(Hashes);
 }
 
 inline stable_hash stable_hash_combine(stable_hash A, stable_hash B,
                                        stable_hash C) {
-  stable_hash Hash = hashing::detail::FNV_OFFSET_64;
-  hashing::detail::stable_hash_append(Hash, A);
-  hashing::detail::stable_hash_append(Hash, B);
-  hashing::detail::stable_hash_append(Hash, C);
-  return Hash;
+  stable_hash Hashes[3] = {A, B, C};
+  return stable_hash_combine(Hashes);
 }
 
 inline stable_hash stable_hash_combine(stable_hash A, stable_hash B,
                                        stable_hash C, stable_hash D) {
-  stable_hash Hash = hashing::detail::FNV_OFFSET_64;
-  hashing::detail::stable_hash_append(Hash, A);
-  hashing::detail::stable_hash_append(Hash, B);
-  hashing::detail::stable_hash_append(Hash, C);
-  hashing::detail::stable_hash_append(Hash, D);
-  return Hash;
-}
-
-/// Compute a stable_hash for a sequence of values.
-///
-/// This hashes a sequence of values. It produces the same stable_hash as
-/// 'stable_hash_combine(a, b, c, ...)', but can run over arbitrary sized
-/// sequences and is significantly faster given pointers and types which
-/// can be hashed as a sequence of bytes.
-template <typename InputIteratorT>
-stable_hash stable_hash_combine_range(InputIteratorT First,
-                                      InputIteratorT Last) {
-  stable_hash Hash = hashing::detail::FNV_OFFSET_64;
-  for (auto I = First; I != Last; ++I)
-    hashing::detail::stable_hash_append(Hash, *I);
-  return Hash;
+  stable_hash Hashes[4] = {A, B, C, D};
+  return stable_hash_combine(Hashes);
 }
 
-inline stable_hash stable_hash_combine_array(const stable_hash *P, size_t C) {
-  stable_hash Hash = hashing::detail::FNV_OFFSET_64;
-  for (size_t I = 0; I < C; ++I)
-    hashing::detail::stable_hash_append(Hash, P[I]);
-  return Hash;
-}
 } // namespace llvm
 
 #endif
diff --git a/llvm/lib/CodeGen/MachineOperand.cpp b/llvm/lib/CodeGen/MachineOperand.cpp
index ace05902d5df79..a0726ca64910ea 100644
--- a/llvm/lib/CodeGen/MachineOperand.cpp
+++ b/llvm/lib/CodeGen/MachineOperand.cpp
@@ -424,8 +424,7 @@ hash_code llvm::hash_value(const MachineOperand &MO) {
       const uint32_t *RegMask = MO.getRegMask();
       std::vector<stable_hash> RegMaskHashes(RegMask, RegMask + RegMaskSize);
       return hash_combine(MO.getType(), MO.getTargetFlags(),
-                          stable_hash_combine_array(RegMaskHashes.data(),
-                                                    RegMaskHashes.size()));
+                          stable_hash_combine(RegMaskHashes));
     }
 
     assert(0 && "MachineOperand not associated with any MachineFunction");
diff --git a/llvm/lib/CodeGen/MachineStableHash.cpp b/llvm/lib/CodeGen/MachineStableHash.cpp
index fb5e9a37d9b997..916acbf2d2cbf9 100644
--- a/llvm/lib/CodeGen/MachineStableHash.cpp
+++ b/llvm/lib/CodeGen/MachineStableHash.cpp
@@ -66,7 +66,7 @@ stable_hash llvm::stableHashValue(const MachineOperand &MO) {
       SmallVector<stable_hash> DefOpcodes;
       for (auto &Def : MRI.def_instructions(MO.getReg()))
         DefOpcodes.push_back(Def.getOpcode());
-      return stable_hash_combine_range(DefOpcodes.begin(), DefOpcodes.end());
+      return stable_hash_combine(DefOpcodes);
     }
 
     // Register operands don't have target flags.
@@ -78,8 +78,8 @@ stable_hash llvm::stableHashValue(const MachineOperand &MO) {
   case MachineOperand::MO_FPImmediate: {
     auto Val = MO.isCImm() ? MO.getCImm()->getValue()
                            : MO.getFPImm()->getValueAPF().bitcastToAPInt();
-    auto ValHash =
-        stable_hash_combine_array(Val.getRawData(), Val.getNumWords());
+    auto ValHash = stable_hash_combine(
+        ArrayRef<stable_hash>(Val.getRawData(), Val.getNumWords()));
     return stable_hash_combine(MO.getType(), MO.getTargetFlags(), ValHash);
   }
 
@@ -126,10 +126,8 @@ stable_hash llvm::stableHashValue(const MachineOperand &MO) {
           const uint32_t *RegMask = MO.getRegMask();
           std::vector<llvm::stable_hash> RegMaskHashes(RegMask,
                                                        RegMask + RegMaskSize);
-          return stable_hash_combine(
-              MO.getType(), MO.getTargetFlags(),
-              stable_hash_combine_array(RegMaskHashes.data(),
-                                        RegMaskHashes.size()));
+          return stable_hash_combine(MO.getType(), MO.getTargetFlags(),
+                                     stable_hash_combine(RegMaskHashes));
         }
       }
     }
@@ -145,10 +143,8 @@ stable_hash llvm::stableHashValue(const MachineOperand &MO) {
         MO.getShuffleMask(), std::back_inserter(ShuffleMaskHashes),
         [](int S) -> llvm::stable_hash { return llvm::stable_hash(S); });
 
-    return stable_hash_combine(
-        MO.getType(), MO.getTargetFlags(),
-        stable_hash_combine_array(ShuffleMaskHashes.data(),
-                                  ShuffleMaskHashes.size()));
+    return stable_hash_combine(MO.getType(), MO.getTargetFlags(),
+                               stable_hash_combine(ShuffleMaskHashes));
   }
   case MachineOperand::MO_MCSymbol: {
     auto SymbolName = MO.getMCSymbol()->getName();
@@ -212,8 +208,7 @@ stable_hash llvm::stableHashValue(const MachineInstr &MI, bool HashVRegs,
     HashComponents.push_back(static_cast<unsigned>(Op->getFailureOrdering()));
   }
 
-  return stable_hash_combine_range(HashComponents.begin(),
-                                   HashComponents.end());
+  return stable_hash_combine(HashComponents);
 }
 
 stable_hash llvm::stableHashValue(const MachineBasicBlock &MBB) {
@@ -221,8 +216,7 @@ stable_hash llvm::stableHashValue(const MachineBasicBlock &MBB) {
   // TODO: Hash more stuff like block alignment and branch probabilities.
   for (const auto &MI : MBB)
     HashComponents.push_back(stableHashValue(MI));
-  return stable_hash_combine_range(HashComponents.begin(),
-                                   HashComponents.end());
+  return stable_hash_combine(HashComponents);
 }
 
 stable_hash llvm::stableHashValue(const MachineFunction &MF) {
@@ -230,6 +224,5 @@ stable_hash llvm::stableHashValue(const MachineFunction &MF) {
   // TODO: Hash lots more stuff like function alignment and stack objects.
   for (const auto &MBB : MF)
     HashComponents.push_back(stableHashValue(MBB));
-  return stable_hash_combine_range(HashComponents.begin(),
-                                   HashComponents.end());
+  return stable_hash_combine(HashComponents);
 }

@kyulee-com kyulee-com requested a review from plotfi August 23, 2024 16:26
Copy link
Member

@MaskRay MaskRay left a comment

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Thank you for doing this!

"stable" could mean the hash algorithm is deterministic for a specific compiler version, across different runs even on different architectures.
"stable" could also mean that the hash won't change across compiler versions (e.g. xxh3_64bits).

It might be worth clarifying it in the file-level comment. I believe we only intend this to be stable for a specific version of the compiler.

(I'm starting a 3-week vacation today and will have limited availability. Happy for other reviewers to approve any follow-ups)

@MaskRay
Copy link
Member

MaskRay commented Aug 23, 2024

There is behavior change in the API, so "NFC" is technically not appropriate.

@kyulee-com kyulee-com changed the title [StableHash][NFC] Implement with xxh3_64bits [StableHash] Implement with xxh3_64bits Aug 23, 2024
Comment on lines 11 to 13
// processes, or compiler runs for a specific version. It currently employs
// the xxh3_64bits hashing algorithm. Be aware that this implementation may be
// adjusted or updated as improvements to the system are made.
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Should we also add that these are stable across machines? My other edits are to clarify we mean the hash could change if the compiler changes. Let me know if these changes make sense!

Suggested change
// processes, or compiler runs for a specific version. It currently employs
// the xxh3_64bits hashing algorithm. Be aware that this implementation may be
// adjusted or updated as improvements to the system are made.
// processes, machines, or compiler runs for a specific compiler version. It currently employs
// the xxh3_64bits hashing algorithm. Be aware that this implementation may be
// adjusted or updated as improvements to the compiler are made.

@kyulee-com kyulee-com merged commit 7615c0b into llvm:main Aug 24, 2024
8 checks passed
@kyulee-com kyulee-com deleted the xxh3stablehash branch August 24, 2024 04:53
5chmidti pushed a commit that referenced this pull request Aug 24, 2024
This is a follow-up to address a suggestion from
#105619.
The main goal of this change is to efficiently implement stable hash
functions using the xxh3 64bits API.
`stable_hash_combine_range` and `stable_hash_combine_array` functions
are removed and consolidated into a more general `stable_hash_combine`
function that takes an `ArrayRef<stable_hash>` as input.
dmpolukhin pushed a commit to dmpolukhin/llvm-project that referenced this pull request Sep 2, 2024
This is a follow-up to address a suggestion from
llvm#105619.
The main goal of this change is to efficiently implement stable hash
functions using the xxh3 64bits API.
`stable_hash_combine_range` and `stable_hash_combine_array` functions
are removed and consolidated into a more general `stable_hash_combine`
function that takes an `ArrayRef<stable_hash>` as input.
adrian-prantl pushed a commit to adrian-prantl/llvm-project that referenced this pull request Oct 21, 2024
This is a follow-up to address a suggestion from
llvm#105619.
The main goal of this change is to efficiently implement stable hash
functions using the xxh3 64bits API.
`stable_hash_combine_range` and `stable_hash_combine_array` functions
are removed and consolidated into a more general `stable_hash_combine`
function that takes an `ArrayRef<stable_hash>` as input.

(cherry picked from commit 7615c0b)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
Labels
Projects
None yet
Development

Successfully merging this pull request may close these issues.

4 participants