Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[StructuralHash] Refactor #112621

Merged
merged 4 commits into from
Oct 26, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 3 additions & 4 deletions llvm/include/llvm/IR/StructuralHash.h
Original file line number Diff line number Diff line change
Expand Up @@ -14,27 +14,26 @@
#ifndef LLVM_IR_STRUCTURALHASH_H
#define LLVM_IR_STRUCTURALHASH_H

#include "llvm/ADT/StableHashing.h"
#include <cstdint>

namespace llvm {

class Function;
class Module;

using IRHash = uint64_t;

/// Returns a hash of the function \p F.
/// \param F The function to hash.
/// \param DetailedHash Whether or not to encode additional information in the
/// hash. The additional information added into the hash when this flag is set
/// to true includes instruction and operand type information.
IRHash StructuralHash(const Function &F, bool DetailedHash = false);
stable_hash StructuralHash(const Function &F, bool DetailedHash = false);

/// Returns a hash of the module \p M by hashing all functions and global
/// variables contained within. \param M The module to hash. \param DetailedHash
/// Whether or not to encode additional information in the function hashes that
/// composed the module hash.
IRHash StructuralHash(const Module &M, bool DetailedHash = false);
stable_hash StructuralHash(const Module &M, bool DetailedHash = false);

} // end namespace llvm

Expand Down
133 changes: 86 additions & 47 deletions llvm/lib/IR/StructuralHash.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -24,61 +24,93 @@ namespace {
// by the MergeFunctions pass.

class StructuralHashImpl {
uint64_t Hash = 4;
stable_hash Hash = 4;

void hash(uint64_t V) { Hash = hashing::detail::hash_16_bytes(Hash, V); }
bool DetailedHash;

// This random value acts as a block header, as otherwise the partition of
// opcodes into BBs wouldn't affect the hash, only the order of the opcodes.
static constexpr stable_hash BlockHeaderHash = 45798;
static constexpr stable_hash FunctionHeaderHash = 0x62642d6b6b2d6b72;
static constexpr stable_hash GlobalHeaderHash = 23456;

// This will produce different values on 32-bit and 64-bit systens as
// hash_combine returns a size_t. However, this is only used for
// detailed hashing which, in-tree, only needs to distinguish between
// differences in functions.
template <typename T> void hashArbitaryType(const T &V) {
hash(hash_combine(V));
// TODO: This is not stable.
template <typename T> stable_hash hashArbitaryType(const T &V) {
return hash_combine(V);
}

void hashType(Type *ValueType) {
hash(ValueType->getTypeID());
stable_hash hashType(Type *ValueType) {
SmallVector<stable_hash> Hashes;
Hashes.emplace_back(ValueType->getTypeID());
if (ValueType->isIntegerTy())
hash(ValueType->getIntegerBitWidth());
Hashes.emplace_back(ValueType->getIntegerBitWidth());
return stable_hash_combine(Hashes);
}

public:
StructuralHashImpl() = default;

void updateOperand(Value *Operand) {
hashType(Operand->getType());

// The cases enumerated below are not exhaustive and are only aimed to
// get decent coverage over the function.
if (ConstantInt *ConstInt = dyn_cast<ConstantInt>(Operand)) {
hashArbitaryType(ConstInt->getValue());
} else if (ConstantFP *ConstFP = dyn_cast<ConstantFP>(Operand)) {
hashArbitaryType(ConstFP->getValue());
} else if (Argument *Arg = dyn_cast<Argument>(Operand)) {
hash(Arg->getArgNo());
} else if (Function *Func = dyn_cast<Function>(Operand)) {
StructuralHashImpl() = delete;
explicit StructuralHashImpl(bool DetailedHash) : DetailedHash(DetailedHash) {}

stable_hash hashConstant(Constant *C) {
SmallVector<stable_hash> Hashes;
// TODO: hashArbitaryType() is not stable.
if (ConstantInt *ConstInt = dyn_cast<ConstantInt>(C)) {
Hashes.emplace_back(hashArbitaryType(ConstInt->getValue()));
} else if (ConstantFP *ConstFP = dyn_cast<ConstantFP>(C)) {
Hashes.emplace_back(hashArbitaryType(ConstFP->getValue()));
} else if (Function *Func = dyn_cast<Function>(C)) {
// Hashing the name will be deterministic as LLVM's hashing infrastructure
// has explicit support for hashing strings and will not simply hash
// the pointer.
hashArbitaryType(Func->getName());
Hashes.emplace_back(hashArbitaryType(Func->getName()));
}

return stable_hash_combine(Hashes);
}

stable_hash hashValue(Value *V) {
// Check constant and return its hash.
Constant *C = dyn_cast<Constant>(V);
if (C)
return hashConstant(C);

// Hash argument number.
SmallVector<stable_hash> Hashes;
if (Argument *Arg = dyn_cast<Argument>(V))
Hashes.emplace_back(Arg->getArgNo());

return stable_hash_combine(Hashes);
}

void updateInstruction(const Instruction &Inst, bool DetailedHash) {
hash(Inst.getOpcode());
stable_hash hashOperand(Value *Operand) {
SmallVector<stable_hash> Hashes;
Hashes.emplace_back(hashType(Operand->getType()));
Hashes.emplace_back(hashValue(Operand));
return stable_hash_combine(Hashes);
}

stable_hash hashInstruction(const Instruction &Inst) {
SmallVector<stable_hash> Hashes;
Hashes.emplace_back(Inst.getOpcode());

if (!DetailedHash)
return;
return stable_hash_combine(Hashes);

hashType(Inst.getType());
Hashes.emplace_back(hashType(Inst.getType()));

// Handle additional properties of specific instructions that cause
// semantic differences in the IR.
if (const auto *ComparisonInstruction = dyn_cast<CmpInst>(&Inst))
hash(ComparisonInstruction->getPredicate());
Hashes.emplace_back(ComparisonInstruction->getPredicate());

for (const auto &Op : Inst.operands())
updateOperand(Op);
Hashes.emplace_back(hashOperand(Op));

return stable_hash_combine(Hashes);
}

// A function hash is calculated by considering only the number of arguments
Expand All @@ -97,15 +129,17 @@ class StructuralHashImpl {
// expensive checks for pass modification status). When modifying this
// function, most changes should be gated behind an option and enabled
// selectively.
void update(const Function &F, bool DetailedHash) {
void update(const Function &F) {
// Declarations don't affect analyses.
if (F.isDeclaration())
return;

hash(0x62642d6b6b2d6b72); // Function header
SmallVector<stable_hash> Hashes;
Hashes.emplace_back(Hash);
Hashes.emplace_back(FunctionHeaderHash);

hash(F.isVarArg());
hash(F.arg_size());
Hashes.emplace_back(F.isVarArg());
Hashes.emplace_back(F.arg_size());

SmallVector<const BasicBlock *, 8> BBs;
SmallPtrSet<const BasicBlock *, 16> VisitedBBs;
Expand All @@ -118,17 +152,17 @@ class StructuralHashImpl {
while (!BBs.empty()) {
const BasicBlock *BB = BBs.pop_back_val();

// This random value acts as a block header, as otherwise the partition of
// opcodes into BBs wouldn't affect the hash, only the order of the
// opcodes
hash(45798);
Hashes.emplace_back(BlockHeaderHash);
for (auto &Inst : *BB)
updateInstruction(Inst, DetailedHash);
Hashes.emplace_back(hashInstruction(Inst));

for (const BasicBlock *Succ : successors(BB))
if (VisitedBBs.insert(Succ).second)
BBs.push_back(Succ);
}

// Update the combined hash in place.
Hash = stable_hash_combine(Hashes);
}

void update(const GlobalVariable &GV) {
Expand All @@ -137,30 +171,35 @@ class StructuralHashImpl {
// we ignore anything with the `.llvm` prefix
if (GV.isDeclaration() || GV.getName().starts_with("llvm."))
return;
hash(23456); // Global header
hash(GV.getValueType()->getTypeID());
SmallVector<stable_hash> Hashes;
Hashes.emplace_back(Hash);
Hashes.emplace_back(GlobalHeaderHash);
Hashes.emplace_back(GV.getValueType()->getTypeID());

// Update the combined hash in place.
Hash = stable_hash_combine(Hashes);
}

void update(const Module &M, bool DetailedHash) {
void update(const Module &M) {
for (const GlobalVariable &GV : M.globals())
update(GV);
for (const Function &F : M)
update(F, DetailedHash);
update(F);
}

uint64_t getHash() const { return Hash; }
};

} // namespace

IRHash llvm::StructuralHash(const Function &F, bool DetailedHash) {
StructuralHashImpl H;
H.update(F, DetailedHash);
stable_hash llvm::StructuralHash(const Function &F, bool DetailedHash) {
StructuralHashImpl H(DetailedHash);
H.update(F);
return H.getHash();
}

IRHash llvm::StructuralHash(const Module &M, bool DetailedHash) {
StructuralHashImpl H;
H.update(M, DetailedHash);
stable_hash llvm::StructuralHash(const Module &M, bool DetailedHash) {
StructuralHashImpl H(DetailedHash);
H.update(M);
return H.getHash();
}
6 changes: 3 additions & 3 deletions llvm/lib/Transforms/IPO/MergeFunctions.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -172,14 +172,14 @@ namespace {

class FunctionNode {
mutable AssertingVH<Function> F;
IRHash Hash;
stable_hash Hash;

public:
// Note the hash is recalculated potentially multiple times, but it is cheap.
FunctionNode(Function *F) : F(F), Hash(StructuralHash(*F)) {}

Function *getFunc() const { return F; }
IRHash getHash() const { return Hash; }
stable_hash getHash() const { return Hash; }

/// Replace the reference to the function F by the function G, assuming their
/// implementations are equal.
Expand Down Expand Up @@ -420,7 +420,7 @@ bool MergeFunctions::runOnModule(Module &M) {

// All functions in the module, ordered by hash. Functions with a unique
// hash value are easily eliminated.
std::vector<std::pair<IRHash, Function *>> HashedFuncs;
std::vector<std::pair<stable_hash, Function *>> HashedFuncs;
for (Function &Func : M) {
if (isEligibleForMerging(Func)) {
HashedFuncs.push_back({StructuralHash(Func), &Func});
Expand Down
17 changes: 8 additions & 9 deletions llvm/test/Transforms/MergeFunc/call-and-invoke-with-ranges.ll
Original file line number Diff line number Diff line change
Expand Up @@ -63,6 +63,14 @@ lpad:
resume { ptr, i32 } zeroinitializer
}

define i8 @call_with_same_range() {
; CHECK-LABEL: @call_with_same_range
; CHECK: tail call i8 @call_with_range
bitcast i8 0 to i8
%out = call i8 @dummy(), !range !0
ret i8 %out
}

Comment on lines +66 to +73
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Is there a particular reason we need to change the function order in this test?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yes, the existing function merger pass processes functions based on hash values, which slightly changes the order of merged functions -- https://github.com/llvm/llvm-project/blob/main/llvm/lib/Transforms/IPO/MergeFunctions.cpp#L424-L433.
We could use CHECK-DAG for all lines, but that seems too loose. Instead, I adjusted the order of CHECK to match the updated behavior.

define i8 @invoke_with_same_range() personality ptr undef {
; CHECK-LABEL: @invoke_with_same_range()
; CHECK: tail call i8 @invoke_with_range()
Expand All @@ -76,15 +84,6 @@ lpad:
resume { ptr, i32 } zeroinitializer
}

define i8 @call_with_same_range() {
; CHECK-LABEL: @call_with_same_range
; CHECK: tail call i8 @call_with_range
bitcast i8 0 to i8
%out = call i8 @dummy(), !range !0
ret i8 %out
}


declare i8 @dummy();
declare i32 @__gxx_personality_v0(...)

Expand Down
Loading