Skip to content

Commit

Permalink
Reland (2nd attempt) [StructuralHash] Refactor (llvm#112621)
Browse files Browse the repository at this point in the history
This is largely NFC, and it prepares for llvm#112638.
 - Use stable_hash instead of uint64_t
 - Rename update* to hash* functions. They compute stable_hash locally and return it.

This is a patch for
https://discourse.llvm.org/t/rfc-global-function-merging/82608.
  • Loading branch information
kyulee-com authored and NoumanAmir657 committed Nov 4, 2024
1 parent 5614dfe commit a8a2732
Show file tree
Hide file tree
Showing 6 changed files with 106 additions and 68 deletions.
7 changes: 3 additions & 4 deletions llvm/include/llvm/IR/StructuralHash.h
Original file line number Diff line number Diff line change
Expand Up @@ -14,27 +14,26 @@
#ifndef LLVM_IR_STRUCTURALHASH_H
#define LLVM_IR_STRUCTURALHASH_H

#include "llvm/ADT/StableHashing.h"
#include <cstdint>

namespace llvm {

class Function;
class Module;

using IRHash = uint64_t;

/// Returns a hash of the function \p F.
/// \param F The function to hash.
/// \param DetailedHash Whether or not to encode additional information in the
/// hash. The additional information added into the hash when this flag is set
/// to true includes instruction and operand type information.
IRHash StructuralHash(const Function &F, bool DetailedHash = false);
stable_hash StructuralHash(const Function &F, bool DetailedHash = false);

/// Returns a hash of the module \p M by hashing all functions and global
/// variables contained within. \param M The module to hash. \param DetailedHash
/// Whether or not to encode additional information in the function hashes that
/// composed the module hash.
IRHash StructuralHash(const Module &M, bool DetailedHash = false);
stable_hash StructuralHash(const Module &M, bool DetailedHash = false);

} // end namespace llvm

Expand Down
133 changes: 86 additions & 47 deletions llvm/lib/IR/StructuralHash.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -24,61 +24,93 @@ namespace {
// by the MergeFunctions pass.

class StructuralHashImpl {
uint64_t Hash = 4;
stable_hash Hash = 4;

void hash(uint64_t V) { Hash = hashing::detail::hash_16_bytes(Hash, V); }
bool DetailedHash;

// This random value acts as a block header, as otherwise the partition of
// opcodes into BBs wouldn't affect the hash, only the order of the opcodes.
static constexpr stable_hash BlockHeaderHash = 45798;
static constexpr stable_hash FunctionHeaderHash = 0x62642d6b6b2d6b72;
static constexpr stable_hash GlobalHeaderHash = 23456;

// This will produce different values on 32-bit and 64-bit systens as
// hash_combine returns a size_t. However, this is only used for
// detailed hashing which, in-tree, only needs to distinguish between
// differences in functions.
template <typename T> void hashArbitaryType(const T &V) {
hash(hash_combine(V));
// TODO: This is not stable.
template <typename T> stable_hash hashArbitaryType(const T &V) {
return hash_combine(V);
}

void hashType(Type *ValueType) {
hash(ValueType->getTypeID());
stable_hash hashType(Type *ValueType) {
SmallVector<stable_hash> Hashes;
Hashes.emplace_back(ValueType->getTypeID());
if (ValueType->isIntegerTy())
hash(ValueType->getIntegerBitWidth());
Hashes.emplace_back(ValueType->getIntegerBitWidth());
return stable_hash_combine(Hashes);
}

public:
StructuralHashImpl() = default;

void updateOperand(Value *Operand) {
hashType(Operand->getType());

// The cases enumerated below are not exhaustive and are only aimed to
// get decent coverage over the function.
if (ConstantInt *ConstInt = dyn_cast<ConstantInt>(Operand)) {
hashArbitaryType(ConstInt->getValue());
} else if (ConstantFP *ConstFP = dyn_cast<ConstantFP>(Operand)) {
hashArbitaryType(ConstFP->getValue());
} else if (Argument *Arg = dyn_cast<Argument>(Operand)) {
hash(Arg->getArgNo());
} else if (Function *Func = dyn_cast<Function>(Operand)) {
StructuralHashImpl() = delete;
explicit StructuralHashImpl(bool DetailedHash) : DetailedHash(DetailedHash) {}

stable_hash hashConstant(Constant *C) {
SmallVector<stable_hash> Hashes;
// TODO: hashArbitaryType() is not stable.
if (ConstantInt *ConstInt = dyn_cast<ConstantInt>(C)) {
Hashes.emplace_back(hashArbitaryType(ConstInt->getValue()));
} else if (ConstantFP *ConstFP = dyn_cast<ConstantFP>(C)) {
Hashes.emplace_back(hashArbitaryType(ConstFP->getValue()));
} else if (Function *Func = dyn_cast<Function>(C)) {
// Hashing the name will be deterministic as LLVM's hashing infrastructure
// has explicit support for hashing strings and will not simply hash
// the pointer.
hashArbitaryType(Func->getName());
Hashes.emplace_back(hashArbitaryType(Func->getName()));
}

return stable_hash_combine(Hashes);
}

stable_hash hashValue(Value *V) {
// Check constant and return its hash.
Constant *C = dyn_cast<Constant>(V);
if (C)
return hashConstant(C);

// Hash argument number.
SmallVector<stable_hash> Hashes;
if (Argument *Arg = dyn_cast<Argument>(V))
Hashes.emplace_back(Arg->getArgNo());

return stable_hash_combine(Hashes);
}

void updateInstruction(const Instruction &Inst, bool DetailedHash) {
hash(Inst.getOpcode());
stable_hash hashOperand(Value *Operand) {
SmallVector<stable_hash> Hashes;
Hashes.emplace_back(hashType(Operand->getType()));
Hashes.emplace_back(hashValue(Operand));
return stable_hash_combine(Hashes);
}

stable_hash hashInstruction(const Instruction &Inst) {
SmallVector<stable_hash> Hashes;
Hashes.emplace_back(Inst.getOpcode());

if (!DetailedHash)
return;
return stable_hash_combine(Hashes);

hashType(Inst.getType());
Hashes.emplace_back(hashType(Inst.getType()));

// Handle additional properties of specific instructions that cause
// semantic differences in the IR.
if (const auto *ComparisonInstruction = dyn_cast<CmpInst>(&Inst))
hash(ComparisonInstruction->getPredicate());
Hashes.emplace_back(ComparisonInstruction->getPredicate());

for (const auto &Op : Inst.operands())
updateOperand(Op);
Hashes.emplace_back(hashOperand(Op));

return stable_hash_combine(Hashes);
}

// A function hash is calculated by considering only the number of arguments
Expand All @@ -97,15 +129,17 @@ class StructuralHashImpl {
// expensive checks for pass modification status). When modifying this
// function, most changes should be gated behind an option and enabled
// selectively.
void update(const Function &F, bool DetailedHash) {
void update(const Function &F) {
// Declarations don't affect analyses.
if (F.isDeclaration())
return;

hash(0x62642d6b6b2d6b72); // Function header
SmallVector<stable_hash> Hashes;
Hashes.emplace_back(Hash);
Hashes.emplace_back(FunctionHeaderHash);

hash(F.isVarArg());
hash(F.arg_size());
Hashes.emplace_back(F.isVarArg());
Hashes.emplace_back(F.arg_size());

SmallVector<const BasicBlock *, 8> BBs;
SmallPtrSet<const BasicBlock *, 16> VisitedBBs;
Expand All @@ -118,17 +152,17 @@ class StructuralHashImpl {
while (!BBs.empty()) {
const BasicBlock *BB = BBs.pop_back_val();

// This random value acts as a block header, as otherwise the partition of
// opcodes into BBs wouldn't affect the hash, only the order of the
// opcodes
hash(45798);
Hashes.emplace_back(BlockHeaderHash);
for (auto &Inst : *BB)
updateInstruction(Inst, DetailedHash);
Hashes.emplace_back(hashInstruction(Inst));

for (const BasicBlock *Succ : successors(BB))
if (VisitedBBs.insert(Succ).second)
BBs.push_back(Succ);
}

// Update the combined hash in place.
Hash = stable_hash_combine(Hashes);
}

void update(const GlobalVariable &GV) {
Expand All @@ -137,30 +171,35 @@ class StructuralHashImpl {
// we ignore anything with the `.llvm` prefix
if (GV.isDeclaration() || GV.getName().starts_with("llvm."))
return;
hash(23456); // Global header
hash(GV.getValueType()->getTypeID());
SmallVector<stable_hash> Hashes;
Hashes.emplace_back(Hash);
Hashes.emplace_back(GlobalHeaderHash);
Hashes.emplace_back(GV.getValueType()->getTypeID());

// Update the combined hash in place.
Hash = stable_hash_combine(Hashes);
}

void update(const Module &M, bool DetailedHash) {
void update(const Module &M) {
for (const GlobalVariable &GV : M.globals())
update(GV);
for (const Function &F : M)
update(F, DetailedHash);
update(F);
}

uint64_t getHash() const { return Hash; }
};

} // namespace

IRHash llvm::StructuralHash(const Function &F, bool DetailedHash) {
StructuralHashImpl H;
H.update(F, DetailedHash);
stable_hash llvm::StructuralHash(const Function &F, bool DetailedHash) {
StructuralHashImpl H(DetailedHash);
H.update(F);
return H.getHash();
}

IRHash llvm::StructuralHash(const Module &M, bool DetailedHash) {
StructuralHashImpl H;
H.update(M, DetailedHash);
stable_hash llvm::StructuralHash(const Module &M, bool DetailedHash) {
StructuralHashImpl H(DetailedHash);
H.update(M);
return H.getHash();
}
6 changes: 3 additions & 3 deletions llvm/lib/Transforms/IPO/MergeFunctions.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -172,14 +172,14 @@ namespace {

class FunctionNode {
mutable AssertingVH<Function> F;
IRHash Hash;
stable_hash Hash;

public:
// Note the hash is recalculated potentially multiple times, but it is cheap.
FunctionNode(Function *F) : F(F), Hash(StructuralHash(*F)) {}

Function *getFunc() const { return F; }
IRHash getHash() const { return Hash; }
stable_hash getHash() const { return Hash; }

/// Replace the reference to the function F by the function G, assuming their
/// implementations are equal.
Expand Down Expand Up @@ -420,7 +420,7 @@ bool MergeFunctions::runOnModule(Module &M) {

// All functions in the module, ordered by hash. Functions with a unique
// hash value are easily eliminated.
std::vector<std::pair<IRHash, Function *>> HashedFuncs;
std::vector<std::pair<stable_hash, Function *>> HashedFuncs;
for (Function &Func : M) {
if (isEligibleForMerging(Func)) {
HashedFuncs.push_back({StructuralHash(Func), &Func});
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -80,8 +80,8 @@ lpad:
}

define i8 @invoke_with_same_range() personality ptr undef {
; CHECK-LABEL: @invoke_with_same_range()
; CHECK: tail call i8 @invoke_with_range()
; CHECK-DAG: @invoke_with_same_range()
; CHECK-DAG: tail call i8 @invoke_with_range()
%out = invoke range(i8 0, 2) i8 @dummy() to label %next unwind label %lpad

next:
Expand All @@ -93,15 +93,15 @@ lpad:
}

define i8 @call_with_same_range() {
; CHECK-LABEL: @call_with_same_range
; CHECK: tail call i8 @call_with_range
; CHECK-DAG: @call_with_same_range()
; CHECK-DAG: tail call i8 @call_with_range()
%out = call range(i8 0, 2) i8 @dummy()
ret i8 %out
}

define i8 @call_with_same_range_attr(i8 range(i8 0, 2) %v) {
; CHECK-LABEL: @call_with_same_range_attr
; CHECK: tail call i8 @call_with_range_attr
; CHECK-DAG: @call_with_same_range_attr
; CHECK-DAG: tail call i8 @call_with_range_attr
%out = call i8 @dummy2(i8 %v)
ret i8 %out
}
Expand Down
8 changes: 4 additions & 4 deletions llvm/test/Transforms/MergeFunc/call-and-invoke-with-ranges.ll
Original file line number Diff line number Diff line change
Expand Up @@ -64,8 +64,8 @@ lpad:
}

define i8 @invoke_with_same_range() personality ptr undef {
; CHECK-LABEL: @invoke_with_same_range()
; CHECK: tail call i8 @invoke_with_range()
; CHECK-DAG: @invoke_with_same_range()
; CHECK-DAG: tail call i8 @invoke_with_range()
%out = invoke i8 @dummy() to label %next unwind label %lpad, !range !0

next:
Expand All @@ -77,8 +77,8 @@ lpad:
}

define i8 @call_with_same_range() {
; CHECK-LABEL: @call_with_same_range
; CHECK: tail call i8 @call_with_range
; CHECK-DAG: @call_with_same_range
; CHECK-DAG: tail call i8 @call_with_range
bitcast i8 0 to i8
%out = call i8 @dummy(), !range !0
ret i8 %out
Expand Down
8 changes: 4 additions & 4 deletions llvm/test/Transforms/MergeFunc/inline-asm.ll
Original file line number Diff line number Diff line change
Expand Up @@ -3,11 +3,11 @@
; CHECK-LABEL: @int_ptr_arg_different
; CHECK-NEXT: call void asm

; CHECK-LABEL: @int_ptr_null
; CHECK-NEXT: tail call void @float_ptr_null()
; CHECK-DAG: @int_ptr_null
; CHECK-DAG: tail call void @float_ptr_null()

; CHECK-LABEL: @int_ptr_arg_same
; CHECK-NEXT: tail call void @float_ptr_arg_same(ptr %0)
; CHECK-DAG: @int_ptr_arg_same
; CHECK-DAG: tail call void @float_ptr_arg_same(ptr %0)

; Used to satisfy minimum size limit
declare void @stuff()
Expand Down

0 comments on commit a8a2732

Please sign in to comment.