Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[StructuralHash] Refactor #112621

Merged
merged 4 commits into from
Oct 26, 2024
Merged
Show file tree
Hide file tree
Changes from 3 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 2 additions & 1 deletion llvm/include/llvm/IR/StructuralHash.h
Original file line number Diff line number Diff line change
Expand Up @@ -14,14 +14,15 @@
#ifndef LLVM_IR_STRUCTURALHASH_H
#define LLVM_IR_STRUCTURALHASH_H

#include "llvm/ADT/StableHashing.h"
#include <cstdint>

namespace llvm {

class Function;
class Module;

using IRHash = uint64_t;
using IRHash = stable_hash;
boomanaiden154 marked this conversation as resolved.
Show resolved Hide resolved

/// Returns a hash of the function \p F.
/// \param F The function to hash.
Expand Down
124 changes: 82 additions & 42 deletions llvm/lib/IR/StructuralHash.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -24,61 +24,91 @@ namespace {
// by the MergeFunctions pass.

class StructuralHashImpl {
uint64_t Hash = 4;
stable_hash Hash = 4;

void hash(uint64_t V) { Hash = hashing::detail::hash_16_bytes(Hash, V); }
bool DetailedHash;

const stable_hash GlobalHeaderHash = stable_hash_name("Global Header");
const stable_hash FunctionHeaderHash = stable_hash_name("Function Header");
const stable_hash BlockHeaderHash = stable_hash_name("Block Header");

// This will produce different values on 32-bit and 64-bit systens as
// hash_combine returns a size_t. However, this is only used for
// detailed hashing which, in-tree, only needs to distinguish between
// differences in functions.
template <typename T> void hashArbitaryType(const T &V) {
hash(hash_combine(V));
// TODO: This is not stable.
template <typename T> stable_hash hashArbitaryType(const T &V) {
return hash_combine(V);
}

void hashType(Type *ValueType) {
hash(ValueType->getTypeID());
stable_hash hashType(Type *ValueType) {
SmallVector<stable_hash> Hashes;
Hashes.emplace_back(ValueType->getTypeID());
if (ValueType->isIntegerTy())
hash(ValueType->getIntegerBitWidth());
Hashes.emplace_back(ValueType->getIntegerBitWidth());
return stable_hash_combine(Hashes);
}

public:
StructuralHashImpl() = default;

void updateOperand(Value *Operand) {
hashType(Operand->getType());

// The cases enumerated below are not exhaustive and are only aimed to
// get decent coverage over the function.
if (ConstantInt *ConstInt = dyn_cast<ConstantInt>(Operand)) {
hashArbitaryType(ConstInt->getValue());
} else if (ConstantFP *ConstFP = dyn_cast<ConstantFP>(Operand)) {
hashArbitaryType(ConstFP->getValue());
} else if (Argument *Arg = dyn_cast<Argument>(Operand)) {
hash(Arg->getArgNo());
} else if (Function *Func = dyn_cast<Function>(Operand)) {
StructuralHashImpl() = delete;
explicit StructuralHashImpl(bool DetailedHash) : DetailedHash(DetailedHash) {}

stable_hash hashConstant(Constant *C) {
SmallVector<stable_hash> Hashes;
// TODO: hashArbitaryType() is not stable.
if (ConstantInt *ConstInt = dyn_cast<ConstantInt>(C)) {
Hashes.emplace_back(hashArbitaryType(ConstInt->getValue()));
} else if (ConstantFP *ConstFP = dyn_cast<ConstantFP>(C)) {
Hashes.emplace_back(hashArbitaryType(ConstFP->getValue()));
} else if (Function *Func = dyn_cast<Function>(C)) {
// Hashing the name will be deterministic as LLVM's hashing infrastructure
// has explicit support for hashing strings and will not simply hash
// the pointer.
hashArbitaryType(Func->getName());
Hashes.emplace_back(hashArbitaryType(Func->getName()));
}

return stable_hash_combine(Hashes);
}

stable_hash hashValue(Value *V) {
// Check constant and return its hash.
Constant *C = dyn_cast<Constant>(V);
if (C)
return hashConstant(C);

// Hash argument number.
SmallVector<stable_hash> Hashes;
if (Argument *Arg = dyn_cast<Argument>(V))
Hashes.emplace_back(Arg->getArgNo());

return stable_hash_combine(Hashes);
}

void updateInstruction(const Instruction &Inst, bool DetailedHash) {
hash(Inst.getOpcode());
stable_hash hashOperand(Value *Operand) {
SmallVector<stable_hash> Hashes;
Hashes.emplace_back(hashType(Operand->getType()));
Hashes.emplace_back(hashValue(Operand));
return stable_hash_combine(Hashes);
}

stable_hash hashInstruction(const Instruction &Inst) {
SmallVector<stable_hash> Hashes;
Hashes.emplace_back(Inst.getOpcode());

if (!DetailedHash)
return;
return stable_hash_combine(Hashes);

hashType(Inst.getType());
Hashes.emplace_back(hashType(Inst.getType()));

// Handle additional properties of specific instructions that cause
// semantic differences in the IR.
if (const auto *ComparisonInstruction = dyn_cast<CmpInst>(&Inst))
hash(ComparisonInstruction->getPredicate());
Hashes.emplace_back(ComparisonInstruction->getPredicate());

for (const auto &Op : Inst.operands())
updateOperand(Op);
Hashes.emplace_back(hashOperand(Op));

return stable_hash_combine(Hashes);
}

// A function hash is calculated by considering only the number of arguments
Expand All @@ -97,15 +127,17 @@ class StructuralHashImpl {
// expensive checks for pass modification status). When modifying this
// function, most changes should be gated behind an option and enabled
// selectively.
void update(const Function &F, bool DetailedHash) {
void update(const Function &F) {
// Declarations don't affect analyses.
if (F.isDeclaration())
return;

hash(0x62642d6b6b2d6b72); // Function header
SmallVector<stable_hash> Hashes;
Hashes.emplace_back(Hash);
Hashes.emplace_back(FunctionHeaderHash);

hash(F.isVarArg());
hash(F.arg_size());
Hashes.emplace_back(F.isVarArg());
Hashes.emplace_back(F.arg_size());

SmallVector<const BasicBlock *, 8> BBs;
SmallPtrSet<const BasicBlock *, 16> VisitedBBs;
Expand All @@ -121,14 +153,17 @@ class StructuralHashImpl {
// This random value acts as a block header, as otherwise the partition of
// opcodes into BBs wouldn't affect the hash, only the order of the
// opcodes
hash(45798);
Hashes.emplace_back(BlockHeaderHash);
for (auto &Inst : *BB)
updateInstruction(Inst, DetailedHash);
Hashes.emplace_back(hashInstruction(Inst));

for (const BasicBlock *Succ : successors(BB))
if (VisitedBBs.insert(Succ).second)
BBs.push_back(Succ);
}

// Update the combined hash in place.
Hash = stable_hash_combine(Hashes);
}

void update(const GlobalVariable &GV) {
Expand All @@ -137,15 +172,20 @@ class StructuralHashImpl {
// we ignore anything with the `.llvm` prefix
if (GV.isDeclaration() || GV.getName().starts_with("llvm."))
return;
hash(23456); // Global header
hash(GV.getValueType()->getTypeID());
SmallVector<stable_hash> Hashes;
Hashes.emplace_back(Hash);
Hashes.emplace_back(GlobalHeaderHash);
Hashes.emplace_back(GV.getValueType()->getTypeID());

// Update the combined hash in place.
Hash = stable_hash_combine(Hashes);
}

void update(const Module &M, bool DetailedHash) {
void update(const Module &M) {
for (const GlobalVariable &GV : M.globals())
update(GV);
for (const Function &F : M)
update(F, DetailedHash);
update(F);
}

uint64_t getHash() const { return Hash; }
Expand All @@ -154,13 +194,13 @@ class StructuralHashImpl {
} // namespace

IRHash llvm::StructuralHash(const Function &F, bool DetailedHash) {
StructuralHashImpl H;
H.update(F, DetailedHash);
StructuralHashImpl H(DetailedHash);
H.update(F);
return H.getHash();
}

IRHash llvm::StructuralHash(const Module &M, bool DetailedHash) {
StructuralHashImpl H;
H.update(M, DetailedHash);
StructuralHashImpl H(DetailedHash);
H.update(M);
return H.getHash();
}
28 changes: 14 additions & 14 deletions llvm/test/Transforms/MergeFunc/call-and-invoke-with-ranges-attr.ll
Original file line number Diff line number Diff line change
Expand Up @@ -79,17 +79,11 @@ lpad:
resume { ptr, i32 } zeroinitializer
}

define i8 @invoke_with_same_range() personality ptr undef {
; CHECK-LABEL: @invoke_with_same_range()
; CHECK: tail call i8 @invoke_with_range()
%out = invoke range(i8 0, 2) i8 @dummy() to label %next unwind label %lpad

next:
define i8 @call_with_same_range_attr(i8 range(i8 0, 2) %v) {
; CHECK-LABEL: @call_with_same_range_attr
; CHECK: tail call i8 @call_with_range_attr
%out = call i8 @dummy2(i8 %v)
ret i8 %out

lpad:
%pad = landingpad { ptr, i32 } cleanup
resume { ptr, i32 } zeroinitializer
}

define i8 @call_with_same_range() {
Expand All @@ -99,11 +93,17 @@ define i8 @call_with_same_range() {
ret i8 %out
}

define i8 @call_with_same_range_attr(i8 range(i8 0, 2) %v) {
; CHECK-LABEL: @call_with_same_range_attr
; CHECK: tail call i8 @call_with_range_attr
%out = call i8 @dummy2(i8 %v)
define i8 @invoke_with_same_range() personality ptr undef {
; CHECK-LABEL: @invoke_with_same_range()
; CHECK: tail call i8 @invoke_with_range()
%out = invoke range(i8 0, 2) i8 @dummy() to label %next unwind label %lpad

next:
ret i8 %out

lpad:
%pad = landingpad { ptr, i32 } cleanup
resume { ptr, i32 } zeroinitializer
}

declare i8 @dummy();
Expand Down
17 changes: 8 additions & 9 deletions llvm/test/Transforms/MergeFunc/call-and-invoke-with-ranges.ll
Original file line number Diff line number Diff line change
Expand Up @@ -63,6 +63,14 @@ lpad:
resume { ptr, i32 } zeroinitializer
}

define i8 @call_with_same_range() {
; CHECK-LABEL: @call_with_same_range
; CHECK: tail call i8 @call_with_range
bitcast i8 0 to i8
%out = call i8 @dummy(), !range !0
ret i8 %out
}

Comment on lines +66 to +73
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Is there a particular reason we need to change the function order in this test?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yes, the existing function merger pass processes functions based on hash values, which slightly changes the order of merged functions -- https://github.com/llvm/llvm-project/blob/main/llvm/lib/Transforms/IPO/MergeFunctions.cpp#L424-L433.
We could use CHECK-DAG for all lines, but that seems too loose. Instead, I adjusted the order of CHECK to match the updated behavior.

define i8 @invoke_with_same_range() personality ptr undef {
; CHECK-LABEL: @invoke_with_same_range()
; CHECK: tail call i8 @invoke_with_range()
Expand All @@ -76,15 +84,6 @@ lpad:
resume { ptr, i32 } zeroinitializer
}

define i8 @call_with_same_range() {
; CHECK-LABEL: @call_with_same_range
; CHECK: tail call i8 @call_with_range
bitcast i8 0 to i8
%out = call i8 @dummy(), !range !0
ret i8 %out
}


declare i8 @dummy();
declare i32 @__gxx_personality_v0(...)

Expand Down
6 changes: 3 additions & 3 deletions llvm/test/Transforms/MergeFunc/inline-asm.ll
Original file line number Diff line number Diff line change
Expand Up @@ -3,12 +3,12 @@
; CHECK-LABEL: @int_ptr_arg_different
; CHECK-NEXT: call void asm

; CHECK-LABEL: @int_ptr_null
; CHECK-NEXT: tail call void @float_ptr_null()

; CHECK-LABEL: @int_ptr_arg_same
; CHECK-NEXT: tail call void @float_ptr_arg_same(ptr %0)

; CHECK-LABEL: @int_ptr_null
; CHECK-NEXT: tail call void @float_ptr_null()

; Used to satisfy minimum size limit
declare void @stuff()

Expand Down
Loading