Skip to content

Commit

Permalink
[StructuralHash] Support Differences (#112638)
Browse files Browse the repository at this point in the history
This computes a structural hash while allowing certain operands to be
selectively ignored, based on a custom function that is provided. Instead
of a single hash value, it now returns a FunctionHashInfo, which includes
the hash value, an instruction mapping, and a map from the location of each
ignored operand to that operand's hash value.

Depends on #112621.
This is a patch for
https://discourse.llvm.org/t/rfc-global-function-merging/82608.
  • Loading branch information
kyulee-com authored Oct 27, 2024
1 parent 242c770 commit 0dd9fdc
Show file tree
Hide file tree
Showing 8 changed files with 304 additions and 40 deletions.
13 changes: 10 additions & 3 deletions llvm/include/llvm/Analysis/StructuralHash.h
Original file line number Diff line number Diff line change
Expand Up @@ -13,15 +13,22 @@

namespace llvm {

/// Selects how StructuralHashPrinterPass computes the hashes it prints.
enum class StructuralHashOptions {
None, ///< Hash with opcode only.
Detailed, ///< Hash with opcode and operands.
CallTargetIgnored, ///< Ignore call target operand when computing hash.
};

/// Printer pass for StructuralHashes
class StructuralHashPrinterPass
: public PassInfoMixin<StructuralHashPrinterPass> {
raw_ostream &OS;
bool EnableDetailedStructuralHash;
const StructuralHashOptions Options;

public:
explicit StructuralHashPrinterPass(raw_ostream &OS, bool Detailed)
: OS(OS), EnableDetailedStructuralHash(Detailed) {}
explicit StructuralHashPrinterPass(raw_ostream &OS,
StructuralHashOptions Options)
: OS(OS), Options(Options) {}

PreservedAnalyses run(Module &M, ModuleAnalysisManager &MAM);

Expand Down
45 changes: 45 additions & 0 deletions llvm/include/llvm/IR/StructuralHash.h
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,9 @@
#ifndef LLVM_IR_STRUCTURALHASH_H
#define LLVM_IR_STRUCTURALHASH_H

#include "llvm/ADT/MapVector.h"
#include "llvm/ADT/StableHashing.h"
#include "llvm/IR/Instruction.h"
#include <cstdint>

namespace llvm {
Expand All @@ -35,6 +37,49 @@ stable_hash StructuralHash(const Function &F, bool DetailedHash = false);
/// composed the module hash.
stable_hash StructuralHash(const Module &M, bool DetailedHash = false);

/// The pair of an instruction index and an operand index, in that order.
using IndexPair = std::pair<unsigned, unsigned>;

/// A map from an instruction index to an instruction pointer.
using IndexInstrMap = MapVector<unsigned, Instruction *>;

/// A map from an IndexPair (instruction index, operand index) to the stable
/// hash of the operand at that location.
using IndexOperandHashMapType = DenseMap<IndexPair, stable_hash>;

/// A function that takes an instruction and an operand index and returns true
/// if the operand should be ignored in the function hash computation.
using IgnoreOperandFunc = std::function<bool(const Instruction *, unsigned)>;

struct FunctionHashInfo {
/// A hash value representing the structural content of the function
stable_hash FunctionHash;
/// A mapping from instruction indices to instruction pointers
std::unique_ptr<IndexInstrMap> IndexInstruction;
/// A mapping from pairs of instruction indices and operand indices
/// to the hashes of the operands. This can be used to analyze or
/// reconstruct the differences in ignored operands
std::unique_ptr<IndexOperandHashMapType> IndexOperandHashMap;

FunctionHashInfo(stable_hash FuntionHash,
std::unique_ptr<IndexInstrMap> IndexInstruction,
std::unique_ptr<IndexOperandHashMapType> IndexOperandHashMap)
: FunctionHash(FuntionHash),
IndexInstruction(std::move(IndexInstruction)),
IndexOperandHashMap(std::move(IndexOperandHashMap)) {}
};

/// Computes a structural hash of a given function, considering the structure
/// and content of the function's instructions while allowing for selective
/// ignoring of certain operands based on custom criteria. This hash can be used
/// to identify functions that are structurally similar or identical, which is
/// useful in optimizations, deduplication, or analysis tasks.
/// \param F The function to hash.
/// \param IgnoreOp A callable that takes an instruction and an operand index,
/// and returns true if the operand should be ignored in the hash computation.
/// \return A FunctionHashInfo structure holding the function hash, the map
/// from instruction indices to instructions, and the map from
/// (instruction index, operand index) pairs to the hashes of the operands
/// that were ignored.
FunctionHashInfo StructuralHashWithDifferences(const Function &F,
IgnoreOperandFunc IgnoreOp);

} // end namespace llvm

#endif
27 changes: 23 additions & 4 deletions llvm/lib/Analysis/StructuralHash.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -21,14 +21,33 @@ using namespace llvm;
// Prints the structural hash of the module, followed by the hash of each
// function definition in it, to OS. With
// StructuralHashOptions::CallTargetIgnored, each function's hash is followed
// by one line per ignored operand giving that operand's hash and its
// (instruction index, operand index) location. Reports all analyses as
// preserved.
PreservedAnalyses StructuralHashPrinterPass::run(Module &M,
ModuleAnalysisManager &MAM) {
OS << "Module Hash: "
<< format("%016" PRIx64, StructuralHash(M, EnableDetailedStructuralHash))
// Any option other than None selects the detailed module hash, so the module
// hash under CallTargetIgnored is the same as under Detailed.
<< format("%016" PRIx64,
StructuralHash(M, Options != StructuralHashOptions::None))
<< "\n";
for (Function &F : M) {
// Only function definitions are printed; declarations are skipped.
if (F.isDeclaration())
continue;
OS << "Function " << F.getName() << " Hash: "
<< format("%016" PRIx64, StructuralHash(F, EnableDetailedStructuralHash))
<< "\n";
if (Options == StructuralHashOptions::CallTargetIgnored) {
// NOTE(review): this predicate matches every Constant operand of a Call
// instruction, not just the callee, so a constant call argument would be
// ignored as well. Only the Call opcode is checked. Confirm whether
// restricting this to the called operand is intended.
auto IgnoreOp = [&](const Instruction *I, unsigned OpndIdx) {
return I->getOpcode() == Instruction::Call &&
isa<Constant>(I->getOperand(OpndIdx));
};
auto FuncHashInfo = StructuralHashWithDifferences(F, IgnoreOp);
OS << "Function " << F.getName()
<< " Hash: " << format("%016" PRIx64, FuncHashInfo.FunctionHash)
<< "\n";
// The binding named IndexPair below shadows the llvm::IndexPair type alias
// within this loop.
for (auto &[IndexPair, OpndHash] : *FuncHashInfo.IndexOperandHashMap) {
auto [InstIndex, OpndIndex] = IndexPair;
OS << "\tIgnored Operand Hash: " << format("%016" PRIx64, OpndHash)
<< " at (" << InstIndex << "," << OpndIndex << ")\n";
}
} else {
// None hashes without operands; Detailed hashes with them.
OS << "Function " << F.getName() << " Hash: "
<< format(
"%016" PRIx64,
StructuralHash(F, Options == StructuralHashOptions::Detailed))
<< "\n";
}
}
return PreservedAnalyses::all();
}
153 changes: 131 additions & 22 deletions llvm/lib/IR/StructuralHash.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -34,14 +34,18 @@ class StructuralHashImpl {
static constexpr stable_hash FunctionHeaderHash = 0x62642d6b6b2d6b72;
static constexpr stable_hash GlobalHeaderHash = 23456;

// This will produce different values on 32-bit and 64-bit systems as
// hash_combine returns a size_t. However, this is only used for
// detailed hashing which, in-tree, only needs to distinguish between
// differences in functions.
// TODO: This is not stable.
template <typename T> stable_hash hashArbitaryType(const T &V) {
return hash_combine(V);
}
/// IgnoreOp is a function that returns true if the operand should be ignored.
/// When it is null, no operand is ignored and the two maps below are never
/// allocated.
IgnoreOperandFunc IgnoreOp = nullptr;
/// A mapping from instruction indices to instruction pointers.
/// The index represents the position of an instruction based on the order in
/// which it is first encountered.
std::unique_ptr<IndexInstrMap> IndexInstruction = nullptr;
/// A mapping from pairs of instruction indices and operand indices
/// to the hashes of the operands. Only ignored operands are recorded here.
std::unique_ptr<IndexOperandHashMapType> IndexOperandHashMap = nullptr;

/// Assign a unique ID to each Value in the order they are first seen.
/// The ID is the number of entries already in the map when the Value is
/// inserted.
DenseMap<const Value *, int> ValueToId;

stable_hash hashType(Type *ValueType) {
SmallVector<stable_hash> Hashes;
Expand All @@ -53,23 +57,95 @@ class StructuralHashImpl {

public:
StructuralHashImpl() = delete;
explicit StructuralHashImpl(bool DetailedHash) : DetailedHash(DetailedHash) {}
explicit StructuralHashImpl(bool DetailedHash,
IgnoreOperandFunc IgnoreOp = nullptr)
: DetailedHash(DetailedHash), IgnoreOp(IgnoreOp) {
if (IgnoreOp) {
IndexInstruction = std::make_unique<IndexInstrMap>();
IndexOperandHashMap = std::make_unique<IndexOperandHashMapType>();
}
}

/// Hashes an APInt from its bit width followed by each of its raw 64-bit
/// words, in order.
stable_hash hashAPInt(const APInt &I) {
  SmallVector<stable_hash> Parts;
  // The width goes first, ahead of the raw words.
  Parts.push_back(I.getBitWidth());
  const uint64_t *Words = I.getRawData();
  Parts.append(Words, Words + I.getNumWords());
  return stable_hash_combine(Parts);
}

/// Hashes an APFloat by hashing the APInt that holds its bit pattern.
stable_hash hashAPFloat(const APFloat &F) {
  const APInt Bits = F.bitcastToAPInt();
  return hashAPInt(Bits);
}

/// Hashes a global value by its name; an unnamed global hashes to 0.
stable_hash hashGlobalValue(const GlobalValue *GV) {
  return GV->hasName() ? stable_hash_name(GV->getName()) : 0;
}

// Compute a hash for a Constant. This function is logically similar to
// FunctionComparator::cmpConstants() in FunctionComparator.cpp, but here
// we're interested in computing a hash rather than comparing two Constants.
// Some of the logic is simplified, e.g., we don't expand GEPOperator.
// The constant's type is always hashed first; what follows depends on the
// kind of constant.
stable_hash hashConstant(Constant *C) {
SmallVector<stable_hash> Hashes;
// TODO: hashArbitaryType() is not stable.
if (ConstantInt *ConstInt = dyn_cast<ConstantInt>(C)) {
Hashes.emplace_back(hashArbitaryType(ConstInt->getValue()));
} else if (ConstantFP *ConstFP = dyn_cast<ConstantFP>(C)) {
Hashes.emplace_back(hashArbitaryType(ConstFP->getValue()));
} else if (Function *Func = dyn_cast<Function>(C)) {
// Hashing the name will be deterministic as LLVM's hashing infrastructure
// has explicit support for hashing strings and will not simply hash
// the pointer.
Hashes.emplace_back(hashArbitaryType(Func->getName()));

Type *Ty = C->getType();
Hashes.emplace_back(hashType(Ty));

// Null values contribute the type hash plus an 'N' marker.
if (C->isNullValue()) {
Hashes.emplace_back(static_cast<stable_hash>('N'));
return stable_hash_combine(Hashes);
}

return stable_hash_combine(Hashes);
// Global values contribute the hash of their name.
if (auto *G = dyn_cast<GlobalValue>(C)) {
Hashes.emplace_back(hashGlobalValue(G));
return stable_hash_combine(Hashes);
}

// Sequential constant data is hashed over its raw data values.
if (const auto *Seq = dyn_cast<ConstantDataSequential>(C)) {
Hashes.emplace_back(xxh3_64bits(Seq->getRawDataValues()));
return stable_hash_combine(Hashes);
}

switch (C->getValueID()) {
case Value::ConstantIntVal: {
const APInt &Int = cast<ConstantInt>(C)->getValue();
Hashes.emplace_back(hashAPInt(Int));
return stable_hash_combine(Hashes);
}
case Value::ConstantFPVal: {
const APFloat &APF = cast<ConstantFP>(C)->getValueAPF();
Hashes.emplace_back(hashAPFloat(APF));
return stable_hash_combine(Hashes);
}
// Arrays, structs, vectors and constant expressions are hashed by
// recursing into each operand in order.
case Value::ConstantArrayVal:
case Value::ConstantStructVal:
case Value::ConstantVectorVal:
case Value::ConstantExprVal: {
for (const auto &Op : C->operands()) {
auto H = hashConstant(cast<Constant>(Op));
Hashes.emplace_back(H);
}
return stable_hash_combine(Hashes);
}
// Only the function named by the block address is hashed here; the basic
// block itself does not contribute.
case Value::BlockAddressVal: {
const BlockAddress *BA = cast<BlockAddress>(C);
auto H = hashGlobalValue(BA->getFunction());
Hashes.emplace_back(H);
return stable_hash_combine(Hashes);
}
case Value::DSOLocalEquivalentVal: {
const auto *Equiv = cast<DSOLocalEquivalent>(C);
auto H = hashGlobalValue(Equiv->getGlobalValue());
Hashes.emplace_back(H);
return stable_hash_combine(Hashes);
}
default:
// Skip other types of constants for simplicity.
// Only the type hash contributes for these.
return stable_hash_combine(Hashes);
}
}

stable_hash hashValue(Value *V) {
Expand All @@ -83,6 +159,10 @@ class StructuralHashImpl {
if (Argument *Arg = dyn_cast<Argument>(V))
Hashes.emplace_back(Arg->getArgNo());

// Get an index (an insertion order) for the non-constant value.
auto [It, WasInserted] = ValueToId.try_emplace(V, ValueToId.size());
Hashes.emplace_back(It->second);

return stable_hash_combine(Hashes);
}

Expand All @@ -107,8 +187,20 @@ class StructuralHashImpl {
if (const auto *ComparisonInstruction = dyn_cast<CmpInst>(&Inst))
Hashes.emplace_back(ComparisonInstruction->getPredicate());

for (const auto &Op : Inst.operands())
Hashes.emplace_back(hashOperand(Op));
unsigned InstIdx = 0;
if (IndexInstruction) {
InstIdx = IndexInstruction->size();
IndexInstruction->try_emplace(InstIdx, const_cast<Instruction *>(&Inst));
}

for (const auto [OpndIdx, Op] : enumerate(Inst.operands())) {
auto OpndHash = hashOperand(Op);
if (IgnoreOp && IgnoreOp(&Inst, OpndIdx)) {
assert(IndexOperandHashMap);
IndexOperandHashMap->try_emplace({InstIdx, OpndIdx}, OpndHash);
} else
Hashes.emplace_back(OpndHash);
}

return stable_hash_combine(Hashes);
}
Expand Down Expand Up @@ -188,6 +280,14 @@ class StructuralHashImpl {
}

/// Returns the current value of Hash.
uint64_t getHash() const { return Hash; }

/// Hands the instruction-index map over to the caller. Ownership is moved
/// out, so this hasher no longer holds the map after the call. The result is
/// null if no IgnoreOp was supplied at construction.
std::unique_ptr<IndexInstrMap> getIndexInstrMap() {
return std::move(IndexInstruction);
}

/// Hands the ignored-operand hash map over to the caller. Ownership is moved
/// out, so this hasher no longer holds the map after the call. The result is
/// null if no IgnoreOp was supplied at construction.
std::unique_ptr<IndexOperandHashMapType> getIndexPairOpndHashMap() {
return std::move(IndexOperandHashMap);
}
};

} // namespace
Expand All @@ -203,3 +303,12 @@ stable_hash llvm::StructuralHash(const Module &M, bool DetailedHash) {
H.update(M);
return H.getHash();
}

// Hashes F in detailed mode, leaving out the operands selected by IgnoreOp,
// and returns the hash together with the instruction-index map and the map of
// ignored operand hashes.
FunctionHashInfo
llvm::StructuralHashWithDifferences(const Function &F,
                                    IgnoreOperandFunc IgnoreOp) {
  StructuralHashImpl Hasher(/*DetailedHash=*/true, IgnoreOp);
  Hasher.update(F);

  // Pull the results out of the hasher; the two maps are moved out of it.
  const stable_hash FnHash = Hasher.getHash();
  auto InstrMap = Hasher.getIndexInstrMap();
  auto OpndHashes = Hasher.getIndexPairOpndHashMap();
  return FunctionHashInfo(FnHash, std::move(InstrMap), std::move(OpndHashes));
}
14 changes: 11 additions & 3 deletions llvm/lib/Passes/PassBuilder.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1175,9 +1175,17 @@ Expected<std::string> parseMemProfUsePassOptions(StringRef Params) {
return Result;
}

Expected<bool> parseStructuralHashPrinterPassOptions(StringRef Params) {
return PassBuilder::parseSinglePassOption(Params, "detailed",
"StructuralHashPrinterPass");
// Maps the parameter string of print<structural-hash> to a
// StructuralHashOptions value: empty selects None, "detailed" selects
// Detailed, and "call-target-ignored" selects CallTargetIgnored. Any other
// string yields a StringError.
Expected<StructuralHashOptions>
parseStructuralHashPrinterPassOptions(StringRef Params) {
  if (Params.empty())
    return StructuralHashOptions::None;

  const bool IsDetailed = Params == "detailed";
  const bool IsCallTargetIgnored = Params == "call-target-ignored";
  if (!IsDetailed && !IsCallTargetIgnored)
    return make_error<StringError>(
        formatv("invalid structural hash printer parameter '{0}' ", Params)
            .str(),
        inconvertibleErrorCode());

  if (IsDetailed)
    return StructuralHashOptions::Detailed;
  return StructuralHashOptions::CallTargetIgnored;
}

Expected<bool> parseWinEHPrepareOptions(StringRef Params) {
Expand Down
7 changes: 4 additions & 3 deletions llvm/lib/Passes/PassRegistry.def
Original file line number Diff line number Diff line change
Expand Up @@ -220,10 +220,11 @@ MODULE_PASS_WITH_PARAMS(
parseMSanPassOptions, "recover;kernel;eager-checks;track-origins=N")
MODULE_PASS_WITH_PARAMS(
"print<structural-hash>", "StructuralHashPrinterPass",
[](bool EnableDetailedStructuralHash) {
return StructuralHashPrinterPass(dbgs(), EnableDetailedStructuralHash);
[](StructuralHashOptions Options) {
return StructuralHashPrinterPass(dbgs(), Options);
},
parseStructuralHashPrinterPassOptions, "detailed")
parseStructuralHashPrinterPassOptions, "detailed;call-target-ignored")

#undef MODULE_PASS_WITH_PARAMS

#ifndef CGSCC_ANALYSIS
Expand Down
24 changes: 19 additions & 5 deletions llvm/test/Analysis/StructuralHash/structural-hash-printer.ll
Original file line number Diff line number Diff line change
@@ -1,17 +1,21 @@
; RUN: opt -passes='print<structural-hash>' -disable-output %s 2>&1 | FileCheck %s
; RUN: opt -passes='print<structural-hash><detailed>' -disable-output %s 2>&1 | FileCheck %s -check-prefix=DETAILED-HASH
; RUN: opt -passes='print<structural-hash><call-target-ignored>' -disable-output %s 2>&1 | FileCheck %s -check-prefix=CALLTARGETIGNORED-HASH

; Add a declaration so that we can test we skip it.
declare i64 @d1()
declare i64 @d1(i64)
declare i64 @e1(i64)

define i64 @f1(i64 %a) {
%b = add i64 %a, 1
ret i64 %b
%c = call i64 @d1(i64 %b)
ret i64 %c
}

define i32 @f2(i32 %a) {
%b = add i32 %a, 2
ret i32 %b
define i64 @f2(i64 %a) {
%b = add i64 %a, 1
%c = call i64 @e1(i64 %b)
ret i64 %c
}

; CHECK: Module Hash: {{([a-f0-9]{16,})}}
Expand All @@ -22,3 +26,13 @@ define i32 @f2(i32 %a) {
; DETAILED-HASH-NEXT: Function f1 Hash: [[DF1H:([a-f0-9]{16,})]]
; DETAILED-HASH-NOT: [[DF1H]]
; DETAILED-HASH-NEXT: Function f2 Hash: {{([a-f0-9]{16,})}}

; When ignoring the call target, check if `f1` and `f2` produce the same function hash.
; The index for the call instruction is 1, and the index of the call target operand is 1.
; The ignored operand hashes for different call targets should be different.
; CALLTARGETIGNORED-HASH: Module Hash: {{([a-f0-9]{16,})}}
; CALLTARGETIGNORED-HASH-NEXT: Function f1 Hash: [[IF1H:([a-f0-9]{16,})]]
; CALLTARGETIGNORED-HASH-NEXT: Ignored Operand Hash: [[IO1H:([a-f0-9]{16,})]] at (1,1)
; CALLTARGETIGNORED-HASH-NEXT: Function f2 Hash: [[IF1H]]
; CALLTARGETIGNORED-HASH-NOT: [[IO1H]]
; CALLTARGETIGNORED-HASH-NEXT: Ignored Operand Hash: {{([a-f0-9]{16,})}} at (1,1)
Loading

0 comments on commit 0dd9fdc

Please sign in to comment.