Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[StructuralHash] Support Differences #112638

Merged
merged 5 commits into from
Oct 27, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
13 changes: 10 additions & 3 deletions llvm/include/llvm/Analysis/StructuralHash.h
Original file line number Diff line number Diff line change
Expand Up @@ -13,15 +13,22 @@

namespace llvm {

/// Selects how much detail StructuralHashPrinterPass includes in the hashes it
/// prints.
enum class StructuralHashOptions {
None, ///< Hash with opcode only.
Detailed, ///< Hash with opcode and operands.
CallTargetIgnored, ///< Ignore call target operand when computing hash.
};

/// Printer pass for StructuralHashes
class StructuralHashPrinterPass
: public PassInfoMixin<StructuralHashPrinterPass> {
raw_ostream &OS;
bool EnableDetailedStructuralHash;
const StructuralHashOptions Options;

public:
explicit StructuralHashPrinterPass(raw_ostream &OS, bool Detailed)
: OS(OS), EnableDetailedStructuralHash(Detailed) {}
explicit StructuralHashPrinterPass(raw_ostream &OS,
StructuralHashOptions Options)
: OS(OS), Options(Options) {}

PreservedAnalyses run(Module &M, ModuleAnalysisManager &MAM);

Expand Down
45 changes: 45 additions & 0 deletions llvm/include/llvm/IR/StructuralHash.h
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,9 @@
#ifndef LLVM_IR_STRUCTURALHASH_H
#define LLVM_IR_STRUCTURALHASH_H

#include "llvm/ADT/MapVector.h"
#include "llvm/ADT/StableHashing.h"
#include "llvm/IR/Instruction.h"
#include <cstdint>

namespace llvm {
Expand All @@ -35,6 +37,49 @@ stable_hash StructuralHash(const Function &F, bool DetailedHash = false);
/// composed the module hash.
stable_hash StructuralHash(const Module &M, bool DetailedHash = false);

/// The pair of an instruction index and an operand index.
using IndexPair = std::pair<unsigned, unsigned>;

/// A map from an instruction index to an instruction pointer.
using IndexInstrMap = MapVector<unsigned, Instruction *>;

/// A map from an IndexPair to a stable hash.
using IndexOperandHashMapType = DenseMap<IndexPair, stable_hash>;

/// A function that takes an instruction and an operand index and returns true
/// if the operand should be ignored in the function hash computation.
using IgnoreOperandFunc = std::function<bool(const Instruction *, unsigned)>;

struct FunctionHashInfo {
/// A hash value representing the structural content of the function
stable_hash FunctionHash;
/// A mapping from instruction indices to instruction pointers
std::unique_ptr<IndexInstrMap> IndexInstruction;
/// A mapping from pairs of instruction indices and operand indices
/// to the hashes of the operands. This can be used to analyze or
/// reconstruct the differences in ignored operands
std::unique_ptr<IndexOperandHashMapType> IndexOperandHashMap;

FunctionHashInfo(stable_hash FuntionHash,
std::unique_ptr<IndexInstrMap> IndexInstruction,
std::unique_ptr<IndexOperandHashMapType> IndexOperandHashMap)
: FunctionHash(FuntionHash),
IndexInstruction(std::move(IndexInstruction)),
IndexOperandHashMap(std::move(IndexOperandHashMap)) {}
};

/// Computes a structural hash of a given function, considering the structure
/// and content of the function's instructions while allowing for selective
/// ignoring of certain operands based on custom criteria. This hash can be used
/// to identify functions that are structurally similar or identical, which is
/// useful in optimizations, deduplication, or analysis tasks.
/// \param F The function to hash.
/// \param IgnoreOp A callable that takes an instruction and an operand index,
/// and returns true if the operand should be ignored in the hash computation.
/// \return A FunctionHashInfo holding the function hash, the map from
/// instruction index to instruction, and the map from (instruction index,
/// operand index) to the hash of each ignored operand. If \p IgnoreOp is
/// empty, both maps in the result are null.
FunctionHashInfo StructuralHashWithDifferences(const Function &F,
IgnoreOperandFunc IgnoreOp);

} // end namespace llvm

#endif
27 changes: 23 additions & 4 deletions llvm/lib/Analysis/StructuralHash.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -21,14 +21,33 @@ using namespace llvm;
/// Writes structural-hash output for \p M and for every function definition in
/// it to OS, and reports all analyses as preserved.
PreservedAnalyses StructuralHashPrinterPass::run(Module &M,
ModuleAnalysisManager &MAM) {
OS << "Module Hash: "
<< format("%016" PRIx64, StructuralHash(M, EnableDetailedStructuralHash))
// Any option other than None requests the detailed module hash.
<< format("%016" PRIx64,
StructuralHash(M, Options != StructuralHashOptions::None))
<< "\n";
for (Function &F : M) {
// Only function definitions are reported.
if (F.isDeclaration())
continue;
OS << "Function " << F.getName() << " Hash: "
<< format("%016" PRIx64, StructuralHash(F, EnableDetailedStructuralHash))
<< "\n";
if (Options == StructuralHashOptions::CallTargetIgnored) {
// Matches any operand of a call instruction that is a Constant.
// NOTE(review): this is broader than the callee alone -- constant call
// arguments also satisfy it; confirm that is intended.
auto IgnoreOp = [&](const Instruction *I, unsigned OpndIdx) {
return I->getOpcode() == Instruction::Call &&
isa<Constant>(I->getOperand(OpndIdx));
};
auto FuncHashInfo = StructuralHashWithDifferences(F, IgnoreOp);
OS << "Function " << F.getName()
<< " Hash: " << format("%016" PRIx64, FuncHashInfo.FunctionHash)
<< "\n";
// Print each ignored operand's hash with its (instruction index, operand
// index) position.
for (auto &[IndexPair, OpndHash] : *FuncHashInfo.IndexOperandHashMap) {
auto [InstIndex, OpndIndex] = IndexPair;
OS << "\tIgnored Operand Hash: " << format("%016" PRIx64, OpndHash)
<< " at (" << InstIndex << "," << OpndIndex << ")\n";
}
} else {
// None -> non-detailed function hash; Detailed -> detailed function hash.
OS << "Function " << F.getName() << " Hash: "
<< format(
"%016" PRIx64,
StructuralHash(F, Options == StructuralHashOptions::Detailed))
<< "\n";
}
}
return PreservedAnalyses::all();
}
153 changes: 131 additions & 22 deletions llvm/lib/IR/StructuralHash.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -34,14 +34,18 @@ class StructuralHashImpl {
static constexpr stable_hash FunctionHeaderHash = 0x62642d6b6b2d6b72;
static constexpr stable_hash GlobalHeaderHash = 23456;

// This will produce different values on 32-bit and 64-bit systems as
// hash_combine returns a size_t. However, this is only used for
// detailed hashing which, in-tree, only needs to distinguish between
// differences in functions.
// TODO: This is not stable.
template <typename T> stable_hash hashArbitaryType(const T &V) {
return hash_combine(V);
}
/// IgnoreOp is a function that returns true if the operand should be ignored.
IgnoreOperandFunc IgnoreOp = nullptr;
/// A mapping from instruction indices to instruction pointers.
/// The index represents the position of an instruction based on the order in
/// which it is first encountered.
std::unique_ptr<IndexInstrMap> IndexInstruction = nullptr;
/// A mapping from pairs of instruction indices and operand indices
/// to the hashes of the operands.
std::unique_ptr<IndexOperandHashMapType> IndexOperandHashMap = nullptr;

/// Assign a unique ID to each Value in the order they are first seen.
DenseMap<const Value *, int> ValueToId;

stable_hash hashType(Type *ValueType) {
SmallVector<stable_hash> Hashes;
Expand All @@ -53,23 +57,95 @@ class StructuralHashImpl {

public:
StructuralHashImpl() = delete;
explicit StructuralHashImpl(bool DetailedHash) : DetailedHash(DetailedHash) {}
explicit StructuralHashImpl(bool DetailedHash,
IgnoreOperandFunc IgnoreOp = nullptr)
: DetailedHash(DetailedHash), IgnoreOp(IgnoreOp) {
if (IgnoreOp) {
IndexInstruction = std::make_unique<IndexInstrMap>();
IndexOperandHashMap = std::make_unique<IndexOperandHashMapType>();
}
}

/// Hashes an APInt by combining its bit width with each of its raw 64-bit
/// words, in order.
stable_hash hashAPInt(const APInt &I) {
  SmallVector<stable_hash> Parts;
  Parts.emplace_back(I.getBitWidth());
  const ArrayRef<uint64_t> Words(I.getRawData(), I.getNumWords());
  for (uint64_t Word : Words)
    Parts.emplace_back(Word);
  return stable_hash_combine(Parts);
}

/// Hashes an APFloat through its bit pattern, reusing hashAPInt().
stable_hash hashAPFloat(const APFloat &F) {
  const APInt Bits = F.bitcastToAPInt();
  return hashAPInt(Bits);
}

/// Hashes a global value by name: returns stable_hash_name() of its name, or
/// 0 when the global has no name.
stable_hash hashGlobalValue(const GlobalValue *GV) {
  if (!GV->hasName())
    return 0;
  return stable_hash_name(GV->getName());
}

// Compute a hash for a Constant. This function is logically similar to
// FunctionComparator::cmpConstants() in FunctionComparator.cpp, but here
// we're interested in computing a hash rather than comparing two Constants.
// Some of the logic is simplified, e.g, we don't expand GEPOperator.
stable_hash hashConstant(Constant *C) {
SmallVector<stable_hash> Hashes;
// TODO: hashArbitaryType() is not stable.
if (ConstantInt *ConstInt = dyn_cast<ConstantInt>(C)) {
Hashes.emplace_back(hashArbitaryType(ConstInt->getValue()));
} else if (ConstantFP *ConstFP = dyn_cast<ConstantFP>(C)) {
Hashes.emplace_back(hashArbitaryType(ConstFP->getValue()));
} else if (Function *Func = dyn_cast<Function>(C)) {
// Hashing the name will be deterministic as LLVM's hashing infrastructure
// has explicit support for hashing strings and will not simply hash
// the pointer.
Hashes.emplace_back(hashArbitaryType(Func->getName()));

Type *Ty = C->getType();
Hashes.emplace_back(hashType(Ty));

if (C->isNullValue()) {
Hashes.emplace_back(static_cast<stable_hash>('N'));
return stable_hash_combine(Hashes);
}

return stable_hash_combine(Hashes);
if (auto *G = dyn_cast<GlobalValue>(C)) {
Hashes.emplace_back(hashGlobalValue(G));
return stable_hash_combine(Hashes);
}

if (const auto *Seq = dyn_cast<ConstantDataSequential>(C)) {
Hashes.emplace_back(xxh3_64bits(Seq->getRawDataValues()));
return stable_hash_combine(Hashes);
}

switch (C->getValueID()) {
case Value::ConstantIntVal: {
const APInt &Int = cast<ConstantInt>(C)->getValue();
Hashes.emplace_back(hashAPInt(Int));
return stable_hash_combine(Hashes);
}
case Value::ConstantFPVal: {
const APFloat &APF = cast<ConstantFP>(C)->getValueAPF();
Hashes.emplace_back(hashAPFloat(APF));
return stable_hash_combine(Hashes);
}
case Value::ConstantArrayVal:
case Value::ConstantStructVal:
case Value::ConstantVectorVal:
case Value::ConstantExprVal: {
for (const auto &Op : C->operands()) {
auto H = hashConstant(cast<Constant>(Op));
Hashes.emplace_back(H);
}
return stable_hash_combine(Hashes);
}
case Value::BlockAddressVal: {
const BlockAddress *BA = cast<BlockAddress>(C);
auto H = hashGlobalValue(BA->getFunction());
Hashes.emplace_back(H);
return stable_hash_combine(Hashes);
}
case Value::DSOLocalEquivalentVal: {
const auto *Equiv = cast<DSOLocalEquivalent>(C);
auto H = hashGlobalValue(Equiv->getGlobalValue());
Hashes.emplace_back(H);
return stable_hash_combine(Hashes);
}
default:
// Skip other types of constants for simplicity.
return stable_hash_combine(Hashes);
}
}

stable_hash hashValue(Value *V) {
Expand All @@ -83,6 +159,10 @@ class StructuralHashImpl {
if (Argument *Arg = dyn_cast<Argument>(V))
Hashes.emplace_back(Arg->getArgNo());

// Get an index (an insertion order) for the non-constant value.
auto [It, WasInserted] = ValueToId.try_emplace(V, ValueToId.size());
Hashes.emplace_back(It->second);

return stable_hash_combine(Hashes);
}

Expand All @@ -107,8 +187,20 @@ class StructuralHashImpl {
if (const auto *ComparisonInstruction = dyn_cast<CmpInst>(&Inst))
Hashes.emplace_back(ComparisonInstruction->getPredicate());

for (const auto &Op : Inst.operands())
Hashes.emplace_back(hashOperand(Op));
unsigned InstIdx = 0;
if (IndexInstruction) {
InstIdx = IndexInstruction->size();
IndexInstruction->try_emplace(InstIdx, const_cast<Instruction *>(&Inst));
}

for (const auto [OpndIdx, Op] : enumerate(Inst.operands())) {
auto OpndHash = hashOperand(Op);
if (IgnoreOp && IgnoreOp(&Inst, OpndIdx)) {
assert(IndexOperandHashMap);
IndexOperandHashMap->try_emplace({InstIdx, OpndIdx}, OpndHash);
} else
Hashes.emplace_back(OpndHash);
}

return stable_hash_combine(Hashes);
}
Expand Down Expand Up @@ -188,6 +280,14 @@ class StructuralHashImpl {
}

/// Returns the current value of the Hash member.
uint64_t getHash() const { return Hash; }

/// Transfers ownership of the instruction-index map to the caller; the
/// IndexInstruction member is null afterwards.
std::unique_ptr<IndexInstrMap> getIndexInstrMap() {
return std::move(IndexInstruction);
}

/// Transfers ownership of the (instruction index, operand index) -> hash map
/// to the caller; the IndexOperandHashMap member is null afterwards.
std::unique_ptr<IndexOperandHashMapType> getIndexPairOpndHashMap() {
return std::move(IndexOperandHashMap);
}
};

} // namespace
Expand All @@ -203,3 +303,12 @@ stable_hash llvm::StructuralHash(const Module &M, bool DetailedHash) {
H.update(M);
return H.getHash();
}

/// Hashes \p F with detailed hashing enabled, leaving out the operands selected
/// by \p IgnoreOp, and returns the hash together with the index maps collected
/// while hashing.
FunctionHashInfo
llvm::StructuralHashWithDifferences(const Function &F,
                                    IgnoreOperandFunc IgnoreOp) {
  // IgnoreOp is a by-value sink; hand it over instead of copying it again.
  StructuralHashImpl H(/*DetailedHash=*/true, std::move(IgnoreOp));
  H.update(F);
  return FunctionHashInfo(H.getHash(), H.getIndexInstrMap(),
                          H.getIndexPairOpndHashMap());
}
14 changes: 11 additions & 3 deletions llvm/lib/Passes/PassBuilder.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1175,9 +1175,17 @@ Expected<std::string> parseMemProfUsePassOptions(StringRef Params) {
return Result;
}

Expected<bool> parseStructuralHashPrinterPassOptions(StringRef Params) {
return PassBuilder::parseSinglePassOption(Params, "detailed",
"StructuralHashPrinterPass");
/// Maps the textual pass parameter to a StructuralHashOptions value: empty
/// selects None, "detailed" selects Detailed, and "call-target-ignored" selects
/// CallTargetIgnored. Any other text yields a StringError.
Expected<StructuralHashOptions>
parseStructuralHashPrinterPassOptions(StringRef Params) {
  StructuralHashOptions Parsed;
  if (Params.empty()) {
    Parsed = StructuralHashOptions::None;
  } else if (Params == "detailed") {
    Parsed = StructuralHashOptions::Detailed;
  } else if (Params == "call-target-ignored") {
    Parsed = StructuralHashOptions::CallTargetIgnored;
  } else {
    return make_error<StringError>(
        formatv("invalid structural hash printer parameter '{0}' ", Params)
            .str(),
        inconvertibleErrorCode());
  }
  return Parsed;
}

Expected<bool> parseWinEHPrepareOptions(StringRef Params) {
Expand Down
7 changes: 4 additions & 3 deletions llvm/lib/Passes/PassRegistry.def
Original file line number Diff line number Diff line change
Expand Up @@ -220,10 +220,11 @@ MODULE_PASS_WITH_PARAMS(
parseMSanPassOptions, "recover;kernel;eager-checks;track-origins=N")
MODULE_PASS_WITH_PARAMS(
"print<structural-hash>", "StructuralHashPrinterPass",
[](bool EnableDetailedStructuralHash) {
return StructuralHashPrinterPass(dbgs(), EnableDetailedStructuralHash);
[](StructuralHashOptions Options) {
return StructuralHashPrinterPass(dbgs(), Options);
},
parseStructuralHashPrinterPassOptions, "detailed")
parseStructuralHashPrinterPassOptions, "detailed;call-target-ignored")

#undef MODULE_PASS_WITH_PARAMS

#ifndef CGSCC_ANALYSIS
Expand Down
24 changes: 19 additions & 5 deletions llvm/test/Analysis/StructuralHash/structural-hash-printer.ll
Original file line number Diff line number Diff line change
@@ -1,17 +1,21 @@
; RUN: opt -passes='print<structural-hash>' -disable-output %s 2>&1 | FileCheck %s
; RUN: opt -passes='print<structural-hash><detailed>' -disable-output %s 2>&1 | FileCheck %s -check-prefix=DETAILED-HASH
; RUN: opt -passes='print<structural-hash><call-target-ignored>' -disable-output %s 2>&1 | FileCheck %s -check-prefix=CALLTARGETIGNORED-HASH

; Add a declaration so that we can test we skip it.
declare i64 @d1()
declare i64 @d1(i64)
declare i64 @e1(i64)

define i64 @f1(i64 %a) {
%b = add i64 %a, 1
ret i64 %b
%c = call i64 @d1(i64 %b)
ret i64 %c
}

define i32 @f2(i32 %a) {
%b = add i32 %a, 2
ret i32 %b
define i64 @f2(i64 %a) {
%b = add i64 %a, 1
%c = call i64 @e1(i64 %b)
ret i64 %c
}

; CHECK: Module Hash: {{([a-f0-9]{16,})}}
Expand All @@ -22,3 +26,13 @@ define i32 @f2(i32 %a) {
; DETAILED-HASH-NEXT: Function f1 Hash: [[DF1H:([a-f0-9]{16,})]]
; DETAILED-HASH-NOT: [[DF1H]]
; DETAILED-HASH-NEXT: Function f2 Hash: {{([a-f0-9]{16,})}}

; When ignoring the call target, check if `f1` and `f2` produce the same function hash.
; The index for the call instruction is 1, and the index of the call target operand is 1.
; The ignored operand hashes for different call targets should be different.
; CALLTARGETIGNORED-HASH: Module Hash: {{([a-f0-9]{16,})}}
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Since these are stable hashes, what do you think about adding the literal hash values to the test? That would tell us exactly when a hash changes, because the test would then fail.

I see that you do check that f1 and f2 hash to the same value. Maybe that is good enough.

; CALLTARGETIGNORED-HASH-NEXT: Function f1 Hash: [[IF1H:([a-f0-9]{16,})]]
; CALLTARGETIGNORED-HASH-NEXT: Ignored Operand Hash: [[IO1H:([a-f0-9]{16,})]] at (1,1)
; CALLTARGETIGNORED-HASH-NEXT: Function f2 Hash: [[IF1H]]
; CALLTARGETIGNORED-HASH-NOT: [[IO1H]]
; CALLTARGETIGNORED-HASH-NEXT: Ignored Operand Hash: {{([a-f0-9]{16,})}} at (1,1)
Loading
Loading