Skip to content

Commit 0dd9fdc

Browse files
authored
[StructuralHash] Support Differences (#112638)
This computes a structural hash while allowing selected operands to be ignored, as decided by a caller-provided function. Instead of a single hash value, it now returns a FunctionHashInfo, which includes the hash value, an index-to-instruction mapping, and a map from each ignored operand's location to that operand's hash value. Depends on #112621. This is a patch for https://discourse.llvm.org/t/rfc-global-function-merging/82608.
1 parent 242c770 commit 0dd9fdc

File tree

8 files changed

+304
-40
lines changed

8 files changed

+304
-40
lines changed

llvm/include/llvm/Analysis/StructuralHash.h

+10-3
Original file line numberDiff line numberDiff line change
@@ -13,15 +13,22 @@
1313

1414
namespace llvm {
1515

16+
enum class StructuralHashOptions {
17+
None, /// Hash with opcode only.
18+
Detailed, /// Hash with opcode and operands.
19+
CallTargetIgnored, /// Ignore call target operand when computing hash.
20+
};
21+
1622
/// Printer pass for StructuralHashes
1723
class StructuralHashPrinterPass
1824
: public PassInfoMixin<StructuralHashPrinterPass> {
1925
raw_ostream &OS;
20-
bool EnableDetailedStructuralHash;
26+
const StructuralHashOptions Options;
2127

2228
public:
23-
explicit StructuralHashPrinterPass(raw_ostream &OS, bool Detailed)
24-
: OS(OS), EnableDetailedStructuralHash(Detailed) {}
29+
explicit StructuralHashPrinterPass(raw_ostream &OS,
30+
StructuralHashOptions Options)
31+
: OS(OS), Options(Options) {}
2532

2633
PreservedAnalyses run(Module &M, ModuleAnalysisManager &MAM);
2734

llvm/include/llvm/IR/StructuralHash.h

+45
Original file line numberDiff line numberDiff line change
@@ -14,7 +14,9 @@
1414
#ifndef LLVM_IR_STRUCTURALHASH_H
1515
#define LLVM_IR_STRUCTURALHASH_H
1616

17+
#include "llvm/ADT/MapVector.h"
1718
#include "llvm/ADT/StableHashing.h"
19+
#include "llvm/IR/Instruction.h"
1820
#include <cstdint>
1921

2022
namespace llvm {
@@ -35,6 +37,49 @@ stable_hash StructuralHash(const Function &F, bool DetailedHash = false);
3537
/// composed the module hash.
3638
stable_hash StructuralHash(const Module &M, bool DetailedHash = false);
3739

40+
/// The pair of an instruction index and an operand index.
41+
using IndexPair = std::pair<unsigned, unsigned>;
42+
43+
/// A map from an instruction index to an instruction pointer.
44+
using IndexInstrMap = MapVector<unsigned, Instruction *>;
45+
46+
/// A map from an IndexPair to a stable hash.
47+
using IndexOperandHashMapType = DenseMap<IndexPair, stable_hash>;
48+
49+
/// A function that takes an instruction and an operand index and returns true
50+
/// if the operand should be ignored in the function hash computation.
51+
using IgnoreOperandFunc = std::function<bool(const Instruction *, unsigned)>;
52+
53+
struct FunctionHashInfo {
54+
/// A hash value representing the structural content of the function
55+
stable_hash FunctionHash;
56+
/// A mapping from instruction indices to instruction pointers
57+
std::unique_ptr<IndexInstrMap> IndexInstruction;
58+
/// A mapping from pairs of instruction indices and operand indices
59+
/// to the hashes of the operands. This can be used to analyze or
60+
/// reconstruct the differences in ignored operands
61+
std::unique_ptr<IndexOperandHashMapType> IndexOperandHashMap;
62+
63+
FunctionHashInfo(stable_hash FuntionHash,
64+
std::unique_ptr<IndexInstrMap> IndexInstruction,
65+
std::unique_ptr<IndexOperandHashMapType> IndexOperandHashMap)
66+
: FunctionHash(FuntionHash),
67+
IndexInstruction(std::move(IndexInstruction)),
68+
IndexOperandHashMap(std::move(IndexOperandHashMap)) {}
69+
};
70+
71+
/// Computes a structural hash of a given function, considering the structure
72+
/// and content of the function's instructions while allowing for selective
73+
/// ignoring of certain operands based on custom criteria. This hash can be used
74+
/// to identify functions that are structurally similar or identical, which is
75+
/// useful in optimizations, deduplication, or analysis tasks.
76+
/// \param F The function to hash.
77+
/// \param IgnoreOp A callable that takes an instruction and an operand index,
78+
/// and returns true if the operand should be ignored in the hash computation.
79+
/// \return A FunctionHashInfo structure
80+
FunctionHashInfo StructuralHashWithDifferences(const Function &F,
81+
IgnoreOperandFunc IgnoreOp);
82+
3883
} // end namespace llvm
3984

4085
#endif

llvm/lib/Analysis/StructuralHash.cpp

+23-4
Original file line numberDiff line numberDiff line change
@@ -21,14 +21,33 @@ using namespace llvm;
2121
PreservedAnalyses StructuralHashPrinterPass::run(Module &M,
2222
ModuleAnalysisManager &MAM) {
2323
OS << "Module Hash: "
24-
<< format("%016" PRIx64, StructuralHash(M, EnableDetailedStructuralHash))
24+
<< format("%016" PRIx64,
25+
StructuralHash(M, Options != StructuralHashOptions::None))
2526
<< "\n";
2627
for (Function &F : M) {
2728
if (F.isDeclaration())
2829
continue;
29-
OS << "Function " << F.getName() << " Hash: "
30-
<< format("%016" PRIx64, StructuralHash(F, EnableDetailedStructuralHash))
31-
<< "\n";
30+
if (Options == StructuralHashOptions::CallTargetIgnored) {
31+
auto IgnoreOp = [&](const Instruction *I, unsigned OpndIdx) {
32+
return I->getOpcode() == Instruction::Call &&
33+
isa<Constant>(I->getOperand(OpndIdx));
34+
};
35+
auto FuncHashInfo = StructuralHashWithDifferences(F, IgnoreOp);
36+
OS << "Function " << F.getName()
37+
<< " Hash: " << format("%016" PRIx64, FuncHashInfo.FunctionHash)
38+
<< "\n";
39+
for (auto &[IndexPair, OpndHash] : *FuncHashInfo.IndexOperandHashMap) {
40+
auto [InstIndex, OpndIndex] = IndexPair;
41+
OS << "\tIgnored Operand Hash: " << format("%016" PRIx64, OpndHash)
42+
<< " at (" << InstIndex << "," << OpndIndex << ")\n";
43+
}
44+
} else {
45+
OS << "Function " << F.getName() << " Hash: "
46+
<< format(
47+
"%016" PRIx64,
48+
StructuralHash(F, Options == StructuralHashOptions::Detailed))
49+
<< "\n";
50+
}
3251
}
3352
return PreservedAnalyses::all();
3453
}

llvm/lib/IR/StructuralHash.cpp

+131-22
Original file line numberDiff line numberDiff line change
@@ -34,14 +34,18 @@ class StructuralHashImpl {
3434
static constexpr stable_hash FunctionHeaderHash = 0x62642d6b6b2d6b72;
3535
static constexpr stable_hash GlobalHeaderHash = 23456;
3636

37-
// This will produce different values on 32-bit and 64-bit systens as
38-
// hash_combine returns a size_t. However, this is only used for
39-
// detailed hashing which, in-tree, only needs to distinguish between
40-
// differences in functions.
41-
// TODO: This is not stable.
42-
template <typename T> stable_hash hashArbitaryType(const T &V) {
43-
return hash_combine(V);
44-
}
37+
/// IgnoreOp is a function that returns true if the operand should be ignored.
38+
IgnoreOperandFunc IgnoreOp = nullptr;
39+
/// A mapping from instruction indices to instruction pointers.
40+
/// The index represents the position of an instruction based on the order in
41+
/// which it is first encountered.
42+
std::unique_ptr<IndexInstrMap> IndexInstruction = nullptr;
43+
/// A mapping from pairs of instruction indices and operand indices
44+
/// to the hashes of the operands.
45+
std::unique_ptr<IndexOperandHashMapType> IndexOperandHashMap = nullptr;
46+
47+
/// Assign a unique ID to each Value in the order they are first seen.
48+
DenseMap<const Value *, int> ValueToId;
4549

4650
stable_hash hashType(Type *ValueType) {
4751
SmallVector<stable_hash> Hashes;
@@ -53,23 +57,95 @@ class StructuralHashImpl {
5357

5458
public:
5559
StructuralHashImpl() = delete;
56-
explicit StructuralHashImpl(bool DetailedHash) : DetailedHash(DetailedHash) {}
60+
explicit StructuralHashImpl(bool DetailedHash,
61+
IgnoreOperandFunc IgnoreOp = nullptr)
62+
: DetailedHash(DetailedHash), IgnoreOp(IgnoreOp) {
63+
if (IgnoreOp) {
64+
IndexInstruction = std::make_unique<IndexInstrMap>();
65+
IndexOperandHashMap = std::make_unique<IndexOperandHashMapType>();
66+
}
67+
}
68+
69+
stable_hash hashAPInt(const APInt &I) {
70+
SmallVector<stable_hash> Hashes;
71+
Hashes.emplace_back(I.getBitWidth());
72+
auto RawVals = ArrayRef<uint64_t>(I.getRawData(), I.getNumWords());
73+
Hashes.append(RawVals.begin(), RawVals.end());
74+
return stable_hash_combine(Hashes);
75+
}
76+
77+
stable_hash hashAPFloat(const APFloat &F) {
78+
return hashAPInt(F.bitcastToAPInt());
79+
}
80+
81+
stable_hash hashGlobalValue(const GlobalValue *GV) {
82+
if (!GV->hasName())
83+
return 0;
84+
return stable_hash_name(GV->getName());
85+
}
5786

87+
// Compute a hash for a Constant. This function is logically similar to
88+
// FunctionComparator::cmpConstants() in FunctionComparator.cpp, but here
89+
// we're interested in computing a hash rather than comparing two Constants.
90+
// Some of the logic is simplified, e.g., we don't expand GEPOperator.
5891
stable_hash hashConstant(Constant *C) {
5992
SmallVector<stable_hash> Hashes;
60-
// TODO: hashArbitaryType() is not stable.
61-
if (ConstantInt *ConstInt = dyn_cast<ConstantInt>(C)) {
62-
Hashes.emplace_back(hashArbitaryType(ConstInt->getValue()));
63-
} else if (ConstantFP *ConstFP = dyn_cast<ConstantFP>(C)) {
64-
Hashes.emplace_back(hashArbitaryType(ConstFP->getValue()));
65-
} else if (Function *Func = dyn_cast<Function>(C)) {
66-
// Hashing the name will be deterministic as LLVM's hashing infrastructure
67-
// has explicit support for hashing strings and will not simply hash
68-
// the pointer.
69-
Hashes.emplace_back(hashArbitaryType(Func->getName()));
93+
94+
Type *Ty = C->getType();
95+
Hashes.emplace_back(hashType(Ty));
96+
97+
if (C->isNullValue()) {
98+
Hashes.emplace_back(static_cast<stable_hash>('N'));
99+
return stable_hash_combine(Hashes);
70100
}
71101

72-
return stable_hash_combine(Hashes);
102+
if (auto *G = dyn_cast<GlobalValue>(C)) {
103+
Hashes.emplace_back(hashGlobalValue(G));
104+
return stable_hash_combine(Hashes);
105+
}
106+
107+
if (const auto *Seq = dyn_cast<ConstantDataSequential>(C)) {
108+
Hashes.emplace_back(xxh3_64bits(Seq->getRawDataValues()));
109+
return stable_hash_combine(Hashes);
110+
}
111+
112+
switch (C->getValueID()) {
113+
case Value::ConstantIntVal: {
114+
const APInt &Int = cast<ConstantInt>(C)->getValue();
115+
Hashes.emplace_back(hashAPInt(Int));
116+
return stable_hash_combine(Hashes);
117+
}
118+
case Value::ConstantFPVal: {
119+
const APFloat &APF = cast<ConstantFP>(C)->getValueAPF();
120+
Hashes.emplace_back(hashAPFloat(APF));
121+
return stable_hash_combine(Hashes);
122+
}
123+
case Value::ConstantArrayVal:
124+
case Value::ConstantStructVal:
125+
case Value::ConstantVectorVal:
126+
case Value::ConstantExprVal: {
127+
for (const auto &Op : C->operands()) {
128+
auto H = hashConstant(cast<Constant>(Op));
129+
Hashes.emplace_back(H);
130+
}
131+
return stable_hash_combine(Hashes);
132+
}
133+
case Value::BlockAddressVal: {
134+
const BlockAddress *BA = cast<BlockAddress>(C);
135+
auto H = hashGlobalValue(BA->getFunction());
136+
Hashes.emplace_back(H);
137+
return stable_hash_combine(Hashes);
138+
}
139+
case Value::DSOLocalEquivalentVal: {
140+
const auto *Equiv = cast<DSOLocalEquivalent>(C);
141+
auto H = hashGlobalValue(Equiv->getGlobalValue());
142+
Hashes.emplace_back(H);
143+
return stable_hash_combine(Hashes);
144+
}
145+
default:
146+
// Skip other types of constants for simplicity.
147+
return stable_hash_combine(Hashes);
148+
}
73149
}
74150

75151
stable_hash hashValue(Value *V) {
@@ -83,6 +159,10 @@ class StructuralHashImpl {
83159
if (Argument *Arg = dyn_cast<Argument>(V))
84160
Hashes.emplace_back(Arg->getArgNo());
85161

162+
// Get an index (an insertion order) for the non-constant value.
163+
auto [It, WasInserted] = ValueToId.try_emplace(V, ValueToId.size());
164+
Hashes.emplace_back(It->second);
165+
86166
return stable_hash_combine(Hashes);
87167
}
88168

@@ -107,8 +187,20 @@ class StructuralHashImpl {
107187
if (const auto *ComparisonInstruction = dyn_cast<CmpInst>(&Inst))
108188
Hashes.emplace_back(ComparisonInstruction->getPredicate());
109189

110-
for (const auto &Op : Inst.operands())
111-
Hashes.emplace_back(hashOperand(Op));
190+
unsigned InstIdx = 0;
191+
if (IndexInstruction) {
192+
InstIdx = IndexInstruction->size();
193+
IndexInstruction->try_emplace(InstIdx, const_cast<Instruction *>(&Inst));
194+
}
195+
196+
for (const auto [OpndIdx, Op] : enumerate(Inst.operands())) {
197+
auto OpndHash = hashOperand(Op);
198+
if (IgnoreOp && IgnoreOp(&Inst, OpndIdx)) {
199+
assert(IndexOperandHashMap);
200+
IndexOperandHashMap->try_emplace({InstIdx, OpndIdx}, OpndHash);
201+
} else
202+
Hashes.emplace_back(OpndHash);
203+
}
112204

113205
return stable_hash_combine(Hashes);
114206
}
@@ -188,6 +280,14 @@ class StructuralHashImpl {
188280
}
189281

190282
uint64_t getHash() const { return Hash; }
283+
284+
std::unique_ptr<IndexInstrMap> getIndexInstrMap() {
285+
return std::move(IndexInstruction);
286+
}
287+
288+
std::unique_ptr<IndexOperandHashMapType> getIndexPairOpndHashMap() {
289+
return std::move(IndexOperandHashMap);
290+
}
191291
};
192292

193293
} // namespace
@@ -203,3 +303,12 @@ stable_hash llvm::StructuralHash(const Module &M, bool DetailedHash) {
203303
H.update(M);
204304
return H.getHash();
205305
}
306+
307+
FunctionHashInfo
308+
llvm::StructuralHashWithDifferences(const Function &F,
309+
IgnoreOperandFunc IgnoreOp) {
310+
StructuralHashImpl H(/*DetailedHash=*/true, IgnoreOp);
311+
H.update(F);
312+
return FunctionHashInfo(H.getHash(), H.getIndexInstrMap(),
313+
H.getIndexPairOpndHashMap());
314+
}

llvm/lib/Passes/PassBuilder.cpp

+11-3
Original file line numberDiff line numberDiff line change
@@ -1175,9 +1175,17 @@ Expected<std::string> parseMemProfUsePassOptions(StringRef Params) {
11751175
return Result;
11761176
}
11771177

1178-
Expected<bool> parseStructuralHashPrinterPassOptions(StringRef Params) {
1179-
return PassBuilder::parseSinglePassOption(Params, "detailed",
1180-
"StructuralHashPrinterPass");
1178+
Expected<StructuralHashOptions>
1179+
parseStructuralHashPrinterPassOptions(StringRef Params) {
1180+
if (Params.empty())
1181+
return StructuralHashOptions::None;
1182+
if (Params == "detailed")
1183+
return StructuralHashOptions::Detailed;
1184+
if (Params == "call-target-ignored")
1185+
return StructuralHashOptions::CallTargetIgnored;
1186+
return make_error<StringError>(
1187+
formatv("invalid structural hash printer parameter '{0}' ", Params).str(),
1188+
inconvertibleErrorCode());
11811189
}
11821190

11831191
Expected<bool> parseWinEHPrepareOptions(StringRef Params) {

llvm/lib/Passes/PassRegistry.def

+4-3
Original file line numberDiff line numberDiff line change
@@ -220,10 +220,11 @@ MODULE_PASS_WITH_PARAMS(
220220
parseMSanPassOptions, "recover;kernel;eager-checks;track-origins=N")
221221
MODULE_PASS_WITH_PARAMS(
222222
"print<structural-hash>", "StructuralHashPrinterPass",
223-
[](bool EnableDetailedStructuralHash) {
224-
return StructuralHashPrinterPass(dbgs(), EnableDetailedStructuralHash);
223+
[](StructuralHashOptions Options) {
224+
return StructuralHashPrinterPass(dbgs(), Options);
225225
},
226-
parseStructuralHashPrinterPassOptions, "detailed")
226+
parseStructuralHashPrinterPassOptions, "detailed;call-target-ignored")
227+
227228
#undef MODULE_PASS_WITH_PARAMS
228229

229230
#ifndef CGSCC_ANALYSIS
Original file line numberDiff line numberDiff line change
@@ -1,17 +1,21 @@
11
; RUN: opt -passes='print<structural-hash>' -disable-output %s 2>&1 | FileCheck %s
22
; RUN: opt -passes='print<structural-hash><detailed>' -disable-output %s 2>&1 | FileCheck %s -check-prefix=DETAILED-HASH
3+
; RUN: opt -passes='print<structural-hash><call-target-ignored>' -disable-output %s 2>&1 | FileCheck %s -check-prefix=CALLTARGETIGNORED-HASH
34

45
; Add a declaration so that we can test we skip it.
5-
declare i64 @d1()
6+
declare i64 @d1(i64)
7+
declare i64 @e1(i64)
68

79
define i64 @f1(i64 %a) {
810
%b = add i64 %a, 1
9-
ret i64 %b
11+
%c = call i64 @d1(i64 %b)
12+
ret i64 %c
1013
}
1114

12-
define i32 @f2(i32 %a) {
13-
%b = add i32 %a, 2
14-
ret i32 %b
15+
define i64 @f2(i64 %a) {
16+
%b = add i64 %a, 1
17+
%c = call i64 @e1(i64 %b)
18+
ret i64 %c
1519
}
1620

1721
; CHECK: Module Hash: {{([a-f0-9]{16,})}}
@@ -22,3 +26,13 @@ define i32 @f2(i32 %a) {
2226
; DETAILED-HASH-NEXT: Function f1 Hash: [[DF1H:([a-f0-9]{16,})]]
2327
; DETAILED-HASH-NOT: [[DF1H]]
2428
; DETAILED-HASH-NEXT: Function f2 Hash: {{([a-f0-9]{16,})}}
29+
30+
; When ignoring the call target, check if `f1` and `f2` produce the same function hash.
31+
; The index for the call instruction is 1, and the index of the call target operand is 1.
32+
; The ignored operand hashes for different call targets should be different.
33+
; CALLTARGETIGNORED-HASH: Module Hash: {{([a-f0-9]{16,})}}
34+
; CALLTARGETIGNORED-HASH-NEXT: Function f1 Hash: [[IF1H:([a-f0-9]{16,})]]
35+
; CALLTARGETIGNORED-HASH-NEXT: Ignored Operand Hash: [[IO1H:([a-f0-9]{16,})]] at (1,1)
36+
; CALLTARGETIGNORED-HASH-NEXT: Function f2 Hash: [[IF1H]]
37+
; CALLTARGETIGNORED-HASH-NOT: [[IO1H]]
38+
; CALLTARGETIGNORED-HASH-NEXT: Ignored Operand Hash: {{([a-f0-9]{16,})}} at (1,1)

0 commit comments

Comments
 (0)