From 14f0691bbf62959290a2eb2203d7eb3a133615f4 Mon Sep 17 00:00:00 2001 From: Kyungwoo Lee Date: Wed, 16 Oct 2024 17:09:07 -0700 Subject: [PATCH 1/5] [StructuralHash] Support Differences This comutes a structural hash while allowing for selective ignoring of certain operands based on a custom function that is provided. Instead of a single hash value, it now returns FunctionHashInfo which includes a hash value, an instruction mapping, and a map to track the operand location and its corresponding hash value that is ignored. --- llvm/include/llvm/IR/StructuralHash.h | 45 ++++++ llvm/lib/IR/StructuralHash.cpp | 194 ++++++++++++++++++++--- llvm/unittests/IR/StructuralHashTest.cpp | 55 +++++++ 3 files changed, 272 insertions(+), 22 deletions(-) diff --git a/llvm/include/llvm/IR/StructuralHash.h b/llvm/include/llvm/IR/StructuralHash.h index e2e192cc9501b3..071575137ff572 100644 --- a/llvm/include/llvm/IR/StructuralHash.h +++ b/llvm/include/llvm/IR/StructuralHash.h @@ -14,7 +14,9 @@ #ifndef LLVM_IR_STRUCTURALHASH_H #define LLVM_IR_STRUCTURALHASH_H +#include "llvm/ADT/MapVector.h" #include "llvm/ADT/StableHashing.h" +#include "llvm/IR/Instruction.h" #include namespace llvm { @@ -35,6 +37,49 @@ stable_hash StructuralHash(const Function &F, bool DetailedHash = false); /// composed the module hash. stable_hash StructuralHash(const Module &M, bool DetailedHash = false); +/// The pair of an instruction index and a operand index. +using IndexPair = std::pair; + +/// A map from an instruction index to an instruction pointer. +using IndexInstrMap = MapVector; + +/// A map from an IndexPair to a stable hash. +using IndexOperandHashMapType = DenseMap; + +/// A function that takes an instruction and an operand index and returns true +/// if the operand should be ignored in the function hash computation. +using IgnoreOperandFunc = std::function; + +struct FunctionHashInfo { + /// A hash value representing the structural content of the function + stable_hash FunctionHash; + /// A mapping from instruction indices to instruction pointers + std::unique_ptr IndexInstruction; + /// A mapping from pairs of instruction indices and operand indices + /// to the hashes of the operands. This can be used to analyze or + /// reconstruct the differences in ignored operands + std::unique_ptr IndexOperandHashMap; + + FunctionHashInfo(stable_hash FuntionHash, + std::unique_ptr IndexInstruction, + std::unique_ptr IndexOperandHashMap) + : FunctionHash(FuntionHash), + IndexInstruction(std::move(IndexInstruction)), + IndexOperandHashMap(std::move(IndexOperandHashMap)) {} +}; + +/// Computes a structural hash of a given function, considering the structure +/// and content of the function's instructions while allowing for selective +/// ignoring of certain operands based on custom criteria. This hash can be used +/// to identify functions that are structurally similar or identical, which is +/// useful in optimizations, deduplication, or analysis tasks. +/// \param F The function to hash. +/// \param IgnoreOp A callable that takes an instruction and an operand index, +/// and returns true if the operand should be ignored in the hash computation. +/// \return A FunctionHashInfo structure +FunctionHashInfo StructuralHashWithDifferences(const Function &F, + IgnoreOperandFunc IgnoreOp); + } // end namespace llvm #endif diff --git a/llvm/lib/IR/StructuralHash.cpp b/llvm/lib/IR/StructuralHash.cpp index 267a085c5af705..db54a848fd50dd 100644 --- a/llvm/lib/IR/StructuralHash.cpp +++ b/llvm/lib/IR/StructuralHash.cpp @@ -34,14 +34,18 @@ class StructuralHashImpl { static constexpr stable_hash FunctionHeaderHash = 0x62642d6b6b2d6b72; static constexpr stable_hash GlobalHeaderHash = 23456; - // This will produce different values on 32-bit and 64-bit systens as - // hash_combine returns a size_t. However, this is only used for - // detailed hashing which, in-tree, only needs to distinguish between - // differences in functions. - // TODO: This is not stable. - template stable_hash hashArbitaryType(const T &V) { - return hash_combine(V); - } + /// IgnoreOp is a function that returns true if the operand should be ignored. + IgnoreOperandFunc IgnoreOp = nullptr; + /// A mapping from instruction indices to instruction pointers. + /// The index represents the position of an instruction based on the order in + /// which it is first encountered. + std::unique_ptr IndexInstruction = nullptr; + /// A mapping from pairs of instruction indices and operand indices + /// to the hashes of the operands. + std::unique_ptr IndexOperandHashMap = nullptr; + + /// Assign a unique ID to each Value in the order they are first seen. + DenseMap ValueToId; stable_hash hashType(Type *ValueType) { SmallVector Hashes; @@ -53,23 +57,138 @@ class StructuralHashImpl { public: StructuralHashImpl() = delete; - explicit StructuralHashImpl(bool DetailedHash) : DetailedHash(DetailedHash) {} + explicit StructuralHashImpl(bool DetailedHash, + IgnoreOperandFunc IgnoreOp = nullptr) + : DetailedHash(DetailedHash), IgnoreOp(IgnoreOp) { + if (IgnoreOp) { + IndexInstruction = std::make_unique(); + IndexOperandHashMap = std::make_unique(); + } + } + + stable_hash hashAPInt(const APInt &I) { + SmallVector Hashes; + Hashes.emplace_back(I.getBitWidth()); + for (unsigned J = 0; J < I.getNumWords(); ++J) + Hashes.emplace_back((I.getRawData())[J]); + return stable_hash_combine(Hashes); + } + + stable_hash hashAPFloat(const APFloat &F) { + SmallVector Hashes; + const fltSemantics &S = F.getSemantics(); + Hashes.emplace_back(APFloat::semanticsPrecision(S)); + Hashes.emplace_back(APFloat::semanticsMaxExponent(S)); + Hashes.emplace_back(APFloat::semanticsMinExponent(S)); + Hashes.emplace_back(APFloat::semanticsSizeInBits(S)); + Hashes.emplace_back(hashAPInt(F.bitcastToAPInt())); + return stable_hash_combine(Hashes); + } + + stable_hash hashGlobalValue(const GlobalValue *GV) { + if (!GV->hasName()) + return 0; + return stable_hash_name(GV->getName()); + } + // Compute a hash for a Constant. This function is logically similar to + // FunctionComparator::cmpConstants() in FunctionComparator.cpp, but here + // we're interested in computing a hash rather than comparing two Constants. + // Some of the logic is simplified, e.g, we don't expand GEPOperator. stable_hash hashConstant(Constant *C) { SmallVector Hashes; - // TODO: hashArbitaryType() is not stable. - if (ConstantInt *ConstInt = dyn_cast(C)) { - Hashes.emplace_back(hashArbitaryType(ConstInt->getValue())); - } else if (ConstantFP *ConstFP = dyn_cast(C)) { - Hashes.emplace_back(hashArbitaryType(ConstFP->getValue())); - } else if (Function *Func = dyn_cast(C)) { - // Hashing the name will be deterministic as LLVM's hashing infrastructure - // has explicit support for hashing strings and will not simply hash - // the pointer. - Hashes.emplace_back(hashArbitaryType(Func->getName())); + + Type *Ty = C->getType(); + Hashes.emplace_back(hashType(Ty)); + + if (C->isNullValue()) { + Hashes.emplace_back(static_cast('N')); + return stable_hash_combine(Hashes); } - return stable_hash_combine(Hashes); + auto *G = dyn_cast(C); + if (G) { + Hashes.emplace_back(hashGlobalValue(G)); + return stable_hash_combine(Hashes); + } + + if (const auto *Seq = dyn_cast(C)) { + Hashes.emplace_back(xxh3_64bits(Seq->getRawDataValues())); + return stable_hash_combine(Hashes); + } + + switch (C->getValueID()) { + case Value::UndefValueVal: + case Value::PoisonValueVal: + case Value::ConstantTokenNoneVal: { + return stable_hash_combine(Hashes); + } + case Value::ConstantIntVal: { + const APInt &Int = cast(C)->getValue(); + Hashes.emplace_back(hashAPInt(Int)); + return stable_hash_combine(Hashes); + } + case Value::ConstantFPVal: { + const APFloat &APF = cast(C)->getValueAPF(); + Hashes.emplace_back(hashAPFloat(APF)); + return stable_hash_combine(Hashes); + } + case Value::ConstantArrayVal: { + const ConstantArray *A = cast(C); + uint64_t NumElements = cast(Ty)->getNumElements(); + Hashes.emplace_back(NumElements); + for (auto &Op : A->operands()) { + auto H = hashConstant(cast(Op)); + Hashes.emplace_back(H); + } + return stable_hash_combine(Hashes); + } + case Value::ConstantStructVal: { + const ConstantStruct *S = cast(C); + unsigned NumElements = cast(Ty)->getNumElements(); + Hashes.emplace_back(NumElements); + for (auto &Op : S->operands()) { + auto H = hashConstant(cast(Op)); + Hashes.emplace_back(H); + } + return stable_hash_combine(Hashes); + } + case Value::ConstantVectorVal: { + const ConstantVector *V = cast(C); + unsigned NumElements = cast(Ty)->getNumElements(); + Hashes.emplace_back(NumElements); + for (auto &Op : V->operands()) { + auto H = hashConstant(cast(Op)); + Hashes.emplace_back(H); + } + return stable_hash_combine(Hashes); + } + case Value::ConstantExprVal: { + const ConstantExpr *E = cast(C); + unsigned NumOperands = E->getNumOperands(); + Hashes.emplace_back(NumOperands); + for (auto &Op : E->operands()) { + auto H = hashConstant(cast(Op)); + Hashes.emplace_back(H); + } + return stable_hash_combine(Hashes); + } + case Value::BlockAddressVal: { + const BlockAddress *BA = cast(C); + auto H = hashGlobalValue(BA->getFunction()); + Hashes.emplace_back(H); + return stable_hash_combine(Hashes); + } + case Value::DSOLocalEquivalentVal: { + const auto *Equiv = cast(C); + auto H = hashGlobalValue(Equiv->getGlobalValue()); + Hashes.emplace_back(H); + return stable_hash_combine(Hashes); + } + default: // Unknown constant, abort. + llvm_unreachable("Constant ValueID not recognized."); + } + return Hash; } stable_hash hashValue(Value *V) { @@ -83,6 +202,10 @@ class StructuralHashImpl { if (Argument *Arg = dyn_cast(V)) Hashes.emplace_back(Arg->getArgNo()); + // Get an index (an insertion order) for the non-constant value. + auto I = ValueToId.insert({V, ValueToId.size()}); + Hashes.emplace_back(I.first->second); + return stable_hash_combine(Hashes); } @@ -107,8 +230,20 @@ class StructuralHashImpl { if (const auto *ComparisonInstruction = dyn_cast(&Inst)) Hashes.emplace_back(ComparisonInstruction->getPredicate()); - for (const auto &Op : Inst.operands()) - Hashes.emplace_back(hashOperand(Op)); + unsigned InstIdx = 0; + if (IndexInstruction) { + InstIdx = IndexInstruction->size(); + IndexInstruction->insert({InstIdx, const_cast(&Inst)}); + } + + for (const auto [OpndIdx, Op] : enumerate(Inst.operands())) { + auto OpndHash = hashOperand(Op); + if (IgnoreOp && IgnoreOp(&Inst, OpndIdx)) { + assert(IndexOperandHashMap); + IndexOperandHashMap->insert({{InstIdx, OpndIdx}, OpndHash}); + } else + Hashes.emplace_back(OpndHash); + } return stable_hash_combine(Hashes); } @@ -188,6 +323,12 @@ class StructuralHashImpl { } uint64_t getHash() const { return Hash; } + std::unique_ptr getIndexInstrMap() { + return std::move(IndexInstruction); + } + std::unique_ptr getIndexPairOpndHashMap() { + return std::move(IndexOperandHashMap); + } }; } // namespace @@ -203,3 +344,12 @@ stable_hash llvm::StructuralHash(const Module &M, bool DetailedHash) { H.update(M); return H.getHash(); } + +FunctionHashInfo +llvm::StructuralHashWithDifferences(const Function &F, + IgnoreOperandFunc IgnoreOp) { + StructuralHashImpl H(/*DetailedHash=*/true, IgnoreOp); + H.update(F); + return FunctionHashInfo(H.getHash(), H.getIndexInstrMap(), + H.getIndexPairOpndHashMap()); +} diff --git a/llvm/unittests/IR/StructuralHashTest.cpp b/llvm/unittests/IR/StructuralHashTest.cpp index 64e66aa5f97a6d..e9f18ed40bc65b 100644 --- a/llvm/unittests/IR/StructuralHashTest.cpp +++ b/llvm/unittests/IR/StructuralHashTest.cpp @@ -239,4 +239,59 @@ TEST(StructuralHashTest, ArgumentNumber) { EXPECT_EQ(StructuralHash(*M1), StructuralHash(*M2)); EXPECT_NE(StructuralHash(*M1, true), StructuralHash(*M2, true)); } + +TEST(StructuralHashTest, Differences) { + LLVMContext Ctx; + std::unique_ptr M1 = parseIR(Ctx, "define i64 @f(i64 %a) {\n" + " %c = add i64 %a, 1\n" + " %b = call i64 @f1(i64 %c)\n" + " ret i64 %b\n" + "}\n" + "declare i64 @f1(i64)"); + auto *F1 = M1->getFunction("f"); + std::unique_ptr M2 = parseIR(Ctx, "define i64 @g(i64 %a) {\n" + " %c = add i64 %a, 1\n" + " %b = call i64 @f2(i64 %c)\n" + " ret i64 %b\n" + "}\n" + "declare i64 @f2(i64)"); + auto *F2 = M2->getFunction("g"); + + // They are originally different when not ignoring any operand. + EXPECT_NE(StructuralHash(*F1, true), StructuralHash(*F2, true)); + EXPECT_NE(StructuralHashWithDifferences(*F1, nullptr).FunctionHash, + StructuralHashWithDifferences(*F2, nullptr).FunctionHash); + + // When we ignore the call target f1 vs f2, they have the same hash. + auto IgnoreOp = [&](const Instruction *I, unsigned OpndIdx) { + return I->getOpcode() == Instruction::Call && OpndIdx == 1; + }; + auto FuncHashInfo1 = StructuralHashWithDifferences(*F1, IgnoreOp); + auto FuncHashInfo2 = StructuralHashWithDifferences(*F2, IgnoreOp); + EXPECT_EQ(FuncHashInfo1.FunctionHash, FuncHashInfo2.FunctionHash); + + // There are total 3 instructions. + EXPECT_EQ(FuncHashInfo1.IndexInstruction->size(), 3u); + EXPECT_EQ(FuncHashInfo2.IndexInstruction->size(), 3u); + + // The only 1 operand (the call target) has been ignored. + EXPECT_EQ(FuncHashInfo1.IndexOperandHashMap->size(), 1u); + EXPECT_EQ(FuncHashInfo2.IndexOperandHashMap->size(), 1u); + + // The index pair of instruction and operand (1, 1) is a key in the map. + ASSERT_TRUE(FuncHashInfo1.IndexOperandHashMap->count({1, 1})); + ASSERT_TRUE(FuncHashInfo2.IndexOperandHashMap->count({1, 1})); + + // The indexed instruciton must be the call instruction as shown in the + // IgnoreOp above. + EXPECT_EQ(FuncHashInfo1.IndexInstruction->lookup(1)->getOpcode(), + Instruction::Call); + EXPECT_EQ(FuncHashInfo2.IndexInstruction->lookup(1)->getOpcode(), + Instruction::Call); + + // The ignored operand hashes (for f1 vs. f2) are different. + EXPECT_NE(FuncHashInfo1.IndexOperandHashMap->lookup({1, 1}), + FuncHashInfo2.IndexOperandHashMap->lookup({1, 1})); +} + } // end anonymous namespace From 6c35be80841db518618e2247166312a369683f37 Mon Sep 17 00:00:00 2001 From: Kyungwoo Lee Date: Sat, 19 Oct 2024 00:41:14 -0700 Subject: [PATCH 2/5] Address comments from ellishg --- llvm/lib/IR/StructuralHash.cpp | 38 +++++++----------------- llvm/unittests/IR/StructuralHashTest.cpp | 18 +++++++---- 2 files changed, 23 insertions(+), 33 deletions(-) diff --git a/llvm/lib/IR/StructuralHash.cpp b/llvm/lib/IR/StructuralHash.cpp index db54a848fd50dd..5a643702bd8379 100644 --- a/llvm/lib/IR/StructuralHash.cpp +++ b/llvm/lib/IR/StructuralHash.cpp @@ -69,20 +69,13 @@ class StructuralHashImpl { stable_hash hashAPInt(const APInt &I) { SmallVector Hashes; Hashes.emplace_back(I.getBitWidth()); - for (unsigned J = 0; J < I.getNumWords(); ++J) - Hashes.emplace_back((I.getRawData())[J]); + auto RawVals = ArrayRef(I.getRawData(), I.getNumWords()); + Hashes.append(RawVals.begin(), RawVals.end()); return stable_hash_combine(Hashes); } stable_hash hashAPFloat(const APFloat &F) { - SmallVector Hashes; - const fltSemantics &S = F.getSemantics(); - Hashes.emplace_back(APFloat::semanticsPrecision(S)); - Hashes.emplace_back(APFloat::semanticsMaxExponent(S)); - Hashes.emplace_back(APFloat::semanticsMinExponent(S)); - Hashes.emplace_back(APFloat::semanticsSizeInBits(S)); - Hashes.emplace_back(hashAPInt(F.bitcastToAPInt())); - return stable_hash_combine(Hashes); + return hashAPInt(F.bitcastToAPInt()); } stable_hash hashGlobalValue(const GlobalValue *GV) { @@ -106,8 +99,7 @@ class StructuralHashImpl { return stable_hash_combine(Hashes); } - auto *G = dyn_cast(C); - if (G) { + if (auto *G = dyn_cast(C)) { Hashes.emplace_back(hashGlobalValue(G)); return stable_hash_combine(Hashes); } @@ -135,8 +127,6 @@ class StructuralHashImpl { } case Value::ConstantArrayVal: { const ConstantArray *A = cast(C); - uint64_t NumElements = cast(Ty)->getNumElements(); - Hashes.emplace_back(NumElements); for (auto &Op : A->operands()) { auto H = hashConstant(cast(Op)); Hashes.emplace_back(H); @@ -145,8 +135,6 @@ class StructuralHashImpl { } case Value::ConstantStructVal: { const ConstantStruct *S = cast(C); - unsigned NumElements = cast(Ty)->getNumElements(); - Hashes.emplace_back(NumElements); for (auto &Op : S->operands()) { auto H = hashConstant(cast(Op)); Hashes.emplace_back(H); @@ -155,8 +143,6 @@ class StructuralHashImpl { } case Value::ConstantVectorVal: { const ConstantVector *V = cast(C); - unsigned NumElements = cast(Ty)->getNumElements(); - Hashes.emplace_back(NumElements); for (auto &Op : V->operands()) { auto H = hashConstant(cast(Op)); Hashes.emplace_back(H); @@ -165,8 +151,6 @@ class StructuralHashImpl { } case Value::ConstantExprVal: { const ConstantExpr *E = cast(C); - unsigned NumOperands = E->getNumOperands(); - Hashes.emplace_back(NumOperands); for (auto &Op : E->operands()) { auto H = hashConstant(cast(Op)); Hashes.emplace_back(H); @@ -185,10 +169,10 @@ class StructuralHashImpl { Hashes.emplace_back(H); return stable_hash_combine(Hashes); } - default: // Unknown constant, abort. - llvm_unreachable("Constant ValueID not recognized."); + default: + // Skip other types of constants for simplicity. + return stable_hash_combine(Hashes); } - return Hash; } stable_hash hashValue(Value *V) { @@ -203,8 +187,8 @@ class StructuralHashImpl { Hashes.emplace_back(Arg->getArgNo()); // Get an index (an insertion order) for the non-constant value. - auto I = ValueToId.insert({V, ValueToId.size()}); - Hashes.emplace_back(I.first->second); + auto [It, WasInserted] = ValueToId.try_emplace(V, ValueToId.size()); + Hashes.emplace_back(It->second); return stable_hash_combine(Hashes); } @@ -233,14 +217,14 @@ class StructuralHashImpl { unsigned InstIdx = 0; if (IndexInstruction) { InstIdx = IndexInstruction->size(); - IndexInstruction->insert({InstIdx, const_cast(&Inst)}); + IndexInstruction->try_emplace(InstIdx, const_cast(&Inst)); } for (const auto [OpndIdx, Op] : enumerate(Inst.operands())) { auto OpndHash = hashOperand(Op); if (IgnoreOp && IgnoreOp(&Inst, OpndIdx)) { assert(IndexOperandHashMap); - IndexOperandHashMap->insert({{InstIdx, OpndIdx}, OpndHash}); + IndexOperandHashMap->try_emplace({InstIdx, OpndIdx}, OpndHash); } else Hashes.emplace_back(OpndHash); } diff --git a/llvm/unittests/IR/StructuralHashTest.cpp b/llvm/unittests/IR/StructuralHashTest.cpp index e9f18ed40bc65b..81c17120a1f6ff 100644 --- a/llvm/unittests/IR/StructuralHashTest.cpp +++ b/llvm/unittests/IR/StructuralHashTest.cpp @@ -10,6 +10,7 @@ #include "llvm/AsmParser/Parser.h" #include "llvm/IR/Module.h" #include "llvm/Support/SourceMgr.h" +#include "gmock/gmock-matchers.h" #include "gtest/gtest.h" #include @@ -18,6 +19,11 @@ using namespace llvm; namespace { +using testing::Contains; +using testing::Key; +using testing::Pair; +using testing::SizeIs; + std::unique_ptr parseIR(LLVMContext &Context, const char *IR) { SMDiagnostic Err; std::unique_ptr M = parseAssemblyString(IR, Err, Context); @@ -271,16 +277,16 @@ TEST(StructuralHashTest, Differences) { EXPECT_EQ(FuncHashInfo1.FunctionHash, FuncHashInfo2.FunctionHash); // There are total 3 instructions. - EXPECT_EQ(FuncHashInfo1.IndexInstruction->size(), 3u); - EXPECT_EQ(FuncHashInfo2.IndexInstruction->size(), 3u); + EXPECT_THAT(*FuncHashInfo1.IndexInstruction, SizeIs(3)); + EXPECT_THAT(*FuncHashInfo2.IndexInstruction, SizeIs(3)); // The only 1 operand (the call target) has been ignored. - EXPECT_EQ(FuncHashInfo1.IndexOperandHashMap->size(), 1u); - EXPECT_EQ(FuncHashInfo2.IndexOperandHashMap->size(), 1u); + EXPECT_THAT(*FuncHashInfo1.IndexOperandHashMap, SizeIs(1u)); + EXPECT_THAT(*FuncHashInfo2.IndexOperandHashMap, SizeIs(1u)); // The index pair of instruction and operand (1, 1) is a key in the map. - ASSERT_TRUE(FuncHashInfo1.IndexOperandHashMap->count({1, 1})); - ASSERT_TRUE(FuncHashInfo2.IndexOperandHashMap->count({1, 1})); + ASSERT_THAT(*FuncHashInfo1.IndexOperandHashMap, Contains(Key(Pair(1, 1)))); + ASSERT_THAT(*FuncHashInfo2.IndexOperandHashMap, Contains(Key(Pair(1, 1)))); // The indexed instruciton must be the call instruction as shown in the // IgnoreOp above. From 24768bbfe139a0f703310c9c57897ecdb743e653 Mon Sep 17 00:00:00 2001 From: Kyungwoo Lee Date: Sat, 19 Oct 2024 09:18:45 -0700 Subject: [PATCH 3/5] Address comments from ilovepi --- llvm/include/llvm/Analysis/StructuralHash.h | 9 +++-- llvm/lib/Analysis/StructuralHash.cpp | 27 ++++++++++++-- llvm/lib/IR/StructuralHash.cpp | 37 +++---------------- llvm/lib/Passes/PassBuilder.cpp | 16 ++++++-- llvm/lib/Passes/PassRegistry.def | 7 ++-- .../StructuralHash/structural-hash-printer.ll | 24 +++++++++--- 6 files changed, 71 insertions(+), 49 deletions(-) diff --git a/llvm/include/llvm/Analysis/StructuralHash.h b/llvm/include/llvm/Analysis/StructuralHash.h index 9f33c69aed345c..58846511807b6e 100644 --- a/llvm/include/llvm/Analysis/StructuralHash.h +++ b/llvm/include/llvm/Analysis/StructuralHash.h @@ -13,15 +13,18 @@ namespace llvm { +enum class StructuralHashOptions { None, Detailed, CallTargetIgnored }; + /// Printer pass for StructuralHashes class StructuralHashPrinterPass : public PassInfoMixin { raw_ostream &OS; - bool EnableDetailedStructuralHash; + const StructuralHashOptions Options; public: - explicit StructuralHashPrinterPass(raw_ostream &OS, bool Detailed) - : OS(OS), EnableDetailedStructuralHash(Detailed) {} + explicit StructuralHashPrinterPass(raw_ostream &OS, + StructuralHashOptions Options) + : OS(OS), Options(Options) {} PreservedAnalyses run(Module &M, ModuleAnalysisManager &MAM); diff --git a/llvm/lib/Analysis/StructuralHash.cpp b/llvm/lib/Analysis/StructuralHash.cpp index 3a2341fe59ad9c..4f2e003148b606 100644 --- a/llvm/lib/Analysis/StructuralHash.cpp +++ b/llvm/lib/Analysis/StructuralHash.cpp @@ -21,14 +21,33 @@ using namespace llvm; PreservedAnalyses StructuralHashPrinterPass::run(Module &M, ModuleAnalysisManager &MAM) { OS << "Module Hash: " - << format("%016" PRIx64, StructuralHash(M, EnableDetailedStructuralHash)) + << format("%016" PRIx64, + StructuralHash(M, Options != StructuralHashOptions::None)) << "\n"; for (Function &F : M) { if (F.isDeclaration()) continue; - OS << "Function " << F.getName() << " Hash: " - << format("%016" PRIx64, StructuralHash(F, EnableDetailedStructuralHash)) - << "\n"; + if (Options == StructuralHashOptions::CallTargetIgnored) { + auto IgnoreOp = [&](const Instruction *I, unsigned OpndIdx) { + return I->getOpcode() == Instruction::Call && + isa(I->getOperand(OpndIdx)); + }; + auto FuncHashInfo = StructuralHashWithDifferences(F, IgnoreOp); + OS << "Function " << F.getName() + << " Hash: " << format("%016" PRIx64, FuncHashInfo.FunctionHash) + << "\n"; + for (auto &[IndexPair, OpndHash] : *FuncHashInfo.IndexOperandHashMap) { + auto [InstIndex, OpndIndex] = IndexPair; + OS << "\tIgnored Operand Hash: " << format("%016" PRIx64, OpndHash) + << " at (" << InstIndex << "," << OpndIndex << ")\n"; + } + } else { + OS << "Function " << F.getName() << " Hash: " + << format( + "%016" PRIx64, + StructuralHash(F, Options == StructuralHashOptions::Detailed)) + << "\n"; + } } return PreservedAnalyses::all(); } diff --git a/llvm/lib/IR/StructuralHash.cpp b/llvm/lib/IR/StructuralHash.cpp index 5a643702bd8379..a51f9124af04da 100644 --- a/llvm/lib/IR/StructuralHash.cpp +++ b/llvm/lib/IR/StructuralHash.cpp @@ -110,11 +110,6 @@ class StructuralHashImpl { } switch (C->getValueID()) { - case Value::UndefValueVal: - case Value::PoisonValueVal: - case Value::ConstantTokenNoneVal: { - return stable_hash_combine(Hashes); - } case Value::ConstantIntVal: { const APInt &Int = cast(C)->getValue(); Hashes.emplace_back(hashAPInt(Int)); @@ -125,33 +120,11 @@ class StructuralHashImpl { Hashes.emplace_back(hashAPFloat(APF)); return stable_hash_combine(Hashes); } - case Value::ConstantArrayVal: { - const ConstantArray *A = cast(C); - for (auto &Op : A->operands()) { - auto H = hashConstant(cast(Op)); - Hashes.emplace_back(H); - } - return stable_hash_combine(Hashes); - } - case Value::ConstantStructVal: { - const ConstantStruct *S = cast(C); - for (auto &Op : S->operands()) { - auto H = hashConstant(cast(Op)); - Hashes.emplace_back(H); - } - return stable_hash_combine(Hashes); - } - case Value::ConstantVectorVal: { - const ConstantVector *V = cast(C); - for (auto &Op : V->operands()) { - auto H = hashConstant(cast(Op)); - Hashes.emplace_back(H); - } - return stable_hash_combine(Hashes); - } + case Value::ConstantArrayVal: + case Value::ConstantStructVal: + case Value::ConstantVectorVal: case Value::ConstantExprVal: { - const ConstantExpr *E = cast(C); - for (auto &Op : E->operands()) { + for (const auto &Op : C->operands()) { auto H = hashConstant(cast(Op)); Hashes.emplace_back(H); } @@ -307,9 +280,11 @@ class StructuralHashImpl { } uint64_t getHash() const { return Hash; } + std::unique_ptr getIndexInstrMap() { return std::move(IndexInstruction); } + std::unique_ptr getIndexPairOpndHashMap() { return std::move(IndexOperandHashMap); } diff --git a/llvm/lib/Passes/PassBuilder.cpp b/llvm/lib/Passes/PassBuilder.cpp index f5ce405ab8d961..7f3b766a7c4018 100644 --- a/llvm/lib/Passes/PassBuilder.cpp +++ b/llvm/lib/Passes/PassBuilder.cpp @@ -1175,9 +1175,19 @@ Expected parseMemProfUsePassOptions(StringRef Params) { return Result; } -Expected parseStructuralHashPrinterPassOptions(StringRef Params) { - return PassBuilder::parseSinglePassOption(Params, "detailed", - "StructuralHashPrinterPass"); +Expected +parseStructuralHashPrinterPassOptions(StringRef Params) { + if (Params.empty()) + return StructuralHashOptions::None; + else if (Params == "detailed") + return StructuralHashOptions::Detailed; + else if (Params == "call-target-ignored") + return StructuralHashOptions::CallTargetIgnored; + else + return make_error( + formatv("invalid structural hash printer parameter '{0}' ", Params) + .str(), + inconvertibleErrorCode()); } Expected parseWinEHPrepareOptions(StringRef Params) { diff --git a/llvm/lib/Passes/PassRegistry.def b/llvm/lib/Passes/PassRegistry.def index 549c1359b5852c..017ae311c55eb4 100644 --- a/llvm/lib/Passes/PassRegistry.def +++ b/llvm/lib/Passes/PassRegistry.def @@ -220,10 +220,11 @@ MODULE_PASS_WITH_PARAMS( parseMSanPassOptions, "recover;kernel;eager-checks;track-origins=N") MODULE_PASS_WITH_PARAMS( "print", "StructuralHashPrinterPass", - [](bool EnableDetailedStructuralHash) { - return StructuralHashPrinterPass(dbgs(), EnableDetailedStructuralHash); + [](StructuralHashOptions Options) { + return StructuralHashPrinterPass(dbgs(), Options); }, - parseStructuralHashPrinterPassOptions, "detailed") + parseStructuralHashPrinterPassOptions, "detailed;call-target-ignored") + #undef MODULE_PASS_WITH_PARAMS #ifndef CGSCC_ANALYSIS diff --git a/llvm/test/Analysis/StructuralHash/structural-hash-printer.ll b/llvm/test/Analysis/StructuralHash/structural-hash-printer.ll index 5936199bf32f43..3c23b54d297369 100644 --- a/llvm/test/Analysis/StructuralHash/structural-hash-printer.ll +++ b/llvm/test/Analysis/StructuralHash/structural-hash-printer.ll @@ -1,17 +1,21 @@ ; RUN: opt -passes='print' -disable-output %s 2>&1 | FileCheck %s ; RUN: opt -passes='print' -disable-output %s 2>&1 | FileCheck %s -check-prefix=DETAILED-HASH +; RUN: opt -passes='print' -disable-output %s 2>&1 | FileCheck %s -check-prefix=CALLTARGETIGNORED-HASH ; Add a declaration so that we can test we skip it. -declare i64 @d1() +declare i64 @d1(i64) +declare i64 @e1(i64) define i64 @f1(i64 %a) { %b = add i64 %a, 1 - ret i64 %b + %c = call i64 @d1(i64 %b) + ret i64 %c } -define i32 @f2(i32 %a) { - %b = add i32 %a, 2 - ret i32 %b +define i64 @f2(i64 %a) { + %b = add i64 %a, 1 + %c = call i64 @e1(i64 %b) + ret i64 %c } ; CHECK: Module Hash: {{([a-f0-9]{16,})}} @@ -22,3 +26,13 @@ define i32 @f2(i32 %a) { ; DETAILED-HASH-NEXT: Function f1 Hash: [[DF1H:([a-f0-9]{16,})]] ; DETAILED-HASH-NOT: [[DF1H]] ; DETAILED-HASH-NEXT: Function f2 Hash: {{([a-f0-9]{16,})}} + +; When ignoring the call target, check if `f1` and `f2` produce the same function hash. +; The index for the call instruction is 1, and the index of the call target operand is 1. +; The ignored operand hashes for different call targets should be different. +; CALLTARGETIGNORED-HASH: Module Hash: {{([a-f0-9]{16,})}} +; CALLTARGETIGNORED-HASH-NEXT: Function f1 Hash: [[IF1H:([a-f0-9]{16,})]] +; CALLTARGETIGNORED-HASH-NEXT: Ignored Operand Hash: [[IO1H:([a-f0-9]{16,})]] at (1,1) +; CALLTARGETIGNORED-HASH-NEXT: Function f2 Hash: [[IF1H]] +; CALLTARGETIGNORED-HASH-NOT: [[IO1H]] +; CALLTARGETIGNORED-HASH-NEXT: Ignored Operand Hash: {{([a-f0-9]{16,})}} at (1,1) From 14aa138267c26239cbd746c50f222a4cb46ab559 Mon Sep 17 00:00:00 2001 From: Kyungwoo Lee Date: Mon, 21 Oct 2024 16:38:16 -0700 Subject: [PATCH 4/5] Address 2nd comments from ellishg --- llvm/lib/Passes/PassBuilder.cpp | 12 +++++------- 1 file changed, 5 insertions(+), 7 deletions(-) diff --git a/llvm/lib/Passes/PassBuilder.cpp b/llvm/lib/Passes/PassBuilder.cpp index 7f3b766a7c4018..d1f75dfb5350a0 100644 --- a/llvm/lib/Passes/PassBuilder.cpp +++ b/llvm/lib/Passes/PassBuilder.cpp @@ -1179,15 +1179,13 @@ Expected parseStructuralHashPrinterPassOptions(StringRef Params) { if (Params.empty()) return StructuralHashOptions::None; - else if (Params == "detailed") + if (Params == "detailed") return StructuralHashOptions::Detailed; - else if (Params == "call-target-ignored") + if (Params == "call-target-ignored") return StructuralHashOptions::CallTargetIgnored; - else - return make_error( - formatv("invalid structural hash printer parameter '{0}' ", Params) - .str(), - inconvertibleErrorCode()); + return make_error( + formatv("invalid structural hash printer parameter '{0}' ", Params).str(), + inconvertibleErrorCode()); } Expected parseWinEHPrepareOptions(StringRef Params) { From d3c35a765780eedf9ccc86949d639afede480dfb Mon Sep 17 00:00:00 2001 From: Kyungwoo Lee Date: Fri, 25 Oct 2024 23:29:43 -0700 Subject: [PATCH 5/5] Address 3rd comments from ellishg --- llvm/include/llvm/Analysis/StructuralHash.h | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/llvm/include/llvm/Analysis/StructuralHash.h b/llvm/include/llvm/Analysis/StructuralHash.h index 58846511807b6e..4c9f063bc7d2c8 100644 --- a/llvm/include/llvm/Analysis/StructuralHash.h +++ b/llvm/include/llvm/Analysis/StructuralHash.h @@ -13,7 +13,11 @@ namespace llvm { -enum class StructuralHashOptions { None, Detailed, CallTargetIgnored }; +enum class StructuralHashOptions { + None, /// Hash with opcode only. + Detailed, /// Hash with opcode and operands. + CallTargetIgnored, /// Ignore call target operand when computing hash. +}; /// Printer pass for StructuralHashes class StructuralHashPrinterPass