Skip to content

Commit

Permalink
[StructuralHash] Support Differences
Browse files Browse the repository at this point in the history
This computes a structural hash while allowing certain operands to be
selectively ignored, as decided by a custom function provided by the caller.
Instead of a single hash value, it now returns a FunctionHashInfo, which
includes the hash value, a mapping from instruction indices to instructions,
and a map recording the location of each ignored operand and its hash value.
  • Loading branch information
kyulee-com committed Oct 17, 2024
1 parent e715fc6 commit 6225d74
Show file tree
Hide file tree
Showing 3 changed files with 275 additions and 14 deletions.
46 changes: 46 additions & 0 deletions llvm/include/llvm/IR/StructuralHash.h
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,9 @@
#ifndef LLVM_IR_STRUCTURALHASH_H
#define LLVM_IR_STRUCTURALHASH_H

#include "llvm/ADT/MapVector.h"
#include "llvm/ADT/StableHashing.h"
#include "llvm/IR/Instruction.h"
#include <cstdint>

namespace llvm {
Expand All @@ -23,6 +25,7 @@ class Function;
class Module;

using IRHash = stable_hash;
using OpndHash = stable_hash;

/// Returns a hash of the function \p F.
/// \param F The function to hash.
Expand All @@ -37,6 +40,49 @@ IRHash StructuralHash(const Function &F, bool DetailedHash = false);
/// composed the module hash.
IRHash StructuralHash(const Module &M, bool DetailedHash = false);

/// A pair of an instruction index and an operand index, in that order. It
/// identifies one operand of one instruction of a hashed function.
using IndexPair = std::pair<unsigned, unsigned>;

/// A map from an instruction index to an instruction pointer. The index is
/// the position of the instruction in the order in which the hasher first
/// encounters it.
using IndexInstrMap = MapVector<unsigned, Instruction *>;

/// A map from an IndexPair (instruction index, operand index) to the OpndHash
/// computed for that operand.
using IndexOperandHashMapType = DenseMap<IndexPair, OpndHash>;

/// A function that takes an instruction and an operand index and returns true
/// if the operand should be ignored in the function hash computation.
/// An empty (null) function means that no operand is ignored.
using IgnoreOperandFunc = std::function<bool(const Instruction *, unsigned)>;

struct FunctionHashInfo {
/// A hash value representing the structural content of the function
IRHash FunctionHash;
/// A mapping from instruction indices to instruction pointers
std::unique_ptr<IndexInstrMap> IndexInstruction;
/// A mapping from pairs of instruction indices and operand indices
/// to the hashes of the operands. This can be used to analyze or
/// reconstruct the differences in ignored operands
std::unique_ptr<IndexOperandHashMapType> IndexOperandHashMap;

FunctionHashInfo(IRHash FuntionHash,
std::unique_ptr<IndexInstrMap> IndexInstruction,
std::unique_ptr<IndexOperandHashMapType> IndexOperandHashMap)
: FunctionHash(FuntionHash),
IndexInstruction(std::move(IndexInstruction)),
IndexOperandHashMap(std::move(IndexOperandHashMap)) {}
};

/// Computes a structural hash of a given function, considering the structure
/// and content of the function's instructions while allowing for selective
/// ignoring of certain operands based on custom criteria. This hash can be used
/// to identify functions that are structurally similar or identical, which is
/// useful in optimizations, deduplication, or analysis tasks.
/// The hash is always computed with DetailedHash enabled.
/// \param F The function to hash.
/// \param IgnoreOp A callable that takes an instruction and an operand index,
/// and returns true if the operand should be ignored in the hash computation.
/// If it is empty (e.g. nullptr), no operand is ignored.
/// \return A FunctionHashInfo structure holding the function hash, the
/// instruction-index -> instruction map, and the map from (instruction index,
/// operand index) to the hash of each ignored operand. Both map pointers are
/// null when \p IgnoreOp is empty, so check them before dereferencing.
FunctionHashInfo StructuralHashWithDifferences(const Function &F,
IgnoreOperandFunc IgnoreOp);

} // end namespace llvm

#endif
188 changes: 174 additions & 14 deletions llvm/lib/IR/StructuralHash.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,19 @@ class StructuralHashImpl {

bool DetailedHash;

/// IgnoreOp is a function that returns true if the operand should be ignored.
/// It is empty by default, in which case every operand contributes to the
/// hash.
IgnoreOperandFunc IgnoreOp = nullptr;
/// A mapping from instruction indices to instruction pointers.
/// The index represents the position of an instruction based on the order in
/// which it is first encountered.
/// Stays null unless the hasher is constructed with a non-empty IgnoreOp.
std::unique_ptr<IndexInstrMap> IndexInstruction = nullptr;
/// A mapping from pairs of instruction indices and operand indices
/// to the hashes of the operands.
/// Only the operands for which IgnoreOp returned true are recorded here.
/// Stays null unless the hasher is constructed with a non-empty IgnoreOp.
std::unique_ptr<IndexOperandHashMapType> IndexOperandHashMap = nullptr;

/// Assign a unique ID to each Value in the order they are first seen.
/// The ID of a newly inserted value is the size of the map at insertion time.
DenseMap<const Value *, int> ValueToId;

// This will produce different values on 32-bit and 64-bit systems as
// hash_combine returns a size_t. However, this is only used for
// detailed hashing which, in-tree, only needs to distinguish between
Expand All @@ -47,24 +60,140 @@ class StructuralHashImpl {

public:
StructuralHashImpl() = delete;
explicit StructuralHashImpl(bool DetailedHash) : DetailedHash(DetailedHash) {}
explicit StructuralHashImpl(bool DetailedHash,
IgnoreOperandFunc IgnoreOp = nullptr)
: DetailedHash(DetailedHash), IgnoreOp(IgnoreOp) {
if (IgnoreOp) {
IndexInstruction = std::make_unique<IndexInstrMap>();
IndexOperandHashMap = std::make_unique<IndexOperandHashMapType>();
}
}

/// Hashes an arbitrary-precision integer.
/// The hash combines the bit width with every word of the value's raw data,
/// so two values with the same bits but different widths hash differently.
/// \param I The integer to hash.
/// \return The combined stable hash.
stable_hash hashAPInt(const APInt &I) {
  SmallVector<stable_hash> Hashes;
  Hashes.emplace_back(I.getBitWidth());
  for (unsigned J = 0; J < I.getNumWords(); ++J)
    Hashes.emplace_back((I.getRawData())[J]);
  return stable_hash_combine(Hashes);
}

/// Hashes an arbitrary-precision floating point value.
/// The hash covers the parameters of the value's semantics (precision,
/// maximum and minimum exponent, size in bits) followed by the hash of the
/// value's bit pattern reinterpreted as an APInt.
/// \param F The floating point value to hash.
/// \return The combined stable hash.
stable_hash hashAPFloat(const APFloat &F) {
  const fltSemantics &Sem = F.getSemantics();
  // The exponents are signed; the explicit casts spell out the conversion to
  // stable_hash that storing them in the vector performs anyway.
  SmallVector<stable_hash> Parts = {
      static_cast<stable_hash>(APFloat::semanticsPrecision(Sem)),
      static_cast<stable_hash>(APFloat::semanticsMaxExponent(Sem)),
      static_cast<stable_hash>(APFloat::semanticsMinExponent(Sem)),
      static_cast<stable_hash>(APFloat::semanticsSizeInBits(Sem)),
      hashAPInt(F.bitcastToAPInt())};
  return stable_hash_combine(Parts);
}

/// Hashes a global value by its name.
/// \param GV The global value to hash.
/// \return stable_hash_name() of the name, or 0 for an unnamed global.
stable_hash hashGlobalValue(const GlobalValue *GV) {
  return GV->hasName() ? stable_hash_name(GV->getName()) : 0;
}

// Compute a hash for a Constant. This function is logically similar to
// FunctionComparator::cmpConstants() in FunctionComparator.cpp, but here
// we're interested in computing a hash rather than comparing two Constants.
// Some of the logic is simplified, e.g, we don't expand GEPOperator.
//
// Every hash starts with the hash of the constant's type. What follows
// depends on the kind of constant:
//  - null values:              the marker 'N'
//  - global values:            the hash of the name (0 if unnamed)
//  - constant data sequences:  xxh3 of the raw data bytes
//  - integers / floats:        hashAPInt() / hashAPFloat() of the value
//  - arrays, structs, vectors: the element count, then each operand's hash
//  - constant expressions:     the operand count, then each operand's hash
//  - block addresses and dso_local_equivalent: the hash of the referenced
//    global's name
//  - undef, poison, token none and any kind not listed above: nothing more.
//
// Kinds without dedicated handling deliberately fall back to the type-only
// hash instead of aborting: a coarser hash can only cause extra collisions,
// whereas aborting would make the hasher unusable on IR containing such a
// constant.
stable_hash hashConstant(Constant *C) {
  SmallVector<stable_hash> Hashes;

  Type *Ty = C->getType();
  Hashes.emplace_back(hashType(Ty));

  if (C->isNullValue()) {
    Hashes.emplace_back(static_cast<stable_hash>('N'));
    return stable_hash_combine(Hashes);
  }

  if (auto *G = dyn_cast<GlobalValue>(C)) {
    Hashes.emplace_back(hashGlobalValue(G));
    return stable_hash_combine(Hashes);
  }

  if (const auto *Seq = dyn_cast<ConstantDataSequential>(C)) {
    Hashes.emplace_back(xxh3_64bits(Seq->getRawDataValues()));
    return stable_hash_combine(Hashes);
  }

  // Appends the hash of every operand of \p Parent, in operand order.
  auto HashOperands = [&](const Constant *Parent) {
    for (auto &Op : Parent->operands())
      Hashes.emplace_back(hashConstant(cast<Constant>(Op)));
  };

  switch (C->getValueID()) {
  case Value::UndefValueVal:
  case Value::PoisonValueVal:
  case Value::ConstantTokenNoneVal:
    // Fully described by the type hashed above.
    break;
  case Value::ConstantIntVal:
    Hashes.emplace_back(hashAPInt(cast<ConstantInt>(C)->getValue()));
    break;
  case Value::ConstantFPVal:
    Hashes.emplace_back(hashAPFloat(cast<ConstantFP>(C)->getValueAPF()));
    break;
  case Value::ConstantArrayVal: {
    uint64_t NumElements = cast<ArrayType>(Ty)->getNumElements();
    Hashes.emplace_back(NumElements);
    HashOperands(C);
    break;
  }
  case Value::ConstantStructVal: {
    unsigned NumElements = cast<StructType>(Ty)->getNumElements();
    Hashes.emplace_back(NumElements);
    HashOperands(C);
    break;
  }
  case Value::ConstantVectorVal: {
    unsigned NumElements = cast<FixedVectorType>(Ty)->getNumElements();
    Hashes.emplace_back(NumElements);
    HashOperands(C);
    break;
  }
  case Value::ConstantExprVal: {
    unsigned NumOperands = cast<ConstantExpr>(C)->getNumOperands();
    Hashes.emplace_back(NumOperands);
    HashOperands(C);
    break;
  }
  case Value::BlockAddressVal:
    Hashes.emplace_back(
        hashGlobalValue(cast<BlockAddress>(C)->getFunction()));
    break;
  case Value::DSOLocalEquivalentVal:
    Hashes.emplace_back(
        hashGlobalValue(cast<DSOLocalEquivalent>(C)->getGlobalValue()));
    break;
  default:
    // No dedicated handling for this kind of constant; hash it by type only.
    break;
  }

  // Always return the hash of this constant. The previous trailing
  // `return Hash;` would have returned the unrelated running function hash.
  return stable_hash_combine(Hashes);
}

stable_hash hashValue(Value *V) {
// Check constant and return its hash.
Constant *C = dyn_cast<Constant>(V);
Expand All @@ -76,6 +205,10 @@ class StructuralHashImpl {
if (Argument *Arg = dyn_cast<Argument>(V))
Hashes.emplace_back(Arg->getArgNo());

// Get an index (an insertion order) for the non-constant value.
auto I = ValueToId.insert({V, ValueToId.size()});
Hashes.emplace_back(I.first->second);

return stable_hash_combine(Hashes);
}

Expand All @@ -100,8 +233,20 @@ class StructuralHashImpl {
if (const auto *ComparisonInstruction = dyn_cast<CmpInst>(&Inst))
Hashes.emplace_back(ComparisonInstruction->getPredicate());

for (const auto &Op : Inst.operands())
Hashes.emplace_back(hashOperand(Op));
unsigned InstIdx = 0;
if (IndexInstruction) {
InstIdx = IndexInstruction->size();
IndexInstruction->insert({InstIdx, const_cast<Instruction *>(&Inst)});
}

for (const auto [OpndIdx, Op] : enumerate(Inst.operands())) {
auto OpndHash = hashOperand(Op);
if (IgnoreOp && IgnoreOp(&Inst, OpndIdx)) {
assert(IndexOperandHashMap);
IndexOperandHashMap->insert({{InstIdx, OpndIdx}, OpndHash});
} else
Hashes.emplace_back(OpndHash);
}

return stable_hash_combine(Hashes);
}
Expand Down Expand Up @@ -184,6 +329,12 @@ class StructuralHashImpl {
}

/// Returns the current value of the Hash member.
uint64_t getHash() const {
  return Hash;
}
std::unique_ptr<IndexInstrMap> getIndexInstrMap() {
return std::move(IndexInstruction);
}
/// Hands ownership of the ignored-operand hash map to the caller.
/// The member is null afterwards, so the result of a second call is null.
std::unique_ptr<IndexOperandHashMapType> getIndexPairOpndHashMap() {
  std::unique_ptr<IndexOperandHashMapType> Released =
      std::move(IndexOperandHashMap);
  return Released;
}
};

} // namespace
Expand All @@ -199,3 +350,12 @@ IRHash llvm::StructuralHash(const Module &M, bool DetailedHash) {
H.update(M);
return H.getHash();
}

// Hashes F with DetailedHash enabled, leaving out the operands selected by
// IgnoreOp, and packages the hash with the two side tables built while
// hashing. The tables are moved out of the hasher, which is discarded.
FunctionHashInfo
llvm::StructuralHashWithDifferences(const Function &F,
                                    IgnoreOperandFunc IgnoreOp) {
  StructuralHashImpl Hasher(/*DetailedHash=*/true, IgnoreOp);
  Hasher.update(F);

  const IRHash FunctionHash = Hasher.getHash();
  std::unique_ptr<IndexInstrMap> InstrMap = Hasher.getIndexInstrMap();
  std::unique_ptr<IndexOperandHashMapType> OpndHashMap =
      Hasher.getIndexPairOpndHashMap();
  return FunctionHashInfo(FunctionHash, std::move(InstrMap),
                          std::move(OpndHashMap));
}
55 changes: 55 additions & 0 deletions llvm/unittests/IR/StructuralHashTest.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -239,4 +239,59 @@ TEST(StructuralHashTest, ArgumentNumber) {
EXPECT_EQ(StructuralHash(*M1), StructuralHash(*M2));
EXPECT_NE(StructuralHash(*M1, true), StructuralHash(*M2, true));
}

// Two functions that differ only in their call target must hash differently
// by default, hash identically once the call target operand is ignored, and
// report the ignored operand (with differing hashes) in the side tables.
TEST(StructuralHashTest, Differences) {
  LLVMContext Ctx;
  std::unique_ptr<Module> M1 = parseIR(Ctx, "define i64 @f(i64 %a) {\n"
                                            " %c = add i64 %a, 1\n"
                                            " %b = call i64 @f1(i64 %c)\n"
                                            " ret i64 %b\n"
                                            "}\n"
                                            "declare i64 @f1(i64)");
  auto *F1 = M1->getFunction("f");
  ASSERT_NE(F1, nullptr);
  std::unique_ptr<Module> M2 = parseIR(Ctx, "define i64 @g(i64 %a) {\n"
                                            " %c = add i64 %a, 1\n"
                                            " %b = call i64 @f2(i64 %c)\n"
                                            " ret i64 %b\n"
                                            "}\n"
                                            "declare i64 @f2(i64)");
  auto *F2 = M2->getFunction("g");
  ASSERT_NE(F2, nullptr);

  // They are originally different when not ignoring any operand.
  EXPECT_NE(StructuralHash(*F1, true), StructuralHash(*F2, true));
  EXPECT_NE(StructuralHashWithDifferences(*F1, nullptr).FunctionHash,
            StructuralHashWithDifferences(*F2, nullptr).FunctionHash);

  // When we ignore the call target f1 vs f2, they have the same hash.
  // The calls take a single argument (operand 0), so the call target is
  // operand 1.
  auto IgnoreOp = [](const Instruction *I, unsigned OpndIdx) {
    return I->getOpcode() == Instruction::Call && OpndIdx == 1;
  };
  auto FuncHashInfo1 = StructuralHashWithDifferences(*F1, IgnoreOp);
  auto FuncHashInfo2 = StructuralHashWithDifferences(*F2, IgnoreOp);
  EXPECT_EQ(FuncHashInfo1.FunctionHash, FuncHashInfo2.FunctionHash);

  // There are 3 instructions in total.
  EXPECT_EQ(FuncHashInfo1.IndexInstruction->size(), 3u);
  EXPECT_EQ(FuncHashInfo2.IndexInstruction->size(), 3u);

  // Exactly one operand (the call target) has been ignored.
  EXPECT_EQ(FuncHashInfo1.IndexOperandHashMap->size(), 1u);
  EXPECT_EQ(FuncHashInfo2.IndexOperandHashMap->size(), 1u);

  // The index pair of instruction and operand (1, 1) is a key in the map.
  ASSERT_TRUE(FuncHashInfo1.IndexOperandHashMap->count({1, 1}));
  ASSERT_TRUE(FuncHashInfo2.IndexOperandHashMap->count({1, 1}));

  // The indexed instruction must be the call instruction as shown in the
  // IgnoreOp above. lookup() yields a null pointer for a missing index, so
  // check the pointers before dereferencing them.
  const Instruction *Call1 = FuncHashInfo1.IndexInstruction->lookup(1);
  const Instruction *Call2 = FuncHashInfo2.IndexInstruction->lookup(1);
  ASSERT_NE(Call1, nullptr);
  ASSERT_NE(Call2, nullptr);
  EXPECT_EQ(Call1->getOpcode(), Instruction::Call);
  EXPECT_EQ(Call2->getOpcode(), Instruction::Call);

  // The ignored operand hashes (for f1 vs. f2) are different.
  EXPECT_NE(FuncHashInfo1.IndexOperandHashMap->lookup({1, 1}),
            FuncHashInfo2.IndexOperandHashMap->lookup({1, 1}));
}

} // end anonymous namespace

0 comments on commit 6225d74

Please sign in to comment.