Skip to content

Commit d3da788

Browse files
authored
[CGData] Refactor Global Merge Functions (#115750)
This is a follow-up PR to refactor the initial global merge function pass implemented in #112671. It first collects stable functions relevant to the current module and iterates over those only, instead of iterating through all stable functions in the stable function map. This is a patch for https://discourse.llvm.org/t/rfc-global-function-merging/82608.
1 parent be18736 commit d3da788

File tree

1 file changed

+64
-93
lines changed

1 file changed

+64
-93
lines changed

llvm/lib/CodeGen/GlobalMergeFunctions.cpp

+64-93
Original file line numberDiff line numberDiff line change
@@ -31,14 +31,6 @@ static cl::opt<bool> DisableCGDataForMerging(
3131
"merging is still enabled within a module."),
3232
cl::init(false));
3333

34-
STATISTIC(NumMismatchedFunctionHash,
35-
"Number of mismatched function hash for global merge function");
36-
STATISTIC(NumMismatchedInstCount,
37-
"Number of mismatched instruction count for global merge function");
38-
STATISTIC(NumMismatchedConstHash,
39-
"Number of mismatched const hash for global merge function");
40-
STATISTIC(NumMismatchedModuleId,
41-
"Number of mismatched Module Id for global merge function");
4234
STATISTIC(NumMergedFunctions,
4335
"Number of functions that are actually merged using function hash");
4436
STATISTIC(NumAnalyzedModues, "Number of modules that are analyzed");
@@ -110,7 +102,7 @@ bool isEligibleFunction(Function *F) {
110102
return true;
111103
}
112104

113-
static bool isEligibleInstrunctionForConstantSharing(const Instruction *I) {
105+
static bool isEligibleInstructionForConstantSharing(const Instruction *I) {
114106
switch (I->getOpcode()) {
115107
case Instruction::Load:
116108
case Instruction::Store:
@@ -122,10 +114,15 @@ static bool isEligibleInstrunctionForConstantSharing(const Instruction *I) {
122114
}
123115
}
124116

117+
// This function takes an instruction, \p I, and an operand index, \p OpIdx.
118+
// It returns true if the operand should be ignored in the hash computation.
119+
// If \p OpIdx is out of range based on the other instruction context, it cannot
120+
// be ignored.
125121
static bool ignoreOp(const Instruction *I, unsigned OpIdx) {
126-
assert(OpIdx < I->getNumOperands() && "Invalid operand index");
122+
if (OpIdx >= I->getNumOperands())
123+
return false;
127124

128-
if (!isEligibleInstrunctionForConstantSharing(I))
125+
if (!isEligibleInstructionForConstantSharing(I))
129126
return false;
130127

131128
if (!isa<Constant>(I->getOperand(OpIdx)))
@@ -203,9 +200,9 @@ void GlobalMergeFunc::analyze(Module &M) {
203200
struct FuncMergeInfo {
204201
StableFunctionMap::StableFunctionEntry *SF;
205202
Function *F;
206-
std::unique_ptr<IndexInstrMap> IndexInstruction;
203+
IndexInstrMap *IndexInstruction;
207204
FuncMergeInfo(StableFunctionMap::StableFunctionEntry *SF, Function *F,
208-
std::unique_ptr<IndexInstrMap> IndexInstruction)
205+
IndexInstrMap *IndexInstruction)
209206
: SF(SF), F(F), IndexInstruction(std::move(IndexInstruction)) {}
210207
};
211208

@@ -420,101 +417,75 @@ static ParamLocsVecTy computeParamInfo(
420417
bool GlobalMergeFunc::merge(Module &M, const StableFunctionMap *FunctionMap) {
421418
bool Changed = false;
422419

423-
// Build a map from stable function name to function.
424-
StringMap<Function *> StableNameToFuncMap;
425-
for (auto &F : M)
426-
StableNameToFuncMap[get_stable_name(F.getName())] = &F;
427-
// Track merged functions
428-
DenseSet<Function *> MergedFunctions;
429-
430-
auto ModId = M.getModuleIdentifier();
431-
for (auto &[Hash, SFS] : FunctionMap->getFunctionMap()) {
432-
// Parameter locations based on the unique hash sequences
433-
// across the candidates.
420+
// Collect stable functions related to the current module.
421+
DenseMap<stable_hash, SmallVector<std::pair<Function *, FunctionHashInfo>>>
422+
HashToFuncs;
423+
auto &Maps = FunctionMap->getFunctionMap();
424+
for (auto &F : M) {
425+
if (!isEligibleFunction(&F))
426+
continue;
427+
auto FI = llvm::StructuralHashWithDifferences(F, ignoreOp);
428+
if (Maps.contains(FI.FunctionHash))
429+
HashToFuncs[FI.FunctionHash].emplace_back(&F, std::move(FI));
430+
}
431+
432+
for (auto &[Hash, Funcs] : HashToFuncs) {
434433
std::optional<ParamLocsVecTy> ParamLocsVec;
435-
Function *MergedFunc = nullptr;
436-
std::string MergedModId;
437434
SmallVector<FuncMergeInfo> FuncMergeInfos;
438-
for (auto &SF : SFS) {
439-
// Get the function from the stable name.
440-
auto I = StableNameToFuncMap.find(
441-
*FunctionMap->getNameForId(SF->FunctionNameId));
442-
if (I == StableNameToFuncMap.end())
443-
continue;
444-
Function *F = I->second;
445-
assert(F);
446-
// Skip if the function has been merged before.
447-
if (MergedFunctions.count(F))
448-
continue;
449-
// Consider the function if it is eligible for merging.
450-
if (!isEligibleFunction(F))
435+
auto &SFS = Maps.at(Hash);
436+
assert(!SFS.empty());
437+
auto &RFS = SFS[0];
438+
439+
// Iterate functions with the same hash.
440+
for (auto &[F, FI] : Funcs) {
441+
// Check if the function is compatible with any stable function
442+
// in terms of the number of instructions and ignored operands.
443+
if (RFS->InstCount != FI.IndexInstruction->size())
451444
continue;
452445

453-
auto FI = llvm::StructuralHashWithDifferences(*F, ignoreOp);
454-
uint64_t FuncHash = FI.FunctionHash;
455-
if (Hash != FuncHash) {
456-
++NumMismatchedFunctionHash;
446+
auto hasValidSharedConst =
447+
[&](StableFunctionMap::StableFunctionEntry *SF) {
448+
for (auto &[Index, Hash] : *SF->IndexOperandHashMap) {
449+
auto [InstIndex, OpndIndex] = Index;
450+
assert(InstIndex < FI.IndexInstruction->size());
451+
auto *Inst = FI.IndexInstruction->lookup(InstIndex);
452+
if (!ignoreOp(Inst, OpndIndex))
453+
return false;
454+
}
455+
return true;
456+
};
457+
if (!hasValidSharedConst(RFS.get()))
457458
continue;
458-
}
459459

460-
if (SF->InstCount != FI.IndexInstruction->size()) {
461-
++NumMismatchedInstCount;
462-
continue;
463-
}
464-
bool HasValidSharedConst = true;
465-
for (auto &[Index, Hash] : *SF->IndexOperandHashMap) {
466-
auto [InstIndex, OpndIndex] = Index;
467-
assert(InstIndex < FI.IndexInstruction->size());
468-
auto *Inst = FI.IndexInstruction->lookup(InstIndex);
469-
if (!ignoreOp(Inst, OpndIndex)) {
470-
HasValidSharedConst = false;
471-
break;
472-
}
473-
}
474-
if (!HasValidSharedConst) {
475-
++NumMismatchedConstHash;
476-
continue;
477-
}
478-
if (!checkConstHashCompatible(*SF->IndexOperandHashMap,
479-
*FI.IndexOperandHashMap)) {
480-
++NumMismatchedConstHash;
481-
continue;
482-
}
483-
if (!ParamLocsVec.has_value()) {
484-
ParamLocsVec = computeParamInfo(SFS);
485-
LLVM_DEBUG(dbgs() << "[GlobalMergeFunc] Merging hash: " << Hash
486-
<< " with Params " << ParamLocsVec->size() << "\n");
487-
}
488-
if (!checkConstLocationCompatible(*SF, *FI.IndexInstruction,
489-
*ParamLocsVec)) {
490-
++NumMismatchedConstHash;
491-
continue;
492-
}
493-
494-
if (MergedFunc) {
495-
// Check if the matched functions fall into the same (first) module.
496-
// This module check is not strictly necessary as the functions can move
497-
// around. We just want to avoid merging functions from different
498-
// modules than the first one in the function map, as they may not end
499-
// up with being ICFed by the linker.
500-
if (MergedModId != *FunctionMap->getNameForId(SF->ModuleNameId)) {
501-
++NumMismatchedModuleId;
460+
for (auto &SF : SFS) {
461+
assert(SF->InstCount == FI.IndexInstruction->size());
462+
assert(hasValidSharedConst(SF.get()));
463+
// Check if there is any stable function that is compatiable with the
464+
// current one.
465+
if (!checkConstHashCompatible(*SF->IndexOperandHashMap,
466+
*FI.IndexOperandHashMap))
502467
continue;
468+
if (!ParamLocsVec.has_value()) {
469+
ParamLocsVec = computeParamInfo(SFS);
470+
LLVM_DEBUG(dbgs() << "[GlobalMergeFunc] Merging hash: " << Hash
471+
<< " with Params " << ParamLocsVec->size() << "\n");
503472
}
504-
} else {
505-
MergedFunc = F;
506-
MergedModId = *FunctionMap->getNameForId(SF->ModuleNameId);
507-
}
473+
if (!checkConstLocationCompatible(*SF, *FI.IndexInstruction,
474+
*ParamLocsVec))
475+
continue;
508476

509-
FuncMergeInfos.emplace_back(SF.get(), F, std::move(FI.IndexInstruction));
510-
MergedFunctions.insert(F);
477+
// If a stable function matching the current one is found,
478+
// create a candidate for merging and proceed to the next function.
479+
FuncMergeInfos.emplace_back(SF.get(), F, FI.IndexInstruction.get());
480+
break;
481+
}
511482
}
512483
unsigned FuncMergeInfoSize = FuncMergeInfos.size();
513484
if (FuncMergeInfoSize == 0)
514485
continue;
515486

516487
LLVM_DEBUG(dbgs() << "[GlobalMergeFunc] Merging function count "
517-
<< FuncMergeInfoSize << " in " << ModId << "\n");
488+
<< FuncMergeInfoSize << " for hash: " << Hash << "\n");
518489

519490
for (auto &FMI : FuncMergeInfos) {
520491
Changed = true;

0 commit comments

Comments
 (0)