6
6
//
7
7
// ===----------------------------------------------------------------------===//
8
8
//
9
- // TODO: This implements a function merge using function hash while tracking
10
- // differences in Constants. This uses stable function hash to find potential
11
- // merge candidates. The first codegen round collects stable function hashes,
12
- // and determines the merge candidates that match the stable function hashes.
13
- // The set of parameters pointing to different Constants are also computed
14
- // during the stable function merge. The second codegen round uses this global
15
- // function info to optimistically create a merged function in each module
16
- // context to guarantee correct transformation. Similar to the global outliner,
17
- // the linker's deduplication (ICF) folds the identical merged functions to save
18
- // the final binary size.
9
+ // This pass defines the implementation of a function merging mechanism
10
+ // that utilizes a stable function hash to track differences in constants and
11
+ // create potential merge candidates. The process involves two rounds:
12
+ // 1. The first round collects stable function hashes and identifies merge
13
+ // candidates with matching hashes. It also computes the set of parameters
14
+ // that point to different constants during the stable function merge.
15
+ // 2. The second round leverages this collected global function information to
16
+ // optimistically create a merged function in each module context, ensuring
17
+ // correct transformation.
18
+ // Similar to the global outliner, this approach uses the linker's deduplication
19
+ // (ICF) to fold identical merged functions, thereby reducing the final binary
20
+ // size. The work is inspired by the concepts discussed in the following paper:
21
+ // https://dl.acm.org/doi/pdf/10.1145/3652032.3657575.
19
22
//
20
23
// ===----------------------------------------------------------------------===//
21
24
22
25
#include " llvm/Transforms/IPO/GlobalMergeFunctions.h"
23
26
#include " llvm/ADT/Statistic.h"
24
27
#include " llvm/Analysis/ModuleSummaryAnalysis.h"
25
28
#include " llvm/CGData/CodeGenData.h"
26
- #include " llvm/CGData/StableFunctionMap.h"
27
- #include " llvm/CodeGen/MachineStableHash.h"
28
- #include " llvm/CodeGen/Passes.h"
29
29
#include " llvm/IR/IRBuilder.h"
30
30
#include " llvm/IR/StructuralHash.h"
31
31
#include " llvm/InitializePasses.h"
@@ -84,7 +84,7 @@ STATISTIC(NumAnalyzedModues, "Number of modules that are analyzed");
84
84
STATISTIC (NumAnalyzedFunctions, " Number of functions that are analyzed" );
85
85
STATISTIC (NumEligibleFunctions, " Number of functions that are eligible" );
86
86
87
- // / Returns true if the \opIdx operand of \p CI is the callee operand.
87
+ // / Returns true if the \p OpIdx operand of \p CI is the callee operand.
88
88
static bool isCalleeOperand (const CallBase *CI, unsigned OpIdx) {
89
89
// Compare the addresses of the two returned operand uses, not their contents.
return &CI->getCalledOperandUse () == &CI->getOperandUse (OpIdx);
90
90
}
@@ -148,22 +148,19 @@ bool isEligibleFunction(Function *F) {
148
148
if (F->hasFnAttribute (llvm::Attribute::NoMerge))
149
149
return false ;
150
150
151
- if (F->hasAvailableExternallyLinkage ()) {
151
+ if (F->hasAvailableExternallyLinkage ())
152
152
return false ;
153
- }
154
153
155
- if (F->getFunctionType ()->isVarArg ()) {
154
+ if (F->getFunctionType ()->isVarArg ())
156
155
return false ;
157
- }
158
156
159
157
if (F->getCallingConv () == CallingConv::SwiftTail)
160
158
return false ;
161
159
162
- // if function contains callsites with musttail, if we merge
160
+ // If the function contains callsites with musttail and we merge
163
161
// it, the merged function will have the musttail callsite, but
164
162
// the number of parameters can change, thus the parameter count
165
163
// of the callsite will mismatch with the function itself.
166
- // if (IgnoreMusttailFunction) {
167
164
for (const BasicBlock &BB : *F) {
168
165
for (const Instruction &I : BB) {
169
166
const auto *CB = dyn_cast<CallBase>(&I);
@@ -203,7 +200,6 @@ static bool ignoreOp(const Instruction *I, unsigned OpIdx) {
203
200
return true ;
204
201
}
205
202
206
- // copy from merge functions.cpp
207
203
static Value *createCast (IRBuilder<> &Builder, Value *V, Type *DestTy) {
208
204
Type *SrcTy = V->getType ();
209
205
if (SrcTy->isStructTy ()) {
@@ -252,7 +248,8 @@ void GlobalMergeFunc::analyze(Module &M) {
252
248
253
249
auto FI = llvm::StructuralHashWithDifferences (Func, ignoreOp);
254
250
255
- // Convert the map to a vector for a serialization-friendly format.
251
+ // Convert the operand map to a vector for a serialization-friendly
252
+ // format.
256
253
IndexOperandHashVecType IndexOperandHashes;
257
254
for (auto &Pair : *FI.IndexOperandHashMap )
258
255
IndexOperandHashes.emplace_back (Pair);
@@ -595,7 +592,7 @@ bool GlobalMergeFunc::merge(Module &M, const StableFunctionMap *FunctionMap) {
595
592
// This module check is not strictly necessary as the functions can move
596
593
// around. We just want to avoid merging functions from different
597
594
// modules than the first one in the function map, as they may not end up
598
- // with not being ICFed.
595
+ // being ICFed by the linker.
599
596
if (MergedModId != *FunctionMap->getNameForId (SF->ModuleNameId )) {
600
597
++NumMismatchedModuleIdGlobalMergeFunction;
601
598
continue ;
@@ -616,12 +613,12 @@ bool GlobalMergeFunc::merge(Module &M, const StableFunctionMap *FunctionMap) {
616
613
dbgs () << " [GlobalMergeFunc] Merging function count " << FuncMergeInfoSize
617
614
<< " in " << ModId << " \n " ;
618
615
});
616
+
619
617
for (auto &FMI : FuncMergeInfos) {
620
618
Changed = true ;
621
619
622
620
// We've already validated all locations of constant operands pointed to by
623
- // the parameters. Just use the first one to bookkeep the original
624
- // constants for each parameter
621
+ // the parameters. Populate parameters pointing to the original constants.
625
622
SmallVector<Constant *> Params;
626
623
SmallVector<Type *> ParamTypes;
627
624
for (auto &ParamLocs : ParamLocsVec) {
@@ -633,8 +630,7 @@ bool GlobalMergeFunc::merge(Module &M, const StableFunctionMap *FunctionMap) {
633
630
ParamTypes.push_back (Opnd->getType ());
634
631
}
635
632
636
- // Create a merged function derived from the first function in the current
637
- // module context.
633
+ // Create a merged function derived from the current function.
638
634
Function *MergedFunc =
639
635
createMergedFunction (FMI, ParamTypes, ParamLocsVec);
640
636
@@ -645,7 +641,8 @@ bool GlobalMergeFunc::merge(Module &M, const StableFunctionMap *FunctionMap) {
645
641
MergedFunc->dump ();
646
642
});
647
643
648
- // Create a thunk to the merged function.
644
+ // Transform the current function into a thunk that calls the merged
645
+ // function.
649
646
createThunk (FMI, Params, MergedFunc);
650
647
LLVM_DEBUG ({
651
648
dbgs () << " [GlobalMergeFunc] Thunk generated: \n " ;
0 commit comments