diff --git a/llvm/lib/CGData/StableFunctionMap.cpp b/llvm/lib/CGData/StableFunctionMap.cpp index fe7be0c0b6e7b8..4afe77d78a4fe5 100644 --- a/llvm/lib/CGData/StableFunctionMap.cpp +++ b/llvm/lib/CGData/StableFunctionMap.cpp @@ -14,6 +14,7 @@ //===----------------------------------------------------------------------===// #include "llvm/CGData/StableFunctionMap.h" +#include "llvm/ADT/SmallSet.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/Debug.h" @@ -35,21 +36,30 @@ static cl::opt GlobalMergingMaxParams( cl::desc( "The maximum number of parameters allowed when merging functions."), cl::init(std::numeric_limits::max()), cl::Hidden); -static cl::opt GlobalMergingParamOverhead( +static cl::opt GlobalMergingSkipNoParams( + "global-merging-skip-no-params", + cl::desc("Skip merging functions with no parameters."), cl::init(true), + cl::Hidden); +static cl::opt GlobalMergingInstOverhead( + "global-merging-inst-overhead", + cl::desc("The overhead cost associated with each instruction when lowering " + "to machine instruction."), + cl::init(1.2), cl::Hidden); +static cl::opt GlobalMergingParamOverhead( "global-merging-param-overhead", cl::desc("The overhead cost associated with each parameter when merging " "functions."), - cl::init(2), cl::Hidden); -static cl::opt + cl::init(2.0), cl::Hidden); +static cl::opt GlobalMergingCallOverhead("global-merging-call-overhead", cl::desc("The overhead cost associated with each " "function call when merging functions."), - cl::init(1), cl::Hidden); -static cl::opt GlobalMergingExtraThreshold( + cl::init(1.0), cl::Hidden); +static cl::opt GlobalMergingExtraThreshold( "global-merging-extra-threshold", cl::desc("An additional cost threshold that must be exceeded for merging " "to be considered beneficial."), - cl::init(0), cl::Hidden); + cl::init(0.0), cl::Hidden); unsigned StableFunctionMap::getIdOrCreateForName(StringRef Name) { auto It = NameToId.find(Name); @@ -160,21 +170,32 @@ static bool isProfitable( if (InstCount < GlobalMergingMinInstrs) return false; - unsigned ParamCount = SFS[0]->IndexOperandHashMap->size(); - if (ParamCount > GlobalMergingMaxParams) - return false; - - unsigned Benefit = InstCount * (StableFunctionCount - 1); - unsigned Cost = - (GlobalMergingParamOverhead * ParamCount + GlobalMergingCallOverhead) * - StableFunctionCount + - GlobalMergingExtraThreshold; + double Cost = 0.0; + SmallSet UniqueHashVals; + for (auto &SF : SFS) { + UniqueHashVals.clear(); + for (auto &[IndexPair, Hash] : *SF->IndexOperandHashMap) + UniqueHashVals.insert(Hash); + unsigned ParamCount = UniqueHashVals.size(); + if (ParamCount > GlobalMergingMaxParams) + return false; + // Theoretically, if ParamCount is 0, it results in identical code folding + // (ICF), which we can skip merging here since the linker already handles + // ICF. This pass would otherwise introduce unnecessary thunks that are + // merely direct jumps. However, enabling this could be beneficial depending + // on downstream passes, so we provide an option for it. + if (GlobalMergingSkipNoParams && ParamCount == 0) + return false; + Cost += ParamCount * GlobalMergingParamOverhead + GlobalMergingCallOverhead; + } + Cost += GlobalMergingExtraThreshold; + double Benefit = + InstCount * (StableFunctionCount - 1) * GlobalMergingInstOverhead; bool Result = Benefit > Cost; LLVM_DEBUG(dbgs() << "isProfitable: Hash = " << SFS[0]->Hash << ", " << "StableFunctionCount = " << StableFunctionCount << ", InstCount = " << InstCount - << ", ParamCount = " << ParamCount << ", Benefit = " << Benefit << ", Cost = " << Cost << ", Result = " << (Result ? "true" : "false") << "\n"); return Result; diff --git a/llvm/lib/CodeGen/GlobalMergeFunctions.cpp b/llvm/lib/CodeGen/GlobalMergeFunctions.cpp index c8f1b98c9a18e9..470582885fab0c 100644 --- a/llvm/lib/CodeGen/GlobalMergeFunctions.cpp +++ b/llvm/lib/CodeGen/GlobalMergeFunctions.cpp @@ -405,12 +405,13 @@ static ParamLocsVecTy computeParamInfo( } ParamLocsVecTy ParamLocsVec; - for (auto &[HashSeq, Locs] : HashSeqToLocs) { + for (auto &[HashSeq, Locs] : HashSeqToLocs) ParamLocsVec.push_back(std::move(Locs)); - llvm::sort(ParamLocsVec, [&](const ParamLocs &L, const ParamLocs &R) { - return L[0] < R[0]; - }); - } + + llvm::sort(ParamLocsVec, [&](const ParamLocs &L, const ParamLocs &R) { + return L[0] < R[0]; + }); + return ParamLocsVec; } diff --git a/llvm/test/ThinLTO/AArch64/cgdata-merge-local.ll b/llvm/test/CodeGen/Generic/cgdata-merge-local.ll similarity index 98% rename from llvm/test/ThinLTO/AArch64/cgdata-merge-local.ll rename to llvm/test/CodeGen/Generic/cgdata-merge-local.ll index 660ffe6109948f..3517fbd3ae8a26 100644 --- a/llvm/test/ThinLTO/AArch64/cgdata-merge-local.ll +++ b/llvm/test/CodeGen/Generic/cgdata-merge-local.ll @@ -2,7 +2,7 @@ ; while parameterizing a difference in their global variables, g1 and g2. ; To achieve this, we create two instances of the global merging function, f1.Tgm and f2.Tgm, ; which are tail-called from thunks f1 and f2 respectively. -; These identical functions, f1.Tgm and f2.Tgm, will be folded by the linker via Identical Code Folding (IFC). +; These identical functions, f1.Tgm and f2.Tgm, will be folded by the linker via Identical Code Folding (ICF). ; RUN: opt -S --passes=global-merge-func %s | FileCheck %s diff --git a/llvm/test/CodeGen/Generic/cgdata-merge-no-params.ll b/llvm/test/CodeGen/Generic/cgdata-merge-no-params.ll new file mode 100644 index 00000000000000..30d6a45c03a0c3 --- /dev/null +++ b/llvm/test/CodeGen/Generic/cgdata-merge-no-params.ll @@ -0,0 +1,42 @@ +; This test verifies whether two identical functions, f1 and f2, can be merged +; locally using the global merge function. +; The functions, f1.Tgm and f2.Tgm, will be folded by the linker through +; Identical Code Folding (ICF). +; While identical functions can already be folded by the linker, creating this +; canonical form can be beneficial in downstream passes. This merging process +; can be controlled by the -global-merging-skip-no-params option. + +; RUN: llc -enable-global-merge-func=true -global-merging-skip-no-params=false < %s | FileCheck %s --check-prefix=MERGE +; RUN: llc -enable-global-merge-func=true -global-merging-skip-no-params=true < %s | FileCheck %s --implicit-check-not=".Tgm" + +; MERGE: _f1.Tgm +; MERGE: _f2.Tgm + +target datalayout = "e-m:o-i64:64-i128:128-n32:64-S128" +target triple = "arm64-unknown-ios12.0.0" + +@g = external local_unnamed_addr global [0 x i32], align 4 +@g1 = external global i32, align 4 +@g2 = external global i32, align 4 + +define i32 @f1(i32 %a) { +entry: + %idxprom = sext i32 %a to i64 + %arrayidx = getelementptr inbounds [0 x i32], [0 x i32]* @g, i64 0, i64 %idxprom + %0 = load i32, i32* %arrayidx, align 4 + %1 = load volatile i32, i32* @g1, align 4 + %mul = mul nsw i32 %1, %0 + %add = add nsw i32 %mul, 1 + ret i32 %add +} + +define i32 @f2(i32 %a) { +entry: + %idxprom = sext i32 %a to i64 + %arrayidx = getelementptr inbounds [0 x i32], [0 x i32]* @g, i64 0, i64 %idxprom + %0 = load i32, i32* %arrayidx, align 4 + %1 = load volatile i32, i32* @g1, align 4 + %mul = mul nsw i32 %1, %0 + %add = add nsw i32 %mul, 1 + ret i32 %add +}