-
Notifications
You must be signed in to change notification settings - Fork 13.2k
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
[CGData] Global Merge Functions #112671
[CGData] Global Merge Functions #112671
Changes from all commits
4e2d83d
e86d78b
ac87d92
1d2436e
7007030
7b14e95
3f335e8
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,85 @@ | ||
//===------ GlobalMergeFunctions.h - Global merge functions -----*- C++ -*-===// | ||
// | ||
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. | ||
// See https://llvm.org/LICENSE.txt for license information. | ||
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception | ||
// | ||
//===----------------------------------------------------------------------===// | ||
// | ||
// This pass defines the implementation of a function merging mechanism | ||
// that utilizes a stable function hash to track differences in constants and | ||
// identify potential merge candidates. The process involves two rounds: | ||
// 1. The first round collects stable function hashes and identifies merge | ||
// candidates with matching hashes. It also computes the set of parameters | ||
// that point to different constants during the stable function merge. | ||
// 2. The second round leverages this collected global function information to | ||
// optimistically create a merged function in each module context, ensuring | ||
// correct transformation. | ||
// Similar to the global outliner, this approach uses the linker's deduplication | ||
// (ICF) to fold identical merged functions, thereby reducing the final binary | ||
// size. The work is inspired by the concepts discussed in the following paper: | ||
// https://dl.acm.org/doi/pdf/10.1145/3652032.3657575. | ||
// | ||
//===----------------------------------------------------------------------===// | ||
|
||
#ifndef LLVM_CODEGEN_GLOBALMERGEFUNCTIONS_H | ||
#define LLVM_CODEGEN_GLOBALMERGEFUNCTIONS_H | ||
|
||
#include "llvm/CGData/StableFunctionMap.h" | ||
#include "llvm/IR/Module.h" | ||
#include "llvm/IR/PassManager.h" | ||
#include "llvm/Pass.h" | ||
|
||
// Operating mode of the global function merger. GlobalMergeFunc (declared | ||
// below) starts in Local. | ||
// NOTE(review): the per-mode meanings are not visible in this header - | ||
// presumably Local merges within a single module, BuildingHashFuncion | ||
// collects the stable function map, and UsingHashFunction consumes a | ||
// previously built map; confirm against the implementation. | ||
// NOTE(review): this enum is declared at global scope, outside namespace llvm, | ||
// and "BuildingHashFuncion" is spelled without the 't'. Both are part of the | ||
// public name, so changing either requires updating every user. | ||
enum class HashFunctionMode { | ||
Local, | ||
BuildingHashFuncion, | ||
UsingHashFunction, | ||
}; | ||
|
||
namespace llvm { | ||
|
||
// A vector of locations (the pair of (instruction, operand) indices) reachable | ||
// from a parameter. | ||
using ParamLocs = SmallVector<IndexPair, 4>; | ||
// A vector of parameters, each one represented by its ParamLocs (the list of | ||
// locations reachable from that parameter). | ||
using ParamLocsVecTy = SmallVector<ParamLocs, 8>; | ||
|
||
/// GlobalMergeFunc implements a function merging mechanism using stable | ||
/// function hashes. It identifies and merges functions with matching hashes | ||
/// across modules to optimize binary size. Note that this class is a plain | ||
/// helper: as declared here it does not derive from ModulePass. | ||
class GlobalMergeFunc { | ||
/// Current operating mode; defaults to Local. | ||
/// NOTE(review): presumably updated by initializeMergerMode() - confirm. | ||
HashFunctionMode MergerMode = HashFunctionMode::Local; | ||
|
||
/// Stable function map owned by this object. Per the method comments below, | ||
/// analyze() fills it and emitFunctionMap() emits it. | ||
std::unique_ptr<StableFunctionMap> LocalFunctionMap; | ||
|
||
/// Module summary index handed to the constructor and stored as a raw | ||
/// pointer. It has no default initializer; the only constructor sets it. | ||
const ModuleSummaryIndex *Index; | ||
|
||
public: | ||
/// The suffix used to identify the merged function that parameterizes | ||
/// the constant values. Note that the original function, without this suffix, | ||
/// becomes a thunk supplying contexts to the merged function via parameters. | ||
static constexpr const char MergingInstanceSuffix[] = ".Tgm"; | ||
|
||
/// Construct a merger that keeps \p Index for later use. | ||
/// NOTE(review): the constructor is not marked explicit, and the ';' after | ||
/// its body is redundant. | ||
GlobalMergeFunc(const ModuleSummaryIndex *Index) : Index(Index) {}; | ||
|
||
/// Set up the merger mode for \p M. | ||
/// NOTE(review): presumably selects MergerMode from the module/codegen-data | ||
/// state - confirm against the implementation. | ||
void initializeMergerMode(const Module &M); | ||
|
||
/// Run the merger on \p M. | ||
/// NOTE(review): presumably returns true when the module was changed - | ||
/// confirm against the implementation. | ||
bool run(Module &M); | ||
|
||
/// Analyze module to create stable function into LocalFunctionMap. | ||
void analyze(Module &M); | ||
|
||
/// Emit LocalFunctionMap into __llvm_merge section. | ||
void emitFunctionMap(Module &M); | ||
|
||
/// Merge functions in the module using the given function map. | ||
bool merge(Module &M, const StableFunctionMap *FunctionMap); | ||
}; | ||
|
||
/// Global function merging pass for new pass manager. | ||
struct GlobalMergeFuncPass : public PassInfoMixin<GlobalMergeFuncPass> { | ||
/// Run the pass on \p M and report which analyses are preserved. The module | ||
/// analysis manager parameter is left unnamed in this declaration. | ||
PreservedAnalyses run(Module &M, AnalysisManager<Module> &); | ||
}; | ||
|
||
} // end namespace llvm | ||
#endif // LLVM_CODEGEN_GLOBALMERGEFUNCTIONS_H |
Original file line number | Diff line number | Diff line change | ||||||||||||||||||||||||
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
|
@@ -14,11 +14,43 @@ | |||||||||||||||||||||||||
//===----------------------------------------------------------------------===// | ||||||||||||||||||||||||||
|
||||||||||||||||||||||||||
#include "llvm/CGData/StableFunctionMap.h" | ||||||||||||||||||||||||||
#include "llvm/Support/CommandLine.h" | ||||||||||||||||||||||||||
#include "llvm/Support/Debug.h" | ||||||||||||||||||||||||||
|
||||||||||||||||||||||||||
#define DEBUG_TYPE "stable-function-map" | ||||||||||||||||||||||||||
|
||||||||||||||||||||||||||
using namespace llvm; | ||||||||||||||||||||||||||
|
||||||||||||||||||||||||||
// Command-line knobs for the merge profitability model. Each one is read by | ||||||||||||||||||||||||||
// isProfitable() below, and all of them are cl::Hidden. | ||||||||||||||||||||||||||
// NOTE(review): std::numeric_limits is used here but <limits> is not among the | ||||||||||||||||||||||||||
// includes visible in this hunk - presumably it arrives transitively; confirm. | ||||||||||||||||||||||||||
static cl::opt<unsigned> | ||||||||||||||||||||||||||
GlobalMergingMinMerges("global-merging-min-merges", | ||||||||||||||||||||||||||
cl::desc("Minimum number of similar functions with " | ||||||||||||||||||||||||||
"the same hash required for merging."), | ||||||||||||||||||||||||||
cl::init(2), cl::Hidden); | ||||||||||||||||||||||||||
static cl::opt<unsigned> GlobalMergingMinInstrs( | ||||||||||||||||||||||||||
"global-merging-min-instrs", | ||||||||||||||||||||||||||
cl::desc("The minimum instruction count required when merging functions."), | ||||||||||||||||||||||||||
cl::init(1), cl::Hidden); | ||||||||||||||||||||||||||
static cl::opt<unsigned> GlobalMergingMaxParams( | ||||||||||||||||||||||||||
"global-merging-max-params", | ||||||||||||||||||||||||||
cl::desc( | ||||||||||||||||||||||||||
"The maximum number of parameters allowed when merging functions."), | ||||||||||||||||||||||||||
cl::init(std::numeric_limits<unsigned>::max()), cl::Hidden); | ||||||||||||||||||||||||||
static cl::opt<unsigned> GlobalMergingParamOverhead( | ||||||||||||||||||||||||||
"global-merging-param-overhead", | ||||||||||||||||||||||||||
cl::desc("The overhead cost associated with each parameter when merging " | ||||||||||||||||||||||||||
"functions."), | ||||||||||||||||||||||||||
cl::init(2), cl::Hidden); | ||||||||||||||||||||||||||
static cl::opt<unsigned> | ||||||||||||||||||||||||||
GlobalMergingCallOverhead("global-merging-call-overhead", | ||||||||||||||||||||||||||
cl::desc("The overhead cost associated with each " | ||||||||||||||||||||||||||
"function call when merging functions."), | ||||||||||||||||||||||||||
cl::init(1), cl::Hidden); | ||||||||||||||||||||||||||
static cl::opt<unsigned> GlobalMergingExtraThreshold( | ||||||||||||||||||||||||||
"global-merging-extra-threshold", | ||||||||||||||||||||||||||
cl::desc("An additional cost threshold that must be exceeded for merging " | ||||||||||||||||||||||||||
"to be considered beneficial."), | ||||||||||||||||||||||||||
cl::init(0), cl::Hidden); | ||||||||||||||||||||||||||
|
||||||||||||||||||||||||||
unsigned StableFunctionMap::getIdOrCreateForName(StringRef Name) { | ||||||||||||||||||||||||||
auto It = NameToId.find(Name); | ||||||||||||||||||||||||||
if (It != NameToId.end()) | ||||||||||||||||||||||||||
|
@@ -117,7 +149,38 @@ static void removeIdenticalIndexPair( | |||||||||||||||||||||||||
SF->IndexOperandHashMap->erase(Pair); | ||||||||||||||||||||||||||
} | ||||||||||||||||||||||||||
|
||||||||||||||||||||||||||
void StableFunctionMap::finalize() { | ||||||||||||||||||||||||||
/// Decide whether merging the stable functions in \p SFS is expected to pay | ||||||||||||||||||||||||||
/// off. | ||||||||||||||||||||||||||
/// | ||||||||||||||||||||||||||
/// The first entry, SFS[0], serves as the representative: its InstCount is the | ||||||||||||||||||||||||||
/// instruction count and the size of its IndexOperandHashMap is the parameter | ||||||||||||||||||||||||||
/// count. Returns false when there are fewer functions than | ||||||||||||||||||||||||||
/// -global-merging-min-merges, fewer instructions than | ||||||||||||||||||||||||||
/// -global-merging-min-instrs, or more parameters than | ||||||||||||||||||||||||||
/// -global-merging-max-params. Otherwise returns Benefit > Cost, where | ||||||||||||||||||||||||||
///   Benefit = InstCount * (StableFunctionCount - 1) | ||||||||||||||||||||||||||
///   Cost    = (ParamOverhead * ParamCount + CallOverhead) | ||||||||||||||||||||||||||
///             * StableFunctionCount + ExtraThreshold | ||||||||||||||||||||||||||
/// and prints the inputs and the verdict under LLVM_DEBUG. | ||||||||||||||||||||||||||
/// | ||||||||||||||||||||||||||
/// NOTE(review): SFS[0] is read as soon as the min-merges check passes, so | ||||||||||||||||||||||||||
/// this assumes SFS is non-empty (the check only guarantees that when | ||||||||||||||||||||||||||
/// -global-merging-min-merges is at least 1) - confirm against callers. | ||||||||||||||||||||||||||
/// NOTE(review): all arithmetic is done in `unsigned`, so very large option | ||||||||||||||||||||||||||
/// values could wrap Benefit or Cost; consider a wider type if that matters. | ||||||||||||||||||||||||||
static bool isProfitable( | ||||||||||||||||||||||||||
const SmallVector<std::unique_ptr<StableFunctionMap::StableFunctionEntry>> | ||||||||||||||||||||||||||
&SFS) { | ||||||||||||||||||||||||||
unsigned StableFunctionCount = SFS.size(); | ||||||||||||||||||||||||||
if (StableFunctionCount < GlobalMergingMinMerges) | ||||||||||||||||||||||||||
return false; | ||||||||||||||||||||||||||
|
||||||||||||||||||||||||||
unsigned InstCount = SFS[0]->InstCount; | ||||||||||||||||||||||||||
if (InstCount < GlobalMergingMinInstrs) | ||||||||||||||||||||||||||
return false; | ||||||||||||||||||||||||||
|
||||||||||||||||||||||||||
unsigned ParamCount = SFS[0]->IndexOperandHashMap->size(); | ||||||||||||||||||||||||||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. The number of unique values of There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. The number of unique values can be different depending on a stable function. |
||||||||||||||||||||||||||
if (ParamCount > GlobalMergingMaxParams) | ||||||||||||||||||||||||||
return false; | ||||||||||||||||||||||||||
|
||||||||||||||||||||||||||
// Merging keeps one body and drops the other (StableFunctionCount - 1) copies; | ||||||||||||||||||||||||||
// every one of the StableFunctionCount functions pays the per-parameter and | ||||||||||||||||||||||||||
// per-call overhead. | ||||||||||||||||||||||||||
unsigned Benefit = InstCount * (StableFunctionCount - 1); | ||||||||||||||||||||||||||
unsigned Cost = | ||||||||||||||||||||||||||
(GlobalMergingParamOverhead * ParamCount + GlobalMergingCallOverhead) * | ||||||||||||||||||||||||||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Also FYI the implementation that comes with this PR is not passing all the existing Swift repo tests despite lowering There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
It seems we need to incorporate some type information to accurately reflect the precise cost, in theory. Since the profit model is currently computed offline, I've refined the parameter count for greater precision, as mentioned above
Despite some differences in the underlying assumptions, I'm curious about what the existing Swift merge can accomplish that this new pass cannot. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
These are the behavioral differences I have found by running
|
||||||||||||||||||||||||||
StableFunctionCount + | ||||||||||||||||||||||||||
GlobalMergingExtraThreshold; | ||||||||||||||||||||||||||
|
||||||||||||||||||||||||||
bool Result = Benefit > Cost; | ||||||||||||||||||||||||||
LLVM_DEBUG(dbgs() << "isProfitable: Hash = " << SFS[0]->Hash << ", " | ||||||||||||||||||||||||||
<< "StableFunctionCount = " << StableFunctionCount | ||||||||||||||||||||||||||
<< ", InstCount = " << InstCount | ||||||||||||||||||||||||||
<< ", ParamCount = " << ParamCount | ||||||||||||||||||||||||||
<< ", Benefit = " << Benefit << ", Cost = " << Cost | ||||||||||||||||||||||||||
<< ", Result = " << (Result ? "true" : "false") << "\n"); | ||||||||||||||||||||||||||
return Result; | ||||||||||||||||||||||||||
} | ||||||||||||||||||||||||||
|
||||||||||||||||||||||||||
void StableFunctionMap::finalize(bool SkipTrim) { | ||||||||||||||||||||||||||
for (auto It = HashToFuncs.begin(); It != HashToFuncs.end(); ++It) { | ||||||||||||||||||||||||||
auto &[StableHash, SFS] = *It; | ||||||||||||||||||||||||||
|
||||||||||||||||||||||||||
|
@@ -158,9 +221,15 @@ void StableFunctionMap::finalize() { | |||||||||||||||||||||||||
continue; | ||||||||||||||||||||||||||
} | ||||||||||||||||||||||||||
|
||||||||||||||||||||||||||
if (SkipTrim) | ||||||||||||||||||||||||||
continue; | ||||||||||||||||||||||||||
|
||||||||||||||||||||||||||
// Trim the index pair that has the same operand hash across | ||||||||||||||||||||||||||
// stable functions. | ||||||||||||||||||||||||||
removeIdenticalIndexPair(SFS); | ||||||||||||||||||||||||||
|
||||||||||||||||||||||||||
if (!isProfitable(SFS)) | ||||||||||||||||||||||||||
HashToFuncs.erase(It); | ||||||||||||||||||||||||||
} | ||||||||||||||||||||||||||
|
||||||||||||||||||||||||||
Finalized = true; | ||||||||||||||||||||||||||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
The number of IR instructions cannot precisely reflect the actual number of machine instructions (often the latter is larger for AArch64; take the access of a global value for example, which will be expanded into an `ADRP` pair for the small code model), which results in `Benefit` being underestimated and some profitable merging opportunities being dropped. I am not sure if there is existing code that could be reused to better estimate the machine instruction count, but at least we may introduce a multiplier on `InstCount` for fine-tuning of the behavior.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I added `-global-merging-inst-overhead` to tune this parameter.