-
Notifications
You must be signed in to change notification settings - Fork 12.7k
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
[PassBuilder] Add ThinOrFullLTOPhase
to early simplification EP callbacks
#114547
Conversation
This stack of pull requests is managed by Graphite. Learn more about stacking. |
@llvm/pr-subscribers-clang-codegen @llvm/pr-subscribers-backend-amdgpu Author: Shilei Tian (shiltian) Changes: The early simplification pipeline is used in the non-LTO and (Thin/Full)LTO pre-link stages. Full diff: https://github.com/llvm/llvm-project/pull/114547.diff 7 Files Affected:
diff --git a/clang/lib/CodeGen/BackendUtil.cpp b/clang/lib/CodeGen/BackendUtil.cpp
index ae33554a66b6b5..76cdf64b3c5a80 100644
--- a/clang/lib/CodeGen/BackendUtil.cpp
+++ b/clang/lib/CodeGen/BackendUtil.cpp
@@ -993,7 +993,7 @@ void EmitAssemblyHelper::RunOptimizationPipeline(
createModuleToFunctionPassAdaptor(ObjCARCExpandPass()));
});
PB.registerPipelineEarlySimplificationEPCallback(
- [](ModulePassManager &MPM, OptimizationLevel Level) {
+ [](ModulePassManager &MPM, OptimizationLevel Level, bool) {
if (Level != OptimizationLevel::O0)
MPM.addPass(ObjCARCAPElimPass());
});
diff --git a/llvm/include/llvm/Passes/PassBuilder.h b/llvm/include/llvm/Passes/PassBuilder.h
index e6ced0cccb9b3c..530db62bce0efa 100644
--- a/llvm/include/llvm/Passes/PassBuilder.h
+++ b/llvm/include/llvm/Passes/PassBuilder.h
@@ -478,7 +478,8 @@ class PassBuilder {
/// This extension point allows adding optimization right after passes that do
/// basic simplification of the input IR.
void registerPipelineEarlySimplificationEPCallback(
- const std::function<void(ModulePassManager &, OptimizationLevel)> &C) {
+ const std::function<void(ModulePassManager &, OptimizationLevel, bool)>
+ &C) {
PipelineEarlySimplificationEPCallbacks.push_back(C);
}
@@ -637,7 +638,8 @@ class PassBuilder {
void invokePipelineStartEPCallbacks(ModulePassManager &MPM,
OptimizationLevel Level);
void invokePipelineEarlySimplificationEPCallbacks(ModulePassManager &MPM,
- OptimizationLevel Level);
+ OptimizationLevel Level,
+ bool LTOPreLink = false);
static bool checkParametrizedPassName(StringRef Name, StringRef PassName) {
if (!Name.consume_front(PassName))
@@ -762,7 +764,8 @@ class PassBuilder {
FullLinkTimeOptimizationLastEPCallbacks;
SmallVector<std::function<void(ModulePassManager &, OptimizationLevel)>, 2>
PipelineStartEPCallbacks;
- SmallVector<std::function<void(ModulePassManager &, OptimizationLevel)>, 2>
+ SmallVector<std::function<void(ModulePassManager &, OptimizationLevel, bool)>,
+ 2>
PipelineEarlySimplificationEPCallbacks;
SmallVector<std::function<void(ModuleAnalysisManager &)>, 2>
diff --git a/llvm/lib/Passes/PassBuilderPipelines.cpp b/llvm/lib/Passes/PassBuilderPipelines.cpp
index 3f28dd39911f79..8c76e0c4955959 100644
--- a/llvm/lib/Passes/PassBuilderPipelines.cpp
+++ b/llvm/lib/Passes/PassBuilderPipelines.cpp
@@ -384,9 +384,9 @@ void PassBuilder::invokePipelineStartEPCallbacks(ModulePassManager &MPM,
C(MPM, Level);
}
void PassBuilder::invokePipelineEarlySimplificationEPCallbacks(
- ModulePassManager &MPM, OptimizationLevel Level) {
+ ModulePassManager &MPM, OptimizationLevel Level, bool LTOPreLink) {
for (auto &C : PipelineEarlySimplificationEPCallbacks)
- C(MPM, Level);
+ C(MPM, Level, LTOPreLink);
}
// Helper to add AnnotationRemarksPass.
@@ -1140,7 +1140,10 @@ PassBuilder::buildModuleSimplificationPipeline(OptimizationLevel Level,
MPM.addPass(LowerTypeTestsPass(nullptr, nullptr,
lowertypetests::DropTestKind::Assume));
- invokePipelineEarlySimplificationEPCallbacks(MPM, Level);
+ bool LTOPreLink = (Phase == ThinOrFullLTOPhase::FullLTOPreLink) ||
+ (Phase == ThinOrFullLTOPhase::ThinLTOPreLink);
+
+ invokePipelineEarlySimplificationEPCallbacks(MPM, Level, LTOPreLink);
// Interprocedural constant propagation now that basic cleanup has occurred
// and prior to optimizing globals.
@@ -2155,7 +2158,7 @@ ModulePassManager PassBuilder::buildO0DefaultPipeline(OptimizationLevel Level,
if (PGOOpt && PGOOpt->DebugInfoForProfiling)
MPM.addPass(createModuleToFunctionPassAdaptor(AddDiscriminatorsPass()));
- invokePipelineEarlySimplificationEPCallbacks(MPM, Level);
+ invokePipelineEarlySimplificationEPCallbacks(MPM, Level, LTOPreLink);
// Build a minimal pipeline based on the semantics required by LLVM,
// which is just that always inlining occurs. Further, disable generating
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp b/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
index d93ec34a703d3d..1d295804fe3d2a 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
@@ -747,7 +747,7 @@ void AMDGPUTargetMachine::registerPassBuilderCallbacks(PassBuilder &PB) {
});
PB.registerPipelineEarlySimplificationEPCallback(
- [](ModulePassManager &PM, OptimizationLevel Level) {
+ [](ModulePassManager &PM, OptimizationLevel Level, bool LTOPreLink) {
PM.addPass(AMDGPUPrintfRuntimeBindingPass());
if (Level == OptimizationLevel::O0)
@@ -755,7 +755,8 @@ void AMDGPUTargetMachine::registerPassBuilderCallbacks(PassBuilder &PB) {
PM.addPass(AMDGPUUnifyMetadataPass());
- if (InternalizeSymbols) {
+ // We don't want to run internalization at per-module stage.
+ if (InternalizeSymbols && !LTOPreLink) {
PM.addPass(InternalizePass(mustPreserveGV));
PM.addPass(GlobalDCEPass());
}
@@ -823,8 +824,15 @@ void AMDGPUTargetMachine::registerPassBuilderCallbacks(PassBuilder &PB) {
PM.addPass(AMDGPUSwLowerLDSPass(*this));
if (EnableLowerModuleLDS)
PM.addPass(AMDGPULowerModuleLDSPass(*this));
- if (EnableAMDGPUAttributor && Level != OptimizationLevel::O0)
- PM.addPass(AMDGPUAttributorPass(*this));
+ if (Level != OptimizationLevel::O0) {
+ if (EnableAMDGPUAttributor)
+ PM.addPass(AMDGPUAttributorPass(*this));
+ // Do we really need internalization in LTO?
+ if (InternalizeSymbols) {
+ PM.addPass(InternalizePass(mustPreserveGV));
+ PM.addPass(GlobalDCEPass());
+ }
+ }
});
PB.registerRegClassFilterParsingCallback(
diff --git a/llvm/lib/Target/BPF/BPFTargetMachine.cpp b/llvm/lib/Target/BPF/BPFTargetMachine.cpp
index 7d91fa8bb824cf..6775930c8fb2dd 100644
--- a/llvm/lib/Target/BPF/BPFTargetMachine.cpp
+++ b/llvm/lib/Target/BPF/BPFTargetMachine.cpp
@@ -138,7 +138,7 @@ void BPFTargetMachine::registerPassBuilderCallbacks(PassBuilder &PB) {
FPM.addPass(BPFPreserveStaticOffsetPass(false));
});
PB.registerPipelineEarlySimplificationEPCallback(
- [=](ModulePassManager &MPM, OptimizationLevel) {
+ [=](ModulePassManager &MPM, OptimizationLevel, bool) {
MPM.addPass(BPFAdjustOptPass());
});
}
diff --git a/llvm/test/CodeGen/AMDGPU/print-pipeline-passes.ll b/llvm/test/CodeGen/AMDGPU/print-pipeline-passes.ll
index b9eda0c1cd3bb6..792e83095efaa7 100644
--- a/llvm/test/CodeGen/AMDGPU/print-pipeline-passes.ll
+++ b/llvm/test/CodeGen/AMDGPU/print-pipeline-passes.ll
@@ -3,9 +3,17 @@
; RUN: opt -mtriple=amdgcn--amdhsa -S -passes="lto<O2>" -print-pipeline-passes %s -o - | FileCheck %s
; RUN: opt -mtriple=amdgcn--amdhsa -S -passes="lto<O3>" -print-pipeline-passes %s -o - | FileCheck %s
+; RUN: opt -mtriple=amdgcn--amdhsa -S -passes="lto-pre-link<O0>" -print-pipeline-passes %s -o - | FileCheck --check-prefix=PRE %s
+; RUN: opt -mtriple=amdgcn--amdhsa -S -passes="lto-pre-link<O1>" -print-pipeline-passes %s -o - | FileCheck --check-prefix=PRE %s
+; RUN: opt -mtriple=amdgcn--amdhsa -S -passes="lto-pre-link<O2>" -print-pipeline-passes %s -o - | FileCheck --check-prefix=PRE %s
+; RUN: opt -mtriple=amdgcn--amdhsa -S -passes="lto-pre-link<O3>" -print-pipeline-passes %s -o - | FileCheck --check-prefix=PRE %s
+
+
; CHECK: amdgpu-attributor
; O0-NOT: amdgpu-attributor
+; PRE-NOT: internalize
+
define amdgpu_kernel void @kernel() {
entry:
ret void
diff --git a/llvm/tools/opt/NewPMDriver.cpp b/llvm/tools/opt/NewPMDriver.cpp
index 9a477193a29365..ccf271356b64c5 100644
--- a/llvm/tools/opt/NewPMDriver.cpp
+++ b/llvm/tools/opt/NewPMDriver.cpp
@@ -294,7 +294,7 @@ static void registerEPCallbacks(PassBuilder &PB) {
if (tryParsePipelineText<ModulePassManager>(
PB, PipelineEarlySimplificationEPPipeline))
PB.registerPipelineEarlySimplificationEPCallback(
- [&PB](ModulePassManager &PM, OptimizationLevel) {
+ [&PB](ModulePassManager &PM, OptimizationLevel, bool) {
ExitOnError Err("Unable to parse EarlySimplification pipeline: ");
Err(PB.parsePassPipeline(PM, PipelineEarlySimplificationEPPipeline));
});
|
LTOPreLink
to early simplification EP callbacks → ThinOrFullLTOPhase
to early simplification EP callbacks
ThinOrFullLTOPhase
to early simplification EP callbacks → ThinOrFullLTOPhase
to early simplification EP callbacks
56ab85b
to
c9be622
Compare
8ae74a4
to
e753c4f
Compare
c271a32
to
8f36cd9
Compare
e753c4f
to
912283a
Compare
912283a
to
56d807b
Compare
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
PassBuilder changes LGTM
The early simplification pipeline is used in the non-LTO and (Thin/Full)LTO pre-link stages. There are some passes that we want to run in non-LTO mode but not at the LTO pre-link stage. Currently there is no way to control this. This PR adds that support. To demonstrate its use, we enable the internalization pass only in non-LTO mode for AMDGPU, because running it at the pre-link stage causes some issues.
56d807b
to
3da52a0
Compare
…backs (llvm#114547) The early simplification pipeline is used in the non-LTO and (Thin/Full)LTO pre-link stages. There are some passes that we want to run in non-LTO mode but not at the LTO pre-link stage. Currently there is no way to control this. This PR adds that support. To demonstrate its use, we enable the internalization pass only in non-LTO mode for AMDGPU, because running it at the pre-link stage causes some issues.
The early simplification pipeline is used in the non-LTO and (Thin/Full)LTO
pre-link stages. There are some passes that we want to run in non-LTO mode but
not at the LTO pre-link stage. Currently there is no way to control this. This
PR adds that support. To demonstrate its use, we enable the internalization pass
only in non-LTO mode for AMDGPU, because running it at the pre-link stage causes
some issues.