-
Notifications
You must be signed in to change notification settings - Fork 12.3k
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
[PGO] Add ability to mark cold functions as optsize/minsize/optnone #69030
Conversation
still need to do measurements with this new approach, but lmk if this makes sense |
@llvm/pr-subscribers-pgo @llvm/pr-subscribers-lto Author: Arthur Eubanks (aeubanks) ChangesThe performance of cold functions shouldn't matter too much, so if we care about binary sizes, add an option to mark cold functions as optsize/minsize for binary size, or optnone for compile times [1]. Clang patch will be in a future patch Initial version: https://reviews.llvm.org/D149800 Patch is 22.23 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/69030.diff 13 Files Affected:
diff --git a/clang/lib/CodeGen/BackendUtil.cpp b/clang/lib/CodeGen/BackendUtil.cpp
index d066819871dfde3..f0c9b9d4daf08b5 100644
--- a/clang/lib/CodeGen/BackendUtil.cpp
+++ b/clang/lib/CodeGen/BackendUtil.cpp
@@ -770,7 +770,8 @@ void EmitAssemblyHelper::RunOptimizationPipeline(
CodeGenOpts.InstrProfileOutput.empty() ? getDefaultProfileGenName()
: CodeGenOpts.InstrProfileOutput,
"", "", CodeGenOpts.MemoryProfileUsePath, nullptr, PGOOptions::IRInstr,
- PGOOptions::NoCSAction, CodeGenOpts.DebugInfoForProfiling,
+ PGOOptions::NoCSAction, PGOOptions::ColdFuncAttr::None,
+ CodeGenOpts.DebugInfoForProfiling,
/*PseudoProbeForProfiling=*/false, CodeGenOpts.AtomicProfileUpdate);
else if (CodeGenOpts.hasProfileIRUse()) {
// -fprofile-use.
@@ -779,28 +780,32 @@ void EmitAssemblyHelper::RunOptimizationPipeline(
PGOOpt = PGOOptions(
CodeGenOpts.ProfileInstrumentUsePath, "",
CodeGenOpts.ProfileRemappingFile, CodeGenOpts.MemoryProfileUsePath, VFS,
- PGOOptions::IRUse, CSAction, CodeGenOpts.DebugInfoForProfiling);
+ PGOOptions::IRUse, CSAction, PGOOptions::ColdFuncAttr::None,
+ CodeGenOpts.DebugInfoForProfiling);
} else if (!CodeGenOpts.SampleProfileFile.empty())
// -fprofile-sample-use
PGOOpt = PGOOptions(
CodeGenOpts.SampleProfileFile, "", CodeGenOpts.ProfileRemappingFile,
CodeGenOpts.MemoryProfileUsePath, VFS, PGOOptions::SampleUse,
- PGOOptions::NoCSAction, CodeGenOpts.DebugInfoForProfiling,
- CodeGenOpts.PseudoProbeForProfiling);
+ PGOOptions::NoCSAction, PGOOptions::ColdFuncAttr::None,
+ CodeGenOpts.DebugInfoForProfiling, CodeGenOpts.PseudoProbeForProfiling);
else if (!CodeGenOpts.MemoryProfileUsePath.empty())
// -fmemory-profile-use (without any of the above options)
PGOOpt = PGOOptions("", "", "", CodeGenOpts.MemoryProfileUsePath, VFS,
PGOOptions::NoAction, PGOOptions::NoCSAction,
+ PGOOptions::ColdFuncAttr::None,
CodeGenOpts.DebugInfoForProfiling);
else if (CodeGenOpts.PseudoProbeForProfiling)
// -fpseudo-probe-for-profiling
PGOOpt = PGOOptions("", "", "", /*MemoryProfile=*/"", nullptr,
PGOOptions::NoAction, PGOOptions::NoCSAction,
+ PGOOptions::ColdFuncAttr::None,
CodeGenOpts.DebugInfoForProfiling, true);
else if (CodeGenOpts.DebugInfoForProfiling)
// -fdebug-info-for-profiling
PGOOpt = PGOOptions("", "", "", /*MemoryProfile=*/"", nullptr,
- PGOOptions::NoAction, PGOOptions::NoCSAction, true);
+ PGOOptions::NoAction, PGOOptions::NoCSAction,
+ PGOOptions::ColdFuncAttr::None, true);
// Check to see if we want to generate a CS profile.
if (CodeGenOpts.hasProfileCSIRInstr()) {
@@ -823,7 +828,8 @@ void EmitAssemblyHelper::RunOptimizationPipeline(
? getDefaultProfileGenName()
: CodeGenOpts.InstrProfileOutput,
"", /*MemoryProfile=*/"", nullptr, PGOOptions::NoAction,
- PGOOptions::CSIRInstr, CodeGenOpts.DebugInfoForProfiling);
+ PGOOptions::CSIRInstr, PGOOptions::ColdFuncAttr::None,
+ CodeGenOpts.DebugInfoForProfiling);
}
if (TM)
TM->setPGOOption(PGOOpt);
diff --git a/llvm/include/llvm/Support/PGOOptions.h b/llvm/include/llvm/Support/PGOOptions.h
index 87eb29a8de48a08..fe21169a137016b 100644
--- a/llvm/include/llvm/Support/PGOOptions.h
+++ b/llvm/include/llvm/Support/PGOOptions.h
@@ -27,10 +27,12 @@ class FileSystem;
struct PGOOptions {
enum PGOAction { NoAction, IRInstr, IRUse, SampleUse };
enum CSPGOAction { NoCSAction, CSIRInstr, CSIRUse };
+ enum class ColdFuncAttr { None, OptSize, MinSize, OptNone };
PGOOptions(std::string ProfileFile, std::string CSProfileGenFile,
std::string ProfileRemappingFile, std::string MemoryProfile,
IntrusiveRefCntPtr<vfs::FileSystem> FS,
PGOAction Action = NoAction, CSPGOAction CSAction = NoCSAction,
+ ColdFuncAttr ColdType = ColdFuncAttr::None,
bool DebugInfoForProfiling = false,
bool PseudoProbeForProfiling = false,
bool AtomicCounterUpdate = false);
@@ -44,6 +46,7 @@ struct PGOOptions {
std::string MemoryProfile;
PGOAction Action;
CSPGOAction CSAction;
+ ColdFuncAttr ColdType;
bool DebugInfoForProfiling;
bool PseudoProbeForProfiling;
bool AtomicCounterUpdate;
diff --git a/llvm/include/llvm/Transforms/Instrumentation/MarkColdFunctions.h b/llvm/include/llvm/Transforms/Instrumentation/MarkColdFunctions.h
new file mode 100644
index 000000000000000..06aecc911d8827b
--- /dev/null
+++ b/llvm/include/llvm/Transforms/Instrumentation/MarkColdFunctions.h
@@ -0,0 +1,28 @@
+//===- MarkColdFunctions.h - ------------------------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_TRANSFORMS_INSTRUMENTATION_MARKCOLDFUNCTIONS_H
+#define LLVM_TRANSFORMS_INSTRUMENTATION_MARKCOLDFUNCTIONS_H
+
+#include "llvm/IR/PassManager.h"
+#include "llvm/Support/PGOOptions.h"
+
+namespace llvm {
+
+struct MarkColdFunctionsPass : public PassInfoMixin<MarkColdFunctionsPass> {
+ MarkColdFunctionsPass(PGOOptions::ColdFuncAttr ColdType)
+ : ColdType(ColdType) {}
+ PreservedAnalyses run(Module &M, ModuleAnalysisManager &AM);
+
+private:
+ PGOOptions::ColdFuncAttr ColdType;
+};
+
+} // namespace llvm
+
+#endif // LLVM_TRANSFORMS_INSTRUMENTATION_MARKCOLDFUNCTIONS_H
diff --git a/llvm/lib/LTO/LTOBackend.cpp b/llvm/lib/LTO/LTOBackend.cpp
index ccc4276e36dacf0..8126b3563e077d4 100644
--- a/llvm/lib/LTO/LTOBackend.cpp
+++ b/llvm/lib/LTO/LTOBackend.cpp
@@ -243,19 +243,23 @@ static void runNewPMPasses(const Config &Conf, Module &Mod, TargetMachine *TM,
if (!Conf.SampleProfile.empty())
PGOOpt = PGOOptions(Conf.SampleProfile, "", Conf.ProfileRemapping,
/*MemoryProfile=*/"", FS, PGOOptions::SampleUse,
- PGOOptions::NoCSAction, true);
+ PGOOptions::NoCSAction, PGOOptions::ColdFuncAttr::None,
+ true);
else if (Conf.RunCSIRInstr) {
PGOOpt = PGOOptions("", Conf.CSIRProfile, Conf.ProfileRemapping,
/*MemoryProfile=*/"", FS, PGOOptions::IRUse,
- PGOOptions::CSIRInstr, Conf.AddFSDiscriminator);
+ PGOOptions::CSIRInstr, PGOOptions::ColdFuncAttr::None,
+ Conf.AddFSDiscriminator);
} else if (!Conf.CSIRProfile.empty()) {
PGOOpt = PGOOptions(Conf.CSIRProfile, "", Conf.ProfileRemapping,
/*MemoryProfile=*/"", FS, PGOOptions::IRUse,
- PGOOptions::CSIRUse, Conf.AddFSDiscriminator);
+ PGOOptions::CSIRUse, PGOOptions::ColdFuncAttr::None,
+ Conf.AddFSDiscriminator);
NoPGOWarnMismatch = !Conf.PGOWarnMismatch;
} else if (Conf.AddFSDiscriminator) {
PGOOpt = PGOOptions("", "", "", /*MemoryProfile=*/"", nullptr,
- PGOOptions::NoAction, PGOOptions::NoCSAction, true);
+ PGOOptions::NoAction, PGOOptions::NoCSAction,
+ PGOOptions::ColdFuncAttr::None, true);
}
TM->setPGOOption(PGOOpt);
diff --git a/llvm/lib/Passes/PassBuilder.cpp b/llvm/lib/Passes/PassBuilder.cpp
index fde759026e5d780..451037fd38482f4 100644
--- a/llvm/lib/Passes/PassBuilder.cpp
+++ b/llvm/lib/Passes/PassBuilder.cpp
@@ -143,6 +143,7 @@
#include "llvm/Transforms/Instrumentation/InstrOrderFile.h"
#include "llvm/Transforms/Instrumentation/InstrProfiling.h"
#include "llvm/Transforms/Instrumentation/KCFI.h"
+#include "llvm/Transforms/Instrumentation/MarkColdFunctions.h"
#include "llvm/Transforms/Instrumentation/MemProfiler.h"
#include "llvm/Transforms/Instrumentation/MemorySanitizer.h"
#include "llvm/Transforms/Instrumentation/PGOInstrumentation.h"
@@ -234,8 +235,8 @@
#include "llvm/Transforms/Utils/CanonicalizeAliases.h"
#include "llvm/Transforms/Utils/CanonicalizeFreezeInLoops.h"
#include "llvm/Transforms/Utils/CountVisits.h"
-#include "llvm/Transforms/Utils/Debugify.h"
#include "llvm/Transforms/Utils/DXILUpgrade.h"
+#include "llvm/Transforms/Utils/Debugify.h"
#include "llvm/Transforms/Utils/EntryExitInstrumenter.h"
#include "llvm/Transforms/Utils/FixIrreducible.h"
#include "llvm/Transforms/Utils/HelloWorld.h"
diff --git a/llvm/lib/Passes/PassBuilderPipelines.cpp b/llvm/lib/Passes/PassBuilderPipelines.cpp
index 600f8d43caaf216..3770edad56ff8c5 100644
--- a/llvm/lib/Passes/PassBuilderPipelines.cpp
+++ b/llvm/lib/Passes/PassBuilderPipelines.cpp
@@ -73,6 +73,7 @@
#include "llvm/Transforms/Instrumentation/ControlHeightReduction.h"
#include "llvm/Transforms/Instrumentation/InstrOrderFile.h"
#include "llvm/Transforms/Instrumentation/InstrProfiling.h"
+#include "llvm/Transforms/Instrumentation/MarkColdFunctions.h"
#include "llvm/Transforms/Instrumentation/MemProfiler.h"
#include "llvm/Transforms/Instrumentation/PGOInstrumentation.h"
#include "llvm/Transforms/Scalar/ADCE.h"
@@ -212,6 +213,12 @@ static cl::opt<bool>
cl::desc("Enable DFA jump threading"),
cl::init(false), cl::Hidden);
+// TODO: turn on and remove flag
+static cl::opt<bool>
+ EnableMarkColdFunctions("enable-mark-cold-functions",
+ cl::desc("Enable pass to mark cold functions"),
+ cl::init(false));
+
static cl::opt<bool>
EnableHotColdSplit("hot-cold-split",
cl::desc("Enable hot-cold splitting pass"));
@@ -1127,6 +1134,11 @@ PassBuilder::buildModuleSimplificationPipeline(OptimizationLevel Level,
if (EnableSyntheticCounts && !PGOOpt)
MPM.addPass(SyntheticCountsPropagation());
+ if (EnableMarkColdFunctions && PGOOpt &&
+ (PGOOpt->Action == PGOOptions::SampleUse ||
+ PGOOpt->Action == PGOOptions::IRUse))
+ MPM.addPass(MarkColdFunctionsPass(PGOOpt->ColdType));
+
if (EnableModuleInliner)
MPM.addPass(buildModuleInlinerPipeline(Level, Phase));
else
diff --git a/llvm/lib/Passes/PassRegistry.def b/llvm/lib/Passes/PassRegistry.def
index 91782d661ddd7b7..adeaee070116bba 100644
--- a/llvm/lib/Passes/PassRegistry.def
+++ b/llvm/lib/Passes/PassRegistry.def
@@ -85,6 +85,7 @@ MODULE_PASS("print-ir-similarity", IRSimilarityAnalysisPrinterPass(dbgs()))
MODULE_PASS("lower-global-dtors", LowerGlobalDtorsPass())
MODULE_PASS("lower-ifunc", LowerIFuncPass())
MODULE_PASS("lowertypetests", LowerTypeTestsPass())
+MODULE_PASS("mark-cold-functions", MarkColdFunctionsPass(PGOOpt ? PGOOpt->ColdType : PGOOptions::ColdFuncAttr::None))
MODULE_PASS("metarenamer", MetaRenamerPass())
MODULE_PASS("mergefunc", MergeFunctionsPass())
MODULE_PASS("name-anon-globals", NameAnonGlobalPass())
diff --git a/llvm/lib/Support/PGOOptions.cpp b/llvm/lib/Support/PGOOptions.cpp
index 7e57b52e4ba2f54..5dfeafa188980e8 100644
--- a/llvm/lib/Support/PGOOptions.cpp
+++ b/llvm/lib/Support/PGOOptions.cpp
@@ -15,11 +15,12 @@ PGOOptions::PGOOptions(std::string ProfileFile, std::string CSProfileGenFile,
std::string ProfileRemappingFile,
std::string MemoryProfile,
IntrusiveRefCntPtr<vfs::FileSystem> FS, PGOAction Action,
- CSPGOAction CSAction, bool DebugInfoForProfiling,
- bool PseudoProbeForProfiling, bool AtomicCounterUpdate)
+ CSPGOAction CSAction, ColdFuncAttr ColdType,
+ bool DebugInfoForProfiling, bool PseudoProbeForProfiling,
+ bool AtomicCounterUpdate)
: ProfileFile(ProfileFile), CSProfileGenFile(CSProfileGenFile),
ProfileRemappingFile(ProfileRemappingFile), MemoryProfile(MemoryProfile),
- Action(Action), CSAction(CSAction),
+ Action(Action), CSAction(CSAction), ColdType(ColdType),
DebugInfoForProfiling(DebugInfoForProfiling ||
(Action == SampleUse && !PseudoProbeForProfiling)),
PseudoProbeForProfiling(PseudoProbeForProfiling),
diff --git a/llvm/lib/Transforms/Instrumentation/CMakeLists.txt b/llvm/lib/Transforms/Instrumentation/CMakeLists.txt
index 424f1d433606771..b704726dd780377 100644
--- a/llvm/lib/Transforms/Instrumentation/CMakeLists.txt
+++ b/llvm/lib/Transforms/Instrumentation/CMakeLists.txt
@@ -6,6 +6,7 @@ add_llvm_component_library(LLVMInstrumentation
DataFlowSanitizer.cpp
GCOVProfiling.cpp
BlockCoverageInference.cpp
+ MarkColdFunctions.cpp
MemProfiler.cpp
MemorySanitizer.cpp
IndirectCallPromotion.cpp
diff --git a/llvm/lib/Transforms/Instrumentation/MarkColdFunctions.cpp b/llvm/lib/Transforms/Instrumentation/MarkColdFunctions.cpp
new file mode 100644
index 000000000000000..9037607132ef41c
--- /dev/null
+++ b/llvm/lib/Transforms/Instrumentation/MarkColdFunctions.cpp
@@ -0,0 +1,65 @@
+//===----------------------------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Transforms/Instrumentation/MarkColdFunctions.h"
+#include "llvm/Analysis/BlockFrequencyInfo.h"
+#include "llvm/Analysis/ProfileSummaryInfo.h"
+#include "llvm/IR/PassManager.h"
+
+using namespace llvm;
+
+PreservedAnalyses MarkColdFunctionsPass::run(Module &M,
+ ModuleAnalysisManager &AM) {
+ if (ColdType == PGOOptions::ColdFuncAttr::None)
+ return PreservedAnalyses::all();
+ ProfileSummaryInfo &PSI = AM.getResult<ProfileSummaryAnalysis>(M);
+ if (!PSI.hasProfileSummary())
+ return PreservedAnalyses::all();
+ FunctionAnalysisManager &FAM =
+ AM.getResult<FunctionAnalysisManagerModuleProxy>(M).getManager();
+ bool MadeChange = false;
+ for (Function &F : M) {
+ if (F.isDeclaration())
+ continue;
+ BlockFrequencyInfo &BFI = FAM.getResult<BlockFrequencyAnalysis>(F);
+ if (!PSI.isFunctionColdInCallGraph(&F, BFI))
+ continue;
+ // Add optsize/minsize/optnone if requested.
+ switch (ColdType) {
+ case PGOOptions::ColdFuncAttr::None:
+ assert(false);
+ break;
+ case PGOOptions::ColdFuncAttr::OptSize:
+ if (!F.hasFnAttribute(Attribute::OptimizeNone) &&
+ !F.hasFnAttribute(Attribute::OptimizeForSize) &&
+ !F.hasFnAttribute(Attribute::MinSize)) {
+ F.addFnAttr(Attribute::OptimizeForSize);
+ MadeChange = true;
+ }
+ break;
+ case PGOOptions::ColdFuncAttr::MinSize:
+ // Change optsize to minsize.
+ if (!F.hasFnAttribute(Attribute::OptimizeNone) &&
+ !F.hasFnAttribute(Attribute::MinSize)) {
+ F.removeFnAttr(Attribute::OptimizeForSize);
+ F.addFnAttr(Attribute::MinSize);
+ MadeChange = true;
+ }
+ break;
+ case PGOOptions::ColdFuncAttr::OptNone:
+ // Strip optsize/minsize.
+ F.removeFnAttr(Attribute::OptimizeForSize);
+ F.removeFnAttr(Attribute::MinSize);
+ F.addFnAttr(Attribute::OptimizeNone);
+ F.addFnAttr(Attribute::NoInline);
+ MadeChange = true;
+ break;
+ }
+ }
+ return MadeChange ? PreservedAnalyses::none() : PreservedAnalyses::all();
+}
diff --git a/llvm/test/Transforms/MarkColdFunctions/basic.ll b/llvm/test/Transforms/MarkColdFunctions/basic.ll
new file mode 100644
index 000000000000000..f79e3a8f1dda7db
--- /dev/null
+++ b/llvm/test/Transforms/MarkColdFunctions/basic.ll
@@ -0,0 +1,97 @@
+; RUN: opt < %s -passes=mark-cold-functions -pgo-kind=pgo-instr-use-pipeline -S -pgo-cold-func-attr=none | FileCheck %s --check-prefixes=NONE,CHECK
+; RUN: opt < %s -passes=mark-cold-functions -pgo-kind=pgo-instr-use-pipeline -S -pgo-cold-func-attr=optsize | FileCheck %s --check-prefixes=OPTSIZE,CHECK
+; RUN: opt < %s -passes=mark-cold-functions -pgo-kind=pgo-instr-use-pipeline -S -pgo-cold-func-attr=minsize | FileCheck %s --check-prefixes=MINSIZE,CHECK
+; RUN: opt < %s -passes=mark-cold-functions -pgo-kind=pgo-instr-use-pipeline -S -pgo-cold-func-attr=optnone | FileCheck %s --check-prefixes=OPTNONE,CHECK
+
+; Should be no changes without profile data
+; RUN: opt < %s -passes=mark-cold-functions -S -pgo-cold-func-attr=minsize | FileCheck %s --check-prefixes=NONE,CHECK
+
+; NONE-NOT: Function Attrs:
+; OPTSIZE: Function Attrs: optsize{{$}}
+; MINSIZE: Function Attrs: minsize{{$}}
+; OPTNONE: Function Attrs: noinline optnone{{$}}
+; CHECK: define void @cold()
+
+; NONE: Function Attrs: optsize{{$}}
+; OPTSIZE: Function Attrs: optsize{{$}}
+; MINSIZE: Function Attrs: minsize{{$}}
+; OPTNONE: Function Attrs: noinline optnone{{$}}
+; CHECK-NEXT: define void @cold1()
+
+; NONE: Function Attrs: minsize{{$}}
+; OPTSIZE: Function Attrs: minsize{{$}}
+; MINSIZE: Function Attrs: minsize{{$}}
+; OPTNONE: Function Attrs: noinline optnone{{$}}
+; CHECK-NEXT: define void @cold2()
+
+; CHECK: Function Attrs: noinline optnone{{$}}
+; CHECK-NEXT: define void @cold3()
+
+; CHECK-NOT: Function Attrs: {{.*}}optsize
+; CHECK-NOT: Function Attrs: {{.*}}minsize
+; CHECK-NOT: Function Attrs: {{.*}}optnone
+
+@s = global i32 0
+
+define void @cold() !prof !27 {
+ store i32 1, ptr @s, align 4
+ ret void
+}
+
+define void @cold1() optsize !prof !27 {
+ store i32 1, ptr @s, align 4
+ ret void
+}
+
+define void @cold2() minsize !prof !27 {
+ store i32 1, ptr @s, align 4
+ ret void
+}
+
+define void @cold3() noinline optnone !prof !27 {
+ store i32 1, ptr @s, align 4
+ ret void
+}
+
+define void @hot() !prof !28 {
+ %l = load i32, ptr @s, align 4
+ %add = add nsw i32 %l, 4
+ store i32 %add, ptr @s, align 4
+ ret void
+}
+
+attributes #0 = { optsize }
+attributes #1 = { minsize }
+attributes #2 = { noinline optnone }
+
+!llvm.module.flags = !{!0}
+
+!0 = !{i32 1, !"ProfileSummary", !1}
+!1 = !{!2, !3, !4, !5, !6, !7, !8, !9}
+!2 = !{!"ProfileFormat", !"InstrProf"}
+!3 = !{!"TotalCount", i64 9040}
+!4 = !{!"MaxCount", i64 9000}
+!5 = !{!"MaxInternalCount", i64 0}
+!6 = !{!"MaxFunctionCount", i64 9000}
+!7 = !{!"NumCounts", i64 5}
+!8 = !{!"NumFunctions", i64 5}
+!9 = !{!"DetailedSummary", !10}
+!10 = !{!11, !12, !13, !14, !15, !16, !17, !18, !19, !20, !21, !22, !23, !24, !25, !26}
+!11 = !{i32 10000, i64 9000, i32 1}
+!12 = !{i32 100000, i64 9000, i32 1}
+!13 = !{i32 200000, i64 9000, i32 1}
+!14 = !{i32 300000, i64 9000, i32 1}
+!15 = !{i32 400000, i64 9000, i32 1}
+!16 = !{i32 500000, i64 9000, i32 1}
+!17 = !{i32 600000, i64 9000, i32 1}
+!18 = !{i32 700000, i64 9000, i32 1}
+!19 = !{i32 800000, i64 9000, i32 1}
+!20 = !{i32 900000, i64 9000, i32 1}
+!21 = !{i32 950000, i64 9000, i32 1}
+!22 = !{i32 990000, i64 9000, i32 1}
+!23 = !{i32 999000, i64 10, i32 5}
+!24 = !{i32 999900, i64 10, i32 5}
+!25 = !{i32 999990, i64 10, i32 5}
+!26 = !{i32 999999, i64 10, i32 5}
+!27 = !{!"function_entry_count", i64 10}
+!28 = !{!"function_entry_count", i64 9000}
diff --git a/llvm/tools/opt/NewPMDriver.cpp b/llvm/tools/opt/NewPMDriver.cpp
index 6ae3f87099afd64..228eda8c3e0665c 100644
--- a/llvm/tools/opt/NewPMDriver.cpp
+++ b/llvm/tools/opt/NewPMDriver.cpp
@@ -202,6 +202,18 @@ static cl::opt<std::string>
cl::desc("Path to the profile remapping file."),
cl::Hidden);
+static cl::opt<PGOOptions::ColdFuncAttr> PGOColdFuncAttr(
+ "pgo-cold-func-attr", cl::init(PGOOptions::ColdFuncAttr::None), cl::Hidden,
+ cl::desc(
+ "Function attribute to apply to cold functions as determined by PGO"),
+ cl::values(clEnumValN(PGOOptions::ColdFuncAttr::None, "none", "None"),
+ clEnumValN(PGOOptions::ColdFuncAttr::OptSize, "optsize",
+ "Mark cold functions with optsize."),
+ clEnumValN(PGOOptions::ColdFuncAttr::MinSize, "...
[truncated]
|
|
||
using namespace llvm; | ||
|
||
PreservedAnalyses MarkColdFunctionsPass::run(Module &M, |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
why not just mark those function with 'cold' attribute, and have one global option to determine the optimization strategy of cold attributed functions? These functions are not different from functions annotated by a developer.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Having a global option determine how to handle cold
makes the IR more complicated. Reusing something that already exists and is being used is nicer.
Another alternative is to rely on the profile loaders to mark functions as cold
, then this pass only looks at functions marked cold
. WDYT?
It looks like iFDO marks cold functions as cold
, but sample FDO doesn't. If sample FDO is inaccurate then marking functions as cold
may be bad, so I assume that's why we don't mark functions as cold
for sample FDO. But there's also the "accurate" flag that says that the sample profile is accurate and functions not in the profile are actually cold. In that case we should mark functions as cold
?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
yes, you can use profile-sample-accurate or profile-accurate-for-syminlist to control the behavior, similar to how AFDO is selecting functions to put in .text.unlikely.
|
||
namespace llvm { | ||
|
||
struct MarkColdFunctionsPass : public PassInfoMixin<MarkColdFunctionsPass> { |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I think this pass could be generalized a bit. It seems that the goal for this pr is to reduce the size overhead of -O2
by forcing cold functions to be optsize
/minsize
. Another goal could be to improve the performance of -Oz
by forcing very hot functions to be optsize
(as opposed to minsize
) or remove optsize
/minsize
from them entirely. This is actually something we explored in this paper a while back and I would eventually like to upstream something similar. I believe this could be incorporated into this pass, but the name seems to be too vague. I think "PGO" should to be in the name so users know that it is involved. Maybe PGOForceFuncAttrsPass
or ProfileGuidedFuncAttrsPass
?
CC @kyulee-com
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
renamed
The performance of cold functions shouldn't matter too much, so if we care about binary sizes, add an option to mark cold functions as optsize/minsize for binary size, or optnone for compile times [1]. Clang patch will be in a future patch Initial version: https://reviews.llvm.org/D149800 [1] https://discourse.llvm.org/t/rfc-new-feature-proposal-de-optimizing-cold-functions-using-pgo-info/56388
ping |
// Change optsize to minsize. | ||
if (!F.hasFnAttribute(Attribute::OptimizeNone) && | ||
!F.hasFnAttribute(Attribute::MinSize)) { | ||
F.removeFnAttr(Attribute::OptimizeForSize); |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
should it overwrite the existing attribute?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
or have an option to control this behavior?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Inclined to say that we should honor existing attributes which may come from source annotations (optnone can be used for correctness reasons, e.g. workaround bugs etc). For this reason, I'm thinking about it as PGODerivedFunctionAttrs
instead of PGOForceFunctionAttrs
.
I'm also seeing inconsistency here in how different attributes are handled, e.g. OptNone
seems to take precedence over any existing attributes, but not the case for some others.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
it was intentional for optnone to take precedence, but I've changed it to bail out if we see optnone/minsize/optsize at all
How does this relate to the existing |
This patch allows more user control on the cold function action types. Another difference is that the existing API can be invoked in postInline passes which can see more cold functions after inlining. To replace those APIs, the new pass will need to run again post-inlining. |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
FWIW we've tried this with sampling PGO in the past. While on paper this seems like an obvious thing to do, in reality aggressively tuning down opt level for cold functions can lead to regression since profile isn't always accurate.
That said, as long as this change only provides options for users to make decision and not changing the default behavior, it's probably fine.
They provide similar but different controls. However, the fact that the two mechanisms are completely disconnected is not ideal. If we want to apply an opt strategy for cold code, we should do it at both function level and block level consistently. In fact the unification was also discussed on the original RFC: https://discourse.llvm.org/t/rfc-new-feature-proposal-de-optimizing-cold-functions-using-pgo-info/56388/16?u=wenleihe Is there a plan on that front? |
This is intended to replace The basic block versions of these should remain as they are since they actually do need to look at profile information to determine per-block information. |
IIUC, this won't affect sample profile unless you mark the sample profile as "accurate" (e.g. |
This patch should not affect the behavior of "accurate". Many people use the "accurate" mode by default now (with profiled symbol list populated when generating sample pgo profile). So we don't want surprise perf churn. |
I don't understand, if you're saying the profile is accurate, then those functions are actually cold, so we should be able to mark them as optsize without performance regressions? |
Accurate is not black or white. The current heuristic requires certain level of accuracy to be effective. If you make the heuristics more aggressive (like what this patch is doing), you're raising the requirement of what can be considered accurate, and profile not meeting that new requirement could see regression with new heuristic. Whether a function is cold or not also depends on what is the calling context and how inlining is done. All that makes function level annotation inherently inaccurate when done before inlining. Not that we shouldn't try it, but it's not as clear cut as it appears to be, and we need to be careful. |
It will be more conservative (pre-inlining), so won't cause additional optimization suppression compared with the current PGSO. |
Sample PGO profile has inline context, so in the profile, we may have In this PR, you have |
Good example. This pass should be run post-inline. @aeubanks, any reason we want to run it early in the pipeline? |
We want the main function simplification pipeline to see these function attributes because some optimizations trigger or don't trigger depending on the presence of the attributes. Modifying function attributes is typically done in CGSCC/module passes since doing so can affect what callers of those functions see (in effect changing other functions), which shouldn't happen in function passes. I suppose it's possible to add this as a CGSCC pass that runs after inlining and before the function simplification pipeline, but this is more of a one time thing and CGSCC passes can revisit functions. So this pass makes the most sense as a module pass, but we can't insert a module pass between inlining and the function simplification pipeline. Can/does the inliner ignore these size attributes when it has call-site profile information? |
Looking at the current change, this new pass is actually after the sample loader (including sample loader inlining) pass, so wenlei@'s concern should be addressed. |
Oh I forgot that the sample profile has its own inliner. Yes this pass runs after we load profiling information since it uses profiling information, whether it's sample or instrumented. Is this patch good to go? |
break; | ||
case PGOOptions::ColdFuncOpt::OptNone: | ||
F.addFnAttr(Attribute::OptimizeNone); | ||
F.addFnAttr(Attribute::NoInline); |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
What if functions originally has AlwaysInline
attribute?
More generally, what if function originally has a conflicting attribute from what's being set here?
This is good to go only because it's off by default. Otherwise it's not. Left one more comment on the patch as I didn't get a chance to look at final code before it went in. |
Sorry, I thought I had waited long enough and that the previous comments were addressed. Will address your comments in a follow-up.
Is this the main objection? I didn't understand this sentence the first time reading it. The Sample PGO inliner runs before this pass so it shouldn't be affected, as mentioned before. By "later inliner" do you mean the normal CGSCC inliner? Is it possible to have a call site be hot but the callee be cold? I don't currently have a plan to make this pass do anything by default, but I would like to resolve objections anyway. Using the |
To be removed and promoted to a proper driver flag if experiments turn out fruitful. Original LLVM patch for this functionality: llvm#69030
To be removed and promoted to a proper driver flag if experiments turn out fruitful. For now, this can be experimented with `-mllvm -pgo-cold-func-opt=[optsize|minsize|optnone|default] -mllvm -enable-pgo-force-function-attrs`. Original LLVM patch for this functionality: #69030
) To be removed and promoted to a proper driver flag if experiments turn out fruitful. For now, this can be experimented with `-mllvm -pgo-cold-func-opt=[optsize|minsize|optnone|default] -mllvm -enable-pgo-force-function-attrs`. Original LLVM patch for this functionality: llvm#69030
The performance of cold functions shouldn't matter too much, so if we care about binary sizes, add an option to mark cold functions as optsize/minsize for binary size, or optnone for compile times [1]. Clang patch will be in a future patch.
This is intended to replace
shouldOptimizeForSize(Function&, ...)
. We've seen multiple cases where calls to this expensive function, if not careful, can blow up compile times. I will clean up users of that function in a followup patch.Initial version: https://reviews.llvm.org/D149800
[1] https://discourse.llvm.org/t/rfc-new-feature-proposal-de-optimizing-cold-functions-using-pgo-info/56388