-
Notifications
You must be signed in to change notification settings - Fork 12.4k
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
[mlgo] inline for size: add bypass mechanism for preserving performance #95616
Conversation
This allows shrinking for size the cold part of the code, without sacrificing performance.
@llvm/pr-subscribers-llvm-analysis @llvm/pr-subscribers-mlgo Author: Mircea Trofin (mtrofin) ChangesThis allows shrinking for size the cold part of the code, without sacrificing performance. Full diff: https://github.com/llvm/llvm-project/pull/95616.diff 4 Files Affected:
diff --git a/llvm/include/llvm/Analysis/MLInlineAdvisor.h b/llvm/include/llvm/Analysis/MLInlineAdvisor.h
index f58862e533529..2aa077fe0e035 100644
--- a/llvm/include/llvm/Analysis/MLInlineAdvisor.h
+++ b/llvm/include/llvm/Analysis/MLInlineAdvisor.h
@@ -13,6 +13,7 @@
#include "llvm/Analysis/InlineAdvisor.h"
#include "llvm/Analysis/LazyCallGraph.h"
#include "llvm/Analysis/MLModelRunner.h"
+#include "llvm/Analysis/ProfileSummaryInfo.h"
#include "llvm/IR/PassManager.h"
#include <deque>
@@ -89,6 +90,7 @@ class MLInlineAdvisor : public InlineAdvisor {
llvm::SmallPtrSet<const LazyCallGraph::Node *, 1> NodesInLastSCC;
DenseSet<const LazyCallGraph::Node *> AllNodes;
bool ForceStop = false;
+ ProfileSummaryInfo &PSI;
};
/// InlineAdvice that tracks changes post inlining. For that reason, it only
diff --git a/llvm/lib/Analysis/MLInlineAdvisor.cpp b/llvm/lib/Analysis/MLInlineAdvisor.cpp
index 75eb8ece2e447..21946572339b9 100644
--- a/llvm/lib/Analysis/MLInlineAdvisor.cpp
+++ b/llvm/lib/Analysis/MLInlineAdvisor.cpp
@@ -14,6 +14,7 @@
#include "llvm/Analysis/MLInlineAdvisor.h"
#include "llvm/ADT/SCCIterator.h"
#include "llvm/Analysis/AssumptionCache.h"
+#include "llvm/Analysis/BlockFrequencyInfo.h"
#include "llvm/Analysis/CallGraph.h"
#include "llvm/Analysis/FunctionPropertiesAnalysis.h"
#include "llvm/Analysis/InlineCost.h"
@@ -23,6 +24,7 @@
#include "llvm/Analysis/LoopInfo.h"
#include "llvm/Analysis/MLModelRunner.h"
#include "llvm/Analysis/OptimizationRemarkEmitter.h"
+#include "llvm/Analysis/ProfileSummaryInfo.h"
#include "llvm/Analysis/ReleaseModeModelRunner.h"
#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/IR/Dominators.h"
@@ -46,6 +48,14 @@ static cl::opt<bool>
InteractiveIncludeDefault("inliner-interactive-include-default", cl::Hidden,
cl::desc(InclDefaultMsg));
+enum class SkipMLPolicyCriteria { Never, IfCallerIsNotCold };
+
+static cl::opt<SkipMLPolicyCriteria> SkipPolicy(
+ "ml-inliner-skip-policy", cl::Hidden, cl::init(SkipMLPolicyCriteria::Never),
+ cl::values(clEnumValN(SkipMLPolicyCriteria::Never, "never", "never"),
+ clEnumValN(SkipMLPolicyCriteria::IfCallerIsNotCold,
+ "if-caller-not-cold", "if the caller is not cold")));
+
#if defined(LLVM_HAVE_TF_AOT_INLINERSIZEMODEL)
// codegen-ed file
#include "InlinerSizeModel.h" // NOLINT
@@ -129,7 +139,8 @@ MLInlineAdvisor::MLInlineAdvisor(
M, MAM.getResult<FunctionAnalysisManagerModuleProxy>(M).getManager()),
ModelRunner(std::move(Runner)), GetDefaultAdvice(GetDefaultAdvice),
CG(MAM.getResult<LazyCallGraphAnalysis>(M)),
- InitialIRSize(getModuleIRSize()), CurrentIRSize(InitialIRSize) {
+ InitialIRSize(getModuleIRSize()), CurrentIRSize(InitialIRSize),
+ PSI(MAM.getResult<ProfileSummaryAnalysis>(M)) {
assert(ModelRunner);
ModelRunner->switchContext("");
// Extract the 'call site height' feature - the position of a call site
@@ -334,6 +345,11 @@ std::unique_ptr<InlineAdvice> MLInlineAdvisor::getAdviceImpl(CallBase &CB) {
auto &TIR = FAM.getResult<TargetIRAnalysis>(Callee);
auto &ORE = FAM.getResult<OptimizationRemarkEmitterAnalysis>(Caller);
+ if (SkipPolicy == SkipMLPolicyCriteria::IfCallerIsNotCold) {
+ if (!PSI.isFunctionEntryCold(&Caller))
+ return std::make_unique<InlineAdvice>(this, CB, ORE,
+ GetDefaultAdvice(CB));
+ }
auto MandatoryKind = InlineAdvisor::getMandatoryKind(CB, FAM, ORE);
// If this is a "never inline" case, there won't be any changes to internal
// state we need to track, so we can just return the base InlineAdvice, which
diff --git a/llvm/lib/Analysis/models/gen-inline-oz-test-model.py b/llvm/lib/Analysis/models/gen-inline-oz-test-model.py
index 4898509ea544f..d6b5e1747a7b6 100644
--- a/llvm/lib/Analysis/models/gen-inline-oz-test-model.py
+++ b/llvm/lib/Analysis/models/gen-inline-oz-test-model.py
@@ -102,12 +102,12 @@ def get_output_spec_path(path):
return os.path.join(path, "output_spec.json")
-def build_mock_model(path, signature):
+def build_mock_model(path, signature, advice):
"""Build and save the mock model with the given signature"""
module = tf.Module()
def action(*inputs):
- return {signature["output"]: tf.constant(value=1, dtype=tf.int64)}
+ return {signature["output"]: tf.constant(value=advice, dtype=tf.int64)}
module.action = tf.function()(action)
action = {"action": module.action.get_concrete_function(signature["inputs"])}
@@ -128,12 +128,18 @@ def get_signature():
def main(argv):
- assert len(argv) == 2
+ assert len(argv) == 2 or (len(argv) == 3 and argv[2] == "never")
model_path = argv[1]
-
+
print(f"Output model to: [{argv[1]}]")
+
+ constant_advice = 1
+ if len(argv) == 3:
+ constant_advice = 0
+ print(f"The model will always return: {constant_advice}")
+
signature = get_signature()
- build_mock_model(model_path, signature)
+ build_mock_model(model_path, signature, constant_advice)
if __name__ == "__main__":
diff --git a/llvm/test/Transforms/Inline/ML/bypass.ll b/llvm/test/Transforms/Inline/ML/bypass.ll
new file mode 100644
index 0000000000000..ccdefdcc93bfe
--- /dev/null
+++ b/llvm/test/Transforms/Inline/ML/bypass.ll
@@ -0,0 +1,78 @@
+; REQUIRES: have_tflite
+; RUN: rm -rf %t.runfiles %t.tflite %t.model_out
+; RUN: mkdir %t.runfiles
+; RUN: cp %S/../../../../lib/Analysis/models/gen-inline-oz-test-model.py %t.runfiles
+; RUN: cp %S/../../../../lib/Analysis/models/saved-model-to-tflite.py %t.runfiles
+; RUN: %python %t.runfiles/gen-inline-oz-test-model.py %t.model_out never
+; RUN: %python %t.runfiles/saved-model-to-tflite.py %t.model_out %t.tflite
+
+; When running O2, we expect both callers to inline callee.
+; RUN: opt < %s -passes='default<O2>' -inline-threshold=0 -hot-callsite-threshold=100 -S | FileCheck %s --check-prefixes=O2-HOT,O2-COLD
+
+; The ML model we use always blocks inlining (by construction)
+; RUN: opt < %s -passes='default<O2>' -inline-threshold=0 -hot-callsite-threshold=100 \
+; RUN: -enable-ml-inliner=development -ml-inliner-model-under-training=%t.tflite \
+; RUN: -S | FileCheck %s --check-prefixes=ML-HOT,ML-COLD
+
+; When bypassing ML for non-cold callers, the hot caller will have its callee inlined, but the cold one won't
+; RUN: opt < %s -passes='default<O2>' -inline-threshold=0 -hot-callsite-threshold=100 \
+; RUN: -enable-ml-inliner=development -ml-inliner-model-under-training=%t.tflite \
+; RUN: -ml-inliner-skip-policy=if-caller-not-cold -S | FileCheck %s --check-prefixes=O2-HOT,ML-COLD
+
+declare void @extern()
+
+define i32 @callee(i32 %x) {
+ %x1 = add i32 %x, 1
+ %x2 = add i32 %x1, 1
+ %x3 = add i32 %x2, 1
+ call void @extern()
+ call void @extern()
+ ret i32 %x3
+}
+
+define i32 @hot_caller(i32 %y1) !prof !15 {
+ %y = call i32 @callee(i32 %y1), !prof !16
+ ret i32 %y
+}
+
+define i32 @cold_caller(i32 %y1) !prof !17 {
+ %y = call i32 @callee(i32 %y1), !prof !16
+ ret i32 %y
+}
+
+
+!llvm.module.flags = !{!1}
+!15 = !{!"function_entry_count", i64 300}
+!16 = !{!"branch_weights", i64 300}
+!17 = !{!"function_entry_count", i64 1}
+
+!1 = !{i32 1, !"ProfileSummary", !2}
+!2 = !{!3, !4, !5, !6, !7, !8, !9, !10}
+!3 = !{!"ProfileFormat", !"SampleProfile"}
+!4 = !{!"TotalCount", i64 10000}
+!5 = !{!"MaxCount", i64 1000}
+!6 = !{!"MaxInternalCount", i64 1}
+!7 = !{!"MaxFunctionCount", i64 1000}
+!8 = !{!"NumCounts", i64 3}
+!9 = !{!"NumFunctions", i64 3}
+!10 = !{!"DetailedSummary", !11}
+!11 = !{!12, !13, !14}
+!12 = !{i32 10000, i64 100, i32 1}
+!13 = !{i32 999000, i64 100, i32 1}
+!14 = !{i32 999999, i64 1, i32 2}
+
+; O2-HOT-LABEL: @hot_caller
+; O2-HOT-NOT: call i32 @callee
+; O2-HOT: call void @extern
+; O2-HOT-NEXT: call void @extern
+; O2-HOT-NEXT: ret
+; O2-COLD-LABEL: @cold_caller
+; O2-COLD-NOT: call i32 @callee
+; O2-COLD: call void @extern
+; O2-COLD-NEXT: call void @extern
+; O2-COLD-NEXT: ret
+
+; ML-HOT-LABEL: @hot_caller
+; ML-HOT-NEXT: call i32 @callee
+; ML-COLD-LABEL: @cold_caller
+; ML-COLD-NEXT: call i32 @callee
\ No newline at end of file
|
✅ With the latest revision this PR passed the Python code formatter. |
You mean we'd mark the cold callers as minsize and drive the bypass decision that way? Yes. In fact we want to experiment with that next, and we may then remove the bypass flag if we can do it all with one flag. I was trying to be a bit conservative because minsize would also affect how those callsites are compiled post-inliner. Which should compound more for size, and matter not for speed, but, again, wanted to be conservative. |
This allows shrinking for size the cold part of the code, without sacrificing performance.