From da816c2985263f108e852adc99c31b6775096010 Mon Sep 17 00:00:00 2001 From: Yonghong Song Date: Mon, 17 Apr 2023 11:11:59 -0700 Subject: [PATCH] [TTI][BPF] Ensure ArgumentPromotion Not Exceeding Target MaxArgs With LLVM patch https://reviews.llvm.org/D148269, we hit a linux kernel bpf selftest compilation failure like below: ... progs/test_xdp_noinline.c:739:8: error: too many args to t8: i64 = GlobalAddress 0, progs/test_xdp_noinline.c:739:8 if (!encap_v4(xdp, cval, &pckt, dst, pkt_bytes)) ^ ... progs/test_xdp_noinline.c:321:6: error: defined with too many args bool encap_v4(struct xdp_md *xdp, struct ctl_value *cval, ^ ... Note that bpf selftests are compiled with -O2 which is the recommended flag for bpf community. The bpf backend calling convention is only allowing 5 parameters in registers and does not allow pass arguments through stacks. In the above case, ArgumentPromotionPass replaced parameter '&pckt' as two parameters, so the total number of arguments after ArgumentPromotion pass becomes 6 and this caused later compilation failure during instruction selection phase. This patch added a TargetTransformInfo hook getMaxNumArgs() which returns 5 for BPF and UINT_MAX for other targets. Differential Revision: https://reviews.llvm.org/D148551 --- .../llvm/Analysis/TargetTransformInfo.h | 8 ++ .../llvm/Analysis/TargetTransformInfoImpl.h | 2 + llvm/lib/Analysis/TargetTransformInfo.cpp | 4 + llvm/lib/Target/BPF/BPFTargetTransformInfo.h | 4 + llvm/lib/Transforms/IPO/ArgumentPromotion.cpp | 5 ++ .../ArgumentPromotion/BPF/argpromotion.ll | 88 +++++++++++++++++++ .../ArgumentPromotion/BPF/lit.local.cfg | 2 + 7 files changed, 113 insertions(+) create mode 100644 llvm/test/Transforms/ArgumentPromotion/BPF/argpromotion.ll create mode 100644 llvm/test/Transforms/ArgumentPromotion/BPF/lit.local.cfg diff --git a/llvm/include/llvm/Analysis/TargetTransformInfo.h b/llvm/include/llvm/Analysis/TargetTransformInfo.h index 1ced968ad585..3f7cb5f4bf28 100644 --- a/llvm/include/llvm/Analysis/TargetTransformInfo.h +++ b/llvm/include/llvm/Analysis/TargetTransformInfo.h @@ -1643,6 +1643,9 @@ class TargetTransformInfo { /// false, but it shouldn't matter what it returns anyway. bool hasArmWideBranch(bool Thumb) const; + /// \return The maximum number of function arguments the target supports. + unsigned getMaxNumArgs() const; + /// @} private: @@ -2003,6 +2006,7 @@ class TargetTransformInfo::Concept { virtual VPLegalization getVPLegalizationStrategy(const VPIntrinsic &PI) const = 0; virtual bool hasArmWideBranch(bool Thumb) const = 0; + virtual unsigned getMaxNumArgs() const = 0; }; template @@ -2694,6 +2698,10 @@ class TargetTransformInfo::Model final : public TargetTransformInfo::Concept { bool hasArmWideBranch(bool Thumb) const override { return Impl.hasArmWideBranch(Thumb); } + + unsigned getMaxNumArgs() const override { + return Impl.getMaxNumArgs(); + } }; template diff --git a/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h b/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h index 9ccba7f9a0b1..db5ebb7c30b2 100644 --- a/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h +++ b/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h @@ -878,6 +878,8 @@ class TargetTransformInfoImplBase { bool hasArmWideBranch(bool) const { return false; } + unsigned getMaxNumArgs() const { return UINT_MAX; } + protected: // Obtain the minimum required size to hold the value (without the sign) // In case of a vector it returns the min required size for one element. diff --git a/llvm/lib/Analysis/TargetTransformInfo.cpp b/llvm/lib/Analysis/TargetTransformInfo.cpp index 92199eb33c4c..f7da4617b449 100644 --- a/llvm/lib/Analysis/TargetTransformInfo.cpp +++ b/llvm/lib/Analysis/TargetTransformInfo.cpp @@ -1194,6 +1194,10 @@ bool TargetTransformInfo::hasArmWideBranch(bool Thumb) const { return TTIImpl->hasArmWideBranch(Thumb); } +unsigned TargetTransformInfo::getMaxNumArgs() const { + return TTIImpl->getMaxNumArgs(); +} + bool TargetTransformInfo::shouldExpandReduction(const IntrinsicInst *II) const { return TTIImpl->shouldExpandReduction(II); } diff --git a/llvm/lib/Target/BPF/BPFTargetTransformInfo.h b/llvm/lib/Target/BPF/BPFTargetTransformInfo.h index ba1cb5699e79..5aa9ec283406 100644 --- a/llvm/lib/Target/BPF/BPFTargetTransformInfo.h +++ b/llvm/lib/Target/BPF/BPFTargetTransformInfo.h @@ -77,6 +77,10 @@ class BPFTTIImpl : public BasicTTIImplBase { return Options; } + unsigned getMaxNumArgs() const { + return 5; + } + }; } // end namespace llvm diff --git a/llvm/lib/Transforms/IPO/ArgumentPromotion.cpp b/llvm/lib/Transforms/IPO/ArgumentPromotion.cpp index fc170299ca46..8c6c0728097c 100644 --- a/llvm/lib/Transforms/IPO/ArgumentPromotion.cpp +++ b/llvm/lib/Transforms/IPO/ArgumentPromotion.cpp @@ -781,6 +781,7 @@ static Function *promoteArguments(Function *F, FunctionAnalysisManager &FAM, // Check to see which arguments are promotable. If an argument is promotable, // add it to ArgsToPromote. DenseMap> ArgsToPromote; + unsigned NumArgsAfterPromote = F->getFunctionType()->getNumParams(); for (Argument *PtrArg : PointerArgs) { // Replace sret attribute with noalias. This reduces register pressure by // avoiding a register copy. @@ -804,6 +805,7 @@ static Function *promoteArguments(Function *F, FunctionAnalysisManager &FAM, Types.push_back(Pair.second.Ty); if (areTypesABICompatible(Types, *F, TTI)) { + NumArgsAfterPromote += ArgParts.size() - 1; ArgsToPromote.insert({PtrArg, std::move(ArgParts)}); } } @@ -813,6 +815,9 @@ static Function *promoteArguments(Function *F, FunctionAnalysisManager &FAM, if (ArgsToPromote.empty()) return nullptr; + if (NumArgsAfterPromote > TTI.getMaxNumArgs()) + return nullptr; + return doPromotion(F, FAM, ArgsToPromote); } diff --git a/llvm/test/Transforms/ArgumentPromotion/BPF/argpromotion.ll b/llvm/test/Transforms/ArgumentPromotion/BPF/argpromotion.ll new file mode 100644 index 000000000000..6c39f27115ad --- /dev/null +++ b/llvm/test/Transforms/ArgumentPromotion/BPF/argpromotion.ll @@ -0,0 +1,88 @@ +; RUN: opt -passes=argpromotion -mtriple=bpf-pc-linux -S %s | FileCheck %s +; Source: +; struct t { +; int a, b, c, d, e, f, g; +; }; +; __attribute__((noinline)) static int foo1(struct t *p1, struct t *p2, struct t *p3) { +; return p1->a + p1->b + p2->c + p2->e + p3->f + p3->g; +; } +; __attribute__((noinline)) static int foo2(struct t *p1, struct t *p2, struct t *p3) { +; return p1->a + p1->b + p2->c + p2->e + p3->f; +; } +; void init(void *); +; int bar(void) { +; struct t v1, v2, v3; +; init(&v1); init(&v2); init(&v3); +; return foo1(&v1, &v2, &v3) + foo2(&v1, &v2, &v3); +; } +; Compilation flag: +; clang -target bpf -O2 -S t.c -mllvm -print-before=argpromotion -mllvm -print-module-scope +; and then do some manual tailoring to remove some attributes/metadata which is not used +; by argpromotion pass. + +%struct.t = type { i32, i32, i32, i32, i32, i32, i32 } + +define i32 @bar() { +entry: + %v1 = alloca %struct.t, align 4 + %v2 = alloca %struct.t, align 4 + %v3 = alloca %struct.t, align 4 + call void @init(ptr noundef nonnull %v1) + call void @init(ptr noundef nonnull %v2) + call void @init(ptr noundef nonnull %v3) + %call = call fastcc i32 @foo1(ptr noundef nonnull %v1, ptr noundef nonnull %v2, ptr noundef nonnull %v3) + %call1 = call fastcc i32 @foo2(ptr noundef nonnull %v1, ptr noundef nonnull %v2, ptr noundef nonnull %v3) + %add = add nsw i32 %call, %call1 + ret i32 %add +} + +declare void @init(ptr noundef) + +define internal i32 @foo1(ptr nocapture noundef readonly %p1, ptr nocapture noundef readonly %p2, ptr nocapture noundef readonly %p3) { +entry: + %0 = load i32, ptr %p1, align 4 + %b = getelementptr inbounds %struct.t, ptr %p1, i64 0, i32 1 + %1 = load i32, ptr %b, align 4 + %add = add nsw i32 %1, %0 + %c = getelementptr inbounds %struct.t, ptr %p2, i64 0, i32 2 + %2 = load i32, ptr %c, align 4 + %add1 = add nsw i32 %add, %2 + %e = getelementptr inbounds %struct.t, ptr %p2, i64 0, i32 4 + %3 = load i32, ptr %e, align 4 + %add2 = add nsw i32 %add1, %3 + %f = getelementptr inbounds %struct.t, ptr %p3, i64 0, i32 5 + %4 = load i32, ptr %f, align 4 + %add3 = add nsw i32 %add2, %4 + %g = getelementptr inbounds %struct.t, ptr %p3, i64 0, i32 6 + %5 = load i32, ptr %g, align 4 + %add4 = add nsw i32 %add3, %5 + ret i32 %add4 +} + +; Without number-of-argument constraint, argpromotion will create a function signature with 6 arguments. Since +; bpf target only supports maximum 5 arguments, so no argpromotion here. +; +; CHECK: i32 @foo1(ptr nocapture noundef readonly %p1, ptr nocapture noundef readonly %p2, ptr nocapture noundef readonly %p3) + +define internal i32 @foo2(ptr noundef %p1, ptr noundef %p2, ptr noundef %p3) { +entry: + %0 = load i32, ptr %p1, align 4 + %b = getelementptr inbounds %struct.t, ptr %p1, i64 0, i32 1 + %1 = load i32, ptr %b, align 4 + %add = add nsw i32 %0, %1 + %c = getelementptr inbounds %struct.t, ptr %p2, i64 0, i32 2 + %2 = load i32, ptr %c, align 4 + %add1 = add nsw i32 %add, %2 + %e = getelementptr inbounds %struct.t, ptr %p2, i64 0, i32 4 + %3 = load i32, ptr %e, align 4 + %add2 = add nsw i32 %add1, %3 + %f = getelementptr inbounds %struct.t, ptr %p3, i64 0, i32 5 + %4 = load i32, ptr %f, align 4 + %add3 = add nsw i32 %add2, %4 + ret i32 %add3 +} + +; Without number-of-argument constraint, argpromotion will create a function signature with 5 arguments, which equals +; the maximum number of argument permitted by bpf backend, so argpromotion result code does work. +; +; CHECK: i32 @foo2(i32 %p1.0.val, i32 %p1.4.val, i32 %p2.8.val, i32 %p2.16.val, i32 %p3.20.val) diff --git a/llvm/test/Transforms/ArgumentPromotion/BPF/lit.local.cfg b/llvm/test/Transforms/ArgumentPromotion/BPF/lit.local.cfg new file mode 100644 index 000000000000..a4ab2624af61 --- /dev/null +++ b/llvm/test/Transforms/ArgumentPromotion/BPF/lit.local.cfg @@ -0,0 +1,2 @@ +if not 'BPF' in config.root.targets: + config.unsupported = True