Skip to content

Commit

Permalink
[TTI][BPF] Ensure ArgumentPromotion Not Exceeding Target MaxArgs
Browse files Browse the repository at this point in the history
With LLVM patch https://reviews.llvm.org/D148269, we hit a linux kernel
bpf selftest compilation failure like below:
  ...
  progs/test_xdp_noinline.c:739:8: error: too many args to t8: i64 = GlobalAddress<ptr @encap_v4> 0, progs/test_xdp_noinline.c:739:8
              if (!encap_v4(xdp, cval, &pckt, dst, pkt_bytes))
                   ^
  ...
  progs/test_xdp_noinline.c:321:6: error: defined with too many args
  bool encap_v4(struct xdp_md *xdp, struct ctl_value *cval,
       ^
  ...

Note that the bpf selftests are compiled with -O2, which is
the flag recommended by the bpf community.

The bpf backend calling convention allows only 5
parameters, all passed in registers, and does not allow passing
arguments on the stack. In the above case, ArgumentPromotionPass
replaced the parameter '&pckt' with two parameters, so the total
number of arguments after the ArgumentPromotion pass became 6,
which caused a later compilation failure during the instruction
selection phase.

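This patch adds a TargetTransformInfo hook getMaxNumArgs()
which returns 5 for BPF and UINT_MAX for other targets.
ArgumentPromotion now skips promoting a function when the
resulting number of arguments would exceed this limit.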

Differential Revision: https://reviews.llvm.org/D148551
  • Loading branch information
yonghong-song committed Apr 19, 2023
1 parent a1ed8e3 commit da816c2
Show file tree
Hide file tree
Showing 7 changed files with 113 additions and 0 deletions.
8 changes: 8 additions & 0 deletions llvm/include/llvm/Analysis/TargetTransformInfo.h
Original file line number Diff line number Diff line change
Expand Up @@ -1643,6 +1643,9 @@ class TargetTransformInfo {
/// false, but it shouldn't matter what it returns anyway.
bool hasArmWideBranch(bool Thumb) const;

/// \return The maximum number of function arguments the target supports.
unsigned getMaxNumArgs() const;

/// @}

private:
Expand Down Expand Up @@ -2003,6 +2006,7 @@ class TargetTransformInfo::Concept {
virtual VPLegalization
getVPLegalizationStrategy(const VPIntrinsic &PI) const = 0;
virtual bool hasArmWideBranch(bool Thumb) const = 0;
virtual unsigned getMaxNumArgs() const = 0;
};

template <typename T>
Expand Down Expand Up @@ -2694,6 +2698,10 @@ class TargetTransformInfo::Model final : public TargetTransformInfo::Concept {
/// Forwards the query, including the \p Thumb flag, to the wrapped
/// implementation object \c Impl and returns its answer unchanged.
bool hasArmWideBranch(bool Thumb) const override {
return Impl.hasArmWideBranch(Thumb);
}

/// Forwards to the wrapped implementation object \c Impl and returns its
/// getMaxNumArgs() result unchanged.
unsigned getMaxNumArgs() const override {
return Impl.getMaxNumArgs();
}
};

template <typename T>
Expand Down
2 changes: 2 additions & 0 deletions llvm/include/llvm/Analysis/TargetTransformInfoImpl.h
Original file line number Diff line number Diff line change
Expand Up @@ -878,6 +878,8 @@ class TargetTransformInfoImplBase {

/// Base implementation: ignores its argument and always returns false.
bool hasArmWideBranch(bool) const { return false; }

/// Base implementation: returns UINT_MAX, the largest value the unsigned
/// return type can hold, so an unsigned argument count never exceeds it.
unsigned getMaxNumArgs() const { return UINT_MAX; }

protected:
// Obtain the minimum required size to hold the value (without the sign)
// In case of a vector it returns the min required size for one element.
Expand Down
4 changes: 4 additions & 0 deletions llvm/lib/Analysis/TargetTransformInfo.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1194,6 +1194,10 @@ bool TargetTransformInfo::hasArmWideBranch(bool Thumb) const {
return TTIImpl->hasArmWideBranch(Thumb);
}

// Returns the maximum number of function arguments the target supports, as
// reported by the underlying TTIImpl object.
unsigned TargetTransformInfo::getMaxNumArgs() const {
return TTIImpl->getMaxNumArgs();
}

// Delegates the query about the intrinsic call II to the underlying TTIImpl
// object and returns its answer unchanged.
bool TargetTransformInfo::shouldExpandReduction(const IntrinsicInst *II) const {
return TTIImpl->shouldExpandReduction(II);
}
Expand Down
4 changes: 4 additions & 0 deletions llvm/lib/Target/BPF/BPFTargetTransformInfo.h
Original file line number Diff line number Diff line change
Expand Up @@ -77,6 +77,10 @@ class BPFTTIImpl : public BasicTTIImplBase<BPFTTIImpl> {
return Options;
}

/// BPF limit on the number of function arguments: the BPF calling convention
/// allows only 5 parameters in registers and cannot pass arguments on the
/// stack.
unsigned getMaxNumArgs() const {
return 5;
}

};

} // end namespace llvm
Expand Down
5 changes: 5 additions & 0 deletions llvm/lib/Transforms/IPO/ArgumentPromotion.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -781,6 +781,7 @@ static Function *promoteArguments(Function *F, FunctionAnalysisManager &FAM,
// Check to see which arguments are promotable. If an argument is promotable,
// add it to ArgsToPromote.
DenseMap<Argument *, SmallVector<OffsetAndArgPart, 4>> ArgsToPromote;
unsigned NumArgsAfterPromote = F->getFunctionType()->getNumParams();
for (Argument *PtrArg : PointerArgs) {
// Replace sret attribute with noalias. This reduces register pressure by
// avoiding a register copy.
Expand All @@ -804,6 +805,7 @@ static Function *promoteArguments(Function *F, FunctionAnalysisManager &FAM,
Types.push_back(Pair.second.Ty);

if (areTypesABICompatible(Types, *F, TTI)) {
NumArgsAfterPromote += ArgParts.size() - 1;
ArgsToPromote.insert({PtrArg, std::move(ArgParts)});
}
}
Expand All @@ -813,6 +815,9 @@ static Function *promoteArguments(Function *F, FunctionAnalysisManager &FAM,
if (ArgsToPromote.empty())
return nullptr;

if (NumArgsAfterPromote > TTI.getMaxNumArgs())
return nullptr;

return doPromotion(F, FAM, ArgsToPromote);
}

Expand Down
88 changes: 88 additions & 0 deletions llvm/test/Transforms/ArgumentPromotion/BPF/argpromotion.ll
Original file line number Diff line number Diff line change
@@ -0,0 +1,88 @@
; RUN: opt -passes=argpromotion -mtriple=bpf-pc-linux -S %s | FileCheck %s
; Source:
; struct t {
; int a, b, c, d, e, f, g;
; };
; __attribute__((noinline)) static int foo1(struct t *p1, struct t *p2, struct t *p3) {
; return p1->a + p1->b + p2->c + p2->e + p3->f + p3->g;
; }
; __attribute__((noinline)) static int foo2(struct t *p1, struct t *p2, struct t *p3) {
; return p1->a + p1->b + p2->c + p2->e + p3->f;
; }
; void init(void *);
; int bar(void) {
; struct t v1, v2, v3;
; init(&v1); init(&v2); init(&v3);
; return foo1(&v1, &v2, &v3) + foo2(&v1, &v2, &v3);
; }
; Compilation flag:
; clang -target bpf -O2 -S t.c -mllvm -print-before=argpromotion -mllvm -print-module-scope
; and then do some manual tailoring to remove attributes/metadata that are not used
; by the argpromotion pass.

%struct.t = type { i32, i32, i32, i32, i32, i32, i32 }

; Caller: allocates three %struct.t objects, passes each one to @init, then
; calls @foo1 and @foo2 with all three pointers and returns the sum of the two
; results.
define i32 @bar() {
entry:
%v1 = alloca %struct.t, align 4
%v2 = alloca %struct.t, align 4
%v3 = alloca %struct.t, align 4
call void @init(ptr noundef nonnull %v1)
call void @init(ptr noundef nonnull %v2)
call void @init(ptr noundef nonnull %v3)
%call = call fastcc i32 @foo1(ptr noundef nonnull %v1, ptr noundef nonnull %v2, ptr noundef nonnull %v3)
%call1 = call fastcc i32 @foo2(ptr noundef nonnull %v1, ptr noundef nonnull %v2, ptr noundef nonnull %v3)
%add = add nsw i32 %call, %call1
ret i32 %add
}

declare void @init(ptr noundef)

; Sums six i32 loads: fields 0 and 1 through %p1, fields 2 and 4 through %p2,
; and fields 5 and 6 through %p3. Replacing each pointer with the values loaded
; through it would give this function six scalar arguments.
define internal i32 @foo1(ptr nocapture noundef readonly %p1, ptr nocapture noundef readonly %p2, ptr nocapture noundef readonly %p3) {
entry:
%0 = load i32, ptr %p1, align 4
%b = getelementptr inbounds %struct.t, ptr %p1, i64 0, i32 1
%1 = load i32, ptr %b, align 4
%add = add nsw i32 %1, %0
%c = getelementptr inbounds %struct.t, ptr %p2, i64 0, i32 2
%2 = load i32, ptr %c, align 4
%add1 = add nsw i32 %add, %2
%e = getelementptr inbounds %struct.t, ptr %p2, i64 0, i32 4
%3 = load i32, ptr %e, align 4
%add2 = add nsw i32 %add1, %3
%f = getelementptr inbounds %struct.t, ptr %p3, i64 0, i32 5
%4 = load i32, ptr %f, align 4
%add3 = add nsw i32 %add2, %4
%g = getelementptr inbounds %struct.t, ptr %p3, i64 0, i32 6
%5 = load i32, ptr %g, align 4
%add4 = add nsw i32 %add3, %5
ret i32 %add4
}

; Without the number-of-arguments constraint, argpromotion would create a function signature with 6 arguments.
; Since the bpf target supports a maximum of 5 arguments, no argpromotion happens here.
;
; CHECK: i32 @foo1(ptr nocapture noundef readonly %p1, ptr nocapture noundef readonly %p2, ptr nocapture noundef readonly %p3)

; Sums five i32 loads: fields 0 and 1 through %p1, fields 2 and 4 through %p2,
; and field 5 through %p3. Replacing each pointer with the values loaded
; through it gives this function five scalar arguments.
define internal i32 @foo2(ptr noundef %p1, ptr noundef %p2, ptr noundef %p3) {
entry:
%0 = load i32, ptr %p1, align 4
%b = getelementptr inbounds %struct.t, ptr %p1, i64 0, i32 1
%1 = load i32, ptr %b, align 4
%add = add nsw i32 %0, %1
%c = getelementptr inbounds %struct.t, ptr %p2, i64 0, i32 2
%2 = load i32, ptr %c, align 4
%add1 = add nsw i32 %add, %2
%e = getelementptr inbounds %struct.t, ptr %p2, i64 0, i32 4
%3 = load i32, ptr %e, align 4
%add2 = add nsw i32 %add1, %3
%f = getelementptr inbounds %struct.t, ptr %p3, i64 0, i32 5
%4 = load i32, ptr %f, align 4
%add3 = add nsw i32 %add2, %4
ret i32 %add3
}

; Without the number-of-arguments constraint, argpromotion would create a function signature with 5 arguments, which equals
; the maximum number of arguments permitted by the bpf backend, so the promotion is still performed.
;
; CHECK: i32 @foo2(i32 %p1.0.val, i32 %p1.4.val, i32 %p2.8.val, i32 %p2.16.val, i32 %p3.20.val)
2 changes: 2 additions & 0 deletions llvm/test/Transforms/ArgumentPromotion/BPF/lit.local.cfg
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
# Mark every test in this directory as unsupported unless 'BPF' is listed in
# config.root.targets.
if 'BPF' not in config.root.targets:
    config.unsupported = True

0 comments on commit da816c2

Please sign in to comment.