From da816c2985263f108e852adc99c31b6775096010 Mon Sep 17 00:00:00 2001
From: Yonghong Song <yhs@fb.com>
Date: Mon, 17 Apr 2023 11:11:59 -0700
Subject: [PATCH] [TTI][BPF] Ensure ArgumentPromotion Not Exceeding Target
 MaxArgs

With LLVM patch https://reviews.llvm.org/D148269, we hit a linux kernel
bpf selftest compilation failure like below:
  ...
  progs/test_xdp_noinline.c:739:8: error: too many args to t8: i64 = GlobalAddress<ptr @encap_v4> 0, progs/test_xdp_noinline.c:739:8
              if (!encap_v4(xdp, cval, &pckt, dst, pkt_bytes))
                   ^
  ...
  progs/test_xdp_noinline.c:321:6: error: defined with too many args
  bool encap_v4(struct xdp_md *xdp, struct ctl_value *cval,
       ^
  ...

Note that bpf selftests are compiled with -O2 which is
the recommended flag for bpf community.

The bpf backend calling convention is only allowing 5
parameters in registers and does not allow pass arguments
through stacks. In the above case, ArgumentPromotionPass
replaced parameter '&pckt' as two parameters, so the total
number of arguments after ArgumentPromotion pass becomes 6
and this caused later compilation failure during instruction
selection phase.

This patch added a TargetTransformInfo hook getMaxNumArgs()
which returns 5 for BPF and UINT_MAX for other targets.

Differential Revision: https://reviews.llvm.org/D148551
---
 .../llvm/Analysis/TargetTransformInfo.h       |  8 ++
 .../llvm/Analysis/TargetTransformInfoImpl.h   |  2 +
 llvm/lib/Analysis/TargetTransformInfo.cpp     |  4 +
 llvm/lib/Target/BPF/BPFTargetTransformInfo.h  |  4 +
 llvm/lib/Transforms/IPO/ArgumentPromotion.cpp |  5 ++
 .../ArgumentPromotion/BPF/argpromotion.ll     | 88 +++++++++++++++++++
 .../ArgumentPromotion/BPF/lit.local.cfg       |  2 +
 7 files changed, 113 insertions(+)
 create mode 100644 llvm/test/Transforms/ArgumentPromotion/BPF/argpromotion.ll
 create mode 100644 llvm/test/Transforms/ArgumentPromotion/BPF/lit.local.cfg
diff --git a/llvm/include/llvm/Analysis/TargetTransformInfo.h b/llvm/include/llvm/Analysis/TargetTransformInfo.h
index 1ced968ad585..3f7cb5f4bf28 100644
--- a/llvm/include/llvm/Analysis/TargetTransformInfo.h
+++ b/llvm/include/llvm/Analysis/TargetTransformInfo.h
@@ -1643,6 +1643,9 @@ class TargetTransformInfo {
   /// false, but it shouldn't matter what it returns anyway.
   bool hasArmWideBranch(bool Thumb) const;
 
+  /// \return The maximum number of function arguments the target supports.
+  unsigned getMaxNumArgs() const;
+
   /// @}
 
 private:
@@ -2003,6 +2006,7 @@ class TargetTransformInfo::Concept {
   virtual VPLegalization
   getVPLegalizationStrategy(const VPIntrinsic &PI) const = 0;
   virtual bool hasArmWideBranch(bool Thumb) const = 0;
+  virtual unsigned getMaxNumArgs() const = 0;
 };
 
 template <typename T>
@@ -2694,6 +2698,10 @@ class TargetTransformInfo::Model final : public TargetTransformInfo::Concept {
   bool hasArmWideBranch(bool Thumb) const override {
     return Impl.hasArmWideBranch(Thumb);
   }
+
+  unsigned getMaxNumArgs() const override {
+    return Impl.getMaxNumArgs();
+  }
 };
 
 template <typename T>
diff --git a/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h b/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h
index 9ccba7f9a0b1..db5ebb7c30b2 100644
--- a/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h
+++ b/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h
@@ -878,6 +878,8 @@ class TargetTransformInfoImplBase {
 
   bool hasArmWideBranch(bool) const { return false; }
 
+  unsigned getMaxNumArgs() const { return UINT_MAX; }
+
 protected:
   // Obtain the minimum required size to hold the value (without the sign)
   // In case of a vector it returns the min required size for one element.
diff --git a/llvm/lib/Analysis/TargetTransformInfo.cpp b/llvm/lib/Analysis/TargetTransformInfo.cpp
index 92199eb33c4c..f7da4617b449 100644
--- a/llvm/lib/Analysis/TargetTransformInfo.cpp
+++ b/llvm/lib/Analysis/TargetTransformInfo.cpp
@@ -1194,6 +1194,10 @@ bool TargetTransformInfo::hasArmWideBranch(bool Thumb) const {
   return TTIImpl->hasArmWideBranch(Thumb);
 }
 
+unsigned TargetTransformInfo::getMaxNumArgs() const {
+  return TTIImpl->getMaxNumArgs();
+}
+
 bool TargetTransformInfo::shouldExpandReduction(const IntrinsicInst *II) const {
   return TTIImpl->shouldExpandReduction(II);
 }
diff --git a/llvm/lib/Target/BPF/BPFTargetTransformInfo.h b/llvm/lib/Target/BPF/BPFTargetTransformInfo.h
index ba1cb5699e79..5aa9ec283406 100644
--- a/llvm/lib/Target/BPF/BPFTargetTransformInfo.h
+++ b/llvm/lib/Target/BPF/BPFTargetTransformInfo.h
@@ -77,6 +77,10 @@ class BPFTTIImpl : public BasicTTIImplBase<BPFTTIImpl> {
     return Options;
   }
 
+  unsigned getMaxNumArgs() const {
+    return 5;
+  }
+
 };
 
 } // end namespace llvm
diff --git a/llvm/lib/Transforms/IPO/ArgumentPromotion.cpp b/llvm/lib/Transforms/IPO/ArgumentPromotion.cpp
index fc170299ca46..8c6c0728097c 100644
--- a/llvm/lib/Transforms/IPO/ArgumentPromotion.cpp
+++ b/llvm/lib/Transforms/IPO/ArgumentPromotion.cpp
@@ -781,6 +781,7 @@ static Function *promoteArguments(Function *F, FunctionAnalysisManager &FAM,
   // Check to see which arguments are promotable.  If an argument is promotable,
   // add it to ArgsToPromote.
   DenseMap<Argument *, SmallVector<OffsetAndArgPart, 4>> ArgsToPromote;
+  unsigned NumArgsAfterPromote = F->getFunctionType()->getNumParams();
   for (Argument *PtrArg : PointerArgs) {
     // Replace sret attribute with noalias. This reduces register pressure by
     // avoiding a register copy.
@@ -804,6 +805,7 @@ static Function *promoteArguments(Function *F, FunctionAnalysisManager &FAM,
         Types.push_back(Pair.second.Ty);
 
       if (areTypesABICompatible(Types, *F, TTI)) {
+        NumArgsAfterPromote += ArgParts.size() - 1;
         ArgsToPromote.insert({PtrArg, std::move(ArgParts)});
       }
     }
@@ -813,6 +815,9 @@ static Function *promoteArguments(Function *F, FunctionAnalysisManager &FAM,
   if (ArgsToPromote.empty())
     return nullptr;
 
+  if (NumArgsAfterPromote > TTI.getMaxNumArgs())
+    return nullptr;
+
   return doPromotion(F, FAM, ArgsToPromote);
 }
 
diff --git a/llvm/test/Transforms/ArgumentPromotion/BPF/argpromotion.ll b/llvm/test/Transforms/ArgumentPromotion/BPF/argpromotion.ll
new file mode 100644
index 000000000000..6c39f27115ad
--- /dev/null
+++ b/llvm/test/Transforms/ArgumentPromotion/BPF/argpromotion.ll
@@ -0,0 +1,88 @@
+; RUN: opt -passes=argpromotion -mtriple=bpf-pc-linux -S %s | FileCheck %s
+; Source:
+;  struct t {
+;    int a, b, c, d, e, f, g;
+;  };
+;  __attribute__((noinline)) static int foo1(struct t *p1, struct t *p2, struct t *p3) {
+;    return p1->a + p1->b + p2->c + p2->e + p3->f + p3->g;
+;  }
+;  __attribute__((noinline)) static int foo2(struct t *p1, struct t *p2, struct t *p3) {
+;    return p1->a + p1->b + p2->c + p2->e + p3->f;
+;  }
+;  void init(void *);
+;  int bar(void) {
+;    struct t v1, v2, v3;
+;    init(&v1); init(&v2); init(&v3);
+;    return foo1(&v1, &v2, &v3) + foo2(&v1, &v2, &v3);
+;  }
+; Compilation flag:
+;   clang -target bpf -O2 -S t.c -mllvm -print-before=argpromotion -mllvm -print-module-scope
+;   and then do some manual tailoring to remove some attributes/metadata which is not used
+;   by argpromotion pass.
+
+%struct.t = type { i32, i32, i32, i32, i32, i32, i32 }
+
+define i32 @bar() {
+entry:
+  %v1 = alloca %struct.t, align 4
+  %v2 = alloca %struct.t, align 4
+  %v3 = alloca %struct.t, align 4
+  call void @init(ptr noundef nonnull %v1)
+  call void @init(ptr noundef nonnull %v2)
+  call void @init(ptr noundef nonnull %v3)
+  %call = call fastcc i32 @foo1(ptr noundef nonnull %v1, ptr noundef nonnull %v2, ptr noundef nonnull %v3)
+  %call1 = call fastcc i32 @foo2(ptr noundef nonnull %v1, ptr noundef nonnull %v2, ptr noundef nonnull %v3)
+  %add = add nsw i32 %call, %call1
+  ret i32 %add
+}
+
+declare void @init(ptr noundef)
+
+define internal i32 @foo1(ptr nocapture noundef readonly %p1, ptr nocapture noundef readonly %p2, ptr nocapture noundef readonly %p3) {
+entry:
+  %0 = load i32, ptr %p1, align 4
+  %b = getelementptr inbounds %struct.t, ptr %p1, i64 0, i32 1
+  %1 = load i32, ptr %b, align 4
+  %add = add nsw i32 %1, %0
+  %c = getelementptr inbounds %struct.t, ptr %p2, i64 0, i32 2
+  %2 = load i32, ptr %c, align 4
+  %add1 = add nsw i32 %add, %2
+  %e = getelementptr inbounds %struct.t, ptr %p2, i64 0, i32 4
+  %3 = load i32, ptr %e, align 4
+  %add2 = add nsw i32 %add1, %3
+  %f = getelementptr inbounds %struct.t, ptr %p3, i64 0, i32 5
+  %4 = load i32, ptr %f, align 4
+  %add3 = add nsw i32 %add2, %4
+  %g = getelementptr inbounds %struct.t, ptr %p3, i64 0, i32 6
+  %5 = load i32, ptr %g, align 4
+  %add4 = add nsw i32 %add3, %5
+  ret i32 %add4
+}
+
+; Without number-of-argument constraint, argpromotion will create a function signature with 6 arguments. Since
+; bpf target only supports maximum 5 arguments, so no argpromotion here.
+;
+; CHECK:  i32 @foo1(ptr nocapture noundef readonly %p1, ptr nocapture noundef readonly %p2, ptr nocapture noundef readonly %p3)
+
+define internal i32 @foo2(ptr noundef %p1, ptr noundef %p2, ptr noundef %p3) {
+entry:
+  %0 = load i32, ptr %p1, align 4
+  %b = getelementptr inbounds %struct.t, ptr %p1, i64 0, i32 1
+  %1 = load i32, ptr %b, align 4
+  %add = add nsw i32 %0, %1
+  %c = getelementptr inbounds %struct.t, ptr %p2, i64 0, i32 2
+  %2 = load i32, ptr %c, align 4
+  %add1 = add nsw i32 %add, %2
+  %e = getelementptr inbounds %struct.t, ptr %p2, i64 0, i32 4
+  %3 = load i32, ptr %e, align 4
+  %add2 = add nsw i32 %add1, %3
+  %f = getelementptr inbounds %struct.t, ptr %p3, i64 0, i32 5
+  %4 = load i32, ptr %f, align 4
+  %add3 = add nsw i32 %add2, %4
+  ret i32 %add3
+}
+
+; Without number-of-argument constraint, argpromotion will create a function signature with 5 arguments, which equals
+; the maximum number of argument permitted by bpf backend, so argpromotion result code does work.
+;
+; CHECK:  i32 @foo2(i32 %p1.0.val, i32 %p1.4.val, i32 %p2.8.val, i32 %p2.16.val, i32 %p3.20.val)
diff --git a/llvm/test/Transforms/ArgumentPromotion/BPF/lit.local.cfg b/llvm/test/Transforms/ArgumentPromotion/BPF/lit.local.cfg
new file mode 100644
index 000000000000..a4ab2624af61
--- /dev/null
+++ b/llvm/test/Transforms/ArgumentPromotion/BPF/lit.local.cfg
@@ -0,0 +1,2 @@
+if not 'BPF' in config.root.targets:
+    config.unsupported = True