[BPF] Add a few new insns under cpu=v4

In [1], a few new insns are proposed to expand the BPF ISA by:
  - fixing the limitations of existing insns (e.g., the 16-bit jmp offset)
  - adding new insns which may improve code quality (sign_ext_ld, sign_ext_mov, st)
  - making the ISA feature complete (sdiv, smod)
  - providing a better user experience (bswap)

This patch implements insn encoding for:
  - sign-extended load
  - sign-extended mov
  - sdiv/smod
  - bswap insns
  - unconditional jump with a 32-bit offset

The new bswap insns are generated under cpu=v4 for __builtin_bswap. For cpu=v3 or earlier, be or le insns are generated for __builtin_bswap, which is not intuitive for the user.

To support 32-bit branch offsets, a 32-bit ja (JMPL) insn is implemented. For a conditional branch whose target is beyond the 16-bit offset range, llvm will do the transformation 'cond_jmp' -> 'cond_jmp + jmpl' to simulate a 32-bit conditional jmp. See BPFMIPeephole.cpp for details. The algorithm is heuristic based. I have tested the bpf selftest pyperf600 with an unroll count of 600, which can indeed generate a 32-bit jump insn, e.g.,
  13: 06 00 00 00 9b cd 00 00 gotol +0xcd9b <LBB0_6619>

Eduard is working on adding the 'st' insn to cpu=v4.

The following llc flags can be used to disable a particular insn for cpu v4:
  disable-ldsx, disable-movsx, disable-bswap, disable-sdiv-smod, disable-gotol
For example, a user can run
  llc -march=bpf -mcpu=v4 -disable-movsx t.ll
to enable cpu v4 without movsx insns.

References:
  [1] https://lore.kernel.org/bpf/4bfe98be-5333-1c7e-2f6d-42486c8ec039@meta.com/

Differential Revision: https://reviews.llvm.org/D144829
sx-aurora-dev · Jul 26, 2023 · 6c412b6 · 6c412b6
1 parent e9df4c9
commit 6c412b6
Show file tree

Hide file tree

Showing 26 changed files with 925 additions and 88 deletions.
diff --git a/clang/lib/Basic/Targets/BPF.cpp b/clang/lib/Basic/Targets/BPF.cpp
@@ -32,7 +32,7 @@ void BPFTargetInfo::getTargetDefines(const LangOptions &Opts,
 }
 
 static constexpr llvm::StringLiteral ValidCPUNames[] = {"generic", "v1", "v2",
-                                                        "v3", "probe"};
+                                                        "v3", "v4", "probe"};
 
 bool BPFTargetInfo::isValidCPUName(StringRef Name) const {
   return llvm::is_contained(ValidCPUNames, Name);

diff --git a/clang/lib/Basic/Targets/BPF.h b/clang/lib/Basic/Targets/BPF.h
@@ -106,7 +106,7 @@ class LLVM_LIBRARY_VISIBILITY BPFTargetInfo : public TargetInfo {
   void fillValidCPUList(SmallVectorImpl<StringRef> &Values) const override;
 
   bool setCPU(const std::string &Name) override {
-    if (Name == "v3") {
+    if (Name == "v3" || Name == "v4") {
       HasAlu32 = true;
     }
 

diff --git a/clang/test/Misc/target-invalid-cpu-note.c b/clang/test/Misc/target-invalid-cpu-note.c
@@ -73,7 +73,7 @@
 
 // RUN: not %clang_cc1 -triple bpf--- -target-cpu not-a-cpu -fsyntax-only %s 2>&1 | FileCheck %s --check-prefix BPF
 // BPF: error: unknown target CPU 'not-a-cpu'
-// BPF-NEXT: note: valid target CPU values are: generic, v1, v2, v3, probe{{$}}
+// BPF-NEXT: note: valid target CPU values are: generic, v1, v2, v3, v4, probe{{$}}
 
 // RUN: not %clang_cc1 -triple avr--- -target-cpu not-a-cpu -fsyntax-only %s 2>&1 | FileCheck %s --check-prefix AVR
 // AVR: error: unknown target CPU 'not-a-cpu'

diff --git a/llvm/lib/Target/BPF/AsmParser/BPFAsmParser.cpp b/llvm/lib/Target/BPF/AsmParser/BPFAsmParser.cpp
@@ -227,6 +227,7 @@ struct BPFOperand : public MCParsedAsmOperand {
         .Case("if", true)
         .Case("call", true)
         .Case("goto", true)
+        .Case("gotol", true)
         .Case("*", true)
         .Case("exit", true)
         .Case("lock", true)
@@ -241,13 +242,20 @@ struct BPFOperand : public MCParsedAsmOperand {
         .Case("u32", true)
         .Case("u16", true)
         .Case("u8", true)
+        .Case("s32", true)
+        .Case("s16", true)
+        .Case("s8", true)
         .Case("be64", true)
         .Case("be32", true)
         .Case("be16", true)
         .Case("le64", true)
         .Case("le32", true)
         .Case("le16", true)
+        .Case("bswap16", true)
+        .Case("bswap32", true)
+        .Case("bswap64", true)
         .Case("goto", true)
+        .Case("gotol", true)
         .Case("ll", true)
         .Case("skb", true)
         .Case("s", true)

diff --git a/llvm/lib/Target/BPF/BPF.td b/llvm/lib/Target/BPF/BPF.td
@@ -30,6 +30,7 @@ def : Proc<"generic", []>;
 def : Proc<"v1", []>;
 def : Proc<"v2", []>;
 def : Proc<"v3", [ALU32]>;
+def : Proc<"v4", [ALU32]>;
 def : Proc<"probe", []>;
 
 def BPFInstPrinter : AsmWriter {
@@ -45,7 +46,7 @@ def BPFAsmParserVariant : AsmParserVariant {
   int Variant = 0;
   string Name = "BPF";
   string BreakCharacters = ".";
-  string TokenizingCharacters = "#()[]=:.<>!+*";
+  string TokenizingCharacters = "#()[]=:.<>!+*%/";
 }
 
 def BPF : Target {

diff --git a/llvm/lib/Target/BPF/BPFISelDAGToDAG.cpp b/llvm/lib/Target/BPF/BPFISelDAGToDAG.cpp
@@ -192,15 +192,17 @@ void BPFDAGToDAGISel::Select(SDNode *Node) {
   default:
     break;
   case ISD::SDIV: {
-    DebugLoc Empty;
-    const DebugLoc &DL = Node->getDebugLoc();
-    if (DL != Empty)
-      errs() << "Error at line " << DL.getLine() << ": ";
-    else
-      errs() << "Error: ";
-    errs() << "Unsupport signed division for DAG: ";
-    Node->print(errs(), CurDAG);
-    errs() << "Please convert to unsigned div/mod.\n";
+    if (!Subtarget->hasSdivSmod()) {
+      DebugLoc Empty;
+      const DebugLoc &DL = Node->getDebugLoc();
+      if (DL != Empty)
+        errs() << "Error at line " << DL.getLine() << ": ";
+      else
+        errs() << "Error: ";
+      errs() << "Unsupport signed division for DAG: ";
+      Node->print(errs(), CurDAG);
+      errs() << "Please convert to unsigned div/mod.\n";
+    }
     break;
   }
   case ISD::INTRINSIC_W_CHAIN: {

diff --git a/llvm/lib/Target/BPF/BPFISelLowering.cpp b/llvm/lib/Target/BPF/BPFISelLowering.cpp
@@ -102,7 +102,8 @@ BPFTargetLowering::BPFTargetLowering(const TargetMachine &TM,
 
     setOperationAction(ISD::SDIVREM, VT, Expand);
     setOperationAction(ISD::UDIVREM, VT, Expand);
-    setOperationAction(ISD::SREM, VT, Expand);
+    if (!STI.hasSdivSmod())
+      setOperationAction(ISD::SREM, VT, Expand);
     setOperationAction(ISD::MULHU, VT, Expand);
     setOperationAction(ISD::MULHS, VT, Expand);
     setOperationAction(ISD::UMUL_LOHI, VT, Expand);
@@ -131,19 +132,23 @@ BPFTargetLowering::BPFTargetLowering(const TargetMachine &TM,
   setOperationAction(ISD::CTLZ_ZERO_UNDEF, MVT::i64, Custom);
 
   setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1, Expand);
-  setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i8, Expand);
-  setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i16, Expand);
-  setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i32, Expand);
+  if (!STI.hasMovsx()) {
+    setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i8, Expand);
+    setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i16, Expand);
+    setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i32, Expand);
+  }
 
   // Extended load operations for i1 types must be promoted
   for (MVT VT : MVT::integer_valuetypes()) {
     setLoadExtAction(ISD::EXTLOAD, VT, MVT::i1, Promote);
     setLoadExtAction(ISD::ZEXTLOAD, VT, MVT::i1, Promote);
     setLoadExtAction(ISD::SEXTLOAD, VT, MVT::i1, Promote);
 
-    setLoadExtAction(ISD::SEXTLOAD, VT, MVT::i8, Expand);
-    setLoadExtAction(ISD::SEXTLOAD, VT, MVT::i16, Expand);
-    setLoadExtAction(ISD::SEXTLOAD, VT, MVT::i32, Expand);
+    if (!STI.hasLdsx()) {
+      setLoadExtAction(ISD::SEXTLOAD, VT, MVT::i8, Expand);
+      setLoadExtAction(ISD::SEXTLOAD, VT, MVT::i16, Expand);
+      setLoadExtAction(ISD::SEXTLOAD, VT, MVT::i32, Expand);
+    }
   }
 
   setBooleanContents(ZeroOrOneBooleanContent);
@@ -183,6 +188,7 @@ BPFTargetLowering::BPFTargetLowering(const TargetMachine &TM,
   HasAlu32 = STI.getHasAlu32();
   HasJmp32 = STI.getHasJmp32();
   HasJmpExt = STI.getHasJmpExt();
+  HasMovsx = STI.hasMovsx();
 }
 
 bool BPFTargetLowering::isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const {
@@ -673,11 +679,15 @@ BPFTargetLowering::EmitSubregExt(MachineInstr &MI, MachineBasicBlock *BB,
   Register PromotedReg0 = RegInfo.createVirtualRegister(RC);
   Register PromotedReg1 = RegInfo.createVirtualRegister(RC);
   Register PromotedReg2 = RegInfo.createVirtualRegister(RC);
-  BuildMI(BB, DL, TII.get(BPF::MOV_32_64), PromotedReg0).addReg(Reg);
-  BuildMI(BB, DL, TII.get(BPF::SLL_ri), PromotedReg1)
-    .addReg(PromotedReg0).addImm(32);
-  BuildMI(BB, DL, TII.get(RShiftOp), PromotedReg2)
-    .addReg(PromotedReg1).addImm(32);
+  if (HasMovsx) {
+    BuildMI(BB, DL, TII.get(BPF::MOVSX_rr_32), PromotedReg0).addReg(Reg);
+  } else {
+    BuildMI(BB, DL, TII.get(BPF::MOV_32_64), PromotedReg0).addReg(Reg);
+    BuildMI(BB, DL, TII.get(BPF::SLL_ri), PromotedReg1)
+      .addReg(PromotedReg0).addImm(32);
+    BuildMI(BB, DL, TII.get(RShiftOp), PromotedReg2)
+      .addReg(PromotedReg1).addImm(32);
+  }
 
   return PromotedReg2;
 }

diff --git a/llvm/lib/Target/BPF/BPFISelLowering.h b/llvm/lib/Target/BPF/BPFISelLowering.h
@@ -71,6 +71,7 @@ class BPFTargetLowering : public TargetLowering {
   bool HasAlu32;
   bool HasJmp32;
   bool HasJmpExt;
+  bool HasMovsx;
 
   SDValue LowerBR_CC(SDValue Op, SelectionDAG &DAG) const;
   SDValue LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const;

diff --git a/llvm/lib/Target/BPF/BPFInstrFormats.td b/llvm/lib/Target/BPF/BPFInstrFormats.td
@@ -90,6 +90,7 @@ def BPF_IMM  : BPFModeModifer<0x0>;
 def BPF_ABS  : BPFModeModifer<0x1>;
 def BPF_IND  : BPFModeModifer<0x2>;
 def BPF_MEM  : BPFModeModifer<0x3>;
+def BPF_MEMSX  : BPFModeModifer<0x4>;
 def BPF_ATOMIC : BPFModeModifer<0x6>;
 
 class BPFAtomicFlag<bits<4> val> {