[AMDGPU] Src1 of VOP3 DPP instructions can be SGPR on supported subtargets #67461

mbrkusanin · 2023-09-26T17:14:44Z

In order to avoid duplicating every dpp pseudo opcode that has src1, we allow it for all opcodes and add manual checks on subtargets that do not support it.

llvmbot · 2023-09-26T17:15:46Z

@llvm/pr-subscribers-backend-amdgpu

@llvm/pr-subscribers-mc

Changes

In order to avoid duplicating every dpp pseudo opcode that has src1, we allow it for all opcodes and add manual checks on subtargets that do not support it.

Full diff: https://github.com/llvm/llvm-project/pull/67461.diff

11 Files Affected:

(modified) llvm/lib/Target/AMDGPU/AMDGPU.td (+9-1)
(modified) llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp (+26-9)
(modified) llvm/lib/Target/AMDGPU/GCNDPPCombine.cpp (+9-1)
(modified) llvm/lib/Target/AMDGPU/GCNSubtarget.h (+5)
(modified) llvm/lib/Target/AMDGPU/SIInstrInfo.td (+1-1)
(modified) llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp (+4)
(modified) llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h (+1)
(modified) llvm/test/CodeGen/AMDGPU/dpp_combine.ll (+1)
(modified) llvm/test/CodeGen/AMDGPU/dpp_combine_gfx11.mir (+47-3)
(added) llvm/test/MC/AMDGPU/gfx1150_asm_features.s (+20)
(modified) llvm/test/MC/AMDGPU/gfx11_asm_err.s (+3)

diff --git a/llvm/lib/Target/AMDGPU/AMDGPU.td b/llvm/lib/Target/AMDGPU/AMDGPU.td
index d5356d1be3d758a..924144c5366f37e 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPU.td
+++ b/llvm/lib/Target/AMDGPU/AMDGPU.td
@@ -472,6 +472,12 @@ def FeatureDPALU_DPP : SubtargetFeature<"dpp-64bit",
   "Support DPP (Data Parallel Primitives) extension in DP ALU"
 >;
 
+def FeatureDPPSrc1SGPR : SubtargetFeature<"dpp-src1-sgpr",
+  "HasDPPSrc1SGPR",
+  "true",
+  "Support SGPR for Src1 of DPP instructions"
+>;
+
 def FeaturePackedFP32Ops : SubtargetFeature<"packed-fp32-ops",
   "HasPackedFP32Ops",
   "true",
@@ -1383,11 +1389,13 @@ def FeatureISAVersion11_0_3 : FeatureSet<
 
 def FeatureISAVersion11_5_0 : FeatureSet<
   !listconcat(FeatureISAVersion11_Common.Features,
-    [FeatureSALUFloatInsts])>;
+    [FeatureSALUFloatInsts,
+     FeatureDPPSrc1SGPR])>;
 
 def FeatureISAVersion11_5_1 : FeatureSet<
   !listconcat(FeatureISAVersion11_Common.Features,
     [FeatureSALUFloatInsts,
+     FeatureDPPSrc1SGPR,
      FeatureGFX11FullVGPRs])>;
 
 //===----------------------------------------------------------------------===//
diff --git a/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp b/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp
index 5032dd665b07e81..35656bcaea1af7f 100644
--- a/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp
+++ b/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp
@@ -4231,16 +4231,33 @@ bool AMDGPUAsmParser::validateDPP(const MCInst &Inst,
                                   const OperandVector &Operands) {
   const unsigned Opc = Inst.getOpcode();
   int DppCtrlIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dpp_ctrl);
-  if (DppCtrlIdx < 0)
-    return true;
-  unsigned DppCtrl = Inst.getOperand(DppCtrlIdx).getImm();
+  if (DppCtrlIdx >= 0) {
+    unsigned DppCtrl = Inst.getOperand(DppCtrlIdx).getImm();
+
+    if (!AMDGPU::isLegalDPALU_DPPControl(DppCtrl) &&
+        AMDGPU::isDPALU_DPP(MII.get(Opc))) {
+      // DP ALU DPP is supported for row_newbcast only on GFX9*
+      SMLoc S = getImmLoc(AMDGPUOperand::ImmTyDppCtrl, Operands);
+      Error(S, "DP ALU dpp only supports row_newbcast");
+      return false;
+    }
+  }
 
-  if (!AMDGPU::isLegalDPALU_DPPControl(DppCtrl) &&
-      AMDGPU::isDPALU_DPP(MII.get(Opc))) {
-    // DP ALU DPP is supported for row_newbcast only on GFX9*
-    SMLoc S = getImmLoc(AMDGPUOperand::ImmTyDppCtrl, Operands);
-    Error(S, "DP ALU dpp only supports row_newbcast");
-    return false;
+  int Dpp8Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dpp8);
+  bool IsDPP = DppCtrlIdx >= 0 || Dpp8Idx >= 0;
+
+  if (IsDPP && !hasDPPSrc1SGPR(getSTI())) {
+    int Src1Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src1);
+    if (Src1Idx >= 0) {
+      const MCOperand &Src1 = Inst.getOperand(Src1Idx);
+      const MCRegisterInfo *TRI = getContext().getRegisterInfo();
+      if (Src1.isImm() ||
+          (Src1.isReg() && isSGPR(mc2PseudoReg(Src1.getReg()), TRI))) {
+        AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[Src1Idx]);
+        Error(Op.getStartLoc(), "invalid operand for instruction");
+        return false;
+      }
+    }
   }
 
   return true;
diff --git a/llvm/lib/Target/AMDGPU/GCNDPPCombine.cpp b/llvm/lib/Target/AMDGPU/GCNDPPCombine.cpp
index e1cd0f0a732b93e..3e516a6e40b372a 100644
--- a/llvm/lib/Target/AMDGPU/GCNDPPCombine.cpp
+++ b/llvm/lib/Target/AMDGPU/GCNDPPCombine.cpp
@@ -278,6 +278,7 @@ MachineInstr *GCNDPPCombine::createDPPInst(MachineInstr &OrigMI,
     }
     auto *Src0 = TII->getNamedOperand(MovMI, AMDGPU::OpName::src0);
     assert(Src0);
+    int Src0Idx = NumOperands;
     if (!TII->isOperandLegal(*DPPInst.getInstr(), NumOperands, Src0)) {
       LLVM_DEBUG(dbgs() << "  failed: src0 is illegal\n");
       Fail = true;
@@ -301,7 +302,14 @@ MachineInstr *GCNDPPCombine::createDPPInst(MachineInstr &OrigMI,
     }
     auto *Src1 = TII->getNamedOperand(OrigMI, AMDGPU::OpName::src1);
     if (Src1) {
-      if (!TII->isOperandLegal(*DPPInst.getInstr(), NumOperands, Src1)) {
+      int OpNum = NumOperands;
+      // If subtarget does not support SGPRs for src1 operand then the
+      // requirements are the same as for src0. We check src0 instead because
+      // pseudos are shared between subtargets and allow SGPR for src1 on all.
+      if (!ST->hasDPPSrc1SGPR())
+        OpNum = Src0Idx;
+
+      if (!TII->isOperandLegal(*DPPInst.getInstr(), OpNum, Src1)) {
         LLVM_DEBUG(dbgs() << "  failed: src1 is illegal\n");
         Fail = true;
         break;
diff --git a/llvm/lib/Target/AMDGPU/GCNSubtarget.h b/llvm/lib/Target/AMDGPU/GCNSubtarget.h
index f60938d830fcc43..c73cf6b2bd815ff 100644
--- a/llvm/lib/Target/AMDGPU/GCNSubtarget.h
+++ b/llvm/lib/Target/AMDGPU/GCNSubtarget.h
@@ -128,6 +128,7 @@ class GCNSubtarget final : public AMDGPUGenSubtargetInfo,
   bool HasDPP = false;
   bool HasDPP8 = false;
   bool HasDPALU_DPP = false;
+  bool HasDPPSrc1SGPR = false;
   bool HasPackedFP32Ops = false;
   bool HasImageInsts = false;
   bool HasExtendedImageInsts = false;
@@ -916,6 +917,10 @@ class GCNSubtarget final : public AMDGPUGenSubtargetInfo,
     return HasDPALU_DPP;
   }
 
+  bool hasDPPSrc1SGPR() const {
+    return HasDPPSrc1SGPR;
+  }
+
   bool hasPackedFP32Ops() const {
     return HasPackedFP32Ops;
   }
diff --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.td b/llvm/lib/Target/AMDGPU/SIInstrInfo.td
index ea6df380153bb86..60a6964c754ff64 100644
--- a/llvm/lib/Target/AMDGPU/SIInstrInfo.td
+++ b/llvm/lib/Target/AMDGPU/SIInstrInfo.td
@@ -2296,7 +2296,7 @@ class VOPProfile <list<ValueType> _ArgVT, bit _EnableClamp = 0> {
   field RegisterClass Src1DPP = getVregSrcForVT<Src1VT>.ret;
   field RegisterClass Src2DPP = getVregSrcForVT<Src2VT>.ret;
   field RegisterOperand Src0VOP3DPP = VGPRSrc_32;
-  field RegisterOperand Src1VOP3DPP = VRegSrc_32;
+  field RegisterOperand Src1VOP3DPP = getVOP3DPPSrcForVT<Src1VT>.ret;
   field RegisterOperand Src2VOP3DPP = getVOP3DPPSrcForVT<Src2VT>.ret;
   field RegisterOperand Src0SDWA = getSDWASrcForVT<Src0VT>.ret;
   field RegisterOperand Src1SDWA = getSDWASrcForVT<Src0VT>.ret;
diff --git a/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp b/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp
index da664c93d188963..6d0ad763d9e6cc1 100644
--- a/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp
@@ -2085,6 +2085,10 @@ bool hasVOPD(const MCSubtargetInfo &STI) {
   return STI.hasFeature(AMDGPU::FeatureVOPD);
 }
 
+bool hasDPPSrc1SGPR(const MCSubtargetInfo &STI) {
+  return STI.hasFeature(AMDGPU::FeatureDPPSrc1SGPR);
+}
+
 unsigned hasKernargPreload(const MCSubtargetInfo &STI) {
   return STI.hasFeature(AMDGPU::FeatureKernargPreload);
 }
diff --git a/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h b/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h
index d5092333c217127..297a69f54d63721 100644
--- a/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h
+++ b/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h
@@ -1169,6 +1169,7 @@ bool isGFX940(const MCSubtargetInfo &STI);
 bool hasArchitectedFlatScratch(const MCSubtargetInfo &STI);
 bool hasMAIInsts(const MCSubtargetInfo &STI);
 bool hasVOPD(const MCSubtargetInfo &STI);
+bool hasDPPSrc1SGPR(const MCSubtargetInfo &STI);
 int getTotalNumVGPRs(bool has90AInsts, int32_t ArgNumAGPR, int32_t ArgNumVGPR);
 unsigned hasKernargPreload(const MCSubtargetInfo &STI);
 
diff --git a/llvm/test/CodeGen/AMDGPU/dpp_combine.ll b/llvm/test/CodeGen/AMDGPU/dpp_combine.ll
index 4d979e2cf74962c..a72bcbb529e7d9f 100644
--- a/llvm/test/CodeGen/AMDGPU/dpp_combine.ll
+++ b/llvm/test/CodeGen/AMDGPU/dpp_combine.ll
@@ -1,6 +1,7 @@
 ; RUN: llc -march=amdgcn -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck %s -check-prefix=GCN
 ; RUN: llc -march=amdgcn -mcpu=gfx1010 -verify-machineinstrs < %s | FileCheck %s -check-prefix=GCN
 ; RUN: llc -march=amdgcn -mcpu=gfx1100 -verify-machineinstrs < %s | FileCheck %s -check-prefix=GCN
+; RUN: llc -march=amdgcn -mcpu=gfx1150 -verify-machineinstrs < %s | FileCheck %s -check-prefix=GCN
 
 ; GCN-LABEL: {{^}}dpp_add:
 ; GCN: global_load_{{dword|b32}} [[V:v[0-9]+]],
diff --git a/llvm/test/CodeGen/AMDGPU/dpp_combine_gfx11.mir b/llvm/test/CodeGen/AMDGPU/dpp_combine_gfx11.mir
index 91e97a36a597a08..59649263f19a577 100644
--- a/llvm/test/CodeGen/AMDGPU/dpp_combine_gfx11.mir
+++ b/llvm/test/CodeGen/AMDGPU/dpp_combine_gfx11.mir
@@ -1,4 +1,5 @@
-# RUN: llc -march=amdgcn -mcpu=gfx1100 -run-pass=gcn-dpp-combine -verify-machineinstrs -o - %s | FileCheck %s -check-prefix=GCN
+# RUN: llc -march=amdgcn -mcpu=gfx1100 -run-pass=gcn-dpp-combine -verify-machineinstrs -o - %s | FileCheck %s -check-prefixes=GCN,GFX1100
+# RUN: llc -march=amdgcn -mcpu=gfx1150 -run-pass=gcn-dpp-combine -verify-machineinstrs -o - %s | FileCheck %s -check-prefixes=GCN,GFX1150
 
 ---
 
@@ -6,7 +7,8 @@
 # GCN: %6:vgpr_32, %7:sreg_32_xm0_xexec = V_SUBBREV_U32_e64_dpp %3, %0, %1, %5, 1, 1, 15, 15, 1, implicit $exec
 # GCN: %8:vgpr_32 = V_CVT_PK_U8_F32_e64_dpp %3, 4, %0, 2, %2, 2, %1, 1, 1, 15, 15, 1, implicit $mode, implicit $exec
 # GCN: %10:vgpr_32 = V_MED3_F32_e64 0, %9, 0, %0, 0, 12345678, 0, 0, implicit $mode, implicit $exec
-# GCN: %12:vgpr_32 = V_MED3_F32_e64 0, %11, 0, 2, 0, %7, 0, 0, implicit $mode, implicit $exec
+# GFX1100: %12:vgpr_32 = V_MED3_F32_e64 0, %11, 0, 2, 0, %7, 0, 0, implicit $mode, implicit $exec
+# GFX1150: %12:vgpr_32 = V_MED3_F32_e64_dpp %3, 0, %1, 0, 2, 0, %7, 0, 0, 1, 15, 15, 1, implicit $mode, implicit $exec
 name: vop3
 tracksRegLiveness: true
 body:             |
@@ -28,11 +30,53 @@ body:             |
     %9:vgpr_32 = V_MOV_B32_dpp %3, %1, 1, 15, 15, 1, implicit $exec
     %10:vgpr_32 = V_MED3_F32_e64 0, %9, 0, %0, 0, 12345678, 0, 0, implicit $mode, implicit $exec
 
-    ; should not be combined because src1 imm is illegal
+    ; should not be combined on subtargets where src1 imm is illegal
     %11:vgpr_32 = V_MOV_B32_dpp %3, %1, 1, 15, 15, 1, implicit $exec
     %12:vgpr_32 = V_MED3_F32_e64 0, %11, 0, 2, 0, %7, 0, 0, implicit $mode, implicit $exec
 ...
 
+# GCN-label: name: vop3_sgpr_src1
+# GCN: %6:vgpr_32 = V_MED3_F32_e64_dpp %4, 0, %0, 0, %1, 0, %2, 0, 0, 1, 15, 15, 1, implicit $mode, implicit $exec
+# GFX1100: %8:vgpr_32 = V_MED3_F32_e64 0, %7, 0, %2, 0, %1, 0, 0, implicit $mode, implicit $exec
+# GFX1150: %8:vgpr_32 = V_MED3_F32_e64_dpp %4, 0, %0, 0, %2, 0, %1, 0, 0, 1, 15, 15, 1, implicit $mode, implicit $exec
+# GFX1100: %10:vgpr_32 = V_MED3_F32_e64 0, %9, 0, %2, 0, %3, 0, 0, implicit $mode, implicit $exec
+# GFX1150: %10:vgpr_32 = V_MED3_F32_e64_dpp %4, 0, %0, 0, %2, 0, %3, 0, 0, 1, 15, 15, 1, implicit $mode, implicit $exec
+# GFX1100: %12:vgpr_32 = V_MED3_F32_e64 0, %11, 0, 42, 0, %2, 0, 0, implicit $mode, implicit $exec
+# GFX1150: %12:vgpr_32 = V_MED3_F32_e64_dpp %4, 0, %0, 0, 42, 0, %2, 0, 0, 1, 15, 15, 1, implicit $mode, implicit $exec
+# GCN: %14:vgpr_32 = V_MED3_F32_e64 0, %13, 0, 4242, 0, %2, 0, 0, implicit $mode, implicit $exec
+name: vop3_sgpr_src1
+tracksRegLiveness: true
+body:             |
+  bb.0:
+    liveins: $vgpr0, $vgpr1, $sgpr0, $sgpr1
+
+    %0:vgpr_32 = COPY $vgpr0
+    %1:vgpr_32 = COPY $vgpr1
+    %2:sgpr_32 = COPY $sgpr0
+    %3:sgpr_32 = COPY $sgpr1
+    %4:vgpr_32 = IMPLICIT_DEF
+
+    ; should be combined because src2 allows sgpr
+    %5:vgpr_32 = V_MOV_B32_dpp %4, %0, 1, 15, 15, 1, implicit $exec
+    %6:vgpr_32 = V_MED3_F32_e64 0, %5, 0, %1, 0, %2, 0, 0, implicit $mode, implicit $exec
+
+    ; should be combined only on subtargets that allow sgpr for src1
+    %7:vgpr_32 = V_MOV_B32_dpp %4, %0, 1, 15, 15, 1, implicit $exec
+    %8:vgpr_32 = V_MED3_F32_e64 0, %7, 0, %2, 0, %1, 0, 0, implicit $mode, implicit $exec
+
+    ; should be combined only on subtargets that allow sgpr for src1
+    %9:vgpr_32 = V_MOV_B32_dpp %4, %0, 1, 15, 15, 1, implicit $exec
+    %10:vgpr_32 = V_MED3_F32_e64 0, %9, 0, %2, 0, %3, 0, 0, implicit $mode, implicit $exec
+
+    ; should be combined only on subtargets that allow inlinable constants for src1
+    %11:vgpr_32 = V_MOV_B32_dpp %4, %0, 1, 15, 15, 1, implicit $exec
+    %12:vgpr_32 = V_MED3_F32_e64 0, %11, 0, 42, 0, %2, 0, 0, implicit $mode, implicit $exec
+
+    ; should not be combined when literal constants are used
+    %13:vgpr_32 = V_MOV_B32_dpp %4, %0, 1, 15, 15, 1, implicit $exec
+    %14:vgpr_32 = V_MED3_F32_e64 0, %13, 0, 4242, 0, %2, 0, 0, implicit $mode, implicit $exec
+...
+
 # Regression test for src_modifiers on base u16 opcode
 # GCN-label: name: vop3_u16
 # GCN: %5:vgpr_32 = V_ADD_NC_U16_e64_dpp %3, 0, %1, 0, %3, 0, 0, 1, 15, 15, 1, implicit $exec
diff --git a/llvm/test/MC/AMDGPU/gfx1150_asm_features.s b/llvm/test/MC/AMDGPU/gfx1150_asm_features.s
new file mode 100644
index 000000000000000..ab7dda8485570d9
--- /dev/null
+++ b/llvm/test/MC/AMDGPU/gfx1150_asm_features.s
@@ -0,0 +1,20 @@
+
+// RUN: llvm-mc -arch=amdgcn -show-encoding -mcpu=gfx1150 %s | FileCheck --check-prefix=GFX1150 %s
+// RUN: llvm-mc -arch=amdgcn -show-encoding -mcpu=gfx1151 %s | FileCheck --check-prefix=GFX1150 %s
+
+//
+// Subtargets allow src1 of VOP3 DPP instructions to be SGPR or inlinable
+// constant.
+//
+
+v_add3_u32_e64_dpp v5, v1, s2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf
+// GFX1150: encoding: [0x05,0x00,0x55,0xd6,0xfa,0x04,0x0c,0x04,0x01,0x1b,0x00,0xff]
+
+v_add3_u32_e64_dpp v5, v1, 42, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf
+// GFX1150: encoding: [0x05,0x00,0x55,0xd6,0xfa,0x54,0x0d,0x04,0x01,0x1b,0x00,0xff]
+
+v_add3_u32_e64_dpp v5, v1, s2, v0 dpp8:[7,6,5,4,3,2,1,0]
+// GFX1150: encoding: [0x05,0x00,0x55,0xd6,0xe9,0x04,0x00,0x04,0x01,0x77,0x39,0x05]
+
+v_add3_u32_e64_dpp v5, v1, 42, v0 dpp8:[7,6,5,4,3,2,1,0]
+// GFX1150: encoding: [0x05,0x00,0x55,0xd6,0xe9,0x54,0x01,0x04,0x01,0x77,0x39,0x05]
diff --git a/llvm/test/MC/AMDGPU/gfx11_asm_err.s b/llvm/test/MC/AMDGPU/gfx11_asm_err.s
index e475a1d5190777e..6a3a0ca40bfb9ba 100644
--- a/llvm/test/MC/AMDGPU/gfx11_asm_err.s
+++ b/llvm/test/MC/AMDGPU/gfx11_asm_err.s
@@ -45,6 +45,9 @@ v_add3_u32_e64_dpp v5, v1, v2, 49812340 dpp8:[7,6,5,4,3,2,1,0]
 v_add3_u32_e64_dpp v5, v1, s1, v0 dpp8:[7,6,5,4,3,2,1,0]
 // GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
 
+v_add3_u32_e64_dpp v5, v1, 42, v0 dpp8:[7,6,5,4,3,2,1,0]
+// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
+
 v_cvt_f32_i32_e64_dpp v5, s1 dpp8:[7,6,5,4,3,2,1,0]
 // GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction

github-actions · 2023-09-26T17:17:16Z

✅ With the latest revision this PR passed the C/C++ code formatter.

Sisyph

Please add a disassembler test. It might be permissible to accept sgpr as src1 on gfx11 as well as gfx1150 (i.e. disassembler is not strict), but we should at least ensure it is acceptable on gfx1150.

llvm/lib/Target/AMDGPU/GCNDPPCombine.cpp

rampitec

LGTM with couple nits.

llvm/test/CodeGen/AMDGPU/dpp_combine_gfx11.mir

llvm/test/MC/AMDGPU/gfx1150_asm_features.s

…rgets In order to avoid duplicating every dpp pseudo opcode that has src1, we allow it for all opcodes and add manual checks on subtargets that do not support it.

mbrkusanin · 2023-09-28T15:40:07Z

clang-format
added disassembler test
added assert to check if size of src0 and src1 is same for dpp that would be generated
added '---' to .mir test
test formatting
rebase

Sisyph

LGTM

Local branch amd-gfx 80abf82 Merged main:b4afade17564 into amd-gfx:259556a2c143 Remote branch main 2cd2445 [AMDGPU] Src1 of VOP3 DPP instructions can be SGPR on supported subtargets (llvm#67461)

jayfoad · 2024-01-12T10:56:30Z

@mbrkusanin @Sisyph it seems like this works for integer instructions:

$ llvm-mc -arch=amdgcn -mcpu=gfx1150 -show-encoding <<< "v_add_nc_u32_e64_dpp v5, v1, s3 quad_perm:[3,2,1,0]"
	.text
	v_add_nc_u32_e64_dpp v5, v1, s3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x25,0xd5,0xfa,0x06,0x00,0x00,0x01,0x1b,0x00,0xff]

but not for floating point:

$ llvm-mc -arch=amdgcn -mcpu=gfx1150 -show-encoding <<< "v_add_f32_e64_dpp v5, v1, s3 quad_perm:[3,2,1,0]"
	.text
<stdin>:1:27: error: invalid operand for instruction
v_add_f32_e64_dpp v5, v1, s3 quad_perm:[3,2,1,0]
                          ^

Is that a bug or is there some reason for it?

mbrkusanin · 2024-01-12T14:27:25Z

@mbrkusanin @Sisyph it seems like this works for integer instructions:

$ llvm-mc -arch=amdgcn -mcpu=gfx1150 -show-encoding <<< "v_add_nc_u32_e64_dpp v5, v1, s3 quad_perm:[3,2,1,0]"
	.text
	v_add_nc_u32_e64_dpp v5, v1, s3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x25,0xd5,0xfa,0x06,0x00,0x00,0x01,0x1b,0x00,0xff]

but not for floating point:

$ llvm-mc -arch=amdgcn -mcpu=gfx1150 -show-encoding <<< "v_add_f32_e64_dpp v5, v1, s3 quad_perm:[3,2,1,0]"
	.text
<stdin>:1:27: error: invalid operand for instruction
v_add_f32_e64_dpp v5, v1, s3 quad_perm:[3,2,1,0]
                          ^

Is that a bug or is there some reason for it?

A bug. Looks like class for src1_modifiers needs to be updated in tablegen, it differs for ints and floats. I'm working on a patch.

…ng VOPC. Fixes support on GFX1150 and GFX12 where src1 of e64_dpp instructions should allow sgpr and imm operands. PR llvm#67461 added support for this with int operands, but it was missing a piece for float. Changing VOPC e64_dpp will be in a different patch because there is a bug preventing that change.

#87382) …ng VOPC. Fixes support on GFX1150 and GFX12 where src1 of e64_dpp instructions should allow sgpr and imm operands. PR #67461 added support for this with int operands, but it was missing a piece for float. Changing VOPC e64_dpp will be in a different patch because there is a bug preventing that change.

llvm#67461 enabled SGPRs as src1 by default for all opcodes with manual checks for targets where this is not supported. llvm#155595 disabled this check for hasDPPSrc1SGPR, which resulted in SGPRs being used as operands on gfx11. This PR reenables this check and fixes the corresponding lit test.

mbrkusanin requested review from a team, Sisyph, jayfoad and rampitec September 26, 2023 17:14

llvmbot added backend:AMDGPU llvm:mc Machine (object) code labels Sep 26, 2023

Sisyph reviewed Sep 26, 2023

View reviewed changes

llvm/lib/Target/AMDGPU/GCNDPPCombine.cpp Outdated Show resolved Hide resolved

rampitec reviewed Sep 26, 2023

View reviewed changes

llvm/test/CodeGen/AMDGPU/dpp_combine_gfx11.mir Outdated Show resolved Hide resolved

llvm/test/MC/AMDGPU/gfx1150_asm_features.s Outdated Show resolved Hide resolved

[AMDGPU] Src1 of VOP3 DPP instructions can be SGPR on supported subta…

8af39e0

…rgets In order to avoid duplicating every dpp pseudo opcode that has src1, we allow it for all opcodes and add manual checks on subtargets that do not support it.

mbrkusanin force-pushed the dpp-src1-sgpr branch from bec805c to 8af39e0 Compare September 28, 2023 15:36

Sisyph approved these changes Sep 28, 2023

View reviewed changes

mbrkusanin merged commit 2cd2445 into llvm:main Sep 29, 2023

mbrkusanin deleted the dpp-src1-sgpr branch September 29, 2023 09:54

jayfoad mentioned this pull request Jan 12, 2024

[AMDGPU] Update tests for GFX12 errors and unsupported instructions #77624

Merged

jayfoad mentioned this pull request Jan 17, 2024

[AMDGPU] Src1 of VOP3 DPP instructions can be SGPR on GFX12 #77929

Merged

Sisyph mentioned this pull request Apr 2, 2024

[AMDGPU][MC] Enables sgpr or imm src1 for float VOP3 DPP, but excludi… #87382

Merged

ptrojahn mentioned this pull request Oct 20, 2025

[AMDGPU] Disable dpp src1 sgpr on gfx11 #164241

Open

Provide feedback

Saved searches

Use saved searches to filter your results more quickly

Uh oh!

[AMDGPU] Src1 of VOP3 DPP instructions can be SGPR on supported subtargets #67461

[AMDGPU] Src1 of VOP3 DPP instructions can be SGPR on supported subtargets #67461

Uh oh!

mbrkusanin commented Sep 26, 2023

Uh oh!

llvmbot commented Sep 26, 2023 •

edited

Loading

Uh oh!

github-actions bot commented Sep 26, 2023 •

edited

Loading

Uh oh!

Sisyph left a comment

Uh oh!

Uh oh!

rampitec left a comment

Uh oh!

Uh oh!

Uh oh!

mbrkusanin commented Sep 28, 2023

Uh oh!

Sisyph left a comment

Uh oh!

jayfoad commented Jan 12, 2024

Uh oh!

mbrkusanin commented Jan 12, 2024

Uh oh!

Reviewers

Assignees

Labels

Projects

Milestone

Development

Uh oh!

5 participants

[AMDGPU] Src1 of VOP3 DPP instructions can be SGPR on supported subtargets #67461

[AMDGPU] Src1 of VOP3 DPP instructions can be SGPR on supported subtargets #67461

Uh oh!

Conversation

mbrkusanin commented Sep 26, 2023

Uh oh!

llvmbot commented Sep 26, 2023 • edited Loading Uh oh! There was an error while loading. Please reload this page.

Uh oh!

Uh oh!

github-actions bot commented Sep 26, 2023 • edited Loading Uh oh! There was an error while loading. Please reload this page.

Uh oh!

Uh oh!

Sisyph left a comment

Choose a reason for hiding this comment

Uh oh!

Uh oh!

rampitec left a comment

Choose a reason for hiding this comment

Uh oh!

Uh oh!

Uh oh!

mbrkusanin commented Sep 28, 2023

Uh oh!

Sisyph left a comment

Choose a reason for hiding this comment

Uh oh!

jayfoad commented Jan 12, 2024

Uh oh!

mbrkusanin commented Jan 12, 2024

Uh oh!

Reviewers

Assignees

Labels

Projects

Milestone

Development

Uh oh!

5 participants

llvmbot commented Sep 26, 2023 •

edited

Loading

github-actions bot commented Sep 26, 2023 •

edited

Loading