-
Notifications
You must be signed in to change notification settings - Fork 12k
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
[AMDGPU][True16][MC] VINTERP instructions supporting true16/fake16 #113634
[AMDGPU][True16][MC] VINTERP instructions supporting true16/fake16 #113634
Conversation
@llvm/pr-subscribers-mc @llvm/pr-subscribers-backend-amdgpu Author: Brox Chen (broxigarchen) Changes: Update VInterp instructions with true16 and fake16 formats. This patch includes instructions: v_interp_p10_f16_f32, v_interp_p2_f16_f32, v_interp_p10_rtz_f16_f32, v_interp_p2_rtz_f16_f32. Patch is 80.94 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/113634.diff 7 Files Affected:
diff --git a/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp b/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp
index fdef9865b82c06..795e1cca2380f7 100644
--- a/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp
+++ b/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp
@@ -363,6 +363,21 @@ static DecodeStatus decodeOperand_VSrcT16(MCInst &Inst, unsigned Imm,
(AMDGPU::OperandSemantics)OperandSemantics));
}
+static DecodeStatus decodeOperand_VGPR_16(MCInst &Inst, unsigned Imm,
+ uint64_t /*Addr*/,
+ const MCDisassembler *Decoder) {
+ assert(isUInt<10>(Imm) && "10-bit encoding expected");
+
+ const auto *DAsm = static_cast<const AMDGPUDisassembler *>(Decoder);
+ if (Imm & AMDGPU::EncValues::IS_VGPR) {
+ bool IsHi = Imm & (1 << 9);
+ unsigned RegIdx = Imm & 0xff;
+ return addOperand(Inst, DAsm->createVGPR16Operand(RegIdx, IsHi));
+ }
+ return addOperand(Inst, DAsm->decodeNonVGPRSrcOp(AMDGPUDisassembler::OPW16,
+ Imm & 0xFF, false, 0));
+}
+
static DecodeStatus decodeOperand_KImmFP(MCInst &Inst, unsigned Imm,
uint64_t Addr,
const MCDisassembler *Decoder) {
@@ -763,14 +778,23 @@ void AMDGPUDisassembler::convertEXPInst(MCInst &MI) const {
}
void AMDGPUDisassembler::convertVINTERPInst(MCInst &MI) const {
- if (MI.getOpcode() == AMDGPU::V_INTERP_P10_F16_F32_inreg_gfx11 ||
- MI.getOpcode() == AMDGPU::V_INTERP_P10_F16_F32_inreg_gfx12 ||
- MI.getOpcode() == AMDGPU::V_INTERP_P10_RTZ_F16_F32_inreg_gfx11 ||
- MI.getOpcode() == AMDGPU::V_INTERP_P10_RTZ_F16_F32_inreg_gfx12 ||
- MI.getOpcode() == AMDGPU::V_INTERP_P2_F16_F32_inreg_gfx11 ||
- MI.getOpcode() == AMDGPU::V_INTERP_P2_F16_F32_inreg_gfx12 ||
- MI.getOpcode() == AMDGPU::V_INTERP_P2_RTZ_F16_F32_inreg_gfx11 ||
- MI.getOpcode() == AMDGPU::V_INTERP_P2_RTZ_F16_F32_inreg_gfx12) {
+ convertTrue16OpSel(MI);
+ if (MI.getOpcode() == AMDGPU::V_INTERP_P10_F16_F32_inreg_t16_gfx11 ||
+ MI.getOpcode() == AMDGPU::V_INTERP_P10_F16_F32_inreg_fake16_gfx11 ||
+ MI.getOpcode() == AMDGPU::V_INTERP_P10_F16_F32_inreg_t16_gfx12 ||
+ MI.getOpcode() == AMDGPU::V_INTERP_P10_F16_F32_inreg_fake16_gfx12 ||
+ MI.getOpcode() == AMDGPU::V_INTERP_P10_RTZ_F16_F32_inreg_t16_gfx11 ||
+ MI.getOpcode() == AMDGPU::V_INTERP_P10_RTZ_F16_F32_inreg_fake16_gfx11 ||
+ MI.getOpcode() == AMDGPU::V_INTERP_P10_RTZ_F16_F32_inreg_t16_gfx12 ||
+ MI.getOpcode() == AMDGPU::V_INTERP_P10_RTZ_F16_F32_inreg_fake16_gfx12 ||
+ MI.getOpcode() == AMDGPU::V_INTERP_P2_F16_F32_inreg_t16_gfx11 ||
+ MI.getOpcode() == AMDGPU::V_INTERP_P2_F16_F32_inreg_fake16_gfx11 ||
+ MI.getOpcode() == AMDGPU::V_INTERP_P2_F16_F32_inreg_t16_gfx12 ||
+ MI.getOpcode() == AMDGPU::V_INTERP_P2_F16_F32_inreg_fake16_gfx12 ||
+ MI.getOpcode() == AMDGPU::V_INTERP_P2_RTZ_F16_F32_inreg_t16_gfx11 ||
+ MI.getOpcode() == AMDGPU::V_INTERP_P2_RTZ_F16_F32_inreg_fake16_gfx11 ||
+ MI.getOpcode() == AMDGPU::V_INTERP_P2_RTZ_F16_F32_inreg_t16_gfx12 ||
+ MI.getOpcode() == AMDGPU::V_INTERP_P2_RTZ_F16_F32_inreg_fake16_gfx12) {
// The MCInst has this field that is not directly encoded in the
// instruction.
insertNamedMCOperand(MI, MCOperand::createImm(0), AMDGPU::OpName::op_sel);
diff --git a/llvm/lib/Target/AMDGPU/SIRegisterInfo.td b/llvm/lib/Target/AMDGPU/SIRegisterInfo.td
index 3556f6a95b521e..8e3f6a9ffcae82 100644
--- a/llvm/lib/Target/AMDGPU/SIRegisterInfo.td
+++ b/llvm/lib/Target/AMDGPU/SIRegisterInfo.td
@@ -1244,6 +1244,14 @@ def VRegSrc_128: SrcReg9<VReg_128, "OPW128">;
def VRegSrc_256: SrcReg9<VReg_256, "OPW256">;
def VRegOrLdsSrc_32 : SrcReg9<VRegOrLds_32, "OPW32">;
+// True 16 Operands
+def VRegSrc_16 : RegisterOperand<VGPR_16> {
+ let DecoderMethod = "decodeOperand_VGPR_16";
+ let EncoderMethod = "getMachineOpValueT16";
+}
+def VRegSrc_fake16: SrcReg9<VGPR_32, "OPW16"> {
+ let EncoderMethod = "getMachineOpValueT16";
+}
//===----------------------------------------------------------------------===//
// VGPRSrc_*
//===----------------------------------------------------------------------===//
diff --git a/llvm/lib/Target/AMDGPU/VINTERPInstructions.td b/llvm/lib/Target/AMDGPU/VINTERPInstructions.td
index 81768c1ef963e8..a2fe4d0f4422f6 100644
--- a/llvm/lib/Target/AMDGPU/VINTERPInstructions.td
+++ b/llvm/lib/Target/AMDGPU/VINTERPInstructions.td
@@ -11,29 +11,30 @@
//===----------------------------------------------------------------------===//
class VINTERPe <VOPProfile P> : Enc64 {
- bits<8> vdst;
+ bits<11> vdst;
bits<4> src0_modifiers;
- bits<9> src0;
+ bits<11> src0;
bits<3> src1_modifiers;
- bits<9> src1;
+ bits<11> src1;
bits<3> src2_modifiers;
- bits<9> src2;
+ bits<11> src2;
bits<1> clamp;
bits<3> waitexp;
let Inst{31-26} = 0x33; // VOP3P encoding
let Inst{25-24} = 0x1; // VINTERP sub-encoding
- let Inst{7-0} = vdst;
+ let Inst{7-0} = vdst{7-0};
let Inst{10-8} = waitexp;
- let Inst{11} = !if(P.HasOpSel, src0_modifiers{2}, 0); // op_sel(0)
- let Inst{12} = !if(P.HasOpSel, src1_modifiers{2}, 0); // op_sel(1)
- let Inst{13} = !if(P.HasOpSel, src2_modifiers{2}, 0); // op_sel(2)
- let Inst{14} = !if(P.HasOpSel, src0_modifiers{3}, 0); // op_sel(3)
+ // 16-bit select fields which can be interpreted as OpSel or hi/lo suffix
+ let Inst{11} = !if(P.HasSrc0, src0_modifiers{2}, 0);
+ let Inst{12} = !if(P.HasSrc1, src1_modifiers{2}, 0);
+ let Inst{13} = !if(P.HasSrc2, src2_modifiers{2}, 0);
+ let Inst{14} = !if(P.HasDst, src0_modifiers{3}, 0);
let Inst{15} = clamp;
- let Inst{40-32} = src0;
- let Inst{49-41} = src1;
- let Inst{58-50} = src2;
+ let Inst{40-32} = src0{8-0};
+ let Inst{49-41} = src1{8-0};
+ let Inst{58-50} = src2{8-0};
let Inst{61} = src0_modifiers{0}; // neg(0)
let Inst{62} = src1_modifiers{0}; // neg(1)
let Inst{63} = src2_modifiers{0}; // neg(2)
@@ -60,9 +61,10 @@ class VINTERP_Pseudo <string OpName, VOPProfile P, list<dag> pattern = []> :
let VINTERP = 1;
}
-class VINTERP_Real <VOP_Pseudo ps, int EncodingFamily> :
- VOP3_Real <ps, EncodingFamily> {
+class VINTERP_Real <VOP_Pseudo ps, int EncodingFamily, string asmName> :
+ VOP3_Real <ps, EncodingFamily, asmName> {
let VINTERP = 1;
+ let IsSingle = 1;
}
def VOP3_VINTERP_F32 : VOPProfile<[f32, f32, f32, f32]> {
@@ -83,44 +85,64 @@ def VOP3_VINTERP_F32 : VOPProfile<[f32, f32, f32, f32]> {
let Asm64 = " $vdst, $src0_modifiers, $src1_modifiers, $src2_modifiers$clamp$waitexp";
}
-class VOP3_VINTERP_F16 <list<ValueType> ArgVT> : VOPProfile<ArgVT> {
- let HasOpSel = 1;
- let HasModifiers = 1;
+class VOP3_VINTERP_F16_t16 <list<ValueType> ArgVT> : VOPProfile_True16<VOPProfile<ArgVT>> {
+ let Src0Mod = FPT16VRegInputMods</*Fake16*/0>;
+ let Src1Mod = FPVRegInputMods;
+ let Src2Mod = !if(!eq(ArgVT[3].Size, 16), FPT16VRegInputMods</*Fake16*/0>,
+ FPVRegInputMods);
+ let Ins64 = (ins Src0Mod:$src0_modifiers, VRegSrc_16:$src0,
+ Src1Mod:$src1_modifiers, VRegSrc_32:$src1,
+ Src2Mod:$src2_modifiers,
+ !if(!eq(ArgVT[3].Size, 16), VRegSrc_16, VRegSrc_32):$src2,
+ Clamp:$clamp, op_sel0:$op_sel,
+ WaitEXP:$waitexp);
- let Src0Mod = FPVRegInputMods;
+ let Asm64 = "$vdst, $src0_modifiers, $src1_modifiers, $src2_modifiers$clamp$op_sel$waitexp";
+}
+
+class VOP3_VINTERP_F16_fake16 <list<ValueType> ArgVT> : VOPProfile_Fake16<VOPProfile<ArgVT>> {
+ let Src0Mod = FPT16VRegInputMods</*Fake16*/1>;
let Src1Mod = FPVRegInputMods;
- let Src2Mod = FPVRegInputMods;
+ let Src2Mod = !if(!eq(ArgVT[3].Size, 16), FPT16VRegInputMods</*Fake16*/1>,
+ FPVRegInputMods);
- let Outs64 = (outs VGPR_32:$vdst);
- let Ins64 = (ins Src0Mod:$src0_modifiers, VRegSrc_32:$src0,
+ let Ins64 = (ins Src0Mod:$src0_modifiers, VRegSrc_fake16:$src0,
Src1Mod:$src1_modifiers, VRegSrc_32:$src1,
- Src2Mod:$src2_modifiers, VRegSrc_32:$src2,
+ Src2Mod:$src2_modifiers,
+ !if(!eq(ArgVT[3].Size, 16), VRegSrc_fake16, VRegSrc_32):$src2,
Clamp:$clamp, op_sel0:$op_sel,
WaitEXP:$waitexp);
- let Asm64 = " $vdst, $src0_modifiers, $src1_modifiers, $src2_modifiers$clamp$op_sel$waitexp";
-}
+ let Asm64 = "$vdst, $src0_modifiers, $src1_modifiers, $src2_modifiers$clamp$op_sel$waitexp";
+ }
+
+
//===----------------------------------------------------------------------===//
// VINTERP Pseudo Instructions
//===----------------------------------------------------------------------===//
-
let SubtargetPredicate = HasVINTERPEncoding in {
+multiclass VINTERP_t16<string OpName, list<ValueType> ArgVT> {
+ let True16Predicate = UseRealTrue16Insts in {
+ def _t16 : VINTERP_Pseudo<OpName#"_t16", VOP3_VINTERP_F16_t16<ArgVT>> ;
+ }
+ let True16Predicate = UseFakeTrue16Insts in {
+ def _fake16 : VINTERP_Pseudo<OpName#"_fake16", VOP3_VINTERP_F16_fake16<ArgVT>> ;
+ }
+}
+
let Uses = [M0, EXEC, MODE] in {
def V_INTERP_P10_F32_inreg : VINTERP_Pseudo <"v_interp_p10_f32", VOP3_VINTERP_F32>;
def V_INTERP_P2_F32_inreg : VINTERP_Pseudo <"v_interp_p2_f32", VOP3_VINTERP_F32>;
-def V_INTERP_P10_F16_F32_inreg :
- VINTERP_Pseudo <"v_interp_p10_f16_f32", VOP3_VINTERP_F16<[f32, f32, f32, f32]>>;
-def V_INTERP_P2_F16_F32_inreg :
- VINTERP_Pseudo <"v_interp_p2_f16_f32", VOP3_VINTERP_F16<[f16, f32, f32, f32]>>;
+
+defm V_INTERP_P10_F16_F32_inreg : VINTERP_t16<"v_interp_p10_f16_f32", [f32, f16, f32, f16]>;
+defm V_INTERP_P2_F16_F32_inreg : VINTERP_t16<"v_interp_p2_f16_f32", [f16, f16, f32, f32]>;
} // Uses = [M0, EXEC, MODE]
let Uses = [M0, EXEC] in {
-def V_INTERP_P10_RTZ_F16_F32_inreg :
- VINTERP_Pseudo <"v_interp_p10_rtz_f16_f32", VOP3_VINTERP_F16<[f32, f32, f32, f32]>>;
-def V_INTERP_P2_RTZ_F16_F32_inreg :
- VINTERP_Pseudo <"v_interp_p2_rtz_f16_f32", VOP3_VINTERP_F16<[f16, f32, f32, f32]>>;
+defm V_INTERP_P10_RTZ_F16_F32_inreg : VINTERP_t16<"v_interp_p10_rtz_f16_f32", [f32, f16, f32, f16]>;
+defm V_INTERP_P2_RTZ_F16_F32_inreg : VINTERP_t16 <"v_interp_p2_rtz_f16_f32", [f16, f16, f32, f32]>;
} // Uses = [M0, EXEC]
} // SubtargetPredicate = HasVINTERPEncoding.
@@ -137,11 +159,6 @@ class VInterpF32Pat <SDPatternOperator op, Instruction inst> : GCNPat <
7) /* wait_exp */
>;
-def VINTERP_OPSEL {
- int LOW = 0;
- int HIGH = 0xa;
-}
-
class VInterpF16Pat <SDPatternOperator op, Instruction inst,
ValueType dst_type, bit high,
list<ComplexPattern> pat> : GCNPat <
@@ -167,45 +184,60 @@ multiclass VInterpF16Pat <SDPatternOperator op, Instruction inst,
def : VInterpF32Pat<int_amdgcn_interp_inreg_p10, V_INTERP_P10_F32_inreg>;
def : VInterpF32Pat<int_amdgcn_interp_inreg_p2, V_INTERP_P2_F32_inreg>;
+
+let True16Predicate = UseFakeTrue16Insts in {
defm : VInterpF16Pat<int_amdgcn_interp_inreg_p10_f16,
- V_INTERP_P10_F16_F32_inreg, f32,
+ V_INTERP_P10_F16_F32_inreg_fake16, f32,
[VINTERPModsHi, VINTERPMods, VINTERPModsHi]>;
defm : VInterpF16Pat<int_amdgcn_interp_inreg_p2_f16,
- V_INTERP_P2_F16_F32_inreg, f16,
+ V_INTERP_P2_F16_F32_inreg_fake16, f16,
[VINTERPModsHi, VINTERPMods, VINTERPMods]>;
defm : VInterpF16Pat<int_amdgcn_interp_p10_rtz_f16,
- V_INTERP_P10_RTZ_F16_F32_inreg, f32,
+ V_INTERP_P10_RTZ_F16_F32_inreg_fake16, f32,
[VINTERPModsHi, VINTERPMods, VINTERPModsHi]>;
defm : VInterpF16Pat<int_amdgcn_interp_p2_rtz_f16,
- V_INTERP_P2_RTZ_F16_F32_inreg, f16,
+ V_INTERP_P2_RTZ_F16_F32_inreg_fake16, f16,
[VINTERPModsHi, VINTERPMods, VINTERPMods]>;
+}
//===----------------------------------------------------------------------===//
// VINTERP Real Instructions
//===----------------------------------------------------------------------===//
-multiclass VINTERP_Real_gfx11 <bits<7> op> {
- let AssemblerPredicate = isGFX11Only, DecoderNamespace = "GFX11" in {
- def _gfx11 :
- VINTERP_Real<!cast<VOP3_Pseudo>(NAME), SIEncodingFamily.GFX11>,
- VINTERPe_gfx11<op, !cast<VOP3_Pseudo>(NAME).Pfl>;
+multiclass VINTERP_Real_gfx11 <bits<7> op, string asmName> {
+ defvar ps = !cast<VOP3_Pseudo>(NAME);
+ let AssemblerPredicate = isGFX11Only,
+ DecoderNamespace = "GFX11" #
+ !if(ps.Pfl.IsRealTrue16, "", "_FAKE16") in {
+ def _gfx11 :
+ VINTERP_Real<ps, SIEncodingFamily.GFX11, asmName>,
+ VINTERPe_gfx11<op, ps.Pfl>;
}
}
-multiclass VINTERP_Real_gfx12 <bits<7> op> {
- let AssemblerPredicate = isGFX12Only, DecoderNamespace = "GFX12" in {
- def _gfx12 :
- VINTERP_Real<!cast<VOP3_Pseudo>(NAME), SIEncodingFamily.GFX12>,
- VINTERPe_gfx12<op, !cast<VOP3_Pseudo>(NAME).Pfl>;
+multiclass VINTERP_Real_gfx12 <bits<7> op, string asmName> {
+ defvar ps = !cast<VOP3_Pseudo>(NAME);
+ let AssemblerPredicate = isGFX12Only,
+ DecoderNamespace = "GFX12" #
+ !if(ps.Pfl.IsRealTrue16, "", "_FAKE16") in {
+ def _gfx12 :
+ VINTERP_Real<ps, SIEncodingFamily.GFX12, asmName>,
+ VINTERPe_gfx12<op, ps.Pfl>;
}
}
-multiclass VINTERP_Real_gfx11_gfx12 <bits<7> op> :
- VINTERP_Real_gfx11<op>, VINTERP_Real_gfx12<op>;
+multiclass VINTERP_Real_gfx11_gfx12 <bits<7> op, string asmName = !cast<VOP3_Pseudo>(NAME).Mnemonic, string opName = NAME> :
+ VINTERP_Real_gfx11<op, asmName>, VINTERP_Real_gfx12<op, asmName>;
+
+multiclass VINTERP_Real_t16_and_fake16_gfx11_gfx12 <bits<7> op, string asmName = !cast<VOP3_Pseudo>(NAME).Mnemonic, string opName = NAME> {
+ defm _t16: VINTERP_Real_gfx11_gfx12<op, asmName, opName#"_t16">;
+ defm _fake16: VINTERP_Real_gfx11_gfx12<op, asmName, opName#"_fake16">;
+}
+
defm V_INTERP_P10_F32_inreg : VINTERP_Real_gfx11_gfx12<0x000>;
defm V_INTERP_P2_F32_inreg : VINTERP_Real_gfx11_gfx12<0x001>;
-defm V_INTERP_P10_F16_F32_inreg : VINTERP_Real_gfx11_gfx12<0x002>;
-defm V_INTERP_P2_F16_F32_inreg : VINTERP_Real_gfx11_gfx12<0x003>;
-defm V_INTERP_P10_RTZ_F16_F32_inreg : VINTERP_Real_gfx11_gfx12<0x004>;
-defm V_INTERP_P2_RTZ_F16_F32_inreg : VINTERP_Real_gfx11_gfx12<0x005>;
+defm V_INTERP_P10_F16_F32_inreg : VINTERP_Real_t16_and_fake16_gfx11_gfx12<0x002, "v_interp_p10_f16_f32">;
+defm V_INTERP_P2_F16_F32_inreg : VINTERP_Real_t16_and_fake16_gfx11_gfx12<0x003, "v_interp_p2_f16_f32">;
+defm V_INTERP_P10_RTZ_F16_F32_inreg : VINTERP_Real_t16_and_fake16_gfx11_gfx12<0x004, "v_interp_p10_rtz_f16_f32">;
+defm V_INTERP_P2_RTZ_F16_F32_inreg : VINTERP_Real_t16_and_fake16_gfx11_gfx12<0x005, "v_interp_p2_rtz_f16_f32">;
diff --git a/llvm/test/CodeGen/AMDGPU/waitcnt-vinterp.mir b/llvm/test/CodeGen/AMDGPU/waitcnt-vinterp.mir
index f382800bfd3918..c4e31de14002de 100644
--- a/llvm/test/CodeGen/AMDGPU/waitcnt-vinterp.mir
+++ b/llvm/test/CodeGen/AMDGPU/waitcnt-vinterp.mir
@@ -15,16 +15,16 @@ body: |
; GFX11-NEXT: $vgpr2 = LDS_PARAM_LOAD 0, 1, 0, implicit $m0, implicit $exec
; GFX11-NEXT: $vgpr3 = LDS_PARAM_LOAD 0, 2, 0, implicit $m0, implicit $exec
; GFX11-NEXT: $vgpr4 = LDS_PARAM_LOAD 0, 3, 0, implicit $m0, implicit $exec
- ; GFX11-NEXT: $vgpr5 = V_INTERP_P10_F16_F32_inreg 0, $vgpr1, 0, $vgpr0, 0, $vgpr1, 0, 0, 2, implicit $m0, implicit $exec, implicit $mode
- ; GFX11-NEXT: $vgpr6 = V_INTERP_P10_F16_F32_inreg 0, $vgpr2, 0, $vgpr0, 0, $vgpr2, 0, 0, 2, implicit $m0, implicit $exec, implicit $mode
- ; GFX11-NEXT: $vgpr7 = V_INTERP_P10_F16_F32_inreg 0, $vgpr3, 0, $vgpr0, 0, $vgpr3, 0, 0, 1, implicit $m0, implicit $exec, implicit $mode
- ; GFX11-NEXT: $vgpr8 = V_INTERP_P10_F16_F32_inreg 0, $vgpr4, 0, $vgpr0, 0, $vgpr4, 0, 0, 0, implicit $m0, implicit $exec, implicit $mode
+ ; GFX11-NEXT: $vgpr5 = V_INTERP_P10_F16_F32_inreg_fake16 0, $vgpr1, 0, $vgpr0, 0, $vgpr1, 0, 0, 2, implicit $m0, implicit $exec, implicit $mode
+ ; GFX11-NEXT: $vgpr6 = V_INTERP_P10_F16_F32_inreg_fake16 0, $vgpr2, 0, $vgpr0, 0, $vgpr2, 0, 0, 2, implicit $m0, implicit $exec, implicit $mode
+ ; GFX11-NEXT: $vgpr7 = V_INTERP_P10_F16_F32_inreg_fake16 0, $vgpr3, 0, $vgpr0, 0, $vgpr3, 0, 0, 1, implicit $m0, implicit $exec, implicit $mode
+ ; GFX11-NEXT: $vgpr8 = V_INTERP_P10_F16_F32_inreg_fake16 0, $vgpr4, 0, $vgpr0, 0, $vgpr4, 0, 0, 0, implicit $m0, implicit $exec, implicit $mode
$vgpr1 = LDS_PARAM_LOAD 0, 0, 0, implicit $m0, implicit $exec
$vgpr2 = LDS_PARAM_LOAD 0, 1, 0, implicit $m0, implicit $exec
$vgpr3 = LDS_PARAM_LOAD 0, 2, 0, implicit $m0, implicit $exec
$vgpr4 = LDS_PARAM_LOAD 0, 3, 0, implicit $m0, implicit $exec
- $vgpr5 = V_INTERP_P10_F16_F32_inreg 0, $vgpr1, 0, $vgpr0, 0, $vgpr1, 0, 0, 2, implicit $m0, implicit $exec, implicit $mode
- $vgpr6 = V_INTERP_P10_F16_F32_inreg 0, $vgpr2, 0, $vgpr0, 0, $vgpr2, 0, 0, 2, implicit $m0, implicit $exec, implicit $mode
- $vgpr7 = V_INTERP_P10_F16_F32_inreg 0, $vgpr3, 0, $vgpr0, 0, $vgpr3, 0, 0, 2, implicit $m0, implicit $exec, implicit $mode
- $vgpr8 = V_INTERP_P10_F16_F32_inreg 0, $vgpr4, 0, $vgpr0, 0, $vgpr4, 0, 0, 2, implicit $m0, implicit $exec, implicit $mode
+ $vgpr5 = V_INTERP_P10_F16_F32_inreg_fake16 0, $vgpr1, 0, $vgpr0, 0, $vgpr1, 0, 0, 2, implicit $m0, implicit $exec, implicit $mode
+ $vgpr6 = V_INTERP_P10_F16_F32_inreg_fake16 0, $vgpr2, 0, $vgpr0, 0, $vgpr2, 0, 0, 2, implicit $m0, implicit $exec, implicit $mode
+ $vgpr7 = V_INTERP_P10_F16_F32_inreg_fake16 0, $vgpr3, 0, $vgpr0, 0, $vgpr3, 0, 0, 2, implicit $m0, implicit $exec, implicit $mode
+ $vgpr8 = V_INTERP_P10_F16_F32_inreg_fake16 0, $vgpr4, 0, $vgpr0, 0, $vgpr4, 0, 0, 2, implicit $m0, implicit $exec, implicit $mode
...
diff --git a/llvm/test/MC/AMDGPU/vinterp.s b/llvm/test/MC/AMDGPU/vinterp.s
new file mode 100644
index 00000000000000..3ab6db6a5a3999
--- /dev/null
+++ b/llvm/test/MC/AMDGPU/vinterp.s
@@ -0,0 +1,236 @@
+// RUN: llvm-mc -triple=amdgcn -mcpu=gfx1100 -mattr=+real-true16 -show-encoding %s | FileCheck -check-prefix=GCN %s
+// RUN: llvm-mc -triple=amdgcn -mcpu=gfx1200 -mattr=+real-true16 -show-encoding %s | FileCheck -check-prefix=GCN %s
+
+v_interp_p10_f32 v0, v1, v2, v3
+// GCN: v_interp_p10_f32 v0, v1, v2, v3 wait_exp:0 ; encoding: [0x00,0x00,0x00,0xcd,0x01,0x05,0x0e,0x04]
+
+v_interp_p10_f32 v1, v10, v20, v30
+// GCN: v_interp_p10_f32 v1, v10, v20, v30 wait_exp:0 ; encoding: [0x01,0x00,0x00,0xcd,0x0a,0x29,0x7a,0x04]
+
+v_interp_p10_f32 v2, v11, v21, v31
+// GCN: v_interp_p10_f32 v2, v11, v21, v31 wait_exp:0 ; encoding: [0x02,0x00,0x00,0xcd,0x0b,0x2b,0x7e,0x04]
+
+v_interp_p10_f32 v3, v12, v22, v32
+// GCN: v_interp_p10_f32 v3, v12, v22, v32 wait_exp:0 ; encoding: [0x03,0x00,0x00,0xcd,0x0c,0x2d,0x82,0x04]
+
+v_interp_p10_f32 v0, v1, v2, v3 clamp
+// GCN: v_interp_p10_f32 v0, v1, v2, v3 clamp wait_exp:0 ; encoding: [0x00,0x80,0x00,0xcd,0x01,0x05,0x0e,0x04]
+
+v_interp_p10_f32 v0, -v1, v2, v3
+// GCN: v_interp_p10_f32 v0, -v1, v2, v3 wait_exp:0 ; encoding: [0x00,0x00,0x00,0xcd,0x01,0x05,0x0e,0x24]
+
+v_interp_p10_f32 v0, v1, -v2, v3
+// GCN: v_interp_p10_f32 v0, v1, -v2, v3 wait_exp:0 ; encoding: [0x00,0x00,0x00,0xcd,0x01,0x05,0x0e,0x44]
+
+v_interp_p10_f32 v0, v1, v2, -v3
+// GCN: v_interp_p10_f32 v0, v1, v2, -v3 wait_exp:0 ; encoding: [0x00,0x00,0x00,0xcd,0x01,0x05,0x0e,0x84]
+
+v_interp_p10_f32 v0, v1, v2, v3 wait_exp:0
+// GCN: v_interp_p10_f32 v0, v1, v2, v3 wait_exp:0 ; encoding: [0x00,0x00,0x00,0xcd,0x01,0x05,0x0e,0x04]
+
+v_interp_p10_f32 v0, v1, v2, v3 wait_exp:1
+// GCN: v_interp_p10_f32 v0, v1, v2, v3 wait_exp:1 ; encoding: [0x00,0x01,0x00,0xcd,0x01,0x05,0x0e,0x04]
+
+v_interp_p10_f32 v0, v1, v2, v3 wait_exp:7
+// GCN: v_interp_p10_f32 v0, v1, v2, v3 wait_exp:7 ; encoding: [0x00,0x07,0x00,0xcd,0x01,0x05,0x0e,0x04]
+
+v_interp_p10_f32 v0, v1, v2, v3 clamp wait_exp:7
+// GCN: v_interp_p10_f32 v0, v1, v2, v3 clamp wait_exp:7 ; encoding: [0x00,0x87,0x00,0xcd,0x01,0x05,0x0e,0x04]
+
+v_interp_p2_f32 v0, v1, v2, v3
+// GCN: v_interp_p2_f32 v0, v1, v2, v3 wait_exp:0 ; encoding: [0x00,0x00,0x01,0xcd,0x01,0x05,0x0e,0x04]
+
+v_interp_p2_f32 v1, v10, v...
[truncated]
|
✅ With the latest revision this PR passed the C/C++ code formatter. |
8559f96
to
1dbb0b4
Compare
ping! |
# GFX12-FAKE16: v_interp_p10_f16_f32 v0, v1, v2, v3 op_sel:[0,0,0,1] wait_exp:0 | ||
|
||
0x00,0x78,0x02,0xcd,0x01,0x05,0x0e,0x04 | ||
# GFX11-TRUE16: v_interp_p10_f16_f32 v0, v1.h, v2, v3.h op_sel:[1,1,1,1] wait_exp:0 |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
These tests where op_sel is applied to 32-bit arguments do not make sense, as you should not apply op_sel to those. However, I see they have existed for a while, and were just ported in this patch. And we generally do not strictly reject things in the disassembler.
24d6d39
to
be0b3ec
Compare
ping! The CI failure seems unrelated to this patch |
be0b3ec
to
e1c3d96
Compare
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
LGTM
e1c3d96
to
9cd5cda
Compare
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
LGTM with a nit.
let Asm64 = " $vdst, $src0_modifiers, $src1_modifiers, $src2_modifiers$clamp$op_sel$waitexp"; | ||
} | ||
let Asm64 = "$vdst, $src0_modifiers, $src1_modifiers, $src2_modifiers$clamp$op_sel$waitexp"; | ||
} |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Unintended change adding the space?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
removed
# CHECK-TRUE16: v_interp_p10_f16_f32 v0, v1.l, v2, v3.l wait_exp:0 | ||
# CHECK-FAKE16: v_interp_p10_f16_f32 v0, v1, v2, v3 wait_exp:0 | ||
|
||
0x00,0x00,0x02,0xcd,0x01,0x05,0x0e,0x04 |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
There seem to be a number of duplicated lines in this file; try running a --unique update on this test.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Removed the duplicated dasm test lines.
e4e4236
to
af9984d
Compare
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Still LGTM
Seeing a few warnings after this change:
|
Update VInterp instructions with true16 and fake16 formats.
This patch includes instructions:
v_interp_p10_f16_f32
v_interp_p2_f16_f32
v_interp_p10_rtz_f16_f32
v_interp_p2_rtz_f16_f32
The dasm test vinterp-fake16.txt is removed and its test lines are merged into vinterp.txt, which handles both the true16 and fake16 cases.