Skip to content

Commit

Permalink
[AMDGPU][MC] Fix handling of A16 operands in intersect_ray instructions.
Browse files Browse the repository at this point in the history
The patch adds the support for 'noa16' operands in non-A16 variants of
the instructions, fixes validation of A16 operands and eliminates the
custom conversion to MCInst.

Part of <llvm/llvm-project#62629>.

Reviewed By: arsenm

Differential Revision: https://reviews.llvm.org/D155057
  • Loading branch information
kosarev authored and veselypeta committed Sep 5, 2024
2 parents 122c3dc + 289ae65 commit 85ae98c
Show file tree
Hide file tree
Showing 10 changed files with 76 additions and 78 deletions.
41 changes: 17 additions & 24 deletions llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1632,7 +1632,7 @@ class AMDGPUAsmParser : public MCTargetAsmParser {
bool validateMIMGGatherDMask(const MCInst &Inst);
bool validateMovrels(const MCInst &Inst, const OperandVector &Operands);
bool validateMIMGDataSize(const MCInst &Inst, const SMLoc &IDLoc);
bool validateMIMGAddrSize(const MCInst &Inst);
bool validateMIMGAddrSize(const MCInst &Inst, const SMLoc &IDLoc);
bool validateMIMGD16(const MCInst &Inst);
bool validateMIMGMSAA(const MCInst &Inst);
bool validateOpSel(const MCInst &Inst);
Expand Down Expand Up @@ -1742,8 +1742,6 @@ class AMDGPUAsmParser : public MCTargetAsmParser {
void cvtMIMG(MCInst &Inst, const OperandVector &Operands,
bool IsAtomic = false);
void cvtMIMGAtomic(MCInst &Inst, const OperandVector &Operands);
void cvtIntersectRay(MCInst &Inst, const OperandVector &Operands);

void cvtSMEMAtomic(MCInst &Inst, const OperandVector &Operands);

bool parseDimId(unsigned &Encoding);
Expand Down Expand Up @@ -3580,7 +3578,8 @@ bool AMDGPUAsmParser::validateMIMGDataSize(const MCInst &Inst,
return false;
}

bool AMDGPUAsmParser::validateMIMGAddrSize(const MCInst &Inst) {
bool AMDGPUAsmParser::validateMIMGAddrSize(const MCInst &Inst,
const SMLoc &IDLoc) {
const unsigned Opc = Inst.getOpcode();
const MCInstrDesc &Desc = MII.get(Opc);

Expand All @@ -3600,16 +3599,20 @@ bool AMDGPUAsmParser::validateMIMGAddrSize(const MCInst &Inst) {
assert(SrsrcIdx != -1);
assert(SrsrcIdx > VAddr0Idx);

if (DimIdx == -1)
return true; // intersect_ray
bool IsA16 = Inst.getOperand(A16Idx).getImm();
if (BaseOpcode->BVH) {
if (IsA16 == BaseOpcode->A16)
return true;
Error(IDLoc, "image address size does not match a16");
return false;
}

unsigned Dim = Inst.getOperand(DimIdx).getImm();
const AMDGPU::MIMGDimInfo *DimInfo = AMDGPU::getMIMGDimInfoByEncoding(Dim);
bool IsNSA = SrsrcIdx - VAddr0Idx > 1;
unsigned ActualAddrSize =
IsNSA ? SrsrcIdx - VAddr0Idx
: AMDGPU::getRegOperandSize(getMRI(), Desc, VAddr0Idx) / 4;
bool IsA16 = (A16Idx != -1 && Inst.getOperand(A16Idx).getImm());

unsigned ExpectedAddrSize =
AMDGPU::getAddrSizeMIMGOp(BaseOpcode, DimInfo, IsA16, hasG16());
Expand All @@ -3620,7 +3623,7 @@ bool AMDGPUAsmParser::validateMIMGAddrSize(const MCInst &Inst) {
unsigned VAddrLastSize =
AMDGPU::getRegOperandSize(getMRI(), Desc, VAddrLastIdx) / 4;

return VAddrLastIdx - VAddr0Idx + VAddrLastSize == ExpectedAddrSize;
ActualAddrSize = VAddrLastIdx - VAddr0Idx + VAddrLastSize;
}
} else {
if (ExpectedAddrSize > 12)
Expand All @@ -3633,7 +3636,11 @@ bool AMDGPUAsmParser::validateMIMGAddrSize(const MCInst &Inst) {
return true;
}

return ActualAddrSize == ExpectedAddrSize;
if (ActualAddrSize == ExpectedAddrSize)
return true;

Error(IDLoc, "image address size does not match dim and a16");
return false;
}

bool AMDGPUAsmParser::validateMIMGAtomicDMask(const MCInst &Inst) {
Expand Down Expand Up @@ -4569,11 +4576,8 @@ bool AMDGPUAsmParser::validateInstruction(const MCInst &Inst,
if (!validateMIMGDataSize(Inst, IDLoc)) {
return false;
}
if (!validateMIMGAddrSize(Inst)) {
Error(IDLoc,
"image address size does not match dim and a16");
if (!validateMIMGAddrSize(Inst, IDLoc))
return false;
}
if (!validateMIMGAtomicDMask(Inst)) {
Error(getImmLoc(AMDGPUOperand::ImmTyDMask, Operands),
"invalid atomic image dmask");
Expand Down Expand Up @@ -7760,17 +7764,6 @@ void AMDGPUAsmParser::cvtSMEMAtomic(MCInst &Inst, const OperandVector &Operands)
addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyCPol, 0);
}

void AMDGPUAsmParser::cvtIntersectRay(MCInst &Inst,
const OperandVector &Operands) {
for (unsigned I = 1; I < Operands.size(); ++I) {
auto &Operand = (AMDGPUOperand &)*Operands[I];
if (Operand.isReg())
Operand.addRegOperands(Inst, 1);
}

Inst.addOperand(MCOperand::createImm(1)); // a16
}

//===----------------------------------------------------------------------===//
// smrd
//===----------------------------------------------------------------------===//
Expand Down
3 changes: 1 addition & 2 deletions llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -865,8 +865,7 @@ DecodeStatus AMDGPUDisassembler::convertMIMGInst(MCInst &MI) const {
assert(VDataIdx != -1);
if (BaseOpcode->BVH) {
// Add A16 operand for intersect_ray instructions
if (AMDGPU::hasNamedOperand(MI.getOpcode(), AMDGPU::OpName::a16))
addOperand(MI, MCOperand::createImm(1));
addOperand(MI, MCOperand::createImm(BaseOpcode->A16));
return MCDisassembler::Success;
}

Expand Down
46 changes: 20 additions & 26 deletions llvm/lib/Target/AMDGPU/MIMGInstructions.td
Original file line number Diff line number Diff line change
Expand Up @@ -48,6 +48,7 @@ class MIMGBaseOpcode : PredicateControl {
bit IsAtomicRet = 0;
bit MSAA = 0;
bit BVH = 0;
bit A16 = 0;
}

def MIMGBaseOpcode : GenericEnum {
Expand All @@ -59,7 +60,7 @@ def MIMGBaseOpcodesTable : GenericTable {
let CppTypeName = "MIMGBaseOpcodeInfo";
let Fields = ["BaseOpcode", "Store", "Atomic", "AtomicX2", "Sampler",
"Gather4", "NumExtraArgs", "Gradients", "G16", "Coordinates",
"LodOrClampOrMip", "HasD16", "MSAA", "BVH"];
"LodOrClampOrMip", "HasD16", "MSAA", "BVH", "A16"];
string TypeOf_BaseOpcode = "MIMGBaseOpcode";

let PrimaryKey = ["BaseOpcode"];
Expand Down Expand Up @@ -1191,50 +1192,43 @@ class MIMG_IntersectRay_Helper<bit Is64, bit IsA16> {
[node_ptr_type, VGPR_32, VReg_96, VReg_96, VReg_96]);
}

class MIMG_IntersectRay_gfx10<mimgopc op, string opcode, RegisterClass AddrRC, bit IsA16>
class MIMG_IntersectRay_gfx10<mimgopc op, string opcode, RegisterClass AddrRC>
: MIMG_gfx10<op.GFX10M, (outs VReg_128:$vdata), "AMDGPU"> {

let InOperandList = !con((ins AddrRC:$vaddr0, SReg_128:$srsrc),
!if(IsA16, (ins A16:$a16), (ins)));
let AsmString = opcode#" $vdata, $vaddr0, $srsrc"#!if(IsA16, "$a16", "");
let InOperandList = (ins AddrRC:$vaddr0, SReg_128:$srsrc, A16:$a16);
let AsmString = opcode#" $vdata, $vaddr0, $srsrc$a16";

let nsa = 0;
}

class MIMG_IntersectRay_nsa_gfx10<mimgopc op, string opcode, int num_addrs, bit IsA16>
class MIMG_IntersectRay_nsa_gfx10<mimgopc op, string opcode, int num_addrs>
: MIMG_nsa_gfx10<op.GFX10M, (outs VReg_128:$vdata), num_addrs, "AMDGPU"> {
let InOperandList = !con(nsah.AddrIns,
(ins SReg_128:$srsrc),
!if(IsA16, (ins A16:$a16), (ins)));
let AsmString = opcode#" $vdata, "#nsah.AddrAsm#", $srsrc"#!if(IsA16, "$a16", "");
let InOperandList = !con(nsah.AddrIns, (ins SReg_128:$srsrc, A16:$a16));
let AsmString = opcode#" $vdata, "#nsah.AddrAsm#", $srsrc$a16";
}

class MIMG_IntersectRay_gfx11<mimgopc op, string opcode, RegisterClass AddrRC, bit IsA16>
class MIMG_IntersectRay_gfx11<mimgopc op, string opcode, RegisterClass AddrRC>
: MIMG_gfx11<op.GFX11, (outs VReg_128:$vdata), "AMDGPU"> {

let InOperandList = !con((ins AddrRC:$vaddr0, SReg_128:$srsrc),
!if(IsA16, (ins A16:$a16), (ins)));
let AsmString = opcode#" $vdata, $vaddr0, $srsrc"#!if(IsA16, "$a16", "");
let InOperandList = (ins AddrRC:$vaddr0, SReg_128:$srsrc, A16:$a16);
let AsmString = opcode#" $vdata, $vaddr0, $srsrc$a16";

let nsa = 0;
}

class MIMG_IntersectRay_nsa_gfx11<mimgopc op, string opcode, int num_addrs,
bit IsA16, list<RegisterClass> addr_types>
list<RegisterClass> addr_types>
: MIMG_nsa_gfx11<op.GFX11, (outs VReg_128:$vdata), num_addrs, "AMDGPU",
addr_types> {
let InOperandList = !con(nsah.AddrIns,
(ins SReg_128:$srsrc),
!if(IsA16, (ins A16:$a16), (ins)));
let AsmString = opcode#" $vdata, "#nsah.AddrAsm#", $srsrc"#!if(IsA16, "$a16", "");
let InOperandList = !con(nsah.AddrIns, (ins SReg_128:$srsrc, A16:$a16));
let AsmString = opcode#" $vdata, "#nsah.AddrAsm#", $srsrc$a16";
}

multiclass MIMG_IntersectRay<mimgopc op, string opcode, bit Is64, bit IsA16> {
defvar info = MIMG_IntersectRay_Helper<Is64, IsA16>;
def "" : MIMGBaseOpcode {
let BVH = 1;
let A16 = IsA16;
}
let AsmMatchConverter = !if(IsA16, "cvtIntersectRay", ""),
let AsmMatchConverter = "",
dmask = 0xf,
unorm = 1,
d16 = 0,
Expand All @@ -1248,17 +1242,17 @@ multiclass MIMG_IntersectRay<mimgopc op, string opcode, bit Is64, bit IsA16> {
d16 = 0,
BaseOpcode = !cast<MIMGBaseOpcode>(NAME),
VDataDwords = 4 in {
def _sa_gfx10 : MIMG_IntersectRay_gfx10<op, opcode, info.RegClass, IsA16> {
def _sa_gfx10 : MIMG_IntersectRay_gfx10<op, opcode, info.RegClass> {
let VAddrDwords = info.VAddrDwords;
}
def _sa_gfx11 : MIMG_IntersectRay_gfx11<op, opcode, info.RegClass, IsA16> {
def _sa_gfx11 : MIMG_IntersectRay_gfx11<op, opcode, info.RegClass> {
let VAddrDwords = info.VAddrDwords;
}
def _nsa_gfx10 : MIMG_IntersectRay_nsa_gfx10<op, opcode, info.num_addrs, IsA16> {
def _nsa_gfx10 : MIMG_IntersectRay_nsa_gfx10<op, opcode, info.num_addrs> {
let VAddrDwords = info.num_addrs;
}
def _nsa_gfx11 : MIMG_IntersectRay_nsa_gfx11<op, opcode,
info.gfx11_nsa_addrs, IsA16,
info.gfx11_nsa_addrs,
info.gfx11_addr_types> {
let VAddrDwords = info.num_addrs;
}
Expand Down
3 changes: 1 addition & 2 deletions llvm/lib/Target/AMDGPU/SIISelLowering.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -8044,8 +8044,7 @@ SDValue SITargetLowering::LowerINTRINSIC_W_CHAIN(SDValue Op,
}

Ops.push_back(TDescr);
if (IsA16)
Ops.push_back(DAG.getTargetConstant(1, DL, MVT::i1));
Ops.push_back(DAG.getTargetConstant(IsA16, DL, MVT::i1));
Ops.push_back(M->getChain());

auto *NewNode = DAG.getMachineNode(Opcode, DL, M->getVTList(), Ops);
Expand Down
1 change: 1 addition & 0 deletions llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h
Original file line number Diff line number Diff line change
Expand Up @@ -355,6 +355,7 @@ struct MIMGBaseOpcodeInfo {
bool HasD16;
bool MSAA;
bool BVH;
bool A16;
};

LLVM_READONLY
Expand Down
24 changes: 12 additions & 12 deletions llvm/test/CodeGen/AMDGPU/waitcnt-bvh.mir
Original file line number Diff line number Diff line change
Expand Up @@ -7,11 +7,11 @@ body: |
bb.0:
; GCN-LABEL: name: waitcnt-check-inorder
; GCN: S_WAITCNT 0
; GCN-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = IMAGE_BVH_INTERSECT_RAY_sa_gfx10 killed $vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14, renamable $sgpr0_sgpr1_sgpr2_sgpr3, implicit $exec :: (dereferenceable load (s128), addrspace 7)
; GCN-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = IMAGE_BVH_INTERSECT_RAY_sa_gfx10 killed $vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30, killed renamable $sgpr0_sgpr1_sgpr2_sgpr3, implicit $exec :: (dereferenceable load (s128), addrspace 7)
; GCN-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = IMAGE_BVH_INTERSECT_RAY_sa_gfx10 killed $vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14, renamable $sgpr0_sgpr1_sgpr2_sgpr3, 0, implicit $exec :: (dereferenceable load (s128), addrspace 7)
; GCN-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = IMAGE_BVH_INTERSECT_RAY_sa_gfx10 killed $vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30, killed renamable $sgpr0_sgpr1_sgpr2_sgpr3, 0, implicit $exec :: (dereferenceable load (s128), addrspace 7)
; GCN-NEXT: S_ENDPGM 0
$vgpr0_vgpr1_vgpr2_vgpr3 = IMAGE_BVH_INTERSECT_RAY_sa_gfx10 killed $vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14, renamable $sgpr0_sgpr1_sgpr2_sgpr3, implicit $exec :: (dereferenceable load (s128), addrspace 7)
$vgpr0_vgpr1_vgpr2_vgpr3 = IMAGE_BVH_INTERSECT_RAY_sa_gfx10 killed $vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30, killed renamable $sgpr0_sgpr1_sgpr2_sgpr3, implicit $exec :: (dereferenceable load (s128), addrspace 7)
$vgpr0_vgpr1_vgpr2_vgpr3 = IMAGE_BVH_INTERSECT_RAY_sa_gfx10 killed $vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14, renamable $sgpr0_sgpr1_sgpr2_sgpr3, 0, implicit $exec :: (dereferenceable load (s128), addrspace 7)
$vgpr0_vgpr1_vgpr2_vgpr3 = IMAGE_BVH_INTERSECT_RAY_sa_gfx10 killed $vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30, killed renamable $sgpr0_sgpr1_sgpr2_sgpr3, 0, implicit $exec :: (dereferenceable load (s128), addrspace 7)
S_ENDPGM 0
...
---
Expand All @@ -20,11 +20,11 @@ body: |
bb.0:
; GCN-LABEL: name: waitcnt-check-vs-vmem
; GCN: S_WAITCNT 0
; GCN-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = IMAGE_BVH_INTERSECT_RAY_sa_gfx10 killed $vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14, renamable $sgpr0_sgpr1_sgpr2_sgpr3, implicit $exec :: (dereferenceable load (s128), addrspace 7)
; GCN-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = IMAGE_BVH_INTERSECT_RAY_sa_gfx10 killed $vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14, renamable $sgpr0_sgpr1_sgpr2_sgpr3, 0, implicit $exec :: (dereferenceable load (s128), addrspace 7)
; GCN-NEXT: S_WAITCNT 16240
; GCN-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFEN $vgpr16, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 0, 0, 0, implicit $exec
; GCN-NEXT: S_ENDPGM 0
$vgpr0_vgpr1_vgpr2_vgpr3 = IMAGE_BVH_INTERSECT_RAY_sa_gfx10 killed $vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14, renamable $sgpr0_sgpr1_sgpr2_sgpr3, implicit $exec :: (dereferenceable load (s128), addrspace 7)
$vgpr0_vgpr1_vgpr2_vgpr3 = IMAGE_BVH_INTERSECT_RAY_sa_gfx10 killed $vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14, renamable $sgpr0_sgpr1_sgpr2_sgpr3, 0, implicit $exec :: (dereferenceable load (s128), addrspace 7)
$vgpr0 = BUFFER_LOAD_DWORD_OFFEN $vgpr16, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 0, 0, 0, implicit $exec
S_ENDPGM 0
...
Expand All @@ -34,11 +34,11 @@ body: |
bb.0:
; GCN-LABEL: name: waitcnt-check-vs-mimg-samp
; GCN: S_WAITCNT 0
; GCN-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = IMAGE_BVH_INTERSECT_RAY_sa_gfx10 killed $vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14, killed renamable $sgpr0_sgpr1_sgpr2_sgpr3, implicit $exec :: (dereferenceable load (s128), addrspace 7)
; GCN-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = IMAGE_BVH_INTERSECT_RAY_sa_gfx10 killed $vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14, killed renamable $sgpr0_sgpr1_sgpr2_sgpr3, 0, implicit $exec :: (dereferenceable load (s128), addrspace 7)
; GCN-NEXT: S_WAITCNT 16240
; GCN-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = IMAGE_SAMPLE_V4_V2 $vgpr20_vgpr21, $sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11, $sgpr0_sgpr1_sgpr2_sgpr3, 15, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s128), align 4, addrspace 4)
; GCN-NEXT: S_ENDPGM 0
$vgpr0_vgpr1_vgpr2_vgpr3 = IMAGE_BVH_INTERSECT_RAY_sa_gfx10 killed $vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14, killed renamable $sgpr0_sgpr1_sgpr2_sgpr3, implicit $exec :: (dereferenceable load (s128), addrspace 7)
$vgpr0_vgpr1_vgpr2_vgpr3 = IMAGE_BVH_INTERSECT_RAY_sa_gfx10 killed $vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14, killed renamable $sgpr0_sgpr1_sgpr2_sgpr3, 0, implicit $exec :: (dereferenceable load (s128), addrspace 7)
$vgpr0_vgpr1_vgpr2_vgpr3 = IMAGE_SAMPLE_V4_V2 $vgpr20_vgpr21, $sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11, $sgpr0_sgpr1_sgpr2_sgpr3, 15, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s128), align 4, addrspace 4)
S_ENDPGM 0
...
Expand All @@ -50,10 +50,10 @@ body: |
; GCN: S_WAITCNT 0
; GCN-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFEN $vgpr20, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 0, 0, 0, implicit $exec
; GCN-NEXT: S_WAITCNT 16240
; GCN-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = IMAGE_BVH_INTERSECT_RAY_sa_gfx10 killed $vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14, killed renamable $sgpr0_sgpr1_sgpr2_sgpr3, implicit $exec :: (dereferenceable load (s128), addrspace 7)
; GCN-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = IMAGE_BVH_INTERSECT_RAY_sa_gfx10 killed $vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14, killed renamable $sgpr0_sgpr1_sgpr2_sgpr3, 0, implicit $exec :: (dereferenceable load (s128), addrspace 7)
; GCN-NEXT: S_ENDPGM 0
$vgpr0 = BUFFER_LOAD_DWORD_OFFEN $vgpr20, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 0, 0, 0, implicit $exec
$vgpr0_vgpr1_vgpr2_vgpr3 = IMAGE_BVH_INTERSECT_RAY_sa_gfx10 killed $vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14, killed renamable $sgpr0_sgpr1_sgpr2_sgpr3, implicit $exec :: (dereferenceable load (s128), addrspace 7)
$vgpr0_vgpr1_vgpr2_vgpr3 = IMAGE_BVH_INTERSECT_RAY_sa_gfx10 killed $vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14, killed renamable $sgpr0_sgpr1_sgpr2_sgpr3, 0, implicit $exec :: (dereferenceable load (s128), addrspace 7)
S_ENDPGM 0
...
---
Expand All @@ -64,9 +64,9 @@ body: |
; GCN: S_WAITCNT 0
; GCN-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = IMAGE_SAMPLE_V4_V2 $vgpr16_vgpr17, $sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11, $sgpr0_sgpr1_sgpr2_sgpr3, 15, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s128), align 4, addrspace 4)
; GCN-NEXT: S_WAITCNT 16240
; GCN-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = IMAGE_BVH_INTERSECT_RAY_sa_gfx10 killed $vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14, killed renamable $sgpr0_sgpr1_sgpr2_sgpr3, implicit $exec :: (dereferenceable load (s128), addrspace 7)
; GCN-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = IMAGE_BVH_INTERSECT_RAY_sa_gfx10 killed $vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14, killed renamable $sgpr0_sgpr1_sgpr2_sgpr3, 0, implicit $exec :: (dereferenceable load (s128), addrspace 7)
; GCN-NEXT: S_ENDPGM 0
$vgpr0_vgpr1_vgpr2_vgpr3 = IMAGE_SAMPLE_V4_V2 $vgpr16_vgpr17, $sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11, $sgpr0_sgpr1_sgpr2_sgpr3, 15, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s128), align 4, addrspace 4)
$vgpr0_vgpr1_vgpr2_vgpr3 = IMAGE_BVH_INTERSECT_RAY_sa_gfx10 killed $vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14, killed renamable $sgpr0_sgpr1_sgpr2_sgpr3, implicit $exec :: (dereferenceable load (s128), addrspace 7)
$vgpr0_vgpr1_vgpr2_vgpr3 = IMAGE_BVH_INTERSECT_RAY_sa_gfx10 killed $vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14, killed renamable $sgpr0_sgpr1_sgpr2_sgpr3, 0, implicit $exec :: (dereferenceable load (s128), addrspace 7)
S_ENDPGM 0
...
6 changes: 6 additions & 0 deletions llvm/test/MC/AMDGPU/gfx1030_err.s
Original file line number Diff line number Diff line change
Expand Up @@ -207,3 +207,9 @@ s_waitcnt_depctr depctr_hold_cnt(0) depctr_hold_cnt(0)

s_waitcnt_depctr depctr_sa_sdst(0) depctr_sa_sdst(0)
// GFX10: :[[@LINE-1]]:{{[0-9]+}}: error: duplicate counter name depctr_sa_sdst

image_bvh_intersect_ray v[4:7], v[9:16], s[4:7] noa16
// GFX10: :[[@LINE-1]]:{{[0-9]+}}: error: image address size does not match a16

image_bvh_intersect_ray v[39:42], [v50, v46, v23, v17, v16, v15, v21, v20], s[12:15] noa16
// GFX10: :[[@LINE-1]]:{{[0-9]+}}: error: image address size does not match a16
Loading

0 comments on commit 85ae98c

Please sign in to comment.