[AMDGPU][MC] Add GFX12 SMEM encoding #75215
Merged
Conversation
@llvm/pr-subscribers-backend-amdgpu @llvm/pr-subscribers-mc

Author: Mirko Brkušanin (mbrkusanin)

Changes

Patch is 100.29 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/75215.diff

10 Files Affected:
diff --git a/llvm/lib/Target/AMDGPU/AMDGPU.td b/llvm/lib/Target/AMDGPU/AMDGPU.td
index 799e102d56174d..920cf784858768 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPU.td
+++ b/llvm/lib/Target/AMDGPU/AMDGPU.td
@@ -411,6 +411,12 @@ def FeatureVGPRIndexMode : SubtargetFeature<"vgpr-index-mode",
"Has VGPR mode register indexing"
>;
+def FeatureScalarDwordx3Loads : SubtargetFeature<"scalar-dwordx3-loads",
+ "HasScalarDwordx3Loads",
+ "true",
+ "Has 96-bit scalar load instructions"
+>;
+
def FeatureScalarStores : SubtargetFeature<"scalar-stores",
"HasScalarStores",
"true",
@@ -1462,7 +1468,8 @@ def FeatureISAVersion12 : FeatureSet<
FeatureVcmpxPermlaneHazard,
FeatureSALUFloatInsts,
FeatureVGPRSingleUseHintInsts,
- FeatureMADIntraFwdBug]>;
+ FeatureMADIntraFwdBug,
+ FeatureScalarDwordx3Loads]>;
//===----------------------------------------------------------------------===//
@@ -2011,6 +2018,8 @@ def HasNoCvtFP8VOP1Bug : Predicate<"!Subtarget->hasCvtFP8VOP1Bug()">;
def HasAtomicCSubNoRtnInsts : Predicate<"Subtarget->hasAtomicCSubNoRtnInsts()">;
+def HasScalarDwordx3Loads : Predicate<"Subtarget->hasScalarDwordx3Loads()">;
+
// Include AMDGPU TD files
include "SISchedule.td"
include "GCNProcessors.td"
diff --git a/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp b/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp
index 92427335c0ad2f..050ecb93ef16d6 100644
--- a/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp
+++ b/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp
@@ -1665,6 +1665,7 @@ class AMDGPUAsmParser : public MCTargetAsmParser {
SMLoc getInstLoc(const OperandVector &Operands) const;
bool validateInstruction(const MCInst &Inst, const SMLoc &IDLoc, const OperandVector &Operands);
+ bool validateOffset(const MCInst &Inst, const OperandVector &Operands);
bool validateFlatOffset(const MCInst &Inst, const OperandVector &Operands);
bool validateSMEMOffset(const MCInst &Inst, const OperandVector &Operands);
bool validateSOPLiteral(const MCInst &Inst) const;
@@ -2630,7 +2631,7 @@ AMDGPUAsmParser::getRegularReg(RegisterKind RegKind,
if (RegKind == IS_SGPR || RegKind == IS_TTMP) {
// SGPR and TTMP registers must be aligned.
// Max required alignment is 4 dwords.
- AlignSize = std::min(RegWidth / 32, 4u);
+ AlignSize = std::min(llvm::bit_ceil(RegWidth / 32), 4u);
}
if (RegNum % AlignSize != 0) {
@@ -4139,6 +4140,40 @@ SMLoc AMDGPUAsmParser::getFlatOffsetLoc(const OperandVector &Operands) const {
return getLoc();
}
+bool AMDGPUAsmParser::validateOffset(const MCInst &Inst,
+ const OperandVector &Operands) {
+ auto Opcode = Inst.getOpcode();
+ auto OpNum = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::offset);
+ if (OpNum == -1)
+ return true;
+
+ uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags;
+ if ((TSFlags & SIInstrFlags::FLAT))
+ return validateFlatOffset(Inst, Operands);
+
+ if ((TSFlags & SIInstrFlags::SMRD))
+ return validateSMEMOffset(Inst, Operands);
+
+ const auto &Op = Inst.getOperand(OpNum);
+ if (isGFX12Plus() &&
+ (TSFlags & (SIInstrFlags::MUBUF | SIInstrFlags::MTBUF))) {
+ const unsigned OffsetSize = 24;
+ if (!isIntN(OffsetSize, Op.getImm())) {
+ Error(getFlatOffsetLoc(Operands),
+ Twine("expected a ") + Twine(OffsetSize) + "-bit signed offset");
+ return false;
+ }
+ } else {
+ const unsigned OffsetSize = 16;
+ if (!isUIntN(OffsetSize, Op.getImm())) {
+ Error(getFlatOffsetLoc(Operands),
+ Twine("expected a ") + Twine(OffsetSize) + "-bit unsigned offset");
+ return false;
+ }
+ }
+ return true;
+}
+
bool AMDGPUAsmParser::validateFlatOffset(const MCInst &Inst,
const OperandVector &Operands) {
uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags;
@@ -4796,10 +4831,7 @@ bool AMDGPUAsmParser::validateInstruction(const MCInst &Inst,
if (!validateMovrels(Inst, Operands)) {
return false;
}
- if (!validateFlatOffset(Inst, Operands)) {
- return false;
- }
- if (!validateSMEMOffset(Inst, Operands)) {
+ if (!validateOffset(Inst, Operands)) {
return false;
}
if (!validateMAIAccWrite(Inst, Operands)) {
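As a quick illustration of two of the AsmParser changes above, here is a minimal standalone sketch (not the in-tree code; the helper names are invented for the example). It shows why the alignment computation now rounds up with bit_ceil (a 96-bit SGPR tuple must be aligned like a 128-bit one), and the offset ranges that the new validateOffset enforces: a 24-bit signed offset for GFX12 MUBUF/MTBUF versus the pre-existing 16-bit unsigned case.

```cpp
// Standalone sketch; requires C++20 for std::bit_ceil (llvm::bit_ceil
// behaves the same for these inputs).
#include <bit>
#include <algorithm>
#include <cassert>
#include <cstdint>

// Alignment for SGPR/TTMP tuples: rounding RegWidth/32 up to a power of two
// before clamping gives 96-bit tuples a 4-dword alignment instead of 3.
static unsigned sgprTupleAlign(unsigned RegWidthInBits) {
  return std::min(std::bit_ceil(RegWidthInBits / 32), 4u);
}

// Offset ranges checked by validateOffset(): signed 24-bit for GFX12
// buffer instructions, unsigned 16-bit otherwise.
static bool fitsSigned(int64_t V, unsigned Bits) {
  return V >= -(int64_t(1) << (Bits - 1)) && V < (int64_t(1) << (Bits - 1));
}
static bool fitsUnsigned(int64_t V, unsigned Bits) {
  return V >= 0 && V < (int64_t(1) << Bits);
}

int main() {
  assert(sgprTupleAlign(64) == 2);
  assert(sgprTupleAlign(96) == 4);   // old min(96/32, 4u) would have given 3
  assert(sgprTupleAlign(128) == 4);

  assert(fitsSigned(0x7fffff, 24) && fitsSigned(-0x800000, 24));
  assert(!fitsSigned(0x800000, 24));
  assert(fitsUnsigned(0xffff, 16) && !fitsUnsigned(0x10000, 16));
  return 0;
}
```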
diff --git a/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp b/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp
index ed019d26c1dfd8..4e3561aeaf93aa 100644
--- a/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp
+++ b/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp
@@ -206,6 +206,7 @@ DECODE_OPERAND_REG_7(SReg_32_XM0_XEXEC, OPW32)
DECODE_OPERAND_REG_7(SReg_32_XEXEC_HI, OPW32)
DECODE_OPERAND_REG_7(SReg_64, OPW64)
DECODE_OPERAND_REG_7(SReg_64_XEXEC, OPW64)
+DECODE_OPERAND_REG_7(SReg_96, OPW96)
DECODE_OPERAND_REG_7(SReg_128, OPW128)
DECODE_OPERAND_REG_7(SReg_256, OPW256)
DECODE_OPERAND_REG_7(SReg_512, OPW512)
@@ -1232,6 +1233,8 @@ MCOperand AMDGPUDisassembler::createSRegOperand(unsigned SRegClassID,
case AMDGPU::TTMP_64RegClassID:
shift = 1;
break;
+ case AMDGPU::SGPR_96RegClassID:
+ case AMDGPU::TTMP_96RegClassID:
case AMDGPU::SGPR_128RegClassID:
case AMDGPU::TTMP_128RegClassID:
// ToDo: unclear if s[100:104] is available on VI. Can we use VCC as SGPR in
diff --git a/llvm/lib/Target/AMDGPU/GCNSubtarget.h b/llvm/lib/Target/AMDGPU/GCNSubtarget.h
index 94b9e49b765a6f..39cb69685712f5 100644
--- a/llvm/lib/Target/AMDGPU/GCNSubtarget.h
+++ b/llvm/lib/Target/AMDGPU/GCNSubtarget.h
@@ -119,6 +119,7 @@ class GCNSubtarget final : public AMDGPUGenSubtargetInfo,
bool HasFmaMixInsts = false;
bool HasMovrel = false;
bool HasVGPRIndexMode = false;
+ bool HasScalarDwordx3Loads = false;
bool HasScalarStores = false;
bool HasScalarAtomics = false;
bool HasSDWAOmod = false;
@@ -884,6 +885,8 @@ class GCNSubtarget final : public AMDGPUGenSubtargetInfo,
return getGeneration() >= VOLCANIC_ISLANDS;
}
+ bool hasScalarDwordx3Loads() const { return HasScalarDwordx3Loads; }
+
bool hasScalarStores() const {
return HasScalarStores;
}
diff --git a/llvm/lib/Target/AMDGPU/SIRegisterInfo.td b/llvm/lib/Target/AMDGPU/SIRegisterInfo.td
index 7ea2280c474b05..981da13fe08952 100644
--- a/llvm/lib/Target/AMDGPU/SIRegisterInfo.td
+++ b/llvm/lib/Target/AMDGPU/SIRegisterInfo.td
@@ -414,7 +414,7 @@ def SGPR_32 : SIRegisterClass<"AMDGPU", [i32, f32, i16, f16, v2i16, v2f16], 32,
// SGPR 64-bit registers
def SGPR_64Regs : SIRegisterTuples<getSubRegs<2>.ret, SGPR_32, 105, 2, 2, "s">;
-// SGPR 96-bit registers. No operations use these, but for symmetry with 96-bit VGPRs.
+// SGPR 96-bit registers.
def SGPR_96Regs : SIRegisterTuples<getSubRegs<3>.ret, SGPR_32, 105, 4, 3, "s">;
// SGPR 128-bit registers
diff --git a/llvm/lib/Target/AMDGPU/SMInstructions.td b/llvm/lib/Target/AMDGPU/SMInstructions.td
index c18846483cf95a..f603106b21754d 100644
--- a/llvm/lib/Target/AMDGPU/SMInstructions.td
+++ b/llvm/lib/Target/AMDGPU/SMInstructions.td
@@ -74,7 +74,7 @@ class SM_Real <SM_Pseudo ps, string opName = ps.Mnemonic>
bits<7> sdst;
bits<32> offset;
bits<8> soffset;
- bits<5> cpol;
+ bits<5> cpol;
}
class OffsetMode<bit hasOffset, bit hasSOffset, string variant,
@@ -300,6 +300,8 @@ multiclass SM_Pseudo_Atomics<RegisterClass baseClass,
// does sdst for SMRD on SI/CI?
defm S_LOAD_DWORD : SM_Pseudo_Loads <SReg_64, SReg_32_XM0_XEXEC>;
defm S_LOAD_DWORDX2 : SM_Pseudo_Loads <SReg_64, SReg_64_XEXEC>;
+let SubtargetPredicate = HasScalarDwordx3Loads in
+ defm S_LOAD_DWORDX3 : SM_Pseudo_Loads <SReg_64, SReg_96>;
defm S_LOAD_DWORDX4 : SM_Pseudo_Loads <SReg_64, SReg_128>;
defm S_LOAD_DWORDX8 : SM_Pseudo_Loads <SReg_64, SReg_256>;
defm S_LOAD_DWORDX16 : SM_Pseudo_Loads <SReg_64, SReg_512>;
@@ -309,6 +311,8 @@ defm S_BUFFER_LOAD_DWORD : SM_Pseudo_Loads <SReg_128, SReg_32_XM0_XEXEC>;
// FIXME: exec_lo/exec_hi appear to be allowed for SMRD loads on
// SI/CI, bit disallowed for SMEM on VI.
defm S_BUFFER_LOAD_DWORDX2 : SM_Pseudo_Loads <SReg_128, SReg_64_XEXEC>;
+let SubtargetPredicate = HasScalarDwordx3Loads in
+ defm S_BUFFER_LOAD_DWORDX3 : SM_Pseudo_Loads <SReg_128, SReg_96>;
defm S_BUFFER_LOAD_DWORDX4 : SM_Pseudo_Loads <SReg_128, SReg_128>;
defm S_BUFFER_LOAD_DWORDX8 : SM_Pseudo_Loads <SReg_128, SReg_256>;
defm S_BUFFER_LOAD_DWORDX16 : SM_Pseudo_Loads <SReg_128, SReg_512>;
@@ -1179,7 +1183,7 @@ def SMInfoTable : GenericTable {
class SMEM_Real_gfx11<bits<8> op, SM_Pseudo ps, string opName = ps.Mnemonic> :
SMEM_Real_10Plus_common<op, ps, opName, SIEncodingFamily.GFX11,
SGPR_NULL_gfx11plus> {
- let AssemblerPredicate = isGFX11Plus;
+ let AssemblerPredicate = isGFX11Only;
let DecoderNamespace = "GFX11";
let Inst{13} = !if(ps.has_dlc, cpol{CPolBit.DLC}, 0);
let Inst{14} = !if(ps.has_glc, cpol{CPolBit.GLC}, 0);
@@ -1235,19 +1239,30 @@ defm S_ATC_PROBE_BUFFER : SM_Real_Probe_gfx11 <0x23>;
// GFX12.
//===----------------------------------------------------------------------===//
-class SMEM_Real_gfx12<bits<8> op, SM_Pseudo ps, string opName = ps.Mnemonic> :
- SMEM_Real_10Plus_common<op, ps, opName, SIEncodingFamily.GFX12,
+class SMEM_Real_gfx12Plus<bits<6> op, SM_Pseudo ps, string opName,
+ int subtarget, RegisterWithSubRegs sgpr_null> :
+ SM_Real<ps, opName>, SIMCInstr<ps.PseudoInstr, subtarget>, Enc64 {
+
+ let Inst{18-13} = op;
+ let Inst{31-26} = 0x3d;
+
+ let Inst{55-32} = !if(ps.has_offset, offset{23-0}, !if(ps.has_soffset, 0, ?));
+ let Inst{63-57} = !if(ps.has_soffset, soffset{6-0},
+ !if(ps.has_offset, sgpr_null.HWEncoding{6-0}, ?));
+}
+
+class SMEM_Real_gfx12<bits<6> op, SM_Pseudo ps, string opName = ps.Mnemonic> :
+ SMEM_Real_gfx12Plus<op, ps, opName, SIEncodingFamily.GFX12,
SGPR_NULL_gfx11plus> {
let AssemblerPredicate = isGFX12Plus;
let DecoderNamespace = "GFX12";
- let Inst{18-13} = op{5-0};
- let Inst{19} = !if(ps.has_dlc, cpol{CPolBit.DLC}, 0);
- let Inst{24-20} = ?; // TODO-GFX12: Add new bits {24-20}: TH, Scope, NV
- let Inst{25} = !if(ps.has_glc, cpol{CPolBit.GLC}, 0);
- let Inst{55-32} = offset{23-0};
+
+ let Inst{5-0} = !if(ps.has_sbase, sbase{6-1}, ?);
+ let Inst{12-6} = !if(ps.has_sdst, sdst{6-0}, ?);
}
-class SMEM_Real_Prefetch_gfx12 <bits<8> op, SM_Pseudo ps> : SMEM_Real_gfx12<op, ps> {
+class SMEM_Real_Prefetch_gfx12<bits<6> op, SM_Pseudo ps> :
+ SMEM_Real_gfx12<op, ps> {
bits<7> sdata; // Only 5 bits of sdata are supported.
let sdst = ?;
@@ -1255,8 +1270,48 @@ class SMEM_Real_Prefetch_gfx12 <bits<8> op, SM_Pseudo ps> : SMEM_Real_gfx12<op,
let Inst{10-6} = !if(ps.has_sdst, sdata{4-0}, ?);
}
+class SMEM_Real_Load_gfx12<bits<6> op, string ps, string opName, OffsetMode offsets> :
+ SMEM_Real_gfx12<op, !cast<SM_Pseudo>(ps # offsets.Variant), opName> {
+ RegisterClass BaseClass = !cast<SM_Load_Pseudo>(ps # offsets.Variant).BaseClass;
+ let InOperandList = !con((ins BaseClass:$sbase), offsets.Ins, (ins CPol:$cpol));
+
+ let Inst{22-21} = cpol{4-3}; // scope
+ let Inst{24-23} = cpol{1-0}; // th - only lower 2 bits are supported
+}
+
+multiclass SM_Real_Loads_gfx12<bits<6> op, string ps = NAME> {
+ defvar opName = !tolower(NAME);
+ def _IMM_gfx12 : SMEM_Real_Load_gfx12<op, ps, opName, IMM_Offset>;
+ def _SGPR_IMM_gfx12 : SMEM_Real_Load_gfx12<op, ps, opName, SGPR_IMM_Offset>;
+}
+
+defm S_LOAD_B32 : SM_Real_Loads_gfx12<0x00, "S_LOAD_DWORD">;
+defm S_LOAD_B64 : SM_Real_Loads_gfx12<0x01, "S_LOAD_DWORDX2">;
+defm S_LOAD_B96 : SM_Real_Loads_gfx12<0x05, "S_LOAD_DWORDX3">;
+defm S_LOAD_B128 : SM_Real_Loads_gfx12<0x02, "S_LOAD_DWORDX4">;
+defm S_LOAD_B256 : SM_Real_Loads_gfx12<0x03, "S_LOAD_DWORDX8">;
+defm S_LOAD_B512 : SM_Real_Loads_gfx12<0x04, "S_LOAD_DWORDX16">;
+
+defm S_BUFFER_LOAD_B32 : SM_Real_Loads_gfx12<0x10, "S_BUFFER_LOAD_DWORD">;
+defm S_BUFFER_LOAD_B64 : SM_Real_Loads_gfx12<0x11, "S_BUFFER_LOAD_DWORDX2">;
+defm S_BUFFER_LOAD_B96 : SM_Real_Loads_gfx12<0x15, "S_BUFFER_LOAD_DWORDX3">;
+defm S_BUFFER_LOAD_B128 : SM_Real_Loads_gfx12<0x12, "S_BUFFER_LOAD_DWORDX4">;
+defm S_BUFFER_LOAD_B256 : SM_Real_Loads_gfx12<0x13, "S_BUFFER_LOAD_DWORDX8">;
+defm S_BUFFER_LOAD_B512 : SM_Real_Loads_gfx12<0x14, "S_BUFFER_LOAD_DWORDX16">;
+
+def S_DCACHE_INV_gfx12 : SMEM_Real_gfx12<0x021, S_DCACHE_INV>;
+
def S_PREFETCH_INST_gfx12 : SMEM_Real_Prefetch_gfx12<0x24, S_PREFETCH_INST>;
def S_PREFETCH_INST_PC_REL_gfx12 : SMEM_Real_Prefetch_gfx12<0x25, S_PREFETCH_INST_PC_REL>;
def S_PREFETCH_DATA_gfx12 : SMEM_Real_Prefetch_gfx12<0x26, S_PREFETCH_DATA>;
def S_BUFFER_PREFETCH_DATA_gfx12 : SMEM_Real_Prefetch_gfx12<0x27, S_BUFFER_PREFETCH_DATA>;
def S_PREFETCH_DATA_PC_REL_gfx12 : SMEM_Real_Prefetch_gfx12<0x28, S_PREFETCH_DATA_PC_REL>;
+
+multiclass SMEM_Real_Probe_gfx12<bits<6> op> {
+ defvar ps = NAME;
+ def _IMM_gfx12 : SMEM_Real_Prefetch_gfx12<op, !cast<SM_Probe_Pseudo>(ps#_IMM)>;
+ def _SGPR_IMM_gfx12 : SMEM_Real_Prefetch_gfx12<op, !cast<SM_Probe_Pseudo>(ps#_SGPR_IMM)>;
+}
+
+defm S_ATC_PROBE : SMEM_Real_Probe_gfx12<0x22>;
+defm S_ATC_PROBE_BUFFER : SMEM_Real_Probe_gfx12<0x23>;
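For readers checking the new layout by hand, here is a minimal standalone sketch (an invented helper, not part of the patch) that packs the fields exactly as SMEM_Real_gfx12Plus, SMEM_Real_gfx12 and SMEM_Real_Load_gfx12 describe them, and compares the result against one of the new assembler test vectors further down.

```cpp
// Illustrative GFX12 SMEM load encoder; field positions follow the TableGen
// classes above (sbase 5-0, sdst 12-6, op 18-13, cpol 24-21, major opcode
// 0x3d at 31-26, offset 55-32, soffset 63-57).
#include <cassert>
#include <cstdint>

static uint64_t encodeSMEMLoad(unsigned Op, unsigned SBase, unsigned SDst,
                               uint32_t Offset, unsigned SOffset,
                               unsigned Scope = 0, unsigned TH = 0) {
  uint64_t Inst = 0;
  Inst |= uint64_t(SBase >> 1) & 0x3f;        // Inst{5-0}   = sbase{6-1}
  Inst |= uint64_t(SDst & 0x7f) << 6;         // Inst{12-6}  = sdst
  Inst |= uint64_t(Op & 0x3f) << 13;          // Inst{18-13} = op
  Inst |= uint64_t(Scope & 0x3) << 21;        // Inst{22-21} = cpol scope
  Inst |= uint64_t(TH & 0x3) << 23;           // Inst{24-23} = cpol th (low 2 bits)
  Inst |= uint64_t(0x3d) << 26;               // Inst{31-26} = SMEM encoding
  Inst |= uint64_t(Offset & 0xffffff) << 32;  // Inst{55-32} = offset
  Inst |= uint64_t(SOffset & 0x7f) << 57;     // Inst{63-57} = soffset (null = 124)
  return Inst;
}

int main() {
  // s_load_b96 s[20:22], s[2:3], s0 offset:0x0
  // expected: [0x01,0xa5,0x00,0xf4,0x00,0x00,0x00,0x00] (see gfx12_asm_smem.s)
  uint64_t V = encodeSMEMLoad(/*Op=*/0x05, /*SBase=*/2, /*SDst=*/20,
                              /*Offset=*/0, /*SOffset=*/0);
  const uint8_t Expected[8] = {0x01, 0xa5, 0x00, 0xf4, 0x00, 0x00, 0x00, 0x00};
  for (int I = 0; I < 8; ++I)
    assert(uint8_t(V >> (8 * I)) == Expected[I]);
  return 0;
}
```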
diff --git a/llvm/test/MC/AMDGPU/gfx11_asm_err.s b/llvm/test/MC/AMDGPU/gfx11_asm_err.s
index 5e508b466e830b..e6b32b80e01769 100644
--- a/llvm/test/MC/AMDGPU/gfx11_asm_err.s
+++ b/llvm/test/MC/AMDGPU/gfx11_asm_err.s
@@ -152,3 +152,9 @@ v_fmac_f32_e64_dpp v5, v2, 0x1234 quad_perm:[3,2,1,0]
s_load_dword s1, s[2:3], s0 0x1
// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
+
+s_load_b96 s[20:22], s[2:3], s0
+// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: instruction not supported on this GPU
+
+s_buffer_load_b96 s[20:22], s[4:7], s0
+// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: instruction not supported on this GPU
diff --git a/llvm/test/MC/AMDGPU/gfx12_asm_smem.s b/llvm/test/MC/AMDGPU/gfx12_asm_smem.s
index ed7ad5bb0c4e82..1566b9c04e3494 100644
--- a/llvm/test/MC/AMDGPU/gfx12_asm_smem.s
+++ b/llvm/test/MC/AMDGPU/gfx12_asm_smem.s
@@ -33,3 +33,813 @@ s_buffer_prefetch_data s[20:23], 100, s10, 7
s_buffer_prefetch_data s[20:23], 100, null, 7
// GFX12: s_buffer_prefetch_data s[20:23], 0x64, null, 7 ; encoding: [0xca,0xe1,0x04,0xf4,0x64,0x00,0x00,0xf8]
+
+s_load_b32 s5, s[2:3], s0
+// GFX12: s_load_b32 s5, s[2:3], s0 offset:0x0 ; encoding: [0x41,0x01,0x00,0xf4,0x00,0x00,0x00,0x00]
+
+s_load_b32 s101, s[2:3], s0
+// GFX12: s_load_b32 s101, s[2:3], s0 offset:0x0 ; encoding: [0x41,0x19,0x00,0xf4,0x00,0x00,0x00,0x00]
+
+s_load_b32 vcc_lo, s[2:3], s0
+// GFX12: s_load_b32 vcc_lo, s[2:3], s0 offset:0x0 ; encoding: [0x81,0x1a,0x00,0xf4,0x00,0x00,0x00,0x00]
+
+s_load_b32 vcc_hi, s[2:3], s0
+// GFX12: s_load_b32 vcc_hi, s[2:3], s0 offset:0x0 ; encoding: [0xc1,0x1a,0x00,0xf4,0x00,0x00,0x00,0x00]
+
+s_load_b32 s5, s[4:5], s0
+// GFX12: s_load_b32 s5, s[4:5], s0 offset:0x0 ; encoding: [0x42,0x01,0x00,0xf4,0x00,0x00,0x00,0x00]
+
+s_load_b32 s5, s[100:101], s0
+// GFX12: s_load_b32 s5, s[100:101], s0 offset:0x0 ; encoding: [0x72,0x01,0x00,0xf4,0x00,0x00,0x00,0x00]
+
+s_load_b32 s5, vcc, s0
+// GFX12: s_load_b32 s5, vcc, s0 offset:0x0 ; encoding: [0x75,0x01,0x00,0xf4,0x00,0x00,0x00,0x00]
+
+s_load_b32 s5, s[2:3], s101
+// GFX12: s_load_b32 s5, s[2:3], s101 offset:0x0 ; encoding: [0x41,0x01,0x00,0xf4,0x00,0x00,0x00,0xca]
+
+s_load_b32 s5, s[2:3], vcc_lo
+// GFX12: s_load_b32 s5, s[2:3], vcc_lo offset:0x0 ; encoding: [0x41,0x01,0x00,0xf4,0x00,0x00,0x00,0xd4]
+
+s_load_b32 s5, s[2:3], vcc_hi
+// GFX12: s_load_b32 s5, s[2:3], vcc_hi offset:0x0 ; encoding: [0x41,0x01,0x00,0xf4,0x00,0x00,0x00,0xd6]
+
+s_load_b32 s5, s[2:3], m0
+// GFX12: s_load_b32 s5, s[2:3], m0 offset:0x0 ; encoding: [0x41,0x01,0x00,0xf4,0x00,0x00,0x00,0xfa]
+
+s_load_b32 s5, s[2:3], 0x0
+// GFX12: s_load_b32 s5, s[2:3], 0x0 ; encoding: [0x41,0x01,0x00,0xf4,0x00,0x00,0x00,0xf8]
+
+s_load_b32 s5, s[2:3], s7 offset:0x12345
+// GFX12: s_load_b32 s5, s[2:3], s7 offset:0x12345 ; encoding: [0x41,0x01,0x00,0xf4,0x45,0x23,0x01,0x0e]
+
+s_load_b64 s[10:11], s[2:3], s0
+// GFX12: s_load_b64 s[10:11], s[2:3], s0 offset:0x0 ; encoding: [0x81,0x22,0x00,0xf4,0x00,0x00,0x00,0x00]
+
+s_load_b64 s[12:13], s[2:3], s0
+// GFX12: s_load_b64 s[12:13], s[2:3], s0 offset:0x0 ; encoding: [0x01,0x23,0x00,0xf4,0x00,0x00,0x00,0x00]
+
+s_load_b64 s[100:101], s[2:3], s0
+// GFX12: s_load_b64 s[100:101], s[2:3], s0 offset:0x0 ; encoding: [0x01,0x39,0x00,0xf4,0x00,0x00,0x00,0x00]
+
+s_load_b64 vcc, s[2:3], s0
+// GFX12: s_load_b64 vcc, s[2:3], s0 offset:0x0 ; encoding: [0x81,0x3a,0x00,0xf4,0x00,0x00,0x00,0x00]
+
+s_load_b64 s[10:11], s[4:5], s0
+// GFX12: s_load_b64 s[10:11], s[4:5], s0 offset:0x0 ; encoding: [0x82,0x22,0x00,0xf4,0x00,0x00,0x00,0x00]
+
+s_load_b64 s[10:11], s[100:101], s0
+// GFX12: s_load_b64 s[10:11], s[100:101], s0 offset:0x0 ; encoding: [0xb2,0x22,0x00,0xf4,0x00,0x00,0x00,0x00]
+
+s_load_b64 s[10:11], vcc, s0
+// GFX12: s_load_b64 s[10:11], vcc, s0 offset:0x0 ; encoding: [0xb5,0x22,0x00,0xf4,0x00,0x00,0x00,0x00]
+
+s_load_b64 s[10:11], s[2:3], s101
+// GFX12: s_load_b64 s[10:11], s[2:3], s101 offset:0x0 ; encoding: [0x81,0x22,0x00,0xf4,0x00,0x00,0x00,0xca]
+
+s_load_b64 s[10:11], s[2:3], vcc_lo
+// GFX12: s_load_b64 s[10:11], s[2:3], vcc_lo offset:0x0 ; encoding: [0x81,0x22,0x00,0xf4,0x00,0x00,0x00,0xd4]
+
+s_load_b64 s[10:11], s[2:3], vcc_hi
+// GFX12: s_load_b64 s[10:11], s[2:3], vcc_hi offset:0x0 ; encoding: [0x81,0x22,0x00,0xf4,0x00,0x00,0x00,0xd6]
+
+s_load_b64 s[10:11], s[2:3], m0
+// GFX12: s_load_b64 s[10:11], s[2:3], m0 offset:0x0 ; encoding: [0x81,0x22,0x00,0xf4,0x00,0x00,0x00,0xfa]
+
+s_load_b64 s[10:11], s[2:3], 0x0
+// GFX12: s_load_b64 s[10:11], s[2:3], 0x0 ; encoding: [0x81,0x22,0x00,0xf4,0x00,0x00,0x00,0xf8]
+
+s_load_b96 s[20:22], s[2:3], s0
+// GFX12: s_load_b96 s[20:22], s[2:3], s0 offset:0x0 ; encoding: [0x01,0xa5,0x00,0xf4,0x00,0x00,0x00,0x00]
+
+s_load_b96 s[24:26], s[2:3], s0
+// GFX12: s_load_b96 s[24:26], s[2:3], s0 offset:0x0 ; encoding: [0x01,0xa6,0x00,0xf4,0x00,0x00,0x00,0x00]
+
+s_load_b96 s[96:98], s[2:3], s0
+// GFX12: s_load_b96 s[96:98], s[2:3], s0 offset:0x0 ; encoding: [0x01,0xb8,0x00,0xf4,0x00,0x00,0x00,0x00]
+
+s_load_b96 s[20:22], s[4:5], s0
+// GFX12: s_load_b96 s[20:22], s[4:5], s0 offset:0x0 ; encoding: [0x02,0xa5,0x00,0xf4,0x00,0x00,0x00,0x00]
+
+s_load_b96 s[20:22], s[100:101], s0
+// GFX12: s_load_b96 s[20:22], s[100:101], s0 offset:0x0 ; encoding: [0x32,0xa5,0x00,0xf4,0x00,0x00,0x00,0x00]
+
+s_load_b96 s[20:22], vcc, s0
+// GFX12: s_load_b96 s[20:22], vcc, s0 offset:0x0 ; encoding: [0x35,0xa5,0x00,0xf4,0x00,0x00,0x00,0x00]
+
+s_load_b96 s[20:22], s[2:3], s101
+// GFX12: s_load_b96 s[20:22], s[2:3], s101 offset:0x0 ; encoding: [0x01,0xa5,0x00,0xf4,0x00,0x00,0x00,0xca]
+
+s_load_b96 s[20:22], s[2:3], vcc_lo
+// GFX12: s_load_b96 s[20:22], s[2:3], vcc_lo offset:0x0 ; encoding: [0x01,0xa5,0x00,0xf4,0x00,0x00,0x00,0xd4]
+
+s_load_b96 s[20:22], s[2:3], vcc_hi
+// GFX12: s_load_b96 s[20:22], s[2:3], vcc_hi offset:0x0 ; encoding: [0x01,0xa5,0x00,0xf4,0x00,0x00,0x00,0xd6]
+
+s_load_b96 s[20:22], s[2:3], m0
+// GFX12: s_load_b96 s[20:22], s[2:3], m0 offset:0x0 ; encoding: [0x01,0xa5,0x00,0xf4,0x00,0x00,0x00,0xfa]
+
+s_load_b96 s[20:22], s[2:3], 0x0
+// GFX12: s_load_b96 s[20:22], s[2:3], 0x0 ; encoding: [0x01,0xa5,0x00,0xf4,0x00,0x00,0x00,0xf8]
+
+s_load_b128 s[20:23], s[2:3], s0
+// GFX12: s_load_b128 s[20:23], s[2:3], s0 offset:0x0 ; encoding: [0x01,0x45,0x00,0xf4,0x00,0x00,0x00,0x00]
+
+s_load_b128 s[24:27], s[2:3], s0
+// GFX12: s_load_b128 s[24:27], s[2:3], s0 offset:0x0 ; encoding: [0x01,0x46,0x00,0xf4,0x00,0x00,0x00,0x00]
+
+s_load_b128 s[96:99], s[2:3], s0
+// GFX12: s_load_b128 s[96:99], s[2:3], s0 offset:0x0 ; encoding: [0x01,0x58,0x00,0xf4,0x00,0x00,0x00,0x00]
+
+s_load_b128 s[20:23], s[4:5], s0
+// GFX12: s_load_b128 s[20:23], s[4:5], s0 offset:0x0 ; encoding: [0x02,0x45,0x00,0xf4,0x00,0x00,0x00,0x00]
+
+s_load_b128 s[20:23], s[100:101], s0
+// GFX12: s_load_b128 s[20:23], s[100:101], s0 offset:0x0 ; encoding: [0x32,0x45,0x00,0xf4,0x00,0x00,0x00,0x00]
+
+s_load_b128 s[20:23], vcc, s0
+// GFX12: s_load_b128 s[20:23], vcc, s0 offset:0x0 ; encoding: [0x35,0x45,0x00,0xf4,0x00,0x00,0x00,0x00]
+
+s_load_b128 s[20:23], s[2:3], s101
+// GFX12: s_load_b128 s[20:23], s[2:3], s101 offset:0x0 ; encoding: [0x01,0x45,0x00,0xf4,0x00,0x00,0x00,0xca]
+
+s_load_b128 s[20:23], s[2:3], vcc_lo
+// GF...
[truncated]
piotrAMD approved these changes on Dec 14, 2023
LGTM
Force-pushed: 9dbb9bb to e1a45a3
Force-pushed: e1a45a3 to b9534de