@@ -74,7 +74,7 @@ class SM_Real <SM_Pseudo ps, string opName = ps.Mnemonic>
74
74
bits<7> sdst;
75
75
bits<32> offset;
76
76
bits<8> soffset;
77
- bits<5> cpol;
77
+ bits<5> cpol;
78
78
}
79
79
80
80
class OffsetMode<bit hasOffset, bit hasSOffset, string variant,
@@ -300,6 +300,8 @@ multiclass SM_Pseudo_Atomics<RegisterClass baseClass,
300
300
// does sdst for SMRD on SI/CI?
301
301
defm S_LOAD_DWORD : SM_Pseudo_Loads <SReg_64, SReg_32_XM0_XEXEC>;
302
302
defm S_LOAD_DWORDX2 : SM_Pseudo_Loads <SReg_64, SReg_64_XEXEC>;
303
// Gate the DWORDX3 scalar load pseudos on the HasScalarDwordx3Loads
// subtarget predicate; the destination uses the SReg_96 register class.
+ let SubtargetPredicate = HasScalarDwordx3Loads in
304
+ defm S_LOAD_DWORDX3 : SM_Pseudo_Loads <SReg_64, SReg_96>;
303
305
defm S_LOAD_DWORDX4 : SM_Pseudo_Loads <SReg_64, SReg_128>;
304
306
defm S_LOAD_DWORDX8 : SM_Pseudo_Loads <SReg_64, SReg_256>;
305
307
defm S_LOAD_DWORDX16 : SM_Pseudo_Loads <SReg_64, SReg_512>;
@@ -309,6 +311,8 @@ defm S_BUFFER_LOAD_DWORD : SM_Pseudo_Loads <SReg_128, SReg_32_XM0_XEXEC>;
309
311
// FIXME: exec_lo/exec_hi appear to be allowed for SMRD loads on
310
312
// SI/CI, but disallowed for SMEM on VI.
311
313
defm S_BUFFER_LOAD_DWORDX2 : SM_Pseudo_Loads <SReg_128, SReg_64_XEXEC>;
314
// Gate the DWORDX3 scalar buffer-load pseudos on the HasScalarDwordx3Loads
// subtarget predicate; the base is SReg_128 and the destination is SReg_96.
+ let SubtargetPredicate = HasScalarDwordx3Loads in
315
+ defm S_BUFFER_LOAD_DWORDX3 : SM_Pseudo_Loads <SReg_128, SReg_96>;
312
316
defm S_BUFFER_LOAD_DWORDX4 : SM_Pseudo_Loads <SReg_128, SReg_128>;
313
317
defm S_BUFFER_LOAD_DWORDX8 : SM_Pseudo_Loads <SReg_128, SReg_256>;
314
318
defm S_BUFFER_LOAD_DWORDX16 : SM_Pseudo_Loads <SReg_128, SReg_512>;
@@ -1179,7 +1183,7 @@ def SMInfoTable : GenericTable {
1179
1183
class SMEM_Real_gfx11<bits<8> op, SM_Pseudo ps, string opName = ps.Mnemonic> :
1180
1184
SMEM_Real_10Plus_common<op, ps, opName, SIEncodingFamily.GFX11,
1181
1185
SGPR_NULL_gfx11plus> {
1182
- let AssemblerPredicate = isGFX11Plus ;
1186
+ let AssemblerPredicate = isGFX11Only ;
1183
1187
let DecoderNamespace = "GFX11";
1184
1188
let Inst{13} = !if(ps.has_dlc, cpol{CPolBit.DLC}, 0);
1185
1189
let Inst{14} = !if(ps.has_glc, cpol{CPolBit.GLC}, 0);
@@ -1235,28 +1239,79 @@ defm S_ATC_PROBE_BUFFER : SM_Real_Probe_gfx11 <0x23>;
1235
1239
// GFX12.
1236
1240
//===----------------------------------------------------------------------===//
1237
1241
1238
- class SMEM_Real_gfx12<bits<8> op, SM_Pseudo ps, string opName = ps.Mnemonic> :
1239
- SMEM_Real_10Plus_common<op, ps, opName, SIEncodingFamily.GFX12,
1240
- SGPR_NULL_gfx11plus> {
1242
// Shared real-instruction encoding for SMEM on GFX12 and later.
//   op        - 6-bit opcode, placed in Inst{18-13}.
//   ps        - the pseudo instruction being encoded.
//   opName    - mnemonic forwarded to SM_Real.
//   subtarget - encoding family forwarded to SIMCInstr.
//   sgpr_null - register whose HWEncoding fills the soffset field when the
//               pseudo has an immediate offset but no soffset operand.
// Field rules visible below:
//   Inst{55-32}: offset{23-0} if ps.has_offset; 0 if only ps.has_soffset;
//                otherwise left unset (?).
//   Inst{63-57}: soffset{6-0} if ps.has_soffset; sgpr_null's encoding if
//                only ps.has_offset; otherwise left unset (?).
// This class does not assign sbase, sdst, or cpol bits.
+ class SMEM_Real_gfx12Plus<bits<6> op, SM_Pseudo ps, string opName,
1243
+ int subtarget, RegisterWithSubRegs sgpr_null> :
1244
+ SM_Real<ps, opName>, SIMCInstr<ps.PseudoInstr, subtarget>, Enc64 {
1245
+
1246
+ let Inst{18-13} = op;
1247
+ let Inst{31-26} = 0x3d; // fixed field; presumably the SMEM encoding id - TODO confirm against the ISA doc
1248
+
1249
+ let Inst{55-32} = !if(ps.has_offset, offset{23-0}, !if(ps.has_soffset, 0, ?));
1250
+ let Inst{63-57} = !if(ps.has_soffset, soffset{6-0},
1251
+ !if(ps.has_offset, sgpr_null.HWEncoding{6-0}, ?));
1252
+ }
1253
+
1254
// GFX12 real SMEM encoding. Instantiates SMEM_Real_gfx12Plus with the
// SIEncodingFamily.GFX12 family and SGPR_NULL_gfx11plus as the null register,
// selects the isGFX12Plus assembler predicate and the "GFX12" decoder
// namespace, and encodes the base and destination registers:
//   Inst{5-0}  = sbase{6-1} when ps.has_sbase, otherwise unset (?).
//   Inst{12-6} = sdst{6-0}  when ps.has_sdst,  otherwise unset (?).
// In this diff view the lines prefixed with '-' are the previous body that
// the change removes; lines prefixed with '+' are the new body.
+ class SMEM_Real_gfx12<bits<6> op, SM_Pseudo ps, string opName = ps.Mnemonic> :
1255
+ SMEM_Real_gfx12Plus<op, ps, opName, SIEncodingFamily.GFX12,
1256
+ SGPR_NULL_gfx11plus> {
1241
1257
let AssemblerPredicate = isGFX12Plus;
1242
1258
let DecoderNamespace = "GFX12";
1243
- let Inst{18-13} = op{5-0};
1244
- let Inst{19} = !if(ps.has_dlc, cpol{CPolBit.DLC}, 0);
1245
- let Inst{24-20} = ?; // TODO-GFX12: Add new bits {24-20}: TH, Scope, NV
1246
- let Inst{25} = !if(ps.has_glc, cpol{CPolBit.GLC}, 0);
1247
- let Inst{55-32} = offset{23-0};
1259
+
1260
+ let Inst{5-0} = !if(ps.has_sbase, sbase{6-1}, ?);
1261
+ let Inst{12-6} = !if(ps.has_sdst, sdst{6-0}, ?);
1248
1262
}
1249
1263
1250
- class SMEM_Real_Prefetch_gfx12 <bits<8> op, SM_Pseudo ps> : SMEM_Real_gfx12<op, ps> {
1264
// GFX12 real encoding variant that carries an sdata operand in place of sdst.
// It clears the inherited sdst assignment, writes sdata{4-0} into Inst{10-6}
// when ps.has_sdst is set, and forces the two remaining bits Inst{12-11} to 0.
+ class SMEM_Real_Prefetch_gfx12<bits<6> op, SM_Pseudo ps> :
1265
+ SMEM_Real_gfx12<op, ps> {
1251
1266
bits<7> sdata; // Only 5 bits of sdata are supported.
1252
1267
1253
1268
let sdst = ?;
1254
1269
let Inst{12-11} = 0; // Unused sdata bits.
1255
1270
let Inst{10-6} = !if(ps.has_sdst, sdata{4-0}, ?);
1256
1271
}
1257
1272
1273
// GFX12 real encoding for one addressing variant of a scalar load.
//   op      - 6-bit opcode.
//   ps      - base name of the pseudo; offsets.Variant is appended to it and
//             the result is looked up with !cast.
//   opName  - mnemonic for the real instruction.
//   offsets - OffsetMode that supplies the variant suffix and offset operands.
// The input operand list is rebuilt as: sbase (using the pseudo's BaseClass),
// then the variant's offset operands, then cpol.
+ class SMEM_Real_Load_gfx12<bits<6> op, string ps, string opName, OffsetMode offsets> :
1274
+ SMEM_Real_gfx12<op, !cast<SM_Pseudo>(ps # offsets.Variant), opName> {
1275
+ RegisterClass BaseClass = !cast<SM_Load_Pseudo>(ps # offsets.Variant).BaseClass;
1276
+ let InOperandList = !con((ins BaseClass:$sbase), offsets.Ins, (ins CPol:$cpol));
1277
+
1278
+ let Inst{22-21} = cpol{4-3}; // scope
1279
+ let Inst{24-23} = cpol{1-0}; // th - only lower 2 bits are supported
1280
+ }
1281
+
1282
// Defines the two GFX12 real load instructions for one opcode: an _IMM
// variant and an _SGPR_IMM variant.
//   op - 6-bit opcode shared by both variants.
//   ps - base name of the pseudo to encode; defaults to the defm NAME, and
//        can be overridden when the real name differs from the pseudo name.
// The assembly mnemonic is the lower-cased defm NAME.
+ multiclass SM_Real_Loads_gfx12<bits<6> op, string ps = NAME> {
1283
+ defvar opName = !tolower(NAME);
1284
+ def _IMM_gfx12 : SMEM_Real_Load_gfx12<op, ps, opName, IMM_Offset>;
1285
+ def _SGPR_IMM_gfx12 : SMEM_Real_Load_gfx12<op, ps, opName, SGPR_IMM_Offset>;
1286
+ }
1287
+
1288
// GFX12 scalar loads. Each defm uses a B<bits> name for the real instruction
// and passes the existing DWORD-named pseudo as the second argument.
// Opcodes do not follow operand size: the B96 forms use 0x05 and 0x15, after
// the B512 forms at 0x04 and 0x14.
+ defm S_LOAD_B32 : SM_Real_Loads_gfx12<0x00, "S_LOAD_DWORD">;
1289
+ defm S_LOAD_B64 : SM_Real_Loads_gfx12<0x01, "S_LOAD_DWORDX2">;
1290
+ defm S_LOAD_B96 : SM_Real_Loads_gfx12<0x05, "S_LOAD_DWORDX3">;
1291
+ defm S_LOAD_B128 : SM_Real_Loads_gfx12<0x02, "S_LOAD_DWORDX4">;
1292
+ defm S_LOAD_B256 : SM_Real_Loads_gfx12<0x03, "S_LOAD_DWORDX8">;
1293
+ defm S_LOAD_B512 : SM_Real_Loads_gfx12<0x04, "S_LOAD_DWORDX16">;
1294
+
1295
+ defm S_BUFFER_LOAD_B32 : SM_Real_Loads_gfx12<0x10, "S_BUFFER_LOAD_DWORD">;
1296
+ defm S_BUFFER_LOAD_B64 : SM_Real_Loads_gfx12<0x11, "S_BUFFER_LOAD_DWORDX2">;
1297
+ defm S_BUFFER_LOAD_B96 : SM_Real_Loads_gfx12<0x15, "S_BUFFER_LOAD_DWORDX3">;
1298
+ defm S_BUFFER_LOAD_B128 : SM_Real_Loads_gfx12<0x12, "S_BUFFER_LOAD_DWORDX4">;
1299
+ defm S_BUFFER_LOAD_B256 : SM_Real_Loads_gfx12<0x13, "S_BUFFER_LOAD_DWORDX8">;
1300
+ defm S_BUFFER_LOAD_B512 : SM_Real_Loads_gfx12<0x14, "S_BUFFER_LOAD_DWORDX16">;
1301
+
1302
// Non-load GFX12 real instructions. S_DCACHE_INV uses the plain GFX12
// encoding class (opcode written as 0x021, i.e. 0x21); the prefetch
// instructions at 0x24-0x28 use the prefetch encoding class.
+ def S_DCACHE_INV_gfx12 : SMEM_Real_gfx12<0x021, S_DCACHE_INV>;
1303
+
1258
1304
def S_PREFETCH_INST_gfx12 : SMEM_Real_Prefetch_gfx12<0x24, S_PREFETCH_INST>;
1259
1305
def S_PREFETCH_INST_PC_REL_gfx12 : SMEM_Real_Prefetch_gfx12<0x25, S_PREFETCH_INST_PC_REL>;
1260
1306
def S_PREFETCH_DATA_gfx12 : SMEM_Real_Prefetch_gfx12<0x26, S_PREFETCH_DATA>;
1261
1307
def S_BUFFER_PREFETCH_DATA_gfx12 : SMEM_Real_Prefetch_gfx12<0x27, S_BUFFER_PREFETCH_DATA>;
1262
1308
def S_PREFETCH_DATA_PC_REL_gfx12 : SMEM_Real_Prefetch_gfx12<0x28, S_PREFETCH_DATA_PC_REL>;
1309
+
1310
// Defines the two GFX12 real probe instructions for one opcode: an _IMM
// variant and an _SGPR_IMM variant. The pseudos are looked up by the defm
// NAME plus the variant suffix and cast to SM_Probe_Pseudo.
// Both variants use SMEM_Real_Prefetch_gfx12 as their encoding class.
+ multiclass SMEM_Real_Probe_gfx12<bits<6> op> {
1311
+ defvar ps = NAME;
1312
+ def _IMM_gfx12 : SMEM_Real_Prefetch_gfx12<op, !cast<SM_Probe_Pseudo>(ps#_IMM)>;
1313
+ def _SGPR_IMM_gfx12 : SMEM_Real_Prefetch_gfx12<op, !cast<SM_Probe_Pseudo>(ps#_SGPR_IMM)>;
1314
+ }
1315
+
1316
// GFX12 real encodings for the ATC probe instructions (opcodes 0x22, 0x23).
+ defm S_ATC_PROBE : SMEM_Real_Probe_gfx12<0x22>;
1317
+ defm S_ATC_PROBE_BUFFER : SMEM_Real_Probe_gfx12<0x23>;
0 commit comments