Skip to content

Commit 53a07b2

Browse files
committed
[AMDGPU][SDAG] Add missing cases for SI_INDIRECT_SRC/DST
Previously, instruction selection failed to select extract/insert element operations on i32/float vectors of sizes 3, 5, 6 and 7 when compiling at -O0. This patch adds the missing SI_INDIRECT_SRC/DST cases for those sizes.
1 parent 6e54034 commit 53a07b2

File tree

4 files changed

+8475
-8
lines changed

4 files changed

+8475
-8
lines changed

llvm/lib/Target/AMDGPU/SIISelLowering.cpp

Lines changed: 8 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -6304,7 +6304,11 @@ SITargetLowering::EmitInstrWithCustomInserter(MachineInstr &MI,
63046304
}
63056305
case AMDGPU::SI_INDIRECT_SRC_V1:
63066306
case AMDGPU::SI_INDIRECT_SRC_V2:
6307+
case AMDGPU::SI_INDIRECT_SRC_V3:
63076308
case AMDGPU::SI_INDIRECT_SRC_V4:
6309+
case AMDGPU::SI_INDIRECT_SRC_V5:
6310+
case AMDGPU::SI_INDIRECT_SRC_V6:
6311+
case AMDGPU::SI_INDIRECT_SRC_V7:
63086312
case AMDGPU::SI_INDIRECT_SRC_V8:
63096313
case AMDGPU::SI_INDIRECT_SRC_V9:
63106314
case AMDGPU::SI_INDIRECT_SRC_V10:
@@ -6315,7 +6319,11 @@ SITargetLowering::EmitInstrWithCustomInserter(MachineInstr &MI,
63156319
return emitIndirectSrc(MI, *BB, *getSubtarget());
63166320
case AMDGPU::SI_INDIRECT_DST_V1:
63176321
case AMDGPU::SI_INDIRECT_DST_V2:
6322+
case AMDGPU::SI_INDIRECT_DST_V3:
63186323
case AMDGPU::SI_INDIRECT_DST_V4:
6324+
case AMDGPU::SI_INDIRECT_DST_V5:
6325+
case AMDGPU::SI_INDIRECT_DST_V6:
6326+
case AMDGPU::SI_INDIRECT_DST_V7:
63196327
case AMDGPU::SI_INDIRECT_DST_V8:
63206328
case AMDGPU::SI_INDIRECT_DST_V9:
63216329
case AMDGPU::SI_INDIRECT_DST_V10:

llvm/lib/Target/AMDGPU/SIInstructions.td

Lines changed: 16 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -969,7 +969,11 @@ class SI_INDIRECT_DST<RegisterClass rc> : VPseudoInstSI <
969969

970970
def SI_INDIRECT_SRC_V1 : SI_INDIRECT_SRC<VGPR_32>;
971971
def SI_INDIRECT_SRC_V2 : SI_INDIRECT_SRC<VReg_64>;
972+
def SI_INDIRECT_SRC_V3 : SI_INDIRECT_SRC<VReg_96>;
972973
def SI_INDIRECT_SRC_V4 : SI_INDIRECT_SRC<VReg_128>;
974+
def SI_INDIRECT_SRC_V5 : SI_INDIRECT_SRC<VReg_160>;
975+
def SI_INDIRECT_SRC_V6 : SI_INDIRECT_SRC<VReg_192>;
976+
def SI_INDIRECT_SRC_V7 : SI_INDIRECT_SRC<VReg_224>;
973977
def SI_INDIRECT_SRC_V8 : SI_INDIRECT_SRC<VReg_256>;
974978
def SI_INDIRECT_SRC_V9 : SI_INDIRECT_SRC<VReg_288>;
975979
def SI_INDIRECT_SRC_V10 : SI_INDIRECT_SRC<VReg_320>;
@@ -980,7 +984,11 @@ def SI_INDIRECT_SRC_V32 : SI_INDIRECT_SRC<VReg_1024>;
980984

981985
def SI_INDIRECT_DST_V1 : SI_INDIRECT_DST<VGPR_32>;
982986
def SI_INDIRECT_DST_V2 : SI_INDIRECT_DST<VReg_64>;
987+
def SI_INDIRECT_DST_V3 : SI_INDIRECT_DST<VReg_96>;
983988
def SI_INDIRECT_DST_V4 : SI_INDIRECT_DST<VReg_128>;
989+
def SI_INDIRECT_DST_V5 : SI_INDIRECT_DST<VReg_160>;
990+
def SI_INDIRECT_DST_V6 : SI_INDIRECT_DST<VReg_192>;
991+
def SI_INDIRECT_DST_V7 : SI_INDIRECT_DST<VReg_224>;
984992
def SI_INDIRECT_DST_V8 : SI_INDIRECT_DST<VReg_256>;
985993
def SI_INDIRECT_DST_V9 : SI_INDIRECT_DST<VReg_288>;
986994
def SI_INDIRECT_DST_V10 : SI_INDIRECT_DST<VReg_320>;
@@ -2779,7 +2787,11 @@ multiclass SI_INDIRECT_Pattern <ValueType vt, ValueType eltvt, string VecSize> {
27792787
}
27802788

27812789
defm : SI_INDIRECT_Pattern <v2f32, f32, "V2">;
2790+
defm : SI_INDIRECT_Pattern<v3f32, f32, "V3">;
27822791
defm : SI_INDIRECT_Pattern <v4f32, f32, "V4">;
2792+
defm : SI_INDIRECT_Pattern<v5f32, f32, "V5">;
2793+
defm : SI_INDIRECT_Pattern<v6f32, f32, "V6">;
2794+
defm : SI_INDIRECT_Pattern<v7f32, f32, "V7">;
27832795
defm : SI_INDIRECT_Pattern <v8f32, f32, "V8">;
27842796
defm : SI_INDIRECT_Pattern <v9f32, f32, "V9">;
27852797
defm : SI_INDIRECT_Pattern <v10f32, f32, "V10">;
@@ -2789,7 +2801,11 @@ defm : SI_INDIRECT_Pattern <v16f32, f32, "V16">;
27892801
defm : SI_INDIRECT_Pattern <v32f32, f32, "V32">;
27902802

27912803
defm : SI_INDIRECT_Pattern <v2i32, i32, "V2">;
2804+
defm : SI_INDIRECT_Pattern<v3i32, i32, "V3">;
27922805
defm : SI_INDIRECT_Pattern <v4i32, i32, "V4">;
2806+
defm : SI_INDIRECT_Pattern<v5i32, i32, "V5">;
2807+
defm : SI_INDIRECT_Pattern<v6i32, i32, "V6">;
2808+
defm : SI_INDIRECT_Pattern<v7i32, i32, "V7">;
27932809
defm : SI_INDIRECT_Pattern <v8i32, i32, "V8">;
27942810
defm : SI_INDIRECT_Pattern <v9i32, i32, "V9">;
27952811
defm : SI_INDIRECT_Pattern <v10i32, i32, "V10">;

0 commit comments

Comments
 (0)