Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
24 changes: 24 additions & 0 deletions llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1394,6 +1394,10 @@ SIInstrInfo::getIndirectGPRIDXPseudo(unsigned VecSize,
return get(AMDGPU::V_INDIRECT_REG_READ_GPR_IDX_B32_V4);
if (VecSize <= 160) // 20 bytes
return get(AMDGPU::V_INDIRECT_REG_READ_GPR_IDX_B32_V5);
if (VecSize <= 192) // 24 bytes
return get(AMDGPU::V_INDIRECT_REG_READ_GPR_IDX_B32_V6);
if (VecSize <= 224) // 28 bytes
return get(AMDGPU::V_INDIRECT_REG_READ_GPR_IDX_B32_V7);
if (VecSize <= 256) // 32 bytes
return get(AMDGPU::V_INDIRECT_REG_READ_GPR_IDX_B32_V8);
if (VecSize <= 288) // 36 bytes
Expand Down Expand Up @@ -1422,6 +1426,10 @@ SIInstrInfo::getIndirectGPRIDXPseudo(unsigned VecSize,
return get(AMDGPU::V_INDIRECT_REG_WRITE_GPR_IDX_B32_V4);
if (VecSize <= 160) // 20 bytes
return get(AMDGPU::V_INDIRECT_REG_WRITE_GPR_IDX_B32_V5);
if (VecSize <= 192) // 24 bytes
return get(AMDGPU::V_INDIRECT_REG_WRITE_GPR_IDX_B32_V6);
if (VecSize <= 224) // 28 bytes
return get(AMDGPU::V_INDIRECT_REG_WRITE_GPR_IDX_B32_V7);
if (VecSize <= 256) // 32 bytes
return get(AMDGPU::V_INDIRECT_REG_WRITE_GPR_IDX_B32_V8);
if (VecSize <= 288) // 36 bytes
Expand Down Expand Up @@ -1451,6 +1459,10 @@ static unsigned getIndirectVGPRWriteMovRelPseudoOpc(unsigned VecSize) {
return AMDGPU::V_INDIRECT_REG_WRITE_MOVREL_B32_V4;
if (VecSize <= 160) // 20 bytes
return AMDGPU::V_INDIRECT_REG_WRITE_MOVREL_B32_V5;
if (VecSize <= 192) // 24 bytes
return AMDGPU::V_INDIRECT_REG_WRITE_MOVREL_B32_V6;
if (VecSize <= 224) // 28 bytes
return AMDGPU::V_INDIRECT_REG_WRITE_MOVREL_B32_V7;
if (VecSize <= 256) // 32 bytes
return AMDGPU::V_INDIRECT_REG_WRITE_MOVREL_B32_V8;
if (VecSize <= 288) // 36 bytes
Expand Down Expand Up @@ -1480,6 +1492,10 @@ static unsigned getIndirectSGPRWriteMovRelPseudo32(unsigned VecSize) {
return AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B32_V4;
if (VecSize <= 160) // 20 bytes
return AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B32_V5;
if (VecSize <= 192) // 24 bytes
return AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B32_V6;
if (VecSize <= 224) // 28 bytes
return AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B32_V7;
if (VecSize <= 256) // 32 bytes
return AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B32_V8;
if (VecSize <= 288) // 36 bytes
Expand Down Expand Up @@ -2244,6 +2260,8 @@ bool SIInstrInfo::expandPostRAPseudo(MachineInstr &MI) const {
case AMDGPU::V_INDIRECT_REG_WRITE_MOVREL_B32_V3:
case AMDGPU::V_INDIRECT_REG_WRITE_MOVREL_B32_V4:
case AMDGPU::V_INDIRECT_REG_WRITE_MOVREL_B32_V5:
case AMDGPU::V_INDIRECT_REG_WRITE_MOVREL_B32_V6:
case AMDGPU::V_INDIRECT_REG_WRITE_MOVREL_B32_V7:
case AMDGPU::V_INDIRECT_REG_WRITE_MOVREL_B32_V8:
case AMDGPU::V_INDIRECT_REG_WRITE_MOVREL_B32_V9:
case AMDGPU::V_INDIRECT_REG_WRITE_MOVREL_B32_V10:
Expand All @@ -2256,6 +2274,8 @@ bool SIInstrInfo::expandPostRAPseudo(MachineInstr &MI) const {
case AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B32_V3:
case AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B32_V4:
case AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B32_V5:
case AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B32_V6:
case AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B32_V7:
case AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B32_V8:
case AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B32_V9:
case AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B32_V10:
Expand Down Expand Up @@ -2303,6 +2323,8 @@ bool SIInstrInfo::expandPostRAPseudo(MachineInstr &MI) const {
case AMDGPU::V_INDIRECT_REG_WRITE_GPR_IDX_B32_V3:
case AMDGPU::V_INDIRECT_REG_WRITE_GPR_IDX_B32_V4:
case AMDGPU::V_INDIRECT_REG_WRITE_GPR_IDX_B32_V5:
case AMDGPU::V_INDIRECT_REG_WRITE_GPR_IDX_B32_V6:
case AMDGPU::V_INDIRECT_REG_WRITE_GPR_IDX_B32_V7:
case AMDGPU::V_INDIRECT_REG_WRITE_GPR_IDX_B32_V8:
case AMDGPU::V_INDIRECT_REG_WRITE_GPR_IDX_B32_V9:
case AMDGPU::V_INDIRECT_REG_WRITE_GPR_IDX_B32_V10:
Expand Down Expand Up @@ -2347,6 +2369,8 @@ bool SIInstrInfo::expandPostRAPseudo(MachineInstr &MI) const {
case AMDGPU::V_INDIRECT_REG_READ_GPR_IDX_B32_V3:
case AMDGPU::V_INDIRECT_REG_READ_GPR_IDX_B32_V4:
case AMDGPU::V_INDIRECT_REG_READ_GPR_IDX_B32_V5:
case AMDGPU::V_INDIRECT_REG_READ_GPR_IDX_B32_V6:
case AMDGPU::V_INDIRECT_REG_READ_GPR_IDX_B32_V7:
case AMDGPU::V_INDIRECT_REG_READ_GPR_IDX_B32_V8:
case AMDGPU::V_INDIRECT_REG_READ_GPR_IDX_B32_V9:
case AMDGPU::V_INDIRECT_REG_READ_GPR_IDX_B32_V10:
Expand Down
8 changes: 8 additions & 0 deletions llvm/lib/Target/AMDGPU/SIInstructions.td
Original file line number Diff line number Diff line change
Expand Up @@ -1026,6 +1026,8 @@ def V_INDIRECT_REG_WRITE_MOVREL_B32_V2 : V_INDIRECT_REG_WRITE_MOVREL_B32_pseudo<
// Each def below instantiates V_INDIRECT_REG_WRITE_MOVREL_B32_pseudo for one
// vector width: the _V<N> suffix pairs with the register class VReg_<32*N>
// (e.g. _V6 -> VReg_192, _V7 -> VReg_224).
def V_INDIRECT_REG_WRITE_MOVREL_B32_V3 : V_INDIRECT_REG_WRITE_MOVREL_B32_pseudo<VReg_96>;
def V_INDIRECT_REG_WRITE_MOVREL_B32_V4 : V_INDIRECT_REG_WRITE_MOVREL_B32_pseudo<VReg_128>;
def V_INDIRECT_REG_WRITE_MOVREL_B32_V5 : V_INDIRECT_REG_WRITE_MOVREL_B32_pseudo<VReg_160>;
def V_INDIRECT_REG_WRITE_MOVREL_B32_V6 : V_INDIRECT_REG_WRITE_MOVREL_B32_pseudo<VReg_192>;
def V_INDIRECT_REG_WRITE_MOVREL_B32_V7 : V_INDIRECT_REG_WRITE_MOVREL_B32_pseudo<VReg_224>;
def V_INDIRECT_REG_WRITE_MOVREL_B32_V8 : V_INDIRECT_REG_WRITE_MOVREL_B32_pseudo<VReg_256>;
def V_INDIRECT_REG_WRITE_MOVREL_B32_V9 : V_INDIRECT_REG_WRITE_MOVREL_B32_pseudo<VReg_288>;
def V_INDIRECT_REG_WRITE_MOVREL_B32_V10 : V_INDIRECT_REG_WRITE_MOVREL_B32_pseudo<VReg_320>;
Expand All @@ -1039,6 +1041,8 @@ def S_INDIRECT_REG_WRITE_MOVREL_B32_V2 : S_INDIRECT_REG_WRITE_MOVREL_B32_pseudo<
// Each def below instantiates S_INDIRECT_REG_WRITE_MOVREL_B32_pseudo for one
// vector width: the _V<N> suffix pairs with the register class SReg_<32*N>
// (e.g. _V6 -> SReg_192, _V7 -> SReg_224).
def S_INDIRECT_REG_WRITE_MOVREL_B32_V3 : S_INDIRECT_REG_WRITE_MOVREL_B32_pseudo<SReg_96>;
def S_INDIRECT_REG_WRITE_MOVREL_B32_V4 : S_INDIRECT_REG_WRITE_MOVREL_B32_pseudo<SReg_128>;
def S_INDIRECT_REG_WRITE_MOVREL_B32_V5 : S_INDIRECT_REG_WRITE_MOVREL_B32_pseudo<SReg_160>;
def S_INDIRECT_REG_WRITE_MOVREL_B32_V6 : S_INDIRECT_REG_WRITE_MOVREL_B32_pseudo<SReg_192>;
def S_INDIRECT_REG_WRITE_MOVREL_B32_V7 : S_INDIRECT_REG_WRITE_MOVREL_B32_pseudo<SReg_224>;
def S_INDIRECT_REG_WRITE_MOVREL_B32_V8 : S_INDIRECT_REG_WRITE_MOVREL_B32_pseudo<SReg_256>;
def S_INDIRECT_REG_WRITE_MOVREL_B32_V9 : S_INDIRECT_REG_WRITE_MOVREL_B32_pseudo<SReg_288>;
def S_INDIRECT_REG_WRITE_MOVREL_B32_V10 : S_INDIRECT_REG_WRITE_MOVREL_B32_pseudo<SReg_320>;
Expand Down Expand Up @@ -1071,6 +1075,8 @@ def V_INDIRECT_REG_WRITE_GPR_IDX_B32_V2 : V_INDIRECT_REG_WRITE_GPR_IDX_pseudo<VR
// Each def below instantiates V_INDIRECT_REG_WRITE_GPR_IDX_pseudo for one
// vector width: the _V<N> suffix pairs with the register class VReg_<32*N>
// (e.g. _V6 -> VReg_192, _V7 -> VReg_224).
def V_INDIRECT_REG_WRITE_GPR_IDX_B32_V3 : V_INDIRECT_REG_WRITE_GPR_IDX_pseudo<VReg_96>;
def V_INDIRECT_REG_WRITE_GPR_IDX_B32_V4 : V_INDIRECT_REG_WRITE_GPR_IDX_pseudo<VReg_128>;
def V_INDIRECT_REG_WRITE_GPR_IDX_B32_V5 : V_INDIRECT_REG_WRITE_GPR_IDX_pseudo<VReg_160>;
def V_INDIRECT_REG_WRITE_GPR_IDX_B32_V6 : V_INDIRECT_REG_WRITE_GPR_IDX_pseudo<VReg_192>;
def V_INDIRECT_REG_WRITE_GPR_IDX_B32_V7 : V_INDIRECT_REG_WRITE_GPR_IDX_pseudo<VReg_224>;
def V_INDIRECT_REG_WRITE_GPR_IDX_B32_V8 : V_INDIRECT_REG_WRITE_GPR_IDX_pseudo<VReg_256>;
def V_INDIRECT_REG_WRITE_GPR_IDX_B32_V9 : V_INDIRECT_REG_WRITE_GPR_IDX_pseudo<VReg_288>;
def V_INDIRECT_REG_WRITE_GPR_IDX_B32_V10 : V_INDIRECT_REG_WRITE_GPR_IDX_pseudo<VReg_320>;
Expand All @@ -1091,6 +1097,8 @@ def V_INDIRECT_REG_READ_GPR_IDX_B32_V2 : V_INDIRECT_REG_READ_GPR_IDX_pseudo<VReg
// Each def below instantiates V_INDIRECT_REG_READ_GPR_IDX_pseudo for one
// vector width: the _V<N> suffix pairs with the register class VReg_<32*N>
// (e.g. _V6 -> VReg_192, _V7 -> VReg_224).
def V_INDIRECT_REG_READ_GPR_IDX_B32_V3 : V_INDIRECT_REG_READ_GPR_IDX_pseudo<VReg_96>;
def V_INDIRECT_REG_READ_GPR_IDX_B32_V4 : V_INDIRECT_REG_READ_GPR_IDX_pseudo<VReg_128>;
def V_INDIRECT_REG_READ_GPR_IDX_B32_V5 : V_INDIRECT_REG_READ_GPR_IDX_pseudo<VReg_160>;
def V_INDIRECT_REG_READ_GPR_IDX_B32_V6 : V_INDIRECT_REG_READ_GPR_IDX_pseudo<VReg_192>;
def V_INDIRECT_REG_READ_GPR_IDX_B32_V7 : V_INDIRECT_REG_READ_GPR_IDX_pseudo<VReg_224>;
def V_INDIRECT_REG_READ_GPR_IDX_B32_V8 : V_INDIRECT_REG_READ_GPR_IDX_pseudo<VReg_256>;
def V_INDIRECT_REG_READ_GPR_IDX_B32_V9 : V_INDIRECT_REG_READ_GPR_IDX_pseudo<VReg_288>;
def V_INDIRECT_REG_READ_GPR_IDX_B32_V10 : V_INDIRECT_REG_READ_GPR_IDX_pseudo<VReg_320>;
Expand Down
126 changes: 126 additions & 0 deletions llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-extract-vector-elt.mir
Original file line number Diff line number Diff line change
Expand Up @@ -964,3 +964,129 @@ body: |
%2:vgpr(s32) = G_EXTRACT_VECTOR_ELT %0, %1
S_ENDPGM 0, implicit %2
...

---
# G_EXTRACT_VECTOR_ELT of an s32 element from an SGPR <6 x s32> vector using an
# SGPR index. Both check prefixes (MOVREL and GPRIDX) expect the same output:
# the index is copied into $m0 and the element is read with S_MOVRELS_B32 from
# an sgpr_192 source.
name: extract_vector_elt_s_s32_v6s32
legalized: true
regBankSelected: true

body: |
bb.0:
liveins: $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5, $sgpr6

; MOVREL-LABEL: name: extract_vector_elt_s_s32_v6s32
; MOVREL: liveins: $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5, $sgpr6
; MOVREL-NEXT: {{ $}}
; MOVREL-NEXT: [[COPY:%[0-9]+]]:sgpr_192 = COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5
; MOVREL-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr6
; MOVREL-NEXT: $m0 = COPY [[COPY1]]
; MOVREL-NEXT: [[S_MOVRELS_B32_:%[0-9]+]]:sreg_32 = S_MOVRELS_B32 [[COPY]].sub0, implicit $m0, implicit [[COPY]]
; MOVREL-NEXT: S_ENDPGM 0, implicit [[S_MOVRELS_B32_]]
;
; GPRIDX-LABEL: name: extract_vector_elt_s_s32_v6s32
; GPRIDX: liveins: $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5, $sgpr6
; GPRIDX-NEXT: {{ $}}
; GPRIDX-NEXT: [[COPY:%[0-9]+]]:sgpr_192 = COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5
; GPRIDX-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr6
; GPRIDX-NEXT: $m0 = COPY [[COPY1]]
; GPRIDX-NEXT: [[S_MOVRELS_B32_:%[0-9]+]]:sreg_32 = S_MOVRELS_B32 [[COPY]].sub0, implicit $m0, implicit [[COPY]]
; GPRIDX-NEXT: S_ENDPGM 0, implicit [[S_MOVRELS_B32_]]
%0:sgpr(<6 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5
%1:sgpr(s32) = COPY $sgpr6
%2:sgpr(s32) = G_EXTRACT_VECTOR_ELT %0, %1
S_ENDPGM 0, implicit %2
...

---
# G_EXTRACT_VECTOR_ELT of an s32 element from an SGPR <7 x s32> vector using an
# SGPR index. Both check prefixes (MOVREL and GPRIDX) expect the same output:
# the index is copied into $m0 and the element is read with S_MOVRELS_B32 from
# an sgpr_224 source.
name: extract_vector_elt_s_s32_v7s32
legalized: true
regBankSelected: true

body: |
bb.0:
liveins: $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6, $sgpr7

; MOVREL-LABEL: name: extract_vector_elt_s_s32_v7s32
; MOVREL: liveins: $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6, $sgpr7
; MOVREL-NEXT: {{ $}}
; MOVREL-NEXT: [[COPY:%[0-9]+]]:sgpr_224 = COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6
; MOVREL-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr7
; MOVREL-NEXT: $m0 = COPY [[COPY1]]
; MOVREL-NEXT: [[S_MOVRELS_B32_:%[0-9]+]]:sreg_32 = S_MOVRELS_B32 [[COPY]].sub0, implicit $m0, implicit [[COPY]]
; MOVREL-NEXT: S_ENDPGM 0, implicit [[S_MOVRELS_B32_]]
;
; GPRIDX-LABEL: name: extract_vector_elt_s_s32_v7s32
; GPRIDX: liveins: $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6, $sgpr7
; GPRIDX-NEXT: {{ $}}
; GPRIDX-NEXT: [[COPY:%[0-9]+]]:sgpr_224 = COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6
; GPRIDX-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr7
; GPRIDX-NEXT: $m0 = COPY [[COPY1]]
; GPRIDX-NEXT: [[S_MOVRELS_B32_:%[0-9]+]]:sreg_32 = S_MOVRELS_B32 [[COPY]].sub0, implicit $m0, implicit [[COPY]]
; GPRIDX-NEXT: S_ENDPGM 0, implicit [[S_MOVRELS_B32_]]
%0:sgpr(<7 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6
%1:sgpr(s32) = COPY $sgpr7
%2:sgpr(s32) = G_EXTRACT_VECTOR_ELT %0, %1
S_ENDPGM 0, implicit %2
...

---
# G_EXTRACT_VECTOR_ELT of an s32 element from a VGPR <6 x s32> vector using an
# SGPR index. The MOVREL checks expect the index copied into $m0 followed by
# V_MOVRELS_B32_e32; the GPRIDX checks expect the
# V_INDIRECT_REG_READ_GPR_IDX_B32_V6 pseudo taking the vreg_192 vector and the
# index as operands.
name: extract_vector_elt_v_s32_v6s32
legalized: true
regBankSelected: true

body: |
bb.0:
liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5, $sgpr2

; MOVREL-LABEL: name: extract_vector_elt_v_s32_v6s32
; MOVREL: liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5, $sgpr2
; MOVREL-NEXT: {{ $}}
; MOVREL-NEXT: [[COPY:%[0-9]+]]:vreg_192 = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5
; MOVREL-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr2
; MOVREL-NEXT: $m0 = COPY [[COPY1]]
; MOVREL-NEXT: [[V_MOVRELS_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOVRELS_B32_e32 [[COPY]].sub0, implicit $m0, implicit $exec, implicit [[COPY]]
; MOVREL-NEXT: S_ENDPGM 0, implicit [[V_MOVRELS_B32_e32_]]
;
; GPRIDX-LABEL: name: extract_vector_elt_v_s32_v6s32
; GPRIDX: liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5, $sgpr2
; GPRIDX-NEXT: {{ $}}
; GPRIDX-NEXT: [[COPY:%[0-9]+]]:vreg_192 = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5
; GPRIDX-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr2
; GPRIDX-NEXT: [[V_INDIRECT_REG_READ_GPR_IDX_B32_V6_:%[0-9]+]]:vgpr_32 = V_INDIRECT_REG_READ_GPR_IDX_B32_V6 [[COPY]], [[COPY1]], 3, implicit-def $m0, implicit $m0, implicit $exec
; GPRIDX-NEXT: S_ENDPGM 0, implicit [[V_INDIRECT_REG_READ_GPR_IDX_B32_V6_]]
%0:vgpr(<6 x s32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5
%1:sgpr(s32) = COPY $sgpr2
%2:vgpr(s32) = G_EXTRACT_VECTOR_ELT %0, %1
S_ENDPGM 0, implicit %2
...

---
# G_EXTRACT_VECTOR_ELT of an s32 element from a VGPR <7 x s32> vector using an
# SGPR index. The MOVREL checks expect the index copied into $m0 followed by
# V_MOVRELS_B32_e32; the GPRIDX checks expect the
# V_INDIRECT_REG_READ_GPR_IDX_B32_V7 pseudo taking the vreg_224 vector and the
# index as operands.
name: extract_vector_elt_v_s32_v7s32
legalized: true
regBankSelected: true

body: |
bb.0:
liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6, $sgpr2

; MOVREL-LABEL: name: extract_vector_elt_v_s32_v7s32
; MOVREL: liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6, $sgpr2
; MOVREL-NEXT: {{ $}}
; MOVREL-NEXT: [[COPY:%[0-9]+]]:vreg_224 = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6
; MOVREL-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr2
; MOVREL-NEXT: $m0 = COPY [[COPY1]]
; MOVREL-NEXT: [[V_MOVRELS_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOVRELS_B32_e32 [[COPY]].sub0, implicit $m0, implicit $exec, implicit [[COPY]]
; MOVREL-NEXT: S_ENDPGM 0, implicit [[V_MOVRELS_B32_e32_]]
;
; GPRIDX-LABEL: name: extract_vector_elt_v_s32_v7s32
; GPRIDX: liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6, $sgpr2
; GPRIDX-NEXT: {{ $}}
; GPRIDX-NEXT: [[COPY:%[0-9]+]]:vreg_224 = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6
; GPRIDX-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr2
; GPRIDX-NEXT: [[V_INDIRECT_REG_READ_GPR_IDX_B32_V7_:%[0-9]+]]:vgpr_32 = V_INDIRECT_REG_READ_GPR_IDX_B32_V7 [[COPY]], [[COPY1]], 3, implicit-def $m0, implicit $m0, implicit $exec
; GPRIDX-NEXT: S_ENDPGM 0, implicit [[V_INDIRECT_REG_READ_GPR_IDX_B32_V7_]]
%0:vgpr(<7 x s32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6
%1:sgpr(s32) = COPY $sgpr2
%2:vgpr(s32) = G_EXTRACT_VECTOR_ELT %0, %1
S_ENDPGM 0, implicit %2
...
Loading
Loading