Skip to content

Commit

Permalink
[AMDGPU] Don't optimize agpr phis if the operand doesn't have s…
Browse files Browse the repository at this point in the history
…ubreg use

If the operand doesn't have any subreg use, the optimization could potentially
generate a `V_ACCVGPR_READ_B32_e64` with the wrong register class, as in the
following case:

%46:vreg_128 = V_ACCVGPR_READ_B32_e64 %38:areg_128, implicit $exec
  • Loading branch information
shiltian committed May 7, 2024
1 parent 7208569 commit fce0ec6
Show file tree
Hide file tree
Showing 2 changed files with 66 additions and 0 deletions.
2 changes: 2 additions & 0 deletions llvm/lib/Target/AMDGPU/SIFoldOperands.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -2106,6 +2106,8 @@ bool SIFoldOperands::tryOptimizeAGPRPhis(MachineBasicBlock &MBB) {

// Walk MI's operands at odd indices (1, 3, 5, ...) and group them in RegToMO,
// keyed by their (register, subregister index) pair.
// NOTE(review): presumably these are the incoming-value operands of a PHI; the
// enclosing code is not visible in this hunk, so confirm against the full file.
for (unsigned K = 1; K < MI.getNumOperands(); K += 2) {
MachineOperand &PhiMO = MI.getOperand(K);
// Skip operands that carry no subregister index. Per the commit rationale,
// optimizing such an operand could generate a V_ACCVGPR_READ_B32_e64 with the
// wrong register class (e.g. a vreg_128 result read from an areg_128 source).
if (!PhiMO.getSubReg())
continue;
RegToMO[{PhiMO.getReg(), PhiMO.getSubReg()}].push_back(&PhiMO);
}
}
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,64 @@
# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py UTC_ARGS: --version 4
# RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx908 -verify-machineinstrs -run-pass si-fold-operands -o - %s | FileCheck %s

---
name: skip_optimize_agpr_phi_without_subreg_use
tracksRegLiveness: true
machineFunctionInfo:
isEntryFunction: true
scratchRSrcReg: '$sgpr0_sgpr1_sgpr2_sgpr3'
body: |
; CHECK-LABEL: name: skip_optimize_agpr_phi_without_subreg_use
; CHECK: bb.0:
; CHECK-NEXT: successors: %bb.1(0x80000000)
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
; CHECK-NEXT: [[S_MOV_B32_:%[0-9]+]]:sgpr_32 = S_MOV_B32 0
; CHECK-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 0
; CHECK-NEXT: [[V_ACCVGPR_WRITE_B32_e64_:%[0-9]+]]:agpr_32 = V_ACCVGPR_WRITE_B32_e64 0, implicit $exec
; CHECK-NEXT: [[V_ACCVGPR_WRITE_B32_e64_1:%[0-9]+]]:agpr_32 = V_ACCVGPR_WRITE_B32_e64 0, implicit $exec
; CHECK-NEXT: [[V_ACCVGPR_WRITE_B32_e64_2:%[0-9]+]]:agpr_32 = V_ACCVGPR_WRITE_B32_e64 0, implicit $exec
; CHECK-NEXT: [[V_ACCVGPR_WRITE_B32_e64_3:%[0-9]+]]:agpr_32 = V_ACCVGPR_WRITE_B32_e64 0, implicit $exec
; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:areg_128 = REG_SEQUENCE [[V_ACCVGPR_WRITE_B32_e64_]], %subreg.sub0, [[V_ACCVGPR_WRITE_B32_e64_1]], %subreg.sub1, [[V_ACCVGPR_WRITE_B32_e64_2]], %subreg.sub2, [[V_ACCVGPR_WRITE_B32_e64_3]], %subreg.sub3
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: bb.1:
; CHECK-NEXT: successors: %bb.1(0x80000000)
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: [[PHI:%[0-9]+]]:sreg_64 = PHI [[S_MOV_B64_]], %bb.0, %8, %bb.1
; CHECK-NEXT: [[PHI1:%[0-9]+]]:areg_128 = PHI [[REG_SEQUENCE]], %bb.0, %10, %bb.1
; CHECK-NEXT: [[V_MFMA_F32_16X16X4F32_e64_:%[0-9]+]]:areg_128 = V_MFMA_F32_16X16X4F32_e64 [[V_MOV_B32_e32_]], [[V_MOV_B32_e32_]], [[PHI1]], 0, 0, 0, implicit $mode, implicit $exec
; CHECK-NEXT: [[BUFFER_LOAD_DWORD_OFFSET:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, implicit $exec
; CHECK-NEXT: [[V_CMP_LT_I32_e64_:%[0-9]+]]:sreg_64 = V_CMP_LT_I32_e64 killed [[BUFFER_LOAD_DWORD_OFFSET]], 1, implicit $exec
; CHECK-NEXT: [[SI_IF_BREAK:%[0-9]+]]:sreg_64 = SI_IF_BREAK killed [[V_CMP_LT_I32_e64_]], [[PHI]], implicit-def dead $scc
; CHECK-NEXT: [[COPY:%[0-9]+]]:areg_128 = COPY [[V_MFMA_F32_16X16X4F32_e64_]], implicit $exec
; CHECK-NEXT: SI_LOOP [[SI_IF_BREAK]], %bb.1, implicit-def dead $exec, implicit-def dead $scc, implicit $exec
; CHECK-NEXT: SI_RETURN
bb.0:
successors: %bb.1(0x80000000)
%0:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
%1:sgpr_32 = S_MOV_B32 0
%2:sgpr_128 = REG_SEQUENCE %1, %subreg.sub0, %1, %subreg.sub1, %1, %subreg.sub2, %1, %subreg.sub3
%3:vreg_128 = COPY %2
%4:sreg_64 = S_MOV_B64 0
%5:areg_128 = COPY %3, implicit $exec
%6:sreg_32 = S_MOV_B32 1
bb.1:
successors: %bb.1(0x80000000)
%7:sreg_64 = PHI %4, %bb.0, %8, %bb.1
%9:areg_128 = PHI %5, %bb.0, %10, %bb.1
%11:areg_128 = V_MFMA_F32_16X16X4F32_e64 %0, %0, %9, 0, 0, 0, implicit $mode, implicit $exec
%12:vgpr_32 = COPY %11.sub3
%13:vgpr_32 = COPY %11.sub2
%14:vgpr_32 = COPY %11.sub1
%15:vgpr_32 = COPY %11.sub0
%16:vgpr_32 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, implicit $exec
%17:sreg_64 = V_CMP_LT_I32_e64 killed %16, %6, implicit $exec
%8:sreg_64 = SI_IF_BREAK killed %17, %7, implicit-def dead $scc
%10:areg_128 = COPY %11, implicit $exec
SI_LOOP %8, %bb.1, implicit-def dead $exec, implicit-def dead $scc, implicit $exec
SI_RETURN
...

0 comments on commit fce0ec6

Please sign in to comment.