diff --git a/llvm/lib/Target/AMDGPU/SIFoldOperands.cpp b/llvm/lib/Target/AMDGPU/SIFoldOperands.cpp index cb448aaafa4c08..5c411a0955878f 100644 --- a/llvm/lib/Target/AMDGPU/SIFoldOperands.cpp +++ b/llvm/lib/Target/AMDGPU/SIFoldOperands.cpp @@ -2106,6 +2106,8 @@ bool SIFoldOperands::tryOptimizeAGPRPhis(MachineBasicBlock &MBB) { for (unsigned K = 1; K < MI.getNumOperands(); K += 2) { MachineOperand &PhiMO = MI.getOperand(K); + if (!PhiMO.getSubReg()) + continue; RegToMO[{PhiMO.getReg(), PhiMO.getSubReg()}].push_back(&PhiMO); } } diff --git a/llvm/test/CodeGen/AMDGPU/fold-agpr-phis.mir b/llvm/test/CodeGen/AMDGPU/fold-agpr-phis.mir index a32b3d0f1e6b35..e94546fd5e8a51 100644 --- a/llvm/test/CodeGen/AMDGPU/fold-agpr-phis.mir +++ b/llvm/test/CodeGen/AMDGPU/fold-agpr-phis.mir @@ -465,7 +465,6 @@ body: | ; GFX90A-NEXT: bb.2: ; GFX90A-NEXT: S_ENDPGM 0 bb.0: - ; Tests that tryOptimizeAGPRPhis kicks in for GFX908. liveins: $sgpr0, $scc successors: %bb.1 @@ -715,3 +714,85 @@ body: | bb.3: S_ENDPGM 0 ... + +--- +name: skip_optimize_agpr_phi_without_subreg_use +tracksRegLiveness: true +body: | + ; GFX908-LABEL: name: skip_optimize_agpr_phi_without_subreg_use + ; GFX908: bb.0: + ; GFX908-NEXT: successors: %bb.1(0x80000000) + ; GFX908-NEXT: liveins: $scc + ; GFX908-NEXT: {{ $}} + ; GFX908-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec + ; GFX908-NEXT: [[S_MOV_B32_:%[0-9]+]]:sgpr_32 = S_MOV_B32 0 + ; GFX908-NEXT: [[V_ACCVGPR_WRITE_B32_e64_:%[0-9]+]]:agpr_32 = V_ACCVGPR_WRITE_B32_e64 0, implicit $exec + ; GFX908-NEXT: [[V_ACCVGPR_WRITE_B32_e64_1:%[0-9]+]]:agpr_32 = V_ACCVGPR_WRITE_B32_e64 0, implicit $exec + ; GFX908-NEXT: [[V_ACCVGPR_WRITE_B32_e64_2:%[0-9]+]]:agpr_32 = V_ACCVGPR_WRITE_B32_e64 0, implicit $exec + ; GFX908-NEXT: [[V_ACCVGPR_WRITE_B32_e64_3:%[0-9]+]]:agpr_32 = V_ACCVGPR_WRITE_B32_e64 0, implicit $exec + ; GFX908-NEXT: [[REG_SEQUENCE:%[0-9]+]]:areg_128_align2 = REG_SEQUENCE [[V_ACCVGPR_WRITE_B32_e64_]], %subreg.sub0, [[V_ACCVGPR_WRITE_B32_e64_1]], %subreg.sub1, [[V_ACCVGPR_WRITE_B32_e64_2]], %subreg.sub2, [[V_ACCVGPR_WRITE_B32_e64_3]], %subreg.sub3 + ; GFX908-NEXT: {{ $}} + ; GFX908-NEXT: bb.1: + ; GFX908-NEXT: successors: %bb.1(0x40000000), %bb.2(0x40000000) + ; GFX908-NEXT: liveins: $scc + ; GFX908-NEXT: {{ $}} + ; GFX908-NEXT: [[PHI:%[0-9]+]]:areg_128_align2 = PHI [[REG_SEQUENCE]], %bb.0, %7, %bb.1 + ; GFX908-NEXT: [[V_MFMA_F32_16X16X4F32_e64_:%[0-9]+]]:areg_128_align2 = V_MFMA_F32_16X16X4F32_e64 [[V_MOV_B32_e32_]], [[V_MOV_B32_e32_]], [[PHI]], 0, 0, 0, implicit $mode, implicit $exec + ; GFX908-NEXT: [[COPY:%[0-9]+]]:areg_128_align2 = COPY [[V_MFMA_F32_16X16X4F32_e64_]], implicit $exec + ; GFX908-NEXT: S_CBRANCH_SCC1 %bb.1, implicit $scc + ; GFX908-NEXT: {{ $}} + ; GFX908-NEXT: bb.2: + ; GFX908-NEXT: S_ENDPGM 0 + ; + ; GFX90A-LABEL: name: skip_optimize_agpr_phi_without_subreg_use + ; GFX90A: bb.0: + ; GFX90A-NEXT: successors: %bb.1(0x80000000) + ; GFX90A-NEXT: liveins: $scc + ; GFX90A-NEXT: {{ $}} + ; GFX90A-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec + ; GFX90A-NEXT: [[S_MOV_B32_:%[0-9]+]]:sgpr_32 = S_MOV_B32 0 + ; GFX90A-NEXT: [[V_ACCVGPR_WRITE_B32_e64_:%[0-9]+]]:agpr_32 = V_ACCVGPR_WRITE_B32_e64 0, implicit $exec + ; GFX90A-NEXT: [[V_ACCVGPR_WRITE_B32_e64_1:%[0-9]+]]:agpr_32 = V_ACCVGPR_WRITE_B32_e64 0, implicit $exec + ; GFX90A-NEXT: [[V_ACCVGPR_WRITE_B32_e64_2:%[0-9]+]]:agpr_32 = V_ACCVGPR_WRITE_B32_e64 0, implicit $exec + ; GFX90A-NEXT: [[V_ACCVGPR_WRITE_B32_e64_3:%[0-9]+]]:agpr_32 = V_ACCVGPR_WRITE_B32_e64 0, implicit $exec + ; GFX90A-NEXT: [[REG_SEQUENCE:%[0-9]+]]:areg_128_align2 = REG_SEQUENCE [[V_ACCVGPR_WRITE_B32_e64_]], %subreg.sub0, [[V_ACCVGPR_WRITE_B32_e64_1]], %subreg.sub1, [[V_ACCVGPR_WRITE_B32_e64_2]], %subreg.sub2, [[V_ACCVGPR_WRITE_B32_e64_3]], %subreg.sub3 + ; GFX90A-NEXT: {{ $}} + ; GFX90A-NEXT: bb.1: + ; GFX90A-NEXT: successors: %bb.1(0x40000000), %bb.2(0x40000000) + ; GFX90A-NEXT: liveins: $scc + ; GFX90A-NEXT: {{ $}} + ; GFX90A-NEXT: [[PHI:%[0-9]+]]:areg_128_align2 = PHI [[REG_SEQUENCE]], %bb.0, %7, %bb.1 + ; GFX90A-NEXT: [[V_MFMA_F32_16X16X4F32_e64_:%[0-9]+]]:areg_128_align2 = V_MFMA_F32_16X16X4F32_e64 [[V_MOV_B32_e32_]], [[V_MOV_B32_e32_]], [[PHI]], 0, 0, 0, implicit $mode, implicit $exec + ; GFX90A-NEXT: [[COPY:%[0-9]+]]:areg_128_align2 = COPY [[V_MFMA_F32_16X16X4F32_e64_]], implicit $exec + ; GFX90A-NEXT: S_CBRANCH_SCC1 %bb.1, implicit $scc + ; GFX90A-NEXT: {{ $}} + ; GFX90A-NEXT: bb.2: + ; GFX90A-NEXT: S_ENDPGM 0 + bb.0: + liveins: $scc + successors: %bb.1 + + %0:vgpr_32 = V_MOV_B32_e32 0, implicit $exec + %1:sgpr_32 = S_MOV_B32 0 + %2:sgpr_128 = REG_SEQUENCE %1, %subreg.sub0, %1, %subreg.sub1, %1, %subreg.sub2, %1, %subreg.sub3 + %3:vreg_128 = COPY %2 + %4:sreg_64 = S_MOV_B64 0 + %5:areg_128_align2 = COPY %3, implicit $exec + + bb.1: + liveins: $scc + successors: %bb.1, %bb.2 + + %9:areg_128_align2 = PHI %5, %bb.0, %10, %bb.1 + %11:areg_128_align2 = V_MFMA_F32_16X16X4F32_e64 %0:vgpr_32, %0:vgpr_32, %9:areg_128_align2, 0, 0, 0, implicit $mode, implicit $exec + %12:vgpr_32 = COPY %11.sub3 + %13:vgpr_32 = COPY %11.sub2 + %14:vgpr_32 = COPY %11.sub1 + %15:vgpr_32 = COPY %11.sub0 + %10:areg_128_align2 = COPY %11, implicit $exec + S_CBRANCH_SCC1 %bb.1, implicit $scc + + bb.2: + S_ENDPGM 0 + +...