From b241828d662f0d7e40788a7768e77035b41019ae Mon Sep 17 00:00:00 2001 From: Jay Foad Date: Thu, 26 Oct 2023 14:35:09 +0100 Subject: [PATCH] [AMDGPU] Assert that we can find subregs in copyPhysReg. NFC. This helped to catch a codegen failure caused by #69703. MachineVerifier did not complain about this malformed COPY either before regalloc: %9:vreg_64 = COPY %17:vgpr_32 Or after regalloc: renamable $vgpr0_vgpr1 = COPY renamable $vgpr2, implicit $exec But we can at least catch the problem when copyPhysReg tries to expand it into 32-bit register moves and fails to find suitable source registers: $vgpr0 = V_MOV_B32_e32 $noreg, implicit $exec, implicit-def $vgpr0_vgpr1, implicit $vgpr2 $vgpr1 = V_MOV_B32_e32 $noreg, implicit $exec, implicit $vgpr2, implicit $exec --- llvm/lib/Target/AMDGPU/SIInstrInfo.cpp | 51 ++++++++++++++------------ 1 file changed, 27 insertions(+), 24 deletions(-) diff --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp index ffcd415a66648..62f5a17635cee 100644 --- a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp +++ b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp @@ -742,23 +742,27 @@ static void expandSGPRCopy(const SIInstrInfo &TII, MachineBasicBlock &MBB, for (unsigned Idx = 0; Idx < BaseIndices.size(); ++Idx) { int16_t SubIdx = BaseIndices[Idx]; - Register Reg = RI.getSubReg(DestReg, SubIdx); + Register DestSubReg = RI.getSubReg(DestReg, SubIdx); + Register SrcSubReg = RI.getSubReg(SrcReg, SubIdx); + assert(DestSubReg && SrcSubReg && "Failed to find subregs!"); unsigned Opcode = AMDGPU::S_MOV_B32; // Is SGPR aligned? If so try to combine with next. - Register Src = RI.getSubReg(SrcReg, SubIdx); - bool AlignedDest = ((Reg - AMDGPU::SGPR0) % 2) == 0; - bool AlignedSrc = ((Src - AMDGPU::SGPR0) % 2) == 0; + bool AlignedDest = ((DestSubReg - AMDGPU::SGPR0) % 2) == 0; + bool AlignedSrc = ((SrcSubReg - AMDGPU::SGPR0) % 2) == 0; if (AlignedDest && AlignedSrc && (Idx + 1 < BaseIndices.size())) { // Can use SGPR64 copy unsigned Channel = RI.getChannelFromSubReg(SubIdx); SubIdx = RI.getSubRegFromChannel(Channel, 2); + DestSubReg = RI.getSubReg(DestReg, SubIdx); + SrcSubReg = RI.getSubReg(SrcReg, SubIdx); + assert(DestSubReg && SrcSubReg && "Failed to find subregs!"); Opcode = AMDGPU::S_MOV_B64; Idx++; } - LastMI = BuildMI(MBB, I, DL, TII.get(Opcode), RI.getSubReg(DestReg, SubIdx)) - .addReg(RI.getSubReg(SrcReg, SubIdx)) + LastMI = BuildMI(MBB, I, DL, TII.get(Opcode), DestSubReg) + .addReg(SrcSubReg) .addReg(SrcReg, RegState::Implicit); if (!FirstMI) @@ -1098,6 +1102,9 @@ void SIInstrInfo::copyPhysReg(MachineBasicBlock &MBB, SubIdx = SubIndices[Idx]; else SubIdx = SubIndices[SubIndices.size() - Idx - 1]; + Register DestSubReg = RI.getSubReg(DestReg, SubIdx); + Register SrcSubReg = RI.getSubReg(SrcReg, SubIdx); + assert(DestSubReg && SrcSubReg && "Failed to find subregs!"); bool IsFirstSubreg = Idx == 0; bool UseKill = CanKillSuperReg && Idx == SubIndices.size() - 1; @@ -1105,30 +1112,26 @@ void SIInstrInfo::copyPhysReg(MachineBasicBlock &MBB, if (Opcode == AMDGPU::INSTRUCTION_LIST_END) { Register ImpDefSuper = IsFirstSubreg ? Register(DestReg) : Register(); Register ImpUseSuper = SrcReg; - indirectCopyToAGPR(*this, MBB, MI, DL, RI.getSubReg(DestReg, SubIdx), - RI.getSubReg(SrcReg, SubIdx), UseKill, *RS, Overlap, - ImpDefSuper, ImpUseSuper); + indirectCopyToAGPR(*this, MBB, MI, DL, DestSubReg, SrcSubReg, UseKill, + *RS, Overlap, ImpDefSuper, ImpUseSuper); } else if (Opcode == AMDGPU::V_PK_MOV_B32) { - Register DstSubReg = RI.getSubReg(DestReg, SubIdx); - Register SrcSubReg = RI.getSubReg(SrcReg, SubIdx); MachineInstrBuilder MIB = - BuildMI(MBB, MI, DL, get(AMDGPU::V_PK_MOV_B32), DstSubReg) - .addImm(SISrcMods::OP_SEL_1) - .addReg(SrcSubReg) - .addImm(SISrcMods::OP_SEL_0 | SISrcMods::OP_SEL_1) - .addReg(SrcSubReg) - .addImm(0) // op_sel_lo - .addImm(0) // op_sel_hi - .addImm(0) // neg_lo - .addImm(0) // neg_hi - .addImm(0) // clamp - .addReg(SrcReg, getKillRegState(UseKill) | RegState::Implicit); + BuildMI(MBB, MI, DL, get(AMDGPU::V_PK_MOV_B32), DestSubReg) + .addImm(SISrcMods::OP_SEL_1) + .addReg(SrcSubReg) + .addImm(SISrcMods::OP_SEL_0 | SISrcMods::OP_SEL_1) + .addReg(SrcSubReg) + .addImm(0) // op_sel_lo + .addImm(0) // op_sel_hi + .addImm(0) // neg_lo + .addImm(0) // neg_hi + .addImm(0) // clamp + .addReg(SrcReg, getKillRegState(UseKill) | RegState::Implicit); if (IsFirstSubreg) MIB.addReg(DestReg, RegState::Define | RegState::Implicit); } else { MachineInstrBuilder Builder = - BuildMI(MBB, MI, DL, get(Opcode), RI.getSubReg(DestReg, SubIdx)) - .addReg(RI.getSubReg(SrcReg, SubIdx)); + BuildMI(MBB, MI, DL, get(Opcode), DestSubReg).addReg(SrcSubReg); if (IsFirstSubreg) Builder.addReg(DestReg, RegState::Define | RegState::Implicit);