@@ -742,23 +742,27 @@ static void expandSGPRCopy(const SIInstrInfo &TII, MachineBasicBlock &MBB,
742742
743743 for (unsigned Idx = 0 ; Idx < BaseIndices.size (); ++Idx) {
744744 int16_t SubIdx = BaseIndices[Idx];
745- Register Reg = RI.getSubReg (DestReg, SubIdx);
745+ Register DestSubReg = RI.getSubReg (DestReg, SubIdx);
746+ Register SrcSubReg = RI.getSubReg (SrcReg, SubIdx);
747+ assert (DestSubReg && SrcSubReg && " Failed to find subregs!" );
746748 unsigned Opcode = AMDGPU::S_MOV_B32;
747749
748750 // Is SGPR aligned? If so try to combine with next.
749- Register Src = RI.getSubReg (SrcReg, SubIdx);
750- bool AlignedDest = ((Reg - AMDGPU::SGPR0) % 2 ) == 0 ;
751- bool AlignedSrc = ((Src - AMDGPU::SGPR0) % 2 ) == 0 ;
751+ bool AlignedDest = ((DestSubReg - AMDGPU::SGPR0) % 2 ) == 0 ;
752+ bool AlignedSrc = ((SrcSubReg - AMDGPU::SGPR0) % 2 ) == 0 ;
752753 if (AlignedDest && AlignedSrc && (Idx + 1 < BaseIndices.size ())) {
753754 // Can use SGPR64 copy
754755 unsigned Channel = RI.getChannelFromSubReg (SubIdx);
755756 SubIdx = RI.getSubRegFromChannel (Channel, 2 );
757+ DestSubReg = RI.getSubReg (DestReg, SubIdx);
758+ SrcSubReg = RI.getSubReg (SrcReg, SubIdx);
759+ assert (DestSubReg && SrcSubReg && " Failed to find subregs!" );
756760 Opcode = AMDGPU::S_MOV_B64;
757761 Idx++;
758762 }
759763
760- LastMI = BuildMI (MBB, I, DL, TII.get (Opcode), RI. getSubReg (DestReg, SubIdx) )
761- .addReg (RI. getSubReg (SrcReg, SubIdx) )
764+ LastMI = BuildMI (MBB, I, DL, TII.get (Opcode), DestSubReg )
765+ .addReg (SrcSubReg )
762766 .addReg (SrcReg, RegState::Implicit);
763767
764768 if (!FirstMI)
@@ -1098,37 +1102,36 @@ void SIInstrInfo::copyPhysReg(MachineBasicBlock &MBB,
10981102 SubIdx = SubIndices[Idx];
10991103 else
11001104 SubIdx = SubIndices[SubIndices.size () - Idx - 1 ];
1105+ Register DestSubReg = RI.getSubReg (DestReg, SubIdx);
1106+ Register SrcSubReg = RI.getSubReg (SrcReg, SubIdx);
1107+ assert (DestSubReg && SrcSubReg && " Failed to find subregs!" );
11011108
11021109 bool IsFirstSubreg = Idx == 0 ;
11031110 bool UseKill = CanKillSuperReg && Idx == SubIndices.size () - 1 ;
11041111
11051112 if (Opcode == AMDGPU::INSTRUCTION_LIST_END) {
11061113 Register ImpDefSuper = IsFirstSubreg ? Register (DestReg) : Register ();
11071114 Register ImpUseSuper = SrcReg;
1108- indirectCopyToAGPR (*this , MBB, MI, DL, RI.getSubReg (DestReg, SubIdx),
1109- RI.getSubReg (SrcReg, SubIdx), UseKill, *RS, Overlap,
1110- ImpDefSuper, ImpUseSuper);
1115+ indirectCopyToAGPR (*this , MBB, MI, DL, DestSubReg, SrcSubReg, UseKill,
1116+ *RS, Overlap, ImpDefSuper, ImpUseSuper);
11111117 } else if (Opcode == AMDGPU::V_PK_MOV_B32) {
1112- Register DstSubReg = RI.getSubReg (DestReg, SubIdx);
1113- Register SrcSubReg = RI.getSubReg (SrcReg, SubIdx);
11141118 MachineInstrBuilder MIB =
1115- BuildMI (MBB, MI, DL, get (AMDGPU::V_PK_MOV_B32), DstSubReg )
1116- .addImm (SISrcMods::OP_SEL_1)
1117- .addReg (SrcSubReg)
1118- .addImm (SISrcMods::OP_SEL_0 | SISrcMods::OP_SEL_1)
1119- .addReg (SrcSubReg)
1120- .addImm (0 ) // op_sel_lo
1121- .addImm (0 ) // op_sel_hi
1122- .addImm (0 ) // neg_lo
1123- .addImm (0 ) // neg_hi
1124- .addImm (0 ) // clamp
1125- .addReg (SrcReg, getKillRegState (UseKill) | RegState::Implicit);
1119+ BuildMI (MBB, MI, DL, get (AMDGPU::V_PK_MOV_B32), DestSubReg )
1120+ .addImm (SISrcMods::OP_SEL_1)
1121+ .addReg (SrcSubReg)
1122+ .addImm (SISrcMods::OP_SEL_0 | SISrcMods::OP_SEL_1)
1123+ .addReg (SrcSubReg)
1124+ .addImm (0 ) // op_sel_lo
1125+ .addImm (0 ) // op_sel_hi
1126+ .addImm (0 ) // neg_lo
1127+ .addImm (0 ) // neg_hi
1128+ .addImm (0 ) // clamp
1129+ .addReg (SrcReg, getKillRegState (UseKill) | RegState::Implicit);
11261130 if (IsFirstSubreg)
11271131 MIB.addReg (DestReg, RegState::Define | RegState::Implicit);
11281132 } else {
11291133 MachineInstrBuilder Builder =
1130- BuildMI (MBB, MI, DL, get (Opcode), RI.getSubReg (DestReg, SubIdx))
1131- .addReg (RI.getSubReg (SrcReg, SubIdx));
1134+ BuildMI (MBB, MI, DL, get (Opcode), DestSubReg).addReg (SrcSubReg);
11321135 if (IsFirstSubreg)
11331136 Builder.addReg (DestReg, RegState::Define | RegState::Implicit);
11341137
0 commit comments