Skip to content

Commit 02a82f3

Browse files
committed
[AMDGPU] Convert flat scratch SS->SV in FI elimination
- Fix an -O0 crash on gfx950: when FrameReg is unavailable and no SGPR can be scavenged, convert the flat-scratch instruction from its SS (saddr) form to its SV (vaddr) form and materialize the offset in a VGPR. Resolves issue #155902.
- Reuse the existing VGPR temporary if one is available; otherwise scavenge one.
- Add a regression test: llvm/test/CodeGen/AMDGPU/flat-scratch-ss-to-sv-scavenge.ll.

Co-authored by Matt Arsenault
1 parent c2e90ff commit 02a82f3

File tree

2 files changed

+666
-2
lines changed

2 files changed

+666
-2
lines changed

llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp

Lines changed: 36 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -3183,8 +3183,42 @@ bool SIRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator MI,
31833183
: RS->scavengeRegisterBackwards(AMDGPU::SReg_32_XM0RegClass,
31843184
MI, false, 0, !UseSGPR);
31853185

3186-
// TODO: for flat scratch another attempt can be made with a VGPR index
3187-
// if no SGPRs can be scavenged.
3186+
// Fallback: If we need an SGPR but cannot scavenge one and there is no
3187+
// frame register, try to convert the flat-scratch instruction to use a
3188+
// VGPR index (SS -> SV) and materialize the offset in a VGPR.
3189+
if (!TmpSReg && !FrameReg && TII->isFLATScratch(*MI)) {
3190+
// Reuse an existing VGPR temp if available, otherwise scavenge one.
3191+
Register VTmp = (!UseSGPR && TmpReg)
3192+
? TmpReg
3193+
: RS->scavengeRegisterBackwards(
3194+
AMDGPU::VGPR_32RegClass, MI, false, 0);
3195+
if (VTmp) {
3196+
// Put the large offset into a VGPR and zero the immediate offset.
3197+
BuildMI(*MBB, MI, DL, TII->get(AMDGPU::V_MOV_B32_e32), VTmp)
3198+
.addImm(Offset);
3199+
3200+
unsigned Opc = MI->getOpcode();
3201+
int NewOpc = AMDGPU::getFlatScratchInstSVfromSS(Opc);
3202+
if (NewOpc != -1) {
3203+
int OldSAddrIdx =
3204+
AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::saddr);
3205+
int NewVAddrIdx =
3206+
AMDGPU::getNamedOperandIdx(NewOpc, AMDGPU::OpName::vaddr);
3207+
if (OldSAddrIdx == NewVAddrIdx && OldSAddrIdx >= 0) {
3208+
MI->setDesc(TII->get(NewOpc));
3209+
// Replace former saddr (now vaddr) with the VGPR index.
3210+
MI->getOperand(NewVAddrIdx).ChangeToRegister(VTmp, false);
3211+
// Reset the immediate offset to 0 as it is now in vaddr.
3212+
MachineOperand *OffOp =
3213+
TII->getNamedOperand(*MI, AMDGPU::OpName::offset);
3214+
assert(OffOp && "Flat scratch SV form must have offset operand");
3215+
OffOp->setImm(0);
3216+
return false;
3217+
}
3218+
}
3219+
}
3220+
}
3221+
31883222
if ((!TmpSReg && !FrameReg) || (!TmpReg && !UseSGPR))
31893223
report_fatal_error("Cannot scavenge register in FI elimination!");
31903224

0 commit comments

Comments
 (0)