Skip to content

Commit

Permalink
[AMDGPU] Folding imm offset in more cases for scratch access (#70634)
Browse files Browse the repository at this point in the history
For scratch load/store, our hardware only accepts non-negative values in
SGPR/VGPR. Besides the cases that we can prove from known bits, we can
also prove that the value in `base` will be non-negative: 1.) When the
ADD for the address calculation has the NoUnsignedWrap flag. 2.) When the
immediate offset is already negative.
  • Loading branch information
ruiling authored Nov 29, 2023
1 parent c6d6a57 commit c1511a6
Show file tree
Hide file tree
Showing 13 changed files with 826 additions and 839 deletions.
81 changes: 69 additions & 12 deletions llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1152,13 +1152,64 @@ bool AMDGPUDAGToDAGISel::isDSOffset2Legal(SDValue Base, unsigned Offset0,
return CurDAG->SignBitIsZero(Base);
}

bool AMDGPUDAGToDAGISel::isFlatScratchBaseLegal(SDValue Base,
uint64_t FlatVariant) const {
if (FlatVariant != SIInstrFlags::FlatScratch)
// Tell whether the node computing Addr is known not to wrap in the unsigned
// sense: an ISD::OR always qualifies, an ISD::ADD qualifies only when it
// carries the NoUnsignedWrap flag, and every other opcode does not.
static bool isNoUnsignedWrap(SDValue Addr) {
  switch (Addr.getOpcode()) {
  case ISD::OR:
    return true;
  case ISD::ADD:
    return Addr->getFlags().hasNoUnsignedWrap();
  default:
    return false;
  }
}

// Check that the base address of a flat scratch load/store of the form
// `base + offset` is legal to be put in SGPR/VGPR (i.e. it is unsigned, as the
// hardware requires). The first operand of Addr is always treated as the base
// address here.
//
// Returns true when Addr satisfies isNoUnsignedWrap, when Addr is an ISD::ADD
// whose second operand is a negative constant greater than -0x40000000, or
// when SignBitIsZero holds for the first operand.
bool AMDGPUDAGToDAGISel::isFlatScratchBaseLegal(SDValue Addr) const {
// Accept immediately when isNoUnsignedWrap holds for the whole address.
if (isNoUnsignedWrap(Addr))
return true;
// When value in 32-bit Base can be negative calculate scratch offset using
// 32-bit add instruction, otherwise use Base(unsigned) + offset.
// NOTE(review): `Base` is not declared in this function (the parameter is
// `Addr`); this comment and return look like removed lines left over from the
// diff rendering -- confirm against the actual file.
return CurDAG->SignBitIsZero(Base);

auto LHS = Addr.getOperand(0);
auto RHS = Addr.getOperand(1);

// If the immediate offset is negative and within certain range, the base
// address cannot also be negative. If the base is also negative, the sum
// would be either negative or much larger than the valid range of scratch
// memory a thread can access.
ConstantSDNode *ImmOp = nullptr;
// ImmOp is assigned inside the condition; it stays null when RHS is not a
// constant node.
if (Addr.getOpcode() == ISD::ADD && (ImmOp = dyn_cast<ConstantSDNode>(RHS))) {
if (ImmOp->getSExtValue() < 0 && ImmOp->getSExtValue() > -0x40000000)
return true;
}

// Otherwise fall back to the SignBitIsZero query on the first operand.
return CurDAG->SignBitIsZero(LHS);
}

// Legality check for a flat scratch address of the form SGPR + VGPR.
// The address is accepted when the combining node satisfies isNoUnsignedWrap,
// or when SignBitIsZero holds for both of its operands.
bool AMDGPUDAGToDAGISel::isFlatScratchBaseLegalSV(SDValue Addr) const {
  if (isNoUnsignedWrap(Addr))
    return true;

  // Fetch both operands up front, then query the second one first, exactly as
  // the short-circuit order requires.
  const SDValue FirstOp = Addr.getOperand(0);
  const SDValue SecondOp = Addr.getOperand(1);
  if (!CurDAG->SignBitIsZero(SecondOp))
    return false;
  return CurDAG->SignBitIsZero(FirstOp);
}

// Legality check for a flat scratch address of the form SGPR + VGPR + Imm.
// Addr is the outer `Base + Imm` node; its first operand is the inner
// two-register node and its second operand must be a ConstantSDNode.
bool AMDGPUDAGToDAGISel::isFlatScratchBaseLegalSVImm(SDValue Addr) const {
  const SDValue Base = Addr.getOperand(0);
  auto *ImmNode = cast<ConstantSDNode>(Addr.getOperand(1));

  if (isNoUnsignedWrap(Base)) {
    // The inner node does not wrap; the outer one is fine if it does not wrap
    // either ...
    if (isNoUnsignedWrap(Addr))
      return true;
    // ... or if the immediate is a negative value above -0x40000000. With such
    // an offset the base cannot be negative as well: the sum would then be
    // either negative or far beyond the range of scratch memory a thread can
    // access.
    const int64_t Imm = ImmNode->getSExtValue();
    if (Imm < 0 && Imm > -0x40000000)
      return true;
  }

  // Fall back to the SignBitIsZero query on both operands of the inner node,
  // second operand first.
  const SDValue FirstOp = Base.getOperand(0);
  const SDValue SecondOp = Base.getOperand(1);
  if (!CurDAG->SignBitIsZero(SecondOp))
    return false;
  return CurDAG->SignBitIsZero(FirstOp);
}

// TODO: If offset is too big, put low 16-bit into offset.
Expand Down Expand Up @@ -1555,7 +1606,8 @@ bool AMDGPUDAGToDAGISel::SelectFlatOffsetImpl(SDNode *N, SDValue Addr,
if (Subtarget->hasFlatInstOffsets() && !CanHaveFlatSegmentOffsetBug) {
SDValue N0, N1;
if (isBaseWithConstantOffset64(Addr, N0, N1) &&
isFlatScratchBaseLegal(N0, FlatVariant)) {
(FlatVariant != SIInstrFlags::FlatScratch ||
isFlatScratchBaseLegal(Addr))) {
int64_t COffsetVal = cast<ConstantSDNode>(N1)->getSExtValue();

const SIInstrInfo *TII = Subtarget->getInstrInfo();
Expand Down Expand Up @@ -1787,8 +1839,7 @@ bool AMDGPUDAGToDAGISel::SelectScratchSAddr(SDNode *Parent, SDValue Addr,

int64_t COffsetVal = 0;

if (CurDAG->isBaseWithConstantOffset(Addr) &&
isFlatScratchBaseLegal(Addr.getOperand(0))) {
if (CurDAG->isBaseWithConstantOffset(Addr) && isFlatScratchBaseLegal(Addr)) {
COffsetVal = cast<ConstantSDNode>(Addr.getOperand(1))->getSExtValue();
SAddr = Addr.getOperand(0);
} else {
Expand Down Expand Up @@ -1845,6 +1896,7 @@ bool AMDGPUDAGToDAGISel::SelectScratchSVAddr(SDNode *N, SDValue Addr,
int64_t ImmOffset = 0;

SDValue LHS, RHS;
SDValue OrigAddr = Addr;
if (isBaseWithConstantOffset64(Addr, LHS, RHS)) {
int64_t COffsetVal = cast<ConstantSDNode>(RHS)->getSExtValue();
const SIInstrInfo *TII = Subtarget->getInstrInfo();
Expand All @@ -1866,7 +1918,7 @@ bool AMDGPUDAGToDAGISel::SelectScratchSVAddr(SDNode *N, SDValue Addr,
CurDAG->getTargetConstant(RemainderOffset, SDLoc(), MVT::i32));
VAddr = SDValue(VMov, 0);
SAddr = LHS;
if (!isFlatScratchBaseLegal(SAddr) || !isFlatScratchBaseLegal(VAddr))
if (!isFlatScratchBaseLegal(Addr))
return false;
if (checkFlatScratchSVSSwizzleBug(VAddr, SAddr, SplitImmOffset))
return false;
Expand All @@ -1892,8 +1944,13 @@ bool AMDGPUDAGToDAGISel::SelectScratchSVAddr(SDNode *N, SDValue Addr,
return false;
}

if (!isFlatScratchBaseLegal(SAddr) || !isFlatScratchBaseLegal(VAddr))
return false;
if (OrigAddr != Addr) {
if (!isFlatScratchBaseLegalSVImm(OrigAddr))
return false;
} else {
if (!isFlatScratchBaseLegalSV(OrigAddr))
return false;
}

if (checkFlatScratchSVSSwizzleBug(VAddr, SAddr, ImmOffset))
return false;
Expand Down
6 changes: 4 additions & 2 deletions llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.h
Original file line number Diff line number Diff line change
Expand Up @@ -154,8 +154,10 @@ class AMDGPUDAGToDAGISel : public SelectionDAGISel {
bool isDSOffsetLegal(SDValue Base, unsigned Offset) const;
bool isDSOffset2Legal(SDValue Base, unsigned Offset0, unsigned Offset1,
unsigned Size) const;
bool isFlatScratchBaseLegal(
SDValue Base, uint64_t FlatVariant = SIInstrFlags::FlatScratch) const;

bool isFlatScratchBaseLegal(SDValue Addr) const;
bool isFlatScratchBaseLegalSV(SDValue Addr) const;
bool isFlatScratchBaseLegalSVImm(SDValue Addr) const;

bool SelectDS1Addr1Offset(SDValue Ptr, SDValue &Base, SDValue &Offset) const;
bool SelectDS64Bit4ByteAligned(SDValue Ptr, SDValue &Base, SDValue &Offset0,
Expand Down
92 changes: 82 additions & 10 deletions llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -4103,7 +4103,9 @@ AMDGPUInstructionSelector::selectFlatOffsetImpl(MachineOperand &Root,
int64_t ConstOffset;
std::tie(PtrBase, ConstOffset) =
getPtrBaseWithConstantOffset(Root.getReg(), *MRI);
if (ConstOffset == 0 || !isFlatScratchBaseLegal(PtrBase, FlatVariant))

if (ConstOffset == 0 || (FlatVariant == SIInstrFlags::FlatScratch &&
!isFlatScratchBaseLegal(Root.getReg())))
return Default;

unsigned AddrSpace = (*MI->memoperands_begin())->getAddrSpace();
Expand Down Expand Up @@ -4266,7 +4268,7 @@ AMDGPUInstructionSelector::selectScratchSAddr(MachineOperand &Root) const {
// possible.
std::tie(PtrBase, ConstOffset) = getPtrBaseWithConstantOffset(Addr, *MRI);

if (ConstOffset != 0 && isFlatScratchBaseLegal(PtrBase) &&
if (ConstOffset != 0 && isFlatScratchBaseLegal(Addr) &&
TII.isLegalFLATOffset(ConstOffset, AMDGPUAS::PRIVATE_ADDRESS,
SIInstrFlags::FlatScratch)) {
Addr = PtrBase;
Expand Down Expand Up @@ -4343,6 +4345,7 @@ AMDGPUInstructionSelector::selectScratchSVAddr(MachineOperand &Root) const {
// possible.
std::tie(PtrBase, ConstOffset) = getPtrBaseWithConstantOffset(Addr, *MRI);

Register OrigAddr = Addr;
if (ConstOffset != 0 &&
TII.isLegalFLATOffset(ConstOffset, AMDGPUAS::PRIVATE_ADDRESS, true)) {
Addr = PtrBase;
Expand All @@ -4360,8 +4363,13 @@ AMDGPUInstructionSelector::selectScratchSVAddr(MachineOperand &Root) const {
Register LHS = AddrDef->MI->getOperand(1).getReg();
auto LHSDef = getDefSrcRegIgnoringCopies(LHS, *MRI);

if (!isFlatScratchBaseLegal(LHS) || !isFlatScratchBaseLegal(RHS))
return std::nullopt;
if (OrigAddr != Addr) {
if (!isFlatScratchBaseLegalSVImm(OrigAddr))
return std::nullopt;
} else {
if (!isFlatScratchBaseLegalSV(OrigAddr))
return std::nullopt;
}

if (checkFlatScratchSVSSwizzleBug(RHS, LHS, ImmOffset))
return std::nullopt;
Expand Down Expand Up @@ -4494,14 +4502,78 @@ bool AMDGPUInstructionSelector::isDSOffset2Legal(Register Base, int64_t Offset0,
return KB->signBitIsZero(Base);
}

bool AMDGPUInstructionSelector::isFlatScratchBaseLegal(
Register Base, uint64_t FlatVariant) const {
if (FlatVariant != SIInstrFlags::FlatScratch)
// Return whether the operation has NoUnsignedWrap property: true for a G_OR,
// and for a G_PTR_ADD that carries the MachineInstr::NoUWrap flag; false for
// anything else.
//
// This helper is private to this translation unit, so it is declared `static`
// to give it internal linkage (matching the SelectionDAG counterpart) and to
// avoid clashing with any other externally visible isNoUnsignedWrap.
static bool isNoUnsignedWrap(MachineInstr *Addr) {
  return Addr->getOpcode() == TargetOpcode::G_OR ||
         (Addr->getOpcode() == TargetOpcode::G_PTR_ADD &&
          Addr->getFlag(MachineInstr::NoUWrap));
}

// Check that the base address of a flat scratch load/store of the form
// `base + offset` is legal to be put in SGPR/VGPR (i.e. it is unsigned, as the
// hardware requires). The first source operand of the instruction defining
// Addr is always treated as the base address here.
//
// Returns true when the defining instruction satisfies isNoUnsignedWrap, when
// it is a G_PTR_ADD whose offset is a negative constant greater than
// -0x40000000, or when signBitIsZero holds for the base operand.
bool AMDGPUInstructionSelector::isFlatScratchBaseLegal(Register Addr) const {
// Look through copies to the instruction that defines the address.
MachineInstr *AddrMI = getDefIgnoringCopies(Addr, *MRI);

if (isNoUnsignedWrap(AddrMI))
return true;

// When value in 32-bit Base can be negative calculate scratch offset using
// 32-bit add instruction, otherwise use Base(unsigned) + offset.
// NOTE(review): `Base` is not declared in this function (the parameter is
// `Addr`); this comment and return look like removed lines left over from the
// diff rendering -- confirm against the actual file.
return KB->signBitIsZero(Base);
// Operand 0 is skipped; operands 1 and 2 are read as the two sources.
Register LHS = AddrMI->getOperand(1).getReg();
Register RHS = AddrMI->getOperand(2).getReg();

if (AddrMI->getOpcode() == TargetOpcode::G_PTR_ADD) {
std::optional<ValueAndVReg> RhsValReg =
getIConstantVRegValWithLookThrough(RHS, *MRI);
// If the immediate offset is negative and within certain range, the base
// address cannot also be negative. If the base is also negative, the sum
// would be either negative or much larger than the valid range of scratch
// memory a thread can access.
if (RhsValReg && RhsValReg->Value.getSExtValue() < 0 &&
RhsValReg->Value.getSExtValue() > -0x40000000)
return true;
}

// Otherwise fall back to the signBitIsZero query on the base operand.
return KB->signBitIsZero(LHS);
}

// Legality check for a flat scratch address of the form SGPR + VGPR.
// The address is accepted when its defining instruction (looking through
// copies) satisfies isNoUnsignedWrap, or when signBitIsZero holds for both of
// that instruction's source operands.
bool AMDGPUInstructionSelector::isFlatScratchBaseLegalSV(Register Addr) const {
  MachineInstr *AddrDef = getDefIgnoringCopies(Addr, *MRI);
  if (isNoUnsignedWrap(AddrDef))
    return true;

  // Read both source registers up front, then query the second one first,
  // exactly as the short-circuit order requires.
  Register FirstSrc = AddrDef->getOperand(1).getReg();
  Register SecondSrc = AddrDef->getOperand(2).getReg();
  if (!KB->signBitIsZero(SecondSrc))
    return false;
  return KB->signBitIsZero(FirstSrc);
}

// Legality check for a flat scratch address of the form SGPR + VGPR + Imm.
// Addr is defined by the outer `Base + Imm` instruction; Base is in turn
// defined by the inner two-register instruction. The outer offset operand is
// expected to resolve to a constant (asserted below).
bool AMDGPUInstructionSelector::isFlatScratchBaseLegalSVImm(
    Register Addr) const {
  MachineInstr *AddrDef = getDefIgnoringCopies(Addr, *MRI);
  Register Base = AddrDef->getOperand(1).getReg();
  Register OffsetReg = AddrDef->getOperand(2).getReg();

  std::optional<DefinitionAndSourceRegister> BaseDef =
      getDefSrcRegIgnoringCopies(Base, *MRI);
  std::optional<ValueAndVReg> RHSOffset =
      getIConstantVRegValWithLookThrough(OffsetReg, *MRI);
  assert(RHSOffset);

  if (isNoUnsignedWrap(BaseDef->MI)) {
    // The inner instruction does not wrap; the outer one is fine if it does
    // not wrap either ...
    if (isNoUnsignedWrap(AddrDef))
      return true;
    // ... or if the immediate is a negative value above -0x40000000. With such
    // an offset the base cannot be negative as well: the sum would then be
    // either negative or far beyond the range of scratch memory a thread can
    // access.
    const int64_t Imm = RHSOffset->Value.getSExtValue();
    if (Imm < 0 && Imm > -0x40000000)
      return true;
  }

  // Fall back to the signBitIsZero query on both sources of the inner
  // instruction, second source first.
  Register FirstSrc = BaseDef->MI->getOperand(1).getReg();
  Register SecondSrc = BaseDef->MI->getOperand(2).getReg();
  if (!KB->signBitIsZero(SecondSrc))
    return false;
  return KB->signBitIsZero(FirstSrc);
}

bool AMDGPUInstructionSelector::isUnneededShiftMask(const MachineInstr &MI,
Expand Down
5 changes: 3 additions & 2 deletions llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.h
Original file line number Diff line number Diff line change
Expand Up @@ -243,8 +243,9 @@ class AMDGPUInstructionSelector final : public InstructionSelector {
bool isDSOffsetLegal(Register Base, int64_t Offset) const;
bool isDSOffset2Legal(Register Base, int64_t Offset0, int64_t Offset1,
unsigned Size) const;
bool isFlatScratchBaseLegal(
Register Base, uint64_t FlatVariant = SIInstrFlags::FlatScratch) const;
bool isFlatScratchBaseLegal(Register Addr) const;
bool isFlatScratchBaseLegalSV(Register Addr) const;
bool isFlatScratchBaseLegalSVImm(Register Addr) const;

std::pair<Register, unsigned>
selectDS1Addr1OffsetImpl(MachineOperand &Root) const;
Expand Down
Loading

0 comments on commit c1511a6

Please sign in to comment.