Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
15 changes: 14 additions & 1 deletion llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -564,6 +564,14 @@ class AMDGPUOperand : public MCParsedAsmOperand {
return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::i32);
}

// True when this operand passes isRegOrInlineNoMods for the VS_32_Lo256
// register class with an i32 value type.
bool isVCSrc_b32_Lo256() const {
  const unsigned RCID = AMDGPU::VS_32_Lo256RegClassID;
  return isRegOrInlineNoMods(RCID, MVT::i32);
}

// True when this operand passes isRegOrInlineNoMods for the VS_64_Lo256
// register class with an i64 value type.
bool isVCSrc_b64_Lo256() const {
  const unsigned RCID = AMDGPU::VS_64_Lo256RegClassID;
  return isRegOrInlineNoMods(RCID, MVT::i64);
}

// True when this operand passes isRegOrInlineNoMods for the VS_64 register
// class with an i64 value type.
bool isVCSrc_b64() const {
  const unsigned RCID = AMDGPU::VS_64RegClassID;
  return isRegOrInlineNoMods(RCID, MVT::i64);
}
Expand Down Expand Up @@ -2986,7 +2994,12 @@ MCRegister AMDGPUAsmParser::getRegularReg(RegisterKind RegKind, unsigned RegNum,

const MCRegisterInfo *TRI = getContext().getRegisterInfo();
const MCRegisterClass RC = TRI->getRegClass(RCID);
if (RegIdx >= RC.getNumRegs()) {
if (RegIdx >= RC.getNumRegs() || (RegKind == IS_VGPR && RegIdx > 255)) {
Error(Loc, "register index is out of range");
return AMDGPU::NoRegister;
}

if (RegKind == IS_VGPR && !isGFX1250() && RegIdx + RegWidth / 32 > 256) {
Error(Loc, "register index is out of range");
return MCRegister();
}
Expand Down
30 changes: 26 additions & 4 deletions llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1223,6 +1223,26 @@ void AMDGPUDisassembler::convertVOP3DPPInst(MCInst &MI) const {
}
}

// Given a wide tuple \p Reg check if it will overflow 256 registers.
// \returns \p Reg on success or NoRegister otherwise.
static unsigned CheckVGPROverflow(unsigned Reg, const MCRegisterClass &RC,
const MCRegisterInfo &MRI) {
unsigned NumRegs = RC.getSizeInBits() / 32;
MCRegister Sub0 = MRI.getSubReg(Reg, AMDGPU::sub0);
if (!Sub0)
return Reg;

MCRegister BaseReg;
if (MRI.getRegClass(AMDGPU::VGPR_32RegClassID).contains(Sub0))
BaseReg = AMDGPU::VGPR0;
else if (MRI.getRegClass(AMDGPU::AGPR_32RegClassID).contains(Sub0))
BaseReg = AMDGPU::AGPR0;

assert(BaseReg && "Only vector registers expected");

return (Sub0 - BaseReg + NumRegs <= 256) ? Reg : AMDGPU::NoRegister;
}

// Note that before gfx10, the MIMG encoding provided no information about
// VADDR size. Consequently, decoded instructions always show the address as
// if it has 1 dword, which may not match the actual address size.
Expand Down Expand Up @@ -1327,8 +1347,9 @@ void AMDGPUDisassembler::convertMIMGInst(MCInst &MI) const {
MCRegister VdataSub0 = MRI.getSubReg(Vdata0, AMDGPU::sub0);
Vdata0 = (VdataSub0 != 0)? VdataSub0 : Vdata0;

NewVdata = MRI.getMatchingSuperReg(Vdata0, AMDGPU::sub0,
&MRI.getRegClass(DataRCID));
const MCRegisterClass &NewRC = MRI.getRegClass(DataRCID);
NewVdata = MRI.getMatchingSuperReg(Vdata0, AMDGPU::sub0, &NewRC);
NewVdata = CheckVGPROverflow(NewVdata, NewRC, MRI);
if (!NewVdata) {
// It's possible to encode this such that the low register + enabled
// components exceeds the register count.
Expand All @@ -1347,8 +1368,9 @@ void AMDGPUDisassembler::convertMIMGInst(MCInst &MI) const {
VAddrSA = VAddrSubSA ? VAddrSubSA : VAddrSA;

auto AddrRCID = MCII->get(NewOpcode).operands()[VAddrSAIdx].RegClass;
NewVAddrSA = MRI.getMatchingSuperReg(VAddrSA, AMDGPU::sub0,
&MRI.getRegClass(AddrRCID));
const MCRegisterClass &NewRC = MRI.getRegClass(AddrRCID);
NewVAddrSA = MRI.getMatchingSuperReg(VAddrSA, AMDGPU::sub0, &NewRC);
NewVAddrSA = CheckVGPROverflow(NewVAddrSA, NewRC, MRI);
if (!NewVAddrSA)
return;
}
Expand Down
6 changes: 3 additions & 3 deletions llvm/lib/Target/AMDGPU/GCNSubtarget.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -577,6 +577,7 @@ GCNSubtarget::getMaxNumVectorRegs(const Function &F) const {

unsigned MaxNumVGPRs = MaxVectorRegs;
unsigned MaxNumAGPRs = 0;
unsigned NumArchVGPRs = has1024AddressableVGPRs() ? 1024 : 256;

// On GFX90A, the number of VGPRs and AGPRs need not be equal. Theoretically,
// a wave may have up to 512 total vector registers combining together both
Expand All @@ -589,7 +590,6 @@ GCNSubtarget::getMaxNumVectorRegs(const Function &F) const {
if (hasGFX90AInsts()) {
unsigned MinNumAGPRs = 0;
const unsigned TotalNumAGPRs = AMDGPU::AGPR_32RegClass.getNumRegs();
const unsigned TotalNumVGPRs = AMDGPU::VGPR_32RegClass.getNumRegs();

const std::pair<unsigned, unsigned> DefaultNumAGPR = {~0u, ~0u};

Expand All @@ -614,11 +614,11 @@ GCNSubtarget::getMaxNumVectorRegs(const Function &F) const {
MaxNumAGPRs = std::min(std::max(MinNumAGPRs, MaxNumAGPRs), MaxVectorRegs);
MinNumAGPRs = std::min(std::min(MinNumAGPRs, TotalNumAGPRs), MaxNumAGPRs);

MaxNumVGPRs = std::min(MaxVectorRegs - MinNumAGPRs, TotalNumVGPRs);
MaxNumVGPRs = std::min(MaxVectorRegs - MinNumAGPRs, NumArchVGPRs);
MaxNumAGPRs = std::min(MaxVectorRegs - MaxNumVGPRs, MaxNumAGPRs);

assert(MaxNumVGPRs + MaxNumAGPRs <= MaxVectorRegs &&
MaxNumAGPRs <= TotalNumAGPRs && MaxNumVGPRs <= TotalNumVGPRs &&
MaxNumAGPRs <= TotalNumAGPRs && MaxNumVGPRs <= NumArchVGPRs &&
"invalid register counts");
} else if (hasMAIInsts()) {
// On gfx908 the number of AGPRs always equals the number of VGPRs.
Expand Down
8 changes: 4 additions & 4 deletions llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUMCCodeEmitter.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -402,7 +402,7 @@ void AMDGPUMCCodeEmitter::encodeInstruction(const MCInst &MI,
if (AMDGPU::isGFX10Plus(STI) && isVCMPX64(Desc)) {
assert((Encoding & 0xFF) == 0);
Encoding |= MRI.getEncodingValue(AMDGPU::EXEC_LO) &
AMDGPU::HWEncoding::REG_IDX_MASK;
AMDGPU::HWEncoding::LO256_REG_IDX_MASK;
}

for (unsigned i = 0; i < bytes; i++) {
Expand Down Expand Up @@ -551,7 +551,7 @@ void AMDGPUMCCodeEmitter::getAVOperandEncoding(
SmallVectorImpl<MCFixup> &Fixups, const MCSubtargetInfo &STI) const {
MCRegister Reg = MI.getOperand(OpNo).getReg();
unsigned Enc = MRI.getEncodingValue(Reg);
unsigned Idx = Enc & AMDGPU::HWEncoding::REG_IDX_MASK;
unsigned Idx = Enc & AMDGPU::HWEncoding::LO256_REG_IDX_MASK;
bool IsVGPROrAGPR =
Enc & (AMDGPU::HWEncoding::IS_VGPR | AMDGPU::HWEncoding::IS_AGPR);

Expand Down Expand Up @@ -593,7 +593,7 @@ void AMDGPUMCCodeEmitter::getMachineOpValue(const MCInst &MI,
const MCSubtargetInfo &STI) const {
if (MO.isReg()){
unsigned Enc = MRI.getEncodingValue(MO.getReg());
unsigned Idx = Enc & AMDGPU::HWEncoding::REG_IDX_MASK;
unsigned Idx = Enc & AMDGPU::HWEncoding::LO256_REG_IDX_MASK;
bool IsVGPROrAGPR =
Enc & (AMDGPU::HWEncoding::IS_VGPR | AMDGPU::HWEncoding::IS_AGPR);
Op = Idx | (IsVGPROrAGPR << 8);
Expand Down Expand Up @@ -656,7 +656,7 @@ void AMDGPUMCCodeEmitter::getMachineOpValueT16Lo128(
const MCOperand &MO = MI.getOperand(OpNo);
if (MO.isReg()) {
uint16_t Encoding = MRI.getEncodingValue(MO.getReg());
unsigned RegIdx = Encoding & AMDGPU::HWEncoding::REG_IDX_MASK;
unsigned RegIdx = Encoding & AMDGPU::HWEncoding::LO256_REG_IDX_MASK;
bool IsHi = Encoding & AMDGPU::HWEncoding::IS_HI16;
bool IsVGPR = Encoding & AMDGPU::HWEncoding::IS_VGPR;
assert((!IsVGPR || isUInt<7>(RegIdx)) && "VGPR0-VGPR127 expected!");
Expand Down
9 changes: 5 additions & 4 deletions llvm/lib/Target/AMDGPU/SIDefines.h
Original file line number Diff line number Diff line change
Expand Up @@ -354,10 +354,11 @@ enum : unsigned {
// Register codes as defined in the TableGen's HWEncoding field.
namespace HWEncoding {
enum : unsigned {
REG_IDX_MASK = 0xff,
IS_VGPR = 1 << 8,
IS_AGPR = 1 << 9,
IS_HI16 = 1 << 10,
REG_IDX_MASK = 0x3ff,
LO256_REG_IDX_MASK = 0xff,
IS_VGPR = 1 << 10,
IS_AGPR = 1 << 11,
IS_HI16 = 1 << 12,
};
} // namespace HWEncoding

Expand Down
4 changes: 3 additions & 1 deletion llvm/lib/Target/AMDGPU/SIFrameLowering.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1728,7 +1728,9 @@ void SIFrameLowering::determineCalleeSaves(MachineFunction &MF,
"Whole wave functions can use the reg mapped for their i1 argument");

// FIXME: Be more efficient!
for (MCRegister Reg : AMDGPU::VGPR_32RegClass)
unsigned NumArchVGPRs = ST.has1024AddressableVGPRs() ? 1024 : 256;
for (MCRegister Reg :
AMDGPU::VGPR_32RegClass.getRegisters().take_front(NumArchVGPRs))
if (MF.getRegInfo().isPhysRegModified(Reg)) {
MFI->reserveWWMRegister(Reg);
MF.begin()->addLiveIn(Reg);
Expand Down
5 changes: 3 additions & 2 deletions llvm/lib/Target/AMDGPU/SIISelLowering.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -16916,7 +16916,7 @@ SITargetLowering::getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI_,
switch (BitWidth) {
case 16:
RC = Subtarget->useRealTrue16Insts() ? &AMDGPU::VGPR_16RegClass
: &AMDGPU::VGPR_32RegClass;
: &AMDGPU::VGPR_32_Lo256RegClass;
break;
default:
RC = TRI->getVGPRClassForBitWidth(BitWidth);
Expand Down Expand Up @@ -16963,7 +16963,7 @@ SITargetLowering::getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI_,
auto [Kind, Idx, NumRegs] = AMDGPU::parseAsmConstraintPhysReg(Constraint);
if (Kind != '\0') {
if (Kind == 'v') {
RC = &AMDGPU::VGPR_32RegClass;
RC = &AMDGPU::VGPR_32_Lo256RegClass;
} else if (Kind == 's') {
RC = &AMDGPU::SGPR_32RegClass;
} else if (Kind == 'a') {
Expand Down Expand Up @@ -17005,6 +17005,7 @@ SITargetLowering::getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI_,
return std::pair(0U, nullptr);
if (Idx < RC->getNumRegs())
return std::pair(RC->getRegister(Idx), RC);
return std::pair(0U, nullptr);
}
}

Expand Down
5 changes: 2 additions & 3 deletions llvm/lib/Target/AMDGPU/SIInsertWaitcnts.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -152,7 +152,7 @@ static constexpr StringLiteral WaitEventTypeName[] = {
// We reserve a fixed number of VGPR slots in the scoring tables for
// special tokens like SCMEM_LDS (needed for buffer load to LDS).
enum RegisterMapping {
SQ_MAX_PGM_VGPRS = 1024, // Maximum programmable VGPRs across all targets.
SQ_MAX_PGM_VGPRS = 2048, // Maximum programmable VGPRs across all targets.
AGPR_OFFSET = 512, // Maximum programmable ArchVGPRs across all targets.
SQ_MAX_PGM_SGPRS = 128, // Maximum programmable SGPRs across all targets.
// Artificial register slots to track LDS writes into specific LDS locations
Expand Down Expand Up @@ -831,15 +831,14 @@ RegInterval WaitcntBrackets::getRegInterval(const MachineInstr *MI,

MCRegister MCReg = AMDGPU::getMCReg(Op.getReg(), *Context->ST);
unsigned RegIdx = TRI->getHWRegIndex(MCReg);
assert(isUInt<8>(RegIdx));

const TargetRegisterClass *RC = TRI->getPhysRegBaseClass(Op.getReg());
unsigned Size = TRI->getRegSizeInBits(*RC);

// AGPRs/VGPRs are tracked every 16 bits, SGPRs by 32 bits
if (TRI->isVectorRegister(*MRI, Op.getReg())) {
unsigned Reg = RegIdx << 1 | (AMDGPU::isHi16Reg(MCReg, *TRI) ? 1 : 0);
assert(Reg < AGPR_OFFSET);
assert(!Context->ST->hasMAIInsts() || Reg < AGPR_OFFSET);
Result.first = Reg;
if (TRI->isAGPR(*MRI, Op.getReg()))
Result.first += AGPR_OFFSET;
Expand Down
57 changes: 55 additions & 2 deletions llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -3273,6 +3273,10 @@ StringRef SIRegisterInfo::getRegAsmName(MCRegister Reg) const {
return AMDGPUInstPrinter::getRegisterName(Reg);
}

// Extracts the register index from the encoding value of \p Reg by masking
// it with AMDGPU::HWEncoding::REG_IDX_MASK.
unsigned SIRegisterInfo::getHWRegIndex(MCRegister Reg) const {
  const unsigned Encoding = getEncodingValue(Reg);
  return Encoding & AMDGPU::HWEncoding::REG_IDX_MASK;
}

// Convenience overload: looks up the bit width of \p RC by forwarding its
// class ID to the ID-based getRegBitWidth.
unsigned AMDGPU::getRegBitWidth(const TargetRegisterClass &RC) {
  const unsigned ClassID = RC.getID();
  return getRegBitWidth(ClassID);
}
Expand Down Expand Up @@ -3353,6 +3357,40 @@ SIRegisterInfo::getVGPRClassForBitWidth(unsigned BitWidth) const {
: getAnyVGPRClassForBitWidth(BitWidth);
}

// Selects the Lo256 VGPR register class for \p BitWidth: the first table
// entry whose upper bound is at least \p BitWidth. Every class past the
// 32-bit one is the Align2 variant.
// \returns nullptr when \p BitWidth exceeds 1024.
const TargetRegisterClass *
SIRegisterInfo::getAlignedLo256VGPRClassForBitWidth(unsigned BitWidth) const {
  struct WidthEntry {
    unsigned MaxBits;
    const TargetRegisterClass *RC;
  };

  // Ordered by ascending upper bound. Widths that fall between two entries
  // round up to the next class (e.g. 385..512 all use the 512-bit class).
  static const WidthEntry Table[] = {
      {32, &AMDGPU::VGPR_32_Lo256RegClass},
      {64, &AMDGPU::VReg_64_Lo256_Align2RegClass},
      {96, &AMDGPU::VReg_96_Lo256_Align2RegClass},
      {128, &AMDGPU::VReg_128_Lo256_Align2RegClass},
      {160, &AMDGPU::VReg_160_Lo256_Align2RegClass},
      {192, &AMDGPU::VReg_192_Lo256_Align2RegClass},
      {224, &AMDGPU::VReg_224_Lo256_Align2RegClass},
      {256, &AMDGPU::VReg_256_Lo256_Align2RegClass},
      {288, &AMDGPU::VReg_288_Lo256_Align2RegClass},
      {320, &AMDGPU::VReg_320_Lo256_Align2RegClass},
      {352, &AMDGPU::VReg_352_Lo256_Align2RegClass},
      {384, &AMDGPU::VReg_384_Lo256_Align2RegClass},
      {512, &AMDGPU::VReg_512_Lo256_Align2RegClass},
      {1024, &AMDGPU::VReg_1024_Lo256_Align2RegClass},
  };

  for (const WidthEntry &Entry : Table)
    if (BitWidth <= Entry.MaxBits)
      return Entry.RC;

  return nullptr;
}

static const TargetRegisterClass *
getAnyAGPRClassForBitWidth(unsigned BitWidth) {
if (BitWidth == 64)
Expand Down Expand Up @@ -3547,7 +3585,17 @@ bool SIRegisterInfo::isSGPRReg(const MachineRegisterInfo &MRI,
const TargetRegisterClass *
SIRegisterInfo::getEquivalentVGPRClass(const TargetRegisterClass *SRC) const {
unsigned Size = getRegSizeInBits(*SRC);
const TargetRegisterClass *VRC = getVGPRClassForBitWidth(Size);

switch (SRC->getID()) {
default:
break;
case AMDGPU::VS_32_Lo256RegClassID:
case AMDGPU::VS_64_Lo256RegClassID:
return getAllocatableClass(getAlignedLo256VGPRClassForBitWidth(Size));
}

const TargetRegisterClass *VRC =
getAllocatableClass(getVGPRClassForBitWidth(Size));
assert(VRC && "Invalid register class size");
return VRC;
}
Expand Down Expand Up @@ -4005,7 +4053,12 @@ SIRegisterInfo::getSubRegAlignmentNumBits(const TargetRegisterClass *RC,
unsigned SIRegisterInfo::getNumUsedPhysRegs(const MachineRegisterInfo &MRI,
const TargetRegisterClass &RC,
bool IncludeCalls) const {
for (MCPhysReg Reg : reverse(RC.getRegisters()))
unsigned NumArchVGPRs = ST.has1024AddressableVGPRs() ? 1024 : 256;
ArrayRef<MCPhysReg> Registers =
(RC.getID() == AMDGPU::VGPR_32RegClassID)
? RC.getRegisters().take_front(NumArchVGPRs)
: RC.getRegisters();
for (MCPhysReg Reg : reverse(Registers))
if (MRI.isPhysRegUsed(Reg, /*SkipRegMaskTest=*/!IncludeCalls))
return getHWRegIndex(Reg) + 1;
return 0;
Expand Down
7 changes: 4 additions & 3 deletions llvm/lib/Target/AMDGPU/SIRegisterInfo.h
Original file line number Diff line number Diff line change
Expand Up @@ -200,13 +200,14 @@ class SIRegisterInfo final : public AMDGPUGenRegisterInfo {
StringRef getRegAsmName(MCRegister Reg) const override;

// Pseudo regs are not allowed
unsigned getHWRegIndex(MCRegister Reg) const {
return getEncodingValue(Reg) & 0xff;
}
unsigned getHWRegIndex(MCRegister Reg) const;

LLVM_READONLY
const TargetRegisterClass *getVGPRClassForBitWidth(unsigned BitWidth) const;

LLVM_READONLY const TargetRegisterClass *
getAlignedLo256VGPRClassForBitWidth(unsigned BitWidth) const;

LLVM_READONLY
const TargetRegisterClass *getAGPRClassForBitWidth(unsigned BitWidth) const;

Expand Down
Loading