Skip to content

Commit

Permalink
Reland "[AArch64] Define high bits of FPR and GPR registers (take 2) (#…
Browse files Browse the repository at this point in the history
…114827)"

The issue with slow compile-time was caused by an assert in
AArch64RegisterInfo.cpp. The assert invokes 'checkAllSuperRegsMarked'
after adding all the reserved registers. This call gets very expensive
after adding the _HI registers due to the way the function searches
in the 'Exception' list, which is expected to be a small list but isn't
(the patch added 190 _HI regs).

It was possible to rewrite the code in such a way that the _HI registers
are marked as reserved after the check. This makes the problem go away
entirely and restores compile-time to what it was before (tested for
`check-runtimes`, which previously showed a ~5x slowdown).

This reverts commits:
  1434d2a
  2704647
  • Loading branch information
sdesmalen-arm committed Nov 27, 2024
1 parent dddeec4 commit 318c69d
Show file tree
Hide file tree
Showing 25 changed files with 603 additions and 329 deletions.
5 changes: 3 additions & 2 deletions bolt/unittests/Core/MCPlusBuilder.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -90,14 +90,15 @@ INSTANTIATE_TEST_SUITE_P(AArch64, MCPlusBuilderTester,
::testing::Values(Triple::aarch64));

TEST_P(MCPlusBuilderTester, AliasX0) {
uint64_t AliasesX0[] = {AArch64::W0, AArch64::X0, AArch64::W0_W1,
uint64_t AliasesX0[] = {AArch64::W0, AArch64::W0_HI,
AArch64::X0, AArch64::W0_W1,
AArch64::X0_X1, AArch64::X0_X1_X2_X3_X4_X5_X6_X7};
size_t AliasesX0Count = sizeof(AliasesX0) / sizeof(*AliasesX0);
testRegAliases(Triple::aarch64, AArch64::X0, AliasesX0, AliasesX0Count);
}

TEST_P(MCPlusBuilderTester, AliasSmallerX0) {
uint64_t AliasesX0[] = {AArch64::W0, AArch64::X0};
uint64_t AliasesX0[] = {AArch64::W0, AArch64::W0_HI, AArch64::X0};
size_t AliasesX0Count = sizeof(AliasesX0) / sizeof(*AliasesX0);
testRegAliases(Triple::aarch64, AArch64::X0, AliasesX0, AliasesX0Count, true);
}
Expand Down
8 changes: 8 additions & 0 deletions llvm/include/llvm/MC/MCRegisterInfo.h
Original file line number Diff line number Diff line change
Expand Up @@ -129,6 +129,9 @@ struct MCRegisterDesc {

// Is true for constant registers.
bool IsConstant;

// Is true for artificial registers.
bool IsArtificial;
};

/// MCRegisterInfo base class - We assume that the target defines a static
Expand Down Expand Up @@ -396,6 +399,11 @@ class MCRegisterInfo {
/// Returns true if the given register is constant.
bool isConstant(MCRegister RegNo) const { return get(RegNo).IsConstant; }

/// Returns true if the given register is artificial, which means it
/// represents a regunit that is not separately addressable but still needs to
/// be modelled, such as the top 16-bits of a 32-bit GPR.
bool isArtificial(MCRegister RegNo) const { return get(RegNo).IsArtificial; }

/// Return the number of registers this target has (useful for
/// sizing arrays holding per register information)
unsigned getNumRegs() const {
Expand Down
14 changes: 11 additions & 3 deletions llvm/lib/MCA/HardwareUnits/RegisterFile.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,11 @@ namespace mca {

const unsigned WriteRef::INVALID_IID = std::numeric_limits<unsigned>::max();

static std::function<bool(MCPhysReg)>
isNonArtificial(const MCRegisterInfo &MRI) {
return [&MRI](MCPhysReg R) { return !MRI.isArtificial(R); };
}

WriteRef::WriteRef(unsigned SourceIndex, WriteState *WS)
: IID(SourceIndex), WriteBackCycle(), WriteResID(), RegisterID(),
Write(WS) {}
Expand Down Expand Up @@ -282,7 +287,8 @@ void RegisterFile::addRegisterWrite(WriteRef Write,
MCPhysReg ZeroRegisterID =
WS.clearsSuperRegisters() ? RegID : WS.getRegisterID();
ZeroRegisters.setBitVal(ZeroRegisterID, IsWriteZero);
for (MCPhysReg I : MRI.subregs(ZeroRegisterID))
for (MCPhysReg I :
make_filter_range(MRI.subregs(ZeroRegisterID), isNonArtificial(MRI)))
ZeroRegisters.setBitVal(I, IsWriteZero);

// If this move has been eliminated, then method tryEliminateMoveOrSwap should
Expand All @@ -304,7 +310,8 @@ void RegisterFile::addRegisterWrite(WriteRef Write,
// Update the mapping for register RegID including its sub-registers.
RegisterMappings[RegID].first = Write;
RegisterMappings[RegID].second.AliasRegID = 0U;
for (MCPhysReg I : MRI.subregs(RegID)) {
for (MCPhysReg I :
make_filter_range(MRI.subregs(RegID), isNonArtificial(MRI))) {
RegisterMappings[I].first = Write;
RegisterMappings[I].second.AliasRegID = 0U;
}
Expand Down Expand Up @@ -472,7 +479,8 @@ bool RegisterFile::tryEliminateMoveOrSwap(MutableArrayRef<WriteState> Writes,
AliasedReg = RMAlias.AliasRegID;

RegisterMappings[AliasReg].second.AliasRegID = AliasedReg;
for (MCPhysReg I : MRI.subregs(AliasReg))
for (MCPhysReg I :
make_filter_range(MRI.subregs(AliasReg), isNonArtificial(MRI)))
RegisterMappings[I].second.AliasRegID = AliasedReg;

if (ZeroRegisters[RS.getRegisterID()]) {
Expand Down
5 changes: 4 additions & 1 deletion llvm/lib/Target/AArch64/AArch64LoadStoreOptimizer.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1540,7 +1540,10 @@ static bool canRenameMOP(const MachineOperand &MOP,
// Note that this relies on the structure of the AArch64 register file. In
// particular, a subregister cannot be written without overwriting the
// whole register.
if (RegClass->HasDisjunctSubRegs) {
if (RegClass->HasDisjunctSubRegs && RegClass->CoveredBySubRegs &&
(TRI->getSubRegisterClass(RegClass, AArch64::dsub0) ||
TRI->getSubRegisterClass(RegClass, AArch64::qsub0) ||
TRI->getSubRegisterClass(RegClass, AArch64::zsub0))) {
LLVM_DEBUG(
dbgs()
<< " Cannot rename operands with multiple disjunct subregisters ("
Expand Down
32 changes: 30 additions & 2 deletions llvm/lib/Target/AArch64/AArch64RegisterInfo.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -490,13 +490,36 @@ AArch64RegisterInfo::getStrictlyReservedRegs(const MachineFunction &MF) const {
}

assert(checkAllSuperRegsMarked(Reserved));

// Add _HI registers after checkAllSuperRegsMarked as this check otherwise
// becomes considerably more expensive.
Reserved.set(AArch64::WSP_HI);
Reserved.set(AArch64::WZR_HI);
static_assert(AArch64::W30_HI - AArch64::W0_HI == 30,
"Unexpected order of registers");
Reserved.set(AArch64::W0_HI, AArch64::W30_HI);
static_assert(AArch64::B31_HI - AArch64::B0_HI == 31,
"Unexpected order of registers");
Reserved.set(AArch64::B0_HI, AArch64::B31_HI);
static_assert(AArch64::H31_HI - AArch64::H0_HI == 31,
"Unexpected order of registers");
Reserved.set(AArch64::H0_HI, AArch64::H31_HI);
static_assert(AArch64::S31_HI - AArch64::S0_HI == 31,
"Unexpected order of registers");
Reserved.set(AArch64::S0_HI, AArch64::S31_HI);
static_assert(AArch64::D31_HI - AArch64::D0_HI == 31,
"Unexpected order of registers");
Reserved.set(AArch64::D0_HI, AArch64::D31_HI);
static_assert(AArch64::Q31_HI - AArch64::Q0_HI == 31,
"Unexpected order of registers");
Reserved.set(AArch64::Q0_HI, AArch64::Q31_HI);

return Reserved;
}

BitVector
AArch64RegisterInfo::getReservedRegs(const MachineFunction &MF) const {
BitVector Reserved = getStrictlyReservedRegs(MF);

BitVector Reserved(getNumRegs());
for (size_t i = 0; i < AArch64::GPR32commonRegClass.getNumRegs(); ++i) {
if (MF.getSubtarget<AArch64Subtarget>().isXRegisterReservedForRA(i))
markSuperRegs(Reserved, AArch64::GPR32commonRegClass.getRegister(i));
Expand All @@ -514,6 +537,11 @@ AArch64RegisterInfo::getReservedRegs(const MachineFunction &MF) const {
}

assert(checkAllSuperRegsMarked(Reserved));

// Handle strictlyReservedRegs separately to avoid re-evaluating the assert,
// which becomes considerably expensive when considering the _HI registers.
Reserved |= getStrictlyReservedRegs(MF);

return Reserved;
}

Expand Down
Loading

0 comments on commit 318c69d

Please sign in to comment.