Adding stressMode, masking away eGPR for unsupported nodes and cleanup
DeepakRajendrakumaran committed Jan 30, 2025
1 parent 1dfaddc commit dc3a1e8
Showing 5 changed files with 467 additions and 83 deletions.
12 changes: 8 additions & 4 deletions src/coreclr/jit/codegencommon.cpp
@@ -188,6 +188,7 @@ void CodeGenInterface::CopyRegisterInfo()
rbmFltCalleeTrash = compiler->rbmFltCalleeTrash;
rbmAllInt = compiler->rbmAllInt;
rbmIntCalleeTrash = compiler->rbmIntCalleeTrash;
regIntLast = compiler->regIntLast;
#endif // TARGET_AMD64

rbmAllMask = compiler->rbmAllMask;
@@ -5364,6 +5365,10 @@ void CodeGen::genFnProlog()
// will be skipped.
bool initRegZeroed = false;
regMaskTP excludeMask = intRegState.rsCalleeRegArgMaskLiveIn;
#if defined(TARGET_AMD64)
// TODO-Xarch-apx: Revert. Exclude the eGPRs so they are not used for movs that lack REX2 support.
excludeMask = excludeMask | RBM_HIGHINT;
#endif // defined(TARGET_AMD64)

#ifdef TARGET_ARM
// If we have a variable sized frame (compLocallocUsed is true)
@@ -5773,7 +5778,7 @@ void CodeGen::genFnProlog()

if (initRegs)
{
for (regNumber reg = REG_INT_FIRST; reg <= REG_INT_LAST; reg = REG_NEXT(reg))
for (regNumber reg = REG_INT_FIRST; reg <= get_REG_INT_LAST(); reg = REG_NEXT(reg))
{
regMaskTP regMask = genRegMask(reg);
if (regMask & initRegs)
@@ -6315,8 +6320,7 @@ regMaskTP CodeGen::genPushRegs(regMaskTP regs, regMaskTP* byrefRegs, regMaskTP*
noway_assert(genTypeStSz(TYP_BYREF) == genTypeStSz(TYP_I_IMPL));

regMaskTP pushedRegs = regs;

for (regNumber reg = REG_INT_FIRST; reg <= REG_INT_LAST; reg = REG_NEXT(reg))
for (regNumber reg = REG_INT_FIRST; reg <= get_REG_INT_LAST(); reg = REG_NEXT(reg))
{
regMaskTP regMask = genRegMask(reg);

@@ -6388,7 +6392,7 @@ void CodeGen::genPopRegs(regMaskTP regs, regMaskTP byrefRegs, regMaskTP noRefReg
regMaskTP popedRegs = regs;

// Walk the registers in the reverse order as genPushRegs()
for (regNumber reg = REG_INT_LAST; reg >= REG_INT_FIRST; reg = REG_PREV(reg))
for (regNumber reg = get_REG_INT_LAST(); reg >= REG_INT_FIRST; reg = REG_PREV(reg))
{
regMaskTP regMask = genRegMask(reg);

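All three loops above now take their upper bound from get_REG_INT_LAST() instead of the compile-time REG_INT_LAST constant, so the walk stops at r15 unless the APX eGPRs are actually in play. A minimal standalone sketch of that pattern, with simplified types and a hypothetical apxAvailable flag standing in for compiler->canUseApxEncoding():

#include <cstdint>
#include <cstdio>

enum : int
{
    SREG_RAX = 0,  // first integer register
    SREG_R15 = 15, // last legacy GPR
    SREG_R31 = 31  // last APX eGPR
};

struct PrologSketch
{
    bool apxAvailable = false; // stand-in for compiler->canUseApxEncoding()

    int lastIntReg() const
    {
        // Mirrors get_REG_INT_LAST(): the loop bound is now a per-compilation value.
        return apxAvailable ? SREG_R31 : SREG_R15;
    }

    void zeroInitRegs(uint32_t initRegs) const
    {
        for (int reg = SREG_RAX; reg <= lastIntReg(); ++reg)
        {
            if ((initRegs >> reg) & 1u)
            {
                std::printf("xor r%d, r%d\n", reg, reg); // stand-in for the real zeroing move
            }
        }
    }
};

On non-AMD64 targets the accessor folds back to the REG_INT_LAST constant (see the lsra.h hunk below), so these loops compile exactly as before.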
32 changes: 29 additions & 3 deletions src/coreclr/jit/lsra.cpp
@@ -275,6 +275,24 @@ SingleTypeRegSet LinearScan::lowSIMDRegs()
#endif
}

#if defined(TARGET_XARCH)
//------------------------------------------------------------------------
// getLowGprRegs(): Return the set of GPR registers usable by non-APX
// encodings, i.e., the available set with the eGPR registers
// (R16-R31) removed.
//
// Return Value:
// Register mask of non-APX GPR registers.
SingleTypeRegSet LinearScan::getLowGprRegs()
{
#if defined(TARGET_AMD64)
return (availableIntRegs & RBM_LOWINT.GetIntRegSet());
#else
return availableIntRegs;
#endif // TARGET_AMD64
}
#endif // TARGET_XARCH

void LinearScan::updateNextFixedRef(RegRecord* regRecord, RefPosition* nextRefPosition, RefPosition* nextKill)
{
LsraLocation nextLocation = nextRefPosition == nullptr ? MaxLocation : nextRefPosition->nodeLocation;
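The masking getLowGprRegs() performs is a single AND against the low-GPR set. A hedged sketch with plain uint64_t bitmasks in place of SingleTypeRegSet; the two mask constants are illustrative (one bit per register), not the JIT's real values:

#include <cstdint>

constexpr uint64_t SKETCH_RBM_LOWINT  = 0x0000FFFFull; // rax..r15
constexpr uint64_t SKETCH_RBM_HIGHINT = 0xFFFF0000ull; // r16..r31, the APX eGPRs

uint64_t getLowGprRegsSketch(uint64_t availableIntRegs)
{
    // Instructions with no REX2/EVEX form cannot encode r16-r31, so their
    // candidate set is clipped to the low sixteen GPRs.
    return availableIntRegs & SKETCH_RBM_LOWINT;
}

static_assert((SKETCH_RBM_LOWINT & SKETCH_RBM_HIGHINT) == 0, "low and high GPR sets must not overlap");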
@@ -535,6 +553,8 @@ static const regMaskTP LsraLimitSmallFPSet = (RBM_XMM0 | RBM_XMM1 | RBM_XMM2 | R
static const regMaskTP LsraLimitUpperSimdSet =
(RBM_XMM16 | RBM_XMM17 | RBM_XMM18 | RBM_XMM19 | RBM_XMM20 | RBM_XMM21 | RBM_XMM22 | RBM_XMM23 | RBM_XMM24 |
RBM_XMM25 | RBM_XMM26 | RBM_XMM27 | RBM_XMM28 | RBM_XMM29 | RBM_XMM30 | RBM_XMM31);
static const regMaskTP LsraLimitExtGprSet =
(RBM_R16 | RBM_R17 | RBM_R18 | RBM_R19 | RBM_R20 | RBM_R21 | RBM_R22 | RBM_R23 | RBM_ETW_FRAMED_EBP);
#elif defined(TARGET_ARM)
// On ARM, we may need two registers to set up the target register for a virtual call, so we need
// to have at least the maximum number of arg registers, plus 2.
@@ -627,6 +647,13 @@ SingleTypeRegSet LinearScan::stressLimitRegs(RefPosition* refPosition, RegisterT
LsraLimitUpperSimdSet.GetRegSetForType(regType), minRegCount);
}
break;
case LSRA_LIMIT_EXT_GPR_SET:
if ((mask & LsraLimitExtGprSet) != RBM_NONE)
{
mask = getConstrainedRegMask(refPosition, regType, mask,
LsraLimitExtGprSet.GetRegSetForType(regType), minRegCount);
}
break;
#endif

default:
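Every case in this switch has the same shape: constrain only when the stress set actually intersects the node's legal mask, and let getConstrainedRegMask fall back when too few registers remain. A reduced sketch of that flow (the real getConstrainedRegMask also accounts for fixed references, omitted here):

#include <bit>
#include <cstdint>

uint64_t applyStressLimit(uint64_t mask, uint64_t stressSet, unsigned minRegCount)
{
    uint64_t limited = mask & stressSet;

    // Constrain only if the intersection is non-empty and still satisfies
    // the node's minimum register requirement.
    if (limited != 0 && static_cast<unsigned>(std::popcount(limited)) >= minRegCount)
    {
        return limited;
    }
    return mask;
}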
@@ -789,6 +816,8 @@ LinearScan::LinearScan(Compiler* theCompiler)
rbmFltCalleeTrash = compiler->rbmFltCalleeTrash;
rbmAllInt = compiler->rbmAllInt;
rbmIntCalleeTrash = compiler->rbmIntCalleeTrash;
regIntLast = compiler->regIntLast;
isApxSupported = compiler->canUseApxEncoding();
#endif // TARGET_AMD64

#if defined(TARGET_XARCH)
@@ -12398,9 +12427,6 @@ void LinearScan::verifyResolutionMove(GenTree* resolutionMove, LsraLocation curr
LinearScan::RegisterSelection::RegisterSelection(LinearScan* linearScan)
{
this->linearScan = linearScan;
#if defined(TARGET_AMD64)
rbmAllInt = linearScan->compiler->get_RBM_ALLINT();
#endif // TARGET_AMD64

#ifdef DEBUG
mappingTable = new ScoreMappingTable(linearScan->compiler->getAllocator(CMK_LSRA));
26 changes: 24 additions & 2 deletions src/coreclr/jit/lsra.h
@@ -771,7 +771,8 @@ class LinearScan : public LinearScanInterface
LSRA_LIMIT_SMALL_SET = 0x3,
#if defined(TARGET_AMD64)
LSRA_LIMIT_UPPER_SIMD_SET = 0x2000,
LSRA_LIMIT_MASK = 0x2003
LSRA_LIMIT_EXT_GPR_SET = 0x4000,
LSRA_LIMIT_MASK = 0x6003
#else
LSRA_LIMIT_MASK = 0x3
#endif
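The limit strategies pack into one bit field: the low two bits pick the small sets, and each AMD64-only stress set adds a flag bit, so LSRA_LIMIT_MASK must be their union. A one-line consistency check (a sketch, not part of the commit):

static_assert((0x2000 /* UPPER_SIMD */ | 0x4000 /* EXT_GPR */ | 0x3 /* small sets */) == 0x6003,
              "LSRA_LIMIT_MASK must cover every limit-strategy bit");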
@@ -1075,6 +1076,9 @@ class LinearScan : public LinearScanInterface
SingleTypeRegSet allByteRegs();
SingleTypeRegSet allSIMDRegs();
SingleTypeRegSet lowSIMDRegs();
#if defined(TARGET_XARCH)
SingleTypeRegSet getLowGprRegs();
#endif
SingleTypeRegSet internalFloatRegCandidates();

void makeRegisterInactive(RegRecord* physRegRecord);
@@ -1934,8 +1938,11 @@ class LinearScan : public LinearScanInterface
int BuildBinaryUses(GenTreeOp* node, SingleTypeRegSet candidates = RBM_NONE);
int BuildCastUses(GenTreeCast* cast, SingleTypeRegSet candidates);
#ifdef TARGET_XARCH
int BuildRMWUses(GenTree* node, GenTree* op1, GenTree* op2, SingleTypeRegSet candidates = RBM_NONE);
int BuildRMWUses(
GenTree* node, GenTree* op1, GenTree* op2, SingleTypeRegSet op1Candidates, SingleTypeRegSet op2Candidates);
inline SingleTypeRegSet BuildEvexIncompatibleMask(GenTree* tree);
inline SingleTypeRegSet BuildApxIncompatibleGPRMask(GenTree* tree, SingleTypeRegSet candidates, bool isGPR = false);
inline bool DoesThisUseGPR(GenTree* op);
#endif // TARGET_XARCH
int BuildSelect(GenTreeOp* select);
// This is the main entry point for building the RefPositions for a node.
@@ -2049,6 +2056,8 @@ class LinearScan : public LinearScanInterface
regMaskTP rbmFltCalleeTrash;
regMaskTP rbmAllInt;
regMaskTP rbmIntCalleeTrash;
regNumber regIntLast;
bool isApxSupported;

FORCEINLINE regMaskTP get_RBM_ALLFLOAT() const
{
@@ -2066,6 +2075,19 @@
{
return this->rbmIntCalleeTrash;
}
FORCEINLINE regNumber get_REG_INT_LAST() const
{
return this->regIntLast;
}
FORCEINLINE bool getIsApxSupported() const
{
return this->isApxSupported;
}
#else
FORCEINLINE regNumber get_REG_INT_LAST() const
{
return REG_INT_LAST;
}
#endif // TARGET_AMD64

#if defined(TARGET_XARCH)
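The paired definitions above keep a single call-site shape across targets: AMD64 reads the value cached from the Compiler, everyone else folds to the compile-time constant. A compressed, hypothetical version of the same split:

#if defined(TARGET_AMD64)
struct RegBoundsSketch
{
    int regIntLast; // cached from the Compiler at construction (r15 or r31)

    int get_REG_INT_LAST() const
    {
        return regIntLast;
    }
};
#else
struct RegBoundsSketch
{
    int get_REG_INT_LAST() const
    {
        return 15; // stand-in for the compile-time REG_INT_LAST
    }
};
#endif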
103 changes: 96 additions & 7 deletions src/coreclr/jit/lsrabuild.cpp
@@ -879,7 +879,11 @@ regMaskTP LinearScan::getKillSetForCall(GenTreeCall* call)
// so don't use the register post-call until it is consumed by SwiftError.
if (call->HasSwiftErrorHandling())
{
#ifdef TARGET_AMD64
killMask.AddGprRegs(RBM_SWIFT_ERROR.GetIntRegSet(), RBM_ALLINT);
#else
killMask.AddGprRegs(RBM_SWIFT_ERROR.GetIntRegSet());
#endif
}
#endif // SWIFT_SUPPORT
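Each AddGprRegs call site in this file now splits on TARGET_AMD64 to pass RBM_ALLINT as an extra argument. A sketch of the two shapes; modeling the extra argument as a validity mask for the wider register file is an assumption here, not the JIT's documented contract:

#include <cassert>
#include <cstdint>

struct KillMaskSketch
{
    uint64_t gpr = 0;

    void AddGprRegs(uint64_t regs) // non-AMD64 shape
    {
        gpr |= regs;
    }

    void AddGprRegs(uint64_t regs, uint64_t allInt) // AMD64 shape
    {
        assert((regs & ~allInt) == 0); // killed regs must exist in this compilation
        gpr |= regs;
    }
};

The same split appears below in getKillSetForBlockStore and buildIntervals.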

@@ -915,15 +919,23 @@ regMaskTP LinearScan::getKillSetForBlockStore(GenTreeBlk* blkNode)
if (isCopyBlk)
{
// rep movs kills RCX, RDI and RSI
#ifdef TARGET_AMD64
killMask.AddGprRegs(SRBM_RCX | SRBM_RDI | SRBM_RSI, RBM_ALLINT);
#else
killMask.AddGprRegs(SRBM_RCX | SRBM_RDI | SRBM_RSI);
#endif
}
else
{
// rep stos kills RCX and RDI.
// (Note that the Data() node, if not constant, will be assigned to
// RCX, but it's fine that this kills it, as the value is not available
// after this node in any case.)
#ifdef TARGET_AMD64
killMask.AddGprRegs(SRBM_RDI | SRBM_RCX, RBM_ALLINT);
#else
killMask.AddGprRegs(SRBM_RDI | SRBM_RCX);
#endif
}
break;
#endif
@@ -2275,7 +2287,11 @@ void LinearScan::buildIntervals()
// If there is a secret stub param, it is also live in
if (compiler->info.compPublishStubParam)
{
#ifdef TARGET_AMD64
intRegState->rsCalleeRegArgMaskLiveIn.AddGprRegs(RBM_SECRET_STUB_PARAM.GetIntRegSet(), RBM_ALLINT);
#else
intRegState->rsCalleeRegArgMaskLiveIn.AddGprRegs(RBM_SECRET_STUB_PARAM.GetIntRegSet());
#endif

LclVarDsc* stubParamDsc = compiler->lvaGetDesc(compiler->lvaStubArgumentVar);
if (isCandidateVar(stubParamDsc))
@@ -3794,21 +3810,50 @@ int LinearScan::BuildBinaryUses(GenTreeOp* node, SingleTypeRegSet candidates)
{
GenTree* op1 = node->gtGetOp1();
GenTree* op2 = node->gtGetOp2IfPresent();

#ifdef TARGET_XARCH
if (node->OperIsBinary() && isRMWRegOper(node))
{
assert(op2 != nullptr);
return BuildRMWUses(node, op1, op2, candidates);
if (candidates == RBM_NONE && varTypeUsesFloatReg(node) && (op1->isContainedIndir() || op2->isContainedIndir()))
{
if (op1->isContainedIndir())
{
return BuildRMWUses(node, op1, op2, getLowGprRegs(), candidates);
}
else
{
return BuildRMWUses(node, op1, op2, candidates, getLowGprRegs());
}
}
return BuildRMWUses(node, op1, op2, candidates, candidates);
}
#endif // TARGET_XARCH
int srcCount = 0;
if (op1 != nullptr)
{
srcCount += BuildOperandUses(op1, candidates);
#ifdef TARGET_XARCH
// BSWAP/BSWAP16 lowers to movbe when the operand is a contained indir, so it needs a low-GPR address too.
if (op1->isContainedIndir() &&
((varTypeUsesFloatReg(node) || node->OperGet() == GT_BSWAP || node->OperGet() == GT_BSWAP16)) &&
candidates == RBM_NONE)
{
srcCount += BuildOperandUses(op1, getLowGprRegs());
}
else
#endif
{
srcCount += BuildOperandUses(op1, candidates);
}
}
if (op2 != nullptr)
{

#ifdef TARGET_XARCH
if (op2->isContainedIndir() && varTypeUsesFloatReg(op1) && candidates == RBM_NONE)
{
candidates = getLowGprRegs();
}
#endif
srcCount += BuildOperandUses(op2, candidates);
}
return srcCount;
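Condensed, the new candidate selection in BuildBinaryUses says: when a float-typed node consumes a contained indirection and the caller left the candidates open, pin that operand's addressing registers to the low GPRs. A sketch with the GenTree predicates flattened to booleans:

#include <cstdint>

uint64_t pickIndirCandidates(bool nodeUsesFloatReg, bool opIsContainedIndir,
                             uint64_t candidates, uint64_t lowGprs)
{
    // The address of a contained indir under a non-REX2/EVEX-encoded
    // instruction cannot live in r16-r31, so clip to the low GPRs whenever
    // the caller passed RBM_NONE (0 here).
    if (opIsContainedIndir && nodeUsesFloatReg && candidates == 0)
    {
        return lowGprs;
    }
    return candidates;
}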
@@ -3865,6 +3910,8 @@ void LinearScan::BuildStoreLocDef(GenTreeLclVarCommon* storeLoc,
assert(varDsc->lvTracked);
unsigned varIndex = varDsc->lvVarIndex;
Interval* varDefInterval = getIntervalForLocalVar(varIndex);

GenTree* op1 = storeLoc->gtGetOp1();
if (!storeLoc->IsLastUse(index))
{
VarSetOps::AddElemD(compiler, currentLiveVars, varIndex);
@@ -3905,6 +3952,14 @@
defCandidates = allRegs(type);
#endif // TARGET_X86

#ifdef TARGET_AMD64
if (op1->isContained() && op1->OperIs(GT_BITCAST) && varTypeUsesIntReg(varDsc->GetRegisterType(storeLoc)))
{
defCandidates = getLowGprRegs();
}

#endif // TARGET_AMD64

RefPosition* def = newRefPosition(varDefInterval, currentLoc + 1, RefTypeDef, storeLoc, defCandidates, index);
if (varDefInterval->isWriteThru)
{
@@ -3987,6 +4042,7 @@ int LinearScan::BuildMultiRegStoreLoc(GenTreeLclVar* storeLoc)
}
assert(isCandidateVar(fieldVarDsc));
BuildStoreLocDef(storeLoc, fieldVarDsc, singleUseRef, i);

if (isMultiRegSrc && (i < (dstCount - 1)))
{
currentLoc += 2;
@@ -4067,9 +4123,20 @@
}
else if (op1->isContained() && op1->OperIs(GT_BITCAST))
{
GenTree* bitCastSrc = op1->gtGetOp1();
RegisterType registerType = regType(bitCastSrc->TypeGet());
singleUseRef = BuildUse(bitCastSrc, allRegs(registerType));
GenTree* bitCastSrc = op1->gtGetOp1();
RegisterType registerType = regType(bitCastSrc->TypeGet());
SingleTypeRegSet candidates = RBM_NONE;
#ifdef TARGET_AMD64
if (registerType == IntRegisterType)
{
candidates = getLowGprRegs();
}
else
#endif // TARGET_AMD64
{
candidates = allRegs(registerType);
}
singleUseRef = BuildUse(bitCastSrc, candidates);

Interval* srcInterval = singleUseRef->getInterval();
assert(regType(srcInterval->registerType) == registerType);
@@ -4151,7 +4218,16 @@ int LinearScan::BuildSimple(GenTree* tree)
}
if (tree->IsValue())
{
BuildDef(tree);
#ifdef TARGET_AMD64
if ((tree->OperGet() == GT_BSWAP || tree->OperGet() == GT_BSWAP16) && varTypeUsesIntReg(tree))
{
BuildDef(tree, getLowGprRegs());
}
else
#endif // TARGET_AMD64
{
BuildDef(tree);
}
}
return srcCount;
}
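BuildSimple applies the same restriction to the definition: BSWAP/BSWAP16 can be emitted as movbe, a legacy-encoded instruction, so an integer-typed def is kept out of the eGPRs. The choice, reduced to a sketch:

#include <cstdint>

uint64_t pickBswapDefCandidates(bool isBswap, bool usesIntReg, uint64_t allRegs, uint64_t lowGprs)
{
    // movbe has no eGPR-capable encoding available here (an assumption based
    // on the masking this commit applies), so constrain the destination.
    return (isBswap && usesIntReg) ? lowGprs : allRegs;
}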
@@ -4569,6 +4645,19 @@ int LinearScan::BuildCmpOperands(GenTree* tree)
}
#endif // TARGET_X86

#ifdef TARGET_AMD64
if (op2->isContainedIndir() && varTypeUsesFloatReg(op1) && op2Candidates == RBM_NONE)
{
// A contained indir consumed by a float-typed compare is addressed from a non-APX encoded instruction, so keep its address registers out of the eGPRs.
op2Candidates = getLowGprRegs();
}
if (op1->isContainedIndir() && varTypeUsesFloatReg(op2) && op1Candidates == RBM_NONE)
{
// Same restriction with the operands swapped: the contained indir's address must come from the low GPRs.
op1Candidates = getLowGprRegs();
}
#endif // TARGET_AMD64

int srcCount = BuildOperandUses(op1, op1Candidates);
srcCount += BuildOperandUses(op2, op2Candidates);
return srcCount;