[arm64] JIT: Enable CSE/hoisting for "arrayBase + elementOffset" #61293

Merged · 22 commits · Nov 10, 2021
Changes from 21 commits
9 changes: 9 additions & 0 deletions src/coreclr/jit/codegenlinear.cpp
@@ -1574,6 +1574,15 @@ void CodeGen::genConsumeRegs(GenTree* tree)
{
genConsumeAddress(tree);
}
#ifdef TARGET_ARM64
else if (tree->OperIs(GT_BFIZ))
{
// Can be contained as part of LEA on ARM64
GenTreeCast* cast = tree->gtGetOp1()->AsCast();
assert(cast->isContained());
genConsumeAddress(cast->CastOp());
}
#endif
else if (tree->OperIsLocalRead())
{
// A contained lcl var must be living on stack and marked as reg optional, or not be a
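The change above teaches genConsumeRegs that a contained BFIZ produces no register of its own: the register that must be consumed belongs to the source of its (also contained) cast operand. A minimal standalone model of that rule, assuming simplified types rather than the real GenTree API:

```cpp
// Simplified model (assumption: not the real JIT types) of register
// consumption for contained nodes: a contained node defines no register,
// so the consumer walks down to the first node that actually owns one.
#include <cassert>

struct Node
{
    bool  contained;
    Node* op1; // single operand; nullptr for leaves
    int   reg; // register assigned to un-contained nodes
};

int consumeReg(Node* node)
{
    // For a contained BFIZ, both it and its CAST operand are contained, so
    // the consumed register is the one holding the 32-bit index value.
    while (node->contained)
    {
        assert(node->op1 != nullptr);
        node = node->op1;
    }
    return node->reg;
}
```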
27 changes: 25 additions & 2 deletions src/coreclr/jit/emitarm64.cpp
@@ -6985,6 +6985,14 @@ void emitter::emitIns_R_R_R_Ext(instruction ins,
{
shiftAmount = insOptsLSL(opt) ? scale : 0;
}

// If the target reg is ZR, we're doing an implicit null check
// where the target type was ignored and set to TYP_INT.
if ((reg1 == REG_ZR) && (shiftAmount > 0))
{
shiftAmount = scale;
}

assert((shiftAmount == scale) || (shiftAmount == 0));

reg2 = encodingSPtoZR(reg2);
@@ -13481,8 +13489,23 @@ void emitter::emitInsLoadStoreOp(instruction ins, emitAttr attr, regNumber dataR
}
else // no scale
{
if (index->OperIs(GT_BFIZ) && index->isContained())
{
// Then load/store dataReg from/to [memBase + index*scale with sign/zero extension]
GenTreeCast* cast = index->gtGetOp1()->AsCast();

// For now, this code only supports extensions from i32/u32
assert(cast->isContained() && varTypeIsInt(cast->CastFromType()));

emitIns_R_R_R_Ext(ins, attr, dataReg, memBase->GetRegNum(), cast->CastOp()->GetRegNum(),
cast->IsUnsigned() ? INS_OPTS_UXTW : INS_OPTS_SXTW,
(int)index->gtGetOp2()->AsIntCon()->IconValue());
}
else
{
// Then load/store dataReg from/to [memBase + index]
emitIns_R_R_R(ins, attr, dataReg, memBase->GetRegNum(), index->GetRegNum());
}
}
}
}
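For context, the contained-BFIZ path above emits the extended-register addressing form, e.g. `ldr w0, [x1, w2, SXTW #2]`. A standalone sketch (plain C++, not JIT code) of the address arithmetic that form performs, and of why `cast->IsUnsigned()` selects UXTW over SXTW:

```cpp
// The 32-bit index register is first zero-extended (UXTW) or sign-extended
// (SXTW) to 64 bits, then shifted left by the scale before being added to
// the base. A shift of 2 corresponds to 4-byte elements.
#include <cstdint>

uint64_t extendedAddress(uint64_t base, uint32_t index, bool isUnsigned, int shift)
{
    int64_t extended = isUnsigned ? (int64_t)(uint64_t)index  // UXTW: zero-extend
                                  : (int64_t)(int32_t)index;  // SXTW: sign-extend
    return base + ((uint64_t)extended << shift);
}
```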
22 changes: 22 additions & 0 deletions src/coreclr/jit/lower.cpp
@@ -5033,6 +5033,28 @@ bool Lowering::TryCreateAddrMode(GenTree* addr, bool isContainable, var_types ta
}
}

#ifdef TARGET_ARM64
// Check if we can "contain" a BFIZ node under the LEA in order to extend a 32-bit index to 64 bits as part of the load/store.
if ((index != nullptr) && index->OperIs(GT_BFIZ) && index->gtGetOp1()->OperIs(GT_CAST) &&
index->gtGetOp2()->IsCnsIntOrI() && varTypeIsIntegral(targetType))
{
// BFIZ node is a binary op where op1 is GT_CAST and op2 is GT_CNS_INT
GenTreeCast* cast = index->gtGetOp1()->AsCast();
assert(cast->isContained());

const unsigned shiftBy = (unsigned)index->gtGetOp2()->AsIntCon()->IconValue();

// 'scale' and 'offset' have to be unset since we're going to use the [base + index * SXTW/UXTW scale] form,
// which leaves no room for additional offsets/scales on ARM64. 'shiftBy' has to match the target's width.
if (cast->CastOp()->TypeIs(TYP_INT) && cast->TypeIs(TYP_LONG) && (genTypeSize(targetType) == (1U << shiftBy)) &&
(scale == 1) && (offset == 0))
{
// TODO: Make sure that genCreateAddrMode marks such BFIZ candidates as GTF_DONT_CSE for better CQ.
MakeSrcContained(addrMode, index);
}
}
#endif

JITDUMP("New addressing mode node:\n ");
DISPNODE(addrMode);
JITDUMP("\n");
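The key guard in the containment check above is `genTypeSize(targetType) == (1U << shiftBy)`: the BFIZ shift amount must match the element width exactly, because the `[base, index, SXTW/UXTW #shift]` form bakes the scale into the extension and leaves no room for a separate scale or offset. A standalone illustration (not JIT code) of that condition:

```cpp
#include <cstdio>

// True when a BFIZ(index, shiftBy) can be folded into the scaled, extended
// addressing form for an element of the given size.
bool canFoldShiftIntoAddressing(unsigned elementSize, unsigned shiftBy)
{
    return elementSize == (1u << shiftBy);
}

int main()
{
    printf("%d\n", canFoldShiftIntoAddressing(4, 2)); // 1: int element, index << 2
    printf("%d\n", canFoldShiftIntoAddressing(8, 2)); // 0: scale mismatch, stays un-contained
    return 0;
}
```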
11 changes: 10 additions & 1 deletion src/coreclr/jit/lsraarm64.cpp
@@ -689,7 +689,16 @@ int LinearScan::BuildNode(GenTree* tree)
if (index != nullptr)
{
srcCount++;
if (index->OperIs(GT_BFIZ) && index->isContained())
{
GenTreeCast* cast = index->gtGetOp1()->AsCast();
assert(cast->isContained() && (cns == 0));
BuildUse(cast->CastOp());
}
else
{
BuildUse(index);
}
}
assert(dstCount == 1);

18 changes: 15 additions & 3 deletions src/coreclr/jit/lsrabuild.cpp
@@ -3021,10 +3021,22 @@ int LinearScan::BuildAddrUses(GenTree* addr, regMaskTP candidates)
BuildUse(addrMode->Base(), candidates);
srcCount++;
}
if (addrMode->Index() != nullptr)
{
if (!addrMode->Index()->isContained())
{
BuildUse(addrMode->Index(), candidates);
srcCount++;
}
#ifdef TARGET_ARM64
else if (addrMode->Index()->OperIs(GT_BFIZ))
{
GenTreeCast* cast = addrMode->Index()->gtGetOp1()->AsCast();
assert(cast->isContained());
BuildUse(cast->CastOp(), candidates);
srcCount++;
}
#endif
}
return srcCount;
}
86 changes: 57 additions & 29 deletions src/coreclr/jit/morph.cpp
@@ -4909,7 +4909,7 @@ void Compiler::fgMakeOutgoingStructArgCopy(GenTreeCall* call,
GenTree* arg = fgMakeTmpArgNode(argEntry);

// Change the expression to "(tmp=val),tmp"
arg = gtNewOperNode(GT_COMMA, arg->TypeGet(), copyBlk, arg);

#endif // FEATURE_FIXED_OUT_ARGS

Expand Down Expand Up @@ -5461,15 +5461,29 @@ GenTree* Compiler::fgMorphArrayIndex(GenTree* tree)
// the partial byref will not point within the object, and thus not get updated correctly during a GC.
// This is mostly a risk in fully-interruptible code regions.

// We can generate two types of trees for "addr":
//
// 1) "arrRef + (index + elemOffset)"
// 2) "(arrRef + elemOffset) + index"
//
// XArch has powerful addressing modes such as [base + index*scale + offset], so it's fine with 1),
// while for Arm it is better to make the invariant sub-tree as large as possible. That sub-tree is
// usually "(arrRef + elemOffset)", which is CSE/LoopHoisting friendly and produces better codegen.
// 2) is still safe from the GC's point of view: both ADD operations are byref and point
// within the object, so the GC can correctly track and update them.

// TODO: in some cases even on ARM the 1) shape is preferable, because when "index" is invariant
// and "arrRef" is not, we can at least hoist/CSE "index + elemOffset".

// First element's offset
GenTree* elemOffset = gtNewIconNode(elemOffs, TYP_I_IMPL);
#ifdef TARGET_ARMARCH
GenTree* basePlusOffset = gtNewOperNode(GT_ADD, TYP_BYREF, arrRef, elemOffset);
addr = gtNewOperNode(GT_ADD, TYP_BYREF, basePlusOffset, addr);
#else
addr = gtNewOperNode(GT_ADD, TYP_I_IMPL, addr, elemOffset);
addr = gtNewOperNode(GT_ADD, TYP_BYREF, arrRef, addr);
#endif

assert(((tree->gtDebugFlags & GTF_DEBUG_NODE_LARGE) != 0) ||
(GenTree::s_gtNodeSizes[GT_IND] == TREE_NODE_SZ_SMALL));
@@ -5539,16 +5553,16 @@ GenTree* Compiler::fgMorphArrayIndex(GenTree* tree)
tree = gtNewOperNode(GT_COMMA, tree->TypeGet(), arrRefDefn, tree);
}

JITDUMP("fgMorphArrayIndex (before remorph):\n");
DISPTREE(tree);
JITDUMP("fgMorphArrayIndex (before remorph):\n")
DISPTREE(tree)

// Currently we morph the tree to perform some folding operations prior
// to attaching fieldSeq info and labeling constant array index contributions
//
tree = fgMorphTree(tree);

JITDUMP("fgMorphArrayIndex (after remorph):\n");
DISPTREE(tree);
JITDUMP("fgMorphArrayIndex (after remorph):\n")
DISPTREE(tree)

// Ideally we just want to proceed to attaching fieldSeq info and labeling the
// constant array index contributions, but the morphing operation may have changed
@@ -5562,48 +5576,62 @@ GenTree* Compiler::fgMorphArrayIndex(GenTree* tree)

if (fgIsCommaThrow(tree))
{
if ((arrElem != indTree) ||     // A new tree node may have been created
(!indTree->OperIs(GT_IND))) // The GT_IND may have been changed to a GT_CNS_INT
{
return tree; // Just return the Comma-Throw, don't try to attach the fieldSeq info, etc..
}
}

assert(!fgGlobalMorph || (arrElem->gtDebugFlags & GTF_DEBUG_NODE_MORPHED));
DBEXEC(fgGlobalMorph && (arrElem == tree), tree->gtDebugFlags &= ~GTF_DEBUG_NODE_MORPHED);

addr = arrElem->gtGetOp1();

GenTree* cnsOff = nullptr;
if (addr->OperIs(GT_ADD))
{
GenTree* addrOp1 = addr->gtGetOp1();
#ifdef TARGET_ARMARCH
// On arm/arm64 the addr tree has a different shape; see the comment above
if (addrOp1->OperIs(GT_ADD) && addrOp1->gtGetOp2()->IsCnsIntOrI())
{
assert(addrOp1->gtGetOp1()->TypeIs(TYP_REF));
cnsOff = addrOp1->gtGetOp2();
addr = addr->gtGetOp2();
// Label any constant array index contributions with #ConstantIndex and any LclVars with GTF_VAR_ARR_INDEX
addr->LabelIndex(this);
}
else
{
assert(addr->gtGetOp2()->IsCnsIntOrI());
cnsOff = addr->gtGetOp2();
addr = nullptr;
}
#else
assert(addr->TypeIs(TYP_BYREF));
assert(addr->gtGetOp1()->TypeIs(TYP_REF));
addr = addr->gtGetOp2();

// Look for the constant [#FirstElem] node here, or as the RHS of an ADD.

if (addr->IsCnsIntOrI())
{
cnsOff = addr;
addr = nullptr;
}
else
{
if (addr->OperIs(GT_ADD) && addr->gtGetOp2()->IsCnsIntOrI())
{
cnsOff = addr->gtGetOp2();
addr   = addr->gtGetOp1();
}

// Label any constant array index contributions with #ConstantIndex and any LclVars with GTF_VAR_ARR_INDEX
addr->LabelIndex(this);
}
#endif
}
else if (addr->IsCnsIntOrI())
{
cnsOff = addr;
}
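The re-association morph.cpp now performs for ARM can be sanity-checked with plain arithmetic: shape 2) `(arrRef + elemOffset) + scaledIndex` computes the same address as shape 1) `arrRef + (scaledIndex + elemOffset)`, but its left sub-tree is loop-invariant whenever `arrRef` is, which is what makes it CSE/hoisting friendly. A standalone check (not JIT code; addresses modeled as plain integers):

```cpp
#include <cassert>
#include <cstdint>

int main()
{
    const uint64_t arrRef     = 0x10000; // object address (stand-in for a byref)
    const uint64_t elemOffset = 16;      // offset of the first element past the header

    for (uint64_t i = 0; i < 8; i++)
    {
        uint64_t scaledIndex = i * 4; // 4-byte elements
        uint64_t shape1 = arrRef + (scaledIndex + elemOffset);
        uint64_t shape2 = (arrRef + elemOffset) + scaledIndex; // invariant left sub-tree
        assert(shape1 == shape2);
    }
    return 0;
}
```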
6 changes: 6 additions & 0 deletions src/coreclr/jit/vartype.h
@@ -247,6 +247,12 @@ inline bool varTypeIsLong(T vt)
return (TypeGet(vt) >= TYP_LONG) && (TypeGet(vt) <= TYP_ULONG);
}

template <class T>
inline bool varTypeIsInt(T vt)
{
return (TypeGet(vt) >= TYP_INT) && (TypeGet(vt) <= TYP_UINT);
}

template <class T>
inline bool varTypeIsMultiReg(T vt)
{