JIT: Reorder indirs on arm64 to make them amenable to ldp optimization #92768

Merged · 21 commits · Oct 6, 2023
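For context, the pattern this change targets is a pair of loads off the same base address at adjacent offsets: once lowering places the two indirections next to each other in LIR, arm64 codegen can combine them into a single ldp. Below is a minimal sketch of such a pattern; it is illustrative only and not taken from this diff, and the Point type and Sum function are hypothetical.

#include <cstdint>

struct Point
{
    int64_t x; // offset +0
    int64_t y; // offset +8
};

int64_t Sum(Point* p)
{
    // Two 8-byte loads off the same base at adjacent offsets. Once the
    // indirections are adjacent, the backend can emit a single load pair,
    // e.g. "ldp x1, x2, [x0]", instead of two separate ldr instructions.
    return p->x + p->y;
}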
2 changes: 1 addition & 1 deletion src/coreclr/jit/compiler.h
@@ -3044,7 +3044,7 @@ class Compiler
bool gtStoreDefinesField(
LclVarDsc* fieldVarDsc, ssize_t offset, unsigned size, ssize_t* pFieldStoreOffset, unsigned* pFieldStoreSize);

void gtPeelOffsets(GenTree** addr, target_ssize_t* offset, FieldSeq** fldSeq);
void gtPeelOffsets(GenTree** addr, target_ssize_t* offset, FieldSeq** fldSeq = nullptr);

// Return true if call is a recursive call; return false otherwise.
// Note when inlining, this looks for calls back to the root method.
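The compiler.h change above gives gtPeelOffsets a defaulted fldSeq parameter, so callers that only need the peeled base address and offset can pass two arguments. A usage sketch, assuming a Compiler* comp and a GenTreeIndir* ind are in scope (as in the lowering code below):

GenTree*       addr = ind->Addr();
target_ssize_t offs = 0;

// New two-argument form; fldSeq defaults to nullptr.
comp->gtPeelOffsets(&addr, &offs);

// Existing three-argument form for callers that also want the field sequence.
FieldSeq* fldSeq = nullptr;
comp->gtPeelOffsets(&addr, &offs, &fldSeq);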
356 changes: 353 additions & 3 deletions src/coreclr/jit/lower.cpp
@@ -409,8 +409,9 @@ GenTree* Lowering::LowerNode(GenTree* node)
{
case GT_NULLCHECK:
case GT_IND:
LowerIndir(node->AsIndir());
break;
{
return LowerIndir(node->AsIndir());
}

case GT_STOREIND:
LowerStoreIndirCommon(node->AsStoreInd());
@@ -7354,6 +7355,9 @@ void Lowering::LowerBlock(BasicBlock* block)
assert(block->isEmpty() || block->IsLIR());

m_block = block;
#ifdef TARGET_ARM64
m_blockIndirs.Reset();
#endif

// NOTE: some of the lowering methods insert calls before the node being
// lowered (See e.g. InsertPInvoke{Method,Call}{Prolog,Epilog}). In
@@ -8131,8 +8135,10 @@ void Lowering::LowerStoreIndirCommon(GenTreeStoreInd* ind)
// Arguments:
// ind - the ind node we are lowering.
//
void Lowering::LowerIndir(GenTreeIndir* ind)
GenTree* Lowering::LowerIndir(GenTreeIndir* ind)
{
GenTree* next = ind->gtNext;

assert(ind->OperIs(GT_IND, GT_NULLCHECK));
// Process struct typed indirs separately unless they are unused;
// they only appear as the source of a block copy operation or a return node.
@@ -8179,8 +8185,352 @@ void Lowering::LowerIndir(GenTreeIndir* ind)
const bool isContainable = false;
TryCreateAddrMode(ind->Addr(), isContainable, ind);
}

#ifdef TARGET_ARM64
if (comp->opts.OptimizationEnabled() && ind->OperIs(GT_IND))
{
OptimizeForLdp(ind);
}
#endif

return next;
}

#ifdef TARGET_ARM64

// Max distance that we will try to move an indirection backwards to become
// adjacent to another indirection. As an empirical observation, increasing
// this number to 32 for the smoke_tests collection resulted in 3684 -> 3796
// cases passing the distance check, but 82 out of these 112 extra cases were
// then rejected due to interference. So 16 seems like a good number to balance
// the throughput costs.
const int LDP_REORDERING_MAX_DISTANCE = 16;

//------------------------------------------------------------------------
// OptimizeForLdp: Record information about an indirection, and try to optimize
// it by moving it to be adjacent with a previous indirection such that they
// can be transformed into 'ldp'.
//
// Arguments:
// ind - Indirection to record and to try to move.
//
// Returns:
// True if the optimization was successful.
//
bool Lowering::OptimizeForLdp(GenTreeIndir* ind)
{
if (!ind->TypeIs(TYP_INT, TYP_LONG, TYP_DOUBLE, TYP_SIMD8, TYP_SIMD16) || ind->IsVolatile())
{
return false;
}

target_ssize_t offs = 0;
GenTree* addr = ind->Addr();
comp->gtPeelOffsets(&addr, &offs);

if (!addr->OperIs(GT_LCL_VAR))
{
return false;
}

// Every indirection takes an expected 2+ nodes, so we only expect at most
// half the reordering distance to be candidates for the optimization.
int maxCount = min(m_blockIndirs.Height(), LDP_REORDERING_MAX_DISTANCE / 2);
for (int i = 0; i < maxCount; i++)
{
SavedIndir& prev = m_blockIndirs.TopRef(i);
if (prev.AddrBase->GetLclNum() != addr->AsLclVar()->GetLclNum())
{
continue;
}

GenTreeIndir* prevIndir = prev.Indir;
if ((prevIndir == nullptr) || (prevIndir->TypeGet() != ind->TypeGet()))
{
continue;
}

JITDUMP("[%06u] and [%06u] are indirs off the same base with offsets +%03u and +%03u\n",
Compiler::dspTreeID(ind), Compiler::dspTreeID(prevIndir), (unsigned)offs, (unsigned)prev.Offset);
if (abs(offs - prev.Offset) == genTypeSize(ind))
{
JITDUMP(" ..and they are amenable to ldp optimization\n");
if (TryMakeIndirsAdjacent(prevIndir, ind))
{
// Do not let the previous one participate in
// another instance; that can cause us to e.g. convert
// *(x+4), *(x+0), *(x+8), *(x+12) =>
// *(x+4), *(x+8), *(x+0), *(x+12)
prev.Indir = nullptr;
return true;
}
break;
}
else
{
JITDUMP(" ..but at non-adjacent offset\n");
}
}

m_blockIndirs.Emplace(ind, addr->AsLclVar(), offs);
return false;
}
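// Illustrative sketch (hypothetical helper, not part of this change): the
// adjacency test above reduces to an absolute-difference check on the peeled
// offsets of two same-typed indirections off the same base.
static bool AreLdpCandidates(target_ssize_t offs1, target_ssize_t offs2, unsigned accessSize)
{
    target_ssize_t diff = offs1 - offs2;
    if (diff < 0)
    {
        diff = -diff;
    }
    // E.g. two 8-byte loads at +0 and +8 qualify (diff == 8), while loads at
    // +0 and +16 do not.
    return diff == (target_ssize_t)accessSize;
}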

//------------------------------------------------------------------------
// TryMakeIndirsAdjacent: Try to prove that it is legal to move an indirection
// to be adjacent to a previous indirection. If successful, perform the move.
//
// Arguments:
// prevIndir - Previous indirection
// indir - Indirection to try to move to be adjacent to 'prevIndir'
//
// Returns:
// True if the optimization was successful.
//
bool Lowering::TryMakeIndirsAdjacent(GenTreeIndir* prevIndir, GenTreeIndir* indir)
{
GenTree* cur = prevIndir;
for (int i = 0; i < LDP_REORDERING_MAX_DISTANCE; i++)
{
cur = cur->gtNext;
if (cur == indir)
break;

// We can reorder indirs with some calls, but introducing a LIR edge
// that spans a call can introduce spills (or callee-saves).
if (cur->IsCall() || (cur->OperIsStoreBlk() && (cur->AsBlk()->gtBlkOpKind == GenTreeBlk::BlkOpKindHelper)))
{
JITDUMP(" ..but they are separated by node [%06u] that kills registers\n", Compiler::dspTreeID(cur));
return false;
}
}

if (cur != indir)
{
JITDUMP(" ..but they are too far separated\n");
return false;
}

JITDUMP(
" ..and they are close. Trying to move the following range (where * are nodes part of the data flow):\n\n");
#ifdef DEBUG
bool isClosed;
GenTree* startDumpNode = BlockRange().GetTreeRange(prevIndir, &isClosed).FirstNode();
GenTree* endDumpNode = indir->gtNext;

auto dumpWithMarks = [=]() {
if (!comp->verbose)
{
return;
}

for (GenTree* node = startDumpNode; node != endDumpNode; node = node->gtNext)
{
const char* prefix;
if (node == prevIndir)
prefix = "1. ";
else if (node == indir)
prefix = "2. ";
else if ((node->gtLIRFlags & LIR::Flags::Mark) != 0)
prefix = "* ";
else
prefix = " ";

comp->gtDispLIRNode(node, prefix);
}
};

#endif

MarkTree(indir);

INDEBUG(dumpWithMarks());
JITDUMP("\n");

m_scratchSideEffects.Clear();

for (GenTree* cur = prevIndir->gtNext; cur != indir; cur = cur->gtNext)
{
if ((cur->gtLIRFlags & LIR::Flags::Mark) != 0)
{
// 'cur' is part of data flow of 'indir', so we will be moving the
// currently recorded effects past 'cur'.
if (m_scratchSideEffects.InterferesWith(comp, cur, true))
{
JITDUMP("Giving up due to interference with [%06u]\n", Compiler::dspTreeID(cur));
UnmarkTree(indir);
return false;
}
}
else
{
// Not part of dataflow; add its effects that will move past
// 'indir'.
m_scratchSideEffects.AddNode(comp, cur);
}
}

if (m_scratchSideEffects.InterferesWith(comp, indir, true))
{
// Try a bit harder, making use of the following facts:
//
// 1. We know the indir is non-faulting, so we do not need to worry
// about reordering exceptions
//
// 2. We can reorder with non-volatile INDs even if they have
// GTF_ORDER_SIDEEFF; these indirs only have GTF_ORDER_SIDEEFF due to
// being non-faulting
//
// 3. We can also reorder with non-volatile STOREINDs if we can prove
// no aliasing. We can do that for two common cases:
// * The addresses are based on the same local but at distinct offset ranges
// * The addresses are based off TYP_REF bases at distinct offset ranges

JITDUMP("Have conservative interference with last indir. Trying a smarter interference check...\n");

GenTree* indirAddr = indir->Addr();
target_ssize_t offs = 0;
comp->gtPeelOffsets(&indirAddr, &offs);

bool checkLocal = indirAddr->OperIsLocal();
if (checkLocal)
{
unsigned lclNum = indirAddr->AsLclVarCommon()->GetLclNum();
checkLocal = !comp->lvaGetDesc(lclNum)->IsAddressExposed() && !m_scratchSideEffects.WritesLocal(lclNum);
}

// Helper lambda to check if a single node interferes with 'indir'.
auto interferes = [=](GenTree* node) {
if (((node->gtFlags & GTF_ORDER_SIDEEFF) != 0) && node->OperSupportsOrderingSideEffect())
{
// Cannot normally reorder GTF_ORDER_SIDEEFF and GTF_GLOB_REF,
// except for some of the known cases described above.
if (!node->OperIs(GT_IND, GT_BLK, GT_STOREIND, GT_STORE_BLK) || node->AsIndir()->IsVolatile())
{
return true;
}
}

AliasSet::NodeInfo nodeInfo(comp, node);

if (nodeInfo.WritesAddressableLocation())
{
if (!node->OperIs(GT_STOREIND, GT_STORE_BLK))
{
return true;
}

GenTreeIndir* store = node->AsIndir();
GenTree* storeAddr = store->Addr();
target_ssize_t storeOffs = 0;
comp->gtPeelOffsets(&storeAddr, &storeOffs);

bool distinct = (storeOffs + (target_ssize_t)store->Size() <= offs) ||
(offs + (target_ssize_t)indir->Size() <= storeOffs);

if (checkLocal && GenTree::Compare(indirAddr, storeAddr) && distinct)
{
JITDUMP("Cannot interfere with [%06u] since they are off the same local V%02u and indir range "
"[%03u..%03u) does not interfere with store range [%03u..%03u)\n",
Compiler::dspTreeID(node), indirAddr->AsLclVarCommon()->GetLclNum(), (unsigned)offs,
(unsigned)offs + indir->Size(), (unsigned)storeOffs, (unsigned)storeOffs + store->Size());
}
// Two indirs off of TYP_REFs cannot overlap if their offset ranges are distinct.
else if (indirAddr->TypeIs(TYP_REF) && storeAddr->TypeIs(TYP_REF) && distinct)
{
JITDUMP("Cannot interfere with [%06u] since they are both off TYP_REF bases and indir range "
"[%03u..%03u) does not interfere with store range [%03u..%03u)\n",
Compiler::dspTreeID(node), (unsigned)offs, (unsigned)offs + indir->Size(),
(unsigned)storeOffs, (unsigned)storeOffs + store->Size());
}
else
{
return true;
}
}

return false;
};

for (GenTree* cur = indir->gtPrev; cur != prevIndir; cur = cur->gtPrev)
{
if ((cur->gtLIRFlags & LIR::Flags::Mark) != 0)
{
continue;
}

if (interferes(cur))
{
JITDUMP("Indir [%06u] interferes with [%06u]\n", Compiler::dspTreeID(indir), Compiler::dspTreeID(cur));
UnmarkTree(indir);
return false;
}
}
}

JITDUMP("Interference checks passed. Moving nodes that are not part of data flow of [%06u]\n\n",
Compiler::dspTreeID(indir));

GenTree* previous = prevIndir;
for (GenTree* node = prevIndir->gtNext;;)
{
GenTree* next = node->gtNext;

if ((node->gtLIRFlags & LIR::Flags::Mark) != 0)
{
// Part of data flow. Move it to happen right after 'previous'.
BlockRange().Remove(node);
BlockRange().InsertAfter(previous, node);
previous = node;
}

if (node == indir)
{
break;
}

node = next;
}

JITDUMP("Result:\n\n");
INDEBUG(dumpWithMarks());
JITDUMP("\n");
UnmarkTree(indir);
return true;
}
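// Illustrative sketch (hypothetical helper, not part of this change): the
// no-alias reasoning in the smarter interference check above is a
// disjointness test on half-open byte ranges [offs, offs + size) off the
// same base.
static bool RangesAreDistinct(target_ssize_t indirOffs, unsigned indirSize, target_ssize_t storeOffs, unsigned storeSize)
{
    // E.g. an 8-byte load at +0 and an 8-byte store at +8 are distinct,
    // while a load at +0 and a store at +4 overlap.
    return (storeOffs + (target_ssize_t)storeSize <= indirOffs) ||
           (indirOffs + (target_ssize_t)indirSize <= storeOffs);
}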

//------------------------------------------------------------------------
// MarkTree: Mark trees involved in the computation of 'node' recursively.
//
// Arguments:
// node - Root node.
//
void Lowering::MarkTree(GenTree* node)
{
node->gtLIRFlags |= LIR::Flags::Mark;
node->VisitOperands([=](GenTree* op) {
MarkTree(op);
return GenTree::VisitResult::Continue;
});
}

//------------------------------------------------------------------------
// UnmarkTree: Unmark trees involved in the computation of 'node' recursively.
//
// Arguments:
// node - Root node.
//
void Lowering::UnmarkTree(GenTree* node)
{
node->gtLIRFlags &= ~LIR::Flags::Mark;
node->VisitOperands([=](GenTree* op) {
UnmarkTree(op);
return GenTree::VisitResult::Continue;
});
}

#endif // TARGET_ARM64

//------------------------------------------------------------------------
// TransformUnusedIndirection: change the opcode and the type of the unused indirection.
//