From 86999fe3587e1fbcf3e730c1c0aea816e14a6b6c Mon Sep 17 00:00:00 2001 From: Jakob Botsch Nielsen Date: Thu, 28 Sep 2023 16:17:56 +0200 Subject: [PATCH 01/19] JIT: Reorder indirs on arm64 to make them amenable to ldp optimization --- src/coreclr/jit/lower.cpp | 187 +++++++++++++++++++++++++++++++++++++- src/coreclr/jit/lower.h | 8 +- 2 files changed, 190 insertions(+), 5 deletions(-) diff --git a/src/coreclr/jit/lower.cpp b/src/coreclr/jit/lower.cpp index bb31df9240a57..0081eb7412dd8 100644 --- a/src/coreclr/jit/lower.cpp +++ b/src/coreclr/jit/lower.cpp @@ -409,8 +409,11 @@ GenTree* Lowering::LowerNode(GenTree* node) { case GT_NULLCHECK: case GT_IND: - LowerIndir(node->AsIndir()); - break; + { + GenTree* next = nullptr; + LowerIndir(node->AsIndir(), &next); + return next; + } case GT_STOREIND: LowerStoreIndirCommon(node->AsStoreInd()); @@ -7334,6 +7337,7 @@ void Lowering::LowerBlock(BasicBlock* block) assert(block->isEmpty() || block->IsLIR()); m_block = block; + m_blockIndirs.Reset(); // NOTE: some of the lowering methods insert calls before the node being // lowered (See e.g. InsertPInvoke{Method,Call}{Prolog,Epilog}). In @@ -7823,6 +7827,10 @@ void Lowering::LowerStoreIndirCommon(GenTreeStoreInd* ind) LowerStoreIndir(ind); } + +#ifdef TARGET_ARM64 + m_blockIndirs.Push(ind); +#endif } //------------------------------------------------------------------------ @@ -7831,8 +7839,13 @@ void Lowering::LowerStoreIndirCommon(GenTreeStoreInd* ind) // Arguments: // ind - the ind node we are lowering. // -void Lowering::LowerIndir(GenTreeIndir* ind) +void Lowering::LowerIndir(GenTreeIndir* ind, GenTree** next) { + if (next != nullptr) + { + *next = ind->gtNext; + } + assert(ind->OperIs(GT_IND, GT_NULLCHECK)); // Process struct typed indirs separately unless they are unused; // they only appear as the source of a block copy operation or a return node. 
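For context on the title: ARM64's ldp instruction loads two registers from adjacent memory off a single base address in one instruction. A minimal standalone C++ sketch of the access pattern the series targets (illustration only, not JIT code; 'Pair' and 'SumPair' are hypothetical names):

    #include <cstdint>

    struct Pair
    {
        int64_t lo; // at offset +0 from the base pointer
        int64_t hi; // at offset +8, adjacent to 'lo'
    };

    int64_t SumPair(const Pair* p)
    {
        // Without pairing: ldr x1, [x0]  /  ldr x2, [x0, #8]
        // With pairing:    ldp x1, x2, [x0]
        return p->lo + p->hi;
    }

The lowering changes in this patch reorder two such indirections so that they end up adjacent in LIR, where the backend's load-pair optimization can pick them up.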
@@ -7879,6 +7892,174 @@ void Lowering::LowerIndir(GenTreeIndir* ind) const bool isContainable = false; TryCreateAddrMode(ind->Addr(), isContainable, ind); } + +#ifdef TARGET_ARM64 + if (ind->OperIs(GT_IND) && comp->opts.OptimizationEnabled() && (next != nullptr)) + { + target_ssize_t offs1 = 0; + GenTree* addr = ind->Addr(); + if (addr->OperIs(GT_LEA) && !addr->AsAddrMode()->HasIndex()) + { + offs1 = (target_ssize_t)addr->AsAddrMode()->Offset(); + addr = addr->AsAddrMode()->Base(); + } + else if (addr->OperIs(GT_ADD) && addr->gtGetOp2()->IsCnsIntOrI()) + { + offs1 = (target_ssize_t)addr->gtGetOp2()->AsIntConCommon()->IconValue(); + addr = addr->gtGetOp1(); + } + + if (ind->TypeIs(TYP_INT, TYP_LONG, TYP_DOUBLE, TYP_SIMD16)) + { + for (int i = 0; i < m_blockIndirs.Height(); i++) + { + GenTreeIndir* prevIndir = m_blockIndirs.Top(i); + if (!prevIndir->OperIs(GT_IND)) + { + continue; + } + + target_ssize_t offs2 = 0; + GenTree* prevAddr = prevIndir->Addr(); + if (prevAddr->OperIs(GT_LEA) && !prevAddr->AsAddrMode()->HasIndex()) + { + offs2 = (target_ssize_t)prevAddr->AsAddrMode()->Offset(); + prevAddr = prevAddr->AsAddrMode()->Base(); + } + else if (prevAddr->OperIs(GT_ADD) && prevAddr->gtGetOp2()->IsCnsIntOrI()) + { + offs2 = (target_ssize_t)prevAddr->gtGetOp2()->AsIntConCommon()->IconValue(); + prevAddr = prevAddr->gtGetOp1(); + } + + if (addr->OperIsLocal() && GenTree::Compare(addr, prevAddr)) + { + JITDUMP("[%06u] and [%06u] are indirs off the same base with offsets +%03u and +%03u\n", + Compiler::dspTreeID(addr), Compiler::dspTreeID(prevAddr), offs1, offs2); + if (abs(offs1 - offs2) == genTypeSize(ind)) + { + JITDUMP(" ..and they are amenable to ldp optimization\n"); + if (TryOptimizeForLdp(prevIndir, ind)) + { + JITDUMP("Full block:\n\n"); + DISPRANGE(BlockRange()); + BlockRange().CheckLIR(comp, false); + } + break; + } + else + { + JITDUMP(" ..but at wrong offset\n"); + } + } + } + } + + m_blockIndirs.Push(ind); + } +#endif +} + +static void MarkTree(GenTree* node) +{ + node->gtLIRFlags |= LIR::Flags::Mark; + node->VisitOperands([](GenTree* op) { + MarkTree(op); + return GenTree::VisitResult::Continue; + }); +} + +static void UnmarkTree(GenTree* node) +{ + node->gtLIRFlags &= ~LIR::Flags::Mark; + node->VisitOperands([](GenTree* op) { + UnmarkTree(op); + return GenTree::VisitResult::Continue; + }); +} + +bool Lowering::TryOptimizeForLdp(GenTreeIndir* prevIndir, GenTreeIndir* indir) +{ + if ((indir->gtFlags & GTF_IND_VOLATILE) != 0) + { + JITDUMP(" ..but second indir is volatile\n"); + return false; + } + + GenTree* cur = prevIndir; + for (int i = 0; i < 16; i++) + { + cur = cur->gtNext; + if (cur == indir) + break; + } + + if (cur != indir) + { + JITDUMP(" ..but it is too far away\n"); + return false; + } + + JITDUMP(" ..and it is within range. 
Range:\n"); + DISPRANGE(LIR::ReadOnlyRange(prevIndir, indir)); + + MarkTree(indir); + + GenTree* curRangeStart = prevIndir->gtNext; + while ((curRangeStart->gtLIRFlags & LIR::Flags::Mark) != 0) + { + if (curRangeStart == indir) + { + UnmarkTree(indir); + return true; + } + + curRangeStart = curRangeStart->gtNext; + } + + GenTree* curRangeEnd = curRangeStart; + while (true) + { + GenTree* next = curRangeEnd->gtNext; + if ((next->gtLIRFlags & LIR::Flags::Mark) == 0) + { + curRangeEnd = next; + continue; + } + + while ((next != indir) && ((next->gtNext->gtLIRFlags & LIR::Flags::Mark) != 0)) + { + next = next->gtNext; + } + + JITDUMP("\n---------------------------------- Moving nodes that aren't part of data flow " + "-----------------------------------\n\n"); + DISPRANGE(LIR::ReadOnlyRange(curRangeStart, curRangeEnd)); + JITDUMP("\n-------------------------------------------- Past the following nodes " + "-------------------------------------------\n\n"); + DISPRANGE(LIR::ReadOnlyRange(curRangeEnd->gtNext, next)); + JITDUMP("\n----------------------------------------------------------------------------------------------------" + "-------------\n"); + + if (IsRangeInvariantInRange(curRangeStart, curRangeEnd, next->gtNext, indir)) + { + LIR::Range movedRange = BlockRange().Remove(curRangeStart, curRangeEnd); + BlockRange().InsertAfter(next, std::move(movedRange)); + JITDUMP("Moved!\n"); + + if (next == indir) + { + UnmarkTree(indir); + return true; + } + } + else + { + JITDUMP("Not invariant\n"); + UnmarkTree(indir); + return false; + } + } } //------------------------------------------------------------------------ diff --git a/src/coreclr/jit/lower.h b/src/coreclr/jit/lower.h index bddb847755e76..7c0c9ab6cb102 100644 --- a/src/coreclr/jit/lower.h +++ b/src/coreclr/jit/lower.h @@ -23,7 +23,9 @@ class Lowering final : public Phase { public: inline Lowering(Compiler* compiler, LinearScanInterface* lsra) - : Phase(compiler, PHASE_LOWERING), vtableCallTemp(BAD_VAR_NUM) + : Phase(compiler, PHASE_LOWERING) + , vtableCallTemp(BAD_VAR_NUM) + , m_blockIndirs(compiler->getAllocator(CMK_ArrayStack)) { m_lsra = (LinearScan*)lsra; assert(m_lsra); @@ -310,7 +312,8 @@ class Lowering final : public Phase // Per tree node member functions void LowerStoreIndirCommon(GenTreeStoreInd* ind); - void LowerIndir(GenTreeIndir* ind); + void LowerIndir(GenTreeIndir* ind, GenTree** next = nullptr); + bool TryOptimizeForLdp(GenTreeIndir* prevIndir, GenTreeIndir* indir); void LowerStoreIndir(GenTreeStoreInd* node); GenTree* LowerAdd(GenTreeOp* node); GenTree* LowerMul(GenTreeOp* mul); @@ -559,6 +562,7 @@ class Lowering final : public Phase #ifdef FEATURE_FIXED_OUT_ARGS unsigned m_outgoingArgSpaceSize = 0; #endif + ArrayStack m_blockIndirs; }; #endif // _LOWER_H_ From 19b0fa1cabb30a49c70be2c62d58e9dd127a8d3e Mon Sep 17 00:00:00 2001 From: Jakob Botsch Nielsen Date: Thu, 28 Sep 2023 16:32:03 +0200 Subject: [PATCH 02/19] Remove left-over debug code --- src/coreclr/jit/lower.cpp | 7 +------ 1 file changed, 1 insertion(+), 6 deletions(-) diff --git a/src/coreclr/jit/lower.cpp b/src/coreclr/jit/lower.cpp index 0081eb7412dd8..0cdafcc901433 100644 --- a/src/coreclr/jit/lower.cpp +++ b/src/coreclr/jit/lower.cpp @@ -7939,12 +7939,7 @@ void Lowering::LowerIndir(GenTreeIndir* ind, GenTree** next) if (abs(offs1 - offs2) == genTypeSize(ind)) { JITDUMP(" ..and they are amenable to ldp optimization\n"); - if (TryOptimizeForLdp(prevIndir, ind)) - { - JITDUMP("Full block:\n\n"); - DISPRANGE(BlockRange()); - BlockRange().CheckLIR(comp, 
false); - } + TryOptimizeForLdp(prevIndir, ind); break; } else From 0d1a0a52b79a19535f2c2916aae8cc75363a6d03 Mon Sep 17 00:00:00 2001 From: Jakob Botsch Nielsen Date: Thu, 28 Sep 2023 22:35:06 +0200 Subject: [PATCH 03/19] Fix bug, revise approach slightly --- src/coreclr/jit/lower.cpp | 126 +++++++++++++++++++++----------- src/coreclr/jit/sideeffects.cpp | 19 +++++ src/coreclr/jit/sideeffects.h | 1 + 3 files changed, 103 insertions(+), 43 deletions(-) diff --git a/src/coreclr/jit/lower.cpp b/src/coreclr/jit/lower.cpp index 0cdafcc901433..613b8eecefa81 100644 --- a/src/coreclr/jit/lower.cpp +++ b/src/coreclr/jit/lower.cpp @@ -7935,7 +7935,7 @@ void Lowering::LowerIndir(GenTreeIndir* ind, GenTree** next) if (addr->OperIsLocal() && GenTree::Compare(addr, prevAddr)) { JITDUMP("[%06u] and [%06u] are indirs off the same base with offsets +%03u and +%03u\n", - Compiler::dspTreeID(addr), Compiler::dspTreeID(prevAddr), offs1, offs2); + Compiler::dspTreeID(ind), Compiler::dspTreeID(prevIndir), offs1, offs2); if (abs(offs1 - offs2) == genTypeSize(ind)) { JITDUMP(" ..and they are amenable to ldp optimization\n"); @@ -7995,66 +7995,106 @@ bool Lowering::TryOptimizeForLdp(GenTreeIndir* prevIndir, GenTreeIndir* indir) return false; } - JITDUMP(" ..and it is within range. Range:\n"); - DISPRANGE(LIR::ReadOnlyRange(prevIndir, indir)); - - MarkTree(indir); + JITDUMP( + " ..and it is close by. Trying to move the following range (where * are nodes part of the data flow):\n\n"); +#ifdef DEBUG + bool isClosed; + GenTree* startDumpNode = BlockRange().GetTreeRange(prevIndir, &isClosed).FirstNode(); + GenTree* endDumpNode = indir->gtNext; - GenTree* curRangeStart = prevIndir->gtNext; - while ((curRangeStart->gtLIRFlags & LIR::Flags::Mark) != 0) - { - if (curRangeStart == indir) + auto dumpWithMarks = [=]() { + if (!comp->verbose) { - UnmarkTree(indir); - return true; + return; } - curRangeStart = curRangeStart->gtNext; - } - - GenTree* curRangeEnd = curRangeStart; - while (true) - { - GenTree* next = curRangeEnd->gtNext; - if ((next->gtLIRFlags & LIR::Flags::Mark) == 0) + for (GenTree* node = startDumpNode; node != endDumpNode; node = node->gtNext) { - curRangeEnd = next; - continue; - } + const char* prefix; + if (node == prevIndir) + prefix = "1. "; + else if (node == indir) + prefix = "2. 
"; + else if ((node->gtLIRFlags & LIR::Flags::Mark) != 0) + prefix = "* "; + else + prefix = " "; - while ((next != indir) && ((next->gtNext->gtLIRFlags & LIR::Flags::Mark) != 0)) - { - next = next->gtNext; + comp->gtDispLIRNode(node, prefix); } + }; - JITDUMP("\n---------------------------------- Moving nodes that aren't part of data flow " - "-----------------------------------\n\n"); - DISPRANGE(LIR::ReadOnlyRange(curRangeStart, curRangeEnd)); - JITDUMP("\n-------------------------------------------- Past the following nodes " - "-------------------------------------------\n\n"); - DISPRANGE(LIR::ReadOnlyRange(curRangeEnd->gtNext, next)); - JITDUMP("\n----------------------------------------------------------------------------------------------------" - "-------------\n"); +#endif - if (IsRangeInvariantInRange(curRangeStart, curRangeEnd, next->gtNext, indir)) - { - LIR::Range movedRange = BlockRange().Remove(curRangeStart, curRangeEnd); - BlockRange().InsertAfter(next, std::move(movedRange)); - JITDUMP("Moved!\n"); + MarkTree(indir); - if (next == indir) + INDEBUG(dumpWithMarks()); + JITDUMP("\n"); + + m_scratchSideEffects.Clear(); + + for (GenTree* cur = prevIndir->gtNext; cur != indir; cur = cur->gtNext) + { + if ((cur->gtLIRFlags & LIR::Flags::Mark) != 0) + { + // Part of data flow of 'indir', so we will be moving past this node. + if (m_scratchSideEffects.InterferesWith(comp, cur, true)) { + JITDUMP("Giving up due to interference with [%06u]\n", Compiler::dspTreeID(cur)); UnmarkTree(indir); - return true; + return false; } } else { - JITDUMP("Not invariant\n"); - UnmarkTree(indir); - return false; + // Part of dataflow; add its effects. + m_scratchSideEffects.AddNode(comp, cur); } } + + // Can we move it past the 'indir'? We can ignore effect flags when + // checking this, as we know the previous indir will make it non-faulting + // and keep the ordering correct. This makes use of the fact that + // non-volatile indirs have ordering side effects only for the "suppressed + // NRE" case. + // We still need some interference cehcks to ensure that the indir reads + // the same value even after reordering. + if (m_scratchSideEffects.InterferesWith(comp, indir, GTF_EMPTY, true)) + { + JITDUMP("Giving up due to interference with [%06u]\n", Compiler::dspTreeID(indir)); + UnmarkTree(indir); + return false; + } + + JITDUMP("Interference checks passed. Moving nodes that are not part of data flow tree\n\n", + Compiler::dspTreeID(indir)); + + GenTree* previous = prevIndir; + for (GenTree* node = prevIndir->gtNext;;) + { + GenTree* next = node->gtNext; + + if ((node->gtLIRFlags & LIR::Flags::Mark) != 0) + { + // Part of data flow. Move it to happen right after 'previous'. 
+ BlockRange().Remove(node); + BlockRange().InsertAfter(previous, node); + previous = node; + } + + if (node == indir) + { + break; + } + + node = next; + } + + JITDUMP("Result:\n\n"); + INDEBUG(dumpWithMarks()); + JITDUMP("\n"); + UnmarkTree(indir); + return true; } //------------------------------------------------------------------------ diff --git a/src/coreclr/jit/sideeffects.cpp b/src/coreclr/jit/sideeffects.cpp index cf0b3210e3853..4ef9edce25a3a 100644 --- a/src/coreclr/jit/sideeffects.cpp +++ b/src/coreclr/jit/sideeffects.cpp @@ -574,6 +574,25 @@ bool SideEffectSet::InterferesWith(Compiler* compiler, GenTree* node, bool stric return InterferesWith((node->gtFlags & GTF_ALL_EFFECT), AliasSet::NodeInfo(compiler, node), strict); } +//------------------------------------------------------------------------ +// SideEffectSet::InterferesWith: +// Returns true if the side effects in this set interfere with the side +// effects for the given node. +// +// A side effect set interferes with a given node iff it interferes +// with the side effect set of the node. +// +// Arguments: +// compiler - The compiler context. +// node - The node in question. +// overriddenSideEffects - Side effect flags to use in place of the ones from the node +// strict - True if the analysis should be strict as described above. +// +bool SideEffectSet::InterferesWith(Compiler* compiler, GenTree* node, unsigned overriddenSideEffects, bool strict) const +{ + return InterferesWith(overriddenSideEffects, AliasSet::NodeInfo(compiler, node), strict); +} + //------------------------------------------------------------------------ // SideEffectSet::Clear: // Clears the current side effect set. diff --git a/src/coreclr/jit/sideeffects.h b/src/coreclr/jit/sideeffects.h index f16fe59c9bd48..9cee0b79d6e63 100644 --- a/src/coreclr/jit/sideeffects.h +++ b/src/coreclr/jit/sideeffects.h @@ -179,6 +179,7 @@ class SideEffectSet final void AddNode(Compiler* compiler, GenTree* node); bool InterferesWith(const SideEffectSet& other, bool strict) const; bool InterferesWith(Compiler* compiler, GenTree* node, bool strict) const; + bool InterferesWith(Compiler* compiler, GenTree* node, unsigned overriddenSideEffects, bool strict) const; void Clear(); }; From 1dd729515df0a7830ac7f66d9f299c8a21cf6a2e Mon Sep 17 00:00:00 2001 From: Jakob Botsch Nielsen Date: Fri, 29 Sep 2023 22:00:41 +0200 Subject: [PATCH 04/19] Factor into gtPeelOffsets, make interference checking more clever --- src/coreclr/jit/lower.cpp | 89 ++++++++++++++++------ src/coreclr/jit/promotiondecomposition.cpp | 50 ++++++++---- src/coreclr/jit/sideeffects.cpp | 15 ++++ src/coreclr/jit/sideeffects.h | 6 ++ 4 files changed, 119 insertions(+), 41 deletions(-) diff --git a/src/coreclr/jit/lower.cpp b/src/coreclr/jit/lower.cpp index 28f71ec1a43ec..d01e3c42dd142 100644 --- a/src/coreclr/jit/lower.cpp +++ b/src/coreclr/jit/lower.cpp @@ -7896,18 +7896,10 @@ void Lowering::LowerIndir(GenTreeIndir* ind, GenTree** next) #ifdef TARGET_ARM64 if (ind->OperIs(GT_IND) && comp->opts.OptimizationEnabled() && (next != nullptr)) { - target_ssize_t offs1 = 0; - GenTree* addr = ind->Addr(); - if (addr->OperIs(GT_LEA) && !addr->AsAddrMode()->HasIndex()) - { - offs1 = (target_ssize_t)addr->AsAddrMode()->Offset(); - addr = addr->AsAddrMode()->Base(); - } - else if (addr->OperIs(GT_ADD) && addr->gtGetOp2()->IsCnsIntOrI()) - { - offs1 = (target_ssize_t)addr->gtGetOp2()->AsIntConCommon()->IconValue(); - addr = addr->gtGetOp1(); - } + target_ssize_t offs1 = 0; + GenTree* addr = ind->Addr(); + FieldSeq* 
fldSeq = nullptr; + comp->gtPeelOffsets(&addr, &offs1, &fldSeq); if (ind->TypeIs(TYP_INT, TYP_LONG, TYP_DOUBLE, TYP_SIMD16)) { @@ -7921,16 +7913,7 @@ void Lowering::LowerIndir(GenTreeIndir* ind, GenTree** next) target_ssize_t offs2 = 0; GenTree* prevAddr = prevIndir->Addr(); - if (prevAddr->OperIs(GT_LEA) && !prevAddr->AsAddrMode()->HasIndex()) - { - offs2 = (target_ssize_t)prevAddr->AsAddrMode()->Offset(); - prevAddr = prevAddr->AsAddrMode()->Base(); - } - else if (prevAddr->OperIs(GT_ADD) && prevAddr->gtGetOp2()->IsCnsIntOrI()) - { - offs2 = (target_ssize_t)prevAddr->gtGetOp2()->AsIntConCommon()->IconValue(); - prevAddr = prevAddr->gtGetOp1(); - } + comp->gtPeelOffsets(&prevAddr, &offs2, &fldSeq); if (addr->OperIsLocal() && GenTree::Compare(addr, prevAddr)) { @@ -8061,9 +8044,65 @@ bool Lowering::TryOptimizeForLdp(GenTreeIndir* prevIndir, GenTreeIndir* indir) // the same value even after reordering. if (m_scratchSideEffects.InterferesWith(comp, indir, GTF_EMPTY, true)) { - JITDUMP("Giving up due to interference with [%06u]\n", Compiler::dspTreeID(indir)); - UnmarkTree(indir); - return false; + JITDUMP("Have conservative interference. Trying smarter interference check...\n", Compiler::dspTreeID(indir)); + + GenTree* indirAddr = indir->Addr(); + target_ssize_t offs = 0; + FieldSeq* fieldSeq = nullptr; + comp->gtPeelOffsets(&indirAddr, &offs, &fieldSeq); + + bool checkLocal = indirAddr->OperIsLocal(); + if (checkLocal) + { + unsigned lclNum = indirAddr->AsLclVarCommon()->GetLclNum(); + checkLocal = !comp->lvaGetDesc(lclNum)->IsAddressExposed() && !m_scratchSideEffects.WritesLocal(lclNum); + } + + for (GenTree* cur = indir->gtPrev; cur != prevIndir; cur = cur->gtPrev) + { + if ((cur->gtLIRFlags & LIR::Flags::Mark) != 0) + { + continue; + } + + AliasSet::NodeInfo nodeInfo(comp, cur); + if (!nodeInfo.WritesAddressableLocation()) + { + continue; + } + + if (cur->OperIs(GT_STOREIND, GT_STORE_BLK)) + { + GenTreeIndir* store = cur->AsIndir(); + GenTree* storeAddr = store->Addr(); + target_ssize_t storeOffs = 0; + comp->gtPeelOffsets(&storeAddr, &storeOffs, &fieldSeq); + + bool distinct = (storeOffs + store->Size() <= offs) || (offs + indir->Size() <= storeOffs); + if (checkLocal && GenTree::Compare(indirAddr, storeAddr) && distinct) + { + JITDUMP("Cannot interfere with [%06u] since they are off the same local V%02u and indir range " + "[%03u..%03u) does not interfere with store range [%03u..%03u)\n", + Compiler::dspTreeID(cur), indirAddr->AsLclVarCommon()->GetLclNum(), (unsigned)offs, + (unsigned)offs + indir->Size(), (unsigned)storeOffs, store->Size()); + continue; + } + + // Two indirs off of TYP_REFs cannot overlap if their offset ranges are distinct. + if (indirAddr->TypeIs(TYP_REF) && storeAddr->TypeIs(TYP_REF) && distinct) + { + JITDUMP("Cannot interfere with [%06u] since they are both off TYP_REF bases and indir range " + "[%03u..%03u) does not interfere with store range [%03u..%03u)\n", + Compiler::dspTreeID(cur), (unsigned)offs, (unsigned)offs + indir->Size(), + (unsigned)storeOffs, store->Size()); + continue; + } + } + + JITDUMP("Indir [%06u] interferes with [%06u]\n", Compiler::dspTreeID(indir), Compiler::dspTreeID(cur)); + UnmarkTree(indir); + return false; + } } JITDUMP("Interference checks passed. 
Moving nodes that are not part of data flow tree\n\n", diff --git a/src/coreclr/jit/promotiondecomposition.cpp b/src/coreclr/jit/promotiondecomposition.cpp index 628d7c91a1c51..86e2a19fd8bfe 100644 --- a/src/coreclr/jit/promotiondecomposition.cpp +++ b/src/coreclr/jit/promotiondecomposition.cpp @@ -1204,26 +1204,44 @@ void Compiler::gtPeelOffsets(GenTree** addr, target_ssize_t* offset, FieldSeq** assert((*addr)->TypeIs(TYP_I_IMPL, TYP_BYREF, TYP_REF)); *offset = 0; *fldSeq = nullptr; - while ((*addr)->OperIs(GT_ADD) && !(*addr)->gtOverflow()) + while (true) { - GenTree* op1 = (*addr)->gtGetOp1(); - GenTree* op2 = (*addr)->gtGetOp2(); - - if (op2->IsCnsIntOrI() && !op2->AsIntCon()->IsIconHandle()) + if ((*addr)->OperIs(GT_ADD) && !(*addr)->gtOverflow()) { - assert(op2->TypeIs(TYP_I_IMPL)); - GenTreeIntCon* intCon = op2->AsIntCon(); - *offset += (target_ssize_t)intCon->IconValue(); - *fldSeq = m_fieldSeqStore->Append(*fldSeq, intCon->gtFieldSeq); - *addr = op1; + GenTree* op1 = (*addr)->gtGetOp1(); + GenTree* op2 = (*addr)->gtGetOp2(); + + if (op2->IsCnsIntOrI() && !op2->AsIntCon()->IsIconHandle()) + { + assert(op2->TypeIs(TYP_I_IMPL)); + GenTreeIntCon* intCon = op2->AsIntCon(); + *offset += (target_ssize_t)intCon->IconValue(); + *fldSeq = m_fieldSeqStore->Append(*fldSeq, intCon->gtFieldSeq); + *addr = op1; + } + else if (op1->IsCnsIntOrI() && !op1->AsIntCon()->IsIconHandle()) + { + assert(op1->TypeIs(TYP_I_IMPL)); + GenTreeIntCon* intCon = op1->AsIntCon(); + *offset += (target_ssize_t)intCon->IconValue(); + *fldSeq = m_fieldSeqStore->Append(intCon->gtFieldSeq, *fldSeq); + *addr = op2; + } + else + { + break; + } } - else if (op1->IsCnsIntOrI() && !op1->AsIntCon()->IsIconHandle()) + else if ((*addr)->OperIs(GT_LEA)) { - assert(op1->TypeIs(TYP_I_IMPL)); - GenTreeIntCon* intCon = op1->AsIntCon(); - *offset += (target_ssize_t)intCon->IconValue(); - *fldSeq = m_fieldSeqStore->Append(intCon->gtFieldSeq, *fldSeq); - *addr = op2; + GenTreeAddrMode* addrMode = (*addr)->AsAddrMode(); + if (addrMode->HasIndex()) + { + break; + } + + *offset += (target_ssize_t)addrMode->Offset(); + *addr = addrMode->Base(); } else { diff --git a/src/coreclr/jit/sideeffects.cpp b/src/coreclr/jit/sideeffects.cpp index e664cfffca6f5..a8752acb4c211 100644 --- a/src/coreclr/jit/sideeffects.cpp +++ b/src/coreclr/jit/sideeffects.cpp @@ -420,6 +420,21 @@ bool AliasSet::InterferesWith(const NodeInfo& other) const return other.IsLclVarWrite() && m_lclVarReads.Contains(other.LclNum()); } +//------------------------------------------------------------------------ +// AliasSet::WritesLocal: +// Returns true if this alias set contains a write to the specified local. +// +// Arguments: +// lclNum - The local number. +// +// Returns: +// True if so. +// +bool AliasSet::WritesLocal(unsigned lclNum) +{ + return m_lclVarWrites.Contains(lclNum); +} + //------------------------------------------------------------------------ // AliasSet::Clear: // Clears the current alias set. 
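This commit replaces the hand-rolled offset peeling in lowering with the shared gtPeelOffsets helper, and teaches the helper to look through base+offset LEA address modes as well. A standalone sketch of the peeling idea over a toy tree (illustration only; 'Node' and 'PeelOffsets' are hypothetical stand-ins for GenTree and the real helper):

    #include <cstdint>

    struct Node
    {
        bool    isAddOfConstant; // models an add of a base address and a constant
        Node*   base;            // the non-constant operand
        int64_t offset;          // the constant displacement
    };

    // Accumulate constant displacements down the tree so that
    // base + totalOffset is equivalent to the original address.
    static Node* PeelOffsets(Node* addr, int64_t* totalOffset)
    {
        *totalOffset = 0;
        while (addr->isAddOfConstant)
        {
            *totalOffset += addr->offset;
            addr = addr->base;
        }
        return addr;
    }

With both indirections reduced to (base, offset) form, the ldp candidacy check is just base equality plus offsets that differ by exactly the access size, and the WritesLocal query above lets the interference check prove that an intervening store does not redefine that base.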
diff --git a/src/coreclr/jit/sideeffects.h b/src/coreclr/jit/sideeffects.h index 9cee0b79d6e63..f213d718b6d22 100644 --- a/src/coreclr/jit/sideeffects.h +++ b/src/coreclr/jit/sideeffects.h @@ -150,6 +150,7 @@ class AliasSet final void AddNode(Compiler* compiler, GenTree* node); bool InterferesWith(const AliasSet& other) const; bool InterferesWith(const NodeInfo& node) const; + bool WritesLocal(unsigned lclNum); void Clear(); }; @@ -181,6 +182,11 @@ class SideEffectSet final bool InterferesWith(Compiler* compiler, GenTree* node, bool strict) const; bool InterferesWith(Compiler* compiler, GenTree* node, unsigned overriddenSideEffects, bool strict) const; void Clear(); + + bool WritesLocal(unsigned lclNum) + { + return m_aliasSet.WritesLocal(lclNum); + } }; #endif // _SIDEEFFECTS_H_ From a965f765c4c6e559d0c3fddd0ef59dd216ec801c Mon Sep 17 00:00:00 2001 From: Jakob Botsch Nielsen Date: Fri, 29 Sep 2023 22:14:42 +0200 Subject: [PATCH 05/19] Remove extraneous argument --- src/coreclr/jit/lower.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/coreclr/jit/lower.cpp b/src/coreclr/jit/lower.cpp index d01e3c42dd142..fe0d665a84ae7 100644 --- a/src/coreclr/jit/lower.cpp +++ b/src/coreclr/jit/lower.cpp @@ -8044,7 +8044,7 @@ bool Lowering::TryOptimizeForLdp(GenTreeIndir* prevIndir, GenTreeIndir* indir) // the same value even after reordering. if (m_scratchSideEffects.InterferesWith(comp, indir, GTF_EMPTY, true)) { - JITDUMP("Have conservative interference. Trying smarter interference check...\n", Compiler::dspTreeID(indir)); + JITDUMP("Have conservative interference with last indir. Trying a smarter interference check...\n"); GenTree* indirAddr = indir->Addr(); target_ssize_t offs = 0; From 68633e48b7a063e0f0b553a2b78b08d34698d994 Mon Sep 17 00:00:00 2001 From: Jakob Botsch Nielsen Date: Fri, 29 Sep 2023 22:40:37 +0200 Subject: [PATCH 06/19] Fix build --- src/coreclr/jit/lower.cpp | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/coreclr/jit/lower.cpp b/src/coreclr/jit/lower.cpp index fe0d665a84ae7..1202c33b804bd 100644 --- a/src/coreclr/jit/lower.cpp +++ b/src/coreclr/jit/lower.cpp @@ -8078,7 +8078,8 @@ bool Lowering::TryOptimizeForLdp(GenTreeIndir* prevIndir, GenTreeIndir* indir) target_ssize_t storeOffs = 0; comp->gtPeelOffsets(&storeAddr, &storeOffs, &fieldSeq); - bool distinct = (storeOffs + store->Size() <= offs) || (offs + indir->Size() <= storeOffs); + bool distinct = (storeOffs + (target_ssize_t)store->Size() <= offs) || + (offs + (target_ssize_t)indir->Size() <= storeOffs); if (checkLocal && GenTree::Compare(indirAddr, storeAddr) && distinct) { JITDUMP("Cannot interfere with [%06u] since they are off the same local V%02u and indir range " From b7830bbc28755dcd757e360710482c27d21729e0 Mon Sep 17 00:00:00 2001 From: Jakob Botsch Nielsen Date: Sat, 30 Sep 2023 01:10:31 +0200 Subject: [PATCH 07/19] Fix some regressions --- src/coreclr/jit/lower.cpp | 18 ++++++++++++++++-- 1 file changed, 16 insertions(+), 2 deletions(-) diff --git a/src/coreclr/jit/lower.cpp b/src/coreclr/jit/lower.cpp index 1202c33b804bd..bd9ee4a58dcd7 100644 --- a/src/coreclr/jit/lower.cpp +++ b/src/coreclr/jit/lower.cpp @@ -7906,7 +7906,7 @@ void Lowering::LowerIndir(GenTreeIndir* ind, GenTree** next) for (int i = 0; i < m_blockIndirs.Height(); i++) { GenTreeIndir* prevIndir = m_blockIndirs.Top(i); - if (!prevIndir->OperIs(GT_IND)) + if ((prevIndir == nullptr) || !prevIndir->OperIs(GT_IND) || (prevIndir->TypeGet() != ind->TypeGet())) { continue; } @@ -7922,7 +7922,14 @@ void 
Lowering::LowerIndir(GenTreeIndir* ind, GenTree** next) if (abs(offs1 - offs2) == genTypeSize(ind)) { JITDUMP(" ..and they are amenable to ldp optimization\n"); - TryOptimizeForLdp(prevIndir, ind); + if (TryOptimizeForLdp(prevIndir, ind)) + { + // Do not let the previous one participate in + // another instance; that can cause us to e.g. convert + // *(x+4), *(x+0), *(x+8), *(x+12) => + // *(x+4), *(x+8), *(x+0), *(x+12) + m_blockIndirs.TopRef(i) = nullptr; + } break; } else @@ -8018,6 +8025,13 @@ bool Lowering::TryOptimizeForLdp(GenTreeIndir* prevIndir, GenTreeIndir* indir) for (GenTree* cur = prevIndir->gtNext; cur != indir; cur = cur->gtNext) { + if (cur->IsCall() || (cur->OperIsStoreBlk() && (cur->AsBlk()->gtBlkOpKind == GenTreeBlk::BlkOpKindHelper))) + { + JITDUMP("[Heuristic] Giving up due to node that kills registers [%06u]\n", Compiler::dspTreeID(cur)); + UnmarkTree(indir); + return false; + } + if ((cur->gtLIRFlags & LIR::Flags::Mark) != 0) { // Part of data flow of 'indir', so we will be moving past this node. From de27c1fa941411e915a977354dd25bdb91b67d3c Mon Sep 17 00:00:00 2001 From: Jakob Botsch Nielsen Date: Sat, 30 Sep 2023 01:28:21 +0200 Subject: [PATCH 08/19] Optimize throughput --- src/coreclr/jit/lower.cpp | 42 ++++++++++++++++++++------------------- src/coreclr/jit/lower.h | 13 +++++++++++- 2 files changed, 34 insertions(+), 21 deletions(-) diff --git a/src/coreclr/jit/lower.cpp b/src/coreclr/jit/lower.cpp index bd9ee4a58dcd7..08a2aaac3826c 100644 --- a/src/coreclr/jit/lower.cpp +++ b/src/coreclr/jit/lower.cpp @@ -7337,7 +7337,9 @@ void Lowering::LowerBlock(BasicBlock* block) assert(block->isEmpty() || block->IsLIR()); m_block = block; +#ifdef TARGET_ARM64 m_blockIndirs.Reset(); +#endif // NOTE: some of the lowering methods insert calls before the node being // lowered (See e.g. InsertPInvoke{Method,Call}{Prolog,Epilog}). 
In @@ -7827,10 +7829,6 @@ void Lowering::LowerStoreIndirCommon(GenTreeStoreInd* ind) LowerStoreIndir(ind); } - -#ifdef TARGET_ARM64 - m_blockIndirs.Push(ind); -#endif } //------------------------------------------------------------------------ @@ -7896,30 +7894,34 @@ void Lowering::LowerIndir(GenTreeIndir* ind, GenTree** next) #ifdef TARGET_ARM64 if (ind->OperIs(GT_IND) && comp->opts.OptimizationEnabled() && (next != nullptr)) { - target_ssize_t offs1 = 0; + target_ssize_t offs = 0; GenTree* addr = ind->Addr(); FieldSeq* fldSeq = nullptr; - comp->gtPeelOffsets(&addr, &offs1, &fldSeq); + comp->gtPeelOffsets(&addr, &offs, &fldSeq); - if (ind->TypeIs(TYP_INT, TYP_LONG, TYP_DOUBLE, TYP_SIMD16)) + if (addr->OperIsLocal() && ind->TypeIs(TYP_INT, TYP_LONG, TYP_DOUBLE, TYP_SIMD16)) { - for (int i = 0; i < m_blockIndirs.Height(); i++) + SavedIndir newInd; + newInd.Indir = ind; + newInd.AddrBase = addr->AsLclVarCommon(); + newInd.Offset = offs; + + int maxCount = min(m_blockIndirs.Height(), 8); + for (int i = 0; i < maxCount; i++) { - GenTreeIndir* prevIndir = m_blockIndirs.Top(i); - if ((prevIndir == nullptr) || !prevIndir->OperIs(GT_IND) || (prevIndir->TypeGet() != ind->TypeGet())) + SavedIndir& prev = m_blockIndirs.TopRef(i); + GenTreeIndir* prevIndir = prev.Indir; + if ((prevIndir == nullptr) || (prevIndir->TypeGet() != ind->TypeGet())) { continue; } - target_ssize_t offs2 = 0; - GenTree* prevAddr = prevIndir->Addr(); - comp->gtPeelOffsets(&prevAddr, &offs2, &fldSeq); - - if (addr->OperIsLocal() && GenTree::Compare(addr, prevAddr)) + if (GenTree::Compare(addr, prev.AddrBase)) { JITDUMP("[%06u] and [%06u] are indirs off the same base with offsets +%03u and +%03u\n", - Compiler::dspTreeID(ind), Compiler::dspTreeID(prevIndir), offs1, offs2); - if (abs(offs1 - offs2) == genTypeSize(ind)) + Compiler::dspTreeID(ind), Compiler::dspTreeID(prevIndir), (unsigned)offs, + (unsigned)prev.Offset); + if (abs(offs - prev.Offset) == genTypeSize(ind)) { JITDUMP(" ..and they are amenable to ldp optimization\n"); if (TryOptimizeForLdp(prevIndir, ind)) @@ -7928,7 +7930,7 @@ void Lowering::LowerIndir(GenTreeIndir* ind, GenTree** next) // another instance; that can cause us to e.g. 
convert // *(x+4), *(x+0), *(x+8), *(x+12) => // *(x+4), *(x+8), *(x+0), *(x+12) - m_blockIndirs.TopRef(i) = nullptr; + prev.Indir = nullptr; } break; } @@ -7938,9 +7940,9 @@ void Lowering::LowerIndir(GenTreeIndir* ind, GenTree** next) } } } - } - m_blockIndirs.Push(ind); + m_blockIndirs.Push(newInd); + } } #endif } diff --git a/src/coreclr/jit/lower.h b/src/coreclr/jit/lower.h index 7c0c9ab6cb102..017133d432cf6 100644 --- a/src/coreclr/jit/lower.h +++ b/src/coreclr/jit/lower.h @@ -25,7 +25,9 @@ class Lowering final : public Phase inline Lowering(Compiler* compiler, LinearScanInterface* lsra) : Phase(compiler, PHASE_LOWERING) , vtableCallTemp(BAD_VAR_NUM) +#ifdef TARGET_ARM64 , m_blockIndirs(compiler->getAllocator(CMK_ArrayStack)) +#endif { m_lsra = (LinearScan*)lsra; assert(m_lsra); @@ -562,7 +564,16 @@ class Lowering final : public Phase #ifdef FEATURE_FIXED_OUT_ARGS unsigned m_outgoingArgSpaceSize = 0; #endif - ArrayStack m_blockIndirs; + +#ifdef TARGET_ARM64 + struct SavedIndir + { + GenTreeIndir* Indir; + GenTreeLclVarCommon* AddrBase; + target_ssize_t Offset; + }; + ArrayStack m_blockIndirs; +#endif }; #endif // _LOWER_H_ From 7a858bfbba2777d51a97128acc14b104172db8c4 Mon Sep 17 00:00:00 2001 From: Jakob Botsch Nielsen Date: Sat, 30 Sep 2023 02:39:59 +0200 Subject: [PATCH 09/19] Fix build --- src/coreclr/jit/lower.cpp | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/coreclr/jit/lower.cpp b/src/coreclr/jit/lower.cpp index 08a2aaac3826c..10b43bdfa3a7a 100644 --- a/src/coreclr/jit/lower.cpp +++ b/src/coreclr/jit/lower.cpp @@ -7947,6 +7947,7 @@ void Lowering::LowerIndir(GenTreeIndir* ind, GenTree** next) #endif } +#ifdef TARGET_ARM64 static void MarkTree(GenTree* node) { node->gtLIRFlags |= LIR::Flags::Mark; @@ -8152,6 +8153,7 @@ bool Lowering::TryOptimizeForLdp(GenTreeIndir* prevIndir, GenTreeIndir* indir) UnmarkTree(indir); return true; } +#endif //------------------------------------------------------------------------ // TransformUnusedIndirection: change the opcode and the type of the unused indirection. From 4dd6b75d6265332541faab96f356c082062544ce Mon Sep 17 00:00:00 2001 From: Jakob Botsch Nielsen Date: Sat, 30 Sep 2023 13:23:54 +0200 Subject: [PATCH 10/19] Add a check for volatile --- src/coreclr/jit/lower.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/coreclr/jit/lower.cpp b/src/coreclr/jit/lower.cpp index 10b43bdfa3a7a..df090c7f310f9 100644 --- a/src/coreclr/jit/lower.cpp +++ b/src/coreclr/jit/lower.cpp @@ -8088,7 +8088,7 @@ bool Lowering::TryOptimizeForLdp(GenTreeIndir* prevIndir, GenTreeIndir* indir) continue; } - if (cur->OperIs(GT_STOREIND, GT_STORE_BLK)) + if (cur->OperIs(GT_STOREIND, GT_STORE_BLK) && !cur->AsIndir()->IsVolatile()) { GenTreeIndir* store = cur->AsIndir(); GenTree* storeAddr = store->Addr(); From c737d78f8eb3800a9bd8302fc34574dde1394d7e Mon Sep 17 00:00:00 2001 From: Jakob Botsch Nielsen Date: Tue, 3 Oct 2023 17:32:41 +0200 Subject: [PATCH 11/19] Make checks more conservative --- src/coreclr/jit/lower.cpp | 89 ++++++++++++++++++++++----------- src/coreclr/jit/sideeffects.cpp | 19 ------- src/coreclr/jit/sideeffects.h | 1 - 3 files changed, 60 insertions(+), 49 deletions(-) diff --git a/src/coreclr/jit/lower.cpp b/src/coreclr/jit/lower.cpp index df090c7f310f9..343a8542b10fc 100644 --- a/src/coreclr/jit/lower.cpp +++ b/src/coreclr/jit/lower.cpp @@ -8052,15 +8052,22 @@ bool Lowering::TryOptimizeForLdp(GenTreeIndir* prevIndir, GenTreeIndir* indir) } } - // Can we move it past the 'indir'? 
We can ignore effect flags when - // checking this, as we know the previous indir will make it non-faulting - // and keep the ordering correct. This makes use of the fact that - // non-volatile indirs have ordering side effects only for the "suppressed - // NRE" case. - // We still need some interference checks to ensure that the indir reads - // the same value even after reordering. - if (m_scratchSideEffects.InterferesWith(comp, indir, GTF_EMPTY, true)) + if (m_scratchSideEffects.InterferesWith(comp, indir, true)) { + // Try a bit harder, making use of the following facts: + // + // 1. We know the indir is non-faulting, so we do not need to worry + // about reordering exceptions + // + // 2. We can reorder with non-volatile INDs even if they have + // GTF_ORDER_SIDEEFF; these indirs only have GTF_ORDER_SIDEEFF due to + // being non-faulting + // + // 3. We can also reorder with non-volatile STOREINDs if we can prove + // no aliasing. We can do that for two common cases: + // * The addresses are based on the same local but at distinct offset ranges + // * The addresses are based off TYP_REF bases at distinct offset ranges JITDUMP("Have conservative interference with last indir. Trying a smarter interference check...\n"); GenTree* indirAddr = indir->Addr(); target_ssize_t offs = 0; FieldSeq* fieldSeq = nullptr; comp->gtPeelOffsets(&indirAddr, &offs, &fieldSeq); bool checkLocal = indirAddr->OperIsLocal(); if (checkLocal) { unsigned lclNum = indirAddr->AsLclVarCommon()->GetLclNum(); checkLocal = !comp->lvaGetDesc(lclNum)->IsAddressExposed() && !m_scratchSideEffects.WritesLocal(lclNum); } - for (GenTree* cur = indir->gtPrev; cur != prevIndir; cur = cur->gtPrev) - { - if ((cur->gtLIRFlags & LIR::Flags::Mark) != 0) + // Helper lambda to check if a single node interferes with 'indir'. + auto interferes = [=](GenTree* node) { + if (((node->gtFlags & GTF_ORDER_SIDEEFF) != 0) && node->OperSupportsOrderingSideEffect()) { - continue; + // Cannot normally reorder GTF_ORDER_SIDEEFF and GTF_GLOB_REF, + // except for some of the known cases described above. + if (!node->OperIs(GT_IND, GT_BLK, GT_STOREIND, GT_STORE_BLK) || node->AsIndir()->IsVolatile()) + { + JITDUMP("IsVolatile\n"); + return true; + } } - AliasSet::NodeInfo nodeInfo(comp, cur); - if (!nodeInfo.WritesAddressableLocation()) - { - continue; - } + AliasSet::NodeInfo nodeInfo(comp, node); - if (cur->OperIs(GT_STOREIND, GT_STORE_BLK) && !cur->AsIndir()->IsVolatile()) + if (nodeInfo.WritesAddressableLocation()) { - GenTreeIndir* store = cur->AsIndir(); + if (!node->OperIs(GT_STOREIND, GT_STORE_BLK)) + { + JITDUMP("Wrong node\n"); + return true; + } + + GenTreeIndir* store = node->AsIndir(); GenTree* storeAddr = store->Addr(); target_ssize_t storeOffs = 0; + FieldSeq* fieldSeq = nullptr; comp->gtPeelOffsets(&storeAddr, &storeOffs, &fieldSeq); bool distinct = (storeOffs + (target_ssize_t)store->Size() <= offs) || (offs + (target_ssize_t)indir->Size() <= storeOffs); + if (checkLocal && GenTree::Compare(indirAddr, storeAddr) && distinct) { JITDUMP("Cannot interfere with [%06u] since they are off the same local V%02u and indir range " "[%03u..%03u) does not interfere with store range [%03u..%03u)\n", - Compiler::dspTreeID(cur), indirAddr->AsLclVarCommon()->GetLclNum(), (unsigned)offs, - (unsigned)offs + indir->Size(), (unsigned)storeOffs, store->Size()); - continue; + Compiler::dspTreeID(node), indirAddr->AsLclVarCommon()->GetLclNum(), (unsigned)offs, + (unsigned)offs + indir->Size(), (unsigned)storeOffs, (unsigned)storeOffs + store->Size()); } - // Two indirs off of TYP_REFs cannot overlap if their offset ranges are distinct. 
- if (indirAddr->TypeIs(TYP_REF) && storeAddr->TypeIs(TYP_REF) && distinct) + else if (indirAddr->TypeIs(TYP_REF) && storeAddr->TypeIs(TYP_REF) && distinct) { JITDUMP("Cannot interfere with [%06u] since they are both off TYP_REF bases and indir range " "[%03u..%03u) does not interfere with store range [%03u..%03u)\n", - Compiler::dspTreeID(cur), (unsigned)offs, (unsigned)offs + indir->Size(), - (unsigned)storeOffs, store->Size()); - continue; + Compiler::dspTreeID(node), (unsigned)offs, (unsigned)offs + indir->Size(), + (unsigned)storeOffs, (unsigned)storeOffs + store->Size()); + } + else + { + return true; } } - JITDUMP("Indir [%06u] interferes with [%06u]\n", Compiler::dspTreeID(indir), Compiler::dspTreeID(cur)); - UnmarkTree(indir); return false; + }; + + for (GenTree* cur = indir->gtPrev; cur != prevIndir; cur = cur->gtPrev) + { + if ((cur->gtLIRFlags & LIR::Flags::Mark) != 0) + { + continue; + } + + if (interferes(cur)) + { + JITDUMP("Indir [%06u] interferes with [%06u]\n", Compiler::dspTreeID(indir), Compiler::dspTreeID(cur)); + UnmarkTree(indir); + return false; + } } } diff --git a/src/coreclr/jit/sideeffects.cpp b/src/coreclr/jit/sideeffects.cpp index a8752acb4c211..ee2b4176e0dfb 100644 --- a/src/coreclr/jit/sideeffects.cpp +++ b/src/coreclr/jit/sideeffects.cpp @@ -589,25 +589,6 @@ bool SideEffectSet::InterferesWith(Compiler* compiler, GenTree* node, bool stric return InterferesWith(node->OperEffects(compiler), AliasSet::NodeInfo(compiler, node), strict); } -//------------------------------------------------------------------------ -// SideEffectSet::InterferesWith: -// Returns true if the side effects in this set interfere with the side -// effects for the given node. -// -// A side effect set interferes with a given node iff it interferes -// with the side effect set of the node. -// -// Arguments: -// compiler - The compiler context. -// node - The node in question. -// overriddenSideEffects - Side effect flags to use in place of the ones from the node -// strict - True if the analysis should be strict as described above. -// -bool SideEffectSet::InterferesWith(Compiler* compiler, GenTree* node, unsigned overriddenSideEffects, bool strict) const -{ - return InterferesWith(overriddenSideEffects, AliasSet::NodeInfo(compiler, node), strict); -} - //------------------------------------------------------------------------ // SideEffectSet::Clear: // Clears the current side effect set. 
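The 'distinct' expression used by the interferes lambda above is the usual half-open interval test: a load over [offs, offs + loadSize) and a store over [storeOffs, storeOffs + storeSize) cannot touch the same bytes when one range ends before the other begins. Restated as a self-contained sketch ('RangesAreDistinct' is a hypothetical name mirroring the expression in the patch):

    #include <cstdint>

    // True when two half-open byte ranges cannot overlap.
    static bool RangesAreDistinct(int64_t offs1, int64_t size1, int64_t offs2, int64_t size2)
    {
        return (offs1 + size1 <= offs2) || (offs2 + size2 <= offs1);
    }

The test is only sound once gtPeelOffsets has reduced both addresses to a common base: either the same non-address-exposed local, or two TYP_REF bases, which at runtime either refer to the same object (so distinct offset ranges cannot overlap) or to different objects (which never overlap).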
diff --git a/src/coreclr/jit/sideeffects.h b/src/coreclr/jit/sideeffects.h index f213d718b6d22..7f9b6d966e670 100644 --- a/src/coreclr/jit/sideeffects.h +++ b/src/coreclr/jit/sideeffects.h @@ -180,7 +180,6 @@ class SideEffectSet final void AddNode(Compiler* compiler, GenTree* node); bool InterferesWith(const SideEffectSet& other, bool strict) const; bool InterferesWith(Compiler* compiler, GenTree* node, bool strict) const; - bool InterferesWith(Compiler* compiler, GenTree* node, unsigned overriddenSideEffects, bool strict) const; void Clear(); bool WritesLocal(unsigned lclNum) From 111a5fafac8a7f907842fe0ac63bc068a3bf6133 Mon Sep 17 00:00:00 2001 From: Jakob Botsch Nielsen Date: Tue, 3 Oct 2023 18:59:44 +0200 Subject: [PATCH 12/19] Clean up --- src/coreclr/jit/lower.cpp | 191 ++++++++++++++++++++++++-------------- src/coreclr/jit/lower.h | 7 +- 2 files changed, 124 insertions(+), 74 deletions(-) diff --git a/src/coreclr/jit/lower.cpp b/src/coreclr/jit/lower.cpp index 343a8542b10fc..5ae66c2f0f1e0 100644 --- a/src/coreclr/jit/lower.cpp +++ b/src/coreclr/jit/lower.cpp @@ -410,9 +410,7 @@ GenTree* Lowering::LowerNode(GenTree* node) case GT_NULLCHECK: case GT_IND: { - GenTree* next = nullptr; - LowerIndir(node->AsIndir(), &next); - return next; + return LowerIndir(node->AsIndir()); } case GT_STOREIND: @@ -7837,12 +7835,9 @@ void Lowering::LowerStoreIndirCommon(GenTreeStoreInd* ind) // Arguments: // ind - the ind node we are lowering. // -void Lowering::LowerIndir(GenTreeIndir* ind, GenTree** next) +GenTree* Lowering::LowerIndir(GenTreeIndir* ind) { - if (next != nullptr) - { - *next = ind->gtNext; - } + GenTree* next = ind->gtNext; assert(ind->OperIs(GT_IND, GT_NULLCHECK)); // Process struct typed indirs separately unless they are unused; @@ -7892,81 +7887,104 @@ void Lowering::LowerIndir(GenTreeIndir* ind, GenTree** next) } #ifdef TARGET_ARM64 - if (ind->OperIs(GT_IND) && comp->opts.OptimizationEnabled() && (next != nullptr)) + if (comp->opts.OptimizationEnabled() && ind->OperIs(GT_IND)) + { + OptimizeForLdp(ind); + } +#endif + + return next; +} + +#ifdef TARGET_ARM64 + +//------------------------------------------------------------------------ +// OptimizeForLdp: Record information about an indirection, and try to optimize +// it by moving it to be adjacent with a previous indirection such that they +// can be transformed into 'ldp'. +// +// Arguments: +// ind - Indirection to record and to try to move. +// +// Returns: +// True if the optimization was successful. 
+// +bool Lowering::OptimizeForLdp(GenTreeIndir* ind) +{ + if (!ind->TypeIs(TYP_INT, TYP_LONG, TYP_DOUBLE, TYP_SIMD16)) + { + return false; + } + + target_ssize_t offs = 0; + GenTree* addr = ind->Addr(); + FieldSeq* fldSeq = nullptr; + comp->gtPeelOffsets(&addr, &offs, &fldSeq); + + if (!addr->OperIsLocal()) { - target_ssize_t offs = 0; - GenTree* addr = ind->Addr(); - FieldSeq* fldSeq = nullptr; - comp->gtPeelOffsets(&addr, &offs, &fldSeq); + return false; + } - if (addr->OperIsLocal() && ind->TypeIs(TYP_INT, TYP_LONG, TYP_DOUBLE, TYP_SIMD16)) + bool result = false; + + SavedIndir newInd; + newInd.Indir = ind; + newInd.AddrBase = addr->AsLclVarCommon(); + newInd.Offset = offs; + + int maxCount = min(m_blockIndirs.Height(), 8); + for (int i = 0; i < maxCount; i++) + { + SavedIndir& prev = m_blockIndirs.TopRef(i); + GenTreeIndir* prevIndir = prev.Indir; + if ((prevIndir == nullptr) || (prevIndir->TypeGet() != ind->TypeGet())) { - SavedIndir newInd; - newInd.Indir = ind; - newInd.AddrBase = addr->AsLclVarCommon(); - newInd.Offset = offs; + continue; + } - int maxCount = min(m_blockIndirs.Height(), 8); - for (int i = 0; i < maxCount; i++) + if (GenTree::Compare(addr, prev.AddrBase)) + { + JITDUMP("[%06u] and [%06u] are indirs off the same base with offsets +%03u and +%03u\n", + Compiler::dspTreeID(ind), Compiler::dspTreeID(prevIndir), (unsigned)offs, (unsigned)prev.Offset); + if (abs(offs - prev.Offset) == genTypeSize(ind)) { - SavedIndir& prev = m_blockIndirs.TopRef(i); - GenTreeIndir* prevIndir = prev.Indir; - if ((prevIndir == nullptr) || (prevIndir->TypeGet() != ind->TypeGet())) - { - continue; - } - - if (GenTree::Compare(addr, prev.AddrBase)) + JITDUMP(" ..and they are amenable to ldp optimization\n"); + if (TryMakeIndirsAdjacent(prevIndir, newInd.Indir)) { - JITDUMP("[%06u] and [%06u] are indirs off the same base with offsets +%03u and +%03u\n", - Compiler::dspTreeID(ind), Compiler::dspTreeID(prevIndir), (unsigned)offs, - (unsigned)prev.Offset); - if (abs(offs - prev.Offset) == genTypeSize(ind)) - { - JITDUMP(" ..and they are amenable to ldp optimization\n"); - if (TryOptimizeForLdp(prevIndir, ind)) - { - // Do not let the previous one participate in - // another instance; that can cause us to e.g. convert - // *(x+4), *(x+0), *(x+8), *(x+12) => - // *(x+4), *(x+8), *(x+0), *(x+12) - prev.Indir = nullptr; - } - break; - } - else - { - JITDUMP(" ..but at wrong offset\n"); - } + // Do not let the previous one participate in + // another instance; that can cause us to e.g. convert + // *(x+4), *(x+0), *(x+8), *(x+12) => + // *(x+4), *(x+8), *(x+0), *(x+12) + prev.Indir = nullptr; + result = true; + break; } + break; + } + else + { + JITDUMP(" ..but at wrong offset\n"); } - - m_blockIndirs.Push(newInd); } } -#endif -} - -#ifdef TARGET_ARM64 -static void MarkTree(GenTree* node) -{ - node->gtLIRFlags |= LIR::Flags::Mark; - node->VisitOperands([](GenTree* op) { - MarkTree(op); - return GenTree::VisitResult::Continue; - }); -} -static void UnmarkTree(GenTree* node) -{ - node->gtLIRFlags &= ~LIR::Flags::Mark; - node->VisitOperands([](GenTree* op) { - UnmarkTree(op); - return GenTree::VisitResult::Continue; - }); + m_blockIndirs.Push(newInd); + return result; } -bool Lowering::TryOptimizeForLdp(GenTreeIndir* prevIndir, GenTreeIndir* indir) +//------------------------------------------------------------------------ +// TryMakeIndirsAdjacent: Try to prove that it is legal to move an indirection +// to be adjacent to a previous indirection. If successful, perform the move. 
+// +// Arguments: +// ind - Indirection to record and to try to move. +// next - [out] Next node to lower. Only written if the optimization was successful. +// +// Returns: +// True if the optimization was successful. +// +bool Lowering::TryMakeIndirsAdjacent(GenTreeIndir* prevIndir, GenTreeIndir* indir) { if ((indir->gtFlags & GTF_IND_VOLATILE) != 0) { @@ -8090,7 +8108,6 @@ bool Lowering::TryOptimizeForLdp(GenTreeIndir* prevIndir, GenTreeIndir* indir) // except for some of the known cases described above. if (!node->OperIs(GT_IND, GT_BLK, GT_STOREIND, GT_STORE_BLK) || node->AsIndir()->IsVolatile()) { - JITDUMP("IsVolatile\n"); return true; } } @@ -8101,7 +8118,6 @@ bool Lowering::TryOptimizeForLdp(GenTreeIndir* prevIndir, GenTreeIndir* indir) { if (!node->OperIs(GT_STOREIND, GT_STORE_BLK)) { - JITDUMP("Wrong node\n"); return true; } @@ -8184,6 +8200,37 @@ bool Lowering::TryOptimizeForLdp(GenTreeIndir* prevIndir, GenTreeIndir* indir) UnmarkTree(indir); return true; } + +//------------------------------------------------------------------------ +// MarkTree: Mark trees involved in the computation of 'node' recursively. +// +// Arguments: +// node - Root node. +// +void Lowering::MarkTree(GenTree* node) +{ + node->gtLIRFlags |= LIR::Flags::Mark; + node->VisitOperands([=](GenTree* op) { + MarkTree(op); + return GenTree::VisitResult::Continue; + }); +} + +//------------------------------------------------------------------------ +// UnmarkTree: Unmark trees involved in the computation of 'node' recursively. +// +// Arguments: +// node - Root node. +// +void Lowering::UnmarkTree(GenTree* node) +{ + node->gtLIRFlags &= ~LIR::Flags::Mark; + node->VisitOperands([=](GenTree* op) { + UnmarkTree(op); + return GenTree::VisitResult::Continue; + }); +} + #endif //------------------------------------------------------------------------ diff --git a/src/coreclr/jit/lower.h b/src/coreclr/jit/lower.h index 017133d432cf6..d17600594917b 100644 --- a/src/coreclr/jit/lower.h +++ b/src/coreclr/jit/lower.h @@ -314,8 +314,11 @@ class Lowering final : public Phase // Per tree node member functions void LowerStoreIndirCommon(GenTreeStoreInd* ind); - void LowerIndir(GenTreeIndir* ind, GenTree** next = nullptr); - bool TryOptimizeForLdp(GenTreeIndir* prevIndir, GenTreeIndir* indir); + GenTree* LowerIndir(GenTreeIndir* ind); + bool OptimizeForLdp(GenTreeIndir* ind); + bool TryMakeIndirsAdjacent(GenTreeIndir* prevIndir, GenTreeIndir* indir); + void MarkTree(GenTree* root); + void UnmarkTree(GenTree* root); void LowerStoreIndir(GenTreeStoreInd* node); GenTree* LowerAdd(GenTreeOp* node); GenTree* LowerMul(GenTreeOp* mul); From 44a786fdecb25ea3ca7b0a4d93ab832a8b19dd80 Mon Sep 17 00:00:00 2001 From: Jakob Botsch Nielsen Date: Fri, 6 Oct 2023 20:43:40 +0200 Subject: [PATCH 13/19] Add a constant for the distance --- src/coreclr/jit/lower.cpp | 20 +++++++++++++++----- 1 file changed, 15 insertions(+), 5 deletions(-) diff --git a/src/coreclr/jit/lower.cpp b/src/coreclr/jit/lower.cpp index 0cc62c6f22618..d055cae9223cd 100644 --- a/src/coreclr/jit/lower.cpp +++ b/src/coreclr/jit/lower.cpp @@ -8198,6 +8198,14 @@ GenTree* Lowering::LowerIndir(GenTreeIndir* ind) #ifdef TARGET_ARM64 +// Max distance that we will try to move an indirection backwards to become +// adjacent to another indirection. As an empiric observation, increasing this +// number to 32 for the smoke_tests collection resulted in 3684 -> 3796 cases +// passing the distance check, but 82 out of these 112 extra cases were then +// rejected due to interference. 
So 16 seems like a good number to balance the +// throughput costs. +const int LDP_REORDERING_MAX_DISTANCE = 16; + //------------------------------------------------------------------------ // OptimizeForLdp: Record information about an indirection, and try to optimize // it by moving it to be adjacent with a previous indirection such that they @@ -8233,7 +8241,9 @@ bool Lowering::OptimizeForLdp(GenTreeIndir* ind) newInd.AddrBase = addr->AsLclVarCommon(); newInd.Offset = offs; - int maxCount = min(m_blockIndirs.Height(), 8); + // Every indirection takes an expected 2+ nodes, so we only expect at most + // half the reordering distance to be candidates for the optimization. + int maxCount = min(m_blockIndirs.Height(), LDP_REORDERING_MAX_DISTANCE / 2); for (int i = 0; i < maxCount; i++) { SavedIndir& prev = m_blockIndirs.TopRef(i); @@ -8264,7 +8274,7 @@ bool Lowering::OptimizeForLdp(GenTreeIndir* ind) } else { - JITDUMP(" ..but at wrong offset\n"); + JITDUMP(" ..but at non-adjacent offset\n"); } } } @@ -8293,7 +8303,7 @@ bool Lowering::TryMakeIndirsAdjacent(GenTreeIndir* prevIndir, GenTreeIndir* indi } GenTree* cur = prevIndir; - for (int i = 0; i < 16; i++) + for (int i = 0; i < LDP_REORDERING_MAX_DISTANCE; i++) { cur = cur->gtNext; if (cur == indir) @@ -8302,12 +8312,12 @@ bool Lowering::TryMakeIndirsAdjacent(GenTreeIndir* prevIndir, GenTreeIndir* indi if (cur != indir) { - JITDUMP(" ..but it is too far away\n"); + JITDUMP(" ..but they are too far separated\n"); return false; } JITDUMP( - " ..and it is close by. Trying to move the following range (where * are nodes part of the data flow):\n\n"); + " ..and they are close. Trying to move the following range (where * are nodes part of the data flow):\n\n"); #ifdef DEBUG bool isClosed; GenTree* startDumpNode = BlockRange().GetTreeRange(prevIndir, &isClosed).FirstNode(); From 7eda3afe6ccd81d393b6a8fabbbf27bdc4eb9f9f Mon Sep 17 00:00:00 2001 From: Jakob Botsch Nielsen Date: Fri, 6 Oct 2023 21:09:34 +0200 Subject: [PATCH 14/19] Mark WritesLocal as const --- src/coreclr/jit/sideeffects.cpp | 2 +- src/coreclr/jit/sideeffects.h | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/src/coreclr/jit/sideeffects.cpp b/src/coreclr/jit/sideeffects.cpp index ee2b4176e0dfb..7ebb9865e9761 100644 --- a/src/coreclr/jit/sideeffects.cpp +++ b/src/coreclr/jit/sideeffects.cpp @@ -430,7 +430,7 @@ bool AliasSet::InterferesWith(const NodeInfo& other) const // Returns: // True if so. 
// -bool AliasSet::WritesLocal(unsigned lclNum) +bool AliasSet::WritesLocal(unsigned lclNum) const { return m_lclVarWrites.Contains(lclNum); } diff --git a/src/coreclr/jit/sideeffects.h b/src/coreclr/jit/sideeffects.h index 7f9b6d966e670..d94622d9f0ca8 100644 --- a/src/coreclr/jit/sideeffects.h +++ b/src/coreclr/jit/sideeffects.h @@ -150,7 +150,7 @@ class AliasSet final void AddNode(Compiler* compiler, GenTree* node); bool InterferesWith(const AliasSet& other) const; bool InterferesWith(const NodeInfo& node) const; - bool WritesLocal(unsigned lclNum); + bool WritesLocal(unsigned lclNum) const; void Clear(); }; @@ -182,7 +182,7 @@ class SideEffectSet final bool InterferesWith(Compiler* compiler, GenTree* node, bool strict) const; void Clear(); - bool WritesLocal(unsigned lclNum) + bool WritesLocal(unsigned lclNum) const { return m_aliasSet.WritesLocal(lclNum); } From 3ded2d231c91a9532eb537321e2d46202e776de0 Mon Sep 17 00:00:00 2001 From: Jakob Botsch Nielsen Date: Fri, 6 Oct 2023 21:55:01 +0200 Subject: [PATCH 15/19] Make fldSeq parameter to gtPeelOffsets optional --- src/coreclr/jit/compiler.h | 2 +- src/coreclr/jit/lower.cpp | 6 ++--- src/coreclr/jit/morph.cpp | 3 +-- src/coreclr/jit/promotiondecomposition.cpp | 27 +++++++++++++++++----- 4 files changed, 25 insertions(+), 13 deletions(-) diff --git a/src/coreclr/jit/compiler.h b/src/coreclr/jit/compiler.h index a575ac3a4c3dc..7eaa5bd75f039 100644 --- a/src/coreclr/jit/compiler.h +++ b/src/coreclr/jit/compiler.h @@ -3044,7 +3044,7 @@ class Compiler bool gtStoreDefinesField( LclVarDsc* fieldVarDsc, ssize_t offset, unsigned size, ssize_t* pFieldStoreOffset, unsigned* pFieldStoreSize); - void gtPeelOffsets(GenTree** addr, target_ssize_t* offset, FieldSeq** fldSeq); + void gtPeelOffsets(GenTree** addr, target_ssize_t* offset, FieldSeq** fldSeq = nullptr); // Return true if call is a recursive call; return false otherwise. // Note when inlining, this looks for calls back to the root method. diff --git a/src/coreclr/jit/lower.cpp b/src/coreclr/jit/lower.cpp index d055cae9223cd..262724106e338 100644 --- a/src/coreclr/jit/lower.cpp +++ b/src/coreclr/jit/lower.cpp @@ -8400,8 +8400,7 @@ bool Lowering::TryMakeIndirsAdjacent(GenTreeIndir* prevIndir, GenTreeIndir* indi GenTree* indirAddr = indir->Addr(); target_ssize_t offs = 0; - FieldSeq* fieldSeq = nullptr; - comp->gtPeelOffsets(&indirAddr, &offs, &fieldSeq); + comp->gtPeelOffsets(&indirAddr, &offs); bool checkLocal = indirAddr->OperIsLocal(); if (checkLocal) @@ -8434,8 +8433,7 @@ bool Lowering::TryMakeIndirsAdjacent(GenTreeIndir* prevIndir, GenTreeIndir* indi GenTreeIndir* store = node->AsIndir(); GenTree* storeAddr = store->Addr(); target_ssize_t storeOffs = 0; - FieldSeq* fieldSeq = nullptr; - comp->gtPeelOffsets(&storeAddr, &storeOffs, &fieldSeq); + comp->gtPeelOffsets(&storeAddr, &storeOffs); bool distinct = (storeOffs + (target_ssize_t)store->Size() <= offs) || (offs + (target_ssize_t)indir->Size() <= storeOffs); diff --git a/src/coreclr/jit/morph.cpp b/src/coreclr/jit/morph.cpp index 24e08dd209706..763b65fd8f13d 100644 --- a/src/coreclr/jit/morph.cpp +++ b/src/coreclr/jit/morph.cpp @@ -9917,8 +9917,7 @@ GenTree* Compiler::fgMorphFinalizeIndir(GenTreeIndir* indir) // Check for a misaligned floating point indirection. 
GenTree* effAddr = addr->gtEffectiveVal(true); target_ssize_t offset; - FieldSeq* fldSeq; - gtPeelOffsets(&effAddr, &offset, &fldSeq); + gtPeelOffsets(&effAddr, &offset); if (((offset % genTypeSize(TYP_FLOAT)) != 0) || (effAddr->IsCnsIntOrI() && ((effAddr->AsIntConCommon()->IconValue() % genTypeSize(TYP_FLOAT)) != 0))) diff --git a/src/coreclr/jit/promotiondecomposition.cpp b/src/coreclr/jit/promotiondecomposition.cpp index 86e2a19fd8bfe..693e0ed30a2b6 100644 --- a/src/coreclr/jit/promotiondecomposition.cpp +++ b/src/coreclr/jit/promotiondecomposition.cpp @@ -1197,13 +1197,18 @@ class DecompositionPlan // Arguments: // addr - [in, out] The address node. // offset - [out] The sum of offset peeled such that ADD(addr, offset) is equivalent to the original addr. -// fldSeq - [out] The combined field sequence for all the peeled offsets. +// fldSeq - [out, optional] The combined field sequence for all the peeled offsets. // void Compiler::gtPeelOffsets(GenTree** addr, target_ssize_t* offset, FieldSeq** fldSeq) { assert((*addr)->TypeIs(TYP_I_IMPL, TYP_BYREF, TYP_REF)); *offset = 0; - *fldSeq = nullptr; + + if (fldSeq != nullptr) + { + *fldSeq = nullptr; + } + while (true) { if ((*addr)->OperIs(GT_ADD) && !(*addr)->gtOverflow()) @@ -1216,16 +1221,26 @@ void Compiler::gtPeelOffsets(GenTree** addr, target_ssize_t* offset, FieldSeq** assert(op2->TypeIs(TYP_I_IMPL)); GenTreeIntCon* intCon = op2->AsIntCon(); *offset += (target_ssize_t)intCon->IconValue(); - *fldSeq = m_fieldSeqStore->Append(*fldSeq, intCon->gtFieldSeq); - *addr = op1; + + if (fldSeq != nullptr) + { + *fldSeq = m_fieldSeqStore->Append(*fldSeq, intCon->gtFieldSeq); + } + + *addr = op1; } else if (op1->IsCnsIntOrI() && !op1->AsIntCon()->IsIconHandle()) { assert(op1->TypeIs(TYP_I_IMPL)); GenTreeIntCon* intCon = op1->AsIntCon(); *offset += (target_ssize_t)intCon->IconValue(); - *fldSeq = m_fieldSeqStore->Append(intCon->gtFieldSeq, *fldSeq); - *addr = op2; + + if (fldSeq != nullptr) + { + *fldSeq = m_fieldSeqStore->Append(intCon->gtFieldSeq, *fldSeq); + } + + *addr = op2; } else { From c6b48d74d1dbf96ae7997ca6e62b4221b5672d4b Mon Sep 17 00:00:00 2001 From: Jakob Botsch Nielsen Date: Fri, 6 Oct 2023 21:55:20 +0200 Subject: [PATCH 16/19] Address feedback * Move IsVolatile check into OptimizeForLdp * Allow the optimization for SIMD8 indirs * Make fldSeq parameter of gtPeelOffsets optional * Only check for GT_LCL_VARs; reorder some checks for TP purposes * Avoid adding an entry on successful optimization * Update some comments and JITDUMP * Add convenience constructor --- src/coreclr/jit/lower.cpp | 82 +++++++++++++++++---------------------- src/coreclr/jit/lower.h | 11 ++++-- 2 files changed, 43 insertions(+), 50 deletions(-) diff --git a/src/coreclr/jit/lower.cpp b/src/coreclr/jit/lower.cpp index 262724106e338..d7262b30ae028 100644 --- a/src/coreclr/jit/lower.cpp +++ b/src/coreclr/jit/lower.cpp @@ -8219,68 +8219,61 @@ const int LDP_REORDERING_MAX_DISTANCE = 16; // bool Lowering::OptimizeForLdp(GenTreeIndir* ind) { - if (!ind->TypeIs(TYP_INT, TYP_LONG, TYP_DOUBLE, TYP_SIMD16)) + if (!ind->TypeIs(TYP_INT, TYP_LONG, TYP_DOUBLE, TYP_SIMD8, TYP_SIMD16) || ind->IsVolatile()) { return false; } - target_ssize_t offs = 0; - GenTree* addr = ind->Addr(); - FieldSeq* fldSeq = nullptr; - comp->gtPeelOffsets(&addr, &offs, &fldSeq); + target_ssize_t offs = 0; + GenTree* addr = ind->Addr(); + comp->gtPeelOffsets(&addr, &offs); - if (!addr->OperIsLocal()) + if (!addr->OperIs(GT_LCL_VAR)) { return false; } - bool result = false; - - SavedIndir newInd; - 
newInd.Indir = ind; - newInd.AddrBase = addr->AsLclVarCommon(); - newInd.Offset = offs; - // Every indirection takes an expected 2+ nodes, so we only expect at most // half the reordering distance to be candidates for the optimization. int maxCount = min(m_blockIndirs.Height(), LDP_REORDERING_MAX_DISTANCE / 2); for (int i = 0; i < maxCount; i++) { - SavedIndir& prev = m_blockIndirs.TopRef(i); + SavedIndir& prev = m_blockIndirs.TopRef(i); + if (prev.AddrBase->GetLclNum() != addr->AsLclVar()->GetLclNum()) + { + continue; + } + GenTreeIndir* prevIndir = prev.Indir; if ((prevIndir == nullptr) || (prevIndir->TypeGet() != ind->TypeGet())) { continue; } - if (GenTree::Compare(addr, prev.AddrBase)) + JITDUMP("[%06u] and [%06u] are indirs off the same base with offsets +%03u and +%03u\n", + Compiler::dspTreeID(ind), Compiler::dspTreeID(prevIndir), (unsigned)offs, (unsigned)prev.Offset); + if (abs(offs - prev.Offset) == genTypeSize(ind)) { - JITDUMP("[%06u] and [%06u] are indirs off the same base with offsets +%03u and +%03u\n", - Compiler::dspTreeID(ind), Compiler::dspTreeID(prevIndir), (unsigned)offs, (unsigned)prev.Offset); - if (abs(offs - prev.Offset) == genTypeSize(ind)) - { - JITDUMP(" ..and they are amenable to ldp optimization\n"); - if (TryMakeIndirsAdjacent(prevIndir, newInd.Indir)) - { - // Do not let the previous one participate in - // another instance; that can cause us to e.g. convert - // *(x+4), *(x+0), *(x+8), *(x+12) => - // *(x+4), *(x+8), *(x+0), *(x+12) - prev.Indir = nullptr; - result = true; - break; - } - break; - } - else + JITDUMP(" ..and they are amenable to ldp optimization\n"); + if (TryMakeIndirsAdjacent(prevIndir, ind)) { - JITDUMP(" ..but at non-adjacent offset\n"); + // Do not let the previous one participate in + // another instance; that can cause us to e.g. convert + // *(x+4), *(x+0), *(x+8), *(x+12) => + // *(x+4), *(x+8), *(x+0), *(x+12) + prev.Indir = nullptr; + return true; } + break; + } + else + { + JITDUMP(" ..but at non-adjacent offset\n"); } } - m_blockIndirs.Push(newInd); - return result; + m_blockIndirs.Emplace(ind, addr->AsLclVar(), offs); + return false; } //------------------------------------------------------------------------ @@ -8288,20 +8281,14 @@ bool Lowering::OptimizeForLdp(GenTreeIndir* ind) // to be adjacent to a previous indirection. If successful, perform the move. // // Arguments: -// ind - Indirection to record and to try to move. -// next - [out] Next node to lower. Only written if the optimization was successful. +// prevIndir - Previous indirection +// indir - Indirection to try to move to be adjacent to 'prevIndir' // // Returns: // True if the optimization was successful. // bool Lowering::TryMakeIndirsAdjacent(GenTreeIndir* prevIndir, GenTreeIndir* indir) { - if ((indir->gtFlags & GTF_IND_VOLATILE) != 0) - { - JITDUMP(" ..but second indir is volatile\n"); - return false; - } - GenTree* cur = prevIndir; for (int i = 0; i < LDP_REORDERING_MAX_DISTANCE; i++) { @@ -8365,7 +8352,8 @@ bool Lowering::TryMakeIndirsAdjacent(GenTreeIndir* prevIndir, GenTreeIndir* indi if ((cur->gtLIRFlags & LIR::Flags::Mark) != 0) { - // Part of data flow of 'indir', so we will be moving past this node. + // 'cur' is part of data flow of 'indir', so we will be moving the + // currently recorded effects past 'cur'. 
if (m_scratchSideEffects.InterferesWith(comp, cur, true)) { JITDUMP("Giving up due to interference with [%06u]\n", Compiler::dspTreeID(cur)); @@ -8478,7 +8466,7 @@ bool Lowering::TryMakeIndirsAdjacent(GenTreeIndir* prevIndir, GenTreeIndir* indi } } - JITDUMP("Interference checks passed. Moving nodes that are not part of data flow tree\n\n", + JITDUMP("Interference checks passed. Moving nodes that are not part of data flow of [%06u]\n\n", Compiler::dspTreeID(indir)); GenTree* previous = prevIndir; @@ -8539,7 +8527,7 @@ void Lowering::UnmarkTree(GenTree* node) }); } -#endif +#endif // TARGET_ARM64 //------------------------------------------------------------------------ // TransformUnusedIndirection: change the opcode and the type of the unused indirection. diff --git a/src/coreclr/jit/lower.h b/src/coreclr/jit/lower.h index f9d5a0899fe49..a7023fa751929 100644 --- a/src/coreclr/jit/lower.h +++ b/src/coreclr/jit/lower.h @@ -572,9 +572,14 @@ class Lowering final : public Phase #ifdef TARGET_ARM64 struct SavedIndir { - GenTreeIndir* Indir; - GenTreeLclVarCommon* AddrBase; - target_ssize_t Offset; + GenTreeIndir* Indir; + GenTreeLclVar* AddrBase; + target_ssize_t Offset; + + SavedIndir(GenTreeIndir* indir, GenTreeLclVar* addrBase, target_ssize_t offset) + : Indir(indir), AddrBase(addrBase), Offset(offset) + { + } }; ArrayStack m_blockIndirs; #endif From b9dfd8fcaae1239fce0ad1ab3731910bf8cb0dad Mon Sep 17 00:00:00 2001 From: Jakob Botsch Nielsen Date: Fri, 6 Oct 2023 22:10:09 +0200 Subject: [PATCH 17/19] Move call heuristic --- src/coreclr/jit/lower.cpp | 15 ++++++++------- 1 file changed, 8 insertions(+), 7 deletions(-) diff --git a/src/coreclr/jit/lower.cpp b/src/coreclr/jit/lower.cpp index d7262b30ae028..0f1132b005d68 100644 --- a/src/coreclr/jit/lower.cpp +++ b/src/coreclr/jit/lower.cpp @@ -8295,6 +8295,14 @@ bool Lowering::TryMakeIndirsAdjacent(GenTreeIndir* prevIndir, GenTreeIndir* indi cur = cur->gtNext; if (cur == indir) break; + + // We can reorder indirs with some calls, but introducing a LIR edge + // that spans a call can introduce spills (or callee-saves). + if (cur->IsCall() || (cur->OperIsStoreBlk() && (cur->AsBlk()->gtBlkOpKind == GenTreeBlk::BlkOpKindHelper))) + { + JITDUMP(" ..but they are separated by node [%06u] that kills registers\n", Compiler::dspTreeID(cur)); + return false; + } } if (cur != indir) @@ -8343,13 +8351,6 @@ bool Lowering::TryMakeIndirsAdjacent(GenTreeIndir* prevIndir, GenTreeIndir* indi for (GenTree* cur = prevIndir->gtNext; cur != indir; cur = cur->gtNext) { - if (cur->IsCall() || (cur->OperIsStoreBlk() && (cur->AsBlk()->gtBlkOpKind == GenTreeBlk::BlkOpKindHelper))) - { - JITDUMP("[Heuristic] Giving up due to node that kills registers [%06u]\n", Compiler::dspTreeID(cur)); - UnmarkTree(indir); - return false; - } - if ((cur->gtLIRFlags & LIR::Flags::Mark) != 0) { // 'cur' is part of data flow of 'indir', so we will be moving the From 586f0c8f75fa6cf29e2b49fa55b7e8ca6cd88ff3 Mon Sep 17 00:00:00 2001 From: Jakob Botsch Nielsen Date: Fri, 6 Oct 2023 22:18:38 +0200 Subject: [PATCH 18/19] Nit --- src/coreclr/jit/lower.cpp | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/src/coreclr/jit/lower.cpp b/src/coreclr/jit/lower.cpp index 0f1132b005d68..02d3fb02fb5df 100644 --- a/src/coreclr/jit/lower.cpp +++ b/src/coreclr/jit/lower.cpp @@ -8199,11 +8199,11 @@ GenTree* Lowering::LowerIndir(GenTreeIndir* ind) #ifdef TARGET_ARM64 // Max distance that we will try to move an indirection backwards to become -// adjacent to another indirection. 
As an empiric observation, increasing this -// number to 32 for the smoke_tests collection resulted in 3684 -> 3796 cases -// passing the distance check, but 82 out of these 112 extra cases were then -// rejected due to interference. So 16 seems like a good number to balance the -// throughput costs. +// adjacent to another indirection. As an empirical observation, increasing +// this number to 32 for the smoke_tests collection resulted in 3684 -> 3796 +// cases passing the distance check, but 82 out of these 112 extra cases were +// then rejected due to interference. So 16 seems like a good number to balance +// the throughput costs. const int LDP_REORDERING_MAX_DISTANCE = 16; //------------------------------------------------------------------------ From 86e919fcca965b28733f5e7bac38890da9ffbc9d Mon Sep 17 00:00:00 2001 From: Jakob Botsch Nielsen Date: Fri, 6 Oct 2023 22:29:16 +0200 Subject: [PATCH 19/19] Nit --- src/coreclr/jit/lower.cpp | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/coreclr/jit/lower.cpp b/src/coreclr/jit/lower.cpp index 02d3fb02fb5df..d4f9fdcd264bf 100644 --- a/src/coreclr/jit/lower.cpp +++ b/src/coreclr/jit/lower.cpp @@ -8364,7 +8364,8 @@ bool Lowering::TryMakeIndirsAdjacent(GenTreeIndir* prevIndir, GenTreeIndir* indi } else { - // Part of dataflow; add its effects. + // Not part of dataflow; add its effects that will move past + // 'indir'. m_scratchSideEffects.AddNode(comp, cur); } }
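
To make the pairing condition in OptimizeForLdp concrete, here is a minimal standalone C++ sketch of the check this series builds up: peel the constant displacement off each address, compare the bare base locals, and treat the two accesses as an ldp/stp candidate only when their offsets differ by exactly one access size. The types and names below (Addr, PeelOffsets, IsLdpCandidate) are hypothetical stand-ins for the JIT's GenTree/GT_LEA machinery, not its API.

#include <cstdint>
#include <cstdio>
#include <cstdlib>

// Hypothetical stand-in for an address expression: a numbered base local plus
// an accumulated constant displacement, mirroring GT_ADD(base, icon)/GT_LEA.
struct Addr
{
    unsigned baseLclNum; // which local variable the address is based on
    int64_t  offset;     // constant displacement from that base
};

// Plays the role of gtPeelOffsets: expose the bare base and the folded
// constant offset so the caller can compare bases directly. Here Addr is
// already flat; in the JIT this walks a chain of ADD/LEA nodes.
static void PeelOffsets(const Addr& addr, unsigned* base, int64_t* offset)
{
    *base   = addr.baseLclNum;
    *offset = addr.offset;
}

// Two same-typed accesses are ldp/stp candidates when they use the same base
// local and their offsets differ by exactly one access size, i.e. they touch
// adjacent memory slots.
static bool IsLdpCandidate(const Addr& a, const Addr& b, unsigned accessSize)
{
    unsigned baseA, baseB;
    int64_t  offsA, offsB;
    PeelOffsets(a, &baseA, &offsA);
    PeelOffsets(b, &baseB, &offsB);

    if (baseA != baseB)
    {
        return false; // different bases: cannot pair
    }

    return std::llabs(offsA - offsB) == (int64_t)accessSize;
}

int main()
{
    Addr first  = {1, 8};  // 8-byte load off V01 at +8
    Addr second = {1, 16}; // 8-byte load off V01 at +16: adjacent
    Addr far    = {1, 24}; // 8-byte load off V01 at +24: gap of one slot

    printf("adjacent pairable:     %d\n", IsLdpCandidate(first, second, 8));
    printf("non-adjacent pairable: %d\n", IsLdpCandidate(first, far, 8));
    return 0;
}

The sketch deliberately omits the other gates the patches apply before pairing: the type must come from the TYP_INT/TYP_LONG/TYP_DOUBLE/TYP_SIMD8/TYP_SIMD16 set, the indir must not be volatile, and the side-effect interference analysis must succeed while actually moving the second indirection.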
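Similarly, the "Move call heuristic" commit hoists the cheap distance/kill-node walk to the front of TryMakeIndirsAdjacent so that the expensive tree marking and interference checks only run for plausible pairs. The sketch below models that early-out under the same caveat: Node and Kind are hypothetical stand-ins for LIR GenTree nodes, not the JIT's types.

#include <cstdio>

enum class Kind { Plain, Indir, Call };

struct Node
{
    Kind  kind;
    Node* next; // forward link in execution order, like LIR gtNext
};

const int kMaxDistance = 16; // mirrors LDP_REORDERING_MAX_DISTANCE

// Walk forward from `first`; pairing is only attempted if `second` shows up
// within kMaxDistance nodes and no register-killing node (modeled here as a
// call) sits in between, since a LIR edge spanning a call can force spills.
static bool WithinReorderWindow(Node* first, Node* second)
{
    Node* cur = first;
    for (int i = 0; i < kMaxDistance; i++)
    {
        cur = cur->next;
        if (cur == nullptr)
        {
            return false; // ran off the end of the block
        }
        if (cur == second)
        {
            return true; // close enough, nothing harmful in between
        }
        if (cur->kind == Kind::Call)
        {
            return false; // a call between the indirs kills registers
        }
    }
    return false; // too far away
}

int main()
{
    Node b{Kind::Indir, nullptr};
    Node mid{Kind::Plain, &b};
    Node a{Kind::Indir, &mid};
    printf("across a plain node: %d\n", WithinReorderWindow(&a, &b));

    Node call{Kind::Call, &b};
    Node a2{Kind::Indir, &call};
    printf("across a call:       %d\n", WithinReorderWindow(&a2, &b));
    return 0;
}

Running this cheap walk before MarkTree and the interference scan is purely a throughput choice: the walk is O(16) and rejects most far-apart pairs, whereas the interference scan has to accumulate side effects node by node across the whole window.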