diff --git a/src/coreclr/jit/compiler.h b/src/coreclr/jit/compiler.h index a575ac3a4c3dc..7eaa5bd75f039 100644 --- a/src/coreclr/jit/compiler.h +++ b/src/coreclr/jit/compiler.h @@ -3044,7 +3044,7 @@ class Compiler bool gtStoreDefinesField( LclVarDsc* fieldVarDsc, ssize_t offset, unsigned size, ssize_t* pFieldStoreOffset, unsigned* pFieldStoreSize); - void gtPeelOffsets(GenTree** addr, target_ssize_t* offset, FieldSeq** fldSeq); + void gtPeelOffsets(GenTree** addr, target_ssize_t* offset, FieldSeq** fldSeq = nullptr); // Return true if call is a recursive call; return false otherwise. // Note when inlining, this looks for calls back to the root method. diff --git a/src/coreclr/jit/lower.cpp b/src/coreclr/jit/lower.cpp index 07f1f95ff4ebd..d4f9fdcd264bf 100644 --- a/src/coreclr/jit/lower.cpp +++ b/src/coreclr/jit/lower.cpp @@ -409,8 +409,9 @@ GenTree* Lowering::LowerNode(GenTree* node) { case GT_NULLCHECK: case GT_IND: - LowerIndir(node->AsIndir()); - break; + { + return LowerIndir(node->AsIndir()); + } case GT_STOREIND: LowerStoreIndirCommon(node->AsStoreInd()); @@ -7354,6 +7355,9 @@ void Lowering::LowerBlock(BasicBlock* block) assert(block->isEmpty() || block->IsLIR()); m_block = block; +#ifdef TARGET_ARM64 + m_blockIndirs.Reset(); +#endif // NOTE: some of the lowering methods insert calls before the node being // lowered (See e.g. InsertPInvoke{Method,Call}{Prolog,Epilog}). In @@ -8131,8 +8135,10 @@ void Lowering::LowerStoreIndirCommon(GenTreeStoreInd* ind) // Arguments: // ind - the ind node we are lowering. // -void Lowering::LowerIndir(GenTreeIndir* ind) +GenTree* Lowering::LowerIndir(GenTreeIndir* ind) { + GenTree* next = ind->gtNext; + assert(ind->OperIs(GT_IND, GT_NULLCHECK)); // Process struct typed indirs separately unless they are unused; // they only appear as the source of a block copy operation or a return node. @@ -8179,8 +8185,352 @@ void Lowering::LowerIndir(GenTreeIndir* ind) const bool isContainable = false; TryCreateAddrMode(ind->Addr(), isContainable, ind); } + +#ifdef TARGET_ARM64 + if (comp->opts.OptimizationEnabled() && ind->OperIs(GT_IND)) + { + OptimizeForLdp(ind); + } +#endif + + return next; +} + +#ifdef TARGET_ARM64 + +// Max distance that we will try to move an indirection backwards to become +// adjacent to another indirection. As an empirical observation, increasing +// this number to 32 for the smoke_tests collection resulted in 3684 -> 3796 +// cases passing the distance check, but 82 out of these 112 extra cases were +// then rejected due to interference. So 16 seems like a good number to balance +// the throughput costs. +const int LDP_REORDERING_MAX_DISTANCE = 16; + +//------------------------------------------------------------------------ +// OptimizeForLdp: Record information about an indirection, and try to optimize +// it by moving it to be adjacent with a previous indirection such that they +// can be transformed into 'ldp'. +// +// Arguments: +// ind - Indirection to record and to try to move. +// +// Returns: +// True if the optimization was successful. +// +bool Lowering::OptimizeForLdp(GenTreeIndir* ind) +{ + if (!ind->TypeIs(TYP_INT, TYP_LONG, TYP_DOUBLE, TYP_SIMD8, TYP_SIMD16) || ind->IsVolatile()) + { + return false; + } + + target_ssize_t offs = 0; + GenTree* addr = ind->Addr(); + comp->gtPeelOffsets(&addr, &offs); + + if (!addr->OperIs(GT_LCL_VAR)) + { + return false; + } + + // Every indirection takes an expected 2+ nodes, so we only expect at most + // half the reordering distance to be candidates for the optimization. 
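+ // With LDP_REORDERING_MAX_DISTANCE = 16 this caps the scan at the 8 most
+ // recently recorded indirections.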
+ int maxCount = min(m_blockIndirs.Height(), LDP_REORDERING_MAX_DISTANCE / 2); + for (int i = 0; i < maxCount; i++) + { + SavedIndir& prev = m_blockIndirs.TopRef(i); + if (prev.AddrBase->GetLclNum() != addr->AsLclVar()->GetLclNum()) + { + continue; + } + + GenTreeIndir* prevIndir = prev.Indir; + if ((prevIndir == nullptr) || (prevIndir->TypeGet() != ind->TypeGet())) + { + continue; + } + + JITDUMP("[%06u] and [%06u] are indirs off the same base with offsets +%03u and +%03u\n", + Compiler::dspTreeID(ind), Compiler::dspTreeID(prevIndir), (unsigned)offs, (unsigned)prev.Offset); + if (abs(offs - prev.Offset) == genTypeSize(ind)) + { + JITDUMP(" ..and they are amenable to ldp optimization\n"); + if (TryMakeIndirsAdjacent(prevIndir, ind)) + { + // Do not let the previous one participate in + // another instance; that can cause us to e.g. convert + // *(x+4), *(x+0), *(x+8), *(x+12) => + // *(x+4), *(x+8), *(x+0), *(x+12) + prev.Indir = nullptr; + return true; + } + break; + } + else + { + JITDUMP(" ..but at non-adjacent offset\n"); + } + } + + m_blockIndirs.Emplace(ind, addr->AsLclVar(), offs); + return false; +} + +//------------------------------------------------------------------------ +// TryMakeIndirsAdjacent: Try to prove that it is legal to move an indirection +// to be adjacent to a previous indirection. If successful, perform the move. +// +// Arguments: +// prevIndir - Previous indirection +// indir - Indirection to try to move to be adjacent to 'prevIndir' +// +// Returns: +// True if the optimization was successful. +// +bool Lowering::TryMakeIndirsAdjacent(GenTreeIndir* prevIndir, GenTreeIndir* indir) +{ + GenTree* cur = prevIndir; + for (int i = 0; i < LDP_REORDERING_MAX_DISTANCE; i++) + { + cur = cur->gtNext; + if (cur == indir) + break; + + // We can reorder indirs with some calls, but introducing a LIR edge + // that spans a call can introduce spills (or callee-saves). + if (cur->IsCall() || (cur->OperIsStoreBlk() && (cur->AsBlk()->gtBlkOpKind == GenTreeBlk::BlkOpKindHelper))) + { + JITDUMP(" ..but they are separated by node [%06u] that kills registers\n", Compiler::dspTreeID(cur)); + return false; + } + } + + if (cur != indir) + { + JITDUMP(" ..but they are too far separated\n"); + return false; + } + + JITDUMP( + " ..and they are close. Trying to move the following range (where * are nodes part of the data flow):\n\n"); +#ifdef DEBUG + bool isClosed; + GenTree* startDumpNode = BlockRange().GetTreeRange(prevIndir, &isClosed).FirstNode(); + GenTree* endDumpNode = indir->gtNext; + + auto dumpWithMarks = [=]() { + if (!comp->verbose) + { + return; + } + + for (GenTree* node = startDumpNode; node != endDumpNode; node = node->gtNext) + { + const char* prefix; + if (node == prevIndir) + prefix = "1. "; + else if (node == indir) + prefix = "2. "; + else if ((node->gtLIRFlags & LIR::Flags::Mark) != 0) + prefix = "* "; + else + prefix = " "; + + comp->gtDispLIRNode(node, prefix); + } + }; + +#endif + + MarkTree(indir); + + INDEBUG(dumpWithMarks()); + JITDUMP("\n"); + + m_scratchSideEffects.Clear(); + + for (GenTree* cur = prevIndir->gtNext; cur != indir; cur = cur->gtNext) + { + if ((cur->gtLIRFlags & LIR::Flags::Mark) != 0) + { + // 'cur' is part of data flow of 'indir', so we will be moving the + // currently recorded effects past 'cur'. 
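+ // If those recorded effects (from the unmarked nodes seen so far) cannot
+ // be reordered with 'cur', the transformation is not legal; give up.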
+ if (m_scratchSideEffects.InterferesWith(comp, cur, true)) + { + JITDUMP("Giving up due to interference with [%06u]\n", Compiler::dspTreeID(cur)); + UnmarkTree(indir); + return false; + } + } + else + { + // Not part of dataflow; add its effects that will move past + // 'indir'. + m_scratchSideEffects.AddNode(comp, cur); + } + } + + if (m_scratchSideEffects.InterferesWith(comp, indir, true)) + { + // Try a bit harder, making use of the following facts: + // + // 1. We know the indir is non-faulting, so we do not need to worry + // about reordering exceptions + // + // 2. We can reorder with non-volatile INDs even if they have + // GTF_ORDER_SIDEEFF; these indirs only have GTF_ORDER_SIDEEFF due to + // being non-faulting + // + // 3. We can also reorder with non-volatile STOREINDs if we can prove + // no aliasing. We can do that for two common cases: + // * The addresses are based on the same local but at distinct offset ranges + // * The addresses are based off TYP_REF bases at distinct offset ranges + + JITDUMP("Have conservative interference with last indir. Trying a smarter interference check...\n"); + + GenTree* indirAddr = indir->Addr(); + target_ssize_t offs = 0; + comp->gtPeelOffsets(&indirAddr, &offs); + + bool checkLocal = indirAddr->OperIsLocal(); + if (checkLocal) + { + unsigned lclNum = indirAddr->AsLclVarCommon()->GetLclNum(); + checkLocal = !comp->lvaGetDesc(lclNum)->IsAddressExposed() && !m_scratchSideEffects.WritesLocal(lclNum); + } + + // Helper lambda to check if a single node interferes with 'indir'. + auto interferes = [=](GenTree* node) { + if (((node->gtFlags & GTF_ORDER_SIDEEFF) != 0) && node->OperSupportsOrderingSideEffect()) + { + // Cannot normally reorder GTF_ORDER_SIDEEFF and GTF_GLOB_REF, + // except for some of the known cases described above. + if (!node->OperIs(GT_IND, GT_BLK, GT_STOREIND, GT_STORE_BLK) || node->AsIndir()->IsVolatile()) + { + return true; + } + } + + AliasSet::NodeInfo nodeInfo(comp, node); + + if (nodeInfo.WritesAddressableLocation()) + { + if (!node->OperIs(GT_STOREIND, GT_STORE_BLK)) + { + return true; + } + + GenTreeIndir* store = node->AsIndir(); + GenTree* storeAddr = store->Addr(); + target_ssize_t storeOffs = 0; + comp->gtPeelOffsets(&storeAddr, &storeOffs); + + bool distinct = (storeOffs + (target_ssize_t)store->Size() <= offs) || + (offs + (target_ssize_t)indir->Size() <= storeOffs); + + if (checkLocal && GenTree::Compare(indirAddr, storeAddr) && distinct) + { + JITDUMP("Cannot interfere with [%06u] since they are off the same local V%02u and indir range " + "[%03u..%03u) does not interfere with store range [%03u..%03u)\n", + Compiler::dspTreeID(node), indirAddr->AsLclVarCommon()->GetLclNum(), (unsigned)offs, + (unsigned)offs + indir->Size(), (unsigned)storeOffs, (unsigned)storeOffs + store->Size()); + } + // Two indirs off of TYP_REFs cannot overlap if their offset ranges are distinct. 
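+ // A TYP_REF always points at the start of an object (interior pointers
+ // are TYP_BYREF), so accesses at distinct object-relative ranges cannot
+ // alias, whether or not the two bases refer to the same object.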
+ else if (indirAddr->TypeIs(TYP_REF) && storeAddr->TypeIs(TYP_REF) && distinct) + { + JITDUMP("Cannot interfere with [%06u] since they are both off TYP_REF bases and indir range " + "[%03u..%03u) does not interfere with store range [%03u..%03u)\n", + Compiler::dspTreeID(node), (unsigned)offs, (unsigned)offs + indir->Size(), + (unsigned)storeOffs, (unsigned)storeOffs + store->Size()); + } + else + { + return true; + } + } + + return false; + }; + + for (GenTree* cur = indir->gtPrev; cur != prevIndir; cur = cur->gtPrev) + { + if ((cur->gtLIRFlags & LIR::Flags::Mark) != 0) + { + continue; + } + + if (interferes(cur)) + { + JITDUMP("Indir [%06u] interferes with [%06u]\n", Compiler::dspTreeID(indir), Compiler::dspTreeID(cur)); + UnmarkTree(indir); + return false; + } + } + } + + JITDUMP("Interference checks passed. Moving nodes that are not part of data flow of [%06u]\n\n", + Compiler::dspTreeID(indir)); + + GenTree* previous = prevIndir; + for (GenTree* node = prevIndir->gtNext;;) + { + GenTree* next = node->gtNext; + + if ((node->gtLIRFlags & LIR::Flags::Mark) != 0) + { + // Part of data flow. Move it to happen right after 'previous'. + BlockRange().Remove(node); + BlockRange().InsertAfter(previous, node); + previous = node; + } + + if (node == indir) + { + break; + } + + node = next; + } + + JITDUMP("Result:\n\n"); + INDEBUG(dumpWithMarks()); + JITDUMP("\n"); + UnmarkTree(indir); + return true; +} + +//------------------------------------------------------------------------ +// MarkTree: Mark trees involved in the computation of 'node' recursively. +// +// Arguments: +// node - Root node. +// +void Lowering::MarkTree(GenTree* node) +{ + node->gtLIRFlags |= LIR::Flags::Mark; + node->VisitOperands([=](GenTree* op) { + MarkTree(op); + return GenTree::VisitResult::Continue; + }); } +//------------------------------------------------------------------------ +// UnmarkTree: Unmark trees involved in the computation of 'node' recursively. +// +// Arguments: +// node - Root node. +// +void Lowering::UnmarkTree(GenTree* node) +{ + node->gtLIRFlags &= ~LIR::Flags::Mark; + node->VisitOperands([=](GenTree* op) { + UnmarkTree(op); + return GenTree::VisitResult::Continue; + }); +} + +#endif // TARGET_ARM64 + //------------------------------------------------------------------------ // TransformUnusedIndirection: change the opcode and the type of the unused indirection. 
// diff --git a/src/coreclr/jit/lower.h b/src/coreclr/jit/lower.h index 8ffe86c9a51b1..a7023fa751929 100644 --- a/src/coreclr/jit/lower.h +++ b/src/coreclr/jit/lower.h @@ -23,7 +23,11 @@ class Lowering final : public Phase { public: inline Lowering(Compiler* compiler, LinearScanInterface* lsra) - : Phase(compiler, PHASE_LOWERING), vtableCallTemp(BAD_VAR_NUM) + : Phase(compiler, PHASE_LOWERING) + , vtableCallTemp(BAD_VAR_NUM) +#ifdef TARGET_ARM64 + , m_blockIndirs(compiler->getAllocator(CMK_ArrayStack)) +#endif { m_lsra = (LinearScan*)lsra; assert(m_lsra); @@ -310,7 +314,11 @@ class Lowering final : public Phase // Per tree node member functions void LowerStoreIndirCommon(GenTreeStoreInd* ind); - void LowerIndir(GenTreeIndir* ind); + GenTree* LowerIndir(GenTreeIndir* ind); + bool OptimizeForLdp(GenTreeIndir* ind); + bool TryMakeIndirsAdjacent(GenTreeIndir* prevIndir, GenTreeIndir* indir); + void MarkTree(GenTree* root); + void UnmarkTree(GenTree* root); void LowerStoreIndir(GenTreeStoreInd* node); void LowerStoreIndirCoalescing(GenTreeStoreInd* node); GenTree* LowerAdd(GenTreeOp* node); @@ -560,6 +568,21 @@ class Lowering final : public Phase #ifdef FEATURE_FIXED_OUT_ARGS unsigned m_outgoingArgSpaceSize = 0; #endif + +#ifdef TARGET_ARM64 + struct SavedIndir + { + GenTreeIndir* Indir; + GenTreeLclVar* AddrBase; + target_ssize_t Offset; + + SavedIndir(GenTreeIndir* indir, GenTreeLclVar* addrBase, target_ssize_t offset) + : Indir(indir), AddrBase(addrBase), Offset(offset) + { + } + }; + ArrayStack m_blockIndirs; +#endif }; #endif // _LOWER_H_ diff --git a/src/coreclr/jit/morph.cpp b/src/coreclr/jit/morph.cpp index 24e08dd209706..763b65fd8f13d 100644 --- a/src/coreclr/jit/morph.cpp +++ b/src/coreclr/jit/morph.cpp @@ -9917,8 +9917,7 @@ GenTree* Compiler::fgMorphFinalizeIndir(GenTreeIndir* indir) // Check for a misaligned floating point indirection. GenTree* effAddr = addr->gtEffectiveVal(true); target_ssize_t offset; - FieldSeq* fldSeq; - gtPeelOffsets(&effAddr, &offset, &fldSeq); + gtPeelOffsets(&effAddr, &offset); if (((offset % genTypeSize(TYP_FLOAT)) != 0) || (effAddr->IsCnsIntOrI() && ((effAddr->AsIntConCommon()->IconValue() % genTypeSize(TYP_FLOAT)) != 0))) diff --git a/src/coreclr/jit/promotiondecomposition.cpp b/src/coreclr/jit/promotiondecomposition.cpp index 628d7c91a1c51..693e0ed30a2b6 100644 --- a/src/coreclr/jit/promotiondecomposition.cpp +++ b/src/coreclr/jit/promotiondecomposition.cpp @@ -1197,33 +1197,66 @@ class DecompositionPlan // Arguments: // addr - [in, out] The address node. // offset - [out] The sum of offset peeled such that ADD(addr, offset) is equivalent to the original addr. -// fldSeq - [out] The combined field sequence for all the peeled offsets. +// fldSeq - [out, optional] The combined field sequence for all the peeled offsets. 
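+// Notes:
+//    'fldSeq' may be omitted (passed as nullptr) by callers that only need the
+//    peeled offset. The function also peels the constant offset of a GT_LEA
+//    address mode that has no index.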
// void Compiler::gtPeelOffsets(GenTree** addr, target_ssize_t* offset, FieldSeq** fldSeq) { assert((*addr)->TypeIs(TYP_I_IMPL, TYP_BYREF, TYP_REF)); *offset = 0; - *fldSeq = nullptr; - while ((*addr)->OperIs(GT_ADD) && !(*addr)->gtOverflow()) + + if (fldSeq != nullptr) { - GenTree* op1 = (*addr)->gtGetOp1(); - GenTree* op2 = (*addr)->gtGetOp2(); + *fldSeq = nullptr; + } - if (op2->IsCnsIntOrI() && !op2->AsIntCon()->IsIconHandle()) + while (true) + { + if ((*addr)->OperIs(GT_ADD) && !(*addr)->gtOverflow()) { - assert(op2->TypeIs(TYP_I_IMPL)); - GenTreeIntCon* intCon = op2->AsIntCon(); - *offset += (target_ssize_t)intCon->IconValue(); - *fldSeq = m_fieldSeqStore->Append(*fldSeq, intCon->gtFieldSeq); - *addr = op1; + GenTree* op1 = (*addr)->gtGetOp1(); + GenTree* op2 = (*addr)->gtGetOp2(); + + if (op2->IsCnsIntOrI() && !op2->AsIntCon()->IsIconHandle()) + { + assert(op2->TypeIs(TYP_I_IMPL)); + GenTreeIntCon* intCon = op2->AsIntCon(); + *offset += (target_ssize_t)intCon->IconValue(); + + if (fldSeq != nullptr) + { + *fldSeq = m_fieldSeqStore->Append(*fldSeq, intCon->gtFieldSeq); + } + + *addr = op1; + } + else if (op1->IsCnsIntOrI() && !op1->AsIntCon()->IsIconHandle()) + { + assert(op1->TypeIs(TYP_I_IMPL)); + GenTreeIntCon* intCon = op1->AsIntCon(); + *offset += (target_ssize_t)intCon->IconValue(); + + if (fldSeq != nullptr) + { + *fldSeq = m_fieldSeqStore->Append(intCon->gtFieldSeq, *fldSeq); + } + + *addr = op2; + } + else + { + break; + } } - else if (op1->IsCnsIntOrI() && !op1->AsIntCon()->IsIconHandle()) + else if ((*addr)->OperIs(GT_LEA)) { - assert(op1->TypeIs(TYP_I_IMPL)); - GenTreeIntCon* intCon = op1->AsIntCon(); - *offset += (target_ssize_t)intCon->IconValue(); - *fldSeq = m_fieldSeqStore->Append(intCon->gtFieldSeq, *fldSeq); - *addr = op2; + GenTreeAddrMode* addrMode = (*addr)->AsAddrMode(); + if (addrMode->HasIndex()) + { + break; + } + + *offset += (target_ssize_t)addrMode->Offset(); + *addr = addrMode->Base(); } else { diff --git a/src/coreclr/jit/sideeffects.cpp b/src/coreclr/jit/sideeffects.cpp index 3627e5fa3083b..7ebb9865e9761 100644 --- a/src/coreclr/jit/sideeffects.cpp +++ b/src/coreclr/jit/sideeffects.cpp @@ -420,6 +420,21 @@ bool AliasSet::InterferesWith(const NodeInfo& other) const return other.IsLclVarWrite() && m_lclVarReads.Contains(other.LclNum()); } +//------------------------------------------------------------------------ +// AliasSet::WritesLocal: +// Returns true if this alias set contains a write to the specified local. +// +// Arguments: +// lclNum - The local number. +// +// Returns: +// True if so. +// +bool AliasSet::WritesLocal(unsigned lclNum) const +{ + return m_lclVarWrites.Contains(lclNum); +} + //------------------------------------------------------------------------ // AliasSet::Clear: // Clears the current alias set. 
diff --git a/src/coreclr/jit/sideeffects.h b/src/coreclr/jit/sideeffects.h index f16fe59c9bd48..d94622d9f0ca8 100644 --- a/src/coreclr/jit/sideeffects.h +++ b/src/coreclr/jit/sideeffects.h @@ -150,6 +150,7 @@ class AliasSet final void AddNode(Compiler* compiler, GenTree* node); bool InterferesWith(const AliasSet& other) const; bool InterferesWith(const NodeInfo& node) const; + bool WritesLocal(unsigned lclNum) const; void Clear(); }; @@ -180,6 +181,11 @@ class SideEffectSet final bool InterferesWith(const SideEffectSet& other, bool strict) const; bool InterferesWith(Compiler* compiler, GenTree* node, bool strict) const; void Clear(); + + bool WritesLocal(unsigned lclNum) const + { + return m_aliasSet.WritesLocal(lclNum); + } }; #endif // _SIDEEFFECTS_H_