From 26db6c9534cfd22551d356657f00986dfac15cc0 Mon Sep 17 00:00:00 2001 From: EgorBo Date: Wed, 19 Mar 2025 04:37:49 +0100 Subject: [PATCH] Address Bruce's feedback --- src/coreclr/jit/clrjit.natvis | 1 + src/coreclr/jit/compiler.h | 1 + src/coreclr/jit/importercalls.cpp | 2 + src/coreclr/jit/loopcloning.cpp | 173 ++++++++++++++++++++++++++++-- src/coreclr/jit/loopcloning.h | 83 ++++++++++++++ src/coreclr/jit/loopcloningopts.h | 1 + 6 files changed, 250 insertions(+), 11 deletions(-) diff --git a/src/coreclr/jit/clrjit.natvis b/src/coreclr/jit/clrjit.natvis index 54661833ef8552..58fadf2251da47 100644 --- a/src/coreclr/jit/clrjit.natvis +++ b/src/coreclr/jit/clrjit.natvis @@ -266,6 +266,7 @@ Documentation for VS debugger format specifiers: https://learn.microsoft.com/vis (LcJaggedArrayOptInfo*)this,nd (LcMdArrayOptInfo*)this,nd + (LcSpanOptInfo*)this,nd diff --git a/src/coreclr/jit/compiler.h b/src/coreclr/jit/compiler.h index f0ecd63fab1a06..7e15f85cc2928e 100644 --- a/src/coreclr/jit/compiler.h +++ b/src/coreclr/jit/compiler.h @@ -8185,6 +8185,7 @@ class Compiler bool optIsStackLocalInvariant(FlowGraphNaturalLoop* loop, unsigned lclNum); bool optExtractArrIndex(GenTree* tree, ArrIndex* result, unsigned lhsNum, bool* topLevelIsFinal); + bool optExtractSpanIndex(GenTree* tree, SpanIndex* result); bool optReconstructArrIndexHelp(GenTree* tree, ArrIndex* result, unsigned lhsNum, bool* topLevelIsFinal); bool optReconstructArrIndex(GenTree* tree, ArrIndex* result); bool optIdentifyLoopOptInfo(FlowGraphNaturalLoop* loop, LoopCloneContext* context); diff --git a/src/coreclr/jit/importercalls.cpp b/src/coreclr/jit/importercalls.cpp index 23ceed3cde357d..06345b06f5bef8 100644 --- a/src/coreclr/jit/importercalls.cpp +++ b/src/coreclr/jit/importercalls.cpp @@ -3671,6 +3671,8 @@ GenTree* Compiler::impIntrinsic(CORINFO_CLASS_HANDLE clsHnd, case NI_System_Span_get_Item: case NI_System_ReadOnlySpan_get_Item: { + optMethodFlags |= OMF_HAS_ARRAYREF; + // Have index, stack pointer-to Span s on the stack. Expand to: // // For Span diff --git a/src/coreclr/jit/loopcloning.cpp b/src/coreclr/jit/loopcloning.cpp index 427f28850cd839..8e7e894dde2ce1 100644 --- a/src/coreclr/jit/loopcloning.cpp +++ b/src/coreclr/jit/loopcloning.cpp @@ -45,6 +45,22 @@ void ArrIndex::PrintBoundsCheckNodes(unsigned dim /* = -1 */) } } +//-------------------------------------------------------------------------------------------------- +// Print: debug print an SpanIndex struct in form: `V01[V02]`. +// +void SpanIndex::Print() +{ + printf("V%02d[V%02d]", lenLcl, indLcl); +} + +//-------------------------------------------------------------------------------------------------- +// PrintBoundsCheckNode: - debug print an SpanIndex struct bounds check node tree id +// +void SpanIndex::PrintBoundsCheckNode() +{ + Compiler::printTreeID(bndsChk); +} + #endif // DEBUG //-------------------------------------------------------------------------------------------------- @@ -115,6 +131,20 @@ GenTree* LC_Array::ToGenTree(Compiler* comp, BasicBlock* bb) return nullptr; } +//-------------------------------------------------------------------------------------------------- +// ToGenTree: Convert a Span.Length operation into a GenTree node. +// +// Arguments: +// comp - Compiler instance to allocate trees +// +// Return Values: +// Returns the gen tree representation for Span.Length +// +GenTree* LC_Span::ToGenTree(Compiler* comp) +{ + return comp->gtNewLclvNode(spanIndex->lenLcl, comp->lvaTable[spanIndex->lenLcl].lvType); +} + //-------------------------------------------------------------------------------------------------- // ToGenTree - Convert an "identifier" into a GenTree node. // @@ -138,6 +168,8 @@ GenTree* LC_Ident::ToGenTree(Compiler* comp, BasicBlock* bb) return comp->gtNewLclvNode(lclNum, comp->lvaTable[lclNum].lvType); case ArrAccess: return arrAccess.ToGenTree(comp, bb); + case SpanAccess: + return spanAccess.ToGenTree(comp); case Null: return comp->gtNewIconNode(0, TYP_REF); case ClassHandle: @@ -1080,6 +1112,10 @@ bool Compiler::optDeriveLoopCloningConditions(FlowGraphNaturalLoop* loop, LoopCl JitExpandArrayStack* optInfos = context->GetLoopOptInfo(loop->GetIndex()); assert(optInfos->Size() > 0); + // If we have spans, that means we have to be careful about the stride (see below). + // + bool hasSpans = false; + // We only need to check for iteration behavior if we have array checks. // bool checkIterationBehavior = false; @@ -1094,6 +1130,11 @@ bool Compiler::optDeriveLoopCloningConditions(FlowGraphNaturalLoop* loop, LoopCl checkIterationBehavior = true; break; + case LcOptInfo::LcSpan: + checkIterationBehavior = true; + hasSpans = true; + break; + case LcOptInfo::LcTypeTest: { LcTypeTestOptInfo* ttInfo = optInfo->AsLcTypeTestOptInfo(); @@ -1154,16 +1195,22 @@ bool Compiler::optDeriveLoopCloningConditions(FlowGraphNaturalLoop* loop, LoopCl } const bool isIncreasingLoop = iterInfo->IsIncreasingLoop(); - assert(isIncreasingLoop || iterInfo->IsDecreasingLoop()); + if (!isIncreasingLoop && !iterInfo->IsDecreasingLoop()) + { + // Normally, we reject weird-looking loops in optIsLoopClonable, but it's not the case + // when we have both GDVs and array checks inside such loops. + return false; + } // We already know that this is either increasing or decreasing loop and the // stride is (> 0) or (< 0). Here, just take the abs() value and check if it // is beyond the limit. int stride = abs(iterInfo->IterConst()); - if (stride >= 58) + static_assert_no_msg(INT32_MAX >= CORINFO_Array_MaxLength); + if (stride >= (INT32_MAX - (CORINFO_Array_MaxLength - 1) + 1)) { - // Array.MaxLength can have maximum of 0X7FFFFFC7 elements, so make sure + // Array.MaxLength can have maximum of 0x7fffffc7 elements, so make sure // the stride increment doesn't overflow or underflow the index. Hence, // the maximum stride limit is set to // (int.MaxValue - (Array.MaxLength - 1) + 1), which is @@ -1171,6 +1218,14 @@ bool Compiler::optDeriveLoopCloningConditions(FlowGraphNaturalLoop* loop, LoopCl return false; } + // We don't know exactly whether we might be dealing with a Span or not, + // but if we suspect we are, we need to be careful about the stride: + // As Span<>.Length can be INT32_MAX unlike arrays. + if (hasSpans && (stride > 1)) + { + return false; + } + LC_Ident ident; // Init conditions if (iterInfo->HasConstInit) @@ -1313,6 +1368,15 @@ bool Compiler::optDeriveLoopCloningConditions(FlowGraphNaturalLoop* loop, LoopCl context->EnsureArrayDerefs(loop->GetIndex())->Push(array); } break; + case LcOptInfo::LcSpan: + { + LcSpanOptInfo* spanInfo = optInfo->AsLcSpanOptInfo(); + LC_Span spanLen(&spanInfo->spanIndex); + LC_Ident spanLenIdent = LC_Ident::CreateSpanAccess(spanLen); + LC_Condition cond(opLimitCondition, LC_Expr(ident), LC_Expr(spanLenIdent)); + context->EnsureConditions(loop->GetIndex())->Push(cond); + } + break; case LcOptInfo::LcMdArray: { LcMdArrayOptInfo* mdArrInfo = optInfo->AsLcMdArrayOptInfo(); @@ -1455,10 +1519,6 @@ bool Compiler::optComputeDerefConditions(FlowGraphNaturalLoop* loop, LoopCloneCo JitExpandArrayStack* const arrayDeref = context->EnsureArrayDerefs(loop->GetIndex()); JitExpandArrayStack* const objDeref = context->EnsureObjDerefs(loop->GetIndex()); - // We currently expect to have at least one of these. - // - assert((arrayDeref->Size() != 0) || (objDeref->Size() != 0)); - // Generate the array dereference checks. // // For each array in the dereference list, construct a tree, @@ -1679,6 +1739,39 @@ void Compiler::optPerformStaticOptimizations(FlowGraphNaturalLoop* loop, DBEXEC(dynamicPath, optDebugLogLoopCloning(arrIndexInfo->arrIndex.useBlock, arrIndexInfo->stmt)); } break; + case LcOptInfo::LcSpan: + { + LcSpanOptInfo* spanIndexInfo = optInfo->AsLcSpanOptInfo(); + compCurBB = spanIndexInfo->spanIndex.useBlock; + GenTree* bndsChkNode = spanIndexInfo->spanIndex.bndsChk; + +#ifdef DEBUG + if (verbose) + { + printf("Remove bounds check "); + printTreeID(bndsChkNode->gtGetOp1()); + printf(" for " FMT_STMT ", ", spanIndexInfo->stmt->GetID()); + spanIndexInfo->spanIndex.Print(); + printf(", bounds check nodes: "); + spanIndexInfo->spanIndex.PrintBoundsCheckNode(); + printf("\n"); + } +#endif // DEBUG + + if (bndsChkNode->gtGetOp1()->OperIs(GT_BOUNDS_CHECK)) + { + optRemoveCommaBasedRangeCheck(bndsChkNode, spanIndexInfo->stmt); + } + else + { + JITDUMP(" Bounds check already removed\n"); + + // If the bounds check node isn't there, it better have been converted to a GT_NOP. + assert(bndsChkNode->gtGetOp1()->OperIs(GT_NOP)); + } + DBEXEC(dynamicPath, optDebugLogLoopCloning(spanIndexInfo->spanIndex.useBlock, spanIndexInfo->stmt)); + } + break; case LcOptInfo::LcMdArray: // TODO-CQ: CLONE: Implement. break; @@ -1913,7 +2006,6 @@ BasicBlock* Compiler::optInsertLoopChoiceConditions(LoopCloneContext* contex BasicBlock* insertAfter) { JITDUMP("Inserting loop " FMT_LP " loop choice conditions\n", loop->GetIndex()); - assert(context->HasBlockConditions(loop->GetIndex())); assert(slowPreheader != nullptr); if (context->HasBlockConditions(loop->GetIndex())) @@ -2087,9 +2179,6 @@ void Compiler::optCloneLoop(FlowGraphNaturalLoop* loop, LoopCloneContext* contex // ... // slowPreheader --> slowHeader // - // We should always have block conditions. - - assert(context->HasBlockConditions(loop->GetIndex())); // If any condition is false, go to slowPreheader (which branches or falls through to header of the slow loop). BasicBlock* slowHeader = nullptr; @@ -2272,6 +2361,44 @@ bool Compiler::optExtractArrIndex(GenTree* tree, ArrIndex* result, unsigned lhsN return true; } +//--------------------------------------------------------------------------------------------------------------- +// optExtractSpanIndex: Try to extract the Span element access from "tree". +// +// Arguments: +// tree - the tree to be checked if it is the Span [] operation. +// result - the extracted information is updated in result. +// +// Return Value: +// Returns true if Span index can be extracted, else, return false. +// +// Notes: +// The way loop cloning works for Span is that we don't actually know (or care) +// if it's a Span or an array, we just extract index and length locals out +/// of the GT_BOUNDS_CHECK node. The fact that the length is a local var +/// allows us to not worry about array/span dereferencing. +// +bool Compiler::optExtractSpanIndex(GenTree* tree, SpanIndex* result) +{ + // Bounds checks are almost always wrapped in a comma node + // and are the first operand. + if (!tree->OperIs(GT_COMMA) || !tree->gtGetOp1()->OperIs(GT_BOUNDS_CHECK)) + { + return false; + } + + GenTreeBoundsChk* arrBndsChk = tree->gtGetOp1()->AsBoundsChk(); + if (!arrBndsChk->GetIndex()->OperIs(GT_LCL_VAR) || !arrBndsChk->GetArrayLength()->OperIs(GT_LCL_VAR)) + { + return false; + } + + result->lenLcl = arrBndsChk->GetArrayLength()->AsLclVarCommon()->GetLclNum(); + result->indLcl = arrBndsChk->GetIndex()->AsLclVarCommon()->GetLclNum(); + result->bndsChk = tree; + result->useBlock = compCurBB; + return true; +} + //--------------------------------------------------------------------------------------------------------------- // optReconstructArrIndexHelp: Helper function for optReconstructArrIndex. See that function for more details. // @@ -2535,6 +2662,30 @@ Compiler::fgWalkResult Compiler::optCanOptimizeByLoopCloning(GenTree* tree, Loop return WALK_SKIP_SUBTREES; } + SpanIndex spanIndex = SpanIndex(); + if (info->cloneForArrayBounds && optExtractSpanIndex(tree, &spanIndex)) + { + // Check that the span's length local variable is invariant within the loop body. + if (!optIsStackLocalInvariant(info->loop, spanIndex.lenLcl)) + { + JITDUMP("Span.Length V%02d is not loop invariant\n", spanIndex.lenLcl); + return WALK_SKIP_SUBTREES; + } + + unsigned iterVar = info->context->GetLoopIterInfo(info->loop->GetIndex())->IterVar; + if (spanIndex.indLcl == iterVar) + { + // Update the loop context. + info->context->EnsureLoopOptInfo(info->loop->GetIndex()) + ->Push(new (this, CMK_LoopOpt) LcSpanOptInfo(spanIndex, info->stmt)); + } + else + { + JITDUMP("Induction V%02d is not used as index\n", iterVar); + } + return WALK_SKIP_SUBTREES; + } + if (info->cloneForGDVTests && tree->OperIs(GT_JTRUE)) { JITDUMP("...GDV considering [%06u]\n", dspTreeID(tree)); diff --git a/src/coreclr/jit/loopcloning.h b/src/coreclr/jit/loopcloning.h index cfda1be87a8b9d..ecdda09775f87a 100644 --- a/src/coreclr/jit/loopcloning.h +++ b/src/coreclr/jit/loopcloning.h @@ -211,6 +211,28 @@ struct ArrIndex #endif }; +// SpanIndex represents a span element access and associated bounds check. +struct SpanIndex +{ + unsigned lenLcl; // The Span length local num + unsigned indLcl; // The index local num + GenTree* bndsChk; // The bounds check node + BasicBlock* useBlock; // Block where the [] occurs + + SpanIndex() + : lenLcl(BAD_VAR_NUM) + , indLcl(BAD_VAR_NUM) + , bndsChk(nullptr) + , useBlock(nullptr) + { + } + +#ifdef DEBUG + void Print(); + void PrintBoundsCheckNode(); +#endif +}; + // Forward declarations #define LC_OPT(en) struct en##OptInfo; #include "loopcloningopts.h" @@ -317,6 +339,21 @@ struct LcJaggedArrayOptInfo : public LcOptInfo } }; +// Optimization info for a Span +// +struct LcSpanOptInfo : public LcOptInfo +{ + SpanIndex spanIndex; // SpanIndex representation of the Span. + Statement* stmt; // "stmt" where the optimization opportunity occurs. + + LcSpanOptInfo(SpanIndex& spanIndex, Statement* stmt) + : LcOptInfo(LcSpan) + , spanIndex(spanIndex) + , stmt(stmt) + { + } +}; + // Optimization info for a type test // struct LcTypeTestOptInfo : public LcOptInfo @@ -481,6 +518,38 @@ struct LC_Array GenTree* ToGenTree(Compiler* comp, BasicBlock* bb); }; +// Symbolic representation of Span.Length +struct LC_Span +{ + SpanIndex* spanIndex; + +#ifdef DEBUG + void Print() + { + spanIndex->Print(); + } +#endif + + LC_Span() + : spanIndex(nullptr) + { + } + + LC_Span(SpanIndex* arrIndex) + : spanIndex(arrIndex) + { + } + + // Equality operator + bool operator==(const LC_Span& that) const + { + return (spanIndex->lenLcl == that.spanIndex->lenLcl) && (spanIndex->indLcl == that.spanIndex->indLcl); + } + + // Get a tree representation for this symbolic Span.Length + GenTree* ToGenTree(Compiler* comp); +}; + //------------------------------------------------------------------------ // LC_Ident: symbolic representation of "a value" // @@ -492,6 +561,7 @@ struct LC_Ident Const, Var, ArrAccess, + SpanAccess, Null, ClassHandle, IndirOfLocal, @@ -509,6 +579,7 @@ struct LC_Ident unsigned indirOffs; }; LC_Array arrAccess; + LC_Span spanAccess; CORINFO_CLASS_HANDLE clsHnd; struct { @@ -553,6 +624,8 @@ struct LC_Ident return (lclNum == that.lclNum) && (indirOffs == that.indirOffs); case ArrAccess: return (arrAccess == that.arrAccess); + case SpanAccess: + return (spanAccess == that.spanAccess); case Null: return true; case MethodAddr: @@ -598,6 +671,9 @@ struct LC_Ident case ArrAccess: arrAccess.Print(); break; + case SpanAccess: + spanAccess.Print(); + break; case Null: printf("null"); break; @@ -646,6 +722,13 @@ struct LC_Ident return id; } + static LC_Ident CreateSpanAccess(const LC_Span& spanLen) + { + LC_Ident id(SpanAccess); + id.spanAccess = spanLen; + return id; + } + static LC_Ident CreateNull() { return LC_Ident(Null); diff --git a/src/coreclr/jit/loopcloningopts.h b/src/coreclr/jit/loopcloningopts.h index 2fb13937e2f86a..e27a3d802e11c4 100644 --- a/src/coreclr/jit/loopcloningopts.h +++ b/src/coreclr/jit/loopcloningopts.h @@ -13,5 +13,6 @@ LC_OPT(LcMdArray) LC_OPT(LcJaggedArray) LC_OPT(LcTypeTest) LC_OPT(LcMethodAddrTest) +LC_OPT(LcSpan) #undef LC_OPT