diff --git a/src/coreclr/jit/lir.cpp b/src/coreclr/jit/lir.cpp index feabec03d02ca..44570ea986404 100644 --- a/src/coreclr/jit/lir.cpp +++ b/src/coreclr/jit/lir.cpp @@ -1872,6 +1872,22 @@ GenTree* LIR::LastNode(GenTree** nodes, size_t numNodes) return lastNode; } +//------------------------------------------------------------------------ +// LIR::FirstNode: +// Given two nodes in the same block range, find which node appears first. +// +// Arguments: +// node1 - The first node +// node2 - The second node +// +// Returns: +// Node that appears first. +// +GenTree* LIR::FirstNode(GenTree* node1, GenTree* node2) +{ + return LastNode(node1, node2) == node1 ? node2 : node1; +} + #ifdef DEBUG void GenTree::dumpLIRFlags() { diff --git a/src/coreclr/jit/lir.h b/src/coreclr/jit/lir.h index 8a3a9a507a38b..a3271e832fa8d 100644 --- a/src/coreclr/jit/lir.h +++ b/src/coreclr/jit/lir.h @@ -317,6 +317,7 @@ class LIR final static GenTree* LastNode(GenTree* node1, GenTree* node2); static GenTree* LastNode(GenTree** nodes, size_t numNodes); + static GenTree* FirstNode(GenTree* node1, GenTree* node2); }; inline void GenTree::SetUnusedValue() diff --git a/src/coreclr/jit/lower.cpp b/src/coreclr/jit/lower.cpp index 1eafc393d498a..aa0581a8ceb39 100644 --- a/src/coreclr/jit/lower.cpp +++ b/src/coreclr/jit/lower.cpp @@ -3060,52 +3060,22 @@ void Lowering::LowerFastTailCall(GenTreeCall* call) // call could over-write the stack arg that is setup earlier. ArrayStack putargs(comp->getAllocator(CMK_ArrayStack)); - for (CallArg& arg : call->gtArgs.EarlyArgs()) - { - if (arg.GetEarlyNode()->OperIs(GT_PUTARG_STK)) - { - putargs.Push(arg.GetEarlyNode()); - } - } - - for (CallArg& arg : call->gtArgs.LateArgs()) + for (CallArg& arg : call->gtArgs.Args()) { - if (arg.GetLateNode()->OperIs(GT_PUTARG_STK)) + if (arg.GetNode()->OperIs(GT_PUTARG_STK)) { - putargs.Push(arg.GetLateNode()); + putargs.Push(arg.GetNode()); } } GenTree* startNonGCNode = nullptr; if (!putargs.Empty()) { - // Get the earliest operand of the first PUTARG_STK node. We will make - // the required copies of args before this node. - bool unused; - GenTree* insertionPoint = BlockRange().GetTreeRange(putargs.Bottom(), &unused).FirstNode(); - // Insert GT_START_NONGC node before we evaluate the PUTARG_STK args. - // Note that if there are no args to be setup on stack, no need to - // insert GT_START_NONGC node. - startNonGCNode = new (comp, GT_START_NONGC) GenTree(GT_START_NONGC, TYP_VOID); - BlockRange().InsertBefore(insertionPoint, startNonGCNode); - - // Gc-interruptability in the following case: - // foo(a, b, c, d, e) { bar(a, b, c, d, e); } - // bar(a, b, c, d, e) { foo(a, b, d, d, e); } - // - // Since the instruction group starting from the instruction that sets up first - // stack arg to the end of the tail call is marked as non-gc interruptible, - // this will form a non-interruptible tight loop causing gc-starvation. To fix - // this we insert GT_NO_OP as embedded stmt before GT_START_NONGC, if the method - // has a single basic block and is not a GC-safe point. The presence of a single - // nop outside non-gc interruptible region will prevent gc starvation. - if ((comp->fgBBcount == 1) && !comp->compCurBB->HasFlag(BBF_GC_SAFE_POINT)) + GenTree* firstPutargStk = putargs.Bottom(0); + for (int i = 1; i < putargs.Height(); i++) { - assert(comp->fgFirstBB == comp->compCurBB); - GenTree* noOp = new (comp, GT_NO_OP) GenTree(GT_NO_OP, TYP_VOID); - BlockRange().InsertBefore(startNonGCNode, noOp); + firstPutargStk = LIR::FirstNode(firstPutargStk, putargs.Bottom(i)); } - // Since this is a fast tailcall each PUTARG_STK will place the argument in the // _incoming_ arg space area. This will effectively overwrite our already existing // incoming args that live in that area. If we have later uses of those args, this @@ -3175,10 +3145,10 @@ void Lowering::LowerFastTailCall(GenTreeCall* call) GenTree* lookForUsesFrom = put->gtNext; if (overwrittenStart != argStart) { - lookForUsesFrom = insertionPoint; + lookForUsesFrom = firstPutargStk; } - RehomeArgForFastTailCall(callerArgLclNum, insertionPoint, lookForUsesFrom, call); + RehomeArgForFastTailCall(callerArgLclNum, firstPutargStk, lookForUsesFrom, call); // The above call can introduce temps and invalidate the pointer. callerArgDsc = comp->lvaGetDesc(callerArgLclNum); @@ -3192,10 +3162,33 @@ void Lowering::LowerFastTailCall(GenTreeCall* call) unsigned int fieldsEnd = fieldsFirst + callerArgDsc->lvFieldCnt; for (unsigned int j = fieldsFirst; j < fieldsEnd; j++) { - RehomeArgForFastTailCall(j, insertionPoint, lookForUsesFrom, call); + RehomeArgForFastTailCall(j, firstPutargStk, lookForUsesFrom, call); } } } + + // Now insert GT_START_NONGC node before we evaluate the first PUTARG_STK. + // Note that if there are no args to be setup on stack, no need to + // insert GT_START_NONGC node. + startNonGCNode = new (comp, GT_START_NONGC) GenTree(GT_START_NONGC, TYP_VOID); + BlockRange().InsertBefore(firstPutargStk, startNonGCNode); + + // Gc-interruptability in the following case: + // foo(a, b, c, d, e) { bar(a, b, c, d, e); } + // bar(a, b, c, d, e) { foo(a, b, d, d, e); } + // + // Since the instruction group starting from the instruction that sets up first + // stack arg to the end of the tail call is marked as non-gc interruptible, + // this will form a non-interruptible tight loop causing gc-starvation. To fix + // this we insert GT_NO_OP as embedded stmt before GT_START_NONGC, if the method + // has a single basic block and is not a GC-safe point. The presence of a single + // nop outside non-gc interruptible region will prevent gc starvation. + if ((comp->fgBBcount == 1) && !comp->compCurBB->HasFlag(BBF_GC_SAFE_POINT)) + { + assert(comp->fgFirstBB == comp->compCurBB); + GenTree* noOp = new (comp, GT_NO_OP) GenTree(GT_NO_OP, TYP_VOID); + BlockRange().InsertBefore(startNonGCNode, noOp); + } } // Insert GT_PROF_HOOK node to emit profiler tail call hook. This should be