Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

JIT: Fix placement of GT_START_NOGC for tailcalls in face of bulk copy with write barrier calls #105551

Merged
merged 2 commits into from
Jul 26, 2024
Merged
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
16 changes: 16 additions & 0 deletions src/coreclr/jit/lir.cpp
Original file line number Diff line number Diff line change
@@ -1872,6 +1872,22 @@ GenTree* LIR::LastNode(GenTree** nodes, size_t numNodes)
return lastNode;
}

//------------------------------------------------------------------------
// LIR::FirstNode:
// Given two nodes in the same block range, find which node appears first.
//
// Arguments:
// node1 - The first node
// node2 - The second node
//
// Returns:
// Node that appears first.
//
GenTree* LIR::FirstNode(GenTree* node1, GenTree* node2)
{
return LastNode(node1, node2) == node1 ? node2 : node1;
}

#ifdef DEBUG
void GenTree::dumpLIRFlags()
{
1 change: 1 addition & 0 deletions src/coreclr/jit/lir.h
Original file line number Diff line number Diff line change
@@ -317,6 +317,7 @@ class LIR final

static GenTree* LastNode(GenTree* node1, GenTree* node2);
static GenTree* LastNode(GenTree** nodes, size_t numNodes);
static GenTree* FirstNode(GenTree* node1, GenTree* node2);
};

inline void GenTree::SetUnusedValue()
71 changes: 32 additions & 39 deletions src/coreclr/jit/lower.cpp
Original file line number Diff line number Diff line change
@@ -3060,52 +3060,22 @@ void Lowering::LowerFastTailCall(GenTreeCall* call)
// call could over-write the stack arg that is setup earlier.
ArrayStack<GenTree*> putargs(comp->getAllocator(CMK_ArrayStack));

for (CallArg& arg : call->gtArgs.EarlyArgs())
{
if (arg.GetEarlyNode()->OperIs(GT_PUTARG_STK))
{
putargs.Push(arg.GetEarlyNode());
}
}

for (CallArg& arg : call->gtArgs.LateArgs())
for (CallArg& arg : call->gtArgs.Args())
{
if (arg.GetLateNode()->OperIs(GT_PUTARG_STK))
if (arg.GetNode()->OperIs(GT_PUTARG_STK))
{
putargs.Push(arg.GetLateNode());
putargs.Push(arg.GetNode());
}
}

GenTree* startNonGCNode = nullptr;
if (!putargs.Empty())
{
// Get the earliest operand of the first PUTARG_STK node. We will make
// the required copies of args before this node.
bool unused;
GenTree* insertionPoint = BlockRange().GetTreeRange(putargs.Bottom(), &unused).FirstNode();
// Insert GT_START_NONGC node before we evaluate the PUTARG_STK args.
// Note that if there are no args to be setup on stack, no need to
// insert GT_START_NONGC node.
startNonGCNode = new (comp, GT_START_NONGC) GenTree(GT_START_NONGC, TYP_VOID);
BlockRange().InsertBefore(insertionPoint, startNonGCNode);

// Gc-interruptability in the following case:
// foo(a, b, c, d, e) { bar(a, b, c, d, e); }
// bar(a, b, c, d, e) { foo(a, b, d, d, e); }
//
// Since the instruction group starting from the instruction that sets up first
// stack arg to the end of the tail call is marked as non-gc interruptible,
// this will form a non-interruptible tight loop causing gc-starvation. To fix
// this we insert GT_NO_OP as embedded stmt before GT_START_NONGC, if the method
// has a single basic block and is not a GC-safe point. The presence of a single
// nop outside non-gc interruptible region will prevent gc starvation.
if ((comp->fgBBcount == 1) && !comp->compCurBB->HasFlag(BBF_GC_SAFE_POINT))
GenTree* firstPutargStk = putargs.Bottom(0);
for (int i = 1; i < putargs.Height(); i++)
{
assert(comp->fgFirstBB == comp->compCurBB);
GenTree* noOp = new (comp, GT_NO_OP) GenTree(GT_NO_OP, TYP_VOID);
BlockRange().InsertBefore(startNonGCNode, noOp);
firstPutargStk = LIR::FirstNode(firstPutargStk, putargs.Bottom(i));
}

// Since this is a fast tailcall each PUTARG_STK will place the argument in the
// _incoming_ arg space area. This will effectively overwrite our already existing
// incoming args that live in that area. If we have later uses of those args, this
@@ -3175,10 +3145,10 @@ void Lowering::LowerFastTailCall(GenTreeCall* call)
GenTree* lookForUsesFrom = put->gtNext;
if (overwrittenStart != argStart)
{
lookForUsesFrom = insertionPoint;
lookForUsesFrom = firstPutargStk;
}

RehomeArgForFastTailCall(callerArgLclNum, insertionPoint, lookForUsesFrom, call);
RehomeArgForFastTailCall(callerArgLclNum, firstPutargStk, lookForUsesFrom, call);
// The above call can introduce temps and invalidate the pointer.
callerArgDsc = comp->lvaGetDesc(callerArgLclNum);

@@ -3192,10 +3162,33 @@ void Lowering::LowerFastTailCall(GenTreeCall* call)
unsigned int fieldsEnd = fieldsFirst + callerArgDsc->lvFieldCnt;
for (unsigned int j = fieldsFirst; j < fieldsEnd; j++)
{
RehomeArgForFastTailCall(j, insertionPoint, lookForUsesFrom, call);
RehomeArgForFastTailCall(j, firstPutargStk, lookForUsesFrom, call);
}
}
}

// Now insert GT_START_NONGC node before we evaluate the first PUTARG_STK.
// Note that if there are no args to be setup on stack, no need to
// insert GT_START_NONGC node.
startNonGCNode = new (comp, GT_START_NONGC) GenTree(GT_START_NONGC, TYP_VOID);
BlockRange().InsertBefore(firstPutargStk, startNonGCNode);

// Gc-interruptability in the following case:
// foo(a, b, c, d, e) { bar(a, b, c, d, e); }
// bar(a, b, c, d, e) { foo(a, b, d, d, e); }
//
// Since the instruction group starting from the instruction that sets up first
// stack arg to the end of the tail call is marked as non-gc interruptible,
// this will form a non-interruptible tight loop causing gc-starvation. To fix
// this we insert GT_NO_OP as embedded stmt before GT_START_NONGC, if the method
// has a single basic block and is not a GC-safe point. The presence of a single
// nop outside non-gc interruptible region will prevent gc starvation.
if ((comp->fgBBcount == 1) && !comp->compCurBB->HasFlag(BBF_GC_SAFE_POINT))
{
assert(comp->fgFirstBB == comp->compCurBB);
GenTree* noOp = new (comp, GT_NO_OP) GenTree(GT_NO_OP, TYP_VOID);
BlockRange().InsertBefore(startNonGCNode, noOp);
}
}

// Insert GT_PROF_HOOK node to emit profiler tail call hook. This should be