diff --git a/src/coreclr/jit/lower.cpp b/src/coreclr/jit/lower.cpp index 2512f7488f1..314f1580d39 100644 --- a/src/coreclr/jit/lower.cpp +++ b/src/coreclr/jit/lower.cpp @@ -733,8 +733,7 @@ GenTree* Lowering::LowerNode(GenTree* node) break; case GT_ASYNC_CONTINUATION: - LowerAsyncContinuation(node); - break; + return LowerAsyncContinuation(node); case GT_RETURN_SUSPEND: LowerReturnSuspend(node); @@ -5424,14 +5423,21 @@ void Lowering::LowerRetSingleRegStructLclVar(GenTreeUnOp* ret) // Arguments: // asyncCont - Async continuation node // -void Lowering::LowerAsyncContinuation(GenTree* asyncCont) +// Returns: +// Next node to lower. +// +GenTree* Lowering::LowerAsyncContinuation(GenTree* asyncCont) { assert(asyncCont->OperIs(GT_ASYNC_CONTINUATION)); - // When the ASYNC_CONTINUATION was created as a result of - // StubHelpers.AsyncCallContinuation() the previous call hasn't been - // marked as an async call. We need to do that to get the right GC - // reporting behavior for the returned async continuation. + GenTree* next = asyncCont->gtNext; + + // When the ASYNC_CONTINUATION was created as a result of the + // AsyncCallContinuation() intrinsic the previous call hasn't been marked + // as an async call. We need to do that to get the right GC reporting + // behavior for the returned async continuation. Furthermore, we ensure the + // async continuation follows the call to simplify marking the registers + // busy in LSRA. GenTree* node = asyncCont; while (true) { @@ -5447,9 +5453,13 @@ void Lowering::LowerAsyncContinuation(GenTree* asyncCont) node->AsCall()->gtIsAsyncCall = true; } + BlockRange().Remove(asyncCont); + BlockRange().InsertAfter(node, asyncCont); break; } } + + return next; } //---------------------------------------------------------------------------------------------- diff --git a/src/coreclr/jit/lower.h b/src/coreclr/jit/lower.h index 940144f38ce..b0adf251eec 100644 --- a/src/coreclr/jit/lower.h +++ b/src/coreclr/jit/lower.h @@ -180,7 +180,7 @@ class Lowering final : public Phase GenTree* LowerStoreLocCommon(GenTreeLclVarCommon* lclVar); void LowerRetStruct(GenTreeUnOp* ret); void LowerRetSingleRegStructLclVar(GenTreeUnOp* ret); - void LowerAsyncContinuation(GenTree* asyncCont); + GenTree* LowerAsyncContinuation(GenTree* asyncCont); void LowerReturnSuspend(GenTree* retSuspend); void LowerRetFieldList(GenTreeOp* ret, GenTreeFieldList* fieldList); bool IsFieldListCompatibleWithReturn(GenTreeFieldList* fieldList); diff --git a/src/coreclr/jit/lsra.cpp b/src/coreclr/jit/lsra.cpp index 415cddc0fb0..6a1bc89c8c0 100644 --- a/src/coreclr/jit/lsra.cpp +++ b/src/coreclr/jit/lsra.cpp @@ -3902,11 +3902,6 @@ void LinearScan::processKills(RefPosition* killRefPosition) regsBusyUntilKill &= ~killRefPosition->getKilledRegisters(); INDEBUG(dumpLsraAllocationEvent(LSRA_EVENT_KILL_REGS, nullptr, REG_NA, nullptr, NONE, killRefPosition->getKilledRegisters())); - - if (killRefPosition->busyUntilNextKill) - { - regsBusyUntilKill |= killRefPosition->getKilledRegisters(); - } } //------------------------------------------------------------------------ diff --git a/src/coreclr/jit/lsra.h b/src/coreclr/jit/lsra.h index 7cdb8f50b2a..2a547b11b25 100644 --- a/src/coreclr/jit/lsra.h +++ b/src/coreclr/jit/lsra.h @@ -2603,9 +2603,6 @@ class RefPosition unsigned char liveVarUpperSave : 1; #endif - // For a phys reg, indicates that it should be marked as busy until the next time it is killed. - unsigned char busyUntilNextKill : 1; - #ifdef DEBUG // Minimum number registers that needs to be ensured while // constraining candidates for this ref position under @@ -2649,7 +2646,6 @@ class RefPosition , isLocalDefUse(false) , delayRegFree(false) , outOfOrder(false) - , busyUntilNextKill(false) #ifdef DEBUG , minRegCandidateCount(1) , rpNum(0) diff --git a/src/coreclr/jit/lsraarm.cpp b/src/coreclr/jit/lsraarm.cpp index 286dbb00d0c..8b7e2a9e086 100644 --- a/src/coreclr/jit/lsraarm.cpp +++ b/src/coreclr/jit/lsraarm.cpp @@ -634,11 +634,6 @@ int LinearScan::BuildNode(GenTree* tree) case GT_ASYNC_CONTINUATION: srcCount = 0; assert(dstCount == 1); - // We kill the continuation arg here to communicate to the - // selection phase that the argument is no longer busy. This is a - // hack to make sure we do not overwrite the continuation between - // the call and this node. - addKillForRegs(RBM_ASYNC_CONTINUATION_RET, currentLoc); BuildDef(tree, RBM_ASYNC_CONTINUATION_RET.GetIntRegSet()); break; diff --git a/src/coreclr/jit/lsraarm64.cpp b/src/coreclr/jit/lsraarm64.cpp index 9bf2105a571..a044509b464 100644 --- a/src/coreclr/jit/lsraarm64.cpp +++ b/src/coreclr/jit/lsraarm64.cpp @@ -1322,12 +1322,6 @@ int LinearScan::BuildNode(GenTree* tree) case GT_ASYNC_CONTINUATION: srcCount = 0; - assert(dstCount == 1); - // We kill the continuation arg here to communicate to the - // selection phase that the argument is no longer busy. This is a - // hack to make sure we do not overwrite the continuation between - // the call and this node. - addKillForRegs(RBM_ASYNC_CONTINUATION_RET, currentLoc); BuildDef(tree, RBM_ASYNC_CONTINUATION_RET.GetIntRegSet()); break; diff --git a/src/coreclr/jit/lsraarmarch.cpp b/src/coreclr/jit/lsraarmarch.cpp index 2610501b4b7..13fc2bd1ede 100644 --- a/src/coreclr/jit/lsraarmarch.cpp +++ b/src/coreclr/jit/lsraarmarch.cpp @@ -274,6 +274,11 @@ int LinearScan::BuildCall(GenTreeCall* call) buildInternalRegisterUses(); // Now generate defs and kills. + if (call->IsAsync() && compiler->compIsAsync() && !call->IsFastTailCall()) + { + MarkAsyncContinuationBusyForCall(call); + } + regMaskTP killMask = getKillSetForCall(call); if (dstCount > 0) { @@ -302,11 +307,6 @@ int LinearScan::BuildCall(GenTreeCall* call) } #endif // SWIFT_SUPPORT - if (call->IsAsync() && compiler->compIsAsync()) - { - MarkAsyncContinuationBusyForCall(call); - } - // No args are placed in registers anymore. placedArgRegs = RBM_NONE; numPlacedArgLocals = 0; diff --git a/src/coreclr/jit/lsrabuild.cpp b/src/coreclr/jit/lsrabuild.cpp index 3384ac44651..28c2bea643b 100644 --- a/src/coreclr/jit/lsrabuild.cpp +++ b/src/coreclr/jit/lsrabuild.cpp @@ -4754,16 +4754,13 @@ void LinearScan::MarkSwiftErrorBusyForCall(GenTreeCall* call) // void LinearScan::MarkAsyncContinuationBusyForCall(GenTreeCall* call) { - // Async calls return an async continuation argument in a separate - // register. Since we do not have a flexible representation for - // multiple definitions (multi-reg support is tied into promotion) we - // have to utilize a hack here to make it work. We expect the return - // value to be consumed by an upcoming ASYNC_CONTINUATION node, but we - // must take care not to overwrite the register until we get to that - // node. To accomplish that we mark the register as "busy until next - // kill" when we see the call's kill, and then we have - // ASYNC_CONTINUATION insert its own kill to free up the register - // again. - RefPosition* refPos = addKillForRegs(RBM_ASYNC_CONTINUATION_RET, currentLoc + 1); - refPos->busyUntilNextKill = true; + // We model the async continuation like the swift error register: we ensure + // the node follows the call in lowering, and make it delay freed to ensure + // nothing is allocated into the register between the call and + // ASYNC_CONTINUATION node. We need to add a kill here in the right spot as + // not all targets may naturally have one created. + assert(call->gtNext != nullptr); + assert(call->gtNext->OperIs(GT_ASYNC_CONTINUATION)); + RefPosition* refPos = addKillForRegs(RBM_ASYNC_CONTINUATION_RET, currentLoc + 1); + setDelayFree(refPos); } diff --git a/src/coreclr/jit/lsraloongarch64.cpp b/src/coreclr/jit/lsraloongarch64.cpp index 3dd551bd275..b1f699c95ce 100644 --- a/src/coreclr/jit/lsraloongarch64.cpp +++ b/src/coreclr/jit/lsraloongarch64.cpp @@ -565,12 +565,6 @@ int LinearScan::BuildNode(GenTree* tree) case GT_ASYNC_CONTINUATION: srcCount = 0; - assert(dstCount == 1); - // We kill the continuation arg here to communicate to the - // selection phase that the argument is no longer busy. This is a - // hack to make sure we do not overwrite the continuation between - // the call and this node. - addKillForRegs(RBM_ASYNC_CONTINUATION_RET, currentLoc); BuildDef(tree, RBM_ASYNC_CONTINUATION_RET.GetIntRegSet()); break; @@ -793,6 +787,11 @@ int LinearScan::BuildCall(GenTreeCall* call) buildInternalRegisterUses(); // Now generate defs and kills. + if (call->IsAsync() && compiler->compIsAsync() && !call->IsFastTailCall()) + { + MarkAsyncContinuationBusyForCall(call); + } + regMaskTP killMask = getKillSetForCall(call); if (dstCount > 0) { @@ -814,11 +813,6 @@ int LinearScan::BuildCall(GenTreeCall* call) BuildKills(call, killMask); } - if (call->IsAsync() && compiler->compIsAsync()) - { - MarkAsyncContinuationBusyForCall(call); - } - // No args are placed in registers anymore. placedArgRegs = RBM_NONE; numPlacedArgLocals = 0; diff --git a/src/coreclr/jit/lsrariscv64.cpp b/src/coreclr/jit/lsrariscv64.cpp index 9f5d26813bf..764c89f4b00 100644 --- a/src/coreclr/jit/lsrariscv64.cpp +++ b/src/coreclr/jit/lsrariscv64.cpp @@ -758,12 +758,6 @@ int LinearScan::BuildNode(GenTree* tree) case GT_ASYNC_CONTINUATION: srcCount = 0; - assert(dstCount == 1); - // We kill the continuation arg here to communicate to the - // selection phase that the argument is no longer busy. This is a - // hack to make sure we do not overwrite the continuation between - // the call and this node. - addKillForRegs(RBM_ASYNC_CONTINUATION_RET, currentLoc); BuildDef(tree, RBM_ASYNC_CONTINUATION_RET.GetIntRegSet()); break; @@ -998,6 +992,11 @@ int LinearScan::BuildCall(GenTreeCall* call) buildInternalRegisterUses(); // Now generate defs and kills. + if (call->IsAsync() && compiler->compIsAsync() && !call->IsFastTailCall()) + { + MarkAsyncContinuationBusyForCall(call); + } + regMaskTP killMask = getKillSetForCall(call); if (dstCount > 0) { @@ -1019,11 +1018,6 @@ int LinearScan::BuildCall(GenTreeCall* call) BuildKills(call, killMask); } - if (call->IsAsync() && compiler->compIsAsync()) - { - MarkAsyncContinuationBusyForCall(call); - } - // No args are placed in registers anymore. placedArgRegs = RBM_NONE; numPlacedArgLocals = 0; diff --git a/src/coreclr/jit/lsraxarch.cpp b/src/coreclr/jit/lsraxarch.cpp index e00e4345bd0..2a7e39be76e 100644 --- a/src/coreclr/jit/lsraxarch.cpp +++ b/src/coreclr/jit/lsraxarch.cpp @@ -629,12 +629,6 @@ int LinearScan::BuildNode(GenTree* tree) case GT_ASYNC_CONTINUATION: srcCount = 0; - assert(dstCount == 1); - // We kill the continuation arg here to communicate to the - // selection phase that the argument is no longer busy. This is a - // hack to make sure we do not overwrite the continuation between - // the call and this node. - addKillForRegs(RBM_ASYNC_CONTINUATION_RET, currentLoc); BuildDef(tree, RBM_ASYNC_CONTINUATION_RET.GetIntRegSet()); break; @@ -1369,6 +1363,11 @@ int LinearScan::BuildCall(GenTreeCall* call) buildInternalRegisterUses(); // Now generate defs and kills. + if (call->IsAsync() && compiler->compIsAsync() && !call->IsFastTailCall()) + { + MarkAsyncContinuationBusyForCall(call); + } + regMaskTP killMask = getKillSetForCall(call); if (dstCount > 0) { @@ -1397,11 +1396,6 @@ int LinearScan::BuildCall(GenTreeCall* call) } #endif // SWIFT_SUPPORT - if (call->IsAsync() && compiler->compIsAsync()) - { - MarkAsyncContinuationBusyForCall(call); - } - // No args are placed in registers anymore. placedArgRegs = RBM_NONE; numPlacedArgLocals = 0;