From d9ea23e6ec0bf6062fb6cb5c46ad4288e4e7d742 Mon Sep 17 00:00:00 2001
From: Jakob Botsch Nielsen
Date: Thu, 5 Oct 2023 13:23:13 +0200
Subject: [PATCH 1/2] JIT: Support retbuf optimization for non 'lvIsTemp' locals

* Handle liveness for the LCL_ADDR definitions when we get to the call
* Remove lvIsTemp check from retbuf optimization
---
 src/coreclr/jit/compiler.h   |  3 +-
 src/coreclr/jit/gentree.h    |  2 +-
 src/coreclr/jit/lclmorph.cpp |  8 +---
 src/coreclr/jit/lclvars.cpp  |  6 ---
 src/coreclr/jit/liveness.cpp | 76 ++++++++++++++++++++++++++++++------
 5 files changed, 68 insertions(+), 27 deletions(-)

diff --git a/src/coreclr/jit/compiler.h b/src/coreclr/jit/compiler.h
index c98b1331bb831..214c1ac8d3c9e 100644
--- a/src/coreclr/jit/compiler.h
+++ b/src/coreclr/jit/compiler.h
@@ -4898,6 +4898,7 @@ class Compiler
 
     void fgLocalVarLivenessInit();
 
+    template <bool lowered>
     void fgPerNodeLocalVarLiveness(GenTree* node);
 
     void fgPerBlockLocalVarLiveness();
@@ -4909,7 +4910,7 @@ class Compiler
 
     void fgLiveVarAnalysis(bool updateInternalOnly = false);
 
-    void fgComputeLifeCall(VARSET_TP& life, GenTreeCall* call);
+    void fgComputeLifeCall(VARSET_TP& life, VARSET_VALARG_TP keepAliveVars, GenTreeCall* call);
 
     void fgComputeLifeTrackedLocalUse(VARSET_TP& life, LclVarDsc& varDsc, GenTreeLclVarCommon* node);
     bool fgComputeLifeTrackedLocalDef(VARSET_TP& life,
diff --git a/src/coreclr/jit/gentree.h b/src/coreclr/jit/gentree.h
index 525d55d94ecf4..96cb4c3fb803e 100644
--- a/src/coreclr/jit/gentree.h
+++ b/src/coreclr/jit/gentree.h
@@ -2217,7 +2217,7 @@ struct GenTree
 
     bool IsCall() const
     {
-        return OperGet() == GT_CALL;
+        return OperIs(GT_CALL);
     }
 
     inline bool IsHelperCall();
diff --git a/src/coreclr/jit/lclmorph.cpp b/src/coreclr/jit/lclmorph.cpp
index 11be8bdad550e..989e06b993c4a 100644
--- a/src/coreclr/jit/lclmorph.cpp
+++ b/src/coreclr/jit/lclmorph.cpp
@@ -721,12 +721,8 @@ class LocalAddressVisitor final : public GenTreeVisitor<LocalAddressVisitor>
             if (m_compiler->opts.compJitOptimizeStructHiddenBuffer && (callUser != nullptr) &&
                 IsValidLclAddr(lclNum, val.Offset()))
             {
-                // We will only attempt this optimization for locals that are:
-                // a) Not susceptible to liveness bugs (see "lvaSetHiddenBufferStructArg").
-                // b) Do not later turn into indirections.
-                //
-                bool isSuitableLocal =
-                    varTypeIsStruct(varDsc) && varDsc->lvIsTemp && !m_compiler->lvaIsImplicitByRefLocal(lclNum);
+                // We will only attempt this optimization for locals that do not later turn into indirections.
+                bool isSuitableLocal = varTypeIsStruct(varDsc) && !m_compiler->lvaIsImplicitByRefLocal(lclNum);
 #ifdef TARGET_X86
                 if (m_compiler->lvaIsArgAccessedViaVarArgsCookie(lclNum))
                 {
diff --git a/src/coreclr/jit/lclvars.cpp b/src/coreclr/jit/lclvars.cpp
index 8af56fa167317..07d9f327b129a 100644
--- a/src/coreclr/jit/lclvars.cpp
+++ b/src/coreclr/jit/lclvars.cpp
@@ -2553,12 +2553,6 @@ void Compiler::lvaSetVarAddrExposed(unsigned varNum DEBUGARG(AddressExposedReaso
 //    *do not* mark the local address-exposed and treat the call much like a local store node throughout
 //    the compilation.
 //
-//    TODO-ADDR-Bug: currently, we rely on these locals not being present in call argument lists,
-//    outside of the buffer address argument itself, as liveness - currently - treats the location node
-//    associated with the address itself as the definition point, and call arguments can be reordered
-//    rather arbitrarily. We should fix liveness to treat the call as the definition point instead and
-//    enable this optimization for "!lvIsTemp" locals.
-//
 void Compiler::lvaSetHiddenBufferStructArg(unsigned varNum)
 {
     LclVarDsc* varDsc = lvaGetDesc(varNum);
diff --git a/src/coreclr/jit/liveness.cpp b/src/coreclr/jit/liveness.cpp
index d32854e4224c7..de71c4f11757b 100644
--- a/src/coreclr/jit/liveness.cpp
+++ b/src/coreclr/jit/liveness.cpp
@@ -193,8 +193,13 @@ void Compiler::fgLocalVarLivenessInit()
 //    Call fgMarkUseDef for any Local variables encountered
 //
 // Arguments:
-//    tree       - The current node.
+//    tree - The current node.
 //
+// Template arguments:
+//    lowered - Whether or not this is liveness on lowered IR, where LCL_ADDRs
+//              on tracked locals may appear.
+//
+template <bool lowered>
 void Compiler::fgPerNodeLocalVarLiveness(GenTree* tree)
 {
     assert(tree != nullptr);
@@ -209,12 +214,31 @@ void Compiler::fgPerNodeLocalVarLiveness(GenTree* tree)
 
         case GT_LCL_VAR:
         case GT_LCL_FLD:
-        case GT_LCL_ADDR:
         case GT_STORE_LCL_VAR:
         case GT_STORE_LCL_FLD:
             fgMarkUseDef(tree->AsLclVarCommon());
             break;
 
+        case GT_LCL_ADDR:
+            if (lowered)
+            {
+                // If this is a definition of a retbuf then we process it as
+                // part of the GT_CALL node.
+                LclVarDsc* dsc = lvaGetDesc(tree->AsLclVarCommon());
+                LIR::Use   use;
+                if (varTypeIsStruct(dsc) && LIR::AsRange(compCurBB).TryGetUse(tree, &use))
+                {
+                    GenTree* user = use.User();
+                    if (user->IsCall() && (gtCallGetDefinedRetBufLclAddr(user->AsCall()) == tree))
+                    {
+                        break;
+                    }
+                }
+
+                fgMarkUseDef(tree->AsLclVarCommon());
+            }
+            break;
+
         case GT_IND:
         case GT_BLK:
             // For Volatile indirection, first mutate GcHeap/ByrefExposed
@@ -304,6 +328,12 @@ void Compiler::fgPerNodeLocalVarLiveness(GenTree* tree)
                     }
                 }
             }
+
+            GenTreeLclVarCommon* definedLcl = gtCallGetDefinedRetBufLclAddr(call);
+            if (definedLcl != nullptr)
+            {
+                fgMarkUseDef(definedLcl);
+            }
             break;
         }
 
@@ -421,7 +451,7 @@ void Compiler::fgPerBlockLocalVarLiveness()
         {
             for (GenTree* node : LIR::AsRange(block))
             {
-                fgPerNodeLocalVarLiveness(node);
+                fgPerNodeLocalVarLiveness<true>(node);
             }
         }
         else if (fgNodeThreading == NodeThreading::AllTrees)
@@ -431,7 +461,7 @@ void Compiler::fgPerBlockLocalVarLiveness()
                 compCurStmt = stmt;
                 for (GenTree* const node : stmt->TreeList())
                 {
-                    fgPerNodeLocalVarLiveness(node);
+                    fgPerNodeLocalVarLiveness<false>(node);
                 }
             }
         }
@@ -1378,10 +1408,11 @@ void Compiler::fgLiveVarAnalysis(bool updateInternalOnly)
 //    due to a GT_CALL node.
 //
 // Arguments:
-//    life - The live set that is being computed.
-//    call - The call node in question.
+//    life          - The live set that is being computed.
+//    keepAliveVars - Tracked locals that must be kept alive everywhere in the block.
+//    call          - The call node in question.
 //
-void Compiler::fgComputeLifeCall(VARSET_TP& life, GenTreeCall* call)
+void Compiler::fgComputeLifeCall(VARSET_TP& life, VARSET_VALARG_TP keepAliveVars, GenTreeCall* call)
 {
     assert(call != nullptr);
 
@@ -1408,7 +1439,7 @@ void Compiler::fgComputeLifeCall(VARSET_TP& life, GenTreeCall* call)
     // TODO: we should generate the code for saving to/restoring
     //       from the inlined N/Direct frame instead.
 
-    /* Is this call to unmanaged code? */
+    // Is this call to unmanaged code?
     if (call->IsUnmanaged() && compMethodRequiresPInvokeFrame())
     {
         // Get the FrameListRoot local and make it live.
@@ -1442,6 +1473,12 @@ void Compiler::fgComputeLifeCall(VARSET_TP& life, GenTreeCall* call)
             }
         }
     }
+
+    GenTreeLclVarCommon* definedLcl = gtCallGetDefinedRetBufLclAddr(call);
+    if (definedLcl != nullptr)
+    {
+        fgComputeLifeLocal(life, keepAliveVars, definedLcl);
+    }
 }
 
 //------------------------------------------------------------------------
@@ -1776,11 +1813,11 @@ void Compiler::fgComputeLife(VARSET_TP& life,
 AGAIN:
     assert(tree->OperGet() != GT_QMARK);
 
-    if (tree->gtOper == GT_CALL)
+    if (tree->IsCall())
     {
-        fgComputeLifeCall(life, tree->AsCall());
+        fgComputeLifeCall(life, keepAliveVars, tree->AsCall());
     }
-    else if (tree->OperIsNonPhiLocal() || tree->OperIs(GT_LCL_ADDR))
+    else if (tree->OperIsNonPhiLocal())
     {
         bool isDeadStore = fgComputeLifeLocal(life, keepAliveVars, tree);
         if (isDeadStore)
@@ -1886,7 +1923,7 @@ void Compiler::fgComputeLifeLIR(VARSET_TP& life, BasicBlock* block, VARSET_VALAR
                 }
                 else
                 {
-                    fgComputeLifeCall(life, call);
+                    fgComputeLifeCall(life, keepAliveVars, call);
                 }
                 break;
             }
@@ -1934,11 +1971,24 @@ void Compiler::fgComputeLifeLIR(VARSET_TP& life, BasicBlock* block, VARSET_VALAR
                 }
                 else
                 {
+                    // For LCL_ADDRs that are defined by being passed as a
+                    // retbuf we will handle them when we get to the call.
+                    LclVarDsc* dsc = lvaGetDesc(node->AsLclVarCommon());
+                    if (varTypeIsStruct(dsc))
+                    {
+                        LIR::Use addrUse;
+                        if (blockRange.TryGetUse(node, &addrUse) && addrUse.User()->IsCall() &&
+                            (gtCallGetDefinedRetBufLclAddr(addrUse.User()->AsCall()) == node))
+                        {
+                            break;
+                        }
+                    }
+
                     isDeadStore = fgComputeLifeLocal(life, keepAliveVars, node);
                     if (isDeadStore)
                     {
                         LIR::Use addrUse;
-                        if (blockRange.TryGetUse(node, &addrUse) && (addrUse.User()->OperIs(GT_STOREIND, GT_STORE_BLK)))
+                        if (blockRange.TryGetUse(node, &addrUse) && addrUse.User()->OperIs(GT_STOREIND, GT_STORE_BLK))
                         {
                             GenTreeIndir* const store = addrUse.User()->AsIndir();

From 1869691d6f2ac89d3bb9ef3b50f6840a9da04b2f Mon Sep 17 00:00:00 2001
From: Jakob Botsch Nielsen
Date: Sat, 7 Oct 2023 21:36:09 +0200
Subject: [PATCH 2/2] Shadow var pass fixes

---
 src/coreclr/jit/gschecks.cpp | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/src/coreclr/jit/gschecks.cpp b/src/coreclr/jit/gschecks.cpp
index 0953920d6192e..0a2fb4293c03f 100644
--- a/src/coreclr/jit/gschecks.cpp
+++ b/src/coreclr/jit/gschecks.cpp
@@ -416,6 +416,7 @@ void Compiler::gsParamsToShadows()
         shadowVarDsc->lvDoNotEnregister = varDsc->lvDoNotEnregister;
 #ifdef DEBUG
         shadowVarDsc->SetDoNotEnregReason(varDsc->GetDoNotEnregReason());
+        shadowVarDsc->SetHiddenBufferStructArg(varDsc->IsHiddenBufferStructArg());
 #endif
 
         if (varTypeIsStruct(type))
@@ -502,6 +503,7 @@ void Compiler::gsParamsToShadows()
         }
     }
 
+    compCurBB = fgFirstBB;
     // Now insert code to copy the params to their shadow copy.
     for (UINT lclNum = 0; lclNum < lvaOldCount; lclNum++)
    {
@@ -520,6 +522,7 @@ void Compiler::gsParamsToShadows()
         fgEnsureFirstBBisScratch();
         (void)fgNewStmtAtBeg(fgFirstBB, fgMorphTree(store));
     }
+    compCurBB = nullptr;
 
     // If the method has "Jmp CalleeMethod", then we need to copy shadow params back to original
     // params before "jmp" to CalleeMethod.
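
For reviewers less familiar with the hidden return buffer ("retbuf") convention, here is a rough C++ analogue of the pattern these changes are about. The types and names below are hypothetical and are not taken from the CoreCLR sources; the sketch only illustrates why the call, rather than the LCL_ADDR node, has to be treated as the definition point of the local once non-temp locals may be used as the buffer.

// Hypothetical illustration only; names are not from the CoreCLR sources.
#include <cstring>

struct Big
{
    int data[16];
};

// Conceptual ABI-level lowering of "Big Produce();": the callee writes its
// result through a caller-provided hidden buffer pointer instead of
// returning the struct by value.
void Produce_lowered(Big* retBuf)
{
    std::memset(retBuf->data, 0, sizeof(retBuf->data));
    retBuf->data[0] = 42; // the callee's only interaction with the buffer is writing the result
}

int Consume()
{
    Big result;               // a user-visible local, no longer required to be a compiler temp
    Produce_lowered(&result); // &result is passed as the hidden retbuf argument; the call itself
                              // defines 'result', so liveness should record the def when it
                              // reaches the call, not when it sees the LCL_ADDR being taken
    return result.data[0];
}

With the previous lvIsTemp restriction the buffer was always a fresh compiler temp, so treating the address node as the definition point was harmless; once arbitrary locals qualify, the liveness changes above move the definition to the call so that argument reordering cannot produce stale use/def information.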