From 7ed8eac54a941619c71fb62c8b146386f68e86d8 Mon Sep 17 00:00:00 2001 From: Jakob Botsch Nielsen Date: Thu, 4 Jul 2024 13:15:58 +0200 Subject: [PATCH] JIT: Remove CallArg::m_tmpNum Before this change the JIT has two places where it may introduce temps during call args morphing: 1. Directly during `fgMorphArgs` as part of making struct copies for some struct args, like implicit byref args and other struct args requiring to be of `LCL_VAR` shape 2. During `EvalArgsToTemps` as part of making sure evaluation order is maintained while we get the call into the right shape with registers To make this work we have `CallArg::m_tmpNum` that communicates the local from 1 to 2; the responsibility of creating the actual `LCL_VAR` node to put in the late argument list was left to `EvalArgsToTemps` while `fgMorphArgs` would just change the early setup node to a store into the temporary. This PR changes it so that 1 directly introduces the right late node when it creates its temporary. That is, it puts the call argument into the right shape immediately and avoids relying on `EvalArgsToTemps` to create the local node in the late list. The benefit of that is that we no longer need to communicate the temporary as part of every `CallArg`. However, the motivation here is something else: as part of #104370 we may have arguments that need multiple temporaries to copy, so having things working in this way introduces complications for that. --- src/coreclr/jit/gentree.cpp | 2 - src/coreclr/jit/gentree.h | 10 +- src/coreclr/jit/morph.cpp | 176 ++++++++++++++---------------------- 3 files changed, 69 insertions(+), 119 deletions(-) diff --git a/src/coreclr/jit/gentree.cpp b/src/coreclr/jit/gentree.cpp index 786e7c92f5f89..15d87fe6f13e7 100644 --- a/src/coreclr/jit/gentree.cpp +++ b/src/coreclr/jit/gentree.cpp @@ -9738,12 +9738,10 @@ void CallArgs::InternalCopyFrom(Compiler* comp, CallArgs* other, CopyNodeFunc co carg->m_earlyNode = arg.m_earlyNode != nullptr ? copyNode(arg.m_earlyNode) : nullptr; carg->m_lateNode = arg.m_lateNode != nullptr ? copyNode(arg.m_lateNode) : nullptr; carg->m_signatureClsHnd = arg.m_signatureClsHnd; - carg->m_tmpNum = arg.m_tmpNum; carg->m_signatureType = arg.m_signatureType; carg->m_wellKnownArg = arg.m_wellKnownArg; carg->m_needTmp = arg.m_needTmp; carg->m_needPlace = arg.m_needPlace; - carg->m_isTmp = arg.m_isTmp; carg->m_processed = arg.m_processed; carg->AbiInfo = arg.AbiInfo; carg->NewAbiInfo = arg.NewAbiInfo; diff --git a/src/coreclr/jit/gentree.h b/src/coreclr/jit/gentree.h index b17480b6d296a..2292a4e04b2cf 100644 --- a/src/coreclr/jit/gentree.h +++ b/src/coreclr/jit/gentree.h @@ -4708,8 +4708,6 @@ class CallArg // The class handle for the signature type (when varTypeIsStruct(SignatureType)). CORINFO_CLASS_HANDLE m_signatureClsHnd; - // The LclVar number if we had to force evaluation of this arg. - unsigned m_tmpNum; // The type of the argument in the signature. var_types m_signatureType : 5; // The type of well-known argument this is. @@ -4718,8 +4716,6 @@ class CallArg bool m_needTmp : 1; // True when we must replace this argument with a placeholder node. bool m_needPlace : 1; - // True when we setup a temp LclVar for this argument. - bool m_isTmp : 1; // True when we have decided the evaluation order for this argument in LateArgs bool m_processed : 1; @@ -4730,12 +4726,10 @@ class CallArg , m_next(nullptr) , m_lateNext(nullptr) , m_signatureClsHnd(NO_CLASS_HANDLE) - , m_tmpNum(BAD_VAR_NUM) , m_signatureType(TYP_UNDEF) , m_wellKnownArg(WellKnownArg::None) , m_needTmp(false) , m_needPlace(false) - , m_isTmp(false) , m_processed(false) { } @@ -4772,7 +4766,6 @@ class CallArg CORINFO_CLASS_HANDLE GetSignatureClassHandle() { return m_signatureClsHnd; } var_types GetSignatureType() { return m_signatureType; } WellKnownArg GetWellKnownArg() { return m_wellKnownArg; } - bool IsTemp() { return m_isTmp; } // clang-format on // Get the real argument node, i.e. not a setup or placeholder node. @@ -4884,8 +4877,7 @@ class CallArgs void SetNeedsTemp(CallArg* arg); bool IsNonStandard(Compiler* comp, GenTreeCall* call, CallArg* arg); - GenTree* MakeTmpArgNode(Compiler* comp, CallArg* arg); - void SetTemp(CallArg* arg, unsigned tmpNum); + GenTree* MakeTmpArgNode(Compiler* comp, CallArg* arg, unsigned lclNum); // clang-format off bool HasThisPointer() const { return m_hasThisPointer; } diff --git a/src/coreclr/jit/morph.cpp b/src/coreclr/jit/morph.cpp index 1805435c5ad3d..0370223003099 100644 --- a/src/coreclr/jit/morph.cpp +++ b/src/coreclr/jit/morph.cpp @@ -821,18 +821,10 @@ void CallArg::Dump(Compiler* comp) { printf(", isSplit"); } - if (m_needTmp) - { - printf(", tmpNum=V%02u", m_tmpNum); - } if (m_needPlace) { printf(", needPlace"); } - if (m_isTmp) - { - printf(", isTmp"); - } if (m_processed) { printf(", processed"); @@ -849,15 +841,6 @@ void CallArg::Dump(Compiler* comp) } #endif -//------------------------------------------------------------------------ -// SetTemp: Set that the specified argument was evaluated into a temp. -// -void CallArgs::SetTemp(CallArg* arg, unsigned tmpNum) -{ - arg->m_tmpNum = tmpNum; - arg->m_isTmp = true; -} - //------------------------------------------------------------------------ // ArgsComplete: Make final decisions on which arguments to evaluate into temporaries. // @@ -874,13 +857,7 @@ void CallArgs::ArgsComplete(Compiler* comp, GenTreeCall* call) for (CallArg& arg : Args()) { GenTree* argx = arg.GetEarlyNode(); - - if (argx == nullptr) - { - // Should only happen if remorphing in which case we do not need to - // make a decision about temps. - continue; - } + assert(argx != nullptr); bool canEvalToTemp = true; if (arg.AbiInfo.GetRegNum() == REG_STK) @@ -909,19 +886,16 @@ void CallArgs::ArgsComplete(Compiler* comp, GenTreeCall* call) // if ((argx->gtFlags & GTF_ASG) != 0) { - // If this is not the only argument, or it's a copyblk, or it - // already evaluates the expression to a tmp then we need a temp in - // the late arg list. - // In the latter case this might not even be a value; - // fgMakeOutgoingStructArgCopy will leave the copying nodes here - // for FEATURE_FIXED_OUT_ARGS. - if (canEvalToTemp && ((argCount > 1) || argx->OperIsCopyBlkOp() || (FEATURE_FIXED_OUT_ARGS && arg.m_isTmp))) + // fgMakeOutgoingStructArgCopy can have introduced a temp already, + // in which case it will have created a setup node in the early + // node. + if (!argx->IsValue()) { - SetNeedsTemp(&arg); + assert(arg.m_needTmp); } - else + else if (canEvalToTemp && (argCount > 1)) { - assert(argx->IsValue()); + SetNeedsTemp(&arg); } // For all previous arguments that may interfere with the store we @@ -1318,8 +1292,6 @@ void CallArgs::SortArgs(Compiler* comp, GenTreeCall* call, CallArg** sortedArgs) regCount++; } - assert(arg->GetLateNode() == nullptr); - // Skip any already processed args // if (!arg->m_processed) @@ -1556,9 +1528,8 @@ void CallArgs::SortArgs(Compiler* comp, GenTreeCall* call, CallArg** sortedArgs) // Return Value: // the newly created temp var tree. // -GenTree* CallArgs::MakeTmpArgNode(Compiler* comp, CallArg* arg) +GenTree* CallArgs::MakeTmpArgNode(Compiler* comp, CallArg* arg, unsigned lclNum) { - unsigned lclNum = arg->m_tmpNum; LclVarDsc* varDsc = comp->lvaGetDesc(lclNum); var_types argType = varDsc->TypeGet(); assert(genActualType(argType) == genActualType(arg->GetSignatureType())); @@ -1633,7 +1604,16 @@ void CallArgs::EvalArgsToTemps(Compiler* comp, GenTreeCall* call) for (size_t i = 0; i < numArgs; i++) { CallArg& arg = *(sortedArgs[i]); - assert(arg.GetLateNode() == nullptr); + + if (arg.GetLateNode() != nullptr) + { + // We may already have created the temp as part of + // fgMakeOutgoingStructArgCopy. In that case there is no work to be + // done. + *lateTail = &arg; + lateTail = &arg.LateNextRef(); + continue; + } GenTree* argx = arg.GetEarlyNode(); assert(argx != nullptr); @@ -1655,82 +1635,60 @@ void CallArgs::EvalArgsToTemps(Compiler* comp, GenTreeCall* call) if (arg.m_needTmp) { - if (arg.m_isTmp) - { - // Create a copy of the temp to go into the late argument list - defArg = MakeTmpArgNode(comp, &arg); - } - else - { - // Create a temp store for the argument - // Put the temp in the late arg list + // Create a temp store for the argument + // Put the temp in the late arg list #ifdef DEBUG - if (comp->verbose) - { - printf("Argument with 'side effect'...\n"); - comp->gtDispTree(argx); - } + if (comp->verbose) + { + printf("Argument with 'side effect'...\n"); + comp->gtDispTree(argx); + } #endif #if defined(TARGET_AMD64) && !defined(UNIX_AMD64_ABI) - noway_assert(argx->gtType != TYP_STRUCT); + noway_assert(argx->gtType != TYP_STRUCT); #endif - unsigned tmpVarNum = comp->lvaGrabTemp(true DEBUGARG("argument with side effect")); + unsigned tmpVarNum = comp->lvaGrabTemp(true DEBUGARG("argument with side effect")); - if (setupArg != nullptr) - { - // Now keep the mkrefany for the late argument list - defArg = argx; + setupArg = comp->gtNewTempStore(tmpVarNum, argx); - // Clear the side-effect flags because now both op1 and op2 have no side-effects - defArg->gtFlags &= ~GTF_ALL_EFFECT; - } - else - { - setupArg = comp->gtNewTempStore(tmpVarNum, argx); + LclVarDsc* varDsc = comp->lvaGetDesc(tmpVarNum); + var_types lclVarType = genActualType(argx->gtType); + var_types scalarType = TYP_UNKNOWN; - LclVarDsc* varDsc = comp->lvaGetDesc(tmpVarNum); - var_types lclVarType = genActualType(argx->gtType); - var_types scalarType = TYP_UNKNOWN; - - if (setupArg->OperIsCopyBlkOp()) - { - setupArg = comp->fgMorphCopyBlock(setupArg); + if (setupArg->OperIsCopyBlkOp()) + { + setupArg = comp->fgMorphCopyBlock(setupArg); #if defined(TARGET_ARMARCH) || defined(UNIX_AMD64_ABI) || defined(TARGET_LOONGARCH64) || defined(TARGET_RISCV64) - if ((lclVarType == TYP_STRUCT) && (arg.AbiInfo.ArgType != TYP_STRUCT)) - { - scalarType = arg.AbiInfo.ArgType; - } + if ((lclVarType == TYP_STRUCT) && (arg.AbiInfo.ArgType != TYP_STRUCT)) + { + scalarType = arg.AbiInfo.ArgType; + } #endif // TARGET_ARMARCH || defined (UNIX_AMD64_ABI) || defined(TARGET_LOONGARCH64) || defined(TARGET_RISCV64) - } - - // scalarType can be set to a wider type for ARM or unix amd64 architectures: (3 => 4) or (5,6,7 => - // 8) - if ((scalarType != TYP_UNKNOWN) && (scalarType != lclVarType)) - { - // Create a GT_LCL_FLD using the wider type to go to the late argument list - defArg = comp->gtNewLclFldNode(tmpVarNum, scalarType, 0); - } - else - { - // Create a copy of the temp to go to the late argument list - defArg = comp->gtNewLclvNode(tmpVarNum, lclVarType); - } + } - arg.m_isTmp = true; - arg.m_tmpNum = tmpVarNum; - } + // scalarType can be set to a wider type for ARM or unix amd64 architectures: (3 => 4) or (5,6,7 => + // 8) + if ((scalarType != TYP_UNKNOWN) && (scalarType != lclVarType)) + { + // Create a GT_LCL_FLD using the wider type to go to the late argument list + defArg = comp->gtNewLclFldNode(tmpVarNum, scalarType, 0); + } + else + { + // Create a copy of the temp to go to the late argument list + defArg = comp->gtNewLclvNode(tmpVarNum, lclVarType); + } #ifdef DEBUG - if (comp->verbose) - { - printf("\n Evaluate to a temp:\n"); - comp->gtDispTree(setupArg); - } -#endif + if (comp->verbose) + { + printf("\n Evaluate to a temp:\n"); + comp->gtDispTree(setupArg); } +#endif } else // curArgTabEntry->needTmp == false { @@ -3264,29 +3222,31 @@ void Compiler::fgMakeOutgoingStructArgCopy(GenTreeCall* call, CallArg* arg) assert(!fgGlobalMorph); } + call->gtArgs.SetNeedsTemp(arg); + // Copy the valuetype to the temp GenTree* copyBlk = gtNewStoreLclVarNode(tmp, argx); copyBlk = fgMorphCopyBlock(copyBlk); - call->gtArgs.SetTemp(arg, tmp); #if FEATURE_FIXED_OUT_ARGS - // Do the copy early, and evaluate the temp later (see EvalArgsToTemps) - // When on Unix create LCL_FLD for structs passed in more than one registers. See fgMakeTmpArgNode - GenTree* argNode = copyBlk; + // For fixed out args we create the setup node here; EvalArgsToTemps knows + // to handle the case of "already have a setup node" properly. + arg->SetEarlyNode(copyBlk); + arg->SetLateNode(call->gtArgs.MakeTmpArgNode(this, arg, tmp)); #else // !FEATURE_FIXED_OUT_ARGS // Structs are always on the stack, and thus never need temps // so we have to put the copy and temp all into one expression. - GenTree* argNode = call->gtArgs.MakeTmpArgNode(this, arg); + GenTree* argNode = call->gtArgs.MakeTmpArgNode(this, arg, tmp); // Change the expression to "(tmp=val),tmp" argNode = gtNewOperNode(GT_COMMA, argNode->TypeGet(), copyBlk, argNode); -#endif // !FEATURE_FIXED_OUT_ARGS - arg->SetEarlyNode(argNode); + +#endif // !FEATURE_FIXED_OUT_ARGS } /***************************************************************************** @@ -6747,12 +6707,12 @@ Statement* Compiler::fgAssignRecursiveCallArgToCallerParam(GenTree* arg, // TODO-CQ: enable calls with struct arguments passed in registers. noway_assert(!varTypeIsStruct(arg->TypeGet())); - if (callArg->IsTemp() || arg->IsCnsIntOrI() || arg->IsCnsFltOrDbl()) + if (arg->IsCnsIntOrI() || arg->IsCnsFltOrDbl()) { // The argument is already assigned to a temp or is a const. argInTemp = arg; } - else if (arg->OperGet() == GT_LCL_VAR) + else if (arg->OperIs(GT_LCL_VAR)) { unsigned lclNum = arg->AsLclVar()->GetLclNum(); LclVarDsc* varDsc = lvaGetDesc(lclNum);