Skip to content

Commit

Permalink
Structs: Do not mark "Hidden buffer pointer" as address-exposed (dotn…
Browse files Browse the repository at this point in the history
…et#64130)

* Preliminary changes

* Add the missing push of ssadef

* Fix an issue for call tree cloning

* formatting

* minor review comments incorporation

* Make GT_CALL as full-def

* Merge RenameCall into RenameDef

* Fix DefinesLocalAddr and lateuse arg

* Minor comments

* fix the recently added code to account that GT_CALL can also have defintion:

* clone the return buffer arg

* Another round of review comments

* Handle return buffer calls in optIsVarAssgCB

* Update locals defined by calls in NodeInfo

* Fix a case where arg can be putarg

* Restructure the cases in GetLclRetBufArgNode()

* move ivaMaskCall outside the condition

* Add back to call for DoNotEnregister which was missed for Release builds

* Retrieve the appropriate node in case of copyReload

* Added an assert

* remove assert and add comment
  • Loading branch information
kunalspathak authored and radekdoulik committed Mar 30, 2022
1 parent 1d38864 commit 697bd73
Show file tree
Hide file tree
Showing 16 changed files with 400 additions and 100 deletions.
22 changes: 11 additions & 11 deletions src/coreclr/jit/clrjit.natvis
Original file line number Diff line number Diff line change
Expand Up @@ -39,26 +39,26 @@ Documentation for VS debugger format specifiers: https://docs.microsoft.com/en-u
<!-- GenTree -->

<Type Name="GenTree">
<DisplayString>[{gtOper,en}, {gtType,en}]</DisplayString>
<DisplayString>{gtTreeID, d}: [[{gtOper,en}, {gtType,en}]</DisplayString>
</Type>
<Type Name="GenTreeIntCon">
<DisplayString>[IntCon={((GenTreeIntCon*)this)-&gt;gtIconVal, d}]</DisplayString>
<DisplayString>{gtTreeID, d}: [[IntCon={((GenTreeIntCon*)this)-&gt;gtIconVal, d}]</DisplayString>
</Type>
<Type Name="GenTreeDblCon">
<DisplayString>[DblCon={((GenTreeDblCon*)this)-&gt;gtDconVal, g}]</DisplayString>
<DisplayString>{gtTreeID, d}: [[DblCon={((GenTreeDblCon*)this)-&gt;gtDconVal, g}]</DisplayString>
</Type>
<Type Name="GenTreeStrCon">
<DisplayString>CNS_STR</DisplayString>
</Type>
<Type Name="GenTreeLngCon">
<DisplayString>[LngCon={((GenTreeLngCon*)this)-&gt;gtLconVal, l}]</DisplayString>
<DisplayString>{gtTreeID, d}: [[LngCon={((GenTreeLngCon*)this)-&gt;gtLconVal, l}]</DisplayString>
</Type>
<Type Name="GenTreeOp">
<DisplayString Condition="this->gtOper==GT_ASG">[{this-&gt;gtOp1,na}={this-&gt;gtOp2,na}]</DisplayString>
<DisplayString Condition="this->gtOper==GT_CAST">[{((GenTreeCast*)this)-&gt;gtCastType,en} &lt;- {((GenTreeUnOp*)this)-&gt;gtOp1-&gt;gtType,en}]</DisplayString>
<DisplayString Condition="this->gtOper==GT_SIMD">[{((GenTreeSIMD*)this)-&gt;gtSIMDIntrinsicID,en}, {gtType,en}]</DisplayString>
<DisplayString Condition="this->gtOper==GT_HWINTRINSIC">[{((GenTreeHWIntrinsic*)this)-&gt;gtHWIntrinsicId,en}, {gtType,en}]</DisplayString>
<DisplayString>[{gtOper,en}, {gtType,en}]</DisplayString>
<DisplayString Condition="this->gtOper==GT_ASG">{gtTreeID, d}: [[{this-&gt;gtOp1,na}={this-&gt;gtOp2,na}]</DisplayString>
<DisplayString Condition="this->gtOper==GT_CAST">{gtTreeID, d}: [[{((GenTreeCast*)this)-&gt;gtCastType,en} &lt;- {((GenTreeUnOp*)this)-&gt;gtOp1-&gt;gtType,en}]</DisplayString>
<DisplayString Condition="this->gtOper==GT_SIMD">{gtTreeID, d}: [[{((GenTreeSIMD*)this)-&gt;gtSIMDIntrinsicID,en}, {gtType,en}]</DisplayString>
<DisplayString Condition="this->gtOper==GT_HWINTRINSIC">{gtTreeID, d}: [[{((GenTreeHWIntrinsic*)this)-&gt;gtHWIntrinsicId,en}, {gtType,en}]</DisplayString>
<DisplayString>{gtTreeID, d}: [[{gtOper,en}, {gtType,en}]</DisplayString>
</Type>

<Type Name="Statement">
Expand All @@ -77,11 +77,11 @@ Documentation for VS debugger format specifiers: https://docs.microsoft.com/en-u
</Type>

<Type Name="GenTreeLclVar" Inheritable="false">
<DisplayString>[{gtOper,en}, {gtType,en} V{((GenTreeLclVar*)this)-&gt;_gtLclNum,u}]</DisplayString>
<DisplayString>{gtTreeID, d}: [[{gtOper,en}, {gtType,en} V{((GenTreeLclVar*)this)-&gt;_gtLclNum,u}]</DisplayString>
</Type>

<Type Name="GenTreeLclFld" Inheritable="false">
<DisplayString>[{gtOper,en}, {gtType,en} V{((GenTreeLclFld*)this)-&gt;_gtLclNum,u}[+{((GenTreeLclFld*)this)-&gt;m_lclOffs,u}]]</DisplayString>
<DisplayString>{gtTreeID, d}: [[{gtOper,en}, {gtType,en} V{((GenTreeLclFld*)this)-&gt;_gtLclNum,u}[+{((GenTreeLclFld*)this)-&gt;m_lclOffs,u}]]</DisplayString>
</Type>

<!-- Register allocation -->
Expand Down
24 changes: 15 additions & 9 deletions src/coreclr/jit/compiler.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -2234,7 +2234,7 @@ void Compiler::compSetProcessor()
opts.compUseCMOV = jitFlags.IsSet(JitFlags::JIT_FLAG_USE_CMOV);
#ifdef DEBUG
if (opts.compUseCMOV)
opts.compUseCMOV = !compStressCompile(STRESS_USE_CMOV, 50);
opts.compUseCMOV = !compStressCompile(STRESS_USE_CMOV, 50);
#endif // DEBUG

#endif // TARGET_X86
Expand Down Expand Up @@ -2409,15 +2409,17 @@ void Compiler::compInitOptions(JitFlags* jitFlags)
opts.compJitAlignLoopBoundary = (unsigned short)JitConfig.JitAlignLoopBoundary();
opts.compJitAlignLoopMinBlockWeight = (unsigned short)JitConfig.JitAlignLoopMinBlockWeight();

opts.compJitAlignLoopForJcc = JitConfig.JitAlignLoopForJcc() == 1;
opts.compJitAlignLoopMaxCodeSize = (unsigned short)JitConfig.JitAlignLoopMaxCodeSize();
opts.compJitHideAlignBehindJmp = JitConfig.JitHideAlignBehindJmp() == 1;
opts.compJitAlignLoopForJcc = JitConfig.JitAlignLoopForJcc() == 1;
opts.compJitAlignLoopMaxCodeSize = (unsigned short)JitConfig.JitAlignLoopMaxCodeSize();
opts.compJitHideAlignBehindJmp = JitConfig.JitHideAlignBehindJmp() == 1;
opts.compJitOptimizeStructHiddenBuffer = JitConfig.JitOptimizeStructHiddenBuffer() == 1;
#else
opts.compJitAlignLoopAdaptive = true;
opts.compJitAlignLoopBoundary = DEFAULT_ALIGN_LOOP_BOUNDARY;
opts.compJitAlignLoopMinBlockWeight = DEFAULT_ALIGN_LOOP_MIN_BLOCK_WEIGHT;
opts.compJitAlignLoopMaxCodeSize = DEFAULT_MAX_LOOPSIZE_FOR_ALIGN;
opts.compJitHideAlignBehindJmp = true;
opts.compJitAlignLoopAdaptive = true;
opts.compJitAlignLoopBoundary = DEFAULT_ALIGN_LOOP_BOUNDARY;
opts.compJitAlignLoopMinBlockWeight = DEFAULT_ALIGN_LOOP_MIN_BLOCK_WEIGHT;
opts.compJitAlignLoopMaxCodeSize = DEFAULT_MAX_LOOPSIZE_FOR_ALIGN;
opts.compJitHideAlignBehindJmp = true;
opts.compJitOptimizeStructHiddenBuffer = true;
#endif

#ifdef TARGET_XARCH
Expand Down Expand Up @@ -9880,6 +9882,9 @@ void Compiler::EnregisterStats::RecordLocal(const LclVarDsc* varDsc)
case DoNotEnregisterReason::AddrExposed:
m_addrExposed++;
break;
case DoNotEnregisterReason::HiddenBufferStructArg:
m_hiddenStructArg++;
break;
case DoNotEnregisterReason::DontEnregStructs:
m_dontEnregStructs++;
break;
Expand Down Expand Up @@ -10050,6 +10055,7 @@ void Compiler::EnregisterStats::Dump(FILE* fout) const
}

PRINT_STATS(m_addrExposed, notEnreg);
PRINT_STATS(m_hiddenStructArg, notEnreg);
PRINT_STATS(m_dontEnregStructs, notEnreg);
PRINT_STATS(m_notRegSizeStruct, notEnreg);
PRINT_STATS(m_localField, notEnreg);
Expand Down
44 changes: 34 additions & 10 deletions src/coreclr/jit/compiler.h
Original file line number Diff line number Diff line change
Expand Up @@ -218,6 +218,10 @@ class LclSsaVarDsc
{
}

LclSsaVarDsc(BasicBlock* block) : m_block(block), m_asg(nullptr)
{
}

LclSsaVarDsc(BasicBlock* block, GenTreeOp* asg) : m_block(block), m_asg(asg)
{
assert((asg == nullptr) || asg->OperIs(GT_ASG));
Expand Down Expand Up @@ -392,12 +396,13 @@ enum class DoNotEnregisterReason
#endif
LclAddrNode, // the local is accessed with LCL_ADDR_VAR/FLD.
CastTakesAddr,
StoreBlkSrc, // the local is used as STORE_BLK source.
OneAsgRetyping, // fgMorphOneAsgBlockOp prevents this local from being enregister.
SwizzleArg, // the local is passed using LCL_FLD as another type.
BlockOpRet, // the struct is returned and it promoted or there is a cast.
ReturnSpCheck, // the local is used to do SP check
SimdUserForcesDep // a promoted struct was used by a SIMD/HWI node; it must be dependently promoted
StoreBlkSrc, // the local is used as STORE_BLK source.
OneAsgRetyping, // fgMorphOneAsgBlockOp prevents this local from being enregister.
SwizzleArg, // the local is passed using LCL_FLD as another type.
BlockOpRet, // the struct is returned and it promoted or there is a cast.
ReturnSpCheck, // the local is used to do SP check
SimdUserForcesDep, // a promoted struct was used by a SIMD/HWI node; it must be dependently promoted
HiddenBufferStructArg // the argument is a hidden return buffer passed to a method.
};

enum class AddressExposedReason
Expand Down Expand Up @@ -527,6 +532,11 @@ class LclVarDsc
unsigned char lvIsMultiRegArg : 1; // true if this is a multireg LclVar struct used in an argument context
unsigned char lvIsMultiRegRet : 1; // true if this is a multireg LclVar struct assigned from a multireg call

#ifdef DEBUG
unsigned char lvHiddenBufferStructArg : 1; // True when this struct (or its field) are passed as hidden buffer
// pointer.
#endif

#ifdef FEATURE_HFA_FIELDS_PRESENT
CorInfoHFAElemType _lvHfaElemKind : 3; // What kind of an HFA this is (CORINFO_HFA_ELEM_NONE if it is not an HFA).
#endif // FEATURE_HFA_FIELDS_PRESENT
Expand Down Expand Up @@ -752,6 +762,18 @@ class LclVarDsc
return m_addrExposed;
}

#ifdef DEBUG
void SetHiddenBufferStructArg(char value)
{
lvHiddenBufferStructArg = value;
}

bool IsHiddenBufferStructArg() const
{
return lvHiddenBufferStructArg;
}
#endif

private:
regNumberSmall _lvRegNum; // Used to store the register this variable is in (or, the low register of a
// register pair). It is set during codegen any time the
Expand Down Expand Up @@ -3784,6 +3806,7 @@ class Compiler
// Getters and setters for address-exposed and do-not-enregister local var properties.
bool lvaVarAddrExposed(unsigned varNum) const;
void lvaSetVarAddrExposed(unsigned varNum DEBUGARG(AddressExposedReason reason));
void lvaSetHiddenBufferStructArg(unsigned varNum);
void lvaSetVarLiveInOutOfHandler(unsigned varNum);
bool lvaVarDoNotEnregister(unsigned varNum);

Expand Down Expand Up @@ -7494,7 +7517,7 @@ class Compiler
void optCopyProp(Statement* stmt, GenTreeLclVarCommon* tree, unsigned lclNum, LclNumToLiveDefsMap* curSsaName);
void optBlockCopyPropPopStacks(BasicBlock* block, LclNumToLiveDefsMap* curSsaName);
void optBlockCopyProp(BasicBlock* block, LclNumToLiveDefsMap* curSsaName);
void optCopyPropPushDef(GenTreeOp* asg,
void optCopyPropPushDef(GenTree* defNode,
GenTreeLclVarCommon* lclNode,
unsigned lclNum,
LclNumToLiveDefsMap* curSsaName);
Expand Down Expand Up @@ -9921,6 +9944,9 @@ XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
// If set, tries to hide alignment instructions behind unconditional jumps.
bool compJitHideAlignBehindJmp;

// If set, tracks the hidden return buffer for struct arg.
bool compJitOptimizeStructHiddenBuffer;

#ifdef LATE_DISASM
bool doLateDisasm; // Run the late disassembler
#endif // LATE_DISASM
Expand Down Expand Up @@ -10570,9 +10596,6 @@ XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
// size of the type these describe.
unsigned compGetTypeSize(CorInfoType cit, CORINFO_CLASS_HANDLE clsHnd);

// Returns true if the method being compiled has a return buffer.
bool compHasRetBuffArg();

#ifdef DEBUG
// Components used by the compiler may write unit test suites, and
// have them run within this method. They will be run only once per process, and only
Expand Down Expand Up @@ -10628,6 +10651,7 @@ XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
unsigned m_totalNumberOfStructEnregVars;

unsigned m_addrExposed;
unsigned m_hiddenStructArg;
unsigned m_VMNeedsStackAddr;
unsigned m_localField;
unsigned m_blockOp;
Expand Down
14 changes: 7 additions & 7 deletions src/coreclr/jit/copyprop.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -33,7 +33,7 @@ void Compiler::optBlockCopyPropPopStacks(BasicBlock* block, LclNumToLiveDefsMap*
for (GenTree* const tree : stmt->TreeList())
{
GenTreeLclVarCommon* lclDefNode = nullptr;
if (tree->OperIs(GT_ASG) && tree->DefinesLocal(this, &lclDefNode))
if (tree->OperIsSsaDef() && tree->DefinesLocal(this, &lclDefNode))
{
const unsigned lclNum = optIsSsaLocal(lclDefNode);

Expand Down Expand Up @@ -279,12 +279,12 @@ unsigned Compiler::optIsSsaLocal(GenTreeLclVarCommon* lclNode)
// optCopyPropPushDef: Push the new live SSA def on the stack for "lclNode".
//
// Arguments:
// asg - The assignment node for this def (will be "nullptr" for "use" defs)
// defNode - The definition node for this def (GT_ASG/GT_CALL) (will be "nullptr" for "use" defs)
// lclNode - The local tree representing "the def" (that can actually be a use)
// lclNum - The local's number (see "optIsSsaLocal")
// curSsaName - The map of local numbers to stacks of their defs
//
void Compiler::optCopyPropPushDef(GenTreeOp* asg,
void Compiler::optCopyPropPushDef(GenTree* defNode,
GenTreeLclVarCommon* lclNode,
unsigned lclNum,
LclNumToLiveDefsMap* curSsaName)
Expand All @@ -301,7 +301,7 @@ void Compiler::optCopyPropPushDef(GenTreeOp* asg,
}

unsigned ssaDefNum = SsaConfig::RESERVED_SSA_NUM;
if (asg == nullptr)
if (defNode == nullptr)
{
// Parameters, this pointer etc.
assert((lclNode->gtFlags & GTF_VAR_DEF) == 0);
Expand All @@ -313,7 +313,7 @@ void Compiler::optCopyPropPushDef(GenTreeOp* asg,
assert((lclNode->gtFlags & GTF_VAR_DEF) != 0);

// TODO-CQ: design better heuristics for propagation and remove this condition.
if (!asg->IsPhiDefn())
if (!defNode->IsPhiDefn())
{
ssaDefNum = GetSsaNumForLocalVarDef(lclNode);

Expand Down Expand Up @@ -377,7 +377,7 @@ void Compiler::optBlockCopyProp(BasicBlock* block, LclNumToLiveDefsMap* curSsaNa
treeLifeUpdater.UpdateLife(tree);

GenTreeLclVarCommon* lclDefNode = nullptr;
if (tree->OperIs(GT_ASG) && tree->DefinesLocal(this, &lclDefNode))
if (tree->OperIsSsaDef() && tree->DefinesLocal(this, &lclDefNode))
{
const unsigned lclNum = optIsSsaLocal(lclDefNode);

Expand All @@ -386,7 +386,7 @@ void Compiler::optBlockCopyProp(BasicBlock* block, LclNumToLiveDefsMap* curSsaNa
continue;
}

optCopyPropPushDef(tree->AsOp(), lclDefNode, lclNum, curSsaName);
optCopyPropPushDef(tree, lclDefNode, lclNum, curSsaName);
}
// TODO-CQ: propagate on LCL_FLDs too.
else if (tree->OperIs(GT_LCL_VAR) && ((tree->gtFlags & GTF_VAR_DEF) == 0))
Expand Down
65 changes: 60 additions & 5 deletions src/coreclr/jit/gentree.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1266,6 +1266,16 @@ bool GenTreeCall::AreArgsComplete() const
return false;
}

//-------------------------------------------------------------------------
// SetRetBufArg: Sets the "return buffer" argument use.
//
void GenTreeCall::SetLclRetBufArg(Use* retBufArg)
{
assert(retBufArg->GetNode()->TypeIs(TYP_I_IMPL, TYP_BYREF) && retBufArg->GetNode()->OperIs(GT_ADDR, GT_ASG));
assert(HasRetBufArg());
gtRetBufArg = retBufArg;
}

//--------------------------------------------------------------------------
// Equals: Check if 2 CALL nodes are equal.
//
Expand Down Expand Up @@ -5376,6 +5386,12 @@ bool GenTree::OperRequiresAsgFlag()
}
}
#endif // FEATURE_HW_INTRINSICS
if (gtOper == GT_CALL)
{
// If the call has return buffer argument, it produced a definition and hence
// should be marked with assignment.
return AsCall()->GetLclRetBufArgNode() != nullptr;
}
return false;
}

Expand Down Expand Up @@ -7942,21 +7958,46 @@ GenTreeCall* Compiler::gtCloneExprCallHelper(GenTreeCall* tree,
copy->gtCallMoreFlags = tree->gtCallMoreFlags;
copy->gtCallArgs = nullptr;
copy->gtCallLateArgs = nullptr;
copy->gtRetBufArg = nullptr;

GenTreeCall::Use** argsTail = &copy->gtCallArgs;
for (GenTreeCall::Use& use : tree->Args())
{
*argsTail = gtNewCallArgs(gtCloneExpr(use.GetNode(), addFlags, deepVarNum, deepVarVal));
argsTail = &((*argsTail)->NextRef());
GenTree* argNode = use.GetNode();
GenTree* copyArgNode = gtCloneExpr(argNode, addFlags, deepVarNum, deepVarVal);
*argsTail = gtNewCallArgs(copyArgNode);

if (tree->gtRetBufArg == &use)
{
// Set the return buffer arg, if any.
assert(copy->gtRetBufArg == nullptr);
copy->gtRetBufArg = *argsTail;
}

argsTail = &((*argsTail)->NextRef());
}

argsTail = &copy->gtCallLateArgs;
for (GenTreeCall::Use& use : tree->LateArgs())
{
*argsTail = gtNewCallArgs(gtCloneExpr(use.GetNode(), addFlags, deepVarNum, deepVarVal));
argsTail = &((*argsTail)->NextRef());
GenTree* argNode = use.GetNode();
GenTree* copyArgNode = gtCloneExpr(argNode, addFlags, deepVarNum, deepVarVal);
*argsTail = gtNewCallArgs(copyArgNode);

if (tree->gtRetBufArg == &use)
{
// Set the return buffer arg, if any.
assert(copy->gtRetBufArg == nullptr);
copy->gtRetBufArg = *argsTail;
}

argsTail = &((*argsTail)->NextRef());
}

// Either there was not return buffer for the "tree" or else we successfully set the
// return buffer in the copy.
assert((tree->gtRetBufArg == nullptr) || (copy->gtRetBufArg != nullptr));

// The call sig comes from the EE and doesn't change throughout the compilation process, meaning
// we only really need one physical copy of it. Therefore a shallow pointer copy will suffice.
// (Note that this still holds even if the tree we are cloning was created by an inlinee compiler,
Expand Down Expand Up @@ -9781,7 +9822,10 @@ void Compiler::gtDispNode(GenTree* tree, IndentStack* indentStack, _In_ _In_opt_
{
printf("(AX)"); // Variable has address exposed.
}

if (varDsc->IsHiddenBufferStructArg())
{
printf("(RB)"); // Variable is hidden return buffer
}
if (varDsc->lvUnusedStruct)
{
assert(varDsc->lvPromoted);
Expand Down Expand Up @@ -15461,6 +15505,17 @@ bool GenTree::DefinesLocal(Compiler* comp, GenTreeLclVarCommon** pLclVarTree, bo
blkNode = AsOp()->gtOp1->AsBlk();
}
}
else if (OperIs(GT_CALL))
{
GenTree* retBufArg = AsCall()->GetLclRetBufArgNode();
if (retBufArg == nullptr)
{
return false;
}

unsigned size = comp->typGetObjLayout(AsCall()->gtRetClsHnd)->GetSize();
return retBufArg->DefinesLocalAddr(comp, size, pLclVarTree, pIsEntire);
}
else if (OperIsBlk())
{
blkNode = this->AsBlk();
Expand Down
Loading

0 comments on commit 697bd73

Please sign in to comment.