Fix Issue 620 - Incorrect IND(ADDR(LCL_VAR)) folding when types do not match #40059

Closed
wants to merge 10 commits into from
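The change below addresses the case where the JIT folds `IND(ADDR(LCL_VAR))` into a direct use of the local even though the indirection's small type differs from the local's type, losing the zero/sign-extension ("normalization") that small-typed locals rely on. The real repro in Issue 620 is managed code; the following is only a rough native analogue of the failure mode, assuming a little-endian, two's-complement target:

```cpp
#include <cstdint>
#include <cstdio>

int main()
{
    int16_t local = 0;

    // The problematic tree shape, IND(ushort, ADDR(LCL_VAR short)):
    // a store through a differently-typed small indirection.
    *reinterpret_cast<uint16_t*>(&local) = 0xFFFF;

    // A later load of the short local must sign-extend and print -1.
    // If the indirection above is naively folded to a plain use of the
    // already-widened register value, 65535 can be observed instead.
    printf("%d\n", static_cast<int>(local));
    return 0;
}
```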
13 changes: 7 additions & 6 deletions src/coreclr/src/jit/compiler.h
@@ -471,10 +471,11 @@ class LclVarDsc
// 32-bit target. For implicit byref parameters, this gets hijacked between
// fgRetypeImplicitByRefArgs and fgMarkDemotedImplicitByRefArgs to indicate whether
// references to the arg are being rewritten as references to a promoted shadow local.
unsigned char lvIsStructField : 1; // Is this local var a field of a promoted struct local?
unsigned char lvOverlappingFields : 1; // True when we have a struct with possibly overlapping fields
unsigned char lvContainsHoles : 1; // True when we have a promoted struct that contains holes
unsigned char lvCustomLayout : 1; // True when this struct has "CustomLayout"
unsigned char lvIsStructField : 1; // Is this local var a field of a promoted struct local?
unsigned char lvOverlappingFields : 1; // True when we have a struct with possibly overlapping fields
unsigned char lvContainsHoles : 1; // True when we have a promoted struct that contains holes
unsigned char lvCustomLayout : 1; // True when this struct has "CustomLayout"
unsigned char lvForceLoadNormalize : 1; // True when this local had a cast on the LHS of an assignment

unsigned char lvIsMultiRegArg : 1; // true if this is a multireg LclVar struct used in an argument context
unsigned char lvIsMultiRegRet : 1; // true if this is a multireg LclVar struct assigned from a multireg call
@@ -884,14 +885,14 @@ class LclVarDsc
{
return varTypeIsSmall(TypeGet()) &&
// lvIsStructField is treated the same as the aliased local, see fgDoNormalizeOnStore.
(lvIsParam || lvAddrExposed || lvIsStructField);
(lvIsParam || lvAddrExposed || lvIsStructField || lvForceLoadNormalize);
}

bool lvNormalizeOnStore() const
{
return varTypeIsSmall(TypeGet()) &&
// lvIsStructField is treated the same as the aliased local, see fgDoNormalizeOnStore.
!(lvIsParam || lvAddrExposed || lvIsStructField);
!(lvIsParam || lvAddrExposed || lvIsStructField || lvForceLoadNormalize);
}
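For readers outside the JIT, here is a minimal standalone model of how these two predicates partition small-typed locals (the type and field names below are simplified stand-ins, not the real `LclVarDsc`): a small local is widened either on every load or on every store, never both, and the new `lvForceLoadNormalize` bit moves a local into the load-normalized set.

```cpp
#include <cstdio>

struct LclVarModel
{
    bool isSmallType;        // stand-in for varTypeIsSmall(TypeGet())
    bool isParam;
    bool isAddrExposed;
    bool isStructField;
    bool forceLoadNormalize; // new bit: set by the store side of the folding

    bool normalizeOnLoad() const
    {
        return isSmallType && (isParam || isAddrExposed || isStructField || forceLoadNormalize);
    }
    bool normalizeOnStore() const
    {
        return isSmallType && !(isParam || isAddrExposed || isStructField || forceLoadNormalize);
    }
};

int main()
{
    LclVarModel v{/* small */ true, false, false, false, false};
    printf("%d %d\n", v.normalizeOnLoad(), v.normalizeOnStore()); // 0 1
    v.forceLoadNormalize = true; // a mismatched small-type store was folded
    printf("%d %d\n", v.normalizeOnLoad(), v.normalizeOnStore()); // 1 0
    return 0;
}
```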

void incRefCnts(BasicBlock::weight_t weight,
12 changes: 6 additions & 6 deletions src/coreclr/src/jit/gentree.cpp
@@ -10102,6 +10102,12 @@ void Compiler::gtDispNode(GenTree* tree, IndentStack* indentStack, __in __in_z _
// We prefer printing V or U
if ((tree->gtFlags & (GTF_IND_VOLATILE | GTF_IND_UNALIGNED)) == 0)
{
if (tree->gtFlags & GTF_IND_ASG_LHS)
{
printf("D"); // print a D for definition
--msgLength;
break;
}
if (tree->gtFlags & GTF_IND_TGTANYWHERE)
{
printf("*");
@@ -10132,12 +10138,6 @@ void Compiler::gtDispNode(GenTree* tree, IndentStack* indentStack, __in __in_z _
--msgLength;
break;
}
if (tree->gtFlags & GTF_IND_ASG_LHS)
{
printf("D"); // print a D for definition
--msgLength;
break;
}
}
__fallthrough;

196 changes: 139 additions & 57 deletions src/coreclr/src/jit/morph.cpp
@@ -9736,6 +9736,10 @@ GenTree* Compiler::fgMorphOneAsgBlockOp(GenTree* tree)

asg->ChangeType(asgType);
dest->gtFlags |= GTF_DONT_CSE;
if (dest->OperIs(GT_IND))
{
dest->gtFlags |= GTF_IND_ASG_LHS; // Mark this GT_IND as being the left side of an assignment
}
asg->gtFlags &= ~GTF_EXCEPT;
asg->gtFlags |= ((dest->gtFlags | src->gtFlags) & GTF_ALL_EFFECT);
// Un-set GTF_REVERSE_OPS, and it will be set later if appropriate.
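A sketch of the producer side of the new flag (the type names and the flag value are stand-ins, not JIT types): every `GT_IND` that ends up as an assignment destination is tagged, so the folding code can later ask "is this a store?" without re-walking the parent assignment.

```cpp
#include <cstdio>

constexpr unsigned GTF_IND_ASG_LHS_MODEL = 0x1; // stand-in flag value

struct NodeModel
{
    bool     isInd; // models OperIs(GT_IND)
    unsigned flags;
};

// Mirrors the three-line pattern added in this diff wherever an
// assignment destination is morphed.
void markAsgDest(NodeModel& dest)
{
    if (dest.isInd)
    {
        dest.flags |= GTF_IND_ASG_LHS_MODEL; // destination IND == store
    }
}

int main()
{
    NodeModel ind{true, 0};
    markAsgDest(ind);
    printf("isStore=%d\n", (ind.flags & GTF_IND_ASG_LHS_MODEL) != 0); // isStore=1
    return 0;
}
```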
@@ -11643,6 +11647,10 @@ GenTree* Compiler::fgMorphSmpOp(GenTree* tree, MorphAddrContext* mac)
if (op1->IsLocal() || (op1->TypeGet() != TYP_STRUCT))
{
op1->gtFlags |= GTF_DONT_CSE;
if (op1->OperIs(GT_IND))
{
op1->gtFlags |= GTF_IND_ASG_LHS; // Mark this GT_IND as being the left side of an assignment
}
}
break;

@@ -12737,6 +12745,10 @@ GenTree* Compiler::fgMorphSmpOp(GenTree* tree, MorphAddrContext* mac)
if (op1->IsLocal() || (op1->TypeGet() != TYP_STRUCT))
{
op1->gtFlags |= GTF_DONT_CSE;
if (op1->OperIs(GT_IND))
{
op1->gtFlags |= GTF_IND_ASG_LHS; // Mark this GT_IND as being the left side of an assignment
}
}
break;

@@ -13648,18 +13660,26 @@ GenTree* Compiler::fgMorphSmpOp(GenTree* tree, MorphAddrContext* mac)
break;
}

bool foldAndReturnTemp;
bool foldAndReturnTemp;
bool isStore;
var_types addCastToType;

foldAndReturnTemp = false;
temp = nullptr;
ival1 = 0;
isStore = (tree->gtFlags & GTF_IND_ASG_LHS) != 0;
addCastToType = TYP_VOID; // TYP_VOID means we don't need to insert a cast

temp = nullptr;
ival1 = 0;

assert(typ != TYP_UINT); // We never generate a GT_IND of a UINT

// Don't remove a volatile GT_IND, even if the address points to a local variable.
if ((tree->gtFlags & GTF_IND_VOLATILE) == 0)
{
/* Try to Fold *(&X) into X */
if (op1->gtOper == GT_ADDR)
{
// Can not remove a GT_ADDR if it is currently a CSE candidate.
// Cannot remove a GT_ADDR if it is currently a CSE candidate.
if (gtIsActiveCSE_Candidate(op1))
{
break;
@@ -13678,51 +13698,89 @@ GenTree* Compiler::fgMorphSmpOp(GenTree* tree, MorphAddrContext* mac)
}
else if (temp->OperIsLocal())
{
unsigned lclNum = temp->AsLclVarCommon()->GetLclNum();
LclVarDsc* varDsc = &lvaTable[lclNum];

// We will try to optimize when we have a promoted struct with a zero lvFldOffset
if (varDsc->lvPromoted && (varDsc->lvFldOffset == 0))
if (temp->OperIs(GT_LCL_VAR))
{
noway_assert(varTypeIsStruct(varDsc));
unsigned lclNum = temp->AsLclVarCommon()->GetLclNum();
LclVarDsc* varDsc = &lvaTable[lclNum];
var_types lclType = varDsc->lvType;

// We will try to optimize when we have a single field struct that is being struct promoted
if (varDsc->lvFieldCnt == 1)
// We will try to optimize when we have a promoted struct with a zero lvFldOffset
if (varDsc->lvPromoted && (varDsc->lvFldOffset == 0))
{
unsigned lclNumFld = varDsc->lvFieldLclStart;
// just grab the promoted field
LclVarDsc* fieldVarDsc = &lvaTable[lclNumFld];
noway_assert(varTypeIsStruct(varDsc));

// Also make sure that the tree type matches the fieldVarType and that its lvFldOffset
// is zero
if (fieldVarDsc->TypeGet() == typ && (fieldVarDsc->lvFldOffset == 0))
// We will try to optimize when we have a single field struct that is being struct
// promoted
if (varDsc->lvFieldCnt == 1)
{
// We can just use the existing promoted field LclNum
temp->AsLclVarCommon()->SetLclNum(lclNumFld);
temp->gtType = fieldVarDsc->TypeGet();
unsigned lclNumFld = varDsc->lvFieldLclStart;
// just grab the promoted field
LclVarDsc* fieldVarDsc = &lvaTable[lclNumFld];

// Also make sure that the tree type matches the fieldVarType and that its
// lvFldOffset is zero
if (fieldVarDsc->TypeGet() == typ && (fieldVarDsc->lvFldOffset == 0))
{
// We can just use the existing promoted field LclNum
temp->AsLclVarCommon()->SetLclNum(lclNumFld);
temp->gtType = fieldVarDsc->TypeGet();

foldAndReturnTemp = true;
foldAndReturnTemp = true;
}
}
}
bool matchingTypes = (lclType == typ);
bool matchingWidths = (genTypeSize(lclType) == genTypeSize(typ));
bool matchingFloat = (varTypeIsFloating(lclType) == varTypeIsFloating(typ));

// TYP_BOOL and TYP_UBYTE are also matching types
if (!matchingTypes)
{
if ((lclType == TYP_BOOL) && (typ == TYP_UBYTE))
{
matchingTypes = true;
}
else if ((lclType == TYP_UBYTE) && (typ == TYP_BOOL))
{
matchingTypes = true;
}
}

// If the sizes match and we don't have a float or TYP_STRUCT,
// then we will fold the IND/ADDR and use the LCL_VAR directly
//
if (matchingWidths && matchingFloat && (typ != TYP_STRUCT))
{
// For some small types we might need to force load normalization or insert a cast here
//
if (varTypeIsSmall(typ))
{
// For any stores of small types, we will force loads to be normalized
// this is necessary as we need to zero/sign extend any load
// after this kind of store.
//
if (isStore)
{
varDsc->lvForceLoadNormalize = true;
}
// otherwise we have a load operation
//
// Do we have a non-matching small type load?
// If so, we need to insert a cast operation
else if (!matchingTypes)
{
addCastToType = typ;
}
}
// we will fold the IND/ADDR and reduce to just the local variable
//
tree->gtType = typ = temp->TypeGet();
foldAndReturnTemp = true;
}
}
// If the type of the IND (typ) is a "small int", and the type of the local has the
// same width, then we can reduce to just the local variable -- it will be
// correctly normalized, and signed/unsigned differences won't matter.
//
// The below transformation cannot be applied if the local var needs to be normalized on load.
else if (varTypeIsSmall(typ) && (genTypeSize(lvaTable[lclNum].lvType) == genTypeSize(typ)) &&
!lvaTable[lclNum].lvNormalizeOnLoad())
{
tree->gtType = typ = temp->TypeGet();
foldAndReturnTemp = true;
}
else if (!varTypeIsStruct(typ) && (lvaTable[lclNum].lvType == typ) &&
!lvaTable[lclNum].lvNormalizeOnLoad())
{
tree->gtType = typ = temp->TypeGet();
foldAndReturnTemp = true;
}
else

if (!foldAndReturnTemp)
{
// Assumes that when Lookup returns "false" it will leave "fieldSeq" unmodified (i.e.
// nullptr)
Expand All @@ -13740,12 +13798,13 @@ GenTree* Compiler::fgMorphSmpOp(GenTree* tree, MorphAddrContext* mac)
foldAndReturnTemp = true;
}
}
// Otherwise we will fold this into a GT_LCL_FLD below
// where we check (temp != nullptr)

// Otherwise we will fold this GT_LCL_VAR into a GT_LCL_FLD below
// where we check: if ((temp != nullptr) && !foldAndReturnTemp)
}
else // !temp->OperIsLocal()
else // X/temp is something else, not an OperIsLocal()
{
// We don't try to fold away the GT_IND/GT_ADDR for this case
// We don't try to fold away the GT_IND/GT_ADDR/temp for this case
temp = nullptr;
}
}
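The core of the fix is the fold decision above. As a self-contained model (the enum, sizes, and helper names below are simplified assumptions; the authoritative logic is the `fgMorphSmpOp` code in this diff): fold whenever widths and float-ness match and the type is not a struct, then force load normalization for small-typed stores, or insert a narrowing cast for small-typed loads whose types don't match.

```cpp
#include <cstddef>
#include <cstdio>

// Simplified stand-ins for the JIT's var_types and helpers.
enum VarType { TYP_BOOL, TYP_BYTE, TYP_UBYTE, TYP_SHORT, TYP_USHORT, TYP_INT, TYP_FLOAT, TYP_STRUCT };
bool   isSmall(VarType t)    { return t <= TYP_USHORT; }
bool   isFloating(VarType t) { return t == TYP_FLOAT; }
size_t typeSize(VarType t)   { return t <= TYP_UBYTE ? 1 : (t <= TYP_USHORT ? 2 : 4); }

struct FoldDecision
{
    bool fold;               // reduce IND(ADDR(LCL_VAR)) to LCL_VAR?
    bool forceLoadNormalize; // store case: later loads must re-extend
    bool insertCast;         // load case: narrow the widened value to 'typ'
};

FoldDecision decide(VarType typ, VarType lclType, bool isStore)
{
    bool matchingTypes  = (typ == lclType) ||
                          ((typ == TYP_BOOL) && (lclType == TYP_UBYTE)) ||
                          ((typ == TYP_UBYTE) && (lclType == TYP_BOOL));
    bool matchingWidths = (typeSize(typ) == typeSize(lclType));
    bool matchingFloat  = (isFloating(typ) == isFloating(lclType));

    FoldDecision d = {false, false, false};
    if (matchingWidths && matchingFloat && (typ != TYP_STRUCT))
    {
        d.fold = true;
        if (isSmall(typ))
        {
            if (isStore)
            {
                d.forceLoadNormalize = true;
            }
            else if (!matchingTypes)
            {
                d.insertCast = true;
            }
        }
    }
    return d;
}

int main()
{
    FoldDecision s = decide(TYP_USHORT, TYP_SHORT, /* isStore */ true);
    FoldDecision l = decide(TYP_UBYTE, TYP_BYTE, /* isStore */ false);
    printf("%d %d %d\n", s.fold, s.forceLoadNormalize, s.insertCast); // 1 1 0
    printf("%d %d %d\n", l.fold, l.forceLoadNormalize, l.insertCast); // 1 0 1
    return 0;
}
```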
@@ -13912,19 +13971,42 @@ GenTree* Compiler::fgMorphSmpOp(GenTree* tree, MorphAddrContext* mac)
// normalization.
if (temp->OperIs(GT_LCL_VAR))
{
#ifdef DEBUG
// We clear this flag on `temp` because `fgMorphLocalVar` may assert that this bit is clear
// and the node in question must have this bit set (as it has already been morphed).
temp->gtDebugFlags &= ~GTF_DEBUG_NODE_MORPHED;
#endif // DEBUG
const bool forceRemorph = true;
temp = fgMorphLocalVar(temp, forceRemorph);
#ifdef DEBUG
// We then set this flag on `temp` because `fgMorphLocalVar` may not set it itself, and the
// caller of `fgMorphSmpOp` may assert that this flag is set on `temp` once this function
// returns.
temp->gtDebugFlags |= GTF_DEBUG_NODE_MORPHED;
#endif // DEBUG
unsigned lclNum = temp->AsLclVarCommon()->GetLclNum();
var_types lclType = lvaTable[lclNum].lvType;

// We build a new 'result' tree to return, as we want to call fgMorphTree on it
//
GenTree* result = gtNewLclvNode(lclNum, lclType);
assert(result->OperGet() == GT_LCL_VAR);
Contributor: Why is it that we need to build a new result tree? It seems an unfortunate additional cost; why doesn't it work to clear the GTF_DEBUG_NODE_MORPHED flag?

Contributor Author: The whole clearing/setting of the GTF_DEBUG_NODE_MORPHED flag is a total hack. But fine, I will revert the change to my original fix, which is much simpler but introduces some regressions. I believe that this fix is better, but will go with whatever you want.

Contributor: I totally agree that the GTF_DEBUG_NODE_MORPHED flag is a frustratingly fragile mechanism, though I do think it would be best to do a minimal fix for now.

// Copy the GTF_DONT_CSE flag from the original tree to `result`
// because fgMorphLocalVar uses this flag to determine if we are loading or storing
// and will insert a cast when we are loading from a lvNormalizeOnLoad() local
//
result->gtFlags |= (tree->gtFlags & GTF_DONT_CSE);

// If the indirection node was a different small type than our local variable
// we insert a cast to that type.
//
if (addCastToType != TYP_VOID)
{
assert(varTypeIsSmall(addCastToType));

// Insert a narrowing cast to the old indirection type
//
result = gtNewCastNode(TYP_INT, result, false, addCastToType);
}

// On this path we return 'result' after calling fgMorphTree on it,
// so now we can destroy the unused 'temp' node.
//
DEBUG_DESTROY_NODE(temp);

temp = fgMorphTree(result);
}
else
{
assert(addCastToType == TYP_VOID);
}

return temp;
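A quick sanity check on the cast-insertion path (a hedged native analogue assuming a little-endian target, not JIT code): loading through a smaller-typed indirection over a small local is equivalent to a narrowing cast of the local's widened value, which is exactly what the inserted `CAST` node expresses.

```cpp
#include <cassert>
#include <cstdint>

int main()
{
    int16_t local = -2; // bit pattern 0xFFFE

    // IND(ubyte, ADDR(LCL_VAR short)): read the low byte directly.
    uint8_t viaInd = *reinterpret_cast<uint8_t*>(&local); // little-endian: 0xFE

    // CAST(ubyte, LCL_VAR short): narrow the widened value instead.
    uint8_t viaCast = static_cast<uint8_t>(local);

    assert(viaInd == viaCast && viaCast == 0xFE);
    return 0;
}
```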