Fix Issue 620 - Incorrect IND(ADDR(LCL_VAR)) folding when types do not match #40059

Closed
wants to merge 10 commits
13 changes: 7 additions & 6 deletions src/coreclr/src/jit/compiler.h
@@ -471,10 +471,11 @@ class LclVarDsc
// 32-bit target. For implicit byref parameters, this gets hijacked between
// fgRetypeImplicitByRefArgs and fgMarkDemotedImplicitByRefArgs to indicate whether
// references to the arg are being rewritten as references to a promoted shadow local.
unsigned char lvIsStructField : 1; // Is this local var a field of a promoted struct local?
unsigned char lvOverlappingFields : 1; // True when we have a struct with possibly overlapping fields
unsigned char lvContainsHoles : 1; // True when we have a promoted struct that contains holes
unsigned char lvCustomLayout : 1; // True when this struct has "CustomLayout"
unsigned char lvIsStructField : 1; // Is this local var a field of a promoted struct local?
unsigned char lvOverlappingFields : 1; // True when we have a struct with possibly overlapping fields
unsigned char lvContainsHoles : 1; // True when we have a promoted struct that contains holes
unsigned char lvCustomLayout : 1; // True when this struct has "CustomLayout"
unsigned char lvForceLoadNormalize : 1; // True when this local had a cast on the LHS of an assignment

unsigned char lvIsMultiRegArg : 1; // true if this is a multireg LclVar struct used in an argument context
unsigned char lvIsMultiRegRet : 1; // true if this is a multireg LclVar struct assigned from a multireg call
@@ -884,14 +885,14 @@ class LclVarDsc
bool lvNormalizeOnLoad() const
{
return varTypeIsSmall(TypeGet()) &&
// lvIsStructField is treated the same as the aliased local, see fgDoNormalizeOnStore.
(lvIsParam || lvAddrExposed || lvIsStructField);
(lvIsParam || lvAddrExposed || lvIsStructField || lvForceLoadNormalize);
}

bool lvNormalizeOnStore() const
{
return varTypeIsSmall(TypeGet()) &&
// lvIsStructField is treated the same as the aliased local, see fgDoNormalizeOnStore.
!(lvIsParam || lvAddrExposed || lvIsStructField);
!(lvIsParam || lvAddrExposed || lvIsStructField || lvForceLoadNormalize);
}

void incRefCnts(BasicBlock::weight_t weight,
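Note: small-typed locals are kept widened to 32 bits in registers, so the JIT has to normalize (sign/zero-extend) them either on every store or on every load; lvNormalizeOnLoad/lvNormalizeOnStore choose which. The C# sketch below is only an illustration of the pattern the new lvForceLoadNormalize bit guards against (a reinterpreting store that bypasses store-normalization); the identifiers and the expected value are illustrative and are not taken from the PR.

using System;
using System.Runtime.CompilerServices;

class ForceLoadNormalizeSketch
{
    // 'c' would normally be a normalize-on-store local: the JIT sign-extends it at each
    // assignment and assumes plain loads can reuse the widened register value. The store
    // through (ushort*)&c writes un-normalized bits, so once that IND/ADDR is folded the
    // local must be re-normalized on load (the lvForceLoadNormalize case in this PR).
    [MethodImpl(MethodImplOptions.NoInlining)]
    static unsafe int StoreThroughCast(short a)
    {
        short c = 0;
        *(ushort*)&c = (ushort)(a * 2);
        return c; // must observe the sign-extended short value
    }

    static void Main() => Console.WriteLine(StoreThroughCast(-1)); // expected: -2
}

(Requires compiling with AllowUnsafeBlocks, as the regression test project below does.)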
129 changes: 98 additions & 31 deletions src/coreclr/src/jit/morph.cpp
@@ -11642,6 +11642,9 @@ GenTree* Compiler::fgMorphSmpOp(GenTree* tree, MorphAddrContext* mac)
// TODO-1stClassStructs: improve this.
if (op1->IsLocal() || (op1->TypeGet() != TYP_STRUCT))
{
// TODO-Cleanup: consider using GTF_IND_ASG_LHS here, as GTF_DONT_CSE here is used to indicate
// that this GT_IND is the LHS of an assignment
//
op1->gtFlags |= GTF_DONT_CSE;
}
break;
@@ -13648,18 +13651,25 @@ GenTree* Compiler::fgMorphSmpOp(GenTree* tree, MorphAddrContext* mac)
break;
}

bool foldAndReturnTemp;
bool foldAndReturnTemp;
bool isStore;
var_types addCastToType;

foldAndReturnTemp = false;
temp = nullptr;
ival1 = 0;
isStore = (tree->gtFlags & GTF_DONT_CSE) != 0;
;
Contributor: looks like an unintentional new line with ;.

Contributor Author (@briansull, Aug 6, 2020): I can fix this in a follow-on check-in.

Contributor: Also, there should be a comment here that this should be checking GTF_IND_ASG_LHS once the above cleanup item is addressed.

addCastToType = TYP_VOID; // TYP_VOID means we don't need to insert a cast

temp = nullptr;
ival1 = 0;

// Don't remove a volatile GT_IND, even if the address points to a local variable.
if ((tree->gtFlags & GTF_IND_VOLATILE) == 0)
{
/* Try to Fold *(&X) into X */
if (op1->gtOper == GT_ADDR)
{
// Can not remove a GT_ADDR if it is currently a CSE candidate.
// Cannot remove a GT_ADDR if it is currently a CSE candidate.
if (gtIsActiveCSE_Candidate(op1))
{
break;
@@ -13678,8 +13688,9 @@ GenTree* Compiler::fgMorphSmpOp(GenTree* tree, MorphAddrContext* mac)
}
else if (temp->OperIsLocal())
{
unsigned lclNum = temp->AsLclVarCommon()->GetLclNum();
LclVarDsc* varDsc = &lvaTable[lclNum];
unsigned lclNum = temp->AsLclVarCommon()->GetLclNum();
Contributor: It is still confusing that we are checking the parent struct type for local fields even though these checks never pass, and the code only works because of that.

I would suggest separating this else if (temp->OperIsLocal()) as:

else if (temp->OperIs(GT_LCL_VAR)) { the new logic}
else if  (temp->OperIs(GT_LCL_FLD)) // the old logic that was applying to `LCL_FLD` only.
{
    // Assumes that when Lookup returns "false" it will leave "fieldSeq" unmodified (i.e.
    // nullptr)
    assert(fieldSeq == nullptr);
    bool b = GetZeroOffsetFieldMap()->Lookup(op1, &fieldSeq);
    assert(b || fieldSeq == nullptr);

    if (fieldSeq != nullptr) // that is from the initial CoreCLR commit, no idea why we do it only
                             // when we have a zero offset.
    {
        // Append the field sequence, change the type.
        temp->AsLclFld()->SetFieldSeq(
            GetFieldSeqStore()->Append(temp->AsLclFld()->GetFieldSeq(), fieldSeq));
        temp->gtType = typ;

        foldAndReturnTemp = true;
    }
} 

Contributor Author (@briansull, Aug 6, 2020): I think that we can address any other change as a follow up.

Contributor: I'm not sure why there is a rush to check this in today; I think it's worth making the change @sandreenko suggests, as it doesn't seem reasonable to use the parent type in the `GT_LCL_FLD` case.

Contributor Author (@briansull, Aug 7, 2020): I tried this change:

else if  (temp->OperIs(GT_LCL_FLD)) // the old logic that was applying to `LCL_FLD` only.
else if (temp->OperIs(GT_LCL_VAR)) { the new logic}

But it results in a failure to propagate a field sequence on x86:

fgMorphTree BB04, STMT00037 (before)
               [000221] -A----------              *  ASG       struct (copy)
               [000220] ------------              +--*  BLK       struct<System.DateTime, 8>
               [000219] ------------              |  \--*  ADDR      byref 
               [000218] -------N----              |     \--*  FIELD     struct Start
               [000215] ------------              |        \--*  ADDR      byref 
               [000216] -------N----              |           \--*  LCL_VAR   struct<System.Globalization.DaylightTimeStruct, 24> V11 tmp2         
               [000217] ------------              \--*  LCL_VAR   struct<System.DateTime, 8>(P) V24 tmp15        
                                                  \--*    long   V24._dateData (offs=0x00) -> V42 tmp33        

fgMorphTree BB04, STMT00037 (after)
               [000355] -A---+------              *  ASG       long  
               [000353] *------N----              +--*  IND       long  
               [000351] ------------              |  \--*  ADDR      byref 
<              [000352] U------N----              |     \--*  LCL_FLD   struct V11 tmp2         [+0] Fseq[Start, _dateData]
>              [000352] U------N----              |     \--*  LCL_FLD   struct V11 tmp2         [+0]
               [000354] ------------              \--*  LCL_VAR   long   V42 tmp33        

So that field sequence path is used both for GT_LCL_FLD and for GT_LCL_VAR nodes that fail the optimization step.

Contributor Author (@briansull, Aug 7, 2020): I ended up changing it to this:

                    else if (temp->OperIsLocal())
                    {
                        if (temp->OperIs(GT_LCL_VAR))
                        {
                           // newer code
                        }
                        if (!foldAndReturnTemp)
                        {
                           // old code

LclVarDsc* varDsc = &lvaTable[lclNum];
var_types lclType = varDsc->lvType;

// We will try to optimize when we have a promoted struct promoted with a zero lvFldOffset
if (varDsc->lvPromoted && (varDsc->lvFldOffset == 0))
@@ -13705,24 +13716,56 @@ GenTree* Compiler::fgMorphSmpOp(GenTree* tree, MorphAddrContext* mac)
}
}
}
// If the type of the IND (typ) is a "small int", and the type of the local has the
// same width, then we can reduce to just the local variable -- it will be
// correctly normalized, and signed/unsigned differences won't matter.
//
// The below transformation cannot be applied if the local var needs to be normalized on load.
else if (varTypeIsSmall(typ) && (genTypeSize(lvaTable[lclNum].lvType) == genTypeSize(typ)) &&
!lvaTable[lclNum].lvNormalizeOnLoad())
bool matchingTypes = (lclType == typ);
bool matchingWidths = (genTypeSize(lclType) == genTypeSize(typ));
bool matchingFloat = (varTypeIsFloating(lclType) == varTypeIsFloating(typ));

// TYP_BOOL and TYP_UBYTE are also matching types
if (!matchingTypes)
{
tree->gtType = typ = temp->TypeGet();
foldAndReturnTemp = true;
if ((lclType == TYP_BOOL) && (typ == TYP_UBYTE))
{
matchingTypes = true;
}
else if ((lclType == TYP_UBYTE) && (typ == TYP_BOOL))
{
matchingTypes = true;
}
}
else if (!varTypeIsStruct(typ) && (lvaTable[lclNum].lvType == typ) &&
!lvaTable[lclNum].lvNormalizeOnLoad())

// If the sizes match and we don't have a float or TYP_STRUCT,
// then we will fold the IND/ADDR and use the LCL_VAR directly
//
if (matchingWidths && matchingFloat && (typ != TYP_STRUCT))
{
// For some small types we might need to change to normalize loads or insert a cast here
//
if (varTypeIsSmall(typ))
{
// For any stores of small types, we will force loads to be normalized
// this is necessary as we need to zero/sign extend any load
// after this kind of store.
//
if (isStore)
{
varDsc->lvForceLoadNormalize = true;
}
// otherwise we have a load operation
//
// Do we have a non matching small type load?
// if so we need to insert a cast operation
else if (!matchingTypes)
{
addCastToType = typ;
}
}
// we will fold the IND/ADDR and reduce to just the local variable
//
tree->gtType = typ = temp->TypeGet();
foldAndReturnTemp = true;
}
else

if (!foldAndReturnTemp)
{
// Assumes that when Lookup returns "false" it will leave "fieldSeq" unmodified (i.e.
// nullptr)
Expand All @@ -13740,6 +13783,7 @@ GenTree* Compiler::fgMorphSmpOp(GenTree* tree, MorphAddrContext* mac)
foldAndReturnTemp = true;
}
}

// Otherwise we will fold this into a GT_LCL_FLD below
// where we check (temp != nullptr)
}
@@ -13912,19 +13956,42 @@ GenTree* Compiler::fgMorphSmpOp(GenTree* tree, MorphAddrContext* mac)
// normalization.
if (temp->OperIs(GT_LCL_VAR))
{
#ifdef DEBUG
// We clear this flag on `temp` because `fgMorphLocalVar` may assert that this bit is clear
// and the node in question must have this bit set (as it has already been morphed).
temp->gtDebugFlags &= ~GTF_DEBUG_NODE_MORPHED;
#endif // DEBUG
const bool forceRemorph = true;
temp = fgMorphLocalVar(temp, forceRemorph);
#ifdef DEBUG
// We then set this flag on `temp` because `fgMorphLocalVar` may not set it itself, and the
// caller of `fgMorphSmpOp` may assert that this flag is set on `temp` once this function
// returns.
temp->gtDebugFlags |= GTF_DEBUG_NODE_MORPHED;
#endif // DEBUG
unsigned lclNum = temp->AsLclVarCommon()->GetLclNum();
var_types lclType = lvaTable[lclNum].lvType;

// We build a new 'result' tree to return, as we want to call fgMorphTree on it
//
GenTree* result = gtNewLclvNode(lclNum, lclType);
assert(result->OperGet() == GT_LCL_VAR);
Contributor: Why is it that we need to build a new result tree? It seems an unfortunate additional cost; why doesn't it work to clear the GTF_DEBUG_NODE_MORPHED flag?

Contributor Author: The whole clearing/setting of the GTF_DEBUG_NODE_MORPHED flag is a total hack.

But fine, I will revert the change to my original fix, which is much simpler but introduces some regressions.

I believe that this fix is better, but will go with whatever you want.

Contributor: I totally agree that the GTF_DEBUG_NODE_MORPHED flag is a frustratingly fragile mechanism, though I do think it would be best to do a minimal fix for now.


// Copy the GTF_DONT_CSE flag from the original tree to `result`
// because fgMorphLocalVar uses this flag to determine if we are loading or storing
// and will insert a cast when we are loading from a lvNormalizeOnLoad() local
//
result->gtFlags |= (tree->gtFlags & GTF_DONT_CSE);

// If the indirection node was a different small type than our local variable
// we insert a cast to that type.
//
if (addCastToType != TYP_VOID)
{
assert(varTypeIsSmall(addCastToType));

// Insert a narrowing cast to the old indirection type
//
result = gtNewCastNode(TYP_INT, result, false, addCastToType);
}

// On this path we return 'result' after calling fgMorphTree on it,
// so now we can destroy the unused 'temp' node.
//
DEBUG_DESTROY_NODE(temp);

temp = fgMorphTree(result);
}
else
{
assert(addCastToType == TYP_VOID);
}

return temp;
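For reference, the new matchingTypes check also treats TYP_BOOL and TYP_UBYTE as equivalent, so a pattern like the following C# sketch (assuming the usual single-byte bool representation) can fold IND(ADDR(LCL_VAR)) directly to the local without a cast; this example is illustrative and is not part of the PR's regression test.

using System;
using System.Runtime.CompilerServices;

class BoolByteSketch
{
    // The JIT types 'local' as TYP_BOOL and the dereference as a TYP_UBYTE indirection.
    // Both are one byte wide with the same representation, so the fold is safe as-is.
    [MethodImpl(MethodImplOptions.NoInlining)]
    static unsafe byte ReadBoolAsByte(bool value)
    {
        bool local = value;
        return *(byte*)&local;
    }

    static void Main()
    {
        Console.WriteLine(ReadBoolAsByte(true));  // 1
        Console.WriteLine(ReadBoolAsByte(false)); // 0
    }
}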
153 changes: 153 additions & 0 deletions src/tests/JIT/Regression/JitBlue/Runtime_620/Runtime_620.cs
@@ -0,0 +1,153 @@
// Licensed to the .NET Foundation under one or more agreements.
// The .NET Foundation licenses this file to you under the MIT license.
// See the LICENSE file in the project root for more information.

using System;
using System.Runtime.CompilerServices;

namespace NormalizeTest
{
class Program
{
static int testResult = 100;

[MethodImpl(MethodImplOptions.NoInlining)]
static unsafe int Test1(short a, int b)
{
short c = (short)(a * 2);
int d = *((ushort*)&c) / b;
Console.WriteLine(d);
return d;
}

[MethodImpl(MethodImplOptions.NoInlining)]
static unsafe short Test2(short a, int b)
{
short c = (short)(b * 2);
*((ushort*)&c) = (ushort)(a * b);
Console.WriteLine(c);
return c;
}

[MethodImpl(MethodImplOptions.NoInlining)]
static unsafe int Test3(ushort a, int b)
{
ushort c = (ushort)(a * 2);
int d = *((short*)&c) / b;
Console.WriteLine(d);
return d;
}

[MethodImpl(MethodImplOptions.NoInlining)]
static unsafe ushort Test4(ushort a, int b)
{
ushort c = (ushort)(b * 2);
*((short*)&c) = (short)(a * b);
Console.WriteLine(c);
return c;
}

struct S {
public short s16;
public ushort u16;
};

[MethodImpl(MethodImplOptions.NoInlining)]
static unsafe int TestF1(short a, int b)
{
S s;
s.s16 = (short)(a * 2);
int d = *((ushort*)&s.s16) / b;
Console.WriteLine(d);
return d;
}

[MethodImpl(MethodImplOptions.NoInlining)]
static unsafe short TestF2(short a, int b)
{
S s;
s.s16 = (short)(b * 2);
*((ushort*)&s.s16) = (ushort)(a * b);
Console.WriteLine(s.s16);
return s.s16;
}

[MethodImpl(MethodImplOptions.NoInlining)]
static unsafe int TestF3(ushort a, int b)
{
S s;
s.u16 = (ushort)(a * 2);
int d = *((short*)&s.u16) / b;
Console.WriteLine(d);
return d;
}

[MethodImpl(MethodImplOptions.NoInlining)]
static unsafe ushort TestF4(ushort a, int b)
{
S s;
s.u16 = (ushort)(b * 2);
*((short*)&s.u16) = (short)(a * b);
Console.WriteLine(s.u16);
return s.u16;
}

static void Check(String id, int result, int expected)
{
if (result != expected)
{
Console.WriteLine("FAILED: {0} -- result {1}, expected {2}", id, result, expected);
testResult = -1;
}
}

static int Main()
{
int result1a = Test1(-1,1);
Check("Test1a", result1a, 65534);

int result1b = Test1(-1,-1);
Check("Test1b", result1b, -65534);

short result2a = Test2(-1,1);
Check("Test2a", (int) result2a, -1);

short result2b = Test2(-1,-1);
Check("Test2b", (int) result2b, 1);

int result3 = Test3(32767,-1);
Check("Test3", (int) result3, 2);

ushort result4 = Test4(32767,-1);
Check("Test4", (int) result4, 32769);



int resultF1a = TestF1(-1,1);
Check("TestF1a", resultF1a, 65534);

int resultF1b = TestF1(-1,-1);
Check("TestF1b", resultF1b, -65534);

short resultF2a = TestF2(-1,1);
Check("TestF2a", (int) resultF2a, -1);

short resultF2b = TestF2(-1,-1);
Check("TestF2b", (int) resultF2b, 1);

int resultF3 = TestF3(32767,-1);
Check("TestF3", (int) resultF3, 2);

ushort resultF4 = TestF4(32767,-1);
Check("TestF4", (int) resultF4, 32769);



if (testResult == 100)
{
Console.WriteLine("Test Passed");
}
return testResult;
}
}
}
11 changes: 11 additions & 0 deletions src/tests/JIT/Regression/JitBlue/Runtime_620/Runtime_620.csproj
@@ -0,0 +1,11 @@
<Project Sdk="Microsoft.NET.Sdk">
<PropertyGroup>
<OutputType>Exe</OutputType>
<DebugType>None</DebugType>
<Optimize>True</Optimize>
<AllowUnsafeBlocks>True</AllowUnsafeBlocks>
</PropertyGroup>
<ItemGroup>
<Compile Include="$(MSBuildProjectName).cs" />
</ItemGroup>
</Project>