Skip to content

Commit

Permalink
Loop alignment: Fix loop size calculation to exclude IG's size that a…
Browse files Browse the repository at this point in the history
…re marked as not aligned (#68869)

* Flag to track IGs whose alignment was removed

* Update src/coreclr/jit/emit.cpp

Co-authored-by: Wraith <wraith2@gmail.com>

* Fix for 65988: Unmark the firstBB as not aligned

* Add test case for #65690

* Replace IGF_REMOVE_FLAG with different logic

* improve loops natvis

* Add back IGF_REMOVED_ALIGN

* review comments

Co-authored-by: Wraith <wraith2@gmail.com>
  • Loading branch information
kunalspathak and Wraith2 authored May 11, 2022
1 parent 336a5d2 commit 478b1b4
Show file tree
Hide file tree
Showing 9 changed files with 268 additions and 9 deletions.
4 changes: 2 additions & 2 deletions src/coreclr/jit/clrjit.natvis
Original file line number Diff line number Diff line change
Expand Up @@ -28,8 +28,8 @@ Documentation for VS debugger format specifiers: https://docs.microsoft.com/en-u

<Type Name="Compiler::LoopDsc">
<DisplayString Condition="lpFlags &amp; LPFLG_REMOVED">REMOVED</DisplayString>
<DisplayString Condition="lpFlags &amp; LPFLG_HAS_PREHEAD">BB{lpTop->bbNum,d}-BB{lpBottom->bbNum,d} pre-h:BB{lpHead->bbNum,d} e:BB{lpEntry->bbNum,d} {lpFlags,en}</DisplayString>
<DisplayString>BB{lpTop->bbNum,d}-BB{lpBottom->bbNum,d} h:BB{lpHead->bbNum,d} e:BB{lpEntry->bbNum,d} {lpFlags,en}</DisplayString>
<DisplayString Condition="lpFlags &amp; LPFLG_HAS_PREHEAD">[BB{lpTop->bbNum,d}..BB{lpBottom->bbNum,d}] pre-h:BB{lpHead->bbNum,d} e:BB{lpEntry->bbNum,d} {lpFlags,en}</DisplayString>
<DisplayString>[BB{lpTop->bbNum,d}..BB{lpBottom->bbNum,d}] h:BB{lpHead->bbNum,d} e:BB{lpEntry->bbNum,d} {lpFlags,en}</DisplayString>
</Type>

<Type Name="EHblkDsc">
Expand Down
5 changes: 3 additions & 2 deletions src/coreclr/jit/compiler.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -5142,7 +5142,6 @@ void Compiler::placeLoopAlignInstructions()
return;
}

int loopsToProcess = loopAlignCandidates;
JITDUMP("Inside placeLoopAlignInstructions for %d loops.\n", loopAlignCandidates);

// Add align only if there were any loops that needed alignment
Expand All @@ -5154,9 +5153,11 @@ void Compiler::placeLoopAlignInstructions()
{
// Adding align instruction in prolog is not supported
// hence just remove that loop from our list.
loopsToProcess--;
fgFirstBB->unmarkLoopAlign(this DEBUG_ARG("prolog block"));
}

int loopsToProcess = loopAlignCandidates;

for (BasicBlock* const block : Blocks())
{
if (currentAlignedLoopNum != BasicBlock::NOT_IN_LOOP)
Expand Down
27 changes: 23 additions & 4 deletions src/coreclr/jit/emit.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -5096,10 +5096,14 @@ unsigned emitter::getLoopSize(insGroup* igLoopHeader, unsigned maxLoopSize DEBUG
{
unsigned loopSize = 0;

JITDUMP("*************** In getLoopSize() for %s\n", emitLabelString(igLoopHeader));

for (insGroup* igInLoop = igLoopHeader; igInLoop != nullptr; igInLoop = igInLoop->igNext)
{
loopSize += igInLoop->igSize;
if (igInLoop->endsWithAlignInstr())
JITDUMP(" %s has %u bytes.", emitLabelString(igInLoop), igInLoop->igSize);

if (igInLoop->endsWithAlignInstr() || igInLoop->hadAlignInstr())
{
// If IGF_HAS_ALIGN is present, igInLoop contains align instruction at the end,
// for next IG or some future IG.
Expand Down Expand Up @@ -5179,16 +5183,31 @@ unsigned emitter::getLoopSize(insGroup* igLoopHeader, unsigned maxLoopSize DEBUG
else
#endif
{
JITDUMP(" but ends with align instruction, taking off %u bytes.",
emitComp->opts.compJitAlignPaddingLimit);
// The current loop size should exclude the align instruction size reserved for next loop.
loopSize -= emitComp->opts.compJitAlignPaddingLimit;
}
}
if ((igInLoop->igLoopBackEdge == igLoopHeader) || (loopSize > maxLoopSize))
{
#ifdef DEBUG
if (igInLoop->igLoopBackEdge == igLoopHeader)
{
JITDUMP(" -- Found the back edge.");
}
else
{
JITDUMP(" -- loopSize exceeded the threshold of %u bytes.", maxLoopSize);
}
JITDUMP("\n");
break;
#endif
}
JITDUMP("\n");
}

JITDUMP("loopSize of %s = %u bytes.\n", emitLabelString(igLoopHeader), loopSize);
return loopSize;
}

Expand Down Expand Up @@ -5282,7 +5301,7 @@ void emitter::emitSetLoopBackEdge(BasicBlock* loopTopBlock)
alignInstr->removeAlignFlags();

markedCurrLoop = true;
JITDUMP("** Skip alignment for current loop IG%02u ~ IG%02u because it encloses an aligned loop "
JITDUMP(";; Skip alignment for current loop IG%02u ~ IG%02u because it encloses an aligned loop "
"IG%02u ~ IG%02u.\n",
currLoopStart, currLoopEnd, emitLastLoopStart, emitLastLoopEnd);
}
Expand All @@ -5292,13 +5311,13 @@ void emitter::emitSetLoopBackEdge(BasicBlock* loopTopBlock)
if (!alignLastLoop && (loopHeadIG != nullptr) && (loopHeadIG->igNum == emitLastLoopStart))
{
assert(!markedLastLoop);
assert(alignInstr->idaIG->endsWithAlignInstr());
assert(alignInstr->idaIG->endsWithAlignInstr() || alignInstr->idaIG->hadAlignInstr());

// This IG should no longer contain alignment instruction
alignInstr->removeAlignFlags();

markedLastLoop = true;
JITDUMP("** Skip alignment for aligned loop IG%02u ~ IG%02u because it encloses the current loop "
JITDUMP(";; Skip alignment for aligned loop IG%02u ~ IG%02u because it encloses the current loop "
"IG%02u ~ IG%02u.\n",
emitLastLoopStart, emitLastLoopEnd, currLoopStart, currLoopEnd);
}
Expand Down
13 changes: 13 additions & 0 deletions src/coreclr/jit/emit.h
Original file line number Diff line number Diff line change
Expand Up @@ -277,6 +277,8 @@ struct insGroup
// and the emitter should continue to track GC info as if there was no new block.
#define IGF_HAS_ALIGN 0x0400 // this group contains an alignment instruction(s) at the end to align either the next
// IG, or, if this IG contains with an unconditional branch, some subsequent IG.
#define IGF_REMOVED_ALIGN 0x0800 // IG was marked as having an alignment instruction(s), but was later unmarked
// without updating the IG's size/offsets.

// Mask of IGF_* flags that should be propagated to new blocks when they are created.
// This allows prologs and epilogs to be any number of IGs, but still be
Expand Down Expand Up @@ -354,6 +356,16 @@ struct insGroup
return (igFlags & IGF_HAS_ALIGN) != 0;
}

// hadAlignInstr: Checks if this IG was ever marked as aligned and later
// decided to not align. Sometimes, a loop is marked as not
// needing alignment, but the igSize was not adjusted immediately.
// This method is used during loopSize calculation, where we adjust
// the loop size by removed alignment bytes.
bool hadAlignInstr() const
{
return (igFlags & IGF_REMOVED_ALIGN) != 0;
}

}; // end of struct insGroup

// For AMD64 the maximum prolog/epilog size supported on the OS is 256 bytes
Expand Down Expand Up @@ -1554,6 +1566,7 @@ class emitter
void removeAlignFlags()
{
idaIG->igFlags &= ~IGF_HAS_ALIGN;
idaIG->igFlags |= IGF_REMOVED_ALIGN;
}
};
void emitCheckAlignFitInCurIG(unsigned nAlignInstr);
Expand Down
2 changes: 1 addition & 1 deletion src/coreclr/jit/optimizer.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -2626,7 +2626,7 @@ void Compiler::optIdentifyLoopsForAlignment()
}
else
{
JITDUMP("Skip alignment for " FMT_LP " that starts at " FMT_BB " weight=" FMT_WT ".\n", loopInd,
JITDUMP(";; Skip alignment for " FMT_LP " that starts at " FMT_BB " weight=" FMT_WT ".\n", loopInd,
top->bbNum, topWeight);
}
}
Expand Down
120 changes: 120 additions & 0 deletions src/tests/JIT/Regression/JitBlue/GitHub_65690/GitHub_65690.cs
Original file line number Diff line number Diff line change
@@ -0,0 +1,120 @@
// Licensed to the .NET Foundation under one or more agreements.
// The .NET Foundation licenses this file to you under the MIT license.
//
// Note: In below test case, there are two backedges to a loop that is marked for align.
// If we see intersecting aligned loops, we mark that loop as not needing alignment
// but then when we see 2nd backedge we again try to mark it as not needing alignment
// and that triggers an assert.
// Found by Antigen
using System;
using System.Collections.Generic;
using System.Runtime.CompilerServices;
public class TestClass_65690
{
public struct S1
{
public struct S1_D1_F1
{
public decimal decimal_0;
public bool bool_1;
public sbyte sbyte_2;
}
public double double_3;
}
public struct S2
{
public S1.S1_D1_F1 s1_s1_d1_f1_5;
}
static byte s_byte_7 = 2;
static decimal s_decimal_9 = -1.9647058823529411764705882353m;
static int s_int_12 = 1;
static long s_long_13 = -2147483647;
static ulong s_ulong_19 = 1;
static S1.S1_D1_F1 s_s1_s1_d1_f1_20 = new S1.S1_D1_F1();
bool bool_23 = true;
byte byte_24 = 5;
int int_29 = 1;
ushort ushort_34 = 5;
uint uint_35 = 5;
S1.S1_D1_F1 s1_s1_d1_f1_37 = new S1.S1_D1_F1();
S1 s1_38 = new S1();
S2 s2_39 = new S2();
static int s_loopInvariant = 4;
[MethodImpl(MethodImplOptions.NoInlining)]
public byte LeafMethod1()
{
unchecked
{
return 15 + 4;
}
}

public double Method3(out sbyte p_sbyte_93, out double p_double_94, out S1.S1_D1_F1 p_s1_s1_d1_f1_98)
{
unchecked
{
p_double_94 = 15 + 4;
p_s1_s1_d1_f1_98 = new S1.S1_D1_F1();
p_sbyte_93 = 15 % 4;
return 15 + 4;
}
}

public void Method0()
{
unchecked
{
int int_145 = -5;
ulong ulong_152 = 5;
S1.S1_D1_F1 s1_s1_d1_f1_153 = new S1.S1_D1_F1();
S2 s2_155 = new S2();
if (15 - 4 != LeafMethod1())
{
}
else
{
try
{
ulong_152 >>= s_int_12 = int_145 -= 15 + 4;
}
finally
{
s2_39.s1_s1_d1_f1_5.decimal_0 = 15 % 4 + s1_s1_d1_f1_153.decimal_0 - 15 + 4 + 40;
}
int __loopvar25 = 15 + 4;
do
{
if (__loopvar25 < s_loopInvariant - 1)
break;
}
while (s1_s1_d1_f1_37.bool_1 = s1_s1_d1_f1_37.bool_1 = bool_23);
}
if (15 / 4 % ulong_152 + 22 + 78 - s_ulong_19 > 19)
{
int __loopvar26 = 15 - 4;
for (; ; )
{
if (__loopvar26 >= s_loopInvariant)
break;
if (s1_s1_d1_f1_37.bool_1)
{
Method3(out s2_155.s1_s1_d1_f1_5.sbyte_2, out s1_38.double_3, out s1_s1_d1_f1_153);
}
else
{
ushort_34 *= 15 % 4;
}
if ((byte_24 = s_byte_7 ^= 15 | 4) >= (15 | 4))
{
}
}
}
return;
}
}
public static int Main(string[] args)
{
new TestClass_65690().Method0();
return 100;
}
}
27 changes: 27 additions & 0 deletions src/tests/JIT/Regression/JitBlue/GitHub_65690/GitHub_65690.csproj
Original file line number Diff line number Diff line change
@@ -0,0 +1,27 @@
<Project Sdk="Microsoft.NET.Sdk">
<PropertyGroup>
<OutputType>Exe</OutputType>
<AllowUnsafeBlocks>True</AllowUnsafeBlocks>
</PropertyGroup>
<PropertyGroup>
<DebugType>None</DebugType>
<Optimize>True</Optimize>
</PropertyGroup>
<PropertyGroup>
<CLRTestBatchPreCommands><![CDATA[
$(CLRTestBatchPreCommands)
set COMPlus_JitDoValueNumber=0
set COMPlus_JitStressRegs=3
set COMPlus_TieredCompilation=0
]]></CLRTestBatchPreCommands>
<BashCLRTestPreCommands><![CDATA[
$(BashCLRTestPreCommands)
export COMPlus_JitDoValueNumber=0
export COMPlus_JitStressRegs=3
export COMPlus_TieredCompilation=0
]]></BashCLRTestPreCommands>
</PropertyGroup>
<ItemGroup>
<Compile Include="$(MSBuildProjectName).cs" />
</ItemGroup>
</Project>
48 changes: 48 additions & 0 deletions src/tests/JIT/Regression/JitBlue/GitHub_65988/GitHub_65988.cs
Original file line number Diff line number Diff line change
@@ -0,0 +1,48 @@
// Licensed to the .NET Foundation under one or more agreements.
// The .NET Foundation licenses this file to you under the MIT license.
//
// Note: In below test case, we were skipping the first block that is an alignment candidate,
// but were not unmarking it such. As a result, we would hit assert during backedge setup.
// Found by Antigen
public class TestClass_65988
{
public struct S1
{
public decimal decimal_0;
}
public struct S2
{
public S1 s1_2;
}
static int s_int_9 = 1;
decimal decimal_22 = 0.08m;
S1 s1_33 = new S1();
public decimal LeafMethod3() => 67.1m;

public void Method0()
{
unchecked
{
int int_85 = 76;
S1 s1_93 = new S1();
for (; ; )
{
if ((15 << 4) < (s_int_9 *= int_85))
{
s1_33.decimal_0 += decimal_22 -= 15 * 4 - -2147483646.9375m;
}
s1_93.decimal_0 /= (s1_33.decimal_0 /= LeafMethod3()) + 28;
if (int_85++ == 77000)
{
break;
}
}
return;
}
}
public static int Main(string[] args)
{
new TestClass_65988().Method0();
return 100;
}
}
31 changes: 31 additions & 0 deletions src/tests/JIT/Regression/JitBlue/GitHub_65988/GitHub_65988.csproj
Original file line number Diff line number Diff line change
@@ -0,0 +1,31 @@
<Project Sdk="Microsoft.NET.Sdk">
<PropertyGroup>
<OutputType>Exe</OutputType>
<AllowUnsafeBlocks>True</AllowUnsafeBlocks>
</PropertyGroup>
<PropertyGroup>
<DebugType>None</DebugType>
<Optimize>True</Optimize>
</PropertyGroup>
<PropertyGroup>
<CLRTestBatchPreCommands><![CDATA[
$(CLRTestBatchPreCommands)
set COMPlus_EnableSSE2=0
set COMPlus_OSR_HitLimit=1
set COMPlus_TC_OnStackReplacement=1
set COMPlus_TC_OnStackReplacement_InitialCounter=1
set COMPlus_TC_QuickJitForLoops=1
]]></CLRTestBatchPreCommands>
<BashCLRTestPreCommands><![CDATA[
$(BashCLRTestPreCommands)
export COMPlus_EnableSSE2=0
export COMPlus_OSR_HitLimit=1
export COMPlus_TC_OnStackReplacement=1
export COMPlus_TC_OnStackReplacement_InitialCounter=1
export COMPlus_TC_QuickJitForLoops=1
]]></BashCLRTestPreCommands>
</PropertyGroup>
<ItemGroup>
<Compile Include="$(MSBuildProjectName).cs" />
</ItemGroup>
</Project>

0 comments on commit 478b1b4

Please sign in to comment.