Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Simplify alignment block marking #62940

Closed
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
19 changes: 0 additions & 19 deletions src/coreclr/jit/block.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1685,22 +1685,3 @@ BBswtDesc::BBswtDesc(Compiler* comp, const BBswtDesc* other)
bbsDstTab[i] = other->bbsDstTab[i];
}
}

//------------------------------------------------------------------------
// unmarkLoopAlign: Unmarks the LOOP_ALIGN flag from the block and reduce the
// loop alignment count.
//
// Arguments:
// compiler - Compiler instance
// reason - Reason to print in JITDUMP
//
void BasicBlock::unmarkLoopAlign(Compiler* compiler DEBUG_ARG(const char* reason))
{
// Make sure we unmark and count just once.
if (isLoopAlign())
{
compiler->loopAlignCandidates--;
bbFlags &= ~BBF_LOOP_ALIGN;
JITDUMP("Unmarking LOOP_ALIGN from " FMT_BB ". Reason= %s.\n", bbNum, reason);
}
}
2 changes: 0 additions & 2 deletions src/coreclr/jit/block.h
Original file line number Diff line number Diff line change
Expand Up @@ -657,8 +657,6 @@ struct BasicBlock : private LIR::Range
return ((bbFlags & BBF_LOOP_ALIGN) != 0);
}

void unmarkLoopAlign(Compiler* comp DEBUG_ARG(const char* reason));

bool hasAlign() const
{
return ((bbFlags & BBF_HAS_ALIGN) != 0);
Expand Down
107 changes: 98 additions & 9 deletions src/coreclr/jit/compiler.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -4776,6 +4776,10 @@ void Compiler::compCompile(void** methodCodePtr, uint32_t* methodCodeSize, JitFl
// Unroll loops
//
DoPhase(this, PHASE_UNROLL_LOOPS, &Compiler::optUnrollLoops);

// Clear loop table info that is not used after this point, and might become invalid.
//
DoPhase(this, PHASE_CLEAR_LOOP_INFO, &Compiler::optClearLoopIterInfo);
}

#ifdef DEBUG
Expand Down Expand Up @@ -5088,6 +5092,77 @@ void Compiler::compCompile(void** methodCodePtr, uint32_t* methodCodeSize, JitFl

#if FEATURE_LOOP_ALIGN

//------------------------------------------------------------------------
// optIdentifyLoopsForAlignment: Determine which loops should be considered for alignment.
//
// All innermost loops whose block weight meets a threshold are candidates for alignment.
// The `top` block of the loop is marked with the BBF_LOOP_ALIGN flag to indicate this
// (the loop table itself is not changed).
//
// Depends on the loop table, and on block weights being set.
//
// Sets `loopAlignCandidates` to the number of loop candidates for alignment.
//
void Compiler::optIdentifyLoopsForAlignment()
{
assert(codeGen->ShouldAlignLoops());

loopAlignCandidates = 0;

for (BasicBlock::loopNumber loopInd = 0; loopInd < optLoopCount; loopInd++)
{
const LoopDsc& loop = optLoopTable[loopInd];

if (loop.lpFlags & LPFLG_REMOVED)
{
// Don't need JitDump output for this common case.
continue;
}

if (loop.lpChild != BasicBlock::NOT_IN_LOOP)
{
JITDUMP("Skipping alignment for " FMT_LP "; not an innermost loop\n", loopInd);
continue;
}

if (loop.lpTop == fgFirstBB)
{
// Adding align instruction in prolog is not supported. (This currently seems unlikely since
// loops normally (always?) have a predecessor `head` block.)
// TODO: Insert an empty block before the loop, if want to align it, so we have a place to put
// the align instruction.
JITDUMP("Skipping alignment for " FMT_LP "; loop starts in first block\n", loopInd);
continue;
}

if (loop.lpFlags & LPFLG_CONTAINS_CALL)
{
// Heuristic: it is not valuable to align loops with calls.
JITDUMP("Skipping alignment for " FMT_LP "; loop contains call\n", loopInd);
continue;
}

// Now we have an innerloop candidate that might need alignment

BasicBlock* top = loop.lpTop;
weight_t topWeight = top->getBBWeight(this);
weight_t compareWeight = opts.compJitAlignLoopMinBlockWeight * BB_UNITY_WEIGHT;
if (topWeight >= compareWeight)
{
loopAlignCandidates++;
assert(!top->isLoopAlign());
top->bbFlags |= BBF_LOOP_ALIGN;
JITDUMP("Aligning " FMT_LP " that starts at " FMT_BB ", weight=" FMT_WT " >= " FMT_WT ".\n", loopInd,
top->bbNum, topWeight, compareWeight);
}
else
{
JITDUMP("Skipping alignment for " FMT_LP " that starts at " FMT_BB ", weight=" FMT_WT " < " FMT_WT ".\n",
loopInd, top->bbNum, topWeight, compareWeight);
}
}
}

//------------------------------------------------------------------------
// placeLoopAlignInstructions: Iterate over all the blocks and determine
// the best position to place the 'align' instruction. Inserting 'align'
Expand All @@ -5100,26 +5175,39 @@ void Compiler::compCompile(void** methodCodePtr, uint32_t* methodCodeSize, JitFl
//
void Compiler::placeLoopAlignInstructions()
{
#ifdef DEBUG
if (verbose)
{
printf("*************** In placeLoopAlignInstructions()\n");
}
#endif

if (!codeGen->ShouldAlignLoops())
{
JITDUMP("Not aligning loops; ShouldAlignLoops is false\n");
return;
}

// Print out the current loop table that we're working on.
DBEXEC(verbose, optPrintLoopTable());

assert(loopAlignCandidates == 0); // We currently expect nobody has touched this yet.
optIdentifyLoopsForAlignment();

if (loopAlignCandidates == 0)
{
JITDUMP("No loop candidates\n");
return;
}

int loopsToProcess = loopAlignCandidates;
JITDUMP("Inside placeLoopAlignInstructions for %d loops.\n", loopAlignCandidates);
JITDUMP("Considering %d loops\n", loopAlignCandidates);

// Add align only if there were any loops that needed alignment
weight_t minBlockSoFar = BB_MAX_WEIGHT;
BasicBlock* bbHavingAlign = nullptr;
BasicBlock::loopNumber currentAlignedLoopNum = BasicBlock::NOT_IN_LOOP;

if ((fgFirstBB != nullptr) && fgFirstBB->isLoopAlign())
{
// Adding align instruction in prolog is not supported
// hence just remove that loop from our list.
loopsToProcess--;
}

for (BasicBlock* const block : Blocks())
{
if (currentAlignedLoopNum != BasicBlock::NOT_IN_LOOP)
Expand Down Expand Up @@ -5178,7 +5266,8 @@ void Compiler::placeLoopAlignInstructions()

assert(loopsToProcess == 0);
}
#endif

#endif // FEATURE_LOOP_ALIGN

//------------------------------------------------------------------------
// generatePatchpointInfo: allocate and fill in patchpoint info data,
Expand Down
8 changes: 5 additions & 3 deletions src/coreclr/jit/compiler.h
Original file line number Diff line number Diff line change
Expand Up @@ -3670,6 +3670,8 @@ class Compiler
#endif

BasicBlock* bbNewBasicBlock(BBjumpKinds jumpKind);

void optIdentifyLoopsForAlignment();
void placeLoopAlignInstructions();

/*
Expand Down Expand Up @@ -6987,6 +6989,8 @@ class Compiler
BasicBlock* exit,
unsigned char exitCnt);

void optClearLoopIterInfo();

#ifdef DEBUG
void optPrintLoopInfo(unsigned lnum, bool printVerbose = false);
void optPrintLoopInfo(const LoopDsc* loop, bool printVerbose = false);
Expand Down Expand Up @@ -7025,8 +7029,6 @@ class Compiler

void optFindNaturalLoops();

void optIdentifyLoopsForAlignment();

// Ensures that all the loops in the loop nest rooted at "loopInd" (an index into the loop table) are 'canonical' --
// each loop has a unique "top." Returns "true" iff the flowgraph has been modified.
bool optCanonicalizeLoopNest(unsigned char loopInd);
Expand Down Expand Up @@ -9754,7 +9756,7 @@ XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
// based on experimenting with various benchmarks.

// Default minimum loop block weight required to enable loop alignment.
#define DEFAULT_ALIGN_LOOP_MIN_BLOCK_WEIGHT 4
#define DEFAULT_ALIGN_LOOP_MIN_BLOCK_WEIGHT 3

// By default a loop will be aligned at 32B address boundary to get better
// performance as per architecture manuals.
Expand Down
1 change: 1 addition & 0 deletions src/coreclr/jit/compphases.h
Original file line number Diff line number Diff line change
Expand Up @@ -61,6 +61,7 @@ CompPhaseNameMacro(PHASE_ZERO_INITS, "Redundant zero Inits",
CompPhaseNameMacro(PHASE_FIND_LOOPS, "Find loops", "LOOP-FND", false, -1, false)
CompPhaseNameMacro(PHASE_CLONE_LOOPS, "Clone loops", "LP-CLONE", false, -1, false)
CompPhaseNameMacro(PHASE_UNROLL_LOOPS, "Unroll loops", "UNROLL", false, -1, false)
CompPhaseNameMacro(PHASE_CLEAR_LOOP_INFO, "Clear loop info", "LP-CLEAR", false, -1, false)
CompPhaseNameMacro(PHASE_HOIST_LOOP_CODE, "Hoist loop code", "LP-HOIST", false, -1, false)
CompPhaseNameMacro(PHASE_MARK_LOCAL_VARS, "Mark local vars", "MARK-LCL", false, -1, false)
CompPhaseNameMacro(PHASE_OPTIMIZE_BOOLS, "Optimize bools", "OPT-BOOL", false, -1, false)
Expand Down
11 changes: 0 additions & 11 deletions src/coreclr/jit/fgbasic.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -4712,14 +4712,6 @@ void Compiler::fgRemoveBlock(BasicBlock* block, bool unreachable)
{
succBlock->bbFlags |= BBF_LOOP_HEAD;

if (block->isLoopAlign())
{
loopAlignCandidates++;
succBlock->bbFlags |= BBF_LOOP_ALIGN;
JITDUMP("Propagating LOOP_ALIGN flag from " FMT_BB " to " FMT_BB " for " FMT_LP "\n ", block->bbNum,
succBlock->bbNum, block->bbNatLoopNum);
}

if (fgDomsComputed && fgReachable(succBlock, block))
{
// Mark all the reachable blocks between 'succBlock' and 'bPrev'
Expand Down Expand Up @@ -4867,9 +4859,6 @@ void Compiler::fgRemoveBlock(BasicBlock* block, bool unreachable)
block->bbFlags |= BBF_REMOVED;
}

// If this was marked for alignment, remove it
block->unmarkLoopAlign(this DEBUG_ARG("Removed block"));

if (bPrev != nullptr)
{
switch (bPrev->bbJumpKind)
Expand Down
10 changes: 0 additions & 10 deletions src/coreclr/jit/fgopt.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -2147,13 +2147,6 @@ void Compiler::fgCompactBlocks(BasicBlock* block, BasicBlock* bNext)
break;
}

if (bNext->isLoopAlign())
{
block->bbFlags |= BBF_LOOP_ALIGN;
JITDUMP("Propagating LOOP_ALIGN flag from " FMT_BB " to " FMT_BB " during compacting.\n", bNext->bbNum,
block->bbNum);
}

// If we're collapsing a block created after the dominators are
// computed, copy block number the block and reuse dominator
// information from bNext to block.
Expand Down Expand Up @@ -5993,9 +5986,6 @@ bool Compiler::fgUpdateFlowGraph(bool doTailDuplication)
// Update the loop table if we removed the bottom of a loop, for example.
fgUpdateLoopsAfterCompacting(block, bNext);

// If this block was aligned, unmark it
bNext->unmarkLoopAlign(this DEBUG_ARG("Optimized jump"));

// If this is the first Cold basic block update fgFirstColdBlock
if (bNext == fgFirstColdBlock)
{
Expand Down
13 changes: 1 addition & 12 deletions src/coreclr/jit/loopcloning.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1956,18 +1956,7 @@ void Compiler::optCloneLoop(unsigned loopInd, LoopCloneContext* context)
newBlk->scaleBBWeight(slowPathWeightScaleFactor);
blk->scaleBBWeight(fastPathWeightScaleFactor);

// TODO: scale the pred edges of `blk`?

#if FEATURE_LOOP_ALIGN
// If the original loop is aligned, do not align the cloned loop because cloned loop will be executed in
// rare scenario. Additionally, having to align cloned loop will force us to disable some VEX prefix encoding
// and adding compensation for over-estimated instructions.
if (blk->isLoopAlign())
{
newBlk->bbFlags &= ~BBF_LOOP_ALIGN;
JITDUMP("Removing LOOP_ALIGN flag from cloned loop in " FMT_BB "\n", newBlk->bbNum);
}
#endif
// TODO: scale the pred edges of `blk`?

// TODO-Cleanup: The above clones the bbNatLoopNum, which is incorrect. Eventually, we should probably insert
// the cloned loop in the loop table. For now, however, we'll just make these blocks be part of the surrounding
Expand Down
2 changes: 0 additions & 2 deletions src/coreclr/jit/morph.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -15366,8 +15366,6 @@ bool Compiler::fgFoldConditional(BasicBlock* block)

optMarkLoopRemoved(loopNum);

optLoopTable[loopNum].lpTop->unmarkLoopAlign(this DEBUG_ARG("Bogus loop"));

#ifdef DEBUG
if (verbose)
{
Expand Down
Loading