Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

JIT: profile checking through inlining #101834

Merged
merged 3 commits into from
May 4, 2024
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
10 changes: 5 additions & 5 deletions src/coreclr/jit/compiler.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -4697,11 +4697,6 @@ void Compiler::compCompile(void** methodCodePtr, uint32_t* methodCodeSize, JitFl
return;
}

// Drop back to just checking profile likelihoods.
//
activePhaseChecks &= ~PhaseChecks::CHECK_PROFILE;
activePhaseChecks |= PhaseChecks::CHECK_LIKELIHOODS;

// At this point in the phase list, all the inlinee phases have
// been run, and inlinee compiles have exited, so we should only
// get this far if we are jitting the root method.
Expand All @@ -4718,6 +4713,11 @@ void Compiler::compCompile(void** methodCodePtr, uint32_t* methodCodeSize, JitFl
// Record "start" values for post-inlining cycles and elapsed time.
RecordStateAtEndOfInlining();

// Drop back to just checking profile likelihoods.
//
activePhaseChecks &= ~PhaseChecks::CHECK_PROFILE;
activePhaseChecks |= PhaseChecks::CHECK_LIKELIHOODS;

// Transform each GT_ALLOCOBJ node into either an allocation helper call or
// local variable allocation on the stack.
ObjectAllocator objectAllocator(this); // PHASE_ALLOCATE_OBJECTS
Expand Down
3 changes: 2 additions & 1 deletion src/coreclr/jit/compiler.h
Original file line number Diff line number Diff line change
Expand Up @@ -5284,6 +5284,7 @@ class Compiler

// The number of separate return points in the method.
unsigned fgReturnCount;
unsigned fgThrowCount;

PhaseStatus fgAddInternal();

Expand Down Expand Up @@ -6228,7 +6229,7 @@ class Compiler

void fgLinkBasicBlocks();

unsigned fgMakeBasicBlocks(const BYTE* codeAddr, IL_OFFSET codeSize, FixedBitVect* jumpTarget);
void fgMakeBasicBlocks(const BYTE* codeAddr, IL_OFFSET codeSize, FixedBitVect* jumpTarget);

void fgCheckBasicBlockControlFlow();

Expand Down
38 changes: 21 additions & 17 deletions src/coreclr/jit/fgbasic.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -50,6 +50,7 @@ void Compiler::fgInit()
fgDomBBcount = 0;
fgBBVarSetsInited = false;
fgReturnCount = 0;
fgThrowCount = 0;

m_dfsTree = nullptr;
m_loops = nullptr;
Expand Down Expand Up @@ -233,8 +234,9 @@ bool Compiler::fgEnsureFirstBBisScratch()
{
// If the result is clearly nonsensical, just inherit
//
JITDUMP("\fgEnsureFirstBBisScratch: Profile data could not be locally repaired. Data %s inconsisent.\n",
fgPgoConsistent ? "is now" : "was already");
JITDUMP(
"\fgEnsureFirstBBisScratch: Profile data could not be locally repaired. Data %s inconsistent.\n",
fgPgoConsistent ? "is now" : "was already");

if (fgPgoConsistent)
{
Expand Down Expand Up @@ -3099,19 +3101,18 @@ void Compiler::fgLinkBasicBlocks()
// codeSize -- length of the IL stream
// jumpTarget -- [in] bit vector of jump targets found by fgFindJumpTargets
//
// Returns:
// number of return blocks (BBJ_RETURN) in the method (may be zero)
//
// Notes:
// Invoked for prejited and jitted methods, and for all inlinees

unsigned Compiler::fgMakeBasicBlocks(const BYTE* codeAddr, IL_OFFSET codeSize, FixedBitVect* jumpTarget)
// Invoked for prejited and jitted methods, and for all inlinees.
AndyAyersMS marked this conversation as resolved.
Show resolved Hide resolved
// Sets fgReturnCount and fgThrowCount
//
void Compiler::fgMakeBasicBlocks(const BYTE* codeAddr, IL_OFFSET codeSize, FixedBitVect* jumpTarget)
{
unsigned retBlocks = 0;
const BYTE* codeBegp = codeAddr;
const BYTE* codeEndp = codeAddr + codeSize;
bool tailCall = false;
unsigned curBBoffs = 0;
unsigned retBlocks = 0;
unsigned throwBlocks = 0;
const BYTE* codeBegp = codeAddr;
const BYTE* codeEndp = codeAddr + codeSize;
bool tailCall = false;
unsigned curBBoffs = 0;
BasicBlock* curBBdesc;

// Keep track of where we are in the scope lists, as we will also
Expand Down Expand Up @@ -3312,7 +3313,8 @@ unsigned Compiler::fgMakeBasicBlocks(const BYTE* codeAddr, IL_OFFSET codeSize, F
// can be dispatched as tail calls from the caller.
compInlineResult->NoteFatal(InlineObservation::CALLEE_EXPLICIT_TAIL_PREFIX);
retBlocks++;
return retBlocks;
fgReturnCount = retBlocks;
return;
}

FALLTHROUGH;
Expand Down Expand Up @@ -3415,6 +3417,7 @@ unsigned Compiler::fgMakeBasicBlocks(const BYTE* codeAddr, IL_OFFSET codeSize, F

case CEE_THROW:
case CEE_RETHROW:
throwBlocks++;
jmpKind = BBJ_THROW;
break;

Expand Down Expand Up @@ -3597,7 +3600,8 @@ unsigned Compiler::fgMakeBasicBlocks(const BYTE* codeAddr, IL_OFFSET codeSize, F

fgLinkBasicBlocks();

return retBlocks;
fgReturnCount = retBlocks;
fgThrowCount = throwBlocks;
}

/*****************************************************************************
Expand Down Expand Up @@ -3709,7 +3713,7 @@ void Compiler::fgFindBasicBlocks()

/* Now create the basic blocks */

fgReturnCount = fgMakeBasicBlocks(info.compCode, info.compILCodeSize, jumpTarget);
fgMakeBasicBlocks(info.compCode, info.compILCodeSize, jumpTarget);

if (compIsForInlining())
{
Expand Down Expand Up @@ -4269,7 +4273,7 @@ void Compiler::fgFixEntryFlowForOSR()
//
if ((fgEntryBB->bbPreds != nullptr) && (fgEntryBB != fgOSREntryBB))
{
JITDUMP("OSR: profile data could not be locally repaired. Data %s inconsisent.\n",
JITDUMP("OSR: profile data could not be locally repaired. Data %s inconsistent.\n",
fgPgoConsistent ? "is now" : "was already");
fgPgoConsistent = false;
}
Expand Down
150 changes: 147 additions & 3 deletions src/coreclr/jit/fginline.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -624,24 +624,86 @@ class SubstitutePlaceholdersAndDevirtualizeWalker : public GenTreeVisitor<Substi
{
JITDUMP(" ... found foldable jtrue at [%06u] in " FMT_BB "\n", m_compiler->dspTreeID(tree),
block->bbNum);
m_compiler->Metrics.InlinerBranchFold++;

// We have a constant operand, and should have the all clear to optimize.
// Update side effects on the tree, assert there aren't any, and bash to nop.
m_compiler->gtUpdateNodeSideEffects(tree);
assert((tree->gtFlags & GTF_SIDE_EFFECT) == 0);
tree->gtBashToNOP();
m_madeChanges = true;
m_madeChanges = true;
FlowEdge* removedEdge = nullptr;

if (condTree->IsIntegralConst(0))
{
m_compiler->fgRemoveRefPred(block->GetTrueEdge());
removedEdge = block->GetTrueEdge();
m_compiler->fgRemoveRefPred(removedEdge);
block->SetKindAndTargetEdge(BBJ_ALWAYS, block->GetFalseEdge());
}
else
{
m_compiler->fgRemoveRefPred(block->GetFalseEdge());
removedEdge = block->GetFalseEdge();
m_compiler->fgRemoveRefPred(removedEdge);
block->SetKindAndTargetEdge(BBJ_ALWAYS, block->GetTrueEdge());
}

// Update profile; make it consistent if possible
//
if (block->hasProfileWeight())
{
bool repairWasComplete = true;
weight_t const weight = removedEdge->getLikelyWeight();

if (weight > 0)
{
// Target block weight will increase.
//
BasicBlock* const target = block->GetTarget();
assert(target->hasProfileWeight());
target->setBBProfileWeight(target->bbWeight + weight);

// Alternate weight will decrease
//
BasicBlock* const alternate = removedEdge->getDestinationBlock();
assert(alternate->hasProfileWeight());
weight_t const alternateNewWeight = alternate->bbWeight - weight;

// If profile weights are consistent, expect at worst a slight underflow.
//
if (m_compiler->fgPgoConsistent && (alternateNewWeight < 0))
{
assert(m_compiler->fgProfileWeightsEqual(alternateNewWeight, 0));
}
alternate->setBBProfileWeight(max(0.0, alternateNewWeight));

// This will affect profile transitively, so in general
// the profile will become inconsistent.
//
repairWasComplete = false;

// But we can check for the special case where the
// block's postdominator is target's target (simple
// if/then/else/join).
//
if (target->KindIs(BBJ_ALWAYS))
{
repairWasComplete =
alternate->KindIs(BBJ_ALWAYS) && (alternate->GetTarget() == target->GetTarget());
AndyAyersMS marked this conversation as resolved.
Show resolved Hide resolved
}
}

if (!repairWasComplete)
{
JITDUMP("Profile data could not be locally repaired. Data %s inconsistent.\n",
m_compiler->fgPgoConsistent ? "is now" : "was already");

if (m_compiler->fgPgoConsistent)
{
m_compiler->Metrics.ProfileInconsistentInlinerBranchFold++;
m_compiler->fgPgoConsistent = false;
}
}
}
}
}
else
Expand Down Expand Up @@ -692,6 +754,11 @@ PhaseStatus Compiler::fgInline()
JitConfig.JitPrintInlinedMethods().contains(info.compMethodHnd, info.compClassHnd, &info.compMethodInfo->args);
#endif // DEBUG

if (fgPgoConsistent)
{
Metrics.ProfileConsistentBeforeInline++;
}

noway_assert(fgFirstBB != nullptr);

BasicBlock* block = fgFirstBB;
Expand Down Expand Up @@ -822,6 +889,14 @@ PhaseStatus Compiler::fgInline()
fgRenumberBlocks();
}

if (fgPgoConsistent)
{
Metrics.ProfileConsistentAfterInline++;
}

Metrics.InlineCount = m_inlineStrategy->GetInlineCount();
Metrics.InlineAttempt = m_inlineStrategy->GetImportCount();

return madeChanges ? PhaseStatus::MODIFIED_EVERYTHING : PhaseStatus::MODIFIED_NOTHING;
}

Expand Down Expand Up @@ -1611,6 +1686,75 @@ void Compiler::fgInsertInlineeBlocks(InlineInfo* pInlineInfo)
}
#endif

// Update profile consistency
//
// If inlinee is inconsistent, root method will be inconsistent too.
//
if (!InlineeCompiler->fgPgoConsistent)
{
if (fgPgoConsistent)
{
JITDUMP("INLINER: profile data in root now inconsistent -- inlinee had inconsistency\n");
Metrics.ProfileInconsistentInlinee++;
fgPgoConsistent = false;
}
}

// If we inline a no-return call at a site with profile weight,
// we will introduce inconsistency.
//
if (InlineeCompiler->fgReturnCount == 0)
{
JITDUMP("INLINER: no-return inlinee\n");

if (iciBlock->bbWeight > 0)
{
if (fgPgoConsistent)
{
JITDUMP("INLINER: profile data in root now inconsistent -- no-return inlinee at call site in " FMT_BB
" with weight " FMT_WT "\n",
iciBlock->bbNum, iciBlock->bbWeight);
Metrics.ProfileInconsistentNoReturnInlinee++;
fgPgoConsistent = false;
}
}
else
{
// Inlinee scaling should assure this is so.
//
assert(InlineeCompiler->fgFirstBB->bbWeight == 0);
}
}

// If the call site is not in a try and the callee has a throw,
// we may introduce inconsistency.
//
// Technically we should check if the callee has a throw not in a try, but since
// we can't inline methods with EH yet we don't see those.
//
if (InlineeCompiler->fgThrowCount > 0)
{
JITDUMP("INLINER: may-throw inlinee\n");

if (iciBlock->bbWeight > 0)
{
if (fgPgoConsistent)
{
JITDUMP("INLINER: profile data in root now inconsistent -- may-throw inlinee at call site in " FMT_BB
" with weight " FMT_WT "\n",
iciBlock->bbNum, iciBlock->bbWeight);
Metrics.ProfileInconsistentMayThrowInlinee++;
fgPgoConsistent = false;
}
}
else
{
// Inlinee scaling should assure this is so.
//
assert(InlineeCompiler->fgFirstBB->bbWeight == 0);
}
}

// If an inlinee needs GS cookie we need to make sure that the cookie will not be allocated at zero stack offset.
// Note that if the root method needs GS cookie then this has already been taken care of.
if (!getNeedsGSSecurityCookie() && InlineeCompiler->getNeedsGSSecurityCookie())
Expand Down
36 changes: 30 additions & 6 deletions src/coreclr/jit/fgprofile.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -141,22 +141,38 @@ void Compiler::fgApplyProfileScale()
JITDUMP(" ... no callee profile data, will use non-pgo weight to scale\n");
}

// Ostensibly this should be fgCalledCount for the callee, but that's not available
// as it requires some analysis.
// Determine the weight of the first block preds, if any.
// (only happens if the first block is a loop head).
//
// For most callees it will be the same as the entry block count.
//
// Note when/if we early do normalization this may need to change.
weight_t firstBlockPredWeight = 0;
for (FlowEdge* const firstBlockPred : fgFirstBB->PredEdges())
{
firstBlockPredWeight += firstBlockPred->getLikelyWeight();
}

// Determine the "input" weight for the callee
//
weight_t calleeWeight = fgFirstBB->bbWeight;

// Callee entry weight is nonzero?
// If so, just choose the smallest plausible weight.
//
if (calleeWeight == BB_ZERO_WEIGHT)
if ((calleeWeight == BB_ZERO_WEIGHT) || (calleeWeight <= firstBlockPredWeight))
AndyAyersMS marked this conversation as resolved.
Show resolved Hide resolved
{
calleeWeight = fgHaveProfileWeights() ? 1.0 : BB_UNITY_WEIGHT;
JITDUMP(" ... callee entry has weight zero, will use weight of " FMT_WT " to scale\n", calleeWeight);
JITDUMP("Profile data could not be scaled consistently. Data %s inconsistent.\n",
fgPgoConsistent ? "is now" : "was already");

if (fgPgoConsistent)
{
Metrics.ProfileInconsistentInlineeScale++;
fgPgoConsistent = false;
}
}
else
{
calleeWeight -= firstBlockPredWeight;
}

// Call site has profile weight?
Expand Down Expand Up @@ -2829,6 +2845,14 @@ PhaseStatus Compiler::fgInstrumentMethod()
//
PhaseStatus Compiler::fgIncorporateProfileData()
{
// For now we only rely on profile data when optimizing.
//
if (!opts.OptimizationEnabled())
{
JITDUMP("not optimizing, so not incorproating any profile data\n");
AndyAyersMS marked this conversation as resolved.
Show resolved Hide resolved
return PhaseStatus::MODIFIED_NOTHING;
}

// Are we doing profile stress?
//
if (fgStressBBProf() > 0)
Expand Down
Loading
Loading