Skip to content

Commit

Permalink
Spanning tree instrumentation (#47959)
Browse files Browse the repository at this point in the history
Add a new instrumentation mode that only instruments a subset of the edges in
the control flow graph. This reduces the total number of counters and so has
less compile time and runtime overhead than instrumenting every block.

Add a matching count reconstruction algorithm that recovers the missing edge
counts and all block counts.

See #46882 for more details on this approach.

Also in runtime pgo support, add the header offset to the copy source.
This fixes #47930.
  • Loading branch information
AndyAyersMS authored Feb 9, 2021
1 parent 8aa1f58 commit 95f8f00
Show file tree
Hide file tree
Showing 13 changed files with 1,962 additions and 451 deletions.
1 change: 1 addition & 0 deletions eng/pipelines/common/templates/runtimes/run-test-job.yml
Original file line number Diff line number Diff line change
Expand Up @@ -477,6 +477,7 @@ jobs:
- jitpgo
- jitpgo_inline
- jitpgo_classes
- jitpgo_edgeinstrumentation
${{ if in(parameters.testGroup, 'ilasm') }}:
scenarios:
- ilasmroundtrip
Expand Down
1 change: 1 addition & 0 deletions src/coreclr/inc/corjit.h
Original file line number Diff line number Diff line change
Expand Up @@ -305,6 +305,7 @@ class ICorJitInfo : public ICorDynamicInfo
TypeHandleHistogramTypeHandle = (DescriptorMin * 3) | TypeHandle, // TypeHandle that is part of a type histogram
Version = (DescriptorMin * 4) | None, // Version is encoded in the Other field of the schema
NumRuns = (DescriptorMin * 5) | None, // Number of runs is encoded in the Other field of the schema
EdgeIntCount = (DescriptorMin * 6) | FourByte, // 4 byte edge counter, using unsigned 4 byte int
};

struct PgoInstrumentationSchema
Expand Down
11 changes: 8 additions & 3 deletions src/coreclr/jit/block.h
Original file line number Diff line number Diff line change
Expand Up @@ -882,9 +882,14 @@ struct BasicBlock : private LIR::Range
void ensurePredListOrder(Compiler* compiler);
void reorderPredList(Compiler* compiler);

BlockSet bbReach; // Set of all blocks that can reach this one
BasicBlock* bbIDom; // Represent the closest dominator to this block (called the Immediate
// Dominator) used to compute the dominance tree.
BlockSet bbReach; // Set of all blocks that can reach this one

union {
BasicBlock* bbIDom; // Represent the closest dominator to this block (called the Immediate
// Dominator) used to compute the dominance tree.
void* bbSparseProbeList; // Used early on by fgInstrument
void* bbSparseCountInfo; // Used early on by fgIncorporateEdgeCounts
};

unsigned bbPostOrderNum; // the block's post order number in the graph.

Expand Down
8 changes: 8 additions & 0 deletions src/coreclr/jit/compiler.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -4403,6 +4403,14 @@ void Compiler::compCompile(void** methodCodePtr, ULONG* methodCodeSize, JitFlags
//
DoPhase(this, PHASE_INCPROFILE, &Compiler::fgIncorporateProfileData);

// If we're going to instrument code, we may need to prepare before
// we import.
//
if (compileFlags->IsSet(JitFlags::JIT_FLAG_BBINSTR))
{
DoPhase(this, PHASE_IBCPREP, &Compiler::fgPrepareToInstrumentMethod);
}

// Import: convert the instrs in each basic block to a tree based intermediate representation
//
DoPhase(this, PHASE_IMPORTATION, &Compiler::fgImport);
Expand Down
36 changes: 24 additions & 12 deletions src/coreclr/jit/compiler.h
Original file line number Diff line number Diff line change
Expand Up @@ -71,11 +71,13 @@ XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
*/

struct InfoHdr; // defined in GCInfo.h
struct escapeMapping_t; // defined in flowgraph.cpp
struct escapeMapping_t; // defined in fgdiagnostic.cpp
class emitter; // defined in emit.h
struct ShadowParamVarInfo; // defined in GSChecks.cpp
struct InitVarDscInfo; // defined in register_arg_convention.h
class FgStack; // defined in flowgraph.cpp
class FgStack; // defined in fgbasic.cpp
class Instrumentor; // defined in fgprofile.cpp
class SpanningTreeVisitor; // defined in fgprofile.cpp
#if FEATURE_ANYCSE
class CSE_DataFlow; // defined in OptCSE.cpp
#endif
Expand Down Expand Up @@ -5535,15 +5537,6 @@ class Compiler

void fgAdjustForAddressExposedOrWrittenThis();

bool fgProfileData_ILSizeMismatch;
ICorJitInfo::PgoInstrumentationSchema* fgPgoSchema;
BYTE* fgPgoData;
UINT32 fgPgoSchemaCount;
HRESULT fgPgoQueryResult;
UINT32 fgNumProfileRuns;
UINT32 fgPgoBlockCounts;
UINT32 fgPgoClassProfiles;

unsigned fgStressBBProf()
{
#ifdef DEBUG
Expand All @@ -5562,13 +5555,32 @@ class Compiler
}

bool fgHaveProfileData();
void fgComputeProfileScale();
bool fgGetProfileWeightForBasicBlock(IL_OFFSET offset, BasicBlock::weight_t* weight);

Instrumentor* fgCountInstrumentor;
Instrumentor* fgClassInstrumentor;

PhaseStatus fgPrepareToInstrumentMethod();
PhaseStatus fgInstrumentMethod();
PhaseStatus fgIncorporateProfileData();
void fgIncorporateBlockCounts();
void fgIncorporateEdgeCounts();

public:
bool fgProfileData_ILSizeMismatch;
ICorJitInfo::PgoInstrumentationSchema* fgPgoSchema;
BYTE* fgPgoData;
UINT32 fgPgoSchemaCount;
HRESULT fgPgoQueryResult;
UINT32 fgNumProfileRuns;
UINT32 fgPgoBlockCounts;
UINT32 fgPgoEdgeCounts;
UINT32 fgPgoClassProfiles;

void WalkSpanningTree(SpanningTreeVisitor* visitor);
void fgSetProfileWeight(BasicBlock* block, BasicBlock::weight_t weight);
void fgComputeProfileScale();

// fgIsUsingProfileWeights - returns true if we have real profile data for this method
// or if we have some fake profile data for the stress mode
bool fgIsUsingProfileWeights()
Expand Down
1 change: 1 addition & 0 deletions src/coreclr/jit/compphases.h
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,7 @@ CompPhaseNameMacro(PHASE_IMPORTATION, "Importation",
CompPhaseNameMacro(PHASE_INDXCALL, "Indirect call transform", "INDXCALL", false, -1, true)
CompPhaseNameMacro(PHASE_PATCHPOINTS, "Expand patchpoints", "PPOINT", false, -1, true)
CompPhaseNameMacro(PHASE_POST_IMPORT, "Post-import", "POST-IMP", false, -1, false)
CompPhaseNameMacro(PHASE_IBCPREP, "Profile instrumentation prep", "IBCPREP", false, -1, false)
CompPhaseNameMacro(PHASE_IBCINSTR, "Profile instrumentation", "IBCINSTR", false, -1, false)
CompPhaseNameMacro(PHASE_INCPROFILE, "Profile incorporation", "INCPROF", false, -1, false)
CompPhaseNameMacro(PHASE_MORPH_INIT, "Morph - Init", "MOR-INIT" ,false, -1, false)
Expand Down
79 changes: 54 additions & 25 deletions src/coreclr/jit/fgbasic.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -170,7 +170,10 @@ void Compiler::fgInit()
fgPgoSchemaCount = 0;
fgNumProfileRuns = 0;
fgPgoBlockCounts = 0;
fgPgoEdgeCounts = 0;
fgPgoClassProfiles = 0;
fgCountInstrumentor = nullptr;
fgClassInstrumentor = nullptr;
fgPredListSortVector = nullptr;
}

Expand Down Expand Up @@ -418,15 +421,20 @@ void Compiler::fgChangeSwitchBlock(BasicBlock* oldSwitchBlock, BasicBlock* newSw
}
}

/*****************************************************************************
* fgReplaceSwitchJumpTarget:
*
* We have a BBJ_SWITCH at 'blockSwitch' and we want to replace all entries
* in the jumpTab[] such that so that jumps that previously went to
* 'oldTarget' now go to 'newTarget'.
* We also must update the predecessor lists for 'oldTarget' and 'newPred'.
*/

//------------------------------------------------------------------------
// fgReplaceSwitchJumpTarget: update BBJ_SWITCH block so that all control
// that previously flowed to oldTarget now flows to newTarget.
//
// Arguments:
// blockSwitch - block ending in a switch
// newTarget - new branch target
// oldTarget - old branch target
//
// Notes:
// Updates the jump table and the cached unique target set (if any).
// Can be called before or after pred lists are built.
// If pred lists are built, updates pred lists.
//
void Compiler::fgReplaceSwitchJumpTarget(BasicBlock* blockSwitch, BasicBlock* newTarget, BasicBlock* oldTarget)
{
noway_assert(blockSwitch != nullptr);
Expand All @@ -450,7 +458,10 @@ void Compiler::fgReplaceSwitchJumpTarget(BasicBlock* blockSwitch, BasicBlock* ne
{
// Remove the old edge [oldTarget from blockSwitch]
//
fgRemoveAllRefPreds(oldTarget, blockSwitch);
if (fgComputePredsDone)
{
fgRemoveAllRefPreds(oldTarget, blockSwitch);
}

//
// Change the jumpTab entry to branch to the new location
Expand All @@ -460,7 +471,12 @@ void Compiler::fgReplaceSwitchJumpTarget(BasicBlock* blockSwitch, BasicBlock* ne
//
// Create the new edge [newTarget from blockSwitch]
//
flowList* newEdge = fgAddRefPred(newTarget, blockSwitch);
flowList* newEdge = nullptr;

if (fgComputePredsDone)
{
newEdge = fgAddRefPred(newTarget, blockSwitch);
}

// Now set the correct value of newEdge->flDupCount
// and replace any other jumps in jumpTab[] that go to oldTarget.
Expand All @@ -479,7 +495,10 @@ void Compiler::fgReplaceSwitchJumpTarget(BasicBlock* blockSwitch, BasicBlock* ne
//
// Increment the flDupCount
//
newEdge->flDupCount++;
if (fgComputePredsDone)
{
newEdge->flDupCount++;
}
}
i++; // Check the next entry in jumpTab[]
}
Expand Down Expand Up @@ -1780,11 +1799,6 @@ unsigned Compiler::fgMakeBasicBlocks(const BYTE* codeAddr, IL_OFFSET codeSize, F
}
}

if (compIsForInlining())
{
fgComputeProfileScale();
}

do
{
unsigned jmpAddr = DUMMY_INIT(BAD_IL_OFFSET);
Expand Down Expand Up @@ -3591,22 +3605,27 @@ BasicBlock* Compiler::fgSplitBlockAtBeginning(BasicBlock* curr)
// 'succ' might be the fall-through path or the branch path from 'curr'.
//
// Arguments:
// curr - A block which branches conditionally to 'succ'
// curr - A block which branches to 'succ'
// succ - The target block
//
// Return Value:
// Returns a new block, that is a successor of 'curr' and which branches unconditionally to 'succ'
//
// Assumptions:
// 'curr' must have a bbJumpKind of BBJ_COND or BBJ_SWITCH
// 'curr' must have a bbJumpKind of BBJ_COND, BBJ_ALWAYS, or BBJ_SWITCH
//
// Notes:
// The returned block is empty.
// Can be invoked before pred lists are built.

BasicBlock* Compiler::fgSplitEdge(BasicBlock* curr, BasicBlock* succ)
{
assert(curr->bbJumpKind == BBJ_COND || curr->bbJumpKind == BBJ_SWITCH);
assert(fgGetPredForBlock(succ, curr) != nullptr);
assert(curr->bbJumpKind == BBJ_COND || curr->bbJumpKind == BBJ_SWITCH || curr->bbJumpKind == BBJ_ALWAYS);

if (fgComputePredsDone)
{
assert(fgGetPredForBlock(succ, curr) != nullptr);
}

BasicBlock* newBlock;
if (succ == curr->bbNext)
Expand Down Expand Up @@ -3639,20 +3658,30 @@ BasicBlock* Compiler::fgSplitEdge(BasicBlock* curr, BasicBlock* succ)
}
fgAddRefPred(newBlock, curr);
}
else
else if (curr->bbJumpKind == BBJ_SWITCH)
{
assert(curr->bbJumpKind == BBJ_SWITCH);

// newBlock replaces 'succ' in the switch.
fgReplaceSwitchJumpTarget(curr, newBlock, succ);

// And 'succ' has 'newBlock' as a new predecessor.
fgAddRefPred(succ, newBlock);
}
else
{
assert(curr->bbJumpKind == BBJ_ALWAYS);
fgReplacePred(succ, curr, newBlock);
curr->bbJumpDest = newBlock;
newBlock->bbFlags |= BBF_JMP_TARGET;
fgAddRefPred(newBlock, curr);
}

// This isn't accurate, but it is complex to compute a reasonable number so just assume that we take the
// branch 50% of the time.
newBlock->inheritWeightPercentage(curr, 50);
//
if (curr->bbJumpKind != BBJ_ALWAYS)
{
newBlock->inheritWeightPercentage(curr, 50);
}

// The bbLiveIn and bbLiveOut are both equal to the bbLiveIn of 'succ'
if (fgLocalVarLivenessDone)
Expand Down
Loading

0 comments on commit 95f8f00

Please sign in to comment.