Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[BOLT] Drop high discrepancy profiles in matching #95156

7 changes: 6 additions & 1 deletion bolt/docs/CommandLineArgumentReference.md
Original file line number Diff line number Diff line change
Expand Up @@ -802,6 +802,11 @@

The maximum size of a function to consider for inference.

- `--stale-matching-min-matched-block=<uint>`

Minimum percentage of exactly matched blocks required for a function to be
considered for profile inference.

- `--stale-threshold=<uint>`

Maximum percentage of stale functions to tolerate (default: 100)
Expand Down Expand Up @@ -1161,4 +1166,4 @@

- `--print-options`

Print non-default options after command line parsing
Print non-default options after command line parsing
30 changes: 23 additions & 7 deletions bolt/lib/Profile/StaleProfileMatching.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -51,6 +51,12 @@ cl::opt<bool>
cl::desc("Infer counts from stale profile data."),
cl::init(false), cl::Hidden, cl::cat(BoltOptCategory));

// Hidden option (default 0) giving the minimum percentage of a function's
// profile blocks that must be matched for stale profile inference to run.
// canApplyInference rejects a function when
//   MatchedBlocks * 100 < StaleMatchingMinMatchedBlock * YamlBF.Blocks.size().
cl::opt<unsigned> StaleMatchingMinMatchedBlock(
"stale-matching-min-matched-block",
cl::desc("Percentage threshold of matched basic blocks at which stale "
"profile inference is executed."),
cl::init(0), cl::Hidden, cl::cat(BoltOptCategory));

cl::opt<unsigned> StaleMatchingMaxFuncSize(
"stale-matching-max-func-size",
cl::desc("The maximum size of a function to consider for inference."),
Expand Down Expand Up @@ -391,10 +397,9 @@ createFlowFunction(const BinaryFunction::BasicBlockOrderType &BlockOrder) {
/// of the basic blocks in the binary, the count is "matched" to the block.
/// Similarly, if both the source and the target of a count in the profile are
/// matched to a jump in the binary, the count is recorded in CFG.
void matchWeightsByHashes(BinaryContext &BC,
const BinaryFunction::BasicBlockOrderType &BlockOrder,
const yaml::bolt::BinaryFunctionProfile &YamlBF,
FlowFunction &Func) {
uint64_t matchWeightsByHashes(
shawbyoung marked this conversation as resolved.
Show resolved Hide resolved
BinaryContext &BC, const BinaryFunction::BasicBlockOrderType &BlockOrder,
const yaml::bolt::BinaryFunctionProfile &YamlBF, FlowFunction &Func) {
assert(Func.Blocks.size() == BlockOrder.size() + 1);

std::vector<FlowBlock *> Blocks;
Expand Down Expand Up @@ -500,6 +505,8 @@ void matchWeightsByHashes(BinaryContext &BC,
Block.HasUnknownWeight = false;
Block.Weight = std::max(OutWeight[Block.Index], InWeight[Block.Index]);
}

return MatchedBlocks.size();
}

/// The function finds all blocks that are (i) reachable from the Entry block
Expand Down Expand Up @@ -575,10 +582,16 @@ void preprocessUnreachableBlocks(FlowFunction &Func) {
/// Decide if stale profile matching can be applied for a given function.
/// Currently we skip inference for (very) large instances, for instances whose
/// percentage of matched profile blocks is below the configured minimum, and
/// for instances having "unexpected" control flow (e.g., having no sink basic
/// blocks).
bool canApplyInference(const FlowFunction &Func) {
bool canApplyInference(const FlowFunction &Func,
shawbyoung marked this conversation as resolved.
Show resolved Hide resolved
const yaml::bolt::BinaryFunctionProfile &YamlBF,
const uint64_t &MatchedBlocks) {
if (Func.Blocks.size() > opts::StaleMatchingMaxFuncSize)
return false;

if (MatchedBlocks * 100 <
opts::StaleMatchingMinMatchedBlock * YamlBF.Blocks.size())
return false;

bool HasExitBlocks = llvm::any_of(
Func.Blocks, [&](const FlowBlock &Block) { return Block.isExit(); });
if (!HasExitBlocks)
Expand Down Expand Up @@ -725,18 +738,21 @@ bool YAMLProfileReader::inferStaleProfile(
const BinaryFunction::BasicBlockOrderType BlockOrder(
BF.getLayout().block_begin(), BF.getLayout().block_end());

// Tracks the number of matched blocks.

// Create a wrapper flow function to use with the profile inference algorithm.
FlowFunction Func = createFlowFunction(BlockOrder);

// Match as many block/jump counts from the stale profile as possible
matchWeightsByHashes(BF.getBinaryContext(), BlockOrder, YamlBF, Func);
uint64_t MatchedBlocks =
matchWeightsByHashes(BF.getBinaryContext(), BlockOrder, YamlBF, Func);

// Adjust the flow function by marking unreachable blocks Unlikely so that
// they don't get any counts assigned.
preprocessUnreachableBlocks(Func);

// Check if profile inference can be applied for the instance.
if (!canApplyInference(Func))
if (!canApplyInference(Func, YamlBF, MatchedBlocks))
return false;

// Apply the profile inference algorithm.
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,57 @@
---
header:
profile-version: 1
binary-name: 'reader-yaml.test.tmp.exe'
binary-build-id: '<unknown>'
profile-flags: [ lbr ]
profile-origin: branch profile reader
profile-events: ''
dfs-order: false
hash-func: xxh3
functions:
- name: SolveCubic
fid: 6
hash: 0x0000000000000000
exec: 151
nblocks: 18
blocks:
- bid: 0
insns: 43
hash: 0x4600940a609c0000
exec: 151
succ: [ { bid: 1, cnt: 151, mis: 2 }, { bid: 7, cnt: 0 } ]
- bid: 1
insns: 7
hash: 0x167a1f084f130088
succ: [ { bid: 13, cnt: 151 }, { bid: 2, cnt: 0 } ]
- bid: 13
insns: 26
hash: 0xa8d50000f81902a7
succ: [ { bid: 3, cnt: 89 }, { bid: 2, cnt: 10 } ]
- bid: 3
insns: 9
hash: 0xc516000073dc00a0
succ: [ { bid: 5, cnt: 151 } ]
- bid: 5
insns: 9
hash: 0x6446e1ea500111
- name: usqrt
fid: 7
hash: 0x0000000000000000
exec: 20
nblocks: 6
blocks:
- bid: 0
insns: 4
hash: 0x0000000000000001
exec: 20
succ: [ { bid: 1, cnt: 0 } ]
- bid: 1
insns: 9
hash: 0x0000000000000001
succ: [ { bid: 3, cnt: 320, mis: 171 }, { bid: 2, cnt: 0 } ]
- bid: 3
insns: 2
hash: 0x0000000000000001
succ: [ { bid: 1, cnt: 300, mis: 33 }, { bid: 4, cnt: 20 } ]
...
10 changes: 10 additions & 0 deletions bolt/test/X86/stale-matching-min-matched-block.test
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
## This script checks the stale-matching-min-matched-block flag.

RUN: yaml2obj %p/Inputs/blarge.yaml &> %t.exe

## Testing "usqrt"
RUN: llvm-bolt %t.exe -o %t.null --b %p/Inputs/blarge_profile_stale_low_matched_blocks.yaml \
RUN: --infer-stale-profile=1 --stale-matching-min-matched-block=75 \
RUN: --profile-ignore-hash=1 --debug-only=bolt-prof 2>&1 | FileCheck %s

CHECK: BOLT-INFO: inferred profile for 1 (50.00% of profiled, 50.00% of stale) functions responsible for 46.31% samples (552 out of 1192)