Commit

Random interleaving of benchmark repetitions - the sequel (fixes google#1051)

Based on the original implementation by Hai Huang (@haih-g) from google#1105.

LebedevRI committed Jun 3, 2021
1 parent d17ea66 commit f293839
Showing 10 changed files with 254 additions and 48 deletions.
2 changes: 2 additions & 0 deletions README.md
@@ -299,6 +299,8 @@ too (`-lkstat`).

[Setting the Time Unit](#setting-the-time-unit)

[Random Interleaving](docs/random_interleaving.md)

[User-Requested Performance Counters](docs/perf_counters.md)

[Preventing Optimization](#preventing-optimization)
13 changes: 13 additions & 0 deletions docs/random_interleaving.md
@@ -0,0 +1,13 @@
<a name="interleaving" />

# Random Interleaving

[Random Interleaving](https://github.com/google/benchmark/issues/1051) is a
technique to lower run-to-run variance. It randomly interleaves repetitions of a
microbenchmark with repetitions from other microbenchmarks in the same benchmark
test. Data shows it is able to lower run-to-run variance by
[40%](https://github.com/google/benchmark/issues/1051) on average.

To use it, you mainly need to set `--benchmark_enable_random_interleaving=true`;
optionally, also raise the repetition count (e.g. `--benchmark_repetitions=9`)
and decrease the per-repetition time (e.g. `--benchmark_min_time=0.1`).
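
For illustration, a minimal benchmark binary that these flags would apply to might look like the sketch below (not part of this commit; the benchmark name and body are invented):

```c++
#include <benchmark/benchmark.h>

#include <vector>

// A hypothetical microbenchmark; any BENCHMARK()-registered function works.
static void BM_VectorPushBack(benchmark::State& state) {
  for (auto _ : state) {
    std::vector<int> v;
    v.push_back(42);
    benchmark::DoNotOptimize(v.data());
  }
}
BENCHMARK(BM_VectorPushBack);

BENCHMARK_MAIN();
```

Assuming the binary is named `mybench`, it would then be invoked as, e.g.,
`./mybench --benchmark_enable_random_interleaving=true --benchmark_repetitions=9 --benchmark_min_time=0.1`.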
13 changes: 13 additions & 0 deletions include/benchmark/benchmark.h
@@ -1472,6 +1472,19 @@ class BenchmarkReporter {
int64_t max_bytes_used;
};

struct PerFamilyRunReports {
PerFamilyRunReports() : num_runs_total(0), num_runs_done(0) {}

// How many runs will all instances of this benchmark perform?
int num_runs_total;

// How many runs have happened already?
int num_runs_done;

// The reports about (non-erroneous!) runs of this family.
std::vector<BenchmarkReporter::Run> Runs;
};

// Construct a BenchmarkReporter with the output stream set to 'std::cout'
// and the error stream set to 'std::cerr'
BenchmarkReporter();
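The counters above gate when a family's complexity report can be computed: a family is complete exactly when `num_runs_done` catches up with `num_runs_total`. A rough standalone sketch of that bookkeeping (field names mirror the struct above; the driver loop and `int` payload are stand-ins, not this commit's code):

```c++
#include <iostream>
#include <map>
#include <vector>

struct PerFamilyRunReports {
  int num_runs_total = 0;
  int num_runs_done = 0;
  std::vector<int> Runs;  // stand-in for BenchmarkReporter::Run
};

int main() {
  std::map<int /*family_index*/, PerFamilyRunReports> per_family_reports;
  // Two instances of family 0, three repetitions each.
  per_family_reports[0].num_runs_total = 6;

  for (int run = 0; run != 6; ++run) {
    PerFamilyRunReports& family = per_family_reports[0];
    ++family.num_runs_done;
    family.Runs.push_back(run);  // the real code skips errored runs
    if (family.num_runs_done == family.num_runs_total) {
      // This is the point where the real code calls ComputeBigO(family.Runs).
      std::cout << "family 0 complete after " << family.Runs.size()
                << " runs\n";
      per_family_reports.erase(0);
    }
  }
}
```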
71 changes: 63 additions & 8 deletions src/benchmark.cc
@@ -33,8 +33,10 @@
#include <cstdlib>
#include <fstream>
#include <iostream>
#include <limits>
#include <map>
#include <memory>
#include <random>
#include <string>
#include <thread>
#include <utility>
@@ -73,6 +75,10 @@ DEFINE_double(benchmark_min_time, 0.5);
// standard deviation of the runs will be reported.
DEFINE_int32(benchmark_repetitions, 1);

// If set, enable random interleaving of repetitions of all benchmarks.
// See http://github.com/google/benchmark/issues/1051 for details.
DEFINE_bool(benchmark_enable_random_interleaving, false);

// Report the result of each benchmark repetition. When 'true' is specified,
// only the mean, standard deviation, and other statistics are reported for
// repeated benchmarks. Affects all reporters.
@@ -297,23 +303,69 @@ void RunBenchmarks(const std::vector<BenchmarkInstance>& benchmarks,
context.name_field_width = name_field_width;

// Keep track of running times of all instances of each benchmark family.
-  std::map<int /*family_index*/, std::vector<BenchmarkReporter::Run>>
-      complexity_reports;
std::map<int /*family_index*/, BenchmarkReporter::PerFamilyRunReports>
per_family_reports;

if (display_reporter->ReportContext(context) &&
(!file_reporter || file_reporter->ReportContext(context))) {
FlushStreams(display_reporter);
FlushStreams(file_reporter);

size_t num_repetitions_total = 0;

std::vector<internal::BenchmarkRunner> runners;
runners.reserve(benchmarks.size());
for (const BenchmarkInstance& benchmark : benchmarks) {
-      std::vector<BenchmarkReporter::Run>* complexity_reports_for_family =
-          nullptr;
BenchmarkReporter::PerFamilyRunReports* reports_for_family = nullptr;
if (benchmark.complexity() != oNone)
-        complexity_reports_for_family =
-            &complexity_reports[benchmark.family_index()];
reports_for_family = &per_family_reports[benchmark.family_index()];

runners.emplace_back(benchmark, reports_for_family);
int num_repeats_of_this_instance = runners.back().GetNumRepeats();
num_repetitions_total += num_repeats_of_this_instance;
if (reports_for_family)
reports_for_family->num_runs_total += num_repeats_of_this_instance;
}
assert(runners.size() == benchmarks.size() && "Unexpected runner count.");

std::vector<int> repetition_indices;
repetition_indices.reserve(num_repetitions_total);
for (size_t runner_index = 0, num_runners = runners.size();
runner_index != num_runners; ++runner_index) {
const internal::BenchmarkRunner& runner = runners[runner_index];
std::fill_n(std::back_inserter(repetition_indices),
runner.GetNumRepeats(), runner_index);
}
assert(repetition_indices.size() == num_repetitions_total &&
"Unexpected number of repetition indexes.");

if (FLAGS_benchmark_enable_random_interleaving) {
std::random_device rd;
std::mt19937 g(rd());
std::shuffle(repetition_indices.begin(), repetition_indices.end(), g);
}

-      RunResults run_results =
-          RunBenchmark(benchmark, complexity_reports_for_family);
for (size_t repetition_index : repetition_indices) {
internal::BenchmarkRunner& runner = runners[repetition_index];
runner.DoOneRepetition();
if (runner.HasRepeatsRemaining()) continue;
// FIXME: report each repetition separately, not all of them in bulk.

RunResults run_results = runner.GetResults();

// Maybe calculate complexity report
if (BenchmarkReporter::PerFamilyRunReports* reports_for_family =
runner.GetReportsForFamily()) {
if (reports_for_family->num_runs_done ==
reports_for_family->num_runs_total) {
auto additional_run_stats = ComputeBigO(reports_for_family->Runs);
run_results.aggregates_only.insert(run_results.aggregates_only.end(),
additional_run_stats.begin(),
additional_run_stats.end());
per_family_reports.erase(
(int)reports_for_family->Runs.front().family_index);
}
}

Report(display_reporter, file_reporter, run_results);
}
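
The `repetition_indices` construction above is the heart of the interleaving: the vector holds one entry per pending repetition, so shuffling it permutes execution order across families while preserving each runner's repetition count. A self-contained sketch with toy numbers (not the commit's code):

```c++
#include <algorithm>
#include <iostream>
#include <iterator>
#include <random>
#include <vector>

int main() {
  // Two runners with 3 repetitions each: [0, 0, 0, 1, 1, 1].
  std::vector<int> repetition_indices;
  for (int runner_index : {0, 1})
    std::fill_n(std::back_inserter(repetition_indices), 3, runner_index);

  std::random_device rd;
  std::mt19937 g(rd());
  std::shuffle(repetition_indices.begin(), repetition_indices.end(), g);

  // A possible order: 0 1 1 0 0 1 -- interleaved, yet each runner
  // still gets exactly its 3 repetitions.
  for (int runner_index : repetition_indices)
    std::cout << runner_index << ' ';
  std::cout << '\n';
}
```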
@@ -471,6 +523,7 @@ void PrintUsageAndExit() {
" [--benchmark_filter=<regex>]\n"
" [--benchmark_min_time=<min_time>]\n"
" [--benchmark_repetitions=<num_repetitions>]\n"
" [--benchmark_enable_random_interleaving={true|false}]\n"
" [--benchmark_report_aggregates_only={true|false}]\n"
" [--benchmark_display_aggregates_only={true|false}]\n"
" [--benchmark_format=<console|json|csv>]\n"
@@ -495,6 +548,8 @@ void ParseCommandLineFlags(int* argc, char** argv) {
&FLAGS_benchmark_min_time) ||
ParseInt32Flag(argv[i], "benchmark_repetitions",
&FLAGS_benchmark_repetitions) ||
ParseBoolFlag(argv[i], "benchmark_enable_random_interleaving",
&FLAGS_benchmark_enable_random_interleaving) ||
ParseBoolFlag(argv[i], "benchmark_report_aggregates_only",
&FLAGS_benchmark_report_aggregates_only) ||
ParseBoolFlag(argv[i], "benchmark_display_aggregates_only",
2 changes: 0 additions & 2 deletions src/benchmark_api_internal.h
@@ -39,8 +39,6 @@ class BenchmarkInstance {
IterationCount iterations() const { return iterations_; }
int threads() const { return threads_; }

-  bool last_benchmark_instance;

State Run(IterationCount iters, int thread_id, internal::ThreadTimer* timer,
internal::ThreadManager* manager,
internal::PerfCountersMeasurement* perf_counters_measurement) const;
1 change: 0 additions & 1 deletion src/benchmark_register.cc
@@ -166,7 +166,6 @@ bool BenchmarkFamilies::FindBenchmarks(
const auto full_name = instance.name().str();
if ((re.Match(full_name) && !isNegativeFilter) ||
(!re.Match(full_name) && isNegativeFilter)) {
-      instance.last_benchmark_instance = (&args == &family->args_.back());
benchmarks->push_back(std::move(instance));

++per_family_instance_index;
48 changes: 20 additions & 28 deletions src/benchmark_runner.cc
@@ -143,9 +143,9 @@ void RunInThread(const BenchmarkInstance* b, IterationCount iters,

BenchmarkRunner::BenchmarkRunner(
const benchmark::internal::BenchmarkInstance& b_,
-    std::vector<BenchmarkReporter::Run>* complexity_reports_)
BenchmarkReporter::PerFamilyRunReports* reports_for_family_)
: b(b_),
-      complexity_reports(complexity_reports_),
reports_for_family(reports_for_family_),
min_time(!IsZero(b.min_time()) ? b.min_time() : FLAGS_benchmark_min_time),
repeats(b.repetitions() != 0 ? b.repetitions()
: FLAGS_benchmark_repetitions),
@@ -172,22 +172,6 @@ BenchmarkRunner::BenchmarkRunner(
perf_counters_measurement.IsValid())
<< "Perf counters were requested but could not be set up.";
}

-  for (int repetition_num = 0; repetition_num < repeats; repetition_num++) {
-    DoOneRepetition(repetition_num);
-  }
-
-  // Calculate additional statistics
-  run_results.aggregates_only = ComputeStats(run_results.non_aggregates);
-
-  // Maybe calculate complexity report
-  if (complexity_reports && b.last_benchmark_instance) {
-    auto additional_run_stats = ComputeBigO(*complexity_reports);
-    run_results.aggregates_only.insert(run_results.aggregates_only.end(),
-                                       additional_run_stats.begin(),
-                                       additional_run_stats.end());
-    complexity_reports->clear();
-  }
}

BenchmarkRunner::IterationResults BenchmarkRunner::DoNIterations() {
@@ -283,8 +267,10 @@ bool BenchmarkRunner::ShouldReportIterationResults(
((i.results.real_time_used >= 5 * min_time) && !b.use_manual_time());
}

-void BenchmarkRunner::DoOneRepetition(int64_t repetition_index) {
-  const bool is_the_first_repetition = repetition_index == 0;
void BenchmarkRunner::DoOneRepetition() {
assert(HasRepeatsRemaining() && "Already done all repetitions?");

const bool is_the_first_repetition = num_repetitions_done == 0;
IterationResults i;

// We *may* be gradually increasing the length (iteration count)
@@ -337,19 +323,25 @@ void BenchmarkRunner::DoOneRepetition(int64_t repetition_index) {
// Ok, now actually report.
BenchmarkReporter::Run report =
CreateRunReport(b, i.results, memory_iterations, memory_result, i.seconds,
-                      repetition_index, repeats);
num_repetitions_done, repeats);

-  if (complexity_reports && !report.error_occurred)
-    complexity_reports->push_back(report);
if (reports_for_family) {
++reports_for_family->num_runs_done;
if (!report.error_occurred) reports_for_family->Runs.push_back(report);
}

run_results.non_aggregates.push_back(report);

++num_repetitions_done;
}

-RunResults RunBenchmark(
-    const benchmark::internal::BenchmarkInstance& b,
-    std::vector<BenchmarkReporter::Run>* complexity_reports) {
-  internal::BenchmarkRunner r(b, complexity_reports);
-  return r.get_results();
RunResults&& BenchmarkRunner::GetResults() {
assert(!HasRepeatsRemaining() && "Did not run all repetitions yet?");

// Calculate additional statistics over the repetitions of this instance.
run_results.aggregates_only = ComputeStats(run_results.non_aggregates);

return std::move(run_results);
}

} // end namespace internal
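
The split of the old constructor-driven loop into `DoOneRepetition()` / `HasRepeatsRemaining()` / `GetResults()` implies a driving pattern like the sketch below (`FakeRunner` is a hypothetical stand-in for `internal::BenchmarkRunner`): repetitions may arrive in any interleaved order, and results are collected only once a runner has no repeats remaining.

```c++
#include <cassert>
#include <vector>

// Hypothetical stand-in for internal::BenchmarkRunner's new interface.
class FakeRunner {
 public:
  explicit FakeRunner(int repeats) : repeats_(repeats) {}
  int GetNumRepeats() const { return repeats_; }
  bool HasRepeatsRemaining() const { return done_ != repeats_; }
  void DoOneRepetition() {
    assert(HasRepeatsRemaining() && "Already done all repetitions?");
    ++done_;
  }
  int GetResults() {  // the real method returns RunResults&&
    assert(!HasRepeatsRemaining() && "Did not run all repetitions yet?");
    return done_;
  }

 private:
  int repeats_;
  int done_ = 0;
};

int main() {
  std::vector<FakeRunner> runners{FakeRunner(2), FakeRunner(2)};
  // A (possibly shuffled) schedule of runner indices, one per repetition.
  for (int index : {0, 1, 1, 0}) {
    FakeRunner& runner = runners[index];
    runner.DoOneRepetition();
    if (runner.HasRepeatsRemaining()) continue;
    // All repetitions of this runner finished; now it is safe to report.
    (void)runner.GetResults();
  }
}
```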
26 changes: 17 additions & 9 deletions src/benchmark_runner.h
@@ -50,20 +50,34 @@ struct RunResults {
class BenchmarkRunner {
public:
BenchmarkRunner(const benchmark::internal::BenchmarkInstance& b_,
-                  std::vector<BenchmarkReporter::Run>* complexity_reports_);
BenchmarkReporter::PerFamilyRunReports* reports_for_family);

-  RunResults&& get_results() { return std::move(run_results); }
int GetNumRepeats() const { return repeats; }

bool HasRepeatsRemaining() const {
return GetNumRepeats() != num_repetitions_done;
}

void DoOneRepetition();

RunResults&& GetResults();

BenchmarkReporter::PerFamilyRunReports* GetReportsForFamily() const {
return reports_for_family;
};

private:
RunResults run_results;

const benchmark::internal::BenchmarkInstance& b;
-  std::vector<BenchmarkReporter::Run>* complexity_reports;
BenchmarkReporter::PerFamilyRunReports* reports_for_family;

const double min_time;
const int repeats;
const bool has_explicit_iteration_count;

int num_repetitions_done = 0;

std::vector<std::thread> pool;

IterationCount iters; // preserved between repetitions!
@@ -83,14 +97,8 @@ class BenchmarkRunner {
IterationCount PredictNumItersNeeded(const IterationResults& i) const;

bool ShouldReportIterationResults(const IterationResults& i) const;

-  void DoOneRepetition(int64_t repetition_index);
};

-RunResults RunBenchmark(
-    const benchmark::internal::BenchmarkInstance& b,
-    std::vector<BenchmarkReporter::Run>* complexity_reports);

} // namespace internal

} // end namespace benchmark
1 change: 1 addition & 0 deletions test/CMakeLists.txt
@@ -199,6 +199,7 @@ if (BENCHMARK_ENABLE_GTEST_TESTS)

add_gtest(benchmark_gtest)
add_gtest(benchmark_name_gtest)
add_gtest(benchmark_random_interleaving_gtest)
add_gtest(commandlineflags_gtest)
add_gtest(statistics_gtest)
add_gtest(string_util_gtest)