From f2938395528d0ed6e88b33f7cce2f3dfb057d73d Mon Sep 17 00:00:00 2001
From: Roman Lebedev <lebedev.ri@gmail.com>
Date: Tue, 1 Jun 2021 20:16:11 +0300
Subject: [PATCH] Random interleaving of benchmark repetitions - the sequel
 (fixes #1051)

Based on the original implementation by Hai Huang (@haih-g) from
https://github.com/google/benchmark/pull/1105.
---
 README.md                                   |   2 +
 docs/random_interleaving.md                 |  13 ++
 include/benchmark/benchmark.h               |  13 ++
 src/benchmark.cc                            |  71 +++++++++--
 src/benchmark_api_internal.h                |   2 -
 src/benchmark_register.cc                   |   1 -
 src/benchmark_runner.cc                     |  48 ++++----
 src/benchmark_runner.h                      |  26 ++--
 test/CMakeLists.txt                         |   1 +
 test/benchmark_random_interleaving_gtest.cc | 125 ++++++++++++++++++++
 10 files changed, 254 insertions(+), 48 deletions(-)
 create mode 100644 docs/random_interleaving.md
 create mode 100644 test/benchmark_random_interleaving_gtest.cc

diff --git a/README.md b/README.md
index 7947229499..aa61cef1b1 100644
--- a/README.md
+++ b/README.md
@@ -299,6 +299,8 @@ too (`-lkstat`).
 
 [Setting the Time Unit](#setting-the-time-unit)
 
+[Random Interleaving](docs/random_interleaving.md)
+
 [User-Requested Performance Counters](docs/perf_counters.md)
 
 [Preventing Optimization](#preventing-optimization)
diff --git a/docs/random_interleaving.md b/docs/random_interleaving.md
new file mode 100644
index 0000000000..c083036841
--- /dev/null
+++ b/docs/random_interleaving.md
@@ -0,0 +1,13 @@
+
+# Random Interleaving
+
+[Random Interleaving](https://github.com/google/benchmark/issues/1051) is a
+technique to lower run-to-run variance. It randomly interleaves repetitions of
+a microbenchmark with repetitions of other microbenchmarks in the same
+benchmark test. Data shows it is able to lower run-to-run variance by
+[40%](https://github.com/google/benchmark/issues/1051) on average.
+
+To use it, set `--benchmark_enable_random_interleaving=true`. Optionally,
+specify a non-zero repetition count (`--benchmark_repetitions=9`) and
+decrease the per-repetition time (`--benchmark_min_time=0.1`).
diff --git a/include/benchmark/benchmark.h b/include/benchmark/benchmark.h
index 290c5c5011..9b5480244d 100644
--- a/include/benchmark/benchmark.h
+++ b/include/benchmark/benchmark.h
@@ -1472,6 +1472,19 @@ class BenchmarkReporter {
     int64_t max_bytes_used;
   };
 
+  struct PerFamilyRunReports {
+    PerFamilyRunReports() : num_runs_total(0), num_runs_done(0) {}
+
+    // How many runs will all instances of this benchmark perform?
+    int num_runs_total;
+
+    // How many runs have happened already?
+    int num_runs_done;
+
+    // The reports about (non-erroneous!) runs of this family.
+    std::vector<BenchmarkReporter::Run> Runs;
+  };
+
   // Construct a BenchmarkReporter with the output stream set to 'std::cout'
   // and the error stream set to 'std::cerr'
   BenchmarkReporter();
diff --git a/src/benchmark.cc b/src/benchmark.cc
index c4392bfdcf..a726ef8af2 100644
--- a/src/benchmark.cc
+++ b/src/benchmark.cc
@@ -33,8 +33,10 @@
 #include <fstream>
 #include <iostream>
 #include <limits>
+#include <map>
 #include <memory>
+#include <random>
 #include <string>
 #include <thread>
 #include <utility>
@@ -73,6 +75,10 @@ DEFINE_double(benchmark_min_time, 0.5);
 // standard deviation of the runs will be reported.
 DEFINE_int32(benchmark_repetitions, 1);
 
+// If set, enable random interleaving of repetitions of all benchmarks.
+// See http://github.com/google/benchmark/issues/1051 for details.
+DEFINE_bool(benchmark_enable_random_interleaving, false);
+
 // Report the result of each benchmark repetition. When 'true' is specified
 // only the mean, standard deviation, and other statistics are reported for
 // repeated benchmarks. Affects all reporters.
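[Editor's note: illustration, not part of the patch] To make the documented workflow concrete, here is a minimal sketch of a user-side benchmark exercising the new flag. The benchmark and binary names are invented for illustration; only the flag names come from this patch.

```cpp
#include <vector>

#include "benchmark/benchmark.h"

// Any registered benchmark participates in interleaving;
// this one exists purely for illustration.
static void BM_VectorPushBack(benchmark::State& state) {
  for (auto _ : state) {
    std::vector<int> v;
    v.push_back(42);
    benchmark::DoNotOptimize(v.data());
  }
}
BENCHMARK(BM_VectorPushBack);

BENCHMARK_MAIN();

// Build as usual, then run with (hypothetical binary name "mybench"):
//   ./mybench --benchmark_enable_random_interleaving=true \
//             --benchmark_repetitions=9 --benchmark_min_time=0.1
// Repetitions of all selected benchmarks are then shuffled into one
// global execution order instead of running back-to-back.
```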
@@ -297,23 +303,69 @@ void RunBenchmarks(const std::vector<BenchmarkInstance>& benchmarks,
   context.name_field_width = name_field_width;
 
   // Keep track of running times of all instances of each benchmark family.
-  std::map<int /*family_index*/, std::vector<BenchmarkReporter::Run>>
-      complexity_reports;
+  std::map<int /*family_index*/, BenchmarkReporter::PerFamilyRunReports>
+      per_family_reports;
 
   if (display_reporter->ReportContext(context) &&
       (!file_reporter || file_reporter->ReportContext(context))) {
     FlushStreams(display_reporter);
     FlushStreams(file_reporter);
 
+    size_t num_repetitions_total = 0;
+
+    std::vector<internal::BenchmarkRunner> runners;
+    runners.reserve(benchmarks.size());
     for (const BenchmarkInstance& benchmark : benchmarks) {
-      std::vector<BenchmarkReporter::Run>* complexity_reports_for_family =
-          nullptr;
+      BenchmarkReporter::PerFamilyRunReports* reports_for_family = nullptr;
       if (benchmark.complexity() != oNone)
-        complexity_reports_for_family =
-            &complexity_reports[benchmark.family_index()];
+        reports_for_family = &per_family_reports[benchmark.family_index()];
+
+      runners.emplace_back(benchmark, reports_for_family);
+      int num_repeats_of_this_instance = runners.back().GetNumRepeats();
+      num_repetitions_total += num_repeats_of_this_instance;
+      if (reports_for_family)
+        reports_for_family->num_runs_total += num_repeats_of_this_instance;
+    }
+    assert(runners.size() == benchmarks.size() && "Unexpected runner count.");
+
+    std::vector<size_t> repetition_indices;
+    repetition_indices.reserve(num_repetitions_total);
+    for (size_t runner_index = 0, num_runners = runners.size();
+         runner_index != num_runners; ++runner_index) {
+      const internal::BenchmarkRunner& runner = runners[runner_index];
+      std::fill_n(std::back_inserter(repetition_indices),
+                  runner.GetNumRepeats(), runner_index);
+    }
+    assert(repetition_indices.size() == num_repetitions_total &&
+           "Unexpected number of repetition indexes.");
+
+    if (FLAGS_benchmark_enable_random_interleaving) {
+      std::random_device rd;
+      std::mt19937 g(rd());
+      std::shuffle(repetition_indices.begin(), repetition_indices.end(), g);
+    }
 
-      RunResults run_results =
-          RunBenchmark(benchmark, complexity_reports_for_family);
+    for (size_t repetition_index : repetition_indices) {
+      internal::BenchmarkRunner& runner = runners[repetition_index];
+      runner.DoOneRepetition();
+      if (runner.HasRepeatsRemaining()) continue;
+
+      // FIXME: report each repetition separately, not all of them in bulk.
+
+      RunResults run_results = runner.GetResults();
+
+      // Maybe calculate complexity report
+      if (BenchmarkReporter::PerFamilyRunReports* reports_for_family =
+              runner.GetReportsForFamily()) {
+        if (reports_for_family->num_runs_done ==
+            reports_for_family->num_runs_total) {
+          auto additional_run_stats = ComputeBigO(reports_for_family->Runs);
+          run_results.aggregates_only.insert(run_results.aggregates_only.end(),
+                                             additional_run_stats.begin(),
+                                             additional_run_stats.end());
+          per_family_reports.erase(
+              (int)reports_for_family->Runs.front().family_index);
+        }
+      }
 
       Report(display_reporter, file_reporter, run_results);
     }
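[Editor's note: illustration, not part of the patch] The scheduling logic above is compact; this standalone model (not library code) shows what it computes: one slot per repetition, shuffled only when the flag is on.

```cpp
#include <algorithm>
#include <cstddef>
#include <iostream>
#include <iterator>
#include <random>
#include <vector>

int main() {
  const std::size_t num_runners = 3;  // stand-ins for BenchmarkRunner objects
  const std::size_t repeats = 3;      // as from --benchmark_repetitions=3

  // One entry per repetition, holding the owning runner's index.
  // Without interleaving the schedule is 0 0 0 1 1 1 2 2 2.
  std::vector<std::size_t> repetition_indices;
  repetition_indices.reserve(num_runners * repeats);
  for (std::size_t runner = 0; runner < num_runners; ++runner)
    std::fill_n(std::back_inserter(repetition_indices), repeats, runner);

  // --benchmark_enable_random_interleaving=true shuffles the whole schedule.
  std::random_device rd;
  std::mt19937 g(rd());
  std::shuffle(repetition_indices.begin(), repetition_indices.end(), g);

  // Possible output: "1 0 2 2 0 1 1 2 0" - each runner still executes
  // exactly `repeats` times, just not back-to-back.
  for (std::size_t index : repetition_indices) std::cout << index << ' ';
  std::cout << '\n';
}
```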
@@ -471,6 +523,7 @@ void PrintUsageAndExit() {
           " [--benchmark_filter=<regex>]\n"
           " [--benchmark_min_time=<min_time>]\n"
           " [--benchmark_repetitions=<num_repetitions>]\n"
+          " [--benchmark_enable_random_interleaving={true|false}]\n"
           " [--benchmark_report_aggregates_only={true|false}]\n"
           " [--benchmark_display_aggregates_only={true|false}]\n"
           " [--benchmark_format=<console|json|csv>]\n"
@@ -495,6 +548,8 @@ void ParseCommandLineFlags(int* argc, char** argv) {
                         &FLAGS_benchmark_min_time) ||
         ParseInt32Flag(argv[i], "benchmark_repetitions",
                        &FLAGS_benchmark_repetitions) ||
+        ParseBoolFlag(argv[i], "benchmark_enable_random_interleaving",
+                      &FLAGS_benchmark_enable_random_interleaving) ||
         ParseBoolFlag(argv[i], "benchmark_report_aggregates_only",
                       &FLAGS_benchmark_report_aggregates_only) ||
         ParseBoolFlag(argv[i], "benchmark_display_aggregates_only",
diff --git a/src/benchmark_api_internal.h b/src/benchmark_api_internal.h
index e2afbd833c..9296b7d2c8 100644
--- a/src/benchmark_api_internal.h
+++ b/src/benchmark_api_internal.h
@@ -39,8 +39,6 @@ class BenchmarkInstance {
   IterationCount iterations() const { return iterations_; }
   int threads() const { return threads_; }
 
-  bool last_benchmark_instance;
-
   State Run(IterationCount iters, int thread_id, internal::ThreadTimer* timer,
             internal::ThreadManager* manager,
             internal::PerfCountersMeasurement* perf_counters_measurement) const;
diff --git a/src/benchmark_register.cc b/src/benchmark_register.cc
index 435c48b4aa..574462220e 100644
--- a/src/benchmark_register.cc
+++ b/src/benchmark_register.cc
@@ -166,7 +166,6 @@ bool BenchmarkFamilies::FindBenchmarks(
         const auto full_name = instance.name().str();
         if ((re.Match(full_name) && !isNegativeFilter) ||
             (!re.Match(full_name) && isNegativeFilter)) {
-          instance.last_benchmark_instance = (&args == &family->args_.back());
           benchmarks->push_back(std::move(instance));
 
           ++per_family_instance_index;
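[Editor's note: illustration, not part of the patch] Why the removed `last_benchmark_instance` flag cannot survive interleaving: with a shuffled schedule, no instance reliably finishes last, so family completion has to be detected by counting. A minimal model of the bookkeeping that `PerFamilyRunReports` performs (names other than `num_runs_total`/`num_runs_done` are invented):

```cpp
#include <cassert>
#include <map>

// Mirrors the spirit of BenchmarkReporter::PerFamilyRunReports.
struct FamilyReports {
  int num_runs_total = 0;  // fixed up front, before anything runs
  int num_runs_done = 0;   // incremented as repetitions finish, in any order
};

int main() {
  std::map<int, FamilyReports> per_family;
  per_family[0].num_runs_total = 4;  // e.g. 2 instances x 2 repetitions

  // Repetitions complete in arbitrary interleaved order; the Big-O report
  // fires exactly when the whole family is done, order notwithstanding.
  for (int finished = 0; finished < 4; ++finished) {
    FamilyReports& family = per_family[0];
    ++family.num_runs_done;
    if (family.num_runs_done == family.num_runs_total) {
      // ...this is the point where ComputeBigO(...) runs in the patch...
      per_family.erase(0);
    }
  }
  assert(per_family.empty());
}
```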
diff --git a/src/benchmark_runner.cc b/src/benchmark_runner.cc
index 55d6cf15d2..6742d42dbe 100644
--- a/src/benchmark_runner.cc
+++ b/src/benchmark_runner.cc
@@ -143,9 +143,9 @@ void RunInThread(const BenchmarkInstance* b, IterationCount iters,
 
 BenchmarkRunner::BenchmarkRunner(
     const benchmark::internal::BenchmarkInstance& b_,
-    std::vector<BenchmarkReporter::Run>* complexity_reports_)
+    BenchmarkReporter::PerFamilyRunReports* reports_for_family_)
     : b(b_),
-      complexity_reports(complexity_reports_),
+      reports_for_family(reports_for_family_),
       min_time(!IsZero(b.min_time()) ? b.min_time() : FLAGS_benchmark_min_time),
       repeats(b.repetitions() != 0 ?
                   b.repetitions() : FLAGS_benchmark_repetitions),
@@ -172,22 +172,6 @@ BenchmarkRunner::BenchmarkRunner(
             perf_counters_measurement.IsValid())
         << "Perf counters were requested but could not be set up.";
   }
-
-  for (int repetition_num = 0; repetition_num < repeats; repetition_num++) {
-    DoOneRepetition(repetition_num);
-  }
-
-  // Calculate additional statistics
-  run_results.aggregates_only = ComputeStats(run_results.non_aggregates);
-
-  // Maybe calculate complexity report
-  if (complexity_reports && b.last_benchmark_instance) {
-    auto additional_run_stats = ComputeBigO(*complexity_reports);
-    run_results.aggregates_only.insert(run_results.aggregates_only.end(),
-                                       additional_run_stats.begin(),
-                                       additional_run_stats.end());
-    complexity_reports->clear();
-  }
 }
 
 BenchmarkRunner::IterationResults BenchmarkRunner::DoNIterations() {
@@ -283,8 +267,10 @@ bool BenchmarkRunner::ShouldReportIterationResults(
          ((i.results.real_time_used >= 5 * min_time) && !b.use_manual_time());
 }
 
-void BenchmarkRunner::DoOneRepetition(int64_t repetition_index) {
-  const bool is_the_first_repetition = repetition_index == 0;
+void BenchmarkRunner::DoOneRepetition() {
+  assert(HasRepeatsRemaining() && "Already done all repetitions?");
+
+  const bool is_the_first_repetition = num_repetitions_done == 0;
   IterationResults i;
 
   // We *may* be gradually increasing the length (iteration count)
@@ -337,19 +323,25 @@
   // Ok, now actually report.
   BenchmarkReporter::Run report =
       CreateRunReport(b, i.results, memory_iterations, memory_result, i.seconds,
-                      repetition_index, repeats);
+                      num_repetitions_done, repeats);
 
-  if (complexity_reports && !report.error_occurred)
-    complexity_reports->push_back(report);
+  if (reports_for_family) {
+    ++reports_for_family->num_runs_done;
+    if (!report.error_occurred) reports_for_family->Runs.push_back(report);
+  }
 
   run_results.non_aggregates.push_back(report);
+
+  ++num_repetitions_done;
 }
 
-RunResults RunBenchmark(
-    const benchmark::internal::BenchmarkInstance& b,
-    std::vector<BenchmarkReporter::Run>* complexity_reports) {
-  internal::BenchmarkRunner r(b, complexity_reports);
-  return r.get_results();
+RunResults&& BenchmarkRunner::GetResults() {
+  assert(!HasRepeatsRemaining() && "Did not run all repetitions yet?");
+
+  // Calculate additional statistics over the repetitions of this instance.
+  run_results.aggregates_only = ComputeStats(run_results.non_aggregates);
+
+  return std::move(run_results);
 }
 
 }  // end namespace internal
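[Editor's note: illustration, not part of the patch] The new `BenchmarkRunner` lifecycle, reduced to a skeleton under the assumptions visible in the patch: repetitions are driven one at a time from outside, and the accumulated results are moved out exactly once, after the last repetition. All names here are stand-ins.

```cpp
#include <cassert>
#include <string>
#include <utility>
#include <vector>

struct Results {  // stands in for RunResults
  std::vector<std::string> non_aggregates;
};

class Runner {  // stands in for internal::BenchmarkRunner
 public:
  explicit Runner(int repeats) : repeats_(repeats) {}

  int GetNumRepeats() const { return repeats_; }
  bool HasRepeatsRemaining() const { return done_ != repeats_; }

  void DoOneRepetition() {
    assert(HasRepeatsRemaining() && "Already done all repetitions?");
    results_.non_aggregates.push_back("report #" + std::to_string(done_));
    ++done_;
  }

  // Valid to call exactly once, and only after the final repetition:
  // the stored results are moved out, not copied.
  Results&& GetResults() {
    assert(!HasRepeatsRemaining() && "Did not run all repetitions yet?");
    return std::move(results_);
  }

 private:
  Results results_;
  const int repeats_;
  int done_ = 0;
};

int main() {
  Runner r(3);
  // In the library, these calls may be interleaved with other runners' calls.
  while (r.HasRepeatsRemaining()) r.DoOneRepetition();
  Results res = r.GetResults();
  assert(res.non_aggregates.size() == 3);
}
```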
diff --git a/src/benchmark_runner.h b/src/benchmark_runner.h
index 9730ad386c..8a855236b2 100644
--- a/src/benchmark_runner.h
+++ b/src/benchmark_runner.h
@@ -50,20 +50,34 @@ struct RunResults {
 class BenchmarkRunner {
  public:
   BenchmarkRunner(const benchmark::internal::BenchmarkInstance& b_,
-                  std::vector<BenchmarkReporter::Run>* complexity_reports_);
+                  BenchmarkReporter::PerFamilyRunReports* reports_for_family);
 
-  RunResults&& get_results() { return std::move(run_results); }
+  int GetNumRepeats() const { return repeats; }
+
+  bool HasRepeatsRemaining() const {
+    return GetNumRepeats() != num_repetitions_done;
+  }
+
+  void DoOneRepetition();
+
+  RunResults&& GetResults();
+
+  BenchmarkReporter::PerFamilyRunReports* GetReportsForFamily() const {
+    return reports_for_family;
+  }
 
  private:
   RunResults run_results;
 
   const benchmark::internal::BenchmarkInstance& b;
-  std::vector<BenchmarkReporter::Run>* complexity_reports;
+  BenchmarkReporter::PerFamilyRunReports* reports_for_family;
 
   const double min_time;
   const int repeats;
   const bool has_explicit_iteration_count;
 
+  int num_repetitions_done = 0;
+
   std::vector<std::thread> pool;
 
   IterationCount iters;  // preserved between repetitions!
@@ -83,14 +97,8 @@ class BenchmarkRunner {
   IterationCount PredictNumItersNeeded(const IterationResults& i) const;
 
   bool ShouldReportIterationResults(const IterationResults& i) const;
-
-  void DoOneRepetition(int64_t repetition_index);
 };
 
-RunResults RunBenchmark(
-    const benchmark::internal::BenchmarkInstance& b,
-    std::vector<BenchmarkReporter::Run>* complexity_reports);
-
 }  // namespace internal
 }  // end namespace benchmark
diff --git a/test/CMakeLists.txt b/test/CMakeLists.txt
index 012f5a8bcd..79cdf53b40 100644
--- a/test/CMakeLists.txt
+++ b/test/CMakeLists.txt
@@ -199,6 +199,7 @@ if (BENCHMARK_ENABLE_GTEST_TESTS)
   add_gtest(benchmark_gtest)
   add_gtest(benchmark_name_gtest)
+  add_gtest(benchmark_random_interleaving_gtest)
   add_gtest(commandlineflags_gtest)
   add_gtest(statistics_gtest)
   add_gtest(string_util_gtest)
diff --git a/test/benchmark_random_interleaving_gtest.cc b/test/benchmark_random_interleaving_gtest.cc
new file mode 100644
index 0000000000..48c4efc98e
--- /dev/null
+++ b/test/benchmark_random_interleaving_gtest.cc
@@ -0,0 +1,125 @@
+#include <map>
+#include <queue>
+#include <string>
+#include <vector>
+
+#include "../src/commandlineflags.h"
+#include "../src/string_util.h"
+#include "benchmark/benchmark.h"
+#include "gmock/gmock.h"
+#include "gtest/gtest.h"
+
+DECLARE_bool(benchmark_enable_random_interleaving);
+DECLARE_string(benchmark_filter);
+DECLARE_int32(benchmark_repetitions);
+
+namespace benchmark {
+namespace internal {
+namespace {
+
+class EventQueue : public std::queue<std::string> {
+ public:
+  void Put(const std::string& event) { push(event); }
+
+  void Clear() {
+    while (!empty()) {
+      pop();
+    }
+  }
+
+  std::string Get() {
+    std::string event = front();
+    pop();
+    return event;
+  }
+};
+
+static EventQueue* queue = new EventQueue;
+
+class NullReporter : public BenchmarkReporter {
+ public:
+  bool ReportContext(const Context& /*context*/) override { return true; }
+  void ReportRuns(const std::vector<Run>& /* report */) override {}
+};
+
+class BenchmarkTest : public testing::Test {
+ public:
+  static void SetupHook(int /* num_threads */) { queue->push("Setup"); }
+
+  static void TeardownHook(int /* num_threads */) { queue->push("Teardown"); }
+
+  void Execute(const std::string& pattern) {
+    queue->Clear();
+
+    BenchmarkReporter* reporter = new NullReporter;
+    FLAGS_benchmark_filter = pattern;
+    RunSpecifiedBenchmarks(reporter);
+    delete reporter;
+
+    queue->Put("DONE");  // End marker
+  }
+};
+
+static void BM_Match1(benchmark::State& state) {
+  const int64_t arg = state.range(0);
+
+  for (auto _ : state) {
+  }
+  queue->Put(StrFormat("BM_Match1/%d", static_cast<int>(arg)));
+}
+BENCHMARK(BM_Match1)
+    ->Iterations(100)
+    ->Arg(1)
+    ->Arg(2)
+    ->Arg(3)
+    ->Range(10, 80)
+    ->Args({90})
+    ->Args({100});
+
+TEST_F(BenchmarkTest, Match1) {
+  Execute("BM_Match1");
+  ASSERT_EQ("BM_Match1/1", queue->Get());
+  ASSERT_EQ("BM_Match1/2", queue->Get());
+  ASSERT_EQ("BM_Match1/3", queue->Get());
+  ASSERT_EQ("BM_Match1/10", queue->Get());
+  ASSERT_EQ("BM_Match1/64", queue->Get());
+  ASSERT_EQ("BM_Match1/80", queue->Get());
+  ASSERT_EQ("BM_Match1/90", queue->Get());
+  ASSERT_EQ("BM_Match1/100", queue->Get());
+  ASSERT_EQ("DONE", queue->Get());
+}
+
+TEST_F(BenchmarkTest, Match1WithRepetition) {
+  FLAGS_benchmark_repetitions = 2;
+
+  Execute("BM_Match1/(64|80)");
+  ASSERT_EQ("BM_Match1/64", queue->Get());
+  ASSERT_EQ("BM_Match1/64", queue->Get());
+  ASSERT_EQ("BM_Match1/80", queue->Get());
+  ASSERT_EQ("BM_Match1/80", queue->Get());
+  ASSERT_EQ("DONE", queue->Get());
+}
+
+TEST_F(BenchmarkTest, Match1WithRandomInterleaving) {
+  FLAGS_benchmark_enable_random_interleaving = true;
+  FLAGS_benchmark_repetitions = 100;
+
+  std::map<std::string, int> interleaving_count;
+  const char* regexp = "BM_Match1/(64|80)";
+  Execute(regexp);
+  for (int i = 0; i < 100; ++i) {
+    std::vector<std::string> interleaving;
+    interleaving.push_back(queue->Get());
+    interleaving.push_back(queue->Get());
+    EXPECT_THAT(interleaving,
+                testing::UnorderedElementsAre(testing::MatchesRegex(regexp),
+                                              testing::MatchesRegex(regexp)));
+    interleaving_count[StrFormat("%s,%s", interleaving[0].c_str(),
+                                 interleaving[1].c_str())]++;
+  }
+  EXPECT_GE(interleaving_count.size(), 2) << "Interleaving was not randomized.";
+  ASSERT_EQ("DONE", queue->Get());
+}
+
+}  // namespace
+}  // namespace internal
+}  // namespace benchmark
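[Editor's note: back-of-the-envelope, not part of the patch] The final expectation, `EXPECT_GE(interleaving_count.size(), 2)`, is deliberately loose so the test cannot flake. If the shuffled schedule is modeled as a uniform random permutation of the 200 repetitions (100 per argument), then observing only a single repeated consecutive-pair pattern would require a perfectly alternating schedule, whose probability is

$$P = \frac{2\,(100!)^2}{200!} = \frac{2}{\binom{200}{100}} \approx 2.2\times10^{-59},$$

so a spurious failure is practically impossible.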