diff --git a/README.md b/README.md index 6e5844b010..41cada9553 100644 --- a/README.md +++ b/README.md @@ -179,7 +179,7 @@ BENCHMARK_MAIN(); ``` To run the benchmark, compile and link against the `benchmark` library -(libbenchmark.a/.so). If you followed the build steps above, this library will +(libbenchmark.a/.so). If you followed the build steps above, this library will be under the build directory you created. ```bash @@ -299,8 +299,6 @@ too (`-lkstat`). [Setting the Time Unit](#setting-the-time-unit) -[Random Interleaving](docs/random_interleaving.md) - [User-Requested Performance Counters](docs/perf_counters.md) [Preventing Optimization](#preventing-optimization) @@ -401,8 +399,8 @@ Write benchmark results to a file with the `--benchmark_out=` option (or set `BENCHMARK_OUT`). Specify the output format with `--benchmark_out_format={json|console|csv}` (or set `BENCHMARK_OUT_FORMAT={json|console|csv}`). Note that the 'csv' reporter is -deprecated and the saved `.csv` file -[is not parsable](https://github.com/google/benchmark/issues/794) by csv +deprecated and the saved `.csv` file +[is not parsable](https://github.com/google/benchmark/issues/794) by csv parsers. Specifying `--benchmark_out` does not suppress the console output. diff --git a/docs/random_interleaving.md b/docs/random_interleaving.md deleted file mode 100644 index 2471b46bb0..0000000000 --- a/docs/random_interleaving.md +++ /dev/null @@ -1,26 +0,0 @@ - - -# Random Interleaving - -[Random Interleaving](https://github.com/google/benchmark/issues/1051) is a -technique to lower run-to-run variance. It breaks the execution of a -microbenchmark into multiple chunks and randomly interleaves them with chunks -from other microbenchmarks in the same benchmark test. Data shows it is able to -lower run-to-run variance by -[40%](https://github.com/google/benchmark/issues/1051) on average. - -To use, set `--benchmark_enable_random_interleaving=true`. - -It's a known issue that random interleaving may increase the benchmark execution -time, if: - -1. A benchmark has costly setup and / or teardown. Random interleaving will run - setup and teardown many times and may increase test execution time - significantly. -2. The time to run a single benchmark iteration is larger than the desired time - per repetition (i.e., `benchmark_min_time / benchmark_repetitions`). - -The overhead of random interleaving can be controlled by -`--benchmark_random_interleaving_max_overhead`. The default value is 0.4 meaning -the total execution time under random interlaving is limited by 1.4 x original -total execution time. Set it to `inf` for unlimited overhead. diff --git a/src/benchmark.cc b/src/benchmark.cc index 97d657a08f..d205232b8a 100644 --- a/src/benchmark.cc +++ b/src/benchmark.cc @@ -33,10 +33,8 @@ #include #include #include -#include #include #include -#include #include #include #include @@ -56,18 +54,6 @@ #include "thread_manager.h" #include "thread_timer.h" -// Each benchmark can be repeated a number of times, and within each -// *repetition*, we run the user-defined benchmark function a number of -// *iterations*. The number of repetitions is determined based on flags -// (--benchmark_repetitions). -namespace { - -// Attempt to make each repetition run for at least this much of time. -constexpr double kDefaultMinTimeTotalSecs = 0.5; -constexpr int kRandomInterleavingDefaultRepetitions = 12; - -} // namespace - // Print a list of benchmarks. This option overrides all other options. DEFINE_bool(benchmark_list_tests, false); @@ -76,39 +62,16 @@ DEFINE_bool(benchmark_list_tests, false); // linked into the binary are run. DEFINE_string(benchmark_filter, "."); -// Do NOT read these flags directly. Use Get*() to read them. -namespace do_not_read_flag_directly { - -// Minimum number of seconds we should run benchmark per repetition before -// results are considered significant. For cpu-time based tests, this is the -// lower bound on the total cpu time used by all threads that make up the test. -// For real-time based tests, this is the lower bound on the elapsed time of the -// benchmark execution, regardless of number of threads. If left unset, will use -// kDefaultMinTimeTotalSecs / FLAGS_benchmark_repetitions, if random -// interleaving is enabled. Otherwise, will use kDefaultMinTimeTotalSecs. -// Do NOT read this flag directly. Use GetMinTime() to read this flag. -DEFINE_double(benchmark_min_time, -1.0); +// Minimum number of seconds we should run benchmark before results are +// considered significant. For cpu-time based tests, this is the lower bound +// on the total cpu time used by all threads that make up the test. For +// real-time based tests, this is the lower bound on the elapsed time of the +// benchmark execution, regardless of number of threads. +DEFINE_double(benchmark_min_time, 0.5); // The number of runs of each benchmark. If greater than 1, the mean and -// standard deviation of the runs will be reported. By default, the number of -// repetitions is 1 if random interleaving is disabled, and up to -// kDefaultRepetitions if random interleaving is enabled. (Read the -// documentation for random interleaving to see why it might be less than -// kDefaultRepetitions.) -// Do NOT read this flag directly, Use GetRepetitions() to access this flag. -DEFINE_int32(benchmark_repetitions, -1); - -} // namespace do_not_read_flag_directly - -// The maximum overhead allowed for random interleaving. A value X means total -// execution time under random interleaving is limited by -// (1 + X) * original total execution time. Set to 'inf' to allow infinite -// overhead. -DEFINE_double(benchmark_random_interleaving_max_overhead, 0.4); - -// If set, enable random interleaving. See -// http://github.com/google/benchmark/issues/1051 for details. -DEFINE_bool(benchmark_enable_random_interleaving, false); +// standard deviation of the runs will be reported. +DEFINE_int32(benchmark_repetitions, 1); // Report the result of each benchmark repetitions. When 'true' is specified // only the mean, standard deviation, and other statistics are reported for @@ -159,30 +122,6 @@ DEFINE_kvpairs(benchmark_context, {}); std::map* global_context = nullptr; -// Performance measurements always come with random variances. Defines a -// factor by which the required number of iterations is overestimated in order -// to reduce the probability that the minimum time requirement will not be met. -const double kSafetyMultiplier = 1.4; - -// Wraps --benchmark_min_time and returns valid default values if not supplied. -double GetMinTime() { - const double default_min_time = kDefaultMinTimeTotalSecs / GetRepetitions(); - const double flag_min_time = - do_not_read_flag_directly::FLAGS_benchmark_min_time; - return flag_min_time >= 0.0 ? flag_min_time : default_min_time; -} - -// Wraps --benchmark_repetitions and return valid default value if not supplied. -int GetRepetitions() { - const int default_repetitions = - FLAGS_benchmark_enable_random_interleaving - ? kRandomInterleavingDefaultRepetitions - : 1; - const int flag_repetitions = - do_not_read_flag_directly::FLAGS_benchmark_repetitions; - return flag_repetitions >= 0 ? flag_repetitions : default_repetitions; -} - // FIXME: wouldn't LTO mess this up? void UseCharPointer(char const volatile*) {} @@ -302,39 +241,6 @@ void State::FinishKeepRunning() { namespace internal { namespace { -// Flushes streams after invoking reporter methods that write to them. This -// ensures users get timely updates even when streams are not line-buffered. -void FlushStreams(BenchmarkReporter* reporter) { - if (!reporter) return; - std::flush(reporter->GetOutputStream()); - std::flush(reporter->GetErrorStream()); -} - -// Reports in both display and file reporters. -void Report(BenchmarkReporter* display_reporter, - BenchmarkReporter* file_reporter, const RunResults& run_results) { - auto report_one = [](BenchmarkReporter* reporter, - bool aggregates_only, - const RunResults& results) { - assert(reporter); - // If there are no aggregates, do output non-aggregates. - aggregates_only &= !results.aggregates_only.empty(); - if (!aggregates_only) - reporter->ReportRuns(results.non_aggregates); - if (!results.aggregates_only.empty()) - reporter->ReportRuns(results.aggregates_only); - }; - - report_one(display_reporter, run_results.display_report_aggregates_only, - run_results); - if (file_reporter) - report_one(file_reporter, run_results.file_report_aggregates_only, - run_results); - - FlushStreams(display_reporter); - FlushStreams(file_reporter); -} - void RunBenchmarks(const std::vector& benchmarks, BenchmarkReporter* display_reporter, BenchmarkReporter* file_reporter) { @@ -342,7 +248,7 @@ void RunBenchmarks(const std::vector& benchmarks, CHECK(display_reporter != nullptr); // Determine the width of the name field using a minimum width of 10. - bool might_have_aggregates = GetRepetitions() > 1; + bool might_have_aggregates = FLAGS_benchmark_repetitions > 1; size_t name_field_width = 10; size_t stat_field_width = 0; for (const BenchmarkInstance& benchmark : benchmarks) { @@ -350,9 +256,8 @@ void RunBenchmarks(const std::vector& benchmarks, std::max(name_field_width, benchmark.name().str().size()); might_have_aggregates |= benchmark.repetitions() > 1; - for (const auto& Stat : benchmark.statistics()) { + for (const auto& Stat : benchmark.statistics()) stat_field_width = std::max(stat_field_width, Stat.name_.size()); - } } if (might_have_aggregates) name_field_width += 1 + stat_field_width; @@ -363,61 +268,45 @@ void RunBenchmarks(const std::vector& benchmarks, // Keep track of running times of all instances of current benchmark std::vector complexity_reports; + // We flush streams after invoking reporter methods that write to them. This + // ensures users get timely updates even when streams are not line-buffered. + auto flushStreams = [](BenchmarkReporter* reporter) { + if (!reporter) return; + std::flush(reporter->GetOutputStream()); + std::flush(reporter->GetErrorStream()); + }; + if (display_reporter->ReportContext(context) && (!file_reporter || file_reporter->ReportContext(context))) { - FlushStreams(display_reporter); - FlushStreams(file_reporter); - - // Without random interleaving, benchmarks are executed in the order of: - // A, A, ..., A, B, B, ..., B, C, C, ..., C, ... - // That is, repetition is within RunBenchmark(), hence the name - // inner_repetitions. - // With random interleaving, benchmarks are executed in the order of: - // {Random order of A, B, C, ...}, {Random order of A, B, C, ...}, ... - // That is, repetitions is outside of RunBenchmark(), hence the name - // outer_repetitions. - int inner_repetitions = - FLAGS_benchmark_enable_random_interleaving ? 1 : GetRepetitions(); - int outer_repetitions = - FLAGS_benchmark_enable_random_interleaving ? GetRepetitions() : 1; - std::vector benchmark_indices(benchmarks.size()); - for (size_t i = 0; i < benchmarks.size(); ++i) { - benchmark_indices[i] = i; - } - - std::random_device rd; - std::mt19937 g(rd()); - // 'run_results_vector' and 'benchmarks' are parallel arrays. - std::vector run_results_vector(benchmarks.size()); - for (int i = 0; i < outer_repetitions; i++) { - if (FLAGS_benchmark_enable_random_interleaving) { - std::shuffle(benchmark_indices.begin(), benchmark_indices.end(), g); - } - for (size_t j : benchmark_indices) { - // Repetitions will be automatically adjusted under random interleaving. - if (!FLAGS_benchmark_enable_random_interleaving || - i < benchmarks[j].RandomInterleavingRepetitions()) { - RunBenchmark(benchmarks[j], outer_repetitions, inner_repetitions, - &complexity_reports, &run_results_vector[j]); - if (!FLAGS_benchmark_enable_random_interleaving) { - // Print out reports as they come in. - Report(display_reporter, file_reporter, run_results_vector.at(j)); - } - } - } - } - - if (FLAGS_benchmark_enable_random_interleaving) { - // Print out all reports at the end of the test. - for (const RunResults& run_results : run_results_vector) { - Report(display_reporter, file_reporter, run_results); - } + flushStreams(display_reporter); + flushStreams(file_reporter); + + for (const auto& benchmark : benchmarks) { + RunResults run_results = RunBenchmark(benchmark, &complexity_reports); + + auto report = [&run_results](BenchmarkReporter* reporter, + bool report_aggregates_only) { + assert(reporter); + // If there are no aggregates, do output non-aggregates. + report_aggregates_only &= !run_results.aggregates_only.empty(); + if (!report_aggregates_only) + reporter->ReportRuns(run_results.non_aggregates); + if (!run_results.aggregates_only.empty()) + reporter->ReportRuns(run_results.aggregates_only); + }; + + report(display_reporter, run_results.display_report_aggregates_only); + if (file_reporter) + report(file_reporter, run_results.file_report_aggregates_only); + + flushStreams(display_reporter); + flushStreams(file_reporter); } } display_reporter->Finalize(); if (file_reporter) file_reporter->Finalize(); - FlushStreams(display_reporter); - FlushStreams(file_reporter); + flushStreams(display_reporter); + flushStreams(file_reporter); } // Disable deprecated warnings temporarily because we need to reference @@ -567,7 +456,6 @@ void PrintUsageAndExit() { " [--benchmark_filter=]\n" " [--benchmark_min_time=]\n" " [--benchmark_repetitions=]\n" - " [--benchmark_enable_random_interleaving={true|false}]\n" " [--benchmark_report_aggregates_only={true|false}]\n" " [--benchmark_display_aggregates_only={true|false}]\n" " [--benchmark_format=]\n" @@ -588,16 +476,10 @@ void ParseCommandLineFlags(int* argc, char** argv) { if (ParseBoolFlag(argv[i], "benchmark_list_tests", &FLAGS_benchmark_list_tests) || ParseStringFlag(argv[i], "benchmark_filter", &FLAGS_benchmark_filter) || - ParseDoubleFlag( - argv[i], "benchmark_min_time", - &do_not_read_flag_directly::FLAGS_benchmark_min_time) || - ParseInt32Flag( - argv[i], "benchmark_repetitions", - &do_not_read_flag_directly::FLAGS_benchmark_repetitions) || - ParseBoolFlag(argv[i], "benchmark_enable_random_interleaving", - &FLAGS_benchmark_enable_random_interleaving) || - ParseDoubleFlag(argv[i], "benchmark_random_interleaving_max_overhead", - &FLAGS_benchmark_random_interleaving_max_overhead) || + ParseDoubleFlag(argv[i], "benchmark_min_time", + &FLAGS_benchmark_min_time) || + ParseInt32Flag(argv[i], "benchmark_repetitions", + &FLAGS_benchmark_repetitions) || ParseBoolFlag(argv[i], "benchmark_report_aggregates_only", &FLAGS_benchmark_report_aggregates_only) || ParseBoolFlag(argv[i], "benchmark_display_aggregates_only", diff --git a/src/benchmark_adjust_repetitions.cc b/src/benchmark_adjust_repetitions.cc deleted file mode 100644 index 2847927628..0000000000 --- a/src/benchmark_adjust_repetitions.cc +++ /dev/null @@ -1,125 +0,0 @@ -// Copyright 2015 Google Inc. All rights reserved. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -#include "benchmark_adjust_repetitions.h" - -#include "benchmark_api_internal.h" -#include "log.h" - -namespace benchmark { -namespace internal { - -namespace { - -constexpr double kNanosecondInSecond = 1e-9; - -} // namespace - -int ComputeRandomInterleavingRepetitions( - InternalRandomInterleavingRepetitionsInput input) { - // Find the repetitions such that total overhead is bounded. Let - // n = desired number of repetitions, i.e., the output of this method. - // t = total real execution time per repetition including overhead, - // (input.total_execution_time_per_repetition). - // o = maximum allowed increase in total real execution time due to random - // interleaving, measured as a fraction (input.max_overhead). - // e = estimated total execution time without Random Interleaving - // We want - // t * n / e <= 1 + o - // I.e., - // n <= (1 + o) * e / t - // - // Let - // h = overhead per repetition, which include all setup / teardown time and - // also the execution time of preliminary trials used to search for the - // correct number of iterations. - // r = real execution time per repetition not including overhead - // (input.real_accumulated_time_per_repetition). - // s = measured execution time per repetition not including overhead, - // which can be either real or CPU time - // (input.accumulated_time_per_repetition). - // We have - // h = t - r - // - // Let - // m = total minimum measured execution time for all repetitions - // (input.min_time_per_repetition * input.max_repetitions). - // Let - // f = m / s - // f is the scale factor between m and s, and will be used to estimate - // l, the total real execution time for all repetitions excluding the - // overhead. It's reasonable to assume that the real execution time excluding - // the overhead is proportional to the measured time. Hence we expect to see - // l / r to be equal to m / s. That is, l / r = f, thus, l = r * f. Then the - // total execution time e can be estimated by h + l, which is h + r * f. - // e = h + r * f - // Note that this might be an underestimation. If number of repetitions is - // reduced, we may need to run more iterations per repetition, and that may - // increase the number of preliminary trials needed to find the correct - // number of iterations. - - double h = std::max(0.0, input.total_execution_time_per_repetition - - input.real_time_used_per_repetition); - double r = - std::max(input.real_time_used_per_repetition, kNanosecondInSecond); - double s = - std::max(input.time_used_per_repetition, kNanosecondInSecond); - double m = input.min_time_per_repetition * input.max_repetitions; - - // f = m / s - // RunBenchmark() always overshoot the iteration count by kSafetyMultiplier. - // Apply the same factor here. - // f = kSafetyMultiplier * m / s - // Also we want to make sure 1 <= f <= input.max_repetitions. Note that we - // may not be able to reach m because the total iters per repetition is - // upper bounded by --benchmark_max_iters. This behavior is preserved in - // Random Interleaving, as we won't run repetitions more than - // input.max_repetitions to reach m. - - double f = kSafetyMultiplier * m / s; - f = std::min(std::max(f, 1.0), static_cast(input.max_repetitions)); - - double e = h + r * f; - // n <= (1 + o) * e / t = (1 + o) * e / (h + r) - // Also we want to make sure 1 <= n <= input.max_repetition, and (h + r) > 0. - double n = (1 + input.max_overhead) * e / (h + r); - n = std::min(std::max(n, 1.0), static_cast(input.max_repetitions)); - - int n_int = static_cast(n); - - VLOG(2) << "Computed random interleaving repetitions" - << "\n input.total_execution_time_per_repetition: " - << input.total_execution_time_per_repetition - << "\n input.time_used_per_repetition: " - << input.time_used_per_repetition - << "\n input.real_time_used_per_repetition: " - << input.real_time_used_per_repetition - << "\n input.min_time_per_repetitions: " - << input.min_time_per_repetition - << "\n input.max_repetitions: " << input.max_repetitions - << "\n input.max_overhead: " << input.max_overhead - << "\n h: " << h - << "\n r: " << r - << "\n s: " << s - << "\n f: " << f - << "\n m: " << m - << "\n e: " << e - << "\n n: " << n - << "\n n_int: " << n_int; - - return n_int; -} - -} // internal -} // benchmark diff --git a/src/benchmark_adjust_repetitions.h b/src/benchmark_adjust_repetitions.h deleted file mode 100644 index 21a666afe0..0000000000 --- a/src/benchmark_adjust_repetitions.h +++ /dev/null @@ -1,42 +0,0 @@ -// Copyright 2015 Google Inc. All rights reserved. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -#ifndef BENCHMARK_ADJUST_REPETITIONS_H -#define BENCHMARK_ADJUST_REPETITIONS_H - -#include "benchmark/benchmark.h" -#include "commandlineflags.h" - -namespace benchmark { -namespace internal { - -// Defines the input tuple to ComputeRandomInterleavingRepetitions(). -struct InternalRandomInterleavingRepetitionsInput { - double total_execution_time_per_repetition; - double time_used_per_repetition; - double real_time_used_per_repetition; - double min_time_per_repetition; - double max_overhead; - int max_repetitions; -}; - -// Should be called right after the first repetition is completed to estimate -// the number of iterations. -int ComputeRandomInterleavingRepetitions( - InternalRandomInterleavingRepetitionsInput input); - -} // end namespace internal -} // end namespace benchmark - -#endif // BENCHMARK_ADJUST_REPETITIONS_H diff --git a/src/benchmark_api_internal.cc b/src/benchmark_api_internal.cc index ddd46bee63..553ff44e5c 100644 --- a/src/benchmark_api_internal.cc +++ b/src/benchmark_api_internal.cc @@ -2,11 +2,8 @@ #include -#include "check.h" #include "string_util.h" -DECLARE_bool(benchmark_enable_random_interleaving); - namespace benchmark { namespace internal { @@ -24,12 +21,9 @@ BenchmarkInstance::BenchmarkInstance(Benchmark* benchmark, complexity_lambda_(benchmark_.complexity_lambda_), statistics_(benchmark_.statistics_), repetitions_(benchmark_.repetitions_), - min_time_(!IsZero(benchmark_.min_time_) ? benchmark_.min_time_ - : GetMinTime()), + min_time_(benchmark_.min_time_), iterations_(benchmark_.iterations_), threads_(thread_count) { - CHECK(!IsZero(min_time_)) << "min_time must be non-zero."; - name_.function_name = benchmark_.name_; size_t arg_i = 0; @@ -83,32 +77,6 @@ BenchmarkInstance::BenchmarkInstance(Benchmark* benchmark, } } -double BenchmarkInstance::MinTime() const { - if (FLAGS_benchmark_enable_random_interleaving) { - // Random Interleaving will automatically adjust - // random_interleaving_repetitions(). Dividing - // total execution time by random_interleaving_repetitions() gives - // the adjusted min_time per repetition. - return min_time_ * GetRepetitions() / RandomInterleavingRepetitions(); - } - return min_time_; -} - -int BenchmarkInstance::RandomInterleavingRepetitions() const { - return random_interleaving_repetitions_ < 0 - ? GetRepetitions() - : random_interleaving_repetitions_; -} - -bool BenchmarkInstance::RandomInterleavingRepetitionsInitialized() const { - return random_interleaving_repetitions_ >= 0; -} - -void BenchmarkInstance::InitRandomInterleavingRepetitions( - int reps) const { - random_interleaving_repetitions_ = reps; -} - State BenchmarkInstance::Run( IterationCount iters, int thread_id, internal::ThreadTimer* timer, internal::ThreadManager* manager, diff --git a/src/benchmark_api_internal.h b/src/benchmark_api_internal.h index 0ff8dafbe6..b0595e5a12 100644 --- a/src/benchmark_api_internal.h +++ b/src/benchmark_api_internal.h @@ -1,10 +1,6 @@ #ifndef BENCHMARK_API_INTERNAL_H #define BENCHMARK_API_INTERNAL_H -#include "benchmark/benchmark.h" -#include "commandlineflags.h" - -#include #include #include #include @@ -18,25 +14,12 @@ namespace benchmark { namespace internal { -extern const double kSafetyMultiplier; - // Information kept per benchmark we may want to run class BenchmarkInstance { public: BenchmarkInstance(Benchmark* benchmark, const std::vector& args, int threads); - // Returns number of repetitions for Random Interleaving. This will be - // initialized later once we finish the first repetition, if Random - // Interleaving is enabled. See also ComputeRandominterleavingrepetitions(). - int RandomInterleavingRepetitions() const; - - // Returns true if repetitions for Random Interleaving is initialized. - bool RandomInterleavingRepetitionsInitialized() const; - - // Initializes number of repetitions for random interleaving. - void InitRandomInterleavingRepetitions(int reps) const; - const BenchmarkName& name() const { return name_; } AggregationReportMode aggregation_report_mode() const { return aggregation_report_mode_; @@ -49,7 +32,7 @@ class BenchmarkInstance { BigOFunc& complexity_lambda() const { return *complexity_lambda_; } const std::vector& statistics() const { return statistics_; } int repetitions() const { return repetitions_; } - double MinTime() const; + double min_time() const { return min_time_; } IterationCount iterations() const { return iterations_; } int threads() const { return threads_; } @@ -70,13 +53,12 @@ class BenchmarkInstance { bool use_manual_time_; BigO complexity_; BigOFunc* complexity_lambda_; - std::vector statistics_; + UserCounters counters_; + const std::vector& statistics_; int repetitions_; double min_time_; IterationCount iterations_; - int threads_; - UserCounters counters_; - mutable int random_interleaving_repetitions_ = -1; + int threads_; // Number of concurrent threads to us }; bool FindBenchmarksInternal(const std::string& re, @@ -87,10 +69,6 @@ bool IsZero(double n); ConsoleReporter::OutputOptions GetOutputOptions(bool force_no_color = false); -double GetMinTime(); - -int GetRepetitions(); - } // end namespace internal } // end namespace benchmark diff --git a/src/benchmark_runner.cc b/src/benchmark_runner.cc index 330cb4494b..3cffbbfad5 100644 --- a/src/benchmark_runner.cc +++ b/src/benchmark_runner.cc @@ -15,7 +15,6 @@ #include "benchmark_runner.h" #include "benchmark/benchmark.h" #include "benchmark_api_internal.h" -#include "benchmark_adjust_repetitions.h" #include "internal_macros.h" #ifndef BENCHMARK_OS_WINDOWS @@ -53,9 +52,6 @@ #include "thread_manager.h" #include "thread_timer.h" -DECLARE_bool(benchmark_enable_random_interleaving); -DECLARE_double(benchmark_random_interleaving_max_overhead); - namespace benchmark { namespace internal { @@ -71,7 +67,7 @@ BenchmarkReporter::Run CreateRunReport( const internal::ThreadManager::Result& results, IterationCount memory_iterations, const MemoryManager::Result& memory_result, double seconds, - int repetition_index) { + int64_t repetition_index) { // Create report about this benchmark run. BenchmarkReporter::Run report; @@ -142,16 +138,12 @@ void RunInThread(const BenchmarkInstance* b, IterationCount iters, class BenchmarkRunner { public: BenchmarkRunner(const benchmark::internal::BenchmarkInstance& b_, - int outer_repetitions_, - int inner_repetitions_, - std::vector* complexity_reports_, - RunResults* run_results_) + std::vector* complexity_reports_) : b(b_), complexity_reports(*complexity_reports_), - run_results(*run_results_), - outer_repetitions(outer_repetitions_), - inner_repetitions(inner_repetitions_), - repeats(b.repetitions() != 0 ? b.repetitions() : inner_repetitions), + min_time(!IsZero(b.min_time()) ? b.min_time() : FLAGS_benchmark_min_time), + repeats(b.repetitions() != 0 ? b.repetitions() + : FLAGS_benchmark_repetitions), has_explicit_iteration_count(b.iterations() != 0), pool(b.threads() - 1), iters(has_explicit_iteration_count ? b.iterations() : 1), @@ -171,7 +163,6 @@ class BenchmarkRunner { internal::ARM_DisplayReportAggregatesOnly); run_results.file_report_aggregates_only = (b.aggregation_report_mode() & internal::ARM_FileReportAggregatesOnly); - CHECK(FLAGS_benchmark_perf_counters.empty() || perf_counters_measurement.IsValid()) << "Perf counters were requested but could not be set up."; @@ -188,20 +179,21 @@ class BenchmarkRunner { if ((b.complexity() != oNone) && b.last_benchmark_instance) { auto additional_run_stats = ComputeBigO(complexity_reports); run_results.aggregates_only.insert(run_results.aggregates_only.end(), - additional_run_stats.begin(), - additional_run_stats.end()); + additional_run_stats.begin(), + additional_run_stats.end()); complexity_reports.clear(); } } + RunResults&& get_results() { return std::move(run_results); } + private: + RunResults run_results; + const benchmark::internal::BenchmarkInstance& b; std::vector& complexity_reports; - RunResults& run_results; - - const int outer_repetitions; - const int inner_repetitions; + const double min_time; const int repeats; const bool has_explicit_iteration_count; @@ -276,14 +268,13 @@ class BenchmarkRunner { IterationCount PredictNumItersNeeded(const IterationResults& i) const { // See how much iterations should be increased by. // Note: Avoid division by zero with max(seconds, 1ns). - double multiplier = - b.MinTime() * kSafetyMultiplier / std::max(i.seconds, 1e-9); + double multiplier = min_time * 1.4 / std::max(i.seconds, 1e-9); // If our last run was at least 10% of FLAGS_benchmark_min_time then we // use the multiplier directly. // Otherwise we use at most 10 times expansion. // NOTE: When the last run was at least 10% of the min time the max // expansion should be 14x. - bool is_significant = (i.seconds / b.MinTime()) > 0.1; + bool is_significant = (i.seconds / min_time) > 0.1; multiplier = is_significant ? multiplier : std::min(10.0, multiplier); if (multiplier <= 1.0) multiplier = 2.0; @@ -304,17 +295,14 @@ class BenchmarkRunner { // or because an error was reported. return i.results.has_error_ || i.iters >= kMaxIterations || // Too many iterations already. - i.seconds >= b.MinTime() || // The elapsed time is large enough. - // CPU time is specified but the - // elapsed real time greatly exceeds - // the minimum time. Note that user - // provided timers are except from this - // sanity check. - ((i.results.real_time_used >= 5 * b.MinTime()) && - !b.use_manual_time()); + i.seconds >= min_time || // The elapsed time is large enough. + // CPU time is specified but the elapsed real time greatly exceeds + // the minimum time. + // Note that user provided timers are except from this sanity check. + ((i.results.real_time_used >= 5 * min_time) && !b.use_manual_time()); } - void DoOneRepetition(int repetition_index) { + void DoOneRepetition(int64_t repetition_index) { const bool is_the_first_repetition = repetition_index == 0; IterationResults i; @@ -324,10 +312,8 @@ class BenchmarkRunner { // Please do note that the if there are repetitions, the iteration count // is *only* calculated for the *first* repetition, and other repetitions // simply use that precomputed iteration count. - const auto exec_start = benchmark::ChronoClockNow(); for (;;) { i = DoNIterations(); - const auto exec_end = benchmark::ChronoClockNow(); // Do we consider the results to be significant? // If we are doing repetitions, and the first repetition was already done, @@ -338,38 +324,7 @@ class BenchmarkRunner { has_explicit_iteration_count || ShouldReportIterationResults(i); - if (results_are_significant) { - // The number of repetitions for random interleaving may be reduced - // to limit the increase in benchmark execution time. When this happens - // the target execution time for each repetition is increased. We may - // need to rerun trials to calculate iters according to the increased - // target execution time. - bool rerun_trial = false; - // If random interleaving is enabled and the repetitions is not - // initialized, do it now. - if (FLAGS_benchmark_enable_random_interleaving && - !b.RandomInterleavingRepetitionsInitialized()) { - InternalRandomInterleavingRepetitionsInput input; - input.total_execution_time_per_repetition = exec_end - exec_start; - input.time_used_per_repetition = i.seconds; - input.real_time_used_per_repetition = i.results.real_time_used; - input.min_time_per_repetition = GetMinTime(); - input.max_overhead = FLAGS_benchmark_random_interleaving_max_overhead; - input.max_repetitions = GetRepetitions(); - b.InitRandomInterleavingRepetitions( - ComputeRandomInterleavingRepetitions(input)); - // If the number of repetitions changed, need to rerun the last trial - // because iters may also change. Note that we only need to do this - // if accumulated_time < b.MinTime(), i.e., the iterations we have - // run is not enough for the already adjusted b.MinTime(). - // Otherwise, we will still skip the rerun. - rerun_trial = - b.RandomInterleavingRepetitions() < GetRepetitions() && - i.seconds < b.MinTime() && !has_explicit_iteration_count; - } - - if (!rerun_trial) break; // Good, let's report them! - } + if (results_are_significant) break; // Good, let's report them! // Nope, bad iteration. Let's re-estimate the hopefully-sufficient // iteration count, and run the benchmark again... @@ -386,8 +341,7 @@ class BenchmarkRunner { if (memory_manager != nullptr) { // Only run a few iterations to reduce the impact of one-time // allocations in benchmarks that are not properly managed. - memory_iterations = std::min( - 16 / outer_repetitions + (16 % outer_repetitions != 0), iters); + memory_iterations = std::min(16, iters); memory_manager->Start(); std::unique_ptr manager; manager.reset(new internal::ThreadManager(1)); @@ -413,12 +367,11 @@ class BenchmarkRunner { } // end namespace -void RunBenchmark(const benchmark::internal::BenchmarkInstance& b, - const int outer_repetitions, const int inner_repetitions, - std::vector* complexity_reports, - RunResults* run_results) { - internal::BenchmarkRunner r(b, outer_repetitions, inner_repetitions, - complexity_reports, run_results); +RunResults RunBenchmark( + const benchmark::internal::BenchmarkInstance& b, + std::vector* complexity_reports) { + internal::BenchmarkRunner r(b, complexity_reports); + return r.get_results(); } } // end namespace internal diff --git a/src/benchmark_runner.h b/src/benchmark_runner.h index e29aa32306..9b0cf2a64e 100644 --- a/src/benchmark_runner.h +++ b/src/benchmark_runner.h @@ -42,10 +42,9 @@ struct RunResults { bool file_report_aggregates_only = false; }; -void RunBenchmark(const benchmark::internal::BenchmarkInstance& b, - int outer_repetitions, int inner_repetitions, - std::vector* complexity_reports, - RunResults* run_results); +RunResults RunBenchmark( + const benchmark::internal::BenchmarkInstance& b, + std::vector* complexity_reports); } // namespace internal diff --git a/test/CMakeLists.txt b/test/CMakeLists.txt index 8945f5005f..1e7b6829f2 100644 --- a/test/CMakeLists.txt +++ b/test/CMakeLists.txt @@ -196,7 +196,6 @@ if (BENCHMARK_ENABLE_GTEST_TESTS) add_gtest(benchmark_gtest) add_gtest(benchmark_name_gtest) - add_gtest(benchmark_random_interleaving_gtest) add_gtest(commandlineflags_gtest) add_gtest(statistics_gtest) add_gtest(string_util_gtest) diff --git a/test/benchmark_random_interleaving_gtest.cc b/test/benchmark_random_interleaving_gtest.cc deleted file mode 100644 index 5e8329a4e6..0000000000 --- a/test/benchmark_random_interleaving_gtest.cc +++ /dev/null @@ -1,271 +0,0 @@ -#include -#include -#include - -#include "../src/benchmark_adjust_repetitions.h" -#include "../src/string_util.h" -#include "benchmark/benchmark.h" -#include "gmock/gmock.h" -#include "gtest/gtest.h" - -DECLARE_bool(benchmark_enable_random_interleaving); -DECLARE_string(benchmark_filter); -DECLARE_double(benchmark_random_interleaving_max_overhead); - -namespace do_not_read_flag_directly { -DECLARE_int32(benchmark_repetitions); -} // namespace do_not_read_flag_directly - -namespace benchmark { -namespace internal { -namespace { - -class EventQueue : public std::queue { - public: - void Put(const std::string& event) { - push(event); - } - - void Clear() { - while (!empty()) { - pop(); - } - } - - std::string Get() { - std::string event = front(); - pop(); - return event; - } -}; - -static EventQueue* queue = new EventQueue; - -class NullReporter : public BenchmarkReporter { - public: - bool ReportContext(const Context& /*context*/) override { - return true; - } - void ReportRuns(const std::vector& /* report */) override {} -}; - -class BenchmarkTest : public testing::Test { - public: - static void SetupHook(int /* num_threads */) { queue->push("Setup"); } - - static void TeardownHook(int /* num_threads */) { queue->push("Teardown"); } - - void Execute(const std::string& pattern) { - queue->Clear(); - - BenchmarkReporter* reporter = new NullReporter; - FLAGS_benchmark_filter = pattern; - RunSpecifiedBenchmarks(reporter); - delete reporter; - - queue->Put("DONE"); // End marker - } -}; - -static void BM_Match1(benchmark::State& state) { - const int64_t arg = state.range(0); - - for (auto _ : state) {} - queue->Put(StrFormat("BM_Match1/%d", static_cast(arg))); -} -BENCHMARK(BM_Match1) - ->Iterations(100) - ->Arg(1) - ->Arg(2) - ->Arg(3) - ->Range(10, 80) - ->Args({90}) - ->Args({100}); - -static void BM_MatchOverhead(benchmark::State& state) { - const int64_t arg = state.range(0); - - for (auto _ : state) {} - queue->Put(StrFormat("BM_MatchOverhead/%d", static_cast(arg))); -} -BENCHMARK(BM_MatchOverhead) - ->Iterations(100) - ->Arg(64) - ->Arg(80); - -TEST_F(BenchmarkTest, Match1) { - Execute("BM_Match1"); - ASSERT_EQ("BM_Match1/1", queue->Get()); - ASSERT_EQ("BM_Match1/2", queue->Get()); - ASSERT_EQ("BM_Match1/3", queue->Get()); - ASSERT_EQ("BM_Match1/10", queue->Get()); - ASSERT_EQ("BM_Match1/64", queue->Get()); - ASSERT_EQ("BM_Match1/80", queue->Get()); - ASSERT_EQ("BM_Match1/90", queue->Get()); - ASSERT_EQ("BM_Match1/100", queue->Get()); - ASSERT_EQ("DONE", queue->Get()); -} - -TEST_F(BenchmarkTest, Match1WithRepetition) { - do_not_read_flag_directly::FLAGS_benchmark_repetitions = 2; - - Execute("BM_Match1/(64|80)"); - ASSERT_EQ("BM_Match1/64", queue->Get()); - ASSERT_EQ("BM_Match1/64", queue->Get()); - ASSERT_EQ("BM_Match1/80", queue->Get()); - ASSERT_EQ("BM_Match1/80", queue->Get()); - ASSERT_EQ("DONE", queue->Get()); -} - -TEST_F(BenchmarkTest, Match1WithRandomInterleaving) { - FLAGS_benchmark_enable_random_interleaving = true; - do_not_read_flag_directly::FLAGS_benchmark_repetitions = 100; - FLAGS_benchmark_random_interleaving_max_overhead = - std::numeric_limits::infinity(); - - std::vector expected({"BM_Match1/64", "BM_Match1/80"}); - std::map interleaving_count; - Execute("BM_Match1/(64|80)"); - for (int i = 0; i < 100; ++i) { - std::vector interleaving; - interleaving.push_back(queue->Get()); - interleaving.push_back(queue->Get()); - EXPECT_THAT(interleaving, testing::UnorderedElementsAreArray(expected)); - interleaving_count[StrFormat("%s,%s", interleaving[0].c_str(), - interleaving[1].c_str())]++; - } - EXPECT_GE(interleaving_count.size(), 2) << "Interleaving was not randomized."; - ASSERT_EQ("DONE", queue->Get()); -} - -TEST_F(BenchmarkTest, Match1WithRandomInterleavingAndZeroOverhead) { - FLAGS_benchmark_enable_random_interleaving = true; - do_not_read_flag_directly::FLAGS_benchmark_repetitions = 100; - FLAGS_benchmark_random_interleaving_max_overhead = 0; - - // ComputeRandomInterleavingRepetitions() will kick in and rerun each - // benchmark once with increased iterations. Then number of repetitions will - // be reduced to < 100. The first 4 executions should be - // 2 x BM_MatchOverhead/64 and 2 x BM_MatchOverhead/80. - std::vector expected( - {"BM_MatchOverhead/64", "BM_MatchOverhead/80", "BM_MatchOverhead/64", - "BM_MatchOverhead/80"}); - std::map interleaving_count; - Execute("BM_MatchOverhead/(64|80)"); - std::vector interleaving; - interleaving.push_back(queue->Get()); - interleaving.push_back(queue->Get()); - interleaving.push_back(queue->Get()); - interleaving.push_back(queue->Get()); - EXPECT_THAT(interleaving, testing::UnorderedElementsAreArray(expected)); - ASSERT_LT(queue->size(), 100) << "# Repetitions was not reduced to < 100."; -} - -InternalRandomInterleavingRepetitionsInput CreateInput( - double total, double time, double real_time, double min_time, - double overhead, int repetitions) { - InternalRandomInterleavingRepetitionsInput input; - input.total_execution_time_per_repetition = total; - input.time_used_per_repetition = time; - input.real_time_used_per_repetition = real_time; - input.min_time_per_repetition = min_time; - input.max_overhead = overhead; - input.max_repetitions = repetitions; - return input; -} - -TEST(Benchmark, ComputeRandomInterleavingRepetitions) { - // On wall clock time. - EXPECT_EQ(ComputeRandomInterleavingRepetitions( - CreateInput(0.05, 0.05, 0.05, 0.05, 0.0, 10)), - 10); - EXPECT_EQ(ComputeRandomInterleavingRepetitions( - CreateInput(0.05, 0.05, 0.05, 0.05, 0.4, 10)), - 10); - EXPECT_EQ(ComputeRandomInterleavingRepetitions( - CreateInput(0.06, 0.05, 0.05, 0.05, 0.0, 10)), - 8); - EXPECT_EQ(ComputeRandomInterleavingRepetitions( - CreateInput(0.06, 0.05, 0.05, 0.05, 0.4, 10)), - 10); - EXPECT_EQ(ComputeRandomInterleavingRepetitions( - CreateInput(0.08, 0.05, 0.05, 0.05, 0.0, 10)), - 6); - EXPECT_EQ(ComputeRandomInterleavingRepetitions( - CreateInput(0.08, 0.05, 0.05, 0.05, 0.4, 10)), - 9); - EXPECT_EQ(ComputeRandomInterleavingRepetitions( - CreateInput(0.26, 0.25, 0.25, 0.05, 0.0, 10)), - 2); - EXPECT_EQ(ComputeRandomInterleavingRepetitions( - CreateInput(0.25, 0.25, 0.25, 0.05, 0.4, 10)), - 3); - EXPECT_EQ(ComputeRandomInterleavingRepetitions( - CreateInput(0.26, 0.25, 0.25, 0.05, 0.0, 10)), - 2); - EXPECT_EQ(ComputeRandomInterleavingRepetitions( - CreateInput(0.26, 0.25, 0.25, 0.05, 0.4, 10)), - 3); - EXPECT_EQ(ComputeRandomInterleavingRepetitions( - CreateInput(0.38, 0.25, 0.25, 0.05, 0.0, 10)), - 2); - EXPECT_EQ(ComputeRandomInterleavingRepetitions( - CreateInput(0.38, 0.25, 0.25, 0.05, 0.4, 10)), - 3); - - // On CPU time. - EXPECT_EQ(ComputeRandomInterleavingRepetitions( - CreateInput(0.1, 0.05, 0.1, 0.05, 0.0, 10)), - 10); - EXPECT_EQ(ComputeRandomInterleavingRepetitions( - CreateInput(0.1, 0.05, 0.1, 0.05, 0.4, 10)), - 10); - EXPECT_EQ(ComputeRandomInterleavingRepetitions( - CreateInput(0.11, 0.05, 0.1, 0.05, 0.0, 10)), - 9); - EXPECT_EQ(ComputeRandomInterleavingRepetitions( - CreateInput(0.11, 0.05, 0.1, 0.05, 0.4, 10)), - 10); - EXPECT_EQ(ComputeRandomInterleavingRepetitions( - CreateInput(0.15, 0.05, 0.1, 0.05, 0.0, 10)), - 7); - EXPECT_EQ(ComputeRandomInterleavingRepetitions( - CreateInput(0.15, 0.05, 0.1, 0.05, 0.4, 10)), - 9); - EXPECT_EQ(ComputeRandomInterleavingRepetitions( - CreateInput(0.5, 0.25, 0.5, 0.05, 0.0, 10)), - 2); - EXPECT_EQ(ComputeRandomInterleavingRepetitions( - CreateInput(0.5, 0.25, 0.5, 0.05, 0.4, 10)), - 3); - EXPECT_EQ(ComputeRandomInterleavingRepetitions( - CreateInput(0.51, 0.25, 0.5, 0.05, 0.0, 10)), - 2); - EXPECT_EQ(ComputeRandomInterleavingRepetitions( - CreateInput(0.51, 0.25, 0.5, 0.05, 0.4, 10)), - 3); - EXPECT_EQ(ComputeRandomInterleavingRepetitions( - CreateInput(0.8, 0.25, 0.5, 0.05, 0.0, 10)), - 2); - EXPECT_EQ(ComputeRandomInterleavingRepetitions( - CreateInput(0.8, 0.25, 0.5, 0.05, 0.4, 10)), - 2); - - // Corner cases. - EXPECT_EQ(ComputeRandomInterleavingRepetitions( - CreateInput(0.0, 0.25, 0.5, 0.05, 0.4, 10)), - 3); - EXPECT_EQ(ComputeRandomInterleavingRepetitions( - CreateInput(0.8, 0.0, 0.5, 0.05, 0.4, 10)), - 9); - EXPECT_EQ(ComputeRandomInterleavingRepetitions( - CreateInput(0.8, 0.25, 0.0, 0.05, 0.4, 10)), - 1); - EXPECT_EQ(ComputeRandomInterleavingRepetitions( - CreateInput(0.8, 0.25, 0.5, 0.0, 0.4, 10)), - 1); -} - -} // namespace -} // namespace internal -} // namespace benchmark