From 25a5fdfcb6a325816b31790aea225d35aa430a5d Mon Sep 17 00:00:00 2001 From: Hai Huang Date: Sun, 28 Mar 2021 15:21:34 -0400 Subject: [PATCH 01/21] Implementation of random interleaving. See http://github.com/google/benchmark/issues/1051 for the feature requests. Committer: Hai Huang (http://github.com/haih-g) On branch fr-1051 Changes to be committed: modified: include/benchmark/benchmark.h modified: src/benchmark.cc new file: src/benchmark_adjust_repetitions.cc new file: src/benchmark_adjust_repetitions.h modified: src/benchmark_api_internal.cc modified: src/benchmark_api_internal.h modified: src/benchmark_register.cc modified: src/benchmark_runner.cc modified: src/benchmark_runner.h modified: test/CMakeLists.txt new file: test/benchmark_random_interleaving_gtest.cc --- include/benchmark/benchmark.h | 5 +- src/benchmark.cc | 156 ++++++-- src/benchmark_adjust_repetitions.cc | 111 ++++++ src/benchmark_adjust_repetitions.h | 28 ++ src/benchmark_api_internal.cc | 156 +++++++- src/benchmark_api_internal.h | 93 ++++- src/benchmark_register.cc | 73 +--- src/benchmark_runner.cc | 148 +++++--- src/benchmark_runner.h | 7 +- test/CMakeLists.txt | 1 + test/benchmark_random_interleaving_gtest.cc | 394 ++++++++++++++++++++ 11 files changed, 996 insertions(+), 176 deletions(-) create mode 100644 src/benchmark_adjust_repetitions.cc create mode 100644 src/benchmark_adjust_repetitions.h create mode 100644 test/benchmark_random_interleaving_gtest.cc diff --git a/include/benchmark/benchmark.h b/include/benchmark/benchmark.h index f57e3e79bd..42ac7849f1 100644 --- a/include/benchmark/benchmark.h +++ b/include/benchmark/benchmark.h @@ -434,7 +434,7 @@ struct Statistics { : name_(name), compute_(compute) {} }; -struct BenchmarkInstance; +class BenchmarkInstance; class ThreadTimer; class ThreadManager; @@ -686,7 +686,7 @@ class State { internal::ThreadTimer* timer_; internal::ThreadManager* manager_; - friend struct internal::BenchmarkInstance; + friend class 
internal::BenchmarkInstance; }; inline BENCHMARK_ALWAYS_INLINE bool State::KeepRunning() { @@ -961,6 +961,7 @@ class Benchmark { private: friend class BenchmarkFamilies; + friend class BenchmarkInstance; std::string name_; AggregationReportMode aggregation_report_mode_; diff --git a/src/benchmark.cc b/src/benchmark.cc index ffe4bf45a6..32f01b8272 100644 --- a/src/benchmark.cc +++ b/src/benchmark.cc @@ -32,7 +32,9 @@ #include #include #include +#include #include +#include #include #include #include @@ -51,6 +53,18 @@ #include "thread_manager.h" #include "thread_timer.h" +// Each benchmark can be repeated a number of times, and within each +// *repetition*, we run the user-defined benchmark function a number of +// *iterations*. The number of repetitions is determined based on flags +// (--benchmark_repetitions). +namespace { + +// Attempt to make each repetition run for at least this much of time. +constexpr double kDefaultMinTimeTotal = 0.5; +constexpr size_t kDefaultRepetitions = 12; + +} // namespace + // Print a list of benchmarks. This option overrides all other options. DEFINE_bool(benchmark_list_tests, false); @@ -59,16 +73,32 @@ DEFINE_bool(benchmark_list_tests, false); // linked into the binary are run. DEFINE_string(benchmark_filter, "."); -// Minimum number of seconds we should run benchmark before results are -// considered significant. For cpu-time based tests, this is the lower bound -// on the total cpu time used by all threads that make up the test. For -// real-time based tests, this is the lower bound on the elapsed time of the -// benchmark execution, regardless of number of threads. -DEFINE_double(benchmark_min_time, 0.5); +// Minimum number of seconds we should run benchmark per repetition before +// results are considered significant. For cpu-time based tests, this is the +// lower bound on the total cpu time used by all threads that make up the test. 
+// For real-time based tests, this is the lower bound on the elapsed time of the +// benchmark execution, regardless of number of threads. If left unset, will use +// 0.5 / 12 if random interleaving is enabled. Otherwise, will use 0.5. +// Do NOT read this flag directly. Use GetMinTime() to read this flag. +DEFINE_double(benchmark_min_time, -1.0); // The number of runs of each benchmark. If greater than 1, the mean and -// standard deviation of the runs will be reported. -DEFINE_int32(benchmark_repetitions, 1); +// standard deviation of the runs will be reported. By default, the number of +// repetitions is 1 if random interleaving is disabled, and up to 12 if random +// interleaving is enabled. (Read the documentation for random interleaving to +// see why it might be less than 12.) +// Do NOT read this flag directly, Use GetRepetitions() to access this flag. +DEFINE_int32(benchmark_repetitions, -1); + +// The maximum overhead allowed for random interleaving. A value X means total +// execution time under random interleaving is limited by +// (1 + X) * original total execution time. Set to 'inf' to allow infinite +// overhead. +DEFINE_double(benchmark_random_interleaving_max_overhead, 0.4); + +// If set, enable random interleaving. See +// http://github.com/google/benchmark/issues/1051 for details. +DEFINE_bool(benchmark_enable_random_interleaving, false); // Report the result of each benchmark repetitions. When 'true' is specified // only the mean, standard deviation, and other statistics are reported for @@ -110,6 +140,37 @@ namespace benchmark { namespace internal { +// Performance measurements always come with random variances. Defines a +// factor by which the required number of iterations is overestimated in order +// to reduce the probability that the minimum time requirement will not be met. +const double kSafetyMultiplier = 1.4; + +// Wraps --benchmark_min_time and returns valid default values if not supplied. 
+double GetMinTime() { + const double min_time = FLAGS_benchmark_min_time; + if (min_time >= 0.0) { + return min_time; + } + + if (FLAGS_benchmark_enable_random_interleaving) { + return kDefaultMinTimeTotal / kDefaultRepetitions; + } + return kDefaultMinTimeTotal; +} + +// Wraps --benchmark_repetitions and return valid default value if not supplied. +size_t GetRepetitions() { + const int repetitions = FLAGS_benchmark_repetitions; + if (repetitions >= 0) { + return static_cast(repetitions); + } + + if (FLAGS_benchmark_enable_random_interleaving) { + return kDefaultRepetitions; + } + return 1; +} + // FIXME: wouldn't LTO mess this up? void UseCharPointer(char const volatile*) {} @@ -222,15 +283,15 @@ void RunBenchmarks(const std::vector& benchmarks, CHECK(display_reporter != nullptr); // Determine the width of the name field using a minimum width of 10. - bool might_have_aggregates = FLAGS_benchmark_repetitions > 1; + bool might_have_aggregates = GetRepetitions() > 1; size_t name_field_width = 10; size_t stat_field_width = 0; for (const BenchmarkInstance& benchmark : benchmarks) { name_field_width = - std::max(name_field_width, benchmark.name.str().size()); - might_have_aggregates |= benchmark.repetitions > 1; + std::max(name_field_width, benchmark.name().str().size()); + might_have_aggregates |= benchmark.repetitions() > 1; - for (const auto& Stat : *benchmark.statistics) + for (const auto& Stat : *benchmark.statistics()) stat_field_width = std::max(stat_field_width, Stat.name_.size()); } if (might_have_aggregates) name_field_width += 1 + stat_field_width; @@ -255,23 +316,56 @@ void RunBenchmarks(const std::vector& benchmarks, flushStreams(display_reporter); flushStreams(file_reporter); - for (const auto& benchmark : benchmarks) { - RunResults run_results = RunBenchmark(benchmark, &complexity_reports); - - auto report = [&run_results](BenchmarkReporter* reporter, - bool report_aggregates_only) { - assert(reporter); - // If there are no aggregates, do output 
non-aggregates. - report_aggregates_only &= !run_results.aggregates_only.empty(); - if (!report_aggregates_only) - reporter->ReportRuns(run_results.non_aggregates); - if (!run_results.aggregates_only.empty()) - reporter->ReportRuns(run_results.aggregates_only); - }; - - report(display_reporter, run_results.display_report_aggregates_only); + // Without random interleaving, benchmarks are executed in the order of: + // A, A, ..., A, B, B, ..., B, C, C, ..., C, ... + // That is, repetition is within RunBenchmark(), hence the name + // inner_repetitions. + // With random interleaving, benchmarks are executed in the order of: + // {Random order of A, B, C, ...}, {Random order of A, B, C, ...}, ... + // That is, repetitions is outside of RunBenchmark(), hence the name + // outer_repetitions. + size_t inner_repetitions = + FLAGS_benchmark_enable_random_interleaving ? 1 : GetRepetitions(); + size_t outer_repetitions = + FLAGS_benchmark_enable_random_interleaving ? GetRepetitions() : 1; + std::vector benchmark_indices(benchmarks.size()); + for (size_t i = 0; i < benchmarks.size(); ++i) { + benchmark_indices[i] = i; + } + + // 'run_results_vector' and 'benchmarks' are parallel arrays. + std::vector run_results_vector(benchmarks.size()); + for (size_t i = 0; i < outer_repetitions; i++) { + if (FLAGS_benchmark_enable_random_interleaving) { + std::random_shuffle(benchmark_indices.begin(), benchmark_indices.end()); + } + for (size_t j : benchmark_indices) { + // Repetitions will be automatically adjusted under random interleaving. + if (!FLAGS_benchmark_enable_random_interleaving || + i < benchmarks[j].random_interleaving_repetitions()) { + RunBenchmark(benchmarks[j], outer_repetitions, inner_repetitions, + &complexity_reports, &run_results_vector[j]); + } + } + } + + auto report = [](BenchmarkReporter* reporter, bool report_aggregates_only, + const RunResults& run_results) { + assert(reporter); + // If there are no aggregates, do output non-aggregates. 
+ report_aggregates_only &= !run_results.aggregates_only.empty(); + if (!report_aggregates_only) + reporter->ReportRuns(run_results.non_aggregates); + if (!run_results.aggregates_only.empty()) + reporter->ReportRuns(run_results.aggregates_only); + }; + + for (const RunResults& run_results : run_results_vector) { + report(display_reporter, run_results.display_report_aggregates_only, + run_results); if (file_reporter) - report(file_reporter, run_results.file_report_aggregates_only); + report(file_reporter, run_results.file_report_aggregates_only, + run_results); flushStreams(display_reporter); flushStreams(file_reporter); @@ -399,7 +493,7 @@ size_t RunSpecifiedBenchmarks(BenchmarkReporter* display_reporter, if (FLAGS_benchmark_list_tests) { for (auto const& benchmark : benchmarks) - Out << benchmark.name.str() << "\n"; + Out << benchmark.name().str() << "\n"; } else { internal::RunBenchmarks(benchmarks, display_reporter, file_reporter); } @@ -443,6 +537,10 @@ void ParseCommandLineFlags(int* argc, char** argv) { &FLAGS_benchmark_min_time) || ParseInt32Flag(argv[i], "benchmark_repetitions", &FLAGS_benchmark_repetitions) || + ParseBoolFlag(argv[i], "benchmark_enable_random_interleaving", + &FLAGS_benchmark_enable_random_interleaving) || + ParseDoubleFlag(argv[i], "benchmark_random_interleaving_max_overhead", + &FLAGS_benchmark_random_interleaving_max_overhead) || ParseBoolFlag(argv[i], "benchmark_report_aggregates_only", &FLAGS_benchmark_report_aggregates_only) || ParseBoolFlag(argv[i], "benchmark_display_aggregates_only", diff --git a/src/benchmark_adjust_repetitions.cc b/src/benchmark_adjust_repetitions.cc new file mode 100644 index 0000000000..31a1cd6b08 --- /dev/null +++ b/src/benchmark_adjust_repetitions.cc @@ -0,0 +1,111 @@ +#include "benchmark_adjust_repetitions.h" + +#include "benchmark_api_internal.h" +#include "log.h" + +namespace benchmark { +namespace internal { + +namespace { + +constexpr double kNanosecondInSecond = 0.000000001; + +} // namespace + +size_t 
ComputeRandomInterleavingRepetitions( + InternalRandomInterleavingRepetitionsInput input) { + // Find the repetitions such that total overhead is bounded. Let + // n = desired number of repetitions, i.e., the output of this method. + // t = total real execution time per repetition including overhead, + // (input.total_execution_time_per_repetition). + // o = maximum allowed increase in total real execution time due to random + // interleaving, measured as a fraction (input.max_overhead). + // e = estimated total execution time without Random Interleaving + // We want + // t * n / e <= 1 + o + // I.e., + // n <= (1 + o) * e / t + // + // Let + // h = overhead per repetition, which include all setup / teardown time and + // also the execution time of preliminary trials used to search for the + // correct number of iterations. + // r = real execution time per repetition not including overhead + // (input.real_accumulated_time_per_repetition). + // s = measured execution time per repetition not including overhead, + // which can be either real or CPU time + // (input.accumulated_time_per_repetition). + // We have + // h = t - r + // + // Let + // m = total minimum measured execution time for all repetitions + // (input.min_time_per_repetition * input.max_repetitions). + // Let + // f = m / s + // f is the scale factor between m and s, and will be used to estimate + // l, the total real execution time for all repetitions excluding the + // overhead. It's reasonable to assume that the real execution time excluding + // the overhead is proportional to the measured time. Hence we expect to see + // l / r to be equal to m / s. That is, l / r = f, thus, l = r * f. Then the + // total execution time e can be estimated by h + l, which is h + r * f. + // e = h + r * f + // Note that this might be an underestimation. 
If number of repetitions is + // reduced, we may need to run more iterations per repetition, and that may + // increase the number of preliminary trials needed to find the correct + // number of iterations. + + double h = std::max(0.0, input.total_execution_time_per_repetition - + input.real_time_used_per_repetition); + double r = + std::max(input.real_time_used_per_repetition, kNanosecondInSecond); + double s = + std::max(input.time_used_per_repetition, kNanosecondInSecond); + double m = input.min_time_per_repetition * input.max_repetitions; + + // f = m / s + // RunBenchmark() always overshoot the iteration count by kSafetyMultiplier. + // Apply the same factor here. + // f = kSafetyMultiplier * m / s + // Also we want to make sure 1 <= f <= input.max_repetitions. Note that we + // may not be able to reach m because the total iters per repetition is + // upper bounded by --benchmark_max_iters. This behavior is preserved in + // Random Interleaving, as we won't run repetitions more than + // input.max_repetitions to reach m. + + double f = kSafetyMultiplier * m / s; + f = std::min(std::max(f, 1.0), static_cast(input.max_repetitions)); + + double e = h + r * f; + // n <= (1 + o) * e / t = (1 + o) * e / (h + r) + // Also we want to make sure 1 <= n <= input.max_repetition, and (h + r) > 0. 
+ double n = (1 + input.max_overhead) * e / (h + r); + n = std::min(std::max(n, 1.0), static_cast(input.max_repetitions)); + + size_t n_size_t = static_cast(n); + + VLOG(2) << "Computed random interleaving repetitions" + << "\n input.total_execution_time_per_repetition: " + << input.total_execution_time_per_repetition + << "\n input.time_used_per_repetition: " + << input.time_used_per_repetition + << "\n input.real_time_used_per_repetition: " + << input.real_time_used_per_repetition + << "\n input.min_time_per_repetitions: " + << input.min_time_per_repetition + << "\n input.max_repetitions: " << input.max_repetitions + << "\n input.max_overhead: " << input.max_overhead + << "\n h: " << h + << "\n r: " << r + << "\n s: " << s + << "\n f: " << f + << "\n m: " << m + << "\n e: " << e + << "\n n: " << n + << "\n n_size_t: " << n_size_t; + + return n_size_t; +} + +} // internal +} // benchmark diff --git a/src/benchmark_adjust_repetitions.h b/src/benchmark_adjust_repetitions.h new file mode 100644 index 0000000000..eeb69ff65c --- /dev/null +++ b/src/benchmark_adjust_repetitions.h @@ -0,0 +1,28 @@ +#ifndef BENCHMARK_ADJUST_REPETITIONS_H +#define BENCHMARK_ADJUST_REPETITIONS_H + +#include "benchmark/benchmark.h" +#include "commandlineflags.h" + +namespace benchmark { +namespace internal { + +// Defines the input tuple to ComputeRandomInterleavingRepetitions(). +struct InternalRandomInterleavingRepetitionsInput { + double total_execution_time_per_repetition; + double time_used_per_repetition; + double real_time_used_per_repetition; + double min_time_per_repetition; + double max_overhead; + size_t max_repetitions; +}; + +// Should be called right after the first repetition is completed to estimate +// the number of iterations. 
+size_t ComputeRandomInterleavingRepetitions( + InternalRandomInterleavingRepetitionsInput input); + +} // end namespace internal +} // end namespace benchmark + +#endif // BENCHMARK_ADJUST_REPETITIONS_H diff --git a/src/benchmark_api_internal.cc b/src/benchmark_api_internal.cc index d468a257e3..f7e38dc34d 100644 --- a/src/benchmark_api_internal.cc +++ b/src/benchmark_api_internal.cc @@ -1,13 +1,165 @@ #include "benchmark_api_internal.h" +#include + +#include "string_util.h" + +DECLARE_bool(benchmark_enable_random_interleaving); + namespace benchmark { namespace internal { +BenchmarkInstance::BenchmarkInstance(Benchmark* benchmark, + const std::vector& args, + int threads) + : benchmark_(benchmark), args_(args), threads_(threads) { + name_.function_name = benchmark->name_; + + // Add arguments to instance name + size_t arg_i = 0; + for (auto const& arg : args) { + if (!name_.args.empty()) { + name_.args += '/'; + } + + if (arg_i < benchmark->arg_names_.size()) { + const auto& arg_name = benchmark->arg_names_[arg_i]; + if (!arg_name.empty()) { + name_.args += StrFormat("%s:", arg_name.c_str()); + } + } + + name_.args += StrFormat("%" PRId64, arg); + ++arg_i; + } + + if (!IsZero(benchmark->min_time_)) + name_.min_time = StrFormat("min_time:%0.3f", benchmark->min_time_); + if (benchmark->iterations_ != 0) { + name_.iterations = StrFormat( + "iterations:%lu", static_cast(benchmark->iterations_)); + } + if (benchmark->repetitions_ != 0) + name_.repetitions = StrFormat("repeats:%d", benchmark->repetitions_); + + if (benchmark->measure_process_cpu_time_) { + name_.time_type = "process_time"; + } + + if (benchmark->use_manual_time_) { + if (!name_.time_type.empty()) { + name_.time_type += '/'; + } + name_.time_type += "manual_time"; + } else if (benchmark->use_real_time_) { + if (!name_.time_type.empty()) { + name_.time_type += '/'; + } + name_.time_type += "real_time"; + } + + // Add the number of threads used to the name + if (!benchmark->thread_counts_.empty()) { + 
name_.threads = StrFormat("threads:%d", threads_); + } + + aggregation_report_mode_ = benchmark->aggregation_report_mode_; + time_unit_ = benchmark->time_unit_; + range_multiplier_ = benchmark->range_multiplier_; + min_time_ = benchmark->min_time_; + iterations_ = benchmark->iterations_; + repetitions_ = benchmark->repetitions_; + measure_process_cpu_time_ = benchmark->measure_process_cpu_time_; + use_real_time_ = benchmark->use_real_time_; + use_manual_time_ = benchmark->use_manual_time_; + complexity_ = benchmark->complexity_; + complexity_lambda_ = benchmark->complexity_lambda_; + statistics_ = &(benchmark->statistics_); +} + +const BenchmarkName& BenchmarkInstance::name() const { + return name_; +} + +AggregationReportMode BenchmarkInstance::aggregation_report_mode() const { + return aggregation_report_mode_; +} + +TimeUnit BenchmarkInstance::time_unit() const { + return time_unit_; +} + +int BenchmarkInstance::threads() const{ + return threads_; +} + +bool BenchmarkInstance::measure_process_cpu_time() const { + return measure_process_cpu_time_; +} + +bool BenchmarkInstance::use_real_time() const { + return use_real_time_; +} + +bool BenchmarkInstance::use_manual_time() const { + return use_manual_time_; +} + +BigO BenchmarkInstance::complexity() const { + return complexity_; +} + +BigOFunc* BenchmarkInstance::complexity_lambda() const { + return complexity_lambda_; +} + +bool BenchmarkInstance::last_benchmark_instance() const { + return last_benchmark_instance_; +} + +IterationCount BenchmarkInstance::iterations() const { + return iterations_; +} + +int BenchmarkInstance::repetitions() const { + return repetitions_; +} + +const std::vector* BenchmarkInstance::statistics() const { + return statistics_; +} + +double BenchmarkInstance::min_time() const { + if (FLAGS_benchmark_enable_random_interleaving) { + // Random Interleaving will automatically adjust + // random_interleaving_repetitions(). 
Dividing + // total execution time by random_interleaving_repetitions() gives + // the adjusted min_time per repetition. + return min_time_ * GetRepetitions() / random_interleaving_repetitions(); + } + return min_time_; +} + +size_t BenchmarkInstance::random_interleaving_repetitions() const { + return random_interleaving_repetitions_ == std::numeric_limits::max() + ? GetRepetitions() + : random_interleaving_repetitions_; +} + +bool BenchmarkInstance::random_interleaving_repetitions_initialized() const { + return random_interleaving_repetitions_ != std::numeric_limits::max(); +} + +void BenchmarkInstance::init_random_interleaving_repetitions( + size_t repetitions) const { + random_interleaving_repetitions_ = repetitions; +} + State BenchmarkInstance::Run(IterationCount iters, int thread_id, internal::ThreadTimer* timer, internal::ThreadManager* manager) const { - State st(iters, arg, thread_id, threads, timer, manager); - benchmark->Run(st); + State st(iters, args_, thread_id, threads_, timer, manager); + benchmark_->Run(st); return st; } diff --git a/src/benchmark_api_internal.h b/src/benchmark_api_internal.h index 264eff95c5..4b9c16ddb6 100644 --- a/src/benchmark_api_internal.h +++ b/src/benchmark_api_internal.h @@ -4,6 +4,7 @@ #include "benchmark/benchmark.h" #include "commandlineflags.h" +#include #include #include #include @@ -14,29 +15,79 @@ namespace benchmark { namespace internal { +extern const double kSafetyMultiplier; + // Information kept per benchmark we may want to run -struct BenchmarkInstance { - BenchmarkName name; - Benchmark* benchmark; - AggregationReportMode aggregation_report_mode; - std::vector arg; - TimeUnit time_unit; - int range_multiplier; - bool measure_process_cpu_time; - bool use_real_time; - bool use_manual_time; - BigO complexity; - BigOFunc* complexity_lambda; - UserCounters counters; - const std::vector* statistics; - bool last_benchmark_instance; - int repetitions; - double min_time; - IterationCount iterations; - int threads; // 
Number of concurrent threads to us +class BenchmarkInstance { + public: + BenchmarkInstance(Benchmark* benchmark, const std::vector& args, + int threads); + + // Const accessors. + + const BenchmarkName& name() const; + int repetitions() const; + const std::vector* statistics() const; + AggregationReportMode aggregation_report_mode() const; + TimeUnit time_unit() const; + int threads() const; + bool measure_process_cpu_time() const; + bool use_real_time() const; + bool use_manual_time() const; + BigO complexity() const; + BigOFunc* complexity_lambda() const; + bool last_benchmark_instance() const; + IterationCount iterations() const; + + // Returns the min time to run a microbenchmark in RunBenchmark(). + double min_time() const; + + // Returns number of repetitions for Random Interleaving. This will be + // initialized later once we finish the first repetition, if Random + // Interleaving is enabled. See also ComputeRandominterleavingrepetitions(). + size_t random_interleaving_repetitions() const; + + // Returns true if repetitions for Random Interleaving is initialized. + bool random_interleaving_repetitions_initialized() const; + + // Initializes number of repetitions for random interleaving. + void init_random_interleaving_repetitions(size_t repetitions) const; + + // Setters. + + // Sets the value of last_benchmark_instance. + void set_last_benchmark_instance(bool last_benchmark_instance) { + last_benchmark_instance_ = last_benchmark_instance; + } + + // Public APIs. 
State Run(IterationCount iters, int thread_id, internal::ThreadTimer* timer, internal::ThreadManager* manager) const; + + private: + BenchmarkName name_; + Benchmark* benchmark_; + AggregationReportMode aggregation_report_mode_; + std::vector args_; + TimeUnit time_unit_; + int range_multiplier_; + bool measure_process_cpu_time_; + bool use_real_time_; + bool use_manual_time_; + BigO complexity_; + BigOFunc* complexity_lambda_; + UserCounters counters_; + const std::vector* statistics_; + bool last_benchmark_instance_; + int repetitions_; + double min_time_; + IterationCount iterations_; + int threads_; // Number of concurrent threads to use + // Make it mutable so it can be initialized (mutated) later on a const + // instance. + mutable size_t random_interleaving_repetitions_ = + std::numeric_limits::max(); }; bool FindBenchmarksInternal(const std::string& re, @@ -47,6 +98,10 @@ bool IsZero(double n); ConsoleReporter::OutputOptions GetOutputOptions(bool force_no_color = false); +double GetMinTime(); + +size_t GetRepetitions(); + } // end namespace internal } // end namespace benchmark diff --git a/src/benchmark_register.cc b/src/benchmark_register.cc index 65d9944f4f..ac71f72693 100644 --- a/src/benchmark_register.cc +++ b/src/benchmark_register.cc @@ -158,79 +158,12 @@ bool BenchmarkFamilies::FindBenchmarks( for (auto const& args : family->args_) { for (int num_threads : *thread_counts) { - BenchmarkInstance instance; - instance.name.function_name = family->name_; - instance.benchmark = family.get(); - instance.aggregation_report_mode = family->aggregation_report_mode_; - instance.arg = args; - instance.time_unit = family->time_unit_; - instance.range_multiplier = family->range_multiplier_; - instance.min_time = family->min_time_; - instance.iterations = family->iterations_; - instance.repetitions = family->repetitions_; - instance.measure_process_cpu_time = family->measure_process_cpu_time_; - instance.use_real_time = family->use_real_time_; - 
instance.use_manual_time = family->use_manual_time_; - instance.complexity = family->complexity_; - instance.complexity_lambda = family->complexity_lambda_; - instance.statistics = &family->statistics_; - instance.threads = num_threads; - - // Add arguments to instance name - size_t arg_i = 0; - for (auto const& arg : args) { - if (!instance.name.args.empty()) { - instance.name.args += '/'; - } - - if (arg_i < family->arg_names_.size()) { - const auto& arg_name = family->arg_names_[arg_i]; - if (!arg_name.empty()) { - instance.name.args += StrFormat("%s:", arg_name.c_str()); - } - } - - instance.name.args += StrFormat("%" PRId64, arg); - ++arg_i; - } - - if (!IsZero(family->min_time_)) - instance.name.min_time = - StrFormat("min_time:%0.3f", family->min_time_); - if (family->iterations_ != 0) { - instance.name.iterations = - StrFormat("iterations:%lu", - static_cast(family->iterations_)); - } - if (family->repetitions_ != 0) - instance.name.repetitions = - StrFormat("repeats:%d", family->repetitions_); - - if (family->measure_process_cpu_time_) { - instance.name.time_type = "process_time"; - } - - if (family->use_manual_time_) { - if (!instance.name.time_type.empty()) { - instance.name.time_type += '/'; - } - instance.name.time_type += "manual_time"; - } else if (family->use_real_time_) { - if (!instance.name.time_type.empty()) { - instance.name.time_type += '/'; - } - instance.name.time_type += "real_time"; - } - - // Add the number of threads used to the name - if (!family->thread_counts_.empty()) { - instance.name.threads = StrFormat("threads:%d", instance.threads); - } + BenchmarkInstance instance(family.get(), args, num_threads); - const auto full_name = instance.name.str(); + const auto full_name = instance.name().str(); if ((re.Match(full_name) && !isNegativeFilter) || (!re.Match(full_name) && isNegativeFilter)) { - instance.last_benchmark_instance = (&args == &family->args_.back()); + instance.set_last_benchmark_instance(&args == &family->args_.back()); 
benchmarks->push_back(std::move(instance)); } } diff --git a/src/benchmark_runner.cc b/src/benchmark_runner.cc index 7bc6b6329e..9cd93ca97b 100644 --- a/src/benchmark_runner.cc +++ b/src/benchmark_runner.cc @@ -15,6 +15,7 @@ #include "benchmark_runner.h" #include "benchmark/benchmark.h" #include "benchmark_api_internal.h" +#include "benchmark_adjust_repetitions.h" #include "internal_macros.h" #ifndef BENCHMARK_OS_WINDOWS @@ -51,6 +52,9 @@ #include "thread_manager.h" #include "thread_timer.h" +DECLARE_bool(benchmark_enable_random_interleaving); +DECLARE_double(benchmark_random_interleaving_max_overhead); + namespace benchmark { namespace internal { @@ -70,28 +74,28 @@ BenchmarkReporter::Run CreateRunReport( // Create report about this benchmark run. BenchmarkReporter::Run report; - report.run_name = b.name; + report.run_name = b.name(); report.error_occurred = results.has_error_; report.error_message = results.error_message_; report.report_label = results.report_label_; // This is the total iterations across all threads. 
report.iterations = results.iterations; - report.time_unit = b.time_unit; - report.threads = b.threads; + report.time_unit = b.time_unit(); + report.threads = b.threads(); report.repetition_index = repetition_index; - report.repetitions = b.repetitions; + report.repetitions = b.repetitions(); if (!report.error_occurred) { - if (b.use_manual_time) { + if (b.use_manual_time()) { report.real_accumulated_time = results.manual_time_used; } else { report.real_accumulated_time = results.real_time_used; } report.cpu_accumulated_time = results.cpu_time_used; report.complexity_n = results.complexity_n; - report.complexity = b.complexity; - report.complexity_lambda = b.complexity_lambda; - report.statistics = b.statistics; + report.complexity = b.complexity(); + report.complexity_lambda = b.complexity_lambda(); + report.statistics = b.statistics(); report.counters = results.counters; if (memory_iterations > 0) { @@ -103,7 +107,7 @@ BenchmarkReporter::Run CreateRunReport( report.max_bytes_used = memory_result.max_bytes_used; } - internal::Finish(&report.counters, results.iterations, seconds, b.threads); + internal::Finish(&report.counters, results.iterations, seconds, b.threads()); } return report; } @@ -113,7 +117,7 @@ BenchmarkReporter::Run CreateRunReport( void RunInThread(const BenchmarkInstance* b, IterationCount iters, int thread_id, ThreadManager* manager) { internal::ThreadTimer timer( - b->measure_process_cpu_time + b->measure_process_cpu_time() ? 
internal::ThreadTimer::CreateProcessCpuTime() : internal::ThreadTimer::Create()); State st = b->Run(iters, thread_id, &timer, manager); @@ -135,26 +139,32 @@ void RunInThread(const BenchmarkInstance* b, IterationCount iters, class BenchmarkRunner { public: BenchmarkRunner(const benchmark::internal::BenchmarkInstance& b_, - std::vector* complexity_reports_) + const size_t outer_repetitions_, + const size_t inner_repetitions_, + std::vector* complexity_reports_, + RunResults* run_results_) : b(b_), complexity_reports(*complexity_reports_), - min_time(!IsZero(b.min_time) ? b.min_time : FLAGS_benchmark_min_time), - repeats(b.repetitions != 0 ? b.repetitions - : FLAGS_benchmark_repetitions), - has_explicit_iteration_count(b.iterations != 0), - pool(b.threads - 1), - iters(has_explicit_iteration_count ? b.iterations : 1) { - run_results.display_report_aggregates_only = + run_results(run_results_), + outer_repetitions(outer_repetitions_), + inner_repetitions(inner_repetitions_), + min_time(!IsZero(b.min_time()) ? b.min_time() : GetMinTime()), + repeats(b.repetitions() != 0 ? b.repetitions() : inner_repetitions_), + has_explicit_iteration_count(b.iterations() != 0), + pool(b.threads() - 1), + iters(has_explicit_iteration_count ? 
b.iterations() : 1) { + run_results->display_report_aggregates_only = (FLAGS_benchmark_report_aggregates_only || FLAGS_benchmark_display_aggregates_only); - run_results.file_report_aggregates_only = + run_results->file_report_aggregates_only = FLAGS_benchmark_report_aggregates_only; - if (b.aggregation_report_mode != internal::ARM_Unspecified) { - run_results.display_report_aggregates_only = - (b.aggregation_report_mode & + if (b.aggregation_report_mode() != internal::ARM_Unspecified) { + run_results->display_report_aggregates_only = + (b.aggregation_report_mode() & internal::ARM_DisplayReportAggregatesOnly); - run_results.file_report_aggregates_only = - (b.aggregation_report_mode & internal::ARM_FileReportAggregatesOnly); + run_results->file_report_aggregates_only = + (b.aggregation_report_mode() & + internal::ARM_FileReportAggregatesOnly); } for (int repetition_num = 0; repetition_num < repeats; repetition_num++) { @@ -162,26 +172,26 @@ class BenchmarkRunner { } // Calculate additional statistics - run_results.aggregates_only = ComputeStats(run_results.non_aggregates); + run_results->aggregates_only = ComputeStats(run_results->non_aggregates); // Maybe calculate complexity report - if ((b.complexity != oNone) && b.last_benchmark_instance) { + if ((b.complexity() != oNone) && b.last_benchmark_instance()) { auto additional_run_stats = ComputeBigO(complexity_reports); - run_results.aggregates_only.insert(run_results.aggregates_only.end(), - additional_run_stats.begin(), - additional_run_stats.end()); + run_results->aggregates_only.insert(run_results->aggregates_only.end(), + additional_run_stats.begin(), + additional_run_stats.end()); complexity_reports.clear(); } } - RunResults&& get_results() { return std::move(run_results); } - private: - RunResults run_results; - const benchmark::internal::BenchmarkInstance& b; std::vector& complexity_reports; + RunResults* run_results = nullptr; + + const size_t outer_repetitions; + const size_t inner_repetitions; const double 
min_time; const int repeats; const bool has_explicit_iteration_count; @@ -198,10 +208,10 @@ class BenchmarkRunner { double seconds; }; IterationResults DoNIterations() { - VLOG(2) << "Running " << b.name.str() << " for " << iters << "\n"; + VLOG(2) << "Running " << b.name().str() << " for " << iters << "\n"; std::unique_ptr manager; - manager.reset(new internal::ThreadManager(b.threads)); + manager.reset(new internal::ThreadManager(b.threads())); // Run all but one thread in separate threads for (std::size_t ti = 0; ti < pool.size(); ++ti) { @@ -228,10 +238,10 @@ class BenchmarkRunner { manager.reset(); // Adjust real/manual time stats since they were reported per thread. - i.results.real_time_used /= b.threads; - i.results.manual_time_used /= b.threads; + i.results.real_time_used /= b.threads(); + i.results.manual_time_used /= b.threads(); // If we were measuring whole-process CPU usage, adjust the CPU time too. - if (b.measure_process_cpu_time) i.results.cpu_time_used /= b.threads; + if (b.measure_process_cpu_time()) i.results.cpu_time_used /= b.threads(); VLOG(2) << "Ran in " << i.results.cpu_time_used << "/" << i.results.real_time_used << "\n"; @@ -240,9 +250,9 @@ class BenchmarkRunner { i.iters = iters; // Base decisions off of real time if requested by this benchmark. i.seconds = i.results.cpu_time_used; - if (b.use_manual_time) { + if (b.use_manual_time()) { i.seconds = i.results.manual_time_used; - } else if (b.use_real_time) { + } else if (b.use_real_time()) { i.seconds = i.results.real_time_used; } @@ -252,7 +262,8 @@ class BenchmarkRunner { IterationCount PredictNumItersNeeded(const IterationResults& i) const { // See how much iterations should be increased by. // Note: Avoid division by zero with max(seconds, 1ns). 
- double multiplier = min_time * 1.4 / std::max(i.seconds, 1e-9); + double multiplier = + min_time * kSafetyMultiplier / std::max(i.seconds, 1e-9); // If our last run was at least 10% of FLAGS_benchmark_min_time then we // use the multiplier directly. // Otherwise we use at most 10 times expansion. @@ -283,7 +294,7 @@ class BenchmarkRunner { // CPU time is specified but the elapsed real time greatly exceeds // the minimum time. // Note that user provided timers are except from this sanity check. - ((i.results.real_time_used >= 5 * min_time) && !b.use_manual_time); + ((i.results.real_time_used >= 5 * min_time) && !b.use_manual_time()); } void DoOneRepetition(int64_t repetition_index) { @@ -296,8 +307,10 @@ class BenchmarkRunner { // Please do note that the if there are repetitions, the iteration count // is *only* calculated for the *first* repetition, and other repetitions // simply use that precomputed iteration count. + const auto exec_start = benchmark::ChronoClockNow(); for (;;) { i = DoNIterations(); + const auto exec_end = benchmark::ChronoClockNow(); // Do we consider the results to be significant? // If we are doing repetitions, and the first repetition was already done, @@ -308,7 +321,38 @@ class BenchmarkRunner { has_explicit_iteration_count || ShouldReportIterationResults(i); - if (results_are_significant) break; // Good, let's report them! + if (results_are_significant) { + // The number of repetitions for random interleaving may be reduced + // to limit the increase in benchmark execution time. When this happens + // the target execution time for each repetition is increased. We may + // need to rerun trials to calculate iters according to the increased + // target execution time. + bool rerun_trial = false; + // If random interleaving is enabled and the repetitions is not + // initialized, do it now. 
+ if (FLAGS_benchmark_enable_random_interleaving && + !b.random_interleaving_repetitions_initialized()) { + b.init_random_interleaving_repetitions( + ComputeRandomInterleavingRepetitions( + {.total_execution_time_per_repetition = exec_end - exec_start, + .time_used_per_repetition = i.seconds, + .real_time_used_per_repetition = i.results.real_time_used, + .min_time_per_repetition = GetMinTime(), + .max_overhead = + FLAGS_benchmark_random_interleaving_max_overhead, + .max_repetitions = GetRepetitions()})); + // If the number of repetitions changed, need to rerun the last trial + // because iters may also change. Note that we only need to do this + // if accumulated_time < b.min_time(), i.e., the iterations we have + // run is not enough for the already adjusted b.min_time(). + // Otherwise, we will still skip the rerun. + rerun_trial = + b.random_interleaving_repetitions() < GetRepetitions() && + i.seconds < min_time; + } + + if (!rerun_trial) break; // Good, let's report them! + } // Nope, bad iteration. Let's re-estimate the hopefully-sufficient // iteration count, and run the benchmark again... @@ -325,7 +369,8 @@ class BenchmarkRunner { if (memory_manager != nullptr) { // Only run a few iterations to reduce the impact of one-time // allocations in benchmarks that are not properly managed. 
- memory_iterations = std::min(16, iters); + memory_iterations = std::min( + 16 / outer_repetitions + (16 % outer_repetitions != 0), iters); memory_manager->Start(); std::unique_ptr manager; manager.reset(new internal::ThreadManager(1)); @@ -341,20 +386,21 @@ class BenchmarkRunner { CreateRunReport(b, i.results, memory_iterations, memory_result, i.seconds, repetition_index); - if (!report.error_occurred && b.complexity != oNone) + if (!report.error_occurred && b.complexity() != oNone) complexity_reports.push_back(report); - run_results.non_aggregates.push_back(report); + run_results->non_aggregates.push_back(report); } }; } // end namespace -RunResults RunBenchmark( - const benchmark::internal::BenchmarkInstance& b, - std::vector* complexity_reports) { - internal::BenchmarkRunner r(b, complexity_reports); - return r.get_results(); +void RunBenchmark(const benchmark::internal::BenchmarkInstance& b, + const size_t outer_repetitions, const size_t inner_repetitions, + std::vector* complexity_reports, + RunResults* run_results) { + internal::BenchmarkRunner r(b, outer_repetitions, inner_repetitions, + complexity_reports, run_results); } } // end namespace internal diff --git a/src/benchmark_runner.h b/src/benchmark_runner.h index 96e8282a11..2da3e83ad8 100644 --- a/src/benchmark_runner.h +++ b/src/benchmark_runner.h @@ -40,9 +40,10 @@ struct RunResults { bool file_report_aggregates_only = false; }; -RunResults RunBenchmark( - const benchmark::internal::BenchmarkInstance& b, - std::vector* complexity_reports); +void RunBenchmark(const benchmark::internal::BenchmarkInstance& b, + const size_t outer_repetitions, const size_t inner_repetitions, + std::vector* complexity_reports, + RunResults* run_results); } // namespace internal diff --git a/test/CMakeLists.txt b/test/CMakeLists.txt index c1a3a3fc19..37d7b9d6b2 100644 --- a/test/CMakeLists.txt +++ b/test/CMakeLists.txt @@ -193,6 +193,7 @@ if (BENCHMARK_ENABLE_GTEST_TESTS) add_gtest(benchmark_gtest) 
add_gtest(benchmark_name_gtest) + add_gtest(benchmark_random_interleaving_gtest) add_gtest(commandlineflags_gtest) add_gtest(statistics_gtest) add_gtest(string_util_gtest) diff --git a/test/benchmark_random_interleaving_gtest.cc b/test/benchmark_random_interleaving_gtest.cc new file mode 100644 index 0000000000..2c017bd7fe --- /dev/null +++ b/test/benchmark_random_interleaving_gtest.cc @@ -0,0 +1,394 @@ +#include +#include +#include + +#include "../src/benchmark_adjust_repetitions.h" +#include "../src/string_util.h" +#include "benchmark/benchmark.h" +#include "gmock/gmock.h" +#include "gtest/gtest.h" + +DECLARE_bool(benchmark_enable_random_interleaving); +DECLARE_string(benchmark_filter); +DECLARE_double(benchmark_random_interleaving_max_overhead); +DECLARE_int32(benchmark_repetitions); + +namespace benchmark { +namespace internal { +namespace { + +class EventQueue : public std::queue { + public: + void Put(const std::string& event) { + push(event); + } + + void Clear() { + while (!empty()) { + pop(); + } + } + + std::string Get() { + std::string event = front(); + pop(); + return event; + } +}; + +static EventQueue* queue = new EventQueue; + +class NullReporter : public BenchmarkReporter { + public: + bool ReportContext(const Context& /*context*/) override { + return true; + } + void ReportRuns(const std::vector& /* report */) override {} +}; + +class BenchmarkTest : public testing::Test { + public: + static void SetupHook(int /* num_threads */) { queue->push("Setup"); } + + static void TeardownHook(int /* num_threads */) { queue->push("Teardown"); } + + void Execute(const std::string& pattern) { + queue->Clear(); + + BenchmarkReporter* reporter = new NullReporter; + FLAGS_benchmark_filter = pattern; + RunSpecifiedBenchmarks(reporter); + delete reporter; + + queue->Put("DONE"); // End marker + } +}; + +static void BM_Match1(benchmark::State& state) { + const int arg = state.range(0); + + ASSERT_EQ(100, state.max_iterations ); + queue->Put(StrFormat("BM_Match1/%d", 
arg)); +} +BENCHMARK(BM_Match1) + ->Iterations(100) + ->Arg(1) + ->Arg(2) + ->Arg(3) + ->Range(10, 80) + ->Args({90}) + ->Args({100}); + +static void BM_MatchOverhead(benchmark::State& state) { + const int arg = state.range(0); + + queue->Put(StrFormat("BM_MatchOverhead/%d", arg)); +} +BENCHMARK(BM_MatchOverhead) + ->Iterations(100) + ->Arg(64) + ->Arg(80); + +TEST_F(BenchmarkTest, Match1) { + Execute("BM_Match1"); + ASSERT_EQ("BM_Match1/1", queue->Get()); + ASSERT_EQ("BM_Match1/2", queue->Get()); + ASSERT_EQ("BM_Match1/3", queue->Get()); + ASSERT_EQ("BM_Match1/10", queue->Get()); + ASSERT_EQ("BM_Match1/64", queue->Get()); + ASSERT_EQ("BM_Match1/80", queue->Get()); + ASSERT_EQ("BM_Match1/90", queue->Get()); + ASSERT_EQ("BM_Match1/100", queue->Get()); + ASSERT_EQ("DONE", queue->Get()); +} + +TEST_F(BenchmarkTest, Match1WithRepetition) { + FLAGS_benchmark_repetitions = 2; + + Execute("BM_Match1/(64|80)"); + ASSERT_EQ("BM_Match1/64", queue->Get()); + ASSERT_EQ("BM_Match1/64", queue->Get()); + ASSERT_EQ("BM_Match1/80", queue->Get()); + ASSERT_EQ("BM_Match1/80", queue->Get()); + ASSERT_EQ("DONE", queue->Get()); +} + +TEST_F(BenchmarkTest, Match1WithRandomInterleaving) { + FLAGS_benchmark_enable_random_interleaving = true; + FLAGS_benchmark_repetitions = 100; + FLAGS_benchmark_random_interleaving_max_overhead = + std::numeric_limits::infinity(); + + std::vector expected({"BM_Match1/64", "BM_Match1/80"}); + std::map interleaving_count; + Execute("BM_Match1/(64|80)"); + for (int i = 0; i < 100; ++i) { + std::vector interleaving; + interleaving.push_back(queue->Get()); + interleaving.push_back(queue->Get()); + EXPECT_THAT(interleaving, testing::UnorderedElementsAreArray(expected)); + interleaving_count[StrFormat("%s,%s", interleaving[0].c_str(), + interleaving[1].c_str())]++; + } + EXPECT_GE(interleaving_count.size(), 2) << "Interleaving was not randomized."; + ASSERT_EQ("DONE", queue->Get()); +} + +TEST_F(BenchmarkTest, Match1WithRandomInterleavingAndZeroOverhead) { + 
FLAGS_benchmark_enable_random_interleaving = true; + FLAGS_benchmark_repetitions = 100; + FLAGS_benchmark_random_interleaving_max_overhead = 0; + + // ComputeRandomInterleavingRepetitions() will kick in and rerun each + // benchmark once with increased iterations. Then number of repetitions will + // be reduced to 1. Thus altogether 4 executions, 2 x BM_MatchOverhead/64, + // and 2 x BM_MatchOverhead/80. + std::vector expected( + {"BM_MatchOverhead/64", "BM_MatchOverhead/80", "BM_MatchOverhead/64", + "BM_MatchOverhead/80"}); + std::map interleaving_count; + Execute("BM_MatchOverhead/(64|80)"); + std::vector interleaving; + interleaving.push_back(queue->Get()); + interleaving.push_back(queue->Get()); + interleaving.push_back(queue->Get()); + interleaving.push_back(queue->Get()); + EXPECT_THAT(interleaving, testing::UnorderedElementsAreArray(expected)); + ASSERT_EQ("DONE", queue->Get()) << "# Repetitions was not reduced to 1."; +} + +TEST(Benchmark, ComputeRandomInterleavingRepetitions) { + // On wall clock time. 
+ EXPECT_EQ(ComputeRandomInterleavingRepetitions( + {.total_execution_time_per_repetition = 0.05, + .time_used_per_repetition = 0.05, + .real_time_used_per_repetition = 0.05, + .min_time_per_repetition = 0.05, + .max_overhead = 0.0, + .max_repetitions = 10}), + 10); + EXPECT_EQ(ComputeRandomInterleavingRepetitions( + {.total_execution_time_per_repetition = 0.05, + .time_used_per_repetition = 0.05, + .real_time_used_per_repetition = 0.05, + .min_time_per_repetition = 0.05, + .max_overhead = 0.4, + .max_repetitions = 10}), + 10); + EXPECT_EQ(ComputeRandomInterleavingRepetitions( + {.total_execution_time_per_repetition = 0.06, + .time_used_per_repetition = 0.05, + .real_time_used_per_repetition = 0.05, + .min_time_per_repetition = 0.05, + .max_overhead = 0.0, + .max_repetitions = 10}), + 8); + EXPECT_EQ(ComputeRandomInterleavingRepetitions( + {.total_execution_time_per_repetition = 0.06, + .time_used_per_repetition = 0.05, + .real_time_used_per_repetition = 0.05, + .min_time_per_repetition = 0.05, + .max_overhead = 0.4, + .max_repetitions = 10}), + 10); + EXPECT_EQ(ComputeRandomInterleavingRepetitions( + {.total_execution_time_per_repetition = 0.08, + .time_used_per_repetition = 0.05, + .real_time_used_per_repetition = 0.05, + .min_time_per_repetition = 0.05, + .max_overhead = 0.0, + .max_repetitions = 10}), + 6); + EXPECT_EQ(ComputeRandomInterleavingRepetitions( + {.total_execution_time_per_repetition = 0.08, + .time_used_per_repetition = 0.05, + .real_time_used_per_repetition = 0.05, + .min_time_per_repetition = 0.05, + .max_overhead = 0.4, + .max_repetitions = 10}), + 9); + EXPECT_EQ(ComputeRandomInterleavingRepetitions( + {.total_execution_time_per_repetition = 0.25, + .time_used_per_repetition = 0.25, + .real_time_used_per_repetition = 0.25, + .min_time_per_repetition = 0.05, + .max_overhead = 0.0, + .max_repetitions = 10}), + 2); + EXPECT_EQ(ComputeRandomInterleavingRepetitions( + {.total_execution_time_per_repetition = 0.25, + .time_used_per_repetition = 0.25, 
+ .real_time_used_per_repetition = 0.25, + .min_time_per_repetition = 0.05, + .max_overhead = 0.4, + .max_repetitions = 10}), + 3); + EXPECT_EQ(ComputeRandomInterleavingRepetitions( + {.total_execution_time_per_repetition = 0.26, + .time_used_per_repetition = 0.25, + .real_time_used_per_repetition = 0.25, + .min_time_per_repetition = 0.05, + .max_overhead = 0.0, + .max_repetitions = 10}), + 2); + EXPECT_EQ(ComputeRandomInterleavingRepetitions( + {.total_execution_time_per_repetition = 0.26, + .time_used_per_repetition = 0.25, + .real_time_used_per_repetition = 0.25, + .min_time_per_repetition = 0.05, + .max_overhead = 0.4, + .max_repetitions = 10}), + 3); + EXPECT_EQ(ComputeRandomInterleavingRepetitions( + {.total_execution_time_per_repetition = 0.38, + .time_used_per_repetition = 0.25, + .real_time_used_per_repetition = 0.25, + .min_time_per_repetition = 0.05, + .max_overhead = 0.0, + .max_repetitions = 10}), + 2); + EXPECT_EQ(ComputeRandomInterleavingRepetitions( + {.total_execution_time_per_repetition = 0.38, + .time_used_per_repetition = 0.25, + .real_time_used_per_repetition = 0.25, + .min_time_per_repetition = 0.05, + .max_overhead = 0.4, + .max_repetitions = 10}), + 3); + + // On CPU time. 
+ EXPECT_EQ(ComputeRandomInterleavingRepetitions( + {.total_execution_time_per_repetition = 0.1, + .time_used_per_repetition = 0.05, + .real_time_used_per_repetition = 0.1, + .min_time_per_repetition = 0.05, + .max_overhead = 0.0, + .max_repetitions = 10}), + 10); + EXPECT_EQ(ComputeRandomInterleavingRepetitions( + {.total_execution_time_per_repetition = 0.1, + .time_used_per_repetition = 0.05, + .real_time_used_per_repetition = 0.1, + .min_time_per_repetition = 0.05, + .max_overhead = 0.4, + .max_repetitions = 10}), + 10); + EXPECT_EQ(ComputeRandomInterleavingRepetitions( + {.total_execution_time_per_repetition = 0.11, + .time_used_per_repetition = 0.05, + .real_time_used_per_repetition = 0.1, + .min_time_per_repetition = 0.05, + .max_overhead = 0.0, + .max_repetitions = 10}), + 9); + EXPECT_EQ(ComputeRandomInterleavingRepetitions( + {.total_execution_time_per_repetition = 0.11, + .time_used_per_repetition = 0.05, + .real_time_used_per_repetition = 0.1, + .min_time_per_repetition = 0.05, + .max_overhead = 0.4, + .max_repetitions = 10}), + 10); + EXPECT_EQ(ComputeRandomInterleavingRepetitions( + {.total_execution_time_per_repetition = 0.15, + .time_used_per_repetition = 0.05, + .real_time_used_per_repetition = 0.1, + .min_time_per_repetition = 0.05, + .max_overhead = 0.0, + .max_repetitions = 10}), + 7); + EXPECT_EQ(ComputeRandomInterleavingRepetitions( + {.total_execution_time_per_repetition = 0.15, + .time_used_per_repetition = 0.05, + .real_time_used_per_repetition = 0.1, + .min_time_per_repetition = 0.05, + .max_overhead = 0.4, + .max_repetitions = 10}), + 9); + EXPECT_EQ(ComputeRandomInterleavingRepetitions( + {.total_execution_time_per_repetition = 0.5, + .time_used_per_repetition = 0.25, + .real_time_used_per_repetition = 0.5, + .min_time_per_repetition = 0.05, + .max_overhead = 0.0, + .max_repetitions = 10}), + 2); + EXPECT_EQ(ComputeRandomInterleavingRepetitions( + {.total_execution_time_per_repetition = 0.5, + .time_used_per_repetition = 0.25, + 
.real_time_used_per_repetition = 0.5, + .min_time_per_repetition = 0.05, + .max_overhead = 0.4, + .max_repetitions = 10}), + 3); + EXPECT_EQ(ComputeRandomInterleavingRepetitions( + {.total_execution_time_per_repetition = 0.51, + .time_used_per_repetition = 0.25, + .real_time_used_per_repetition = 0.5, + .min_time_per_repetition = 0.05, + .max_overhead = 0.0, + .max_repetitions = 10}), + 2); + EXPECT_EQ(ComputeRandomInterleavingRepetitions( + {.total_execution_time_per_repetition = 0.51, + .time_used_per_repetition = 0.25, + .real_time_used_per_repetition = 0.5, + .min_time_per_repetition = 0.05, + .max_overhead = 0.4, + .max_repetitions = 10}), + 3); + EXPECT_EQ(ComputeRandomInterleavingRepetitions( + {.total_execution_time_per_repetition = 0.8, + .time_used_per_repetition = 0.25, + .real_time_used_per_repetition = 0.5, + .min_time_per_repetition = 0.05, + .max_overhead = 0.0, + .max_repetitions = 10}), + 2); + EXPECT_EQ(ComputeRandomInterleavingRepetitions( + {.total_execution_time_per_repetition = 0.8, + .time_used_per_repetition = 0.25, + .real_time_used_per_repetition = 0.5, + .min_time_per_repetition = 0.05, + .max_overhead = 0.4, + .max_repetitions = 10}), + 2); + + // Corner cases. 
+ EXPECT_EQ(ComputeRandomInterleavingRepetitions( + {.total_execution_time_per_repetition = 0.0, + .time_used_per_repetition = 0.25, + .real_time_used_per_repetition = 0.5, + .min_time_per_repetition = 0.05, + .max_overhead = 0.4, + .max_repetitions = 10}), + 3); + EXPECT_EQ(ComputeRandomInterleavingRepetitions( + {.total_execution_time_per_repetition = 0.8, + .time_used_per_repetition = 0.0, + .real_time_used_per_repetition = 0.5, + .min_time_per_repetition = 0.05, + .max_overhead = 0.4, + .max_repetitions = 10}), + 9); + EXPECT_EQ(ComputeRandomInterleavingRepetitions( + {.total_execution_time_per_repetition = 0.8, + .time_used_per_repetition = 0.25, + .real_time_used_per_repetition = 0.0, + .min_time_per_repetition = 0.05, + .max_overhead = 0.4, + .max_repetitions = 10}), + 1); + EXPECT_EQ(ComputeRandomInterleavingRepetitions( + {.total_execution_time_per_repetition = 0.8, + .time_used_per_repetition = 0.25, + .real_time_used_per_repetition = 0.5, + .min_time_per_repetition = 0.0, + .max_overhead = 0.4, + .max_repetitions = 10}), + 1); +} + +} // namespace +} // namespace internal +} // namespace benchmark From f6033516e069a68e44464e0a0503a1cb7597c32a Mon Sep 17 00:00:00 2001 From: Hai Huang Date: Sun, 28 Mar 2021 17:35:13 -0400 Subject: [PATCH 02/21] Fix benchmark_random_interleaving_gtest.cc for fr-1051 Committer: Hai Huang On branch fr-1051 Your branch is up to date with 'origin/fr-1051'. Changes to be committed: modified: src/benchmark.cc modified: src/benchmark_runner.cc modified: test/benchmark_random_interleaving_gtest.cc --- src/benchmark.cc | 2 +- src/benchmark_runner.cc | 23 ++++++++++++--------- test/benchmark_random_interleaving_gtest.cc | 9 ++++---- 3 files changed, 19 insertions(+), 15 deletions(-) diff --git a/src/benchmark.cc b/src/benchmark.cc index 32f01b8272..8b48e49bac 100644 --- a/src/benchmark.cc +++ b/src/benchmark.cc @@ -328,7 +328,7 @@ void RunBenchmarks(const std::vector& benchmarks, FLAGS_benchmark_enable_random_interleaving ? 
1 : GetRepetitions(); size_t outer_repetitions = FLAGS_benchmark_enable_random_interleaving ? GetRepetitions() : 1; - std::vector benchmark_indices(benchmarks.size()); + std::vector benchmark_indices(benchmarks.size()); for (size_t i = 0; i < benchmarks.size(); ++i) { benchmark_indices[i] = i; } diff --git a/src/benchmark_runner.cc b/src/benchmark_runner.cc index 9cd93ca97b..60a9f6be82 100644 --- a/src/benchmark_runner.cc +++ b/src/benchmark_runner.cc @@ -148,7 +148,7 @@ class BenchmarkRunner { run_results(run_results_), outer_repetitions(outer_repetitions_), inner_repetitions(inner_repetitions_), - min_time(!IsZero(b.min_time()) ? b.min_time() : GetMinTime()), + // min_time(!IsZero(b.min_time()) ? b.min_time() : GetMinTime()), repeats(b.repetitions() != 0 ? b.repetitions() : inner_repetitions_), has_explicit_iteration_count(b.iterations() != 0), pool(b.threads() - 1), @@ -192,7 +192,7 @@ class BenchmarkRunner { const size_t outer_repetitions; const size_t inner_repetitions; - const double min_time; + // const double min_time; const int repeats; const bool has_explicit_iteration_count; @@ -263,13 +263,13 @@ class BenchmarkRunner { // See how much iterations should be increased by. // Note: Avoid division by zero with max(seconds, 1ns). double multiplier = - min_time * kSafetyMultiplier / std::max(i.seconds, 1e-9); + b.min_time() * kSafetyMultiplier / std::max(i.seconds, 1e-9); // If our last run was at least 10% of FLAGS_benchmark_min_time then we // use the multiplier directly. // Otherwise we use at most 10 times expansion. // NOTE: When the last run was at least 10% of the min time the max // expansion should be 14x. - bool is_significant = (i.seconds / min_time) > 0.1; + bool is_significant = (i.seconds / b.min_time()) > 0.1; multiplier = is_significant ? multiplier : std::min(10.0, multiplier); if (multiplier <= 1.0) multiplier = 2.0; @@ -290,11 +290,14 @@ class BenchmarkRunner { // or because an error was reported. 
return i.results.has_error_ || i.iters >= kMaxIterations || // Too many iterations already. - i.seconds >= min_time || // The elapsed time is large enough. - // CPU time is specified but the elapsed real time greatly exceeds - // the minimum time. - // Note that user provided timers are except from this sanity check. - ((i.results.real_time_used >= 5 * min_time) && !b.use_manual_time()); + i.seconds >= b.min_time() || // The elapsed time is large enough. + // CPU time is specified but the + // elapsed real time greatly exceeds + // the minimum time. Note that user + // provided timers are except from this + // sanity check. + ((i.results.real_time_used >= 5 * b.min_time()) && + !b.use_manual_time()); } void DoOneRepetition(int64_t repetition_index) { @@ -348,7 +351,7 @@ class BenchmarkRunner { // Otherwise, we will still skip the rerun. rerun_trial = b.random_interleaving_repetitions() < GetRepetitions() && - i.seconds < min_time; + i.seconds < b.min_time(); } if (!rerun_trial) break; // Good, let's report them! diff --git a/test/benchmark_random_interleaving_gtest.cc b/test/benchmark_random_interleaving_gtest.cc index 2c017bd7fe..a45b566ec6 100644 --- a/test/benchmark_random_interleaving_gtest.cc +++ b/test/benchmark_random_interleaving_gtest.cc @@ -67,7 +67,7 @@ class BenchmarkTest : public testing::Test { static void BM_Match1(benchmark::State& state) { const int arg = state.range(0); - ASSERT_EQ(100, state.max_iterations ); + for (auto _ : state) {} queue->Put(StrFormat("BM_Match1/%d", arg)); } BENCHMARK(BM_Match1) @@ -82,6 +82,7 @@ BENCHMARK(BM_Match1) static void BM_MatchOverhead(benchmark::State& state) { const int arg = state.range(0); + for (auto _ : state) {} queue->Put(StrFormat("BM_MatchOverhead/%d", arg)); } BENCHMARK(BM_MatchOverhead) @@ -141,8 +142,8 @@ TEST_F(BenchmarkTest, Match1WithRandomInterleavingAndZeroOverhead) { // ComputeRandomInterleavingRepetitions() will kick in and rerun each // benchmark once with increased iterations. 
Then number of repetitions will - // be reduced to 1. Thus altogether 4 executions, 2 x BM_MatchOverhead/64, - // and 2 x BM_MatchOverhead/80. + // be reduced to < 100. The first 4 executions should be + // 2 x BM_MatchOverhead/64 and 2 x BM_MatchOverhead/80. std::vector expected( {"BM_MatchOverhead/64", "BM_MatchOverhead/80", "BM_MatchOverhead/64", "BM_MatchOverhead/80"}); @@ -154,7 +155,7 @@ TEST_F(BenchmarkTest, Match1WithRandomInterleavingAndZeroOverhead) { interleaving.push_back(queue->Get()); interleaving.push_back(queue->Get()); EXPECT_THAT(interleaving, testing::UnorderedElementsAreArray(expected)); - ASSERT_EQ("DONE", queue->Get()) << "# Repetitions was not reduced to 1."; + ASSERT_LT(queue->size(), 100) << "# Repetitions was not reduced to < 100."; } TEST(Benchmark, ComputeRandomInterleavingRepetitions) { From 58f3a39b04aecb09c1df4aafe845e8da263ac222 Mon Sep 17 00:00:00 2001 From: Hai Huang Date: Sun, 28 Mar 2021 17:52:56 -0400 Subject: [PATCH 03/21] Fix macos build for fr-1051 Committer: Hai Huang On branch fr-1051 Your branch is up to date with 'origin/fr-1051'. 
Changes to be committed: modified: src/benchmark_api_internal.cc modified: src/benchmark_api_internal.h modified: src/benchmark_runner.cc --- src/benchmark_api_internal.cc | 2 +- src/benchmark_api_internal.h | 4 ++-- src/benchmark_runner.cc | 7 +++---- 3 files changed, 6 insertions(+), 7 deletions(-) diff --git a/src/benchmark_api_internal.cc b/src/benchmark_api_internal.cc index f7e38dc34d..f57b34d805 100644 --- a/src/benchmark_api_internal.cc +++ b/src/benchmark_api_internal.cc @@ -121,7 +121,7 @@ IterationCount BenchmarkInstance::iterations() const { return iterations_; } -int BenchmarkInstance::repetitions() const { +size_t BenchmarkInstance::repetitions() const { return repetitions_; } diff --git a/src/benchmark_api_internal.h b/src/benchmark_api_internal.h index 4b9c16ddb6..dae49fc57a 100644 --- a/src/benchmark_api_internal.h +++ b/src/benchmark_api_internal.h @@ -26,7 +26,7 @@ class BenchmarkInstance { // Const accessors. const BenchmarkName& name() const; - int repetitions() const; + size_t repetitions() const; const std::vector* statistics() const; AggregationReportMode aggregation_report_mode() const; TimeUnit time_unit() const; @@ -80,7 +80,7 @@ class BenchmarkInstance { UserCounters counters_; const std::vector* statistics_; bool last_benchmark_instance_; - int repetitions_; + size_t repetitions_; double min_time_; IterationCount iterations_; int threads_; // Number of concurrent threads to use diff --git a/src/benchmark_runner.cc b/src/benchmark_runner.cc index 60a9f6be82..7f2455a929 100644 --- a/src/benchmark_runner.cc +++ b/src/benchmark_runner.cc @@ -148,7 +148,6 @@ class BenchmarkRunner { run_results(run_results_), outer_repetitions(outer_repetitions_), inner_repetitions(inner_repetitions_), - // min_time(!IsZero(b.min_time()) ? b.min_time() : GetMinTime()), repeats(b.repetitions() != 0 ? 
b.repetitions() : inner_repetitions_), has_explicit_iteration_count(b.iterations() != 0), pool(b.threads() - 1), @@ -167,7 +166,8 @@ class BenchmarkRunner { internal::ARM_FileReportAggregatesOnly); } - for (int repetition_num = 0; repetition_num < repeats; repetition_num++) { + for (size_t repetition_num = 0; repetition_num < repeats; + repetition_num++) { DoOneRepetition(repetition_num); } @@ -192,8 +192,7 @@ class BenchmarkRunner { const size_t outer_repetitions; const size_t inner_repetitions; - // const double min_time; - const int repeats; + const size_t repeats; const bool has_explicit_iteration_count; std::vector pool; From 89d3a78507c2964530fbf0f2bea8def4003460d9 Mon Sep 17 00:00:00 2001 From: Hai Huang Date: Sun, 28 Mar 2021 18:16:52 -0400 Subject: [PATCH 04/21] Fix macos and windows build for fr-1051. Committer: Hai Huang On branch fr-1051 Your branch is up to date with 'origin/fr-1051'. Changes to be committed: modified: src/benchmark_runner.cc --- src/benchmark_runner.cc | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/src/benchmark_runner.cc b/src/benchmark_runner.cc index 7f2455a929..d54fa53724 100644 --- a/src/benchmark_runner.cc +++ b/src/benchmark_runner.cc @@ -148,7 +148,7 @@ class BenchmarkRunner { run_results(run_results_), outer_repetitions(outer_repetitions_), inner_repetitions(inner_repetitions_), - repeats(b.repetitions() != 0 ? b.repetitions() : inner_repetitions_), + repeats(b.repetitions() != 0 ? b.repetitions() : inner_repetitions), has_explicit_iteration_count(b.iterations() != 0), pool(b.threads() - 1), iters(has_explicit_iteration_count ? b.iterations() : 1) { @@ -334,15 +334,15 @@ class BenchmarkRunner { // initialized, do it now. 
if (FLAGS_benchmark_enable_random_interleaving && !b.random_interleaving_repetitions_initialized()) { + InternalRandomInterleavingRepetitionsInput input; + input.total_execution_time_per_repetition = exec_end - exec_start; + input.time_used_per_repetition = i.seconds; + input.real_time_used_per_repetition = i.results.real_time_used; + input.min_time_per_repetition = GetMinTime(); + input.max_overhead = FLAGS_benchmark_random_interleaving_max_overhead; + input.max_repetitions = GetRepetitions(); b.init_random_interleaving_repetitions( - ComputeRandomInterleavingRepetitions( - {.total_execution_time_per_repetition = exec_end - exec_start, - .time_used_per_repetition = i.seconds, - .real_time_used_per_repetition = i.results.real_time_used, - .min_time_per_repetition = GetMinTime(), - .max_overhead = - FLAGS_benchmark_random_interleaving_max_overhead, - .max_repetitions = GetRepetitions()})); + ComputeRandomInterleavingRepetitions(input)); // If the number of repetitions changed, need to rerun the last trial // because iters may also change. Note that we only need to do this // if accumulated_time < b.min_time(), i.e., the iterations we have From 0aadade98c3145b72053ad5d453cbca597fb9d44 Mon Sep 17 00:00:00 2001 From: Hai Huang Date: Sun, 28 Mar 2021 19:24:35 -0400 Subject: [PATCH 05/21] Fix benchmark_random_interleaving_test.cc for macos and windows in fr-1051 Committer: Hai Huang On branch fr-1051 Your branch is up to date with 'origin/fr-1051'. 
Changes to be committed: modified: test/benchmark_random_interleaving_gtest.cc --- test/benchmark_random_interleaving_gtest.cc | 217 ++++---------------- 1 file changed, 45 insertions(+), 172 deletions(-) diff --git a/test/benchmark_random_interleaving_gtest.cc b/test/benchmark_random_interleaving_gtest.cc index a45b566ec6..4b5f70a728 100644 --- a/test/benchmark_random_interleaving_gtest.cc +++ b/test/benchmark_random_interleaving_gtest.cc @@ -65,10 +65,10 @@ class BenchmarkTest : public testing::Test { }; static void BM_Match1(benchmark::State& state) { - const int arg = state.range(0); + const int64_t arg = state.range(0); for (auto _ : state) {} - queue->Put(StrFormat("BM_Match1/%d", arg)); + queue->Put(StrFormat("BM_Match1/%ld", arg)); } BENCHMARK(BM_Match1) ->Iterations(100) @@ -80,10 +80,10 @@ BENCHMARK(BM_Match1) ->Args({100}); static void BM_MatchOverhead(benchmark::State& state) { - const int arg = state.range(0); + const int64_t arg = state.range(0); for (auto _ : state) {} - queue->Put(StrFormat("BM_MatchOverhead/%d", arg)); + queue->Put(StrFormat("BM_MatchOverhead/%ld", arg)); } BENCHMARK(BM_MatchOverhead) ->Iterations(100) @@ -158,235 +158,108 @@ TEST_F(BenchmarkTest, Match1WithRandomInterleavingAndZeroOverhead) { ASSERT_LT(queue->size(), 100) << "# Repetitions was not reduced to < 100."; } +InternalRandomInterleavingRepetitionsInput CreateInput( + double total, double time, double real_time, double min_time, + double overhead, size_t repetitions) { + InternalRandomInterleavingRepetitionsInput input; + input.total_execution_time_per_repetition = total; + input.time_used_per_repetition = time; + input.real_time_used_per_repetition = real_time; + input.min_time_per_repetition = min_time; + input.max_overhead = overhead; + input.max_repetitions = repetitions; + return input; +} + TEST(Benchmark, ComputeRandomInterleavingRepetitions) { // On wall clock time. 
EXPECT_EQ(ComputeRandomInterleavingRepetitions( - {.total_execution_time_per_repetition = 0.05, - .time_used_per_repetition = 0.05, - .real_time_used_per_repetition = 0.05, - .min_time_per_repetition = 0.05, - .max_overhead = 0.0, - .max_repetitions = 10}), + CreateInput(0.05, 0.05, 0.05, 0.05, 0.0, 10)), 10); EXPECT_EQ(ComputeRandomInterleavingRepetitions( - {.total_execution_time_per_repetition = 0.05, - .time_used_per_repetition = 0.05, - .real_time_used_per_repetition = 0.05, - .min_time_per_repetition = 0.05, - .max_overhead = 0.4, - .max_repetitions = 10}), + CreateInput(0.05, 0.05, 0.05, 0.05, 0.4, 10)), 10); EXPECT_EQ(ComputeRandomInterleavingRepetitions( - {.total_execution_time_per_repetition = 0.06, - .time_used_per_repetition = 0.05, - .real_time_used_per_repetition = 0.05, - .min_time_per_repetition = 0.05, - .max_overhead = 0.0, - .max_repetitions = 10}), + CreateInput(0.06, 0.05, 0.05, 0.05, 0.0, 10)), 8); EXPECT_EQ(ComputeRandomInterleavingRepetitions( - {.total_execution_time_per_repetition = 0.06, - .time_used_per_repetition = 0.05, - .real_time_used_per_repetition = 0.05, - .min_time_per_repetition = 0.05, - .max_overhead = 0.4, - .max_repetitions = 10}), + CreateInput(0.06, 0.05, 0.05, 0.05, 0.4, 10)), 10); EXPECT_EQ(ComputeRandomInterleavingRepetitions( - {.total_execution_time_per_repetition = 0.08, - .time_used_per_repetition = 0.05, - .real_time_used_per_repetition = 0.05, - .min_time_per_repetition = 0.05, - .max_overhead = 0.0, - .max_repetitions = 10}), + CreateInput(0.08, 0.05, 0.05, 0.05, 0.0, 10)), 6); EXPECT_EQ(ComputeRandomInterleavingRepetitions( - {.total_execution_time_per_repetition = 0.08, - .time_used_per_repetition = 0.05, - .real_time_used_per_repetition = 0.05, - .min_time_per_repetition = 0.05, - .max_overhead = 0.4, - .max_repetitions = 10}), + CreateInput(0.08, 0.05, 0.05, 0.05, 0.4, 10)), 9); EXPECT_EQ(ComputeRandomInterleavingRepetitions( - {.total_execution_time_per_repetition = 0.25, - .time_used_per_repetition = 
0.25, - .real_time_used_per_repetition = 0.25, - .min_time_per_repetition = 0.05, - .max_overhead = 0.0, - .max_repetitions = 10}), + CreateInput(0.26, 0.25, 0.25, 0.05, 0.0, 10)), 2); EXPECT_EQ(ComputeRandomInterleavingRepetitions( - {.total_execution_time_per_repetition = 0.25, - .time_used_per_repetition = 0.25, - .real_time_used_per_repetition = 0.25, - .min_time_per_repetition = 0.05, - .max_overhead = 0.4, - .max_repetitions = 10}), + CreateInput(0.25, 0.25, 0.25, 0.05, 0.4, 10)), 3); EXPECT_EQ(ComputeRandomInterleavingRepetitions( - {.total_execution_time_per_repetition = 0.26, - .time_used_per_repetition = 0.25, - .real_time_used_per_repetition = 0.25, - .min_time_per_repetition = 0.05, - .max_overhead = 0.0, - .max_repetitions = 10}), + CreateInput(0.26, 0.25, 0.25, 0.05, 0.0, 10)), 2); EXPECT_EQ(ComputeRandomInterleavingRepetitions( - {.total_execution_time_per_repetition = 0.26, - .time_used_per_repetition = 0.25, - .real_time_used_per_repetition = 0.25, - .min_time_per_repetition = 0.05, - .max_overhead = 0.4, - .max_repetitions = 10}), + CreateInput(0.26, 0.25, 0.25, 0.05, 0.4, 10)), 3); EXPECT_EQ(ComputeRandomInterleavingRepetitions( - {.total_execution_time_per_repetition = 0.38, - .time_used_per_repetition = 0.25, - .real_time_used_per_repetition = 0.25, - .min_time_per_repetition = 0.05, - .max_overhead = 0.0, - .max_repetitions = 10}), + CreateInput(0.38, 0.25, 0.25, 0.05, 0.0, 10)), 2); EXPECT_EQ(ComputeRandomInterleavingRepetitions( - {.total_execution_time_per_repetition = 0.38, - .time_used_per_repetition = 0.25, - .real_time_used_per_repetition = 0.25, - .min_time_per_repetition = 0.05, - .max_overhead = 0.4, - .max_repetitions = 10}), + CreateInput(0.38, 0.25, 0.25, 0.05, 0.4, 10)), 3); // On CPU time. 
EXPECT_EQ(ComputeRandomInterleavingRepetitions( - {.total_execution_time_per_repetition = 0.1, - .time_used_per_repetition = 0.05, - .real_time_used_per_repetition = 0.1, - .min_time_per_repetition = 0.05, - .max_overhead = 0.0, - .max_repetitions = 10}), + CreateInput(0.1, 0.05, 0.1, 0.05, 0.0, 10)), 10); EXPECT_EQ(ComputeRandomInterleavingRepetitions( - {.total_execution_time_per_repetition = 0.1, - .time_used_per_repetition = 0.05, - .real_time_used_per_repetition = 0.1, - .min_time_per_repetition = 0.05, - .max_overhead = 0.4, - .max_repetitions = 10}), + CreateInput(0.1, 0.05, 0.1, 0.05, 0.4, 10)), 10); EXPECT_EQ(ComputeRandomInterleavingRepetitions( - {.total_execution_time_per_repetition = 0.11, - .time_used_per_repetition = 0.05, - .real_time_used_per_repetition = 0.1, - .min_time_per_repetition = 0.05, - .max_overhead = 0.0, - .max_repetitions = 10}), + CreateInput(0.11, 0.05, 0.1, 0.05, 0.0, 10)), 9); EXPECT_EQ(ComputeRandomInterleavingRepetitions( - {.total_execution_time_per_repetition = 0.11, - .time_used_per_repetition = 0.05, - .real_time_used_per_repetition = 0.1, - .min_time_per_repetition = 0.05, - .max_overhead = 0.4, - .max_repetitions = 10}), + CreateInput(0.11, 0.05, 0.1, 0.05, 0.4, 10)), 10); EXPECT_EQ(ComputeRandomInterleavingRepetitions( - {.total_execution_time_per_repetition = 0.15, - .time_used_per_repetition = 0.05, - .real_time_used_per_repetition = 0.1, - .min_time_per_repetition = 0.05, - .max_overhead = 0.0, - .max_repetitions = 10}), + CreateInput(0.15, 0.05, 0.1, 0.05, 0.0, 10)), 7); EXPECT_EQ(ComputeRandomInterleavingRepetitions( - {.total_execution_time_per_repetition = 0.15, - .time_used_per_repetition = 0.05, - .real_time_used_per_repetition = 0.1, - .min_time_per_repetition = 0.05, - .max_overhead = 0.4, - .max_repetitions = 10}), + CreateInput(0.15, 0.05, 0.1, 0.05, 0.4, 10)), 9); EXPECT_EQ(ComputeRandomInterleavingRepetitions( - {.total_execution_time_per_repetition = 0.5, - .time_used_per_repetition = 0.25, - 
.real_time_used_per_repetition = 0.5, - .min_time_per_repetition = 0.05, - .max_overhead = 0.0, - .max_repetitions = 10}), + CreateInput(0.5, 0.25, 0.5, 0.05, 0.0, 10)), 2); EXPECT_EQ(ComputeRandomInterleavingRepetitions( - {.total_execution_time_per_repetition = 0.5, - .time_used_per_repetition = 0.25, - .real_time_used_per_repetition = 0.5, - .min_time_per_repetition = 0.05, - .max_overhead = 0.4, - .max_repetitions = 10}), + CreateInput(0.5, 0.25, 0.5, 0.05, 0.4, 10)), 3); EXPECT_EQ(ComputeRandomInterleavingRepetitions( - {.total_execution_time_per_repetition = 0.51, - .time_used_per_repetition = 0.25, - .real_time_used_per_repetition = 0.5, - .min_time_per_repetition = 0.05, - .max_overhead = 0.0, - .max_repetitions = 10}), + CreateInput(0.51, 0.25, 0.5, 0.05, 0.0, 10)), 2); EXPECT_EQ(ComputeRandomInterleavingRepetitions( - {.total_execution_time_per_repetition = 0.51, - .time_used_per_repetition = 0.25, - .real_time_used_per_repetition = 0.5, - .min_time_per_repetition = 0.05, - .max_overhead = 0.4, - .max_repetitions = 10}), + CreateInput(0.51, 0.25, 0.5, 0.05, 0.4, 10)), 3); EXPECT_EQ(ComputeRandomInterleavingRepetitions( - {.total_execution_time_per_repetition = 0.8, - .time_used_per_repetition = 0.25, - .real_time_used_per_repetition = 0.5, - .min_time_per_repetition = 0.05, - .max_overhead = 0.0, - .max_repetitions = 10}), + CreateInput(0.8, 0.25, 0.5, 0.05, 0.0, 10)), 2); EXPECT_EQ(ComputeRandomInterleavingRepetitions( - {.total_execution_time_per_repetition = 0.8, - .time_used_per_repetition = 0.25, - .real_time_used_per_repetition = 0.5, - .min_time_per_repetition = 0.05, - .max_overhead = 0.4, - .max_repetitions = 10}), + CreateInput(0.8, 0.25, 0.5, 0.05, 0.4, 10)), 2); // Corner cases. 
EXPECT_EQ(ComputeRandomInterleavingRepetitions( - {.total_execution_time_per_repetition = 0.0, - .time_used_per_repetition = 0.25, - .real_time_used_per_repetition = 0.5, - .min_time_per_repetition = 0.05, - .max_overhead = 0.4, - .max_repetitions = 10}), + CreateInput(0.0, 0.25, 0.5, 0.05, 0.4, 10)), 3); EXPECT_EQ(ComputeRandomInterleavingRepetitions( - {.total_execution_time_per_repetition = 0.8, - .time_used_per_repetition = 0.0, - .real_time_used_per_repetition = 0.5, - .min_time_per_repetition = 0.05, - .max_overhead = 0.4, - .max_repetitions = 10}), + CreateInput(0.8, 0.0, 0.5, 0.05, 0.4, 10)), 9); EXPECT_EQ(ComputeRandomInterleavingRepetitions( - {.total_execution_time_per_repetition = 0.8, - .time_used_per_repetition = 0.25, - .real_time_used_per_repetition = 0.0, - .min_time_per_repetition = 0.05, - .max_overhead = 0.4, - .max_repetitions = 10}), + CreateInput(0.8, 0.25, 0.0, 0.05, 0.4, 10)), 1); EXPECT_EQ(ComputeRandomInterleavingRepetitions( - {.total_execution_time_per_repetition = 0.8, - .time_used_per_repetition = 0.25, - .real_time_used_per_repetition = 0.5, - .min_time_per_repetition = 0.0, - .max_overhead = 0.4, - .max_repetitions = 10}), + CreateInput(0.8, 0.25, 0.5, 0.0, 0.4, 10)), 1); } From f42e87c966f30ae34c654f1db06996890c436b3b Mon Sep 17 00:00:00 2001 From: Hai Huang Date: Sun, 28 Mar 2021 19:37:07 -0400 Subject: [PATCH 06/21] Fix int type benchmark_random_interleaving_gtest for macos in fr-1051 Committer: Hai Huang On branch fr-1051 Your branch is up to date with 'origin/fr-1051'. 
Changes to be committed: modified: test/benchmark_random_interleaving_gtest.cc --- test/benchmark_random_interleaving_gtest.cc | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/test/benchmark_random_interleaving_gtest.cc b/test/benchmark_random_interleaving_gtest.cc index 4b5f70a728..582beeba27 100644 --- a/test/benchmark_random_interleaving_gtest.cc +++ b/test/benchmark_random_interleaving_gtest.cc @@ -68,7 +68,7 @@ static void BM_Match1(benchmark::State& state) { const int64_t arg = state.range(0); for (auto _ : state) {} - queue->Put(StrFormat("BM_Match1/%ld", arg)); + queue->Put(StrFormat("BM_Match1/%d", static_cast<int>(arg))); } BENCHMARK(BM_Match1) ->Iterations(100) @@ -83,7 +83,7 @@ static void BM_MatchOverhead(benchmark::State& state) { const int64_t arg = state.range(0); for (auto _ : state) {} - queue->Put(StrFormat("BM_MatchOverhead/%ld", arg)); + queue->Put(StrFormat("BM_MatchOverhead/%d", static_cast<int>(arg))); } BENCHMARK(BM_MatchOverhead) ->Iterations(100) From 8660ae121f41c902daed5836cae0e94dfdb584bc Mon Sep 17 00:00:00 2001 From: Hai Huang Date: Tue, 30 Mar 2021 01:16:30 -0400 Subject: [PATCH 07/21] Address dominichamon's comments 03/29 for fr-1051 Committer: Hai Huang On branch fr-1051 Your branch is up to date with 'origin/fr-1051'. Changes to be committed: modified: src/benchmark.cc modified: src/benchmark_api_internal.cc modified: src/benchmark_api_internal.h modified: test/benchmark_random_interleaving_gtest.cc --- src/benchmark.cc | 48 +++++++++++++-------- src/benchmark_api_internal.cc | 10 ++--- src/benchmark_api_internal.h | 14 +++--- test/benchmark_random_interleaving_gtest.cc | 9 ++-- 4 files changed, 47 insertions(+), 34 deletions(-) diff --git a/src/benchmark.cc b/src/benchmark.cc index 8b48e49bac..cf1cd7ec35 100644 --- a/src/benchmark.cc +++ b/src/benchmark.cc @@ -60,8 +60,8 @@ namespace { // Attempt to make each repetition run for at least this much of time. 
-constexpr double kDefaultMinTimeTotal = 0.5; -constexpr size_t kDefaultRepetitions = 12; +constexpr double kDefaultMinTimeTotalSecs = 0.5; +constexpr int64_t kDefaultRepetitions = 12; } // namespace @@ -73,23 +73,30 @@ DEFINE_bool(benchmark_list_tests, false); // linked into the binary are run. DEFINE_string(benchmark_filter, "."); +// Do NOT read these flags directly. Use Get*() to read them. +namespace do_not_read_flag_directly { + // Minimum number of seconds we should run benchmark per repetition before // results are considered significant. For cpu-time based tests, this is the // lower bound on the total cpu time used by all threads that make up the test. // For real-time based tests, this is the lower bound on the elapsed time of the // benchmark execution, regardless of number of threads. If left unset, will use -// 0.5 / 12 if random interleaving is enabled. Otherwise, will use 0.5. +// kDefaultMinTimeTotalSecs / FLAGS_benchmark_repetitions, if random +// interleaving is enabled. Otherwise, will use kDefaultMinTimeTotalSecs. // Do NOT read this flag directly. Use GetMinTime() to read this flag. DEFINE_double(benchmark_min_time, -1.0); // The number of runs of each benchmark. If greater than 1, the mean and // standard deviation of the runs will be reported. By default, the number of -// repetitions is 1 if random interleaving is disabled, and up to 12 if random -// interleaving is enabled. (Read the documentation for random interleaving to -// see why it might be less than 12.) +// repetitions is 1 if random interleaving is disabled, and up to +// kDefaultRepetitions if random interleaving is enabled. (Read the +// documentation for random interleaving to see why it might be less than +// kDefaultRepetitions.) // Do NOT read this flag directly, Use GetRepetitions() to access this flag. DEFINE_int32(benchmark_repetitions, -1); +} // namespace do_not_read_flag_directly + // The maximum overhead allowed for random interleaving. 
A value X means total // execution time under random interleaving is limited by // (1 + X) * original total execution time. Set to 'inf' to allow infinite @@ -147,22 +154,23 @@ const double kSafetyMultiplier = 1.4; // Wraps --benchmark_min_time and returns valid default values if not supplied. double GetMinTime() { - const double min_time = FLAGS_benchmark_min_time; + const double min_time = do_not_read_flag_directly::FLAGS_benchmark_min_time; if (min_time >= 0.0) { return min_time; } if (FLAGS_benchmark_enable_random_interleaving) { - return kDefaultMinTimeTotal / kDefaultRepetitions; + return kDefaultMinTimeTotalSecs / GetRepetitions(); } - return kDefaultMinTimeTotal; + return kDefaultMinTimeTotalSecs; } // Wraps --benchmark_repetitions and return valid default value if not supplied. -size_t GetRepetitions() { - const int repetitions = FLAGS_benchmark_repetitions; +int64_t GetRepetitions() { + const int64_t repetitions = + do_not_read_flag_directly::FLAGS_benchmark_repetitions; if (repetitions >= 0) { - return static_cast(repetitions); + return repetitions; } if (FLAGS_benchmark_enable_random_interleaving) { @@ -324,9 +332,9 @@ void RunBenchmarks(const std::vector& benchmarks, // {Random order of A, B, C, ...}, {Random order of A, B, C, ...}, ... // That is, repetitions is outside of RunBenchmark(), hence the name // outer_repetitions. - size_t inner_repetitions = + int64_t inner_repetitions = FLAGS_benchmark_enable_random_interleaving ? 1 : GetRepetitions(); - size_t outer_repetitions = + int64_t outer_repetitions = FLAGS_benchmark_enable_random_interleaving ? GetRepetitions() : 1; std::vector benchmark_indices(benchmarks.size()); for (size_t i = 0; i < benchmarks.size(); ++i) { @@ -335,7 +343,7 @@ void RunBenchmarks(const std::vector& benchmarks, // 'run_results_vector' and 'benchmarks' are parallel arrays. 
std::vector run_results_vector(benchmarks.size()); - for (size_t i = 0; i < outer_repetitions; i++) { + for (int64_t i = 0; i < outer_repetitions; i++) { if (FLAGS_benchmark_enable_random_interleaving) { std::random_shuffle(benchmark_indices.begin(), benchmark_indices.end()); } @@ -533,10 +541,12 @@ void ParseCommandLineFlags(int* argc, char** argv) { if (ParseBoolFlag(argv[i], "benchmark_list_tests", &FLAGS_benchmark_list_tests) || ParseStringFlag(argv[i], "benchmark_filter", &FLAGS_benchmark_filter) || - ParseDoubleFlag(argv[i], "benchmark_min_time", - &FLAGS_benchmark_min_time) || - ParseInt32Flag(argv[i], "benchmark_repetitions", - &FLAGS_benchmark_repetitions) || + ParseDoubleFlag( + argv[i], "benchmark_min_time", + &do_not_read_flag_directly::FLAGS_benchmark_min_time) || + ParseInt32Flag( + argv[i], "benchmark_repetitions", + &do_not_read_flag_directly::FLAGS_benchmark_repetitions) || ParseBoolFlag(argv[i], "benchmark_enable_random_interleaving", &FLAGS_benchmark_enable_random_interleaving) || ParseDoubleFlag(argv[i], "benchmark_random_interleaving_max_overhead", diff --git a/src/benchmark_api_internal.cc b/src/benchmark_api_internal.cc index f57b34d805..faa974d67d 100644 --- a/src/benchmark_api_internal.cc +++ b/src/benchmark_api_internal.cc @@ -121,7 +121,7 @@ IterationCount BenchmarkInstance::iterations() const { return iterations_; } -size_t BenchmarkInstance::repetitions() const { +int64_t BenchmarkInstance::repetitions() const { return repetitions_; } @@ -140,18 +140,18 @@ double BenchmarkInstance::min_time() const { return min_time_; } -size_t BenchmarkInstance::random_interleaving_repetitions() const { - return random_interleaving_repetitions_ == std::numeric_limits::max() +int64_t BenchmarkInstance::random_interleaving_repetitions() const { + return random_interleaving_repetitions_ == std::numeric_limits::max() ? 
GetRepetitions() : random_interleaving_repetitions_; } bool BenchmarkInstance::random_interleaving_repetitions_initialized() const { - return random_interleaving_repetitions_ != std::numeric_limits::max(); + return random_interleaving_repetitions_ != std::numeric_limits::max(); } void BenchmarkInstance::init_random_interleaving_repetitions( - size_t repetitions) const { + int64_t repetitions) const { random_interleaving_repetitions_ = repetitions; } diff --git a/src/benchmark_api_internal.h b/src/benchmark_api_internal.h index dae49fc57a..7bab901d15 100644 --- a/src/benchmark_api_internal.h +++ b/src/benchmark_api_internal.h @@ -26,7 +26,7 @@ class BenchmarkInstance { // Const accessors. const BenchmarkName& name() const; - size_t repetitions() const; + int64_t repetitions() const; const std::vector* statistics() const; AggregationReportMode aggregation_report_mode() const; TimeUnit time_unit() const; @@ -45,13 +45,13 @@ class BenchmarkInstance { // Returns number of repetitions for Random Interleaving. This will be // initialized later once we finish the first repetition, if Random // Interleaving is enabled. See also ComputeRandominterleavingrepetitions(). - size_t random_interleaving_repetitions() const; + int64_t random_interleaving_repetitions() const; // Returns true if repetitions for Random Interleaving is initialized. bool random_interleaving_repetitions_initialized() const; // Initializes number of repetitions for random interleaving. - void init_random_interleaving_repetitions(size_t repetitions) const; + void init_random_interleaving_repetitions(int64_t repetitions) const; // Setters. @@ -80,14 +80,14 @@ class BenchmarkInstance { UserCounters counters_; const std::vector* statistics_; bool last_benchmark_instance_; - size_t repetitions_; + int64_t repetitions_; double min_time_; IterationCount iterations_; int threads_; // Number of concurrent threads to use // Make it mutable so it can be initialized (mutated) later on a const // instance. 
- mutable size_t random_interleaving_repetitions_ = - std::numeric_limits::max(); + mutable int64_t random_interleaving_repetitions_ = + std::numeric_limits::max(); }; bool FindBenchmarksInternal(const std::string& re, @@ -100,7 +100,7 @@ ConsoleReporter::OutputOptions GetOutputOptions(bool force_no_color = false); double GetMinTime(); -size_t GetRepetitions(); +int64_t GetRepetitions(); } // end namespace internal } // end namespace benchmark diff --git a/test/benchmark_random_interleaving_gtest.cc b/test/benchmark_random_interleaving_gtest.cc index 582beeba27..1b597fcfb9 100644 --- a/test/benchmark_random_interleaving_gtest.cc +++ b/test/benchmark_random_interleaving_gtest.cc @@ -11,7 +11,10 @@ DECLARE_bool(benchmark_enable_random_interleaving); DECLARE_string(benchmark_filter); DECLARE_double(benchmark_random_interleaving_max_overhead); + +namespace do_not_read_flag_directly { DECLARE_int32(benchmark_repetitions); +} // namespace do_not_read_flag_directly namespace benchmark { namespace internal { @@ -104,7 +107,7 @@ TEST_F(BenchmarkTest, Match1) { } TEST_F(BenchmarkTest, Match1WithRepetition) { - FLAGS_benchmark_repetitions = 2; + do_not_read_flag_directly::FLAGS_benchmark_repetitions = 2; Execute("BM_Match1/(64|80)"); ASSERT_EQ("BM_Match1/64", queue->Get()); @@ -116,7 +119,7 @@ TEST_F(BenchmarkTest, Match1WithRepetition) { TEST_F(BenchmarkTest, Match1WithRandomInterleaving) { FLAGS_benchmark_enable_random_interleaving = true; - FLAGS_benchmark_repetitions = 100; + do_not_read_flag_directly::FLAGS_benchmark_repetitions = 100; FLAGS_benchmark_random_interleaving_max_overhead = std::numeric_limits::infinity(); @@ -137,7 +140,7 @@ TEST_F(BenchmarkTest, Match1WithRandomInterleaving) { TEST_F(BenchmarkTest, Match1WithRandomInterleavingAndZeroOverhead) { FLAGS_benchmark_enable_random_interleaving = true; - FLAGS_benchmark_repetitions = 100; + do_not_read_flag_directly::FLAGS_benchmark_repetitions = 100; FLAGS_benchmark_random_interleaving_max_overhead = 0; // 
ComputeRandomInterleavingRepetitions() will kick in and rerun each From 4640ed48f9f9642e0e94437e076dd3c1286a5959 Mon Sep 17 00:00:00 2001 From: Hai Huang Date: Wed, 31 Mar 2021 01:59:51 -0400 Subject: [PATCH 08/21] Address dominichamon's comment on default min_time / repetitions for fr-1051. Also change sentinel of random_interleaving_repetitions to -1. Hopefully it fixes the failures on Windows. Committer: Hai Huang On branch fr-1051 Your branch is up to date with 'origin/fr-1051'. Changes to be committed: modified: src/benchmark.cc modified: src/benchmark_api_internal.cc modified: src/benchmark_api_internal.h --- src/benchmark.cc | 30 +++++++++++------------------- src/benchmark_api_internal.cc | 4 ++-- src/benchmark_api_internal.h | 3 +-- 3 files changed, 14 insertions(+), 23 deletions(-) diff --git a/src/benchmark.cc b/src/benchmark.cc index cf1cd7ec35..05f9a04540 100644 --- a/src/benchmark.cc +++ b/src/benchmark.cc @@ -61,7 +61,7 @@ namespace { // Attempt to make each repetition run for at least this much of time. constexpr double kDefaultMinTimeTotalSecs = 0.5; -constexpr int64_t kDefaultRepetitions = 12; +constexpr int64_t kRandomInterleavingDefaultRepetitions = 12; } // namespace @@ -154,29 +154,21 @@ const double kSafetyMultiplier = 1.4; // Wraps --benchmark_min_time and returns valid default values if not supplied. double GetMinTime() { - const double min_time = do_not_read_flag_directly::FLAGS_benchmark_min_time; - if (min_time >= 0.0) { - return min_time; - } - - if (FLAGS_benchmark_enable_random_interleaving) { - return kDefaultMinTimeTotalSecs / GetRepetitions(); - } - return kDefaultMinTimeTotalSecs; + const double default_min_time = kDefaultMinTimeTotalSecs / GetRepetitions(); + const double flag_min_time = + do_not_read_flag_directly::FLAGS_benchmark_min_time; + return flag_min_time >= 0.0 ? flag_min_time : default_min_time; } // Wraps --benchmark_repetitions and return valid default value if not supplied. 
int64_t GetRepetitions() { - const int64_t repetitions = + const int64_t default_repetitions = + FLAGS_benchmark_enable_random_interleaving + ? kRandomInterleavingDefaultRepetitions + : 1; + const int64_t flag_repetitions = do_not_read_flag_directly::FLAGS_benchmark_repetitions; - if (repetitions >= 0) { - return repetitions; - } - - if (FLAGS_benchmark_enable_random_interleaving) { - return kDefaultRepetitions; - } - return 1; + return flag_repetitions >= 0 ? flag_repetitions : default_repetitions; } // FIXME: wouldn't LTO mess this up? diff --git a/src/benchmark_api_internal.cc b/src/benchmark_api_internal.cc index faa974d67d..6f3a6cce61 100644 --- a/src/benchmark_api_internal.cc +++ b/src/benchmark_api_internal.cc @@ -141,13 +141,13 @@ double BenchmarkInstance::min_time() const { } int64_t BenchmarkInstance::random_interleaving_repetitions() const { - return random_interleaving_repetitions_ == std::numeric_limits::max() + return random_interleaving_repetitions_ < 0 ? GetRepetitions() : random_interleaving_repetitions_; } bool BenchmarkInstance::random_interleaving_repetitions_initialized() const { - return random_interleaving_repetitions_ != std::numeric_limits::max(); + return random_interleaving_repetitions_ >= 0; } void BenchmarkInstance::init_random_interleaving_repetitions( diff --git a/src/benchmark_api_internal.h b/src/benchmark_api_internal.h index 7bab901d15..349c2a0640 100644 --- a/src/benchmark_api_internal.h +++ b/src/benchmark_api_internal.h @@ -86,8 +86,7 @@ class BenchmarkInstance { int threads_; // Number of concurrent threads to use // Make it mutable so it can be initialized (mutated) later on a const // instance. 
- mutable int64_t random_interleaving_repetitions_ = - std::numeric_limits::max(); + mutable int64_t random_interleaving_repetitions_ = -1; }; bool FindBenchmarksInternal(const std::string& re, From 1a2b6dfb131e2b97ddce8e2c6acdfa6ce9d045b8 Mon Sep 17 00:00:00 2001 From: Hai Huang Date: Fri, 2 Apr 2021 11:27:51 -0400 Subject: [PATCH 09/21] Fix windows test failures for fr-1051 Committer: Hai Huang On branch fr-1051 Your branch is up to date with 'origin/fr-1051'. Changes to be committed: modified: src/benchmark_api_internal.cc modified: src/benchmark_runner.cc --- src/benchmark_api_internal.cc | 3 ++- src/benchmark_runner.cc | 2 +- 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/src/benchmark_api_internal.cc b/src/benchmark_api_internal.cc index 6f3a6cce61..b9b8239003 100644 --- a/src/benchmark_api_internal.cc +++ b/src/benchmark_api_internal.cc @@ -66,7 +66,8 @@ BenchmarkInstance::BenchmarkInstance(Benchmark* benchmark, aggregation_report_mode_ = benchmark->aggregation_report_mode_; time_unit_ = benchmark->time_unit_; range_multiplier_ = benchmark->range_multiplier_; - min_time_ = benchmark->min_time_; + min_time_ = + !IsZero(benchmark->min_time_) ? benchmark->min_time_ : GetMinTime(); iterations_ = benchmark->iterations_; repetitions_ = benchmark->repetitions_; measure_process_cpu_time_ = benchmark->measure_process_cpu_time_; diff --git a/src/benchmark_runner.cc b/src/benchmark_runner.cc index d54fa53724..d0da0a6b03 100644 --- a/src/benchmark_runner.cc +++ b/src/benchmark_runner.cc @@ -350,7 +350,7 @@ class BenchmarkRunner { // Otherwise, we will still skip the rerun. rerun_trial = b.random_interleaving_repetitions() < GetRepetitions() && - i.seconds < b.min_time(); + i.seconds < b.min_time() && !has_explicit_iteration_count; } if (!rerun_trial) break; // Good, let's report them! 
From 81c9ab0869ef5546c7809da80da72fe3806eb1b2 Mon Sep 17 00:00:00 2001 From: Hai Huang Date: Tue, 13 Apr 2021 18:52:02 -0400 Subject: [PATCH 10/21] Add license blurb for fr-1051. Committer: Hai Huang On branch fr-1051 Your branch is up to date with 'origin/fr-1051'. Changes to be committed: modified: src/benchmark_adjust_repetitions.cc modified: src/benchmark_adjust_repetitions.h --- src/benchmark_adjust_repetitions.cc | 14 ++++++++++++++ src/benchmark_adjust_repetitions.h | 14 ++++++++++++++ 2 files changed, 28 insertions(+) diff --git a/src/benchmark_adjust_repetitions.cc b/src/benchmark_adjust_repetitions.cc index 31a1cd6b08..665d561b3b 100644 --- a/src/benchmark_adjust_repetitions.cc +++ b/src/benchmark_adjust_repetitions.cc @@ -1,3 +1,17 @@ +// Copyright 2015 Google Inc. All rights reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + #include "benchmark_adjust_repetitions.h" #include "benchmark_api_internal.h" diff --git a/src/benchmark_adjust_repetitions.h b/src/benchmark_adjust_repetitions.h index eeb69ff65c..2b82ecc40a 100644 --- a/src/benchmark_adjust_repetitions.h +++ b/src/benchmark_adjust_repetitions.h @@ -1,3 +1,17 @@ +// Copyright 2015 Google Inc. All rights reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + #ifndef BENCHMARK_ADJUST_REPETITIONS_H #define BENCHMARK_ADJUST_REPETITIONS_H From 70fe24ac765c799e45ec37f1bb3ada9d60eca951 Mon Sep 17 00:00:00 2001 From: Hai Huang Date: Mon, 19 Apr 2021 13:03:55 -0400 Subject: [PATCH 11/21] Switch to std::shuffle() for fr-1105. Committer: Hai Huang On branch fr-1051 Your branch is up to date with 'origin/fr-1051'. Changes to be committed: modified: src/benchmark.cc --- src/benchmark.cc | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/src/benchmark.cc b/src/benchmark.cc index 05f9a04540..649863479a 100644 --- a/src/benchmark.cc +++ b/src/benchmark.cc @@ -333,11 +333,13 @@ void RunBenchmarks(const std::vector& benchmarks, benchmark_indices[i] = i; } + std::random_device rd; + std::mt19937 g(rd()); // 'run_results_vector' and 'benchmarks' are parallel arrays. std::vector run_results_vector(benchmarks.size()); for (int64_t i = 0; i < outer_repetitions; i++) { if (FLAGS_benchmark_enable_random_interleaving) { - std::random_shuffle(benchmark_indices.begin(), benchmark_indices.end()); + std::shuffle(benchmark_indices.begin(), benchmark_indices.end(), g); } for (size_t j : benchmark_indices) { // Repetitions will be automatically adjusted under random interleaving. From 4966e9021bba26ebe945cf37df5221d7faa97d49 Mon Sep 17 00:00:00 2001 From: Hai Huang Date: Thu, 29 Apr 2021 02:48:03 -0400 Subject: [PATCH 12/21] Change to 1e-9 in fr-1105 Committer: Hai Huang On branch fr-1051 Your branch is up to date with 'origin/fr-1051'. 
Changes to be committed: modified: src/benchmark_adjust_repetitions.cc --- src/benchmark_adjust_repetitions.cc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/benchmark_adjust_repetitions.cc b/src/benchmark_adjust_repetitions.cc index 665d561b3b..e5c618a53a 100644 --- a/src/benchmark_adjust_repetitions.cc +++ b/src/benchmark_adjust_repetitions.cc @@ -22,7 +22,7 @@ namespace internal { namespace { -constexpr double kNanosecondInSecond = 0.000000001; +constexpr double kNanosecondInSecond = 1e-9; } // namespace From 9399f14c9acab50c1ede9542394a5e824ffad83b Mon Sep 17 00:00:00 2001 From: Hai Huang Date: Thu, 29 Apr 2021 03:20:23 -0400 Subject: [PATCH 13/21] Fix broken build caused by bad merge for fr-1105. Committer: Hai Huang On branch fr-1051 Your branch is up to date with 'origin/fr-1051'. Changes to be committed: modified: src/benchmark_api_internal.cc modified: src/benchmark_runner.cc --- src/benchmark_api_internal.cc | 4 ++-- src/benchmark_runner.cc | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/src/benchmark_api_internal.cc b/src/benchmark_api_internal.cc index dcfc483f5b..8a34200e33 100644 --- a/src/benchmark_api_internal.cc +++ b/src/benchmark_api_internal.cc @@ -160,9 +160,9 @@ State BenchmarkInstance::Run( IterationCount iters, int thread_id, internal::ThreadTimer* timer, internal::ThreadManager* manager, internal::PerfCountersMeasurement* perf_counters_measurement) const { - State st(iters, arg, thread_id, threads, timer, manager, + State st(iters, args_, thread_id, threads_, timer, manager, perf_counters_measurement); - benchmark->Run(st); + benchmark_->Run(st); return st; } diff --git a/src/benchmark_runner.cc b/src/benchmark_runner.cc index a397d49812..77ff163b63 100644 --- a/src/benchmark_runner.cc +++ b/src/benchmark_runner.cc @@ -170,7 +170,7 @@ class BenchmarkRunner { (b.aggregation_report_mode() & internal::ARM_DisplayReportAggregatesOnly); run_results->file_report_aggregates_only = - 
(b.aggregation_report_mode() & + (b.aggregation_report_mode() & internal::ARM_FileReportAggregatesOnly); CHECK(b.threads() == 1 || !perf_counters_measurement.IsValid()) << "Perf counters are not supported in multi-threaded cases.\n"; @@ -263,7 +263,7 @@ class BenchmarkRunner { // By using KeepRunningBatch a benchmark can iterate more times than // requested, so take the iteration count from i.results. - i.iters = i.results.iterations / b.threads; + i.iters = i.results.iterations / b.threads(); // Base decisions off of real time if requested by this benchmark. i.seconds = i.results.cpu_time_used; From bb8e0e95a0275e1614a5ae186c20f45a0d9e0dfd Mon Sep 17 00:00:00 2001 From: Hai Huang Date: Mon, 10 May 2021 14:56:27 -0400 Subject: [PATCH 14/21] Fix build breakage for fr-1051. Committer: Hai Huang On branch fr-1051 Your branch is up to date with 'origin/fr-1051'. Changes to be committed: modified: src/benchmark.cc modified: src/benchmark_api_internal.cc modified: src/benchmark_api_internal.h modified: src/benchmark_register.cc modified: src/benchmark_runner.cc --- src/benchmark.cc | 4 ++-- src/benchmark_api_internal.cc | 4 ++-- src/benchmark_api_internal.h | 5 +++++ src/benchmark_register.cc | 2 +- src/benchmark_runner.cc | 7 +++---- 5 files changed, 13 insertions(+), 9 deletions(-) diff --git a/src/benchmark.cc b/src/benchmark.cc index 300b3153a3..cbe8df9470 100644 --- a/src/benchmark.cc +++ b/src/benchmark.cc @@ -158,7 +158,7 @@ namespace internal { DEFINE_kvpairs(benchmark_context, {}); std::map* global_context = nullptr; - + // Performance measurements always come with random variances. Defines a // factor by which the required number of iterations is overestimated in order // to reduce the probability that the minimum time requirement will not be met. @@ -371,7 +371,7 @@ void RunBenchmarks(const std::vector& benchmarks, for (size_t j : benchmark_indices) { // Repetitions will be automatically adjusted under random interleaving. 
if (!FLAGS_benchmark_enable_random_interleaving || - i < benchmarks[j].random_interleaving_repetitions()) { + i < benchmarks[j].RandomInterleavingRepetitions()) { RunBenchmark(benchmarks[j], outer_repetitions, inner_repetitions, &complexity_reports, &run_results_vector[j]); } diff --git a/src/benchmark_api_internal.cc b/src/benchmark_api_internal.cc index 7ad74708c7..0d38e2d8b9 100644 --- a/src/benchmark_api_internal.cc +++ b/src/benchmark_api_internal.cc @@ -73,7 +73,7 @@ BenchmarkInstance::BenchmarkInstance(Benchmark* benchmark, } name_.time_type += "real_time"; } - + if (!benchmark_.thread_counts_.empty()) { name_.threads = StrFormat("threads:%d", threads_); } @@ -85,7 +85,7 @@ double BenchmarkInstance::MinTime() const { // random_interleaving_repetitions(). Dividing // total execution time by random_interleaving_repetitions() gives // the adjusted min_time per repetition. - return min_time_ * GetRepetitions() / random_interleaving_repetitions(); + return min_time_ * GetRepetitions() / RandomInterleavingRepetitions(); } return min_time_; } diff --git a/src/benchmark_api_internal.h b/src/benchmark_api_internal.h index 02441aca88..33a0864976 100644 --- a/src/benchmark_api_internal.h +++ b/src/benchmark_api_internal.h @@ -73,6 +73,11 @@ class BenchmarkInstance { bool use_manual_time_; BigO complexity_; BigOFunc* complexity_lambda_; + std::vector statistics_; + int repetitions_; + double min_time_; + IterationCount iterations_; + int threads_; UserCounters counters_; mutable int64_t random_interleaving_repetitions_ = -1; }; diff --git a/src/benchmark_register.cc b/src/benchmark_register.cc index e5e5db63a5..1f0dcd1d0e 100644 --- a/src/benchmark_register.cc +++ b/src/benchmark_register.cc @@ -159,7 +159,7 @@ bool BenchmarkFamilies::FindBenchmarks( const auto full_name = instance.name().str(); if ((re.Match(full_name) && !isNegativeFilter) || (!re.Match(full_name) && isNegativeFilter)) { - instance.set_last_benchmark_instance(&args == &family->args_.back()); + 
instance.last_benchmark_instance = (&args == &family->args_.back()); benchmarks->push_back(std::move(instance)); } } diff --git a/src/benchmark_runner.cc b/src/benchmark_runner.cc index 50135b2db1..0b8a906b17 100644 --- a/src/benchmark_runner.cc +++ b/src/benchmark_runner.cc @@ -152,7 +152,6 @@ class BenchmarkRunner { outer_repetitions(outer_repetitions_), inner_repetitions(inner_repetitions_), repeats(b.repetitions() != 0 ? b.repetitions() : inner_repetitions), - min_time(!IsZero(b.min_time()) ? b.min_time() : FLAGS_benchmark_min_time), has_explicit_iteration_count(b.iterations() != 0), pool(b.threads() - 1), iters(has_explicit_iteration_count ? b.iterations() : 1), @@ -352,7 +351,7 @@ class BenchmarkRunner { // If random interleaving is enabled and the repetitions is not // initialized, do it now. if (FLAGS_benchmark_enable_random_interleaving && - !b.random_interleaving_repetitions_initialized()) { + !b.RandomInterleavingRepetitionsInitialized()) { InternalRandomInterleavingRepetitionsInput input; input.total_execution_time_per_repetition = exec_end - exec_start; input.time_used_per_repetition = i.seconds; @@ -360,7 +359,7 @@ class BenchmarkRunner { input.min_time_per_repetition = GetMinTime(); input.max_overhead = FLAGS_benchmark_random_interleaving_max_overhead; input.max_repetitions = GetRepetitions(); - b.init_random_interleaving_repetitions( + b.InitRandomInterleavingRepetitions( ComputeRandomInterleavingRepetitions(input)); // If the number of repetitions changed, need to rerun the last trial // because iters may also change. Note that we only need to do this @@ -368,7 +367,7 @@ class BenchmarkRunner { // run is not enough for the already adjusted b.min_time(). // Otherwise, we will still skip the rerun. 
rerun_trial = - b.random_interleaving_repetitions() < GetRepetitions() && + b.RandomInterleavingRepetitions() < GetRepetitions() && i.seconds < b.min_time() && !has_explicit_iteration_count; } From 32e86fcb4aecec8722b59ff8a71c7382b37b5aec Mon Sep 17 00:00:00 2001 From: Hai Huang Date: Mon, 10 May 2021 15:22:36 -0400 Subject: [PATCH 15/21] Print out reports as they come in if random interleaving is disabled (fr-1051) Committer: Hai Huang On branch fr-1051 Your branch is up to date with 'origin/fr-1051'. Changes to be committed: modified: src/benchmark.cc --- src/benchmark.cc | 51 +++++++++++++++++++++++++++++++----------------- 1 file changed, 33 insertions(+), 18 deletions(-) diff --git a/src/benchmark.cc b/src/benchmark.cc index cbe8df9470..50f6ebbbef 100644 --- a/src/benchmark.cc +++ b/src/benchmark.cc @@ -360,6 +360,17 @@ void RunBenchmarks(const std::vector& benchmarks, benchmark_indices[i] = i; } + auto report = [](BenchmarkReporter* reporter, bool report_aggregates_only, + const RunResults& run_results) { + assert(reporter); + // If there are no aggregates, do output non-aggregates. + report_aggregates_only &= !run_results.aggregates_only.empty(); + if (!report_aggregates_only) + reporter->ReportRuns(run_results.non_aggregates); + if (!run_results.aggregates_only.empty()) + reporter->ReportRuns(run_results.aggregates_only); + }; + std::random_device rd; std::mt19937 g(rd()); // 'run_results_vector' and 'benchmarks' are parallel arrays. @@ -374,30 +385,34 @@ void RunBenchmarks(const std::vector& benchmarks, i < benchmarks[j].RandomInterleavingRepetitions()) { RunBenchmark(benchmarks[j], outer_repetitions, inner_repetitions, &complexity_reports, &run_results_vector[j]); + if (!FLAGS_benchmark_enable_random_interleaving) { + // Print out reports as they come in. 
+ const RunResults& run_results = run_results_vector.at(j); + report(display_reporter, run_results.display_report_aggregates_only, + run_results); + if (file_reporter) + report(file_reporter, run_results.file_report_aggregates_only, + run_results); + + flushStreams(display_reporter); + flushStreams(file_reporter); + } } } } - auto report = [](BenchmarkReporter* reporter, bool report_aggregates_only, - const RunResults& run_results) { - assert(reporter); - // If there are no aggregates, do output non-aggregates. - report_aggregates_only &= !run_results.aggregates_only.empty(); - if (!report_aggregates_only) - reporter->ReportRuns(run_results.non_aggregates); - if (!run_results.aggregates_only.empty()) - reporter->ReportRuns(run_results.aggregates_only); - }; - - for (const RunResults& run_results : run_results_vector) { - report(display_reporter, run_results.display_report_aggregates_only, - run_results); - if (file_reporter) - report(file_reporter, run_results.file_report_aggregates_only, + if (FLAGS_benchmark_enable_random_interleaving) { + // Print out all reports at the end of the test. + for (const RunResults& run_results : run_results_vector) { + report(display_reporter, run_results.display_report_aggregates_only, run_results); + if (file_reporter) + report(file_reporter, run_results.file_report_aggregates_only, + run_results); - flushStreams(display_reporter); - flushStreams(file_reporter); + flushStreams(display_reporter); + flushStreams(file_reporter); + } } } display_reporter->Finalize(); From 1bc21736f42fa735e84307dc88a647cfaea6e8f0 Mon Sep 17 00:00:00 2001 From: Hai Huang Date: Wed, 12 May 2021 13:00:40 -0400 Subject: [PATCH 16/21] size_t, int64_t --> int in benchmark_runner for fr-1051. Committer: Hai Huang On branch fr-1051 Your branch is up to date with 'origin/fr-1051'. 
Changes to be committed: modified: src/benchmark_runner.cc modified: src/benchmark_runner.h --- src/benchmark_runner.cc | 37 ++++++++++++++++++------------------- src/benchmark_runner.h | 2 +- 2 files changed, 19 insertions(+), 20 deletions(-) diff --git a/src/benchmark_runner.cc b/src/benchmark_runner.cc index 0b8a906b17..1320136e30 100644 --- a/src/benchmark_runner.cc +++ b/src/benchmark_runner.cc @@ -71,7 +71,7 @@ BenchmarkReporter::Run CreateRunReport( const internal::ThreadManager::Result& results, IterationCount memory_iterations, const MemoryManager::Result& memory_result, double seconds, - int64_t repetition_index) { + int repetition_index) { // Create report about this benchmark run. BenchmarkReporter::Run report; @@ -142,13 +142,13 @@ void RunInThread(const BenchmarkInstance* b, IterationCount iters, class BenchmarkRunner { public: BenchmarkRunner(const benchmark::internal::BenchmarkInstance& b_, - const size_t outer_repetitions_, - const size_t inner_repetitions_, + int outer_repetitions_, + int inner_repetitions_, std::vector* complexity_reports_, RunResults* run_results_) : b(b_), complexity_reports(*complexity_reports_), - run_results(run_results_), + run_results(*run_results_), outer_repetitions(outer_repetitions_), inner_repetitions(inner_repetitions_), repeats(b.repetitions() != 0 ? b.repetitions() : inner_repetitions), @@ -160,16 +160,16 @@ class BenchmarkRunner { perf_counters_measurement_ptr(perf_counters_measurement.IsValid() ? 
&perf_counters_measurement : nullptr) { - run_results->display_report_aggregates_only = + run_results.display_report_aggregates_only = (FLAGS_benchmark_report_aggregates_only || FLAGS_benchmark_display_aggregates_only); - run_results->file_report_aggregates_only = + run_results.file_report_aggregates_only = FLAGS_benchmark_report_aggregates_only; if (b.aggregation_report_mode() != internal::ARM_Unspecified) { - run_results->display_report_aggregates_only = + run_results.display_report_aggregates_only = (b.aggregation_report_mode() & internal::ARM_DisplayReportAggregatesOnly); - run_results->file_report_aggregates_only = + run_results.file_report_aggregates_only = (b.aggregation_report_mode() & internal::ARM_FileReportAggregatesOnly); CHECK(b.threads() == 1 || !perf_counters_measurement.IsValid()) @@ -179,18 +179,17 @@ class BenchmarkRunner { << "Perf counters were requested but could not be set up."; } - for (size_t repetition_num = 0; repetition_num < repeats; - repetition_num++) { + for (int repetition_num = 0; repetition_num < repeats; repetition_num++) { DoOneRepetition(repetition_num); } // Calculate additional statistics - run_results->aggregates_only = ComputeStats(run_results->non_aggregates); + run_results.aggregates_only = ComputeStats(run_results.non_aggregates); // Maybe calculate complexity report if ((b.complexity() != oNone) && b.last_benchmark_instance) { auto additional_run_stats = ComputeBigO(complexity_reports); - run_results->aggregates_only.insert(run_results->aggregates_only.end(), + run_results.aggregates_only.insert(run_results.aggregates_only.end(), additional_run_stats.begin(), additional_run_stats.end()); complexity_reports.clear(); @@ -201,11 +200,11 @@ class BenchmarkRunner { const benchmark::internal::BenchmarkInstance& b; std::vector& complexity_reports; - RunResults* run_results = nullptr; + RunResults& run_results; - const size_t outer_repetitions; - const size_t inner_repetitions; - const size_t repeats; + const int 
outer_repetitions; + const int inner_repetitions; + const int repeats; const bool has_explicit_iteration_count; std::vector pool; @@ -317,7 +316,7 @@ class BenchmarkRunner { !b.use_manual_time()); } - void DoOneRepetition(int64_t repetition_index) { + void DoOneRepetition(int repetition_index) { const bool is_the_first_repetition = repetition_index == 0; IterationResults i; @@ -410,14 +409,14 @@ class BenchmarkRunner { if (!report.error_occurred && b.complexity() != oNone) complexity_reports.push_back(report); - run_results->non_aggregates.push_back(report); + run_results.non_aggregates.push_back(report); } }; } // end namespace void RunBenchmark(const benchmark::internal::BenchmarkInstance& b, - const size_t outer_repetitions, const size_t inner_repetitions, + const int outer_repetitions, const int inner_repetitions, std::vector* complexity_reports, RunResults* run_results) { internal::BenchmarkRunner r(b, outer_repetitions, inner_repetitions, diff --git a/src/benchmark_runner.h b/src/benchmark_runner.h index 326cd79979..e29aa32306 100644 --- a/src/benchmark_runner.h +++ b/src/benchmark_runner.h @@ -43,7 +43,7 @@ struct RunResults { }; void RunBenchmark(const benchmark::internal::BenchmarkInstance& b, - const size_t outer_repetitions, const size_t inner_repetitions, + int outer_repetitions, int inner_repetitions, std::vector* complexity_reports, RunResults* run_results); From ce7220ac86b1cd035c7b10bec626b840f5c8f81e Mon Sep 17 00:00:00 2001 From: Hai Huang Date: Fri, 14 May 2021 00:19:18 -0400 Subject: [PATCH 17/21] Address comments from dominichamon for fr-1051 Committer: Hai Huang On branch fr-1051 Your branch is up to date with 'origin/fr-1051'. 
Changes to be committed: modified: src/benchmark.cc modified: src/benchmark_adjust_repetitions.cc modified: src/benchmark_adjust_repetitions.h modified: src/benchmark_api_internal.cc modified: src/benchmark_api_internal.h modified: test/benchmark_random_interleaving_gtest.cc --- src/benchmark.cc | 65 ++++++++++----------- src/benchmark_adjust_repetitions.cc | 8 +-- src/benchmark_adjust_repetitions.h | 4 +- src/benchmark_api_internal.cc | 4 +- src/benchmark_api_internal.h | 8 +-- test/benchmark_random_interleaving_gtest.cc | 2 +- 6 files changed, 45 insertions(+), 46 deletions(-) diff --git a/src/benchmark.cc b/src/benchmark.cc index 50f6ebbbef..46cf073d7c 100644 --- a/src/benchmark.cc +++ b/src/benchmark.cc @@ -64,7 +64,7 @@ namespace { // Attempt to make each repetition run for at least this much of time. constexpr double kDefaultMinTimeTotalSecs = 0.5; -constexpr int64_t kRandomInterleavingDefaultRepetitions = 12; +constexpr int kRandomInterleavingDefaultRepetitions = 12; } // namespace @@ -173,12 +173,12 @@ double GetMinTime() { } // Wraps --benchmark_repetitions and return valid default value if not supplied. -int64_t GetRepetitions() { - const int64_t default_repetitions = +int GetRepetitions() { + const int default_repetitions = FLAGS_benchmark_enable_random_interleaving ? kRandomInterleavingDefaultRepetitions : 1; - const int64_t flag_repetitions = + const int flag_repetitions = do_not_read_flag_directly::FLAGS_benchmark_repetitions; return flag_repetitions >= 0 ? flag_repetitions : default_repetitions; } @@ -351,35 +351,48 @@ void RunBenchmarks(const std::vector& benchmarks, // {Random order of A, B, C, ...}, {Random order of A, B, C, ...}, ... // That is, repetitions is outside of RunBenchmark(), hence the name // outer_repetitions. - int64_t inner_repetitions = + int inner_repetitions = FLAGS_benchmark_enable_random_interleaving ? 1 : GetRepetitions(); - int64_t outer_repetitions = + int outer_repetitions = FLAGS_benchmark_enable_random_interleaving ? 
GetRepetitions() : 1; std::vector benchmark_indices(benchmarks.size()); for (size_t i = 0; i < benchmarks.size(); ++i) { benchmark_indices[i] = i; } - auto report = [](BenchmarkReporter* reporter, bool report_aggregates_only, - const RunResults& run_results) { - assert(reporter); - // If there are no aggregates, do output non-aggregates. - report_aggregates_only &= !run_results.aggregates_only.empty(); - if (!report_aggregates_only) - reporter->ReportRuns(run_results.non_aggregates); - if (!run_results.aggregates_only.empty()) - reporter->ReportRuns(run_results.aggregates_only); + auto report = [flushStreams, display_reporter, file_reporter]( + const RunResults& run_results) { + auto report_one = [](BenchmarkReporter* reporter, + bool aggregates_only, + const RunResults& results) { + assert(reporter); + // If there are no aggregates, do output non-aggregates. + aggregates_only &= !results.aggregates_only.empty(); + if (!aggregates_only) + reporter->ReportRuns(results.non_aggregates); + if (!results.aggregates_only.empty()) + reporter->ReportRuns(results.aggregates_only); + }; + + report_one(display_reporter, run_results.display_report_aggregates_only, + run_results); + if (file_reporter) + report_one(file_reporter, run_results.file_report_aggregates_only, + run_results); + + flushStreams(display_reporter); + flushStreams(file_reporter); }; std::random_device rd; std::mt19937 g(rd()); // 'run_results_vector' and 'benchmarks' are parallel arrays. std::vector run_results_vector(benchmarks.size()); - for (int64_t i = 0; i < outer_repetitions; i++) { + for (int i = 0; i < outer_repetitions; i++) { if (FLAGS_benchmark_enable_random_interleaving) { std::shuffle(benchmark_indices.begin(), benchmark_indices.end(), g); } - for (size_t j : benchmark_indices) { + for (int j : benchmark_indices) { // Repetitions will be automatically adjusted under random interleaving. 
if (!FLAGS_benchmark_enable_random_interleaving || i < benchmarks[j].RandomInterleavingRepetitions()) { @@ -388,14 +401,7 @@ void RunBenchmarks(const std::vector& benchmarks, if (!FLAGS_benchmark_enable_random_interleaving) { // Print out reports as they come in. const RunResults& run_results = run_results_vector.at(j); - report(display_reporter, run_results.display_report_aggregates_only, - run_results); - if (file_reporter) - report(file_reporter, run_results.file_report_aggregates_only, - run_results); - - flushStreams(display_reporter); - flushStreams(file_reporter); + report(run_results); } } } @@ -404,14 +410,7 @@ void RunBenchmarks(const std::vector& benchmarks, if (FLAGS_benchmark_enable_random_interleaving) { // Print out all reports at the end of the test. for (const RunResults& run_results : run_results_vector) { - report(display_reporter, run_results.display_report_aggregates_only, - run_results); - if (file_reporter) - report(file_reporter, run_results.file_report_aggregates_only, - run_results); - - flushStreams(display_reporter); - flushStreams(file_reporter); + report(run_results); } } } diff --git a/src/benchmark_adjust_repetitions.cc b/src/benchmark_adjust_repetitions.cc index e5c618a53a..2847927628 100644 --- a/src/benchmark_adjust_repetitions.cc +++ b/src/benchmark_adjust_repetitions.cc @@ -26,7 +26,7 @@ constexpr double kNanosecondInSecond = 1e-9; } // namespace -size_t ComputeRandomInterleavingRepetitions( +int ComputeRandomInterleavingRepetitions( InternalRandomInterleavingRepetitionsInput input) { // Find the repetitions such that total overhead is bounded. Let // n = desired number of repetitions, i.e., the output of this method. 
@@ -96,7 +96,7 @@ size_t ComputeRandomInterleavingRepetitions( double n = (1 + input.max_overhead) * e / (h + r); n = std::min(std::max(n, 1.0), static_cast(input.max_repetitions)); - size_t n_size_t = static_cast(n); + int n_int = static_cast(n); VLOG(2) << "Computed random interleaving repetitions" << "\n input.total_execution_time_per_repetition: " @@ -116,9 +116,9 @@ size_t ComputeRandomInterleavingRepetitions( << "\n m: " << m << "\n e: " << e << "\n n: " << n - << "\n n_size_t: " << n_size_t; + << "\n n_int: " << n_int; - return n_size_t; + return n_int; } } // internal diff --git a/src/benchmark_adjust_repetitions.h b/src/benchmark_adjust_repetitions.h index 2b82ecc40a..21a666afe0 100644 --- a/src/benchmark_adjust_repetitions.h +++ b/src/benchmark_adjust_repetitions.h @@ -28,12 +28,12 @@ struct InternalRandomInterleavingRepetitionsInput { double real_time_used_per_repetition; double min_time_per_repetition; double max_overhead; - size_t max_repetitions; + int max_repetitions; }; // Should be called right after the first repetition is completed to estimate // the number of iterations. -size_t ComputeRandomInterleavingRepetitions( +int ComputeRandomInterleavingRepetitions( InternalRandomInterleavingRepetitionsInput input); } // end namespace internal diff --git a/src/benchmark_api_internal.cc b/src/benchmark_api_internal.cc index 0d38e2d8b9..06a4344330 100644 --- a/src/benchmark_api_internal.cc +++ b/src/benchmark_api_internal.cc @@ -90,7 +90,7 @@ double BenchmarkInstance::MinTime() const { return min_time_; } -int64_t BenchmarkInstance::RandomInterleavingRepetitions() const { +int BenchmarkInstance::RandomInterleavingRepetitions() const { return random_interleaving_repetitions_ < 0 ? 
GetRepetitions() : random_interleaving_repetitions_; @@ -101,7 +101,7 @@ bool BenchmarkInstance::RandomInterleavingRepetitionsInitialized() const { } void BenchmarkInstance::InitRandomInterleavingRepetitions( - int64_t repetitions) const { + int repetitions) const { random_interleaving_repetitions_ = repetitions; } diff --git a/src/benchmark_api_internal.h b/src/benchmark_api_internal.h index 33a0864976..aff5528a57 100644 --- a/src/benchmark_api_internal.h +++ b/src/benchmark_api_internal.h @@ -32,13 +32,13 @@ class BenchmarkInstance { // Returns number of repetitions for Random Interleaving. This will be // initialized later once we finish the first repetition, if Random // Interleaving is enabled. See also ComputeRandominterleavingrepetitions(). - int64_t RandomInterleavingRepetitions() const; + int RandomInterleavingRepetitions() const; // Returns true if repetitions for Random Interleaving is initialized. bool RandomInterleavingRepetitionsInitialized() const; // Initializes number of repetitions for random interleaving. 
- void InitRandomInterleavingRepetitions(int64_t repetitions) const; + void InitRandomInterleavingRepetitions(int repetitions) const; const BenchmarkName& name() const { return name_; } AggregationReportMode aggregation_report_mode() const { @@ -79,7 +79,7 @@ class BenchmarkInstance { IterationCount iterations_; int threads_; UserCounters counters_; - mutable int64_t random_interleaving_repetitions_ = -1; + mutable int random_interleaving_repetitions_ = -1; }; bool FindBenchmarksInternal(const std::string& re, @@ -92,7 +92,7 @@ ConsoleReporter::OutputOptions GetOutputOptions(bool force_no_color = false); double GetMinTime(); -int64_t GetRepetitions(); +int GetRepetitions(); } // end namespace internal } // end namespace benchmark diff --git a/test/benchmark_random_interleaving_gtest.cc b/test/benchmark_random_interleaving_gtest.cc index 1b597fcfb9..5e8329a4e6 100644 --- a/test/benchmark_random_interleaving_gtest.cc +++ b/test/benchmark_random_interleaving_gtest.cc @@ -163,7 +163,7 @@ TEST_F(BenchmarkTest, Match1WithRandomInterleavingAndZeroOverhead) { InternalRandomInterleavingRepetitionsInput CreateInput( double total, double time, double real_time, double min_time, - double overhead, size_t repetitions) { + double overhead, int repetitions) { InternalRandomInterleavingRepetitionsInput input; input.total_execution_time_per_repetition = total; input.time_used_per_repetition = time; From 81ac7fe3d1ae74eab725540d2de5dc0e4e6ceb85 Mon Sep 17 00:00:00 2001 From: Hai Huang Date: Sat, 15 May 2021 20:04:53 -0400 Subject: [PATCH 18/21] benchmark_indices --> size_t to make CI pass: fr-1051 Committer: Hai Huang On branch fr-1051 Your branch is up to date with 'origin/fr-1051'.
Changes to be committed: modified: src/benchmark.cc --- src/benchmark.cc | 81 ++++++++++++++++++++++++------------------------ 1 file changed, 41 insertions(+), 40 deletions(-) diff --git a/src/benchmark.cc b/src/benchmark.cc index 46cf073d7c..272794e147 100644 --- a/src/benchmark.cc +++ b/src/benchmark.cc @@ -302,6 +302,39 @@ void State::FinishKeepRunning() { namespace internal { namespace { +// Flushes streams after invoking reporter methods that write to them. This +// ensures users get timely updates even when streams are not line-buffered. +void FlushStreams(BenchmarkReporter* reporter) { + if (!reporter) return; + std::flush(reporter->GetOutputStream()); + std::flush(reporter->GetErrorStream()); +}; + +// Reports in both display and file reporters. +void Report(BenchmarkReporter* display_reporter, + BenchmarkReporter* file_reporter, const RunResults& run_results) { + auto report_one = [](BenchmarkReporter* reporter, + bool aggregates_only, + const RunResults& results) { + assert(reporter); + // If there are no aggregates, do output non-aggregates. + aggregates_only &= !results.aggregates_only.empty(); + if (!aggregates_only) + reporter->ReportRuns(results.non_aggregates); + if (!results.aggregates_only.empty()) + reporter->ReportRuns(results.aggregates_only); + }; + + report_one(display_reporter, run_results.display_report_aggregates_only, + run_results); + if (file_reporter) + report_one(file_reporter, run_results.file_report_aggregates_only, + run_results); + + FlushStreams(display_reporter); + FlushStreams(file_reporter); +}; + void RunBenchmarks(const std::vector& benchmarks, BenchmarkReporter* display_reporter, BenchmarkReporter* file_reporter) { @@ -330,18 +363,10 @@ void RunBenchmarks(const std::vector& benchmarks, // Keep track of running times of all instances of current benchmark std::vector complexity_reports; - // We flush streams after invoking reporter methods that write to them. 
This - // ensures users get timely updates even when streams are not line-buffered. - auto flushStreams = [](BenchmarkReporter* reporter) { - if (!reporter) return; - std::flush(reporter->GetOutputStream()); - std::flush(reporter->GetErrorStream()); - }; - if (display_reporter->ReportContext(context) && (!file_reporter || file_reporter->ReportContext(context))) { - flushStreams(display_reporter); - flushStreams(file_reporter); + FlushStreams(display_reporter); + FlushStreams(file_reporter); // Without random interleaving, benchmarks are executed in the order of: // A, A, ..., A, B, B, ..., B, C, C, ..., C, ... @@ -360,30 +385,6 @@ void RunBenchmarks(const std::vector& benchmarks, benchmark_indices[i] = i; } - auto report = [flushStreams, display_reporter, file_reporter]( - const RunResults& run_results) { - auto report_one = [](BenchmarkReporter* reporter, - bool aggregates_only, - const RunResults& results) { - assert(reporter); - // If there are no aggregates, do output non-aggregates. - aggregates_only &= !results.aggregates_only.empty(); - if (!aggregates_only) - reporter->ReportRuns(results.non_aggregates); - if (!results.aggregates_only.empty()) - reporter->ReportRuns(results.aggregates_only); - }; - - report_one(display_reporter, run_results.display_report_aggregates_only, - run_results); - if (file_reporter) - report_one(file_reporter, run_results.file_report_aggregates_only, - run_results); - - flushStreams(display_reporter); - flushStreams(file_reporter); - }; - std::random_device rd; std::mt19937 g(rd()); // 'run_results_vector' and 'benchmarks' are parallel arrays. @@ -392,7 +393,7 @@ void RunBenchmarks(const std::vector& benchmarks, if (FLAGS_benchmark_enable_random_interleaving) { std::shuffle(benchmark_indices.begin(), benchmark_indices.end(), g); } - for (int j : benchmark_indices) { + for (size_t j : benchmark_indices) { // Repetitions will be automatically adjusted under random interleaving. 
if (!FLAGS_benchmark_enable_random_interleaving || i < benchmarks[j].RandomInterleavingRepetitions()) { @@ -400,8 +401,7 @@ void RunBenchmarks(const std::vector& benchmarks, &complexity_reports, &run_results_vector[j]); if (!FLAGS_benchmark_enable_random_interleaving) { // Print out reports as they come in. - const RunResults& run_results = run_results_vector.at(j); - report(run_results); + Report(display_reporter, file_reporter, run_results_vector.at(j)); } } } @@ -410,14 +410,14 @@ void RunBenchmarks(const std::vector& benchmarks, if (FLAGS_benchmark_enable_random_interleaving) { // Print out all reports at the end of the test. for (const RunResults& run_results : run_results_vector) { - report(run_results); + Report(display_reporter, file_reporter, run_results); } } } display_reporter->Finalize(); if (file_reporter) file_reporter->Finalize(); - flushStreams(display_reporter); - flushStreams(file_reporter); + FlushStreams(display_reporter); + FlushStreams(file_reporter); } // Disable deprecated warnings temporarily because we need to reference @@ -567,6 +567,7 @@ void PrintUsageAndExit() { " [--benchmark_filter=]\n" " [--benchmark_min_time=]\n" " [--benchmark_repetitions=]\n" + " [--benchmark_enable_random_interleaving={true|false}]\n" " [--benchmark_report_aggregates_only={true|false}]\n" " [--benchmark_display_aggregates_only={true|false}]\n" " [--benchmark_format=]\n" From 086b15ef01e27c5aa48d9591a16854d8fd5d48e9 Mon Sep 17 00:00:00 2001 From: Hai Huang Date: Sat, 15 May 2021 23:06:29 -0400 Subject: [PATCH 19/21] Fix min_time not initialized issue for fr-1051. Committer: Hai Huang On branch fr-1051 Your branch is up to date with 'origin/fr-1051'. 
Changes to be committed: modified: src/benchmark_api_internal.cc modified: src/benchmark_api_internal.h --- src/benchmark_api_internal.cc | 8 ++++++-- src/benchmark_api_internal.h | 5 +---- 2 files changed, 7 insertions(+), 6 deletions(-) diff --git a/src/benchmark_api_internal.cc b/src/benchmark_api_internal.cc index 06a4344330..b7ddabf36c 100644 --- a/src/benchmark_api_internal.cc +++ b/src/benchmark_api_internal.cc @@ -2,6 +2,7 @@ #include +#include "check.h" #include "string_util.h" DECLARE_bool(benchmark_enable_random_interleaving); @@ -23,9 +24,12 @@ BenchmarkInstance::BenchmarkInstance(Benchmark* benchmark, complexity_lambda_(benchmark_.complexity_lambda_), statistics_(benchmark_.statistics_), repetitions_(benchmark_.repetitions_), - min_time_(benchmark_.min_time_), + min_time_(!IsZero(benchmark_.min_time_) ? benchmark_.min_time_ + : GetMinTime()), iterations_(benchmark_.iterations_), threads_(thread_count) { + CHECK(!IsZero(min_time_)) << "min_time must be non-zero."; + name_.function_name = benchmark_.name_; size_t arg_i = 0; @@ -79,7 +83,7 @@ BenchmarkInstance::BenchmarkInstance(Benchmark* benchmark, } } -double BenchmarkInstance::MinTime() const { +double BenchmarkInstance::min_time() const { if (FLAGS_benchmark_enable_random_interleaving) { // Random Interleaving will automatically adjust // random_interleaving_repetitions(). Dividing diff --git a/src/benchmark_api_internal.h b/src/benchmark_api_internal.h index aff5528a57..39ea407738 100644 --- a/src/benchmark_api_internal.h +++ b/src/benchmark_api_internal.h @@ -26,9 +26,6 @@ class BenchmarkInstance { BenchmarkInstance(Benchmark* benchmark, const std::vector& args, int threads); - // Returns the min time to run a microbenchmark in RunBenchmark(). - double MinTime() const; - // Returns number of repetitions for Random Interleaving. This will be // initialized later once we finish the first repetition, if Random // Interleaving is enabled. See also ComputeRandominterleavingrepetitions(). 
@@ -52,7 +49,7 @@ class BenchmarkInstance { BigOFunc& complexity_lambda() const { return *complexity_lambda_; } const std::vector& statistics() const { return statistics_; } int repetitions() const { return repetitions_; } - double min_time() const { return min_time_; } + double min_time() const; IterationCount iterations() const { return iterations_; } int threads() const { return threads_; } From ee70382bcff174e1135d637b7994bd786c4ed498 Mon Sep 17 00:00:00 2001 From: Hai Huang Date: Tue, 18 May 2021 16:59:57 -0400 Subject: [PATCH 20/21] min_time --> MinTime in fr-1051. Committer: Hai Huang On branch fr-1051 Your branch is up to date with 'origin/fr-1051'. Changes to be committed: modified: src/benchmark_api_internal.cc modified: src/benchmark_api_internal.h modified: src/benchmark_runner.cc --- src/benchmark_api_internal.cc | 6 +++--- src/benchmark_api_internal.h | 4 ++-- src/benchmark_runner.cc | 14 +++++++------- 3 files changed, 12 insertions(+), 12 deletions(-) diff --git a/src/benchmark_api_internal.cc b/src/benchmark_api_internal.cc index b7ddabf36c..ddd46bee63 100644 --- a/src/benchmark_api_internal.cc +++ b/src/benchmark_api_internal.cc @@ -83,7 +83,7 @@ BenchmarkInstance::BenchmarkInstance(Benchmark* benchmark, } } -double BenchmarkInstance::min_time() const { +double BenchmarkInstance::MinTime() const { if (FLAGS_benchmark_enable_random_interleaving) { // Random Interleaving will automatically adjust // random_interleaving_repetitions(). 
Dividing @@ -105,8 +105,8 @@ bool BenchmarkInstance::RandomInterleavingRepetitionsInitialized() const { } void BenchmarkInstance::InitRandomInterleavingRepetitions( - int repetitions) const { - random_interleaving_repetitions_ = repetitions; + int reps) const { + random_interleaving_repetitions_ = reps; } State BenchmarkInstance::Run( diff --git a/src/benchmark_api_internal.h b/src/benchmark_api_internal.h index 39ea407738..0ff8dafbe6 100644 --- a/src/benchmark_api_internal.h +++ b/src/benchmark_api_internal.h @@ -35,7 +35,7 @@ class BenchmarkInstance { bool RandomInterleavingRepetitionsInitialized() const; // Initializes number of repetitions for random interleaving. - void InitRandomInterleavingRepetitions(int repetitions) const; + void InitRandomInterleavingRepetitions(int reps) const; const BenchmarkName& name() const { return name_; } AggregationReportMode aggregation_report_mode() const { @@ -49,7 +49,7 @@ class BenchmarkInstance { BigOFunc& complexity_lambda() const { return *complexity_lambda_; } const std::vector& statistics() const { return statistics_; } int repetitions() const { return repetitions_; } - double min_time() const; + double MinTime() const; IterationCount iterations() const { return iterations_; } int threads() const { return threads_; } diff --git a/src/benchmark_runner.cc b/src/benchmark_runner.cc index 1320136e30..a84eae8194 100644 --- a/src/benchmark_runner.cc +++ b/src/benchmark_runner.cc @@ -279,13 +279,13 @@ class BenchmarkRunner { // See how much iterations should be increased by. // Note: Avoid division by zero with max(seconds, 1ns). double multiplier = - b.min_time() * kSafetyMultiplier / std::max(i.seconds, 1e-9); + b.MinTime() * kSafetyMultiplier / std::max(i.seconds, 1e-9); // If our last run was at least 10% of FLAGS_benchmark_min_time then we // use the multiplier directly. // Otherwise we use at most 10 times expansion. // NOTE: When the last run was at least 10% of the min time the max // expansion should be 14x. 
- bool is_significant = (i.seconds / b.min_time()) > 0.1; + bool is_significant = (i.seconds / b.MinTime()) > 0.1; multiplier = is_significant ? multiplier : std::min(10.0, multiplier); if (multiplier <= 1.0) multiplier = 2.0; @@ -306,13 +306,13 @@ class BenchmarkRunner { // or because an error was reported. return i.results.has_error_ || i.iters >= kMaxIterations || // Too many iterations already. - i.seconds >= b.min_time() || // The elapsed time is large enough. + i.seconds >= b.MinTime() || // The elapsed time is large enough. // CPU time is specified but the // elapsed real time greatly exceeds // the minimum time. Note that user // provided timers are except from this // sanity check. - ((i.results.real_time_used >= 5 * b.min_time()) && + ((i.results.real_time_used >= 5 * b.MinTime()) && !b.use_manual_time()); } @@ -362,12 +362,12 @@ class BenchmarkRunner { ComputeRandomInterleavingRepetitions(input)); // If the number of repetitions changed, need to rerun the last trial // because iters may also change. Note that we only need to do this - // if accumulated_time < b.min_time(), i.e., the iterations we have - // run is not enough for the already adjusted b.min_time(). + // if accumulated_time < b.MinTime(), i.e., the iterations we have + // run is not enough for the already adjusted b.MinTime(). // Otherwise, we will still skip the rerun. rerun_trial = b.RandomInterleavingRepetitions() < GetRepetitions() && - i.seconds < b.min_time() && !has_explicit_iteration_count; + i.seconds < b.MinTime() && !has_explicit_iteration_count; } if (!rerun_trial) break; // Good, let's report them! From 736875b29812fb47af1941a4beec1d54fd8fa348 Mon Sep 17 00:00:00 2001 From: Hai Huang Date: Thu, 20 May 2021 01:03:58 -0400 Subject: [PATCH 21/21] Add doc for random interleaving for fr-1051 Committer: Hai Huang On branch fr-1051 Your branch is up to date with 'origin/fr-1051'. 
Changes to be committed: modified: README.md new file: docs/random_interleaving.md --- README.md | 8 +++++--- docs/random_interleaving.md | 26 ++++++++++++++++++++++++++ 2 files changed, 31 insertions(+), 3 deletions(-) create mode 100644 docs/random_interleaving.md diff --git a/README.md b/README.md index a853115b36..f32e3d9d6a 100644 --- a/README.md +++ b/README.md @@ -179,7 +179,7 @@ BENCHMARK_MAIN(); ``` To run the benchmark, compile and link against the `benchmark` library -(libbenchmark.a/.so). If you followed the build steps above, this library will +(libbenchmark.a/.so). If you followed the build steps above, this library will be under the build directory you created. ```bash @@ -299,6 +299,8 @@ too (`-lkstat`). [Setting the Time Unit](#setting-the-time-unit) +[Random Interleaving](docs/random_interleaving.md) + [User-Requested Performance Counters](docs/perf_counters.md) [Preventing Optimization](#preventing-optimization) @@ -399,8 +401,8 @@ Write benchmark results to a file with the `--benchmark_out=` option (or set `BENCHMARK_OUT`). Specify the output format with `--benchmark_out_format={json|console|csv}` (or set `BENCHMARK_OUT_FORMAT={json|console|csv}`). Note that the 'csv' reporter is -deprecated and the saved `.csv` file -[is not parsable](https://github.com/google/benchmark/issues/794) by csv +deprecated and the saved `.csv` file +[is not parsable](https://github.com/google/benchmark/issues/794) by csv parsers. Specifying `--benchmark_out` does not suppress the console output. diff --git a/docs/random_interleaving.md b/docs/random_interleaving.md new file mode 100644 index 0000000000..2471b46bb0 --- /dev/null +++ b/docs/random_interleaving.md @@ -0,0 +1,26 @@ + + +# Random Interleaving + +[Random Interleaving](https://github.com/google/benchmark/issues/1051) is a +technique to lower run-to-run variance. 
It breaks the execution of a +microbenchmark into multiple chunks and randomly interleaves them with chunks +from other microbenchmarks in the same benchmark test. Data shows it is able to +lower run-to-run variance by +[40%](https://github.com/google/benchmark/issues/1051) on average. + +To use, set `--benchmark_enable_random_interleaving=true`. + +It's a known issue that random interleaving may increase the benchmark execution +time if: + +1. A benchmark has costly setup and / or teardown. Random interleaving will run + setup and teardown many times and may increase test execution time + significantly. +2. The time to run a single benchmark iteration is larger than the desired time + per repetition (i.e., `benchmark_min_time / benchmark_repetitions`). + +The overhead of random interleaving can be controlled by +`--benchmark_random_interleaving_max_overhead`. The default value is 0.4, meaning +the total execution time under random interleaving is limited by 1.4 x original +total execution time. Set it to `inf` for unlimited overhead.