From 25a5fdfcb6a325816b31790aea225d35aa430a5d Mon Sep 17 00:00:00 2001 From: Hai Huang Date: Sun, 28 Mar 2021 15:21:34 -0400 Subject: [PATCH 01/21] Implementation of random interleaving. See http://github.com/google/benchmark/issues/1051 for the feature requests. Committer: Hai Huang (http://github.com/haih-g) On branch fr-1051 Changes to be committed: modified: include/benchmark/benchmark.h modified: src/benchmark.cc new file: src/benchmark_adjust_repetitions.cc new file: src/benchmark_adjust_repetitions.h modified: src/benchmark_api_internal.cc modified: src/benchmark_api_internal.h modified: src/benchmark_register.cc modified: src/benchmark_runner.cc modified: src/benchmark_runner.h modified: test/CMakeLists.txt new file: test/benchmark_random_interleaving_gtest.cc --- include/benchmark/benchmark.h | 5 +- src/benchmark.cc | 156 ++++++-- src/benchmark_adjust_repetitions.cc | 111 ++++++ src/benchmark_adjust_repetitions.h | 28 ++ src/benchmark_api_internal.cc | 156 +++++++- src/benchmark_api_internal.h | 93 ++++- src/benchmark_register.cc | 73 +--- src/benchmark_runner.cc | 148 +++++--- src/benchmark_runner.h | 7 +- test/CMakeLists.txt | 1 + test/benchmark_random_interleaving_gtest.cc | 394 ++++++++++++++++++++ 11 files changed, 996 insertions(+), 176 deletions(-) create mode 100644 src/benchmark_adjust_repetitions.cc create mode 100644 src/benchmark_adjust_repetitions.h create mode 100644 test/benchmark_random_interleaving_gtest.cc diff --git a/include/benchmark/benchmark.h b/include/benchmark/benchmark.h index f57e3e79bd..42ac7849f1 100644 --- a/include/benchmark/benchmark.h +++ b/include/benchmark/benchmark.h @@ -434,7 +434,7 @@ struct Statistics { : name_(name), compute_(compute) {} }; -struct BenchmarkInstance; +class BenchmarkInstance; class ThreadTimer; class ThreadManager; @@ -686,7 +686,7 @@ class State { internal::ThreadTimer* timer_; internal::ThreadManager* manager_; - friend struct internal::BenchmarkInstance; + friend class 
internal::BenchmarkInstance; }; inline BENCHMARK_ALWAYS_INLINE bool State::KeepRunning() { @@ -961,6 +961,7 @@ class Benchmark { private: friend class BenchmarkFamilies; + friend class BenchmarkInstance; std::string name_; AggregationReportMode aggregation_report_mode_; diff --git a/src/benchmark.cc b/src/benchmark.cc index ffe4bf45a6..32f01b8272 100644 --- a/src/benchmark.cc +++ b/src/benchmark.cc @@ -32,7 +32,9 @@ #include #include #include +#include #include +#include #include #include #include @@ -51,6 +53,18 @@ #include "thread_manager.h" #include "thread_timer.h" +// Each benchmark can be repeated a number of times, and within each +// *repetition*, we run the user-defined benchmark function a number of +// *iterations*. The number of repetitions is determined based on flags +// (--benchmark_repetitions). +namespace { + +// Attempt to make each repetition run for at least this much of time. +constexpr double kDefaultMinTimeTotal = 0.5; +constexpr size_t kDefaultRepetitions = 12; + +} // namespace + // Print a list of benchmarks. This option overrides all other options. DEFINE_bool(benchmark_list_tests, false); @@ -59,16 +73,32 @@ DEFINE_bool(benchmark_list_tests, false); // linked into the binary are run. DEFINE_string(benchmark_filter, "."); -// Minimum number of seconds we should run benchmark before results are -// considered significant. For cpu-time based tests, this is the lower bound -// on the total cpu time used by all threads that make up the test. For -// real-time based tests, this is the lower bound on the elapsed time of the -// benchmark execution, regardless of number of threads. -DEFINE_double(benchmark_min_time, 0.5); +// Minimum number of seconds we should run benchmark per repetition before +// results are considered significant. For cpu-time based tests, this is the +// lower bound on the total cpu time used by all threads that make up the test. 
+// For real-time based tests, this is the lower bound on the elapsed time of the +// benchmark execution, regardless of number of threads. If left unset, will use +// 0.5 / 12 if random interleaving is enabled. Otherwise, will use 0.5. +// Do NOT read this flag directly. Use GetMinTime() to read this flag. +DEFINE_double(benchmark_min_time, -1.0); // The number of runs of each benchmark. If greater than 1, the mean and -// standard deviation of the runs will be reported. -DEFINE_int32(benchmark_repetitions, 1); +// standard deviation of the runs will be reported. By default, the number of +// repetitions is 1 if random interleaving is disabled, and up to 12 if random +// interleaving is enabled. (Read the documentation for random interleaving to +// see why it might be less than 12.) +// Do NOT read this flag directly, Use GetRepetitions() to access this flag. +DEFINE_int32(benchmark_repetitions, -1); + +// The maximum overhead allowed for random interleaving. A value X means total +// execution time under random interleaving is limited by +// (1 + X) * original total execution time. Set to 'inf' to allow infinite +// overhead. +DEFINE_double(benchmark_random_interleaving_max_overhead, 0.4); + +// If set, enable random interleaving. See +// http://github.com/google/benchmark/issues/1051 for details. +DEFINE_bool(benchmark_enable_random_interleaving, false); // Report the result of each benchmark repetitions. When 'true' is specified // only the mean, standard deviation, and other statistics are reported for @@ -110,6 +140,37 @@ namespace benchmark { namespace internal { +// Performance measurements always come with random variances. Defines a +// factor by which the required number of iterations is overestimated in order +// to reduce the probability that the minimum time requirement will not be met. +const double kSafetyMultiplier = 1.4; + +// Wraps --benchmark_min_time and returns valid default values if not supplied. 
+double GetMinTime() { + const double min_time = FLAGS_benchmark_min_time; + if (min_time >= 0.0) { + return min_time; + } + + if (FLAGS_benchmark_enable_random_interleaving) { + return kDefaultMinTimeTotal / kDefaultRepetitions; + } + return kDefaultMinTimeTotal; +} + +// Wraps --benchmark_repetitions and return valid default value if not supplied. +size_t GetRepetitions() { + const int repetitions = FLAGS_benchmark_repetitions; + if (repetitions >= 0) { + return static_cast(repetitions); + } + + if (FLAGS_benchmark_enable_random_interleaving) { + return kDefaultRepetitions; + } + return 1; +} + // FIXME: wouldn't LTO mess this up? void UseCharPointer(char const volatile*) {} @@ -222,15 +283,15 @@ void RunBenchmarks(const std::vector& benchmarks, CHECK(display_reporter != nullptr); // Determine the width of the name field using a minimum width of 10. - bool might_have_aggregates = FLAGS_benchmark_repetitions > 1; + bool might_have_aggregates = GetRepetitions() > 1; size_t name_field_width = 10; size_t stat_field_width = 0; for (const BenchmarkInstance& benchmark : benchmarks) { name_field_width = - std::max(name_field_width, benchmark.name.str().size()); - might_have_aggregates |= benchmark.repetitions > 1; + std::max(name_field_width, benchmark.name().str().size()); + might_have_aggregates |= benchmark.repetitions() > 1; - for (const auto& Stat : *benchmark.statistics) + for (const auto& Stat : *benchmark.statistics()) stat_field_width = std::max(stat_field_width, Stat.name_.size()); } if (might_have_aggregates) name_field_width += 1 + stat_field_width; @@ -255,23 +316,56 @@ void RunBenchmarks(const std::vector& benchmarks, flushStreams(display_reporter); flushStreams(file_reporter); - for (const auto& benchmark : benchmarks) { - RunResults run_results = RunBenchmark(benchmark, &complexity_reports); - - auto report = [&run_results](BenchmarkReporter* reporter, - bool report_aggregates_only) { - assert(reporter); - // If there are no aggregates, do output 
non-aggregates. - report_aggregates_only &= !run_results.aggregates_only.empty(); - if (!report_aggregates_only) - reporter->ReportRuns(run_results.non_aggregates); - if (!run_results.aggregates_only.empty()) - reporter->ReportRuns(run_results.aggregates_only); - }; - - report(display_reporter, run_results.display_report_aggregates_only); + // Without random interleaving, benchmarks are executed in the order of: + // A, A, ..., A, B, B, ..., B, C, C, ..., C, ... + // That is, repetition is within RunBenchmark(), hence the name + // inner_repetitions. + // With random interleaving, benchmarks are executed in the order of: + // {Random order of A, B, C, ...}, {Random order of A, B, C, ...}, ... + // That is, repetitions is outside of RunBenchmark(), hence the name + // outer_repetitions. + size_t inner_repetitions = + FLAGS_benchmark_enable_random_interleaving ? 1 : GetRepetitions(); + size_t outer_repetitions = + FLAGS_benchmark_enable_random_interleaving ? GetRepetitions() : 1; + std::vector benchmark_indices(benchmarks.size()); + for (size_t i = 0; i < benchmarks.size(); ++i) { + benchmark_indices[i] = i; + } + + // 'run_results_vector' and 'benchmarks' are parallel arrays. + std::vector run_results_vector(benchmarks.size()); + for (size_t i = 0; i < outer_repetitions; i++) { + if (FLAGS_benchmark_enable_random_interleaving) { + std::random_shuffle(benchmark_indices.begin(), benchmark_indices.end()); + } + for (size_t j : benchmark_indices) { + // Repetitions will be automatically adjusted under random interleaving. + if (!FLAGS_benchmark_enable_random_interleaving || + i < benchmarks[j].random_interleaving_repetitions()) { + RunBenchmark(benchmarks[j], outer_repetitions, inner_repetitions, + &complexity_reports, &run_results_vector[j]); + } + } + } + + auto report = [](BenchmarkReporter* reporter, bool report_aggregates_only, + const RunResults& run_results) { + assert(reporter); + // If there are no aggregates, do output non-aggregates. 
+ report_aggregates_only &= !run_results.aggregates_only.empty(); + if (!report_aggregates_only) + reporter->ReportRuns(run_results.non_aggregates); + if (!run_results.aggregates_only.empty()) + reporter->ReportRuns(run_results.aggregates_only); + }; + + for (const RunResults& run_results : run_results_vector) { + report(display_reporter, run_results.display_report_aggregates_only, + run_results); if (file_reporter) - report(file_reporter, run_results.file_report_aggregates_only); + report(file_reporter, run_results.file_report_aggregates_only, + run_results); flushStreams(display_reporter); flushStreams(file_reporter); @@ -399,7 +493,7 @@ size_t RunSpecifiedBenchmarks(BenchmarkReporter* display_reporter, if (FLAGS_benchmark_list_tests) { for (auto const& benchmark : benchmarks) - Out << benchmark.name.str() << "\n"; + Out << benchmark.name().str() << "\n"; } else { internal::RunBenchmarks(benchmarks, display_reporter, file_reporter); } @@ -443,6 +537,10 @@ void ParseCommandLineFlags(int* argc, char** argv) { &FLAGS_benchmark_min_time) || ParseInt32Flag(argv[i], "benchmark_repetitions", &FLAGS_benchmark_repetitions) || + ParseBoolFlag(argv[i], "benchmark_enable_random_interleaving", + &FLAGS_benchmark_enable_random_interleaving) || + ParseDoubleFlag(argv[i], "benchmark_random_interleaving_max_overhead", + &FLAGS_benchmark_random_interleaving_max_overhead) || ParseBoolFlag(argv[i], "benchmark_report_aggregates_only", &FLAGS_benchmark_report_aggregates_only) || ParseBoolFlag(argv[i], "benchmark_display_aggregates_only", diff --git a/src/benchmark_adjust_repetitions.cc b/src/benchmark_adjust_repetitions.cc new file mode 100644 index 0000000000..31a1cd6b08 --- /dev/null +++ b/src/benchmark_adjust_repetitions.cc @@ -0,0 +1,111 @@ +#include "benchmark_adjust_repetitions.h" + +#include "benchmark_api_internal.h" +#include "log.h" + +namespace benchmark { +namespace internal { + +namespace { + +constexpr double kNanosecondInSecond = 0.000000001; + +} // namespace + +size_t 
ComputeRandomInterleavingRepetitions( + InternalRandomInterleavingRepetitionsInput input) { + // Find the repetitions such that total overhead is bounded. Let + // n = desired number of repetitions, i.e., the output of this method. + // t = total real execution time per repetition including overhead, + // (input.total_execution_time_per_repetition). + // o = maximum allowed increase in total real execution time due to random + // interleaving, measured as a fraction (input.max_overhead). + // e = estimated total execution time without Random Interleaving + // We want + // t * n / e <= 1 + o + // I.e., + // n <= (1 + o) * e / t + // + // Let + // h = overhead per repetition, which include all setup / teardown time and + // also the execution time of preliminary trials used to search for the + // correct number of iterations. + // r = real execution time per repetition not including overhead + // (input.real_accumulated_time_per_repetition). + // s = measured execution time per repetition not including overhead, + // which can be either real or CPU time + // (input.accumulated_time_per_repetition). + // We have + // h = t - r + // + // Let + // m = total minimum measured execution time for all repetitions + // (input.min_time_per_repetition * input.max_repetitions). + // Let + // f = m / s + // f is the scale factor between m and s, and will be used to estimate + // l, the total real execution time for all repetitions excluding the + // overhead. It's reasonable to assume that the real execution time excluding + // the overhead is proportional to the measured time. Hence we expect to see + // l / r to be equal to m / s. That is, l / r = f, thus, l = r * f. Then the + // total execution time e can be estimated by h + l, which is h + r * f. + // e = h + r * f + // Note that this might be an underestimation. 
If number of repetitions is + // reduced, we may need to run more iterations per repetition, and that may + // increase the number of preliminary trials needed to find the correct + // number of iterations. + + double h = std::max(0.0, input.total_execution_time_per_repetition - + input.real_time_used_per_repetition); + double r = + std::max(input.real_time_used_per_repetition, kNanosecondInSecond); + double s = + std::max(input.time_used_per_repetition, kNanosecondInSecond); + double m = input.min_time_per_repetition * input.max_repetitions; + + // f = m / s + // RunBenchmark() always overshoot the iteration count by kSafetyMultiplier. + // Apply the same factor here. + // f = kSafetyMultiplier * m / s + // Also we want to make sure 1 <= f <= input.max_repetitions. Note that we + // may not be able to reach m because the total iters per repetition is + // upper bounded by --benchmark_max_iters. This behavior is preserved in + // Random Interleaving, as we won't run repetitions more than + // input.max_repetitions to reach m. + + double f = kSafetyMultiplier * m / s; + f = std::min(std::max(f, 1.0), static_cast(input.max_repetitions)); + + double e = h + r * f; + // n <= (1 + o) * e / t = (1 + o) * e / (h + r) + // Also we want to make sure 1 <= n <= input.max_repetition, and (h + r) > 0. 
+ double n = (1 + input.max_overhead) * e / (h + r); + n = std::min(std::max(n, 1.0), static_cast(input.max_repetitions)); + + size_t n_size_t = static_cast(n); + + VLOG(2) << "Computed random interleaving repetitions" + << "\n input.total_execution_time_per_repetition: " + << input.total_execution_time_per_repetition + << "\n input.time_used_per_repetition: " + << input.time_used_per_repetition + << "\n input.real_time_used_per_repetition: " + << input.real_time_used_per_repetition + << "\n input.min_time_per_repetitions: " + << input.min_time_per_repetition + << "\n input.max_repetitions: " << input.max_repetitions + << "\n input.max_overhead: " << input.max_overhead + << "\n h: " << h + << "\n r: " << r + << "\n s: " << s + << "\n f: " << f + << "\n m: " << m + << "\n e: " << e + << "\n n: " << n + << "\n n_size_t: " << n_size_t; + + return n_size_t; +} + +} // internal +} // benchmark diff --git a/src/benchmark_adjust_repetitions.h b/src/benchmark_adjust_repetitions.h new file mode 100644 index 0000000000..eeb69ff65c --- /dev/null +++ b/src/benchmark_adjust_repetitions.h @@ -0,0 +1,28 @@ +#ifndef BENCHMARK_ADJUST_REPETITIONS_H +#define BENCHMARK_ADJUST_REPETITIONS_H + +#include "benchmark/benchmark.h" +#include "commandlineflags.h" + +namespace benchmark { +namespace internal { + +// Defines the input tuple to ComputeRandomInterleavingRepetitions(). +struct InternalRandomInterleavingRepetitionsInput { + double total_execution_time_per_repetition; + double time_used_per_repetition; + double real_time_used_per_repetition; + double min_time_per_repetition; + double max_overhead; + size_t max_repetitions; +}; + +// Should be called right after the first repetition is completed to estimate +// the number of iterations. 
+size_t ComputeRandomInterleavingRepetitions( + InternalRandomInterleavingRepetitionsInput input); + +} // end namespace internal +} // end namespace benchmark + +#endif // BENCHMARK_ADJUST_REPETITIONS_H diff --git a/src/benchmark_api_internal.cc b/src/benchmark_api_internal.cc index d468a257e3..f7e38dc34d 100644 --- a/src/benchmark_api_internal.cc +++ b/src/benchmark_api_internal.cc @@ -1,13 +1,165 @@ #include "benchmark_api_internal.h" +#include + +#include "string_util.h" + +DECLARE_bool(benchmark_enable_random_interleaving); + namespace benchmark { namespace internal { +BenchmarkInstance::BenchmarkInstance(Benchmark* benchmark, + const std::vector& args, + int threads) + : benchmark_(benchmark), args_(args), threads_(threads) { + name_.function_name = benchmark->name_; + + // Add arguments to instance name + size_t arg_i = 0; + for (auto const& arg : args) { + if (!name_.args.empty()) { + name_.args += '/'; + } + + if (arg_i < benchmark->arg_names_.size()) { + const auto& arg_name = benchmark->arg_names_[arg_i]; + if (!arg_name.empty()) { + name_.args += StrFormat("%s:", arg_name.c_str()); + } + } + + name_.args += StrFormat("%" PRId64, arg); + ++arg_i; + } + + if (!IsZero(benchmark->min_time_)) + name_.min_time = StrFormat("min_time:%0.3f", benchmark->min_time_); + if (benchmark->iterations_ != 0) { + name_.iterations = StrFormat( + "iterations:%lu", static_cast(benchmark->iterations_)); + } + if (benchmark->repetitions_ != 0) + name_.repetitions = StrFormat("repeats:%d", benchmark->repetitions_); + + if (benchmark->measure_process_cpu_time_) { + name_.time_type = "process_time"; + } + + if (benchmark->use_manual_time_) { + if (!name_.time_type.empty()) { + name_.time_type += '/'; + } + name_.time_type += "manual_time"; + } else if (benchmark->use_real_time_) { + if (!name_.time_type.empty()) { + name_.time_type += '/'; + } + name_.time_type += "real_time"; + } + + // Add the number of threads used to the name + if (!benchmark->thread_counts_.empty()) { + 
name_.threads = StrFormat("threads:%d", threads_); + } + + aggregation_report_mode_ = benchmark->aggregation_report_mode_; + time_unit_ = benchmark->time_unit_; + range_multiplier_ = benchmark->range_multiplier_; + min_time_ = benchmark->min_time_; + iterations_ = benchmark->iterations_; + repetitions_ = benchmark->repetitions_; + measure_process_cpu_time_ = benchmark->measure_process_cpu_time_; + use_real_time_ = benchmark->use_real_time_; + use_manual_time_ = benchmark->use_manual_time_; + complexity_ = benchmark->complexity_; + complexity_lambda_ = benchmark->complexity_lambda_; + statistics_ = &(benchmark->statistics_); +} + +const BenchmarkName& BenchmarkInstance::name() const { + return name_; +} + +AggregationReportMode BenchmarkInstance::aggregation_report_mode() const { + return aggregation_report_mode_; +} + +TimeUnit BenchmarkInstance::time_unit() const { + return time_unit_; +} + +int BenchmarkInstance::threads() const{ + return threads_; +} + +bool BenchmarkInstance::measure_process_cpu_time() const { + return measure_process_cpu_time_; +} + +bool BenchmarkInstance::use_real_time() const { + return use_real_time_; +} + +bool BenchmarkInstance::use_manual_time() const { + return use_manual_time_; +} + +BigO BenchmarkInstance::complexity() const { + return complexity_; +} + +BigOFunc* BenchmarkInstance::complexity_lambda() const { + return complexity_lambda_; +} + +bool BenchmarkInstance::last_benchmark_instance() const { + return last_benchmark_instance_; +} + +IterationCount BenchmarkInstance::iterations() const { + return iterations_; +} + +int BenchmarkInstance::repetitions() const { + return repetitions_; +} + +const std::vector* BenchmarkInstance::statistics() const { + return statistics_; +} + +double BenchmarkInstance::min_time() const { + if (FLAGS_benchmark_enable_random_interleaving) { + // Random Interleaving will automatically adjust + // random_interleaving_repetitions(). 
Dividing + // total execution time by random_interleaving_repetitions() gives + // the adjusted min_time per repetition. + return min_time_ * GetRepetitions() / random_interleaving_repetitions(); + } + return min_time_; +} + +size_t BenchmarkInstance::random_interleaving_repetitions() const { + return random_interleaving_repetitions_ == std::numeric_limits::max() + ? GetRepetitions() + : random_interleaving_repetitions_; +} + +bool BenchmarkInstance::random_interleaving_repetitions_initialized() const { + return random_interleaving_repetitions_ != std::numeric_limits::max(); +} + +void BenchmarkInstance::init_random_interleaving_repetitions( + size_t repetitions) const { + random_interleaving_repetitions_ = repetitions; +} + State BenchmarkInstance::Run(IterationCount iters, int thread_id, internal::ThreadTimer* timer, internal::ThreadManager* manager) const { - State st(iters, arg, thread_id, threads, timer, manager); - benchmark->Run(st); + State st(iters, args_, thread_id, threads_, timer, manager); + benchmark_->Run(st); return st; } diff --git a/src/benchmark_api_internal.h b/src/benchmark_api_internal.h index 264eff95c5..4b9c16ddb6 100644 --- a/src/benchmark_api_internal.h +++ b/src/benchmark_api_internal.h @@ -4,6 +4,7 @@ #include "benchmark/benchmark.h" #include "commandlineflags.h" +#include #include #include #include @@ -14,29 +15,79 @@ namespace benchmark { namespace internal { +extern const double kSafetyMultiplier; + // Information kept per benchmark we may want to run -struct BenchmarkInstance { - BenchmarkName name; - Benchmark* benchmark; - AggregationReportMode aggregation_report_mode; - std::vector arg; - TimeUnit time_unit; - int range_multiplier; - bool measure_process_cpu_time; - bool use_real_time; - bool use_manual_time; - BigO complexity; - BigOFunc* complexity_lambda; - UserCounters counters; - const std::vector* statistics; - bool last_benchmark_instance; - int repetitions; - double min_time; - IterationCount iterations; - int threads; // 
Number of concurrent threads to us +class BenchmarkInstance { + public: + BenchmarkInstance(Benchmark* benchmark, const std::vector& args, + int threads); + + // Const accessors. + + const BenchmarkName& name() const; + int repetitions() const; + const std::vector* statistics() const; + AggregationReportMode aggregation_report_mode() const; + TimeUnit time_unit() const; + int threads() const; + bool measure_process_cpu_time() const; + bool use_real_time() const; + bool use_manual_time() const; + BigO complexity() const; + BigOFunc* complexity_lambda() const; + bool last_benchmark_instance() const; + IterationCount iterations() const; + + // Returns the min time to run a microbenchmark in RunBenchmark(). + double min_time() const; + + // Returns number of repetitions for Random Interleaving. This will be + // initialized later once we finish the first repetition, if Random + // Interleaving is enabled. See also ComputeRandominterleavingrepetitions(). + size_t random_interleaving_repetitions() const; + + // Returns true if repetitions for Random Interleaving is initialized. + bool random_interleaving_repetitions_initialized() const; + + // Initializes number of repetitions for random interleaving. + void init_random_interleaving_repetitions(size_t repetitions) const; + + // Setters. + + // Sets the value of last_benchmark_instance. + void set_last_benchmark_instance(bool last_benchmark_instance) { + last_benchmark_instance_ = last_benchmark_instance; + } + + // Public APIs. 
State Run(IterationCount iters, int thread_id, internal::ThreadTimer* timer, internal::ThreadManager* manager) const; + + private: + BenchmarkName name_; + Benchmark* benchmark_; + AggregationReportMode aggregation_report_mode_; + std::vector args_; + TimeUnit time_unit_; + int range_multiplier_; + bool measure_process_cpu_time_; + bool use_real_time_; + bool use_manual_time_; + BigO complexity_; + BigOFunc* complexity_lambda_; + UserCounters counters_; + const std::vector* statistics_; + bool last_benchmark_instance_; + int repetitions_; + double min_time_; + IterationCount iterations_; + int threads_; // Number of concurrent threads to use + // Make it mutable so it can be initialized (mutated) later on a const + // instance. + mutable size_t random_interleaving_repetitions_ = + std::numeric_limits::max(); }; bool FindBenchmarksInternal(const std::string& re, @@ -47,6 +98,10 @@ bool IsZero(double n); ConsoleReporter::OutputOptions GetOutputOptions(bool force_no_color = false); +double GetMinTime(); + +size_t GetRepetitions(); + } // end namespace internal } // end namespace benchmark diff --git a/src/benchmark_register.cc b/src/benchmark_register.cc index 65d9944f4f..ac71f72693 100644 --- a/src/benchmark_register.cc +++ b/src/benchmark_register.cc @@ -158,79 +158,12 @@ bool BenchmarkFamilies::FindBenchmarks( for (auto const& args : family->args_) { for (int num_threads : *thread_counts) { - BenchmarkInstance instance; - instance.name.function_name = family->name_; - instance.benchmark = family.get(); - instance.aggregation_report_mode = family->aggregation_report_mode_; - instance.arg = args; - instance.time_unit = family->time_unit_; - instance.range_multiplier = family->range_multiplier_; - instance.min_time = family->min_time_; - instance.iterations = family->iterations_; - instance.repetitions = family->repetitions_; - instance.measure_process_cpu_time = family->measure_process_cpu_time_; - instance.use_real_time = family->use_real_time_; - 
instance.use_manual_time = family->use_manual_time_; - instance.complexity = family->complexity_; - instance.complexity_lambda = family->complexity_lambda_; - instance.statistics = &family->statistics_; - instance.threads = num_threads; - - // Add arguments to instance name - size_t arg_i = 0; - for (auto const& arg : args) { - if (!instance.name.args.empty()) { - instance.name.args += '/'; - } - - if (arg_i < family->arg_names_.size()) { - const auto& arg_name = family->arg_names_[arg_i]; - if (!arg_name.empty()) { - instance.name.args += StrFormat("%s:", arg_name.c_str()); - } - } - - instance.name.args += StrFormat("%" PRId64, arg); - ++arg_i; - } - - if (!IsZero(family->min_time_)) - instance.name.min_time = - StrFormat("min_time:%0.3f", family->min_time_); - if (family->iterations_ != 0) { - instance.name.iterations = - StrFormat("iterations:%lu", - static_cast(family->iterations_)); - } - if (family->repetitions_ != 0) - instance.name.repetitions = - StrFormat("repeats:%d", family->repetitions_); - - if (family->measure_process_cpu_time_) { - instance.name.time_type = "process_time"; - } - - if (family->use_manual_time_) { - if (!instance.name.time_type.empty()) { - instance.name.time_type += '/'; - } - instance.name.time_type += "manual_time"; - } else if (family->use_real_time_) { - if (!instance.name.time_type.empty()) { - instance.name.time_type += '/'; - } - instance.name.time_type += "real_time"; - } - - // Add the number of threads used to the name - if (!family->thread_counts_.empty()) { - instance.name.threads = StrFormat("threads:%d", instance.threads); - } + BenchmarkInstance instance(family.get(), args, num_threads); - const auto full_name = instance.name.str(); + const auto full_name = instance.name().str(); if ((re.Match(full_name) && !isNegativeFilter) || (!re.Match(full_name) && isNegativeFilter)) { - instance.last_benchmark_instance = (&args == &family->args_.back()); + instance.set_last_benchmark_instance(&args == &family->args_.back()); 
benchmarks->push_back(std::move(instance)); } } diff --git a/src/benchmark_runner.cc b/src/benchmark_runner.cc index 7bc6b6329e..9cd93ca97b 100644 --- a/src/benchmark_runner.cc +++ b/src/benchmark_runner.cc @@ -15,6 +15,7 @@ #include "benchmark_runner.h" #include "benchmark/benchmark.h" #include "benchmark_api_internal.h" +#include "benchmark_adjust_repetitions.h" #include "internal_macros.h" #ifndef BENCHMARK_OS_WINDOWS @@ -51,6 +52,9 @@ #include "thread_manager.h" #include "thread_timer.h" +DECLARE_bool(benchmark_enable_random_interleaving); +DECLARE_double(benchmark_random_interleaving_max_overhead); + namespace benchmark { namespace internal { @@ -70,28 +74,28 @@ BenchmarkReporter::Run CreateRunReport( // Create report about this benchmark run. BenchmarkReporter::Run report; - report.run_name = b.name; + report.run_name = b.name(); report.error_occurred = results.has_error_; report.error_message = results.error_message_; report.report_label = results.report_label_; // This is the total iterations across all threads. 
report.iterations = results.iterations; - report.time_unit = b.time_unit; - report.threads = b.threads; + report.time_unit = b.time_unit(); + report.threads = b.threads(); report.repetition_index = repetition_index; - report.repetitions = b.repetitions; + report.repetitions = b.repetitions(); if (!report.error_occurred) { - if (b.use_manual_time) { + if (b.use_manual_time()) { report.real_accumulated_time = results.manual_time_used; } else { report.real_accumulated_time = results.real_time_used; } report.cpu_accumulated_time = results.cpu_time_used; report.complexity_n = results.complexity_n; - report.complexity = b.complexity; - report.complexity_lambda = b.complexity_lambda; - report.statistics = b.statistics; + report.complexity = b.complexity(); + report.complexity_lambda = b.complexity_lambda(); + report.statistics = b.statistics(); report.counters = results.counters; if (memory_iterations > 0) { @@ -103,7 +107,7 @@ BenchmarkReporter::Run CreateRunReport( report.max_bytes_used = memory_result.max_bytes_used; } - internal::Finish(&report.counters, results.iterations, seconds, b.threads); + internal::Finish(&report.counters, results.iterations, seconds, b.threads()); } return report; } @@ -113,7 +117,7 @@ BenchmarkReporter::Run CreateRunReport( void RunInThread(const BenchmarkInstance* b, IterationCount iters, int thread_id, ThreadManager* manager) { internal::ThreadTimer timer( - b->measure_process_cpu_time + b->measure_process_cpu_time() ? 
internal::ThreadTimer::CreateProcessCpuTime() : internal::ThreadTimer::Create()); State st = b->Run(iters, thread_id, &timer, manager); @@ -135,26 +139,32 @@ void RunInThread(const BenchmarkInstance* b, IterationCount iters, class BenchmarkRunner { public: BenchmarkRunner(const benchmark::internal::BenchmarkInstance& b_, - std::vector* complexity_reports_) + const size_t outer_repetitions_, + const size_t inner_repetitions_, + std::vector* complexity_reports_, + RunResults* run_results_) : b(b_), complexity_reports(*complexity_reports_), - min_time(!IsZero(b.min_time) ? b.min_time : FLAGS_benchmark_min_time), - repeats(b.repetitions != 0 ? b.repetitions - : FLAGS_benchmark_repetitions), - has_explicit_iteration_count(b.iterations != 0), - pool(b.threads - 1), - iters(has_explicit_iteration_count ? b.iterations : 1) { - run_results.display_report_aggregates_only = + run_results(run_results_), + outer_repetitions(outer_repetitions_), + inner_repetitions(inner_repetitions_), + min_time(!IsZero(b.min_time()) ? b.min_time() : GetMinTime()), + repeats(b.repetitions() != 0 ? b.repetitions() : inner_repetitions_), + has_explicit_iteration_count(b.iterations() != 0), + pool(b.threads() - 1), + iters(has_explicit_iteration_count ? 
b.iterations() : 1) { + run_results->display_report_aggregates_only = (FLAGS_benchmark_report_aggregates_only || FLAGS_benchmark_display_aggregates_only); - run_results.file_report_aggregates_only = + run_results->file_report_aggregates_only = FLAGS_benchmark_report_aggregates_only; - if (b.aggregation_report_mode != internal::ARM_Unspecified) { - run_results.display_report_aggregates_only = - (b.aggregation_report_mode & + if (b.aggregation_report_mode() != internal::ARM_Unspecified) { + run_results->display_report_aggregates_only = + (b.aggregation_report_mode() & internal::ARM_DisplayReportAggregatesOnly); - run_results.file_report_aggregates_only = - (b.aggregation_report_mode & internal::ARM_FileReportAggregatesOnly); + run_results->file_report_aggregates_only = + (b.aggregation_report_mode() & + internal::ARM_FileReportAggregatesOnly); } for (int repetition_num = 0; repetition_num < repeats; repetition_num++) { @@ -162,26 +172,26 @@ class BenchmarkRunner { } // Calculate additional statistics - run_results.aggregates_only = ComputeStats(run_results.non_aggregates); + run_results->aggregates_only = ComputeStats(run_results->non_aggregates); // Maybe calculate complexity report - if ((b.complexity != oNone) && b.last_benchmark_instance) { + if ((b.complexity() != oNone) && b.last_benchmark_instance()) { auto additional_run_stats = ComputeBigO(complexity_reports); - run_results.aggregates_only.insert(run_results.aggregates_only.end(), - additional_run_stats.begin(), - additional_run_stats.end()); + run_results->aggregates_only.insert(run_results->aggregates_only.end(), + additional_run_stats.begin(), + additional_run_stats.end()); complexity_reports.clear(); } } - RunResults&& get_results() { return std::move(run_results); } - private: - RunResults run_results; - const benchmark::internal::BenchmarkInstance& b; std::vector& complexity_reports; + RunResults* run_results = nullptr; + + const size_t outer_repetitions; + const size_t inner_repetitions; const double 
min_time; const int repeats; const bool has_explicit_iteration_count; @@ -198,10 +208,10 @@ class BenchmarkRunner { double seconds; }; IterationResults DoNIterations() { - VLOG(2) << "Running " << b.name.str() << " for " << iters << "\n"; + VLOG(2) << "Running " << b.name().str() << " for " << iters << "\n"; std::unique_ptr manager; - manager.reset(new internal::ThreadManager(b.threads)); + manager.reset(new internal::ThreadManager(b.threads())); // Run all but one thread in separate threads for (std::size_t ti = 0; ti < pool.size(); ++ti) { @@ -228,10 +238,10 @@ class BenchmarkRunner { manager.reset(); // Adjust real/manual time stats since they were reported per thread. - i.results.real_time_used /= b.threads; - i.results.manual_time_used /= b.threads; + i.results.real_time_used /= b.threads(); + i.results.manual_time_used /= b.threads(); // If we were measuring whole-process CPU usage, adjust the CPU time too. - if (b.measure_process_cpu_time) i.results.cpu_time_used /= b.threads; + if (b.measure_process_cpu_time()) i.results.cpu_time_used /= b.threads(); VLOG(2) << "Ran in " << i.results.cpu_time_used << "/" << i.results.real_time_used << "\n"; @@ -240,9 +250,9 @@ class BenchmarkRunner { i.iters = iters; // Base decisions off of real time if requested by this benchmark. i.seconds = i.results.cpu_time_used; - if (b.use_manual_time) { + if (b.use_manual_time()) { i.seconds = i.results.manual_time_used; - } else if (b.use_real_time) { + } else if (b.use_real_time()) { i.seconds = i.results.real_time_used; } @@ -252,7 +262,8 @@ class BenchmarkRunner { IterationCount PredictNumItersNeeded(const IterationResults& i) const { // See how much iterations should be increased by. // Note: Avoid division by zero with max(seconds, 1ns). 
- double multiplier = min_time * 1.4 / std::max(i.seconds, 1e-9); + double multiplier = + min_time * kSafetyMultiplier / std::max(i.seconds, 1e-9); // If our last run was at least 10% of FLAGS_benchmark_min_time then we // use the multiplier directly. // Otherwise we use at most 10 times expansion. @@ -283,7 +294,7 @@ class BenchmarkRunner { // CPU time is specified but the elapsed real time greatly exceeds // the minimum time. // Note that user provided timers are except from this sanity check. - ((i.results.real_time_used >= 5 * min_time) && !b.use_manual_time); + ((i.results.real_time_used >= 5 * min_time) && !b.use_manual_time()); } void DoOneRepetition(int64_t repetition_index) { @@ -296,8 +307,10 @@ class BenchmarkRunner { // Please do note that the if there are repetitions, the iteration count // is *only* calculated for the *first* repetition, and other repetitions // simply use that precomputed iteration count. + const auto exec_start = benchmark::ChronoClockNow(); for (;;) { i = DoNIterations(); + const auto exec_end = benchmark::ChronoClockNow(); // Do we consider the results to be significant? // If we are doing repetitions, and the first repetition was already done, @@ -308,7 +321,38 @@ class BenchmarkRunner { has_explicit_iteration_count || ShouldReportIterationResults(i); - if (results_are_significant) break; // Good, let's report them! + if (results_are_significant) { + // The number of repetitions for random interleaving may be reduced + // to limit the increase in benchmark execution time. When this happens + // the target execution time for each repetition is increased. We may + // need to rerun trials to calculate iters according to the increased + // target execution time. + bool rerun_trial = false; + // If random interleaving is enabled and the repetitions is not + // initialized, do it now. 
+ if (FLAGS_benchmark_enable_random_interleaving && + !b.random_interleaving_repetitions_initialized()) { + b.init_random_interleaving_repetitions( + ComputeRandomInterleavingRepetitions( + {.total_execution_time_per_repetition = exec_end - exec_start, + .time_used_per_repetition = i.seconds, + .real_time_used_per_repetition = i.results.real_time_used, + .min_time_per_repetition = GetMinTime(), + .max_overhead = + FLAGS_benchmark_random_interleaving_max_overhead, + .max_repetitions = GetRepetitions()})); + // If the number of repetitions changed, need to rerun the last trial + // because iters may also change. Note that we only need to do this + // if accumulated_time < b.min_time(), i.e., the iterations we have + // run is not enough for the already adjusted b.min_time(). + // Otherwise, we will still skip the rerun. + rerun_trial = + b.random_interleaving_repetitions() < GetRepetitions() && + i.seconds < min_time; + } + + if (!rerun_trial) break; // Good, let's report them! + } // Nope, bad iteration. Let's re-estimate the hopefully-sufficient // iteration count, and run the benchmark again... @@ -325,7 +369,8 @@ class BenchmarkRunner { if (memory_manager != nullptr) { // Only run a few iterations to reduce the impact of one-time // allocations in benchmarks that are not properly managed. 
- memory_iterations = std::min(16, iters); + memory_iterations = std::min( + 16 / outer_repetitions + (16 % outer_repetitions != 0), iters); memory_manager->Start(); std::unique_ptr manager; manager.reset(new internal::ThreadManager(1)); @@ -341,20 +386,21 @@ class BenchmarkRunner { CreateRunReport(b, i.results, memory_iterations, memory_result, i.seconds, repetition_index); - if (!report.error_occurred && b.complexity != oNone) + if (!report.error_occurred && b.complexity() != oNone) complexity_reports.push_back(report); - run_results.non_aggregates.push_back(report); + run_results->non_aggregates.push_back(report); } }; } // end namespace -RunResults RunBenchmark( - const benchmark::internal::BenchmarkInstance& b, - std::vector* complexity_reports) { - internal::BenchmarkRunner r(b, complexity_reports); - return r.get_results(); +void RunBenchmark(const benchmark::internal::BenchmarkInstance& b, + const size_t outer_repetitions, const size_t inner_repetitions, + std::vector* complexity_reports, + RunResults* run_results) { + internal::BenchmarkRunner r(b, outer_repetitions, inner_repetitions, + complexity_reports, run_results); } } // end namespace internal diff --git a/src/benchmark_runner.h b/src/benchmark_runner.h index 96e8282a11..2da3e83ad8 100644 --- a/src/benchmark_runner.h +++ b/src/benchmark_runner.h @@ -40,9 +40,10 @@ struct RunResults { bool file_report_aggregates_only = false; }; -RunResults RunBenchmark( - const benchmark::internal::BenchmarkInstance& b, - std::vector* complexity_reports); +void RunBenchmark(const benchmark::internal::BenchmarkInstance& b, + const size_t outer_repetitions, const size_t inner_repetitions, + std::vector* complexity_reports, + RunResults* run_results); } // namespace internal diff --git a/test/CMakeLists.txt b/test/CMakeLists.txt index c1a3a3fc19..37d7b9d6b2 100644 --- a/test/CMakeLists.txt +++ b/test/CMakeLists.txt @@ -193,6 +193,7 @@ if (BENCHMARK_ENABLE_GTEST_TESTS) add_gtest(benchmark_gtest) 
add_gtest(benchmark_name_gtest) + add_gtest(benchmark_random_interleaving_gtest) add_gtest(commandlineflags_gtest) add_gtest(statistics_gtest) add_gtest(string_util_gtest) diff --git a/test/benchmark_random_interleaving_gtest.cc b/test/benchmark_random_interleaving_gtest.cc new file mode 100644 index 0000000000..2c017bd7fe --- /dev/null +++ b/test/benchmark_random_interleaving_gtest.cc @@ -0,0 +1,394 @@ +#include +#include +#include + +#include "../src/benchmark_adjust_repetitions.h" +#include "../src/string_util.h" +#include "benchmark/benchmark.h" +#include "gmock/gmock.h" +#include "gtest/gtest.h" + +DECLARE_bool(benchmark_enable_random_interleaving); +DECLARE_string(benchmark_filter); +DECLARE_double(benchmark_random_interleaving_max_overhead); +DECLARE_int32(benchmark_repetitions); + +namespace benchmark { +namespace internal { +namespace { + +class EventQueue : public std::queue { + public: + void Put(const std::string& event) { + push(event); + } + + void Clear() { + while (!empty()) { + pop(); + } + } + + std::string Get() { + std::string event = front(); + pop(); + return event; + } +}; + +static EventQueue* queue = new EventQueue; + +class NullReporter : public BenchmarkReporter { + public: + bool ReportContext(const Context& /*context*/) override { + return true; + } + void ReportRuns(const std::vector& /* report */) override {} +}; + +class BenchmarkTest : public testing::Test { + public: + static void SetupHook(int /* num_threads */) { queue->push("Setup"); } + + static void TeardownHook(int /* num_threads */) { queue->push("Teardown"); } + + void Execute(const std::string& pattern) { + queue->Clear(); + + BenchmarkReporter* reporter = new NullReporter; + FLAGS_benchmark_filter = pattern; + RunSpecifiedBenchmarks(reporter); + delete reporter; + + queue->Put("DONE"); // End marker + } +}; + +static void BM_Match1(benchmark::State& state) { + const int arg = state.range(0); + + ASSERT_EQ(100, state.max_iterations ); + queue->Put(StrFormat("BM_Match1/%d", 
arg)); +} +BENCHMARK(BM_Match1) + ->Iterations(100) + ->Arg(1) + ->Arg(2) + ->Arg(3) + ->Range(10, 80) + ->Args({90}) + ->Args({100}); + +static void BM_MatchOverhead(benchmark::State& state) { + const int arg = state.range(0); + + queue->Put(StrFormat("BM_MatchOverhead/%d", arg)); +} +BENCHMARK(BM_MatchOverhead) + ->Iterations(100) + ->Arg(64) + ->Arg(80); + +TEST_F(BenchmarkTest, Match1) { + Execute("BM_Match1"); + ASSERT_EQ("BM_Match1/1", queue->Get()); + ASSERT_EQ("BM_Match1/2", queue->Get()); + ASSERT_EQ("BM_Match1/3", queue->Get()); + ASSERT_EQ("BM_Match1/10", queue->Get()); + ASSERT_EQ("BM_Match1/64", queue->Get()); + ASSERT_EQ("BM_Match1/80", queue->Get()); + ASSERT_EQ("BM_Match1/90", queue->Get()); + ASSERT_EQ("BM_Match1/100", queue->Get()); + ASSERT_EQ("DONE", queue->Get()); +} + +TEST_F(BenchmarkTest, Match1WithRepetition) { + FLAGS_benchmark_repetitions = 2; + + Execute("BM_Match1/(64|80)"); + ASSERT_EQ("BM_Match1/64", queue->Get()); + ASSERT_EQ("BM_Match1/64", queue->Get()); + ASSERT_EQ("BM_Match1/80", queue->Get()); + ASSERT_EQ("BM_Match1/80", queue->Get()); + ASSERT_EQ("DONE", queue->Get()); +} + +TEST_F(BenchmarkTest, Match1WithRandomInterleaving) { + FLAGS_benchmark_enable_random_interleaving = true; + FLAGS_benchmark_repetitions = 100; + FLAGS_benchmark_random_interleaving_max_overhead = + std::numeric_limits::infinity(); + + std::vector expected({"BM_Match1/64", "BM_Match1/80"}); + std::map interleaving_count; + Execute("BM_Match1/(64|80)"); + for (int i = 0; i < 100; ++i) { + std::vector interleaving; + interleaving.push_back(queue->Get()); + interleaving.push_back(queue->Get()); + EXPECT_THAT(interleaving, testing::UnorderedElementsAreArray(expected)); + interleaving_count[StrFormat("%s,%s", interleaving[0].c_str(), + interleaving[1].c_str())]++; + } + EXPECT_GE(interleaving_count.size(), 2) << "Interleaving was not randomized."; + ASSERT_EQ("DONE", queue->Get()); +} + +TEST_F(BenchmarkTest, Match1WithRandomInterleavingAndZeroOverhead) { + 
FLAGS_benchmark_enable_random_interleaving = true; + FLAGS_benchmark_repetitions = 100; + FLAGS_benchmark_random_interleaving_max_overhead = 0; + + // ComputeRandomInterleavingRepetitions() will kick in and rerun each + // benchmark once with increased iterations. Then number of repetitions will + // be reduced to 1. Thus altogether 4 executions, 2 x BM_MatchOverhead/64, + // and 2 x BM_MatchOverhead/80. + std::vector expected( + {"BM_MatchOverhead/64", "BM_MatchOverhead/80", "BM_MatchOverhead/64", + "BM_MatchOverhead/80"}); + std::map interleaving_count; + Execute("BM_MatchOverhead/(64|80)"); + std::vector interleaving; + interleaving.push_back(queue->Get()); + interleaving.push_back(queue->Get()); + interleaving.push_back(queue->Get()); + interleaving.push_back(queue->Get()); + EXPECT_THAT(interleaving, testing::UnorderedElementsAreArray(expected)); + ASSERT_EQ("DONE", queue->Get()) << "# Repetitions was not reduced to 1."; +} + +TEST(Benchmark, ComputeRandomInterleavingRepetitions) { + // On wall clock time. 
+ EXPECT_EQ(ComputeRandomInterleavingRepetitions( + {.total_execution_time_per_repetition = 0.05, + .time_used_per_repetition = 0.05, + .real_time_used_per_repetition = 0.05, + .min_time_per_repetition = 0.05, + .max_overhead = 0.0, + .max_repetitions = 10}), + 10); + EXPECT_EQ(ComputeRandomInterleavingRepetitions( + {.total_execution_time_per_repetition = 0.05, + .time_used_per_repetition = 0.05, + .real_time_used_per_repetition = 0.05, + .min_time_per_repetition = 0.05, + .max_overhead = 0.4, + .max_repetitions = 10}), + 10); + EXPECT_EQ(ComputeRandomInterleavingRepetitions( + {.total_execution_time_per_repetition = 0.06, + .time_used_per_repetition = 0.05, + .real_time_used_per_repetition = 0.05, + .min_time_per_repetition = 0.05, + .max_overhead = 0.0, + .max_repetitions = 10}), + 8); + EXPECT_EQ(ComputeRandomInterleavingRepetitions( + {.total_execution_time_per_repetition = 0.06, + .time_used_per_repetition = 0.05, + .real_time_used_per_repetition = 0.05, + .min_time_per_repetition = 0.05, + .max_overhead = 0.4, + .max_repetitions = 10}), + 10); + EXPECT_EQ(ComputeRandomInterleavingRepetitions( + {.total_execution_time_per_repetition = 0.08, + .time_used_per_repetition = 0.05, + .real_time_used_per_repetition = 0.05, + .min_time_per_repetition = 0.05, + .max_overhead = 0.0, + .max_repetitions = 10}), + 6); + EXPECT_EQ(ComputeRandomInterleavingRepetitions( + {.total_execution_time_per_repetition = 0.08, + .time_used_per_repetition = 0.05, + .real_time_used_per_repetition = 0.05, + .min_time_per_repetition = 0.05, + .max_overhead = 0.4, + .max_repetitions = 10}), + 9); + EXPECT_EQ(ComputeRandomInterleavingRepetitions( + {.total_execution_time_per_repetition = 0.25, + .time_used_per_repetition = 0.25, + .real_time_used_per_repetition = 0.25, + .min_time_per_repetition = 0.05, + .max_overhead = 0.0, + .max_repetitions = 10}), + 2); + EXPECT_EQ(ComputeRandomInterleavingRepetitions( + {.total_execution_time_per_repetition = 0.25, + .time_used_per_repetition = 0.25, 
+ .real_time_used_per_repetition = 0.25, + .min_time_per_repetition = 0.05, + .max_overhead = 0.4, + .max_repetitions = 10}), + 3); + EXPECT_EQ(ComputeRandomInterleavingRepetitions( + {.total_execution_time_per_repetition = 0.26, + .time_used_per_repetition = 0.25, + .real_time_used_per_repetition = 0.25, + .min_time_per_repetition = 0.05, + .max_overhead = 0.0, + .max_repetitions = 10}), + 2); + EXPECT_EQ(ComputeRandomInterleavingRepetitions( + {.total_execution_time_per_repetition = 0.26, + .time_used_per_repetition = 0.25, + .real_time_used_per_repetition = 0.25, + .min_time_per_repetition = 0.05, + .max_overhead = 0.4, + .max_repetitions = 10}), + 3); + EXPECT_EQ(ComputeRandomInterleavingRepetitions( + {.total_execution_time_per_repetition = 0.38, + .time_used_per_repetition = 0.25, + .real_time_used_per_repetition = 0.25, + .min_time_per_repetition = 0.05, + .max_overhead = 0.0, + .max_repetitions = 10}), + 2); + EXPECT_EQ(ComputeRandomInterleavingRepetitions( + {.total_execution_time_per_repetition = 0.38, + .time_used_per_repetition = 0.25, + .real_time_used_per_repetition = 0.25, + .min_time_per_repetition = 0.05, + .max_overhead = 0.4, + .max_repetitions = 10}), + 3); + + // On CPU time. 
+ EXPECT_EQ(ComputeRandomInterleavingRepetitions( + {.total_execution_time_per_repetition = 0.1, + .time_used_per_repetition = 0.05, + .real_time_used_per_repetition = 0.1, + .min_time_per_repetition = 0.05, + .max_overhead = 0.0, + .max_repetitions = 10}), + 10); + EXPECT_EQ(ComputeRandomInterleavingRepetitions( + {.total_execution_time_per_repetition = 0.1, + .time_used_per_repetition = 0.05, + .real_time_used_per_repetition = 0.1, + .min_time_per_repetition = 0.05, + .max_overhead = 0.4, + .max_repetitions = 10}), + 10); + EXPECT_EQ(ComputeRandomInterleavingRepetitions( + {.total_execution_time_per_repetition = 0.11, + .time_used_per_repetition = 0.05, + .real_time_used_per_repetition = 0.1, + .min_time_per_repetition = 0.05, + .max_overhead = 0.0, + .max_repetitions = 10}), + 9); + EXPECT_EQ(ComputeRandomInterleavingRepetitions( + {.total_execution_time_per_repetition = 0.11, + .time_used_per_repetition = 0.05, + .real_time_used_per_repetition = 0.1, + .min_time_per_repetition = 0.05, + .max_overhead = 0.4, + .max_repetitions = 10}), + 10); + EXPECT_EQ(ComputeRandomInterleavingRepetitions( + {.total_execution_time_per_repetition = 0.15, + .time_used_per_repetition = 0.05, + .real_time_used_per_repetition = 0.1, + .min_time_per_repetition = 0.05, + .max_overhead = 0.0, + .max_repetitions = 10}), + 7); + EXPECT_EQ(ComputeRandomInterleavingRepetitions( + {.total_execution_time_per_repetition = 0.15, + .time_used_per_repetition = 0.05, + .real_time_used_per_repetition = 0.1, + .min_time_per_repetition = 0.05, + .max_overhead = 0.4, + .max_repetitions = 10}), + 9); + EXPECT_EQ(ComputeRandomInterleavingRepetitions( + {.total_execution_time_per_repetition = 0.5, + .time_used_per_repetition = 0.25, + .real_time_used_per_repetition = 0.5, + .min_time_per_repetition = 0.05, + .max_overhead = 0.0, + .max_repetitions = 10}), + 2); + EXPECT_EQ(ComputeRandomInterleavingRepetitions( + {.total_execution_time_per_repetition = 0.5, + .time_used_per_repetition = 0.25, + 
.real_time_used_per_repetition = 0.5, + .min_time_per_repetition = 0.05, + .max_overhead = 0.4, + .max_repetitions = 10}), + 3); + EXPECT_EQ(ComputeRandomInterleavingRepetitions( + {.total_execution_time_per_repetition = 0.51, + .time_used_per_repetition = 0.25, + .real_time_used_per_repetition = 0.5, + .min_time_per_repetition = 0.05, + .max_overhead = 0.0, + .max_repetitions = 10}), + 2); + EXPECT_EQ(ComputeRandomInterleavingRepetitions( + {.total_execution_time_per_repetition = 0.51, + .time_used_per_repetition = 0.25, + .real_time_used_per_repetition = 0.5, + .min_time_per_repetition = 0.05, + .max_overhead = 0.4, + .max_repetitions = 10}), + 3); + EXPECT_EQ(ComputeRandomInterleavingRepetitions( + {.total_execution_time_per_repetition = 0.8, + .time_used_per_repetition = 0.25, + .real_time_used_per_repetition = 0.5, + .min_time_per_repetition = 0.05, + .max_overhead = 0.0, + .max_repetitions = 10}), + 2); + EXPECT_EQ(ComputeRandomInterleavingRepetitions( + {.total_execution_time_per_repetition = 0.8, + .time_used_per_repetition = 0.25, + .real_time_used_per_repetition = 0.5, + .min_time_per_repetition = 0.05, + .max_overhead = 0.4, + .max_repetitions = 10}), + 2); + + // Corner cases. 
+ EXPECT_EQ(ComputeRandomInterleavingRepetitions( + {.total_execution_time_per_repetition = 0.0, + .time_used_per_repetition = 0.25, + .real_time_used_per_repetition = 0.5, + .min_time_per_repetition = 0.05, + .max_overhead = 0.4, + .max_repetitions = 10}), + 3); + EXPECT_EQ(ComputeRandomInterleavingRepetitions( + {.total_execution_time_per_repetition = 0.8, + .time_used_per_repetition = 0.0, + .real_time_used_per_repetition = 0.5, + .min_time_per_repetition = 0.05, + .max_overhead = 0.4, + .max_repetitions = 10}), + 9); + EXPECT_EQ(ComputeRandomInterleavingRepetitions( + {.total_execution_time_per_repetition = 0.8, + .time_used_per_repetition = 0.25, + .real_time_used_per_repetition = 0.0, + .min_time_per_repetition = 0.05, + .max_overhead = 0.4, + .max_repetitions = 10}), + 1); + EXPECT_EQ(ComputeRandomInterleavingRepetitions( + {.total_execution_time_per_repetition = 0.8, + .time_used_per_repetition = 0.25, + .real_time_used_per_repetition = 0.5, + .min_time_per_repetition = 0.0, + .max_overhead = 0.4, + .max_repetitions = 10}), + 1); +} + +} // namespace +} // namespace internal +} // namespace benchmark From f6033516e069a68e44464e0a0503a1cb7597c32a Mon Sep 17 00:00:00 2001 From: Hai Huang Date: Sun, 28 Mar 2021 17:35:13 -0400 Subject: [PATCH 02/21] Fix benchmark_random_interleaving_gtest.cc for fr-1051 Committer: Hai Huang On branch fr-1051 Your branch is up to date with 'origin/fr-1051'. Changes to be committed: modified: src/benchmark.cc modified: src/benchmark_runner.cc modified: test/benchmark_random_interleaving_gtest.cc --- src/benchmark.cc | 2 +- src/benchmark_runner.cc | 23 ++++++++++++--------- test/benchmark_random_interleaving_gtest.cc | 9 ++++---- 3 files changed, 19 insertions(+), 15 deletions(-) diff --git a/src/benchmark.cc b/src/benchmark.cc index 32f01b8272..8b48e49bac 100644 --- a/src/benchmark.cc +++ b/src/benchmark.cc @@ -328,7 +328,7 @@ void RunBenchmarks(const std::vector& benchmarks, FLAGS_benchmark_enable_random_interleaving ? 
1 : GetRepetitions(); size_t outer_repetitions = FLAGS_benchmark_enable_random_interleaving ? GetRepetitions() : 1; - std::vector benchmark_indices(benchmarks.size()); + std::vector benchmark_indices(benchmarks.size()); for (size_t i = 0; i < benchmarks.size(); ++i) { benchmark_indices[i] = i; } diff --git a/src/benchmark_runner.cc b/src/benchmark_runner.cc index 9cd93ca97b..60a9f6be82 100644 --- a/src/benchmark_runner.cc +++ b/src/benchmark_runner.cc @@ -148,7 +148,7 @@ class BenchmarkRunner { run_results(run_results_), outer_repetitions(outer_repetitions_), inner_repetitions(inner_repetitions_), - min_time(!IsZero(b.min_time()) ? b.min_time() : GetMinTime()), + // min_time(!IsZero(b.min_time()) ? b.min_time() : GetMinTime()), repeats(b.repetitions() != 0 ? b.repetitions() : inner_repetitions_), has_explicit_iteration_count(b.iterations() != 0), pool(b.threads() - 1), @@ -192,7 +192,7 @@ class BenchmarkRunner { const size_t outer_repetitions; const size_t inner_repetitions; - const double min_time; + // const double min_time; const int repeats; const bool has_explicit_iteration_count; @@ -263,13 +263,13 @@ class BenchmarkRunner { // See how much iterations should be increased by. // Note: Avoid division by zero with max(seconds, 1ns). double multiplier = - min_time * kSafetyMultiplier / std::max(i.seconds, 1e-9); + b.min_time() * kSafetyMultiplier / std::max(i.seconds, 1e-9); // If our last run was at least 10% of FLAGS_benchmark_min_time then we // use the multiplier directly. // Otherwise we use at most 10 times expansion. // NOTE: When the last run was at least 10% of the min time the max // expansion should be 14x. - bool is_significant = (i.seconds / min_time) > 0.1; + bool is_significant = (i.seconds / b.min_time()) > 0.1; multiplier = is_significant ? multiplier : std::min(10.0, multiplier); if (multiplier <= 1.0) multiplier = 2.0; @@ -290,11 +290,14 @@ class BenchmarkRunner { // or because an error was reported. 
return i.results.has_error_ || i.iters >= kMaxIterations || // Too many iterations already. - i.seconds >= min_time || // The elapsed time is large enough. - // CPU time is specified but the elapsed real time greatly exceeds - // the minimum time. - // Note that user provided timers are except from this sanity check. - ((i.results.real_time_used >= 5 * min_time) && !b.use_manual_time()); + i.seconds >= b.min_time() || // The elapsed time is large enough. + // CPU time is specified but the + // elapsed real time greatly exceeds + // the minimum time. Note that user + // provided timers are except from this + // sanity check. + ((i.results.real_time_used >= 5 * b.min_time()) && + !b.use_manual_time()); } void DoOneRepetition(int64_t repetition_index) { @@ -348,7 +351,7 @@ class BenchmarkRunner { // Otherwise, we will still skip the rerun. rerun_trial = b.random_interleaving_repetitions() < GetRepetitions() && - i.seconds < min_time; + i.seconds < b.min_time(); } if (!rerun_trial) break; // Good, let's report them! diff --git a/test/benchmark_random_interleaving_gtest.cc b/test/benchmark_random_interleaving_gtest.cc index 2c017bd7fe..a45b566ec6 100644 --- a/test/benchmark_random_interleaving_gtest.cc +++ b/test/benchmark_random_interleaving_gtest.cc @@ -67,7 +67,7 @@ class BenchmarkTest : public testing::Test { static void BM_Match1(benchmark::State& state) { const int arg = state.range(0); - ASSERT_EQ(100, state.max_iterations ); + for (auto _ : state) {} queue->Put(StrFormat("BM_Match1/%d", arg)); } BENCHMARK(BM_Match1) @@ -82,6 +82,7 @@ BENCHMARK(BM_Match1) static void BM_MatchOverhead(benchmark::State& state) { const int arg = state.range(0); + for (auto _ : state) {} queue->Put(StrFormat("BM_MatchOverhead/%d", arg)); } BENCHMARK(BM_MatchOverhead) @@ -141,8 +142,8 @@ TEST_F(BenchmarkTest, Match1WithRandomInterleavingAndZeroOverhead) { // ComputeRandomInterleavingRepetitions() will kick in and rerun each // benchmark once with increased iterations. 
Then number of repetitions will - // be reduced to 1. Thus altogether 4 executions, 2 x BM_MatchOverhead/64, - // and 2 x BM_MatchOverhead/80. + // be reduced to < 100. The first 4 executions should be + // 2 x BM_MatchOverhead/64 and 2 x BM_MatchOverhead/80. std::vector expected( {"BM_MatchOverhead/64", "BM_MatchOverhead/80", "BM_MatchOverhead/64", "BM_MatchOverhead/80"}); @@ -154,7 +155,7 @@ TEST_F(BenchmarkTest, Match1WithRandomInterleavingAndZeroOverhead) { interleaving.push_back(queue->Get()); interleaving.push_back(queue->Get()); EXPECT_THAT(interleaving, testing::UnorderedElementsAreArray(expected)); - ASSERT_EQ("DONE", queue->Get()) << "# Repetitions was not reduced to 1."; + ASSERT_LT(queue->size(), 100) << "# Repetitions was not reduced to < 100."; } TEST(Benchmark, ComputeRandomInterleavingRepetitions) { From 58f3a39b04aecb09c1df4aafe845e8da263ac222 Mon Sep 17 00:00:00 2001 From: Hai Huang Date: Sun, 28 Mar 2021 17:52:56 -0400 Subject: [PATCH 03/21] Fix macos build for fr-1051 Committer: Hai Huang On branch fr-1051 Your branch is up to date with 'origin/fr-1051'. 
Changes to be committed: modified: src/benchmark_api_internal.cc modified: src/benchmark_api_internal.h modified: src/benchmark_runner.cc --- src/benchmark_api_internal.cc | 2 +- src/benchmark_api_internal.h | 4 ++-- src/benchmark_runner.cc | 7 +++---- 3 files changed, 6 insertions(+), 7 deletions(-) diff --git a/src/benchmark_api_internal.cc b/src/benchmark_api_internal.cc index f7e38dc34d..f57b34d805 100644 --- a/src/benchmark_api_internal.cc +++ b/src/benchmark_api_internal.cc @@ -121,7 +121,7 @@ IterationCount BenchmarkInstance::iterations() const { return iterations_; } -int BenchmarkInstance::repetitions() const { +size_t BenchmarkInstance::repetitions() const { return repetitions_; } diff --git a/src/benchmark_api_internal.h b/src/benchmark_api_internal.h index 4b9c16ddb6..dae49fc57a 100644 --- a/src/benchmark_api_internal.h +++ b/src/benchmark_api_internal.h @@ -26,7 +26,7 @@ class BenchmarkInstance { // Const accessors. const BenchmarkName& name() const; - int repetitions() const; + size_t repetitions() const; const std::vector* statistics() const; AggregationReportMode aggregation_report_mode() const; TimeUnit time_unit() const; @@ -80,7 +80,7 @@ class BenchmarkInstance { UserCounters counters_; const std::vector* statistics_; bool last_benchmark_instance_; - int repetitions_; + size_t repetitions_; double min_time_; IterationCount iterations_; int threads_; // Number of concurrent threads to use diff --git a/src/benchmark_runner.cc b/src/benchmark_runner.cc index 60a9f6be82..7f2455a929 100644 --- a/src/benchmark_runner.cc +++ b/src/benchmark_runner.cc @@ -148,7 +148,6 @@ class BenchmarkRunner { run_results(run_results_), outer_repetitions(outer_repetitions_), inner_repetitions(inner_repetitions_), - // min_time(!IsZero(b.min_time()) ? b.min_time() : GetMinTime()), repeats(b.repetitions() != 0 ? 
b.repetitions() : inner_repetitions_), has_explicit_iteration_count(b.iterations() != 0), pool(b.threads() - 1), @@ -167,7 +166,8 @@ class BenchmarkRunner { internal::ARM_FileReportAggregatesOnly); } - for (int repetition_num = 0; repetition_num < repeats; repetition_num++) { + for (size_t repetition_num = 0; repetition_num < repeats; + repetition_num++) { DoOneRepetition(repetition_num); } @@ -192,8 +192,7 @@ class BenchmarkRunner { const size_t outer_repetitions; const size_t inner_repetitions; - // const double min_time; - const int repeats; + const size_t repeats; const bool has_explicit_iteration_count; std::vector pool; From 89d3a78507c2964530fbf0f2bea8def4003460d9 Mon Sep 17 00:00:00 2001 From: Hai Huang Date: Sun, 28 Mar 2021 18:16:52 -0400 Subject: [PATCH 04/21] Fix macos and windows build for fr-1051. Committer: Hai Huang On branch fr-1051 Your branch is up to date with 'origin/fr-1051'. Changes to be committed: modified: src/benchmark_runner.cc --- src/benchmark_runner.cc | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/src/benchmark_runner.cc b/src/benchmark_runner.cc index 7f2455a929..d54fa53724 100644 --- a/src/benchmark_runner.cc +++ b/src/benchmark_runner.cc @@ -148,7 +148,7 @@ class BenchmarkRunner { run_results(run_results_), outer_repetitions(outer_repetitions_), inner_repetitions(inner_repetitions_), - repeats(b.repetitions() != 0 ? b.repetitions() : inner_repetitions_), + repeats(b.repetitions() != 0 ? b.repetitions() : inner_repetitions), has_explicit_iteration_count(b.iterations() != 0), pool(b.threads() - 1), iters(has_explicit_iteration_count ? b.iterations() : 1) { @@ -334,15 +334,15 @@ class BenchmarkRunner { // initialized, do it now. 
if (FLAGS_benchmark_enable_random_interleaving && !b.random_interleaving_repetitions_initialized()) { + InternalRandomInterleavingRepetitionsInput input; + input.total_execution_time_per_repetition = exec_end - exec_start; + input.time_used_per_repetition = i.seconds; + input.real_time_used_per_repetition = i.results.real_time_used; + input.min_time_per_repetition = GetMinTime(); + input.max_overhead = FLAGS_benchmark_random_interleaving_max_overhead; + input.max_repetitions = GetRepetitions(); b.init_random_interleaving_repetitions( - ComputeRandomInterleavingRepetitions( - {.total_execution_time_per_repetition = exec_end - exec_start, - .time_used_per_repetition = i.seconds, - .real_time_used_per_repetition = i.results.real_time_used, - .min_time_per_repetition = GetMinTime(), - .max_overhead = - FLAGS_benchmark_random_interleaving_max_overhead, - .max_repetitions = GetRepetitions()})); + ComputeRandomInterleavingRepetitions(input)); // If the number of repetitions changed, need to rerun the last trial // because iters may also change. Note that we only need to do this // if accumulated_time < b.min_time(), i.e., the iterations we have From 0aadade98c3145b72053ad5d453cbca597fb9d44 Mon Sep 17 00:00:00 2001 From: Hai Huang Date: Sun, 28 Mar 2021 19:24:35 -0400 Subject: [PATCH 05/21] Fix benchmark_random_interleaving_test.cc for macos and windows in fr-1051 Committer: Hai Huang On branch fr-1051 Your branch is up to date with 'origin/fr-1051'. 
Changes to be committed: modified: test/benchmark_random_interleaving_gtest.cc --- test/benchmark_random_interleaving_gtest.cc | 217 ++++---------------- 1 file changed, 45 insertions(+), 172 deletions(-) diff --git a/test/benchmark_random_interleaving_gtest.cc b/test/benchmark_random_interleaving_gtest.cc index a45b566ec6..4b5f70a728 100644 --- a/test/benchmark_random_interleaving_gtest.cc +++ b/test/benchmark_random_interleaving_gtest.cc @@ -65,10 +65,10 @@ class BenchmarkTest : public testing::Test { }; static void BM_Match1(benchmark::State& state) { - const int arg = state.range(0); + const int64_t arg = state.range(0); for (auto _ : state) {} - queue->Put(StrFormat("BM_Match1/%d", arg)); + queue->Put(StrFormat("BM_Match1/%ld", arg)); } BENCHMARK(BM_Match1) ->Iterations(100) @@ -80,10 +80,10 @@ BENCHMARK(BM_Match1) ->Args({100}); static void BM_MatchOverhead(benchmark::State& state) { - const int arg = state.range(0); + const int64_t arg = state.range(0); for (auto _ : state) {} - queue->Put(StrFormat("BM_MatchOverhead/%d", arg)); + queue->Put(StrFormat("BM_MatchOverhead/%ld", arg)); } BENCHMARK(BM_MatchOverhead) ->Iterations(100) @@ -158,235 +158,108 @@ TEST_F(BenchmarkTest, Match1WithRandomInterleavingAndZeroOverhead) { ASSERT_LT(queue->size(), 100) << "# Repetitions was not reduced to < 100."; } +InternalRandomInterleavingRepetitionsInput CreateInput( + double total, double time, double real_time, double min_time, + double overhead, size_t repetitions) { + InternalRandomInterleavingRepetitionsInput input; + input.total_execution_time_per_repetition = total; + input.time_used_per_repetition = time; + input.real_time_used_per_repetition = real_time; + input.min_time_per_repetition = min_time; + input.max_overhead = overhead; + input.max_repetitions = repetitions; + return input; +} + TEST(Benchmark, ComputeRandomInterleavingRepetitions) { // On wall clock time. 
EXPECT_EQ(ComputeRandomInterleavingRepetitions( - {.total_execution_time_per_repetition = 0.05, - .time_used_per_repetition = 0.05, - .real_time_used_per_repetition = 0.05, - .min_time_per_repetition = 0.05, - .max_overhead = 0.0, - .max_repetitions = 10}), + CreateInput(0.05, 0.05, 0.05, 0.05, 0.0, 10)), 10); EXPECT_EQ(ComputeRandomInterleavingRepetitions( - {.total_execution_time_per_repetition = 0.05, - .time_used_per_repetition = 0.05, - .real_time_used_per_repetition = 0.05, - .min_time_per_repetition = 0.05, - .max_overhead = 0.4, - .max_repetitions = 10}), + CreateInput(0.05, 0.05, 0.05, 0.05, 0.4, 10)), 10); EXPECT_EQ(ComputeRandomInterleavingRepetitions( - {.total_execution_time_per_repetition = 0.06, - .time_used_per_repetition = 0.05, - .real_time_used_per_repetition = 0.05, - .min_time_per_repetition = 0.05, - .max_overhead = 0.0, - .max_repetitions = 10}), + CreateInput(0.06, 0.05, 0.05, 0.05, 0.0, 10)), 8); EXPECT_EQ(ComputeRandomInterleavingRepetitions( - {.total_execution_time_per_repetition = 0.06, - .time_used_per_repetition = 0.05, - .real_time_used_per_repetition = 0.05, - .min_time_per_repetition = 0.05, - .max_overhead = 0.4, - .max_repetitions = 10}), + CreateInput(0.06, 0.05, 0.05, 0.05, 0.4, 10)), 10); EXPECT_EQ(ComputeRandomInterleavingRepetitions( - {.total_execution_time_per_repetition = 0.08, - .time_used_per_repetition = 0.05, - .real_time_used_per_repetition = 0.05, - .min_time_per_repetition = 0.05, - .max_overhead = 0.0, - .max_repetitions = 10}), + CreateInput(0.08, 0.05, 0.05, 0.05, 0.0, 10)), 6); EXPECT_EQ(ComputeRandomInterleavingRepetitions( - {.total_execution_time_per_repetition = 0.08, - .time_used_per_repetition = 0.05, - .real_time_used_per_repetition = 0.05, - .min_time_per_repetition = 0.05, - .max_overhead = 0.4, - .max_repetitions = 10}), + CreateInput(0.08, 0.05, 0.05, 0.05, 0.4, 10)), 9); EXPECT_EQ(ComputeRandomInterleavingRepetitions( - {.total_execution_time_per_repetition = 0.25, - .time_used_per_repetition = 
0.25, - .real_time_used_per_repetition = 0.25, - .min_time_per_repetition = 0.05, - .max_overhead = 0.0, - .max_repetitions = 10}), + CreateInput(0.26, 0.25, 0.25, 0.05, 0.0, 10)), 2); EXPECT_EQ(ComputeRandomInterleavingRepetitions( - {.total_execution_time_per_repetition = 0.25, - .time_used_per_repetition = 0.25, - .real_time_used_per_repetition = 0.25, - .min_time_per_repetition = 0.05, - .max_overhead = 0.4, - .max_repetitions = 10}), + CreateInput(0.25, 0.25, 0.25, 0.05, 0.4, 10)), 3); EXPECT_EQ(ComputeRandomInterleavingRepetitions( - {.total_execution_time_per_repetition = 0.26, - .time_used_per_repetition = 0.25, - .real_time_used_per_repetition = 0.25, - .min_time_per_repetition = 0.05, - .max_overhead = 0.0, - .max_repetitions = 10}), + CreateInput(0.26, 0.25, 0.25, 0.05, 0.0, 10)), 2); EXPECT_EQ(ComputeRandomInterleavingRepetitions( - {.total_execution_time_per_repetition = 0.26, - .time_used_per_repetition = 0.25, - .real_time_used_per_repetition = 0.25, - .min_time_per_repetition = 0.05, - .max_overhead = 0.4, - .max_repetitions = 10}), + CreateInput(0.26, 0.25, 0.25, 0.05, 0.4, 10)), 3); EXPECT_EQ(ComputeRandomInterleavingRepetitions( - {.total_execution_time_per_repetition = 0.38, - .time_used_per_repetition = 0.25, - .real_time_used_per_repetition = 0.25, - .min_time_per_repetition = 0.05, - .max_overhead = 0.0, - .max_repetitions = 10}), + CreateInput(0.38, 0.25, 0.25, 0.05, 0.0, 10)), 2); EXPECT_EQ(ComputeRandomInterleavingRepetitions( - {.total_execution_time_per_repetition = 0.38, - .time_used_per_repetition = 0.25, - .real_time_used_per_repetition = 0.25, - .min_time_per_repetition = 0.05, - .max_overhead = 0.4, - .max_repetitions = 10}), + CreateInput(0.38, 0.25, 0.25, 0.05, 0.4, 10)), 3); // On CPU time. 
EXPECT_EQ(ComputeRandomInterleavingRepetitions( - {.total_execution_time_per_repetition = 0.1, - .time_used_per_repetition = 0.05, - .real_time_used_per_repetition = 0.1, - .min_time_per_repetition = 0.05, - .max_overhead = 0.0, - .max_repetitions = 10}), + CreateInput(0.1, 0.05, 0.1, 0.05, 0.0, 10)), 10); EXPECT_EQ(ComputeRandomInterleavingRepetitions( - {.total_execution_time_per_repetition = 0.1, - .time_used_per_repetition = 0.05, - .real_time_used_per_repetition = 0.1, - .min_time_per_repetition = 0.05, - .max_overhead = 0.4, - .max_repetitions = 10}), + CreateInput(0.1, 0.05, 0.1, 0.05, 0.4, 10)), 10); EXPECT_EQ(ComputeRandomInterleavingRepetitions( - {.total_execution_time_per_repetition = 0.11, - .time_used_per_repetition = 0.05, - .real_time_used_per_repetition = 0.1, - .min_time_per_repetition = 0.05, - .max_overhead = 0.0, - .max_repetitions = 10}), + CreateInput(0.11, 0.05, 0.1, 0.05, 0.0, 10)), 9); EXPECT_EQ(ComputeRandomInterleavingRepetitions( - {.total_execution_time_per_repetition = 0.11, - .time_used_per_repetition = 0.05, - .real_time_used_per_repetition = 0.1, - .min_time_per_repetition = 0.05, - .max_overhead = 0.4, - .max_repetitions = 10}), + CreateInput(0.11, 0.05, 0.1, 0.05, 0.4, 10)), 10); EXPECT_EQ(ComputeRandomInterleavingRepetitions( - {.total_execution_time_per_repetition = 0.15, - .time_used_per_repetition = 0.05, - .real_time_used_per_repetition = 0.1, - .min_time_per_repetition = 0.05, - .max_overhead = 0.0, - .max_repetitions = 10}), + CreateInput(0.15, 0.05, 0.1, 0.05, 0.0, 10)), 7); EXPECT_EQ(ComputeRandomInterleavingRepetitions( - {.total_execution_time_per_repetition = 0.15, - .time_used_per_repetition = 0.05, - .real_time_used_per_repetition = 0.1, - .min_time_per_repetition = 0.05, - .max_overhead = 0.4, - .max_repetitions = 10}), + CreateInput(0.15, 0.05, 0.1, 0.05, 0.4, 10)), 9); EXPECT_EQ(ComputeRandomInterleavingRepetitions( - {.total_execution_time_per_repetition = 0.5, - .time_used_per_repetition = 0.25, - 
.real_time_used_per_repetition = 0.5, - .min_time_per_repetition = 0.05, - .max_overhead = 0.0, - .max_repetitions = 10}), + CreateInput(0.5, 0.25, 0.5, 0.05, 0.0, 10)), 2); EXPECT_EQ(ComputeRandomInterleavingRepetitions( - {.total_execution_time_per_repetition = 0.5, - .time_used_per_repetition = 0.25, - .real_time_used_per_repetition = 0.5, - .min_time_per_repetition = 0.05, - .max_overhead = 0.4, - .max_repetitions = 10}), + CreateInput(0.5, 0.25, 0.5, 0.05, 0.4, 10)), 3); EXPECT_EQ(ComputeRandomInterleavingRepetitions( - {.total_execution_time_per_repetition = 0.51, - .time_used_per_repetition = 0.25, - .real_time_used_per_repetition = 0.5, - .min_time_per_repetition = 0.05, - .max_overhead = 0.0, - .max_repetitions = 10}), + CreateInput(0.51, 0.25, 0.5, 0.05, 0.0, 10)), 2); EXPECT_EQ(ComputeRandomInterleavingRepetitions( - {.total_execution_time_per_repetition = 0.51, - .time_used_per_repetition = 0.25, - .real_time_used_per_repetition = 0.5, - .min_time_per_repetition = 0.05, - .max_overhead = 0.4, - .max_repetitions = 10}), + CreateInput(0.51, 0.25, 0.5, 0.05, 0.4, 10)), 3); EXPECT_EQ(ComputeRandomInterleavingRepetitions( - {.total_execution_time_per_repetition = 0.8, - .time_used_per_repetition = 0.25, - .real_time_used_per_repetition = 0.5, - .min_time_per_repetition = 0.05, - .max_overhead = 0.0, - .max_repetitions = 10}), + CreateInput(0.8, 0.25, 0.5, 0.05, 0.0, 10)), 2); EXPECT_EQ(ComputeRandomInterleavingRepetitions( - {.total_execution_time_per_repetition = 0.8, - .time_used_per_repetition = 0.25, - .real_time_used_per_repetition = 0.5, - .min_time_per_repetition = 0.05, - .max_overhead = 0.4, - .max_repetitions = 10}), + CreateInput(0.8, 0.25, 0.5, 0.05, 0.4, 10)), 2); // Corner cases. 
EXPECT_EQ(ComputeRandomInterleavingRepetitions( - {.total_execution_time_per_repetition = 0.0, - .time_used_per_repetition = 0.25, - .real_time_used_per_repetition = 0.5, - .min_time_per_repetition = 0.05, - .max_overhead = 0.4, - .max_repetitions = 10}), + CreateInput(0.0, 0.25, 0.5, 0.05, 0.4, 10)), 3); EXPECT_EQ(ComputeRandomInterleavingRepetitions( - {.total_execution_time_per_repetition = 0.8, - .time_used_per_repetition = 0.0, - .real_time_used_per_repetition = 0.5, - .min_time_per_repetition = 0.05, - .max_overhead = 0.4, - .max_repetitions = 10}), + CreateInput(0.8, 0.0, 0.5, 0.05, 0.4, 10)), 9); EXPECT_EQ(ComputeRandomInterleavingRepetitions( - {.total_execution_time_per_repetition = 0.8, - .time_used_per_repetition = 0.25, - .real_time_used_per_repetition = 0.0, - .min_time_per_repetition = 0.05, - .max_overhead = 0.4, - .max_repetitions = 10}), + CreateInput(0.8, 0.25, 0.0, 0.05, 0.4, 10)), 1); EXPECT_EQ(ComputeRandomInterleavingRepetitions( - {.total_execution_time_per_repetition = 0.8, - .time_used_per_repetition = 0.25, - .real_time_used_per_repetition = 0.5, - .min_time_per_repetition = 0.0, - .max_overhead = 0.4, - .max_repetitions = 10}), + CreateInput(0.8, 0.25, 0.5, 0.0, 0.4, 10)), 1); } From f42e87c966f30ae34c654f1db06996890c436b3b Mon Sep 17 00:00:00 2001 From: Hai Huang Date: Sun, 28 Mar 2021 19:37:07 -0400 Subject: [PATCH 06/21] Fix int type benchmark_random_interleaving_gtest for macos in fr-1051 Committer: Hai Huang On branch fr-1051 Your branch is up to date with 'origin/fr-1051'. 
Changes to be committed: modified: test/benchmark_random_interleaving_gtest.cc --- test/benchmark_random_interleaving_gtest.cc | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/test/benchmark_random_interleaving_gtest.cc b/test/benchmark_random_interleaving_gtest.cc index 4b5f70a728..582beeba27 100644 --- a/test/benchmark_random_interleaving_gtest.cc +++ b/test/benchmark_random_interleaving_gtest.cc @@ -68,7 +68,7 @@ static void BM_Match1(benchmark::State& state) { const int64_t arg = state.range(0); for (auto _ : state) {} - queue->Put(StrFormat("BM_Match1/%ld", arg)); + queue->Put(StrFormat("BM_Match1/%d", static_cast<int>(arg))); } BENCHMARK(BM_Match1) ->Iterations(100) @@ -83,7 +83,7 @@ static void BM_MatchOverhead(benchmark::State& state) { const int64_t arg = state.range(0); for (auto _ : state) {} - queue->Put(StrFormat("BM_MatchOverhead/%ld", arg)); + queue->Put(StrFormat("BM_MatchOverhead/%d", static_cast<int>(arg))); } BENCHMARK(BM_MatchOverhead) ->Iterations(100) From 8660ae121f41c902daed5836cae0e94dfdb584bc Mon Sep 17 00:00:00 2001 From: Hai Huang Date: Tue, 30 Mar 2021 01:16:30 -0400 Subject: [PATCH 07/21] Address dominichamon's comments 03/29 for fr-1051 Committer: Hai Huang On branch fr-1051 Your branch is up to date with 'origin/fr-1051'. Changes to be committed: modified: src/benchmark.cc modified: src/benchmark_api_internal.cc modified: src/benchmark_api_internal.h modified: test/benchmark_random_interleaving_gtest.cc --- src/benchmark.cc | 48 +++++++++++++-------- src/benchmark_api_internal.cc | 10 ++--- src/benchmark_api_internal.h | 14 +++--- test/benchmark_random_interleaving_gtest.cc | 9 ++-- 4 files changed, 47 insertions(+), 34 deletions(-) diff --git a/src/benchmark.cc b/src/benchmark.cc index 8b48e49bac..cf1cd7ec35 100644 --- a/src/benchmark.cc +++ b/src/benchmark.cc @@ -60,8 +60,8 @@ namespace { // Attempt to make each repetition run for at least this much of time. 
-constexpr double kDefaultMinTimeTotal = 0.5; -constexpr size_t kDefaultRepetitions = 12; +constexpr double kDefaultMinTimeTotalSecs = 0.5; +constexpr int64_t kDefaultRepetitions = 12; } // namespace @@ -73,23 +73,30 @@ DEFINE_bool(benchmark_list_tests, false); // linked into the binary are run. DEFINE_string(benchmark_filter, "."); +// Do NOT read these flags directly. Use Get*() to read them. +namespace do_not_read_flag_directly { + // Minimum number of seconds we should run benchmark per repetition before // results are considered significant. For cpu-time based tests, this is the // lower bound on the total cpu time used by all threads that make up the test. // For real-time based tests, this is the lower bound on the elapsed time of the // benchmark execution, regardless of number of threads. If left unset, will use -// 0.5 / 12 if random interleaving is enabled. Otherwise, will use 0.5. +// kDefaultMinTimeTotalSecs / FLAGS_benchmark_repetitions, if random +// interleaving is enabled. Otherwise, will use kDefaultMinTimeTotalSecs. // Do NOT read this flag directly. Use GetMinTime() to read this flag. DEFINE_double(benchmark_min_time, -1.0); // The number of runs of each benchmark. If greater than 1, the mean and // standard deviation of the runs will be reported. By default, the number of -// repetitions is 1 if random interleaving is disabled, and up to 12 if random -// interleaving is enabled. (Read the documentation for random interleaving to -// see why it might be less than 12.) +// repetitions is 1 if random interleaving is disabled, and up to +// kDefaultRepetitions if random interleaving is enabled. (Read the +// documentation for random interleaving to see why it might be less than +// kDefaultRepetitions.) // Do NOT read this flag directly, Use GetRepetitions() to access this flag. DEFINE_int32(benchmark_repetitions, -1); +} // namespace do_not_read_flag_directly + // The maximum overhead allowed for random interleaving. 
A value X means total // execution time under random interleaving is limited by // (1 + X) * original total execution time. Set to 'inf' to allow infinite @@ -147,22 +154,23 @@ const double kSafetyMultiplier = 1.4; // Wraps --benchmark_min_time and returns valid default values if not supplied. double GetMinTime() { - const double min_time = FLAGS_benchmark_min_time; + const double min_time = do_not_read_flag_directly::FLAGS_benchmark_min_time; if (min_time >= 0.0) { return min_time; } if (FLAGS_benchmark_enable_random_interleaving) { - return kDefaultMinTimeTotal / kDefaultRepetitions; + return kDefaultMinTimeTotalSecs / GetRepetitions(); } - return kDefaultMinTimeTotal; + return kDefaultMinTimeTotalSecs; } // Wraps --benchmark_repetitions and return valid default value if not supplied. -size_t GetRepetitions() { - const int repetitions = FLAGS_benchmark_repetitions; +int64_t GetRepetitions() { + const int64_t repetitions = + do_not_read_flag_directly::FLAGS_benchmark_repetitions; if (repetitions >= 0) { - return static_cast(repetitions); + return repetitions; } if (FLAGS_benchmark_enable_random_interleaving) { @@ -324,9 +332,9 @@ void RunBenchmarks(const std::vector& benchmarks, // {Random order of A, B, C, ...}, {Random order of A, B, C, ...}, ... // That is, repetitions is outside of RunBenchmark(), hence the name // outer_repetitions. - size_t inner_repetitions = + int64_t inner_repetitions = FLAGS_benchmark_enable_random_interleaving ? 1 : GetRepetitions(); - size_t outer_repetitions = + int64_t outer_repetitions = FLAGS_benchmark_enable_random_interleaving ? GetRepetitions() : 1; std::vector benchmark_indices(benchmarks.size()); for (size_t i = 0; i < benchmarks.size(); ++i) { @@ -335,7 +343,7 @@ void RunBenchmarks(const std::vector& benchmarks, // 'run_results_vector' and 'benchmarks' are parallel arrays. 
std::vector run_results_vector(benchmarks.size()); - for (size_t i = 0; i < outer_repetitions; i++) { + for (int64_t i = 0; i < outer_repetitions; i++) { if (FLAGS_benchmark_enable_random_interleaving) { std::random_shuffle(benchmark_indices.begin(), benchmark_indices.end()); } @@ -533,10 +541,12 @@ void ParseCommandLineFlags(int* argc, char** argv) { if (ParseBoolFlag(argv[i], "benchmark_list_tests", &FLAGS_benchmark_list_tests) || ParseStringFlag(argv[i], "benchmark_filter", &FLAGS_benchmark_filter) || - ParseDoubleFlag(argv[i], "benchmark_min_time", - &FLAGS_benchmark_min_time) || - ParseInt32Flag(argv[i], "benchmark_repetitions", - &FLAGS_benchmark_repetitions) || + ParseDoubleFlag( + argv[i], "benchmark_min_time", + &do_not_read_flag_directly::FLAGS_benchmark_min_time) || + ParseInt32Flag( + argv[i], "benchmark_repetitions", + &do_not_read_flag_directly::FLAGS_benchmark_repetitions) || ParseBoolFlag(argv[i], "benchmark_enable_random_interleaving", &FLAGS_benchmark_enable_random_interleaving) || ParseDoubleFlag(argv[i], "benchmark_random_interleaving_max_overhead", diff --git a/src/benchmark_api_internal.cc b/src/benchmark_api_internal.cc index f57b34d805..faa974d67d 100644 --- a/src/benchmark_api_internal.cc +++ b/src/benchmark_api_internal.cc @@ -121,7 +121,7 @@ IterationCount BenchmarkInstance::iterations() const { return iterations_; } -size_t BenchmarkInstance::repetitions() const { +int64_t BenchmarkInstance::repetitions() const { return repetitions_; } @@ -140,18 +140,18 @@ double BenchmarkInstance::min_time() const { return min_time_; } -size_t BenchmarkInstance::random_interleaving_repetitions() const { - return random_interleaving_repetitions_ == std::numeric_limits::max() +int64_t BenchmarkInstance::random_interleaving_repetitions() const { + return random_interleaving_repetitions_ == std::numeric_limits::max() ? 
GetRepetitions() : random_interleaving_repetitions_; } bool BenchmarkInstance::random_interleaving_repetitions_initialized() const { - return random_interleaving_repetitions_ != std::numeric_limits::max(); + return random_interleaving_repetitions_ != std::numeric_limits::max(); } void BenchmarkInstance::init_random_interleaving_repetitions( - size_t repetitions) const { + int64_t repetitions) const { random_interleaving_repetitions_ = repetitions; } diff --git a/src/benchmark_api_internal.h b/src/benchmark_api_internal.h index dae49fc57a..7bab901d15 100644 --- a/src/benchmark_api_internal.h +++ b/src/benchmark_api_internal.h @@ -26,7 +26,7 @@ class BenchmarkInstance { // Const accessors. const BenchmarkName& name() const; - size_t repetitions() const; + int64_t repetitions() const; const std::vector* statistics() const; AggregationReportMode aggregation_report_mode() const; TimeUnit time_unit() const; @@ -45,13 +45,13 @@ class BenchmarkInstance { // Returns number of repetitions for Random Interleaving. This will be // initialized later once we finish the first repetition, if Random // Interleaving is enabled. See also ComputeRandominterleavingrepetitions(). - size_t random_interleaving_repetitions() const; + int64_t random_interleaving_repetitions() const; // Returns true if repetitions for Random Interleaving is initialized. bool random_interleaving_repetitions_initialized() const; // Initializes number of repetitions for random interleaving. - void init_random_interleaving_repetitions(size_t repetitions) const; + void init_random_interleaving_repetitions(int64_t repetitions) const; // Setters. @@ -80,14 +80,14 @@ class BenchmarkInstance { UserCounters counters_; const std::vector* statistics_; bool last_benchmark_instance_; - size_t repetitions_; + int64_t repetitions_; double min_time_; IterationCount iterations_; int threads_; // Number of concurrent threads to use // Make it mutable so it can be initialized (mutated) later on a const // instance. 
- mutable size_t random_interleaving_repetitions_ = - std::numeric_limits::max(); + mutable int64_t random_interleaving_repetitions_ = + std::numeric_limits::max(); }; bool FindBenchmarksInternal(const std::string& re, @@ -100,7 +100,7 @@ ConsoleReporter::OutputOptions GetOutputOptions(bool force_no_color = false); double GetMinTime(); -size_t GetRepetitions(); +int64_t GetRepetitions(); } // end namespace internal } // end namespace benchmark diff --git a/test/benchmark_random_interleaving_gtest.cc b/test/benchmark_random_interleaving_gtest.cc index 582beeba27..1b597fcfb9 100644 --- a/test/benchmark_random_interleaving_gtest.cc +++ b/test/benchmark_random_interleaving_gtest.cc @@ -11,7 +11,10 @@ DECLARE_bool(benchmark_enable_random_interleaving); DECLARE_string(benchmark_filter); DECLARE_double(benchmark_random_interleaving_max_overhead); + +namespace do_not_read_flag_directly { DECLARE_int32(benchmark_repetitions); +} // namespace do_not_read_flag_directly namespace benchmark { namespace internal { @@ -104,7 +107,7 @@ TEST_F(BenchmarkTest, Match1) { } TEST_F(BenchmarkTest, Match1WithRepetition) { - FLAGS_benchmark_repetitions = 2; + do_not_read_flag_directly::FLAGS_benchmark_repetitions = 2; Execute("BM_Match1/(64|80)"); ASSERT_EQ("BM_Match1/64", queue->Get()); @@ -116,7 +119,7 @@ TEST_F(BenchmarkTest, Match1WithRepetition) { TEST_F(BenchmarkTest, Match1WithRandomInterleaving) { FLAGS_benchmark_enable_random_interleaving = true; - FLAGS_benchmark_repetitions = 100; + do_not_read_flag_directly::FLAGS_benchmark_repetitions = 100; FLAGS_benchmark_random_interleaving_max_overhead = std::numeric_limits::infinity(); @@ -137,7 +140,7 @@ TEST_F(BenchmarkTest, Match1WithRandomInterleaving) { TEST_F(BenchmarkTest, Match1WithRandomInterleavingAndZeroOverhead) { FLAGS_benchmark_enable_random_interleaving = true; - FLAGS_benchmark_repetitions = 100; + do_not_read_flag_directly::FLAGS_benchmark_repetitions = 100; FLAGS_benchmark_random_interleaving_max_overhead = 0; // 
ComputeRandomInterleavingRepetitions() will kick in and rerun each From 4640ed48f9f9642e0e94437e076dd3c1286a5959 Mon Sep 17 00:00:00 2001 From: Hai Huang Date: Wed, 31 Mar 2021 01:59:51 -0400 Subject: [PATCH 08/21] Address dominichamon's comment on default min_time / repetitions for fr-1051. Also change sentinel of random_interleaving_repetitions to -1. Hopefully it fixes the failures on Windows. Committer: Hai Huang On branch fr-1051 Your branch is up to date with 'origin/fr-1051'. Changes to be committed: modified: src/benchmark.cc modified: src/benchmark_api_internal.cc modified: src/benchmark_api_internal.h --- src/benchmark.cc | 30 +++++++++++------------------- src/benchmark_api_internal.cc | 4 ++-- src/benchmark_api_internal.h | 3 +-- 3 files changed, 14 insertions(+), 23 deletions(-) diff --git a/src/benchmark.cc b/src/benchmark.cc index cf1cd7ec35..05f9a04540 100644 --- a/src/benchmark.cc +++ b/src/benchmark.cc @@ -61,7 +61,7 @@ namespace { // Attempt to make each repetition run for at least this much of time. constexpr double kDefaultMinTimeTotalSecs = 0.5; -constexpr int64_t kDefaultRepetitions = 12; +constexpr int64_t kRandomInterleavingDefaultRepetitions = 12; } // namespace @@ -154,29 +154,21 @@ const double kSafetyMultiplier = 1.4; // Wraps --benchmark_min_time and returns valid default values if not supplied. double GetMinTime() { - const double min_time = do_not_read_flag_directly::FLAGS_benchmark_min_time; - if (min_time >= 0.0) { - return min_time; - } - - if (FLAGS_benchmark_enable_random_interleaving) { - return kDefaultMinTimeTotalSecs / GetRepetitions(); - } - return kDefaultMinTimeTotalSecs; + const double default_min_time = kDefaultMinTimeTotalSecs / GetRepetitions(); + const double flag_min_time = + do_not_read_flag_directly::FLAGS_benchmark_min_time; + return flag_min_time >= 0.0 ? flag_min_time : default_min_time; } // Wraps --benchmark_repetitions and return valid default value if not supplied. 
int64_t GetRepetitions() { - const int64_t repetitions = + const int64_t default_repetitions = + FLAGS_benchmark_enable_random_interleaving + ? kRandomInterleavingDefaultRepetitions + : 1; + const int64_t flag_repetitions = do_not_read_flag_directly::FLAGS_benchmark_repetitions; - if (repetitions >= 0) { - return repetitions; - } - - if (FLAGS_benchmark_enable_random_interleaving) { - return kDefaultRepetitions; - } - return 1; + return flag_repetitions >= 0 ? flag_repetitions : default_repetitions; } // FIXME: wouldn't LTO mess this up? diff --git a/src/benchmark_api_internal.cc b/src/benchmark_api_internal.cc index faa974d67d..6f3a6cce61 100644 --- a/src/benchmark_api_internal.cc +++ b/src/benchmark_api_internal.cc @@ -141,13 +141,13 @@ double BenchmarkInstance::min_time() const { } int64_t BenchmarkInstance::random_interleaving_repetitions() const { - return random_interleaving_repetitions_ == std::numeric_limits::max() + return random_interleaving_repetitions_ < 0 ? GetRepetitions() : random_interleaving_repetitions_; } bool BenchmarkInstance::random_interleaving_repetitions_initialized() const { - return random_interleaving_repetitions_ != std::numeric_limits::max(); + return random_interleaving_repetitions_ >= 0; } void BenchmarkInstance::init_random_interleaving_repetitions( diff --git a/src/benchmark_api_internal.h b/src/benchmark_api_internal.h index 7bab901d15..349c2a0640 100644 --- a/src/benchmark_api_internal.h +++ b/src/benchmark_api_internal.h @@ -86,8 +86,7 @@ class BenchmarkInstance { int threads_; // Number of concurrent threads to use // Make it mutable so it can be initialized (mutated) later on a const // instance. 
- mutable int64_t random_interleaving_repetitions_ = - std::numeric_limits::max(); + mutable int64_t random_interleaving_repetitions_ = -1; }; bool FindBenchmarksInternal(const std::string& re, From 1a2b6dfb131e2b97ddce8e2c6acdfa6ce9d045b8 Mon Sep 17 00:00:00 2001 From: Hai Huang Date: Fri, 2 Apr 2021 11:27:51 -0400 Subject: [PATCH 09/21] Fix windows test failures for fr-1051 Committer: Hai Huang On branch fr-1051 Your branch is up to date with 'origin/fr-1051'. Changes to be committed: modified: src/benchmark_api_internal.cc modified: src/benchmark_runner.cc --- src/benchmark_api_internal.cc | 3 ++- src/benchmark_runner.cc | 2 +- 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/src/benchmark_api_internal.cc b/src/benchmark_api_internal.cc index 6f3a6cce61..b9b8239003 100644 --- a/src/benchmark_api_internal.cc +++ b/src/benchmark_api_internal.cc @@ -66,7 +66,8 @@ BenchmarkInstance::BenchmarkInstance(Benchmark* benchmark, aggregation_report_mode_ = benchmark->aggregation_report_mode_; time_unit_ = benchmark->time_unit_; range_multiplier_ = benchmark->range_multiplier_; - min_time_ = benchmark->min_time_; + min_time_ = + !IsZero(benchmark->min_time_) ? benchmark->min_time_ : GetMinTime(); iterations_ = benchmark->iterations_; repetitions_ = benchmark->repetitions_; measure_process_cpu_time_ = benchmark->measure_process_cpu_time_; diff --git a/src/benchmark_runner.cc b/src/benchmark_runner.cc index d54fa53724..d0da0a6b03 100644 --- a/src/benchmark_runner.cc +++ b/src/benchmark_runner.cc @@ -350,7 +350,7 @@ class BenchmarkRunner { // Otherwise, we will still skip the rerun. rerun_trial = b.random_interleaving_repetitions() < GetRepetitions() && - i.seconds < b.min_time(); + i.seconds < b.min_time() && !has_explicit_iteration_count; } if (!rerun_trial) break; // Good, let's report them! 
From 81c9ab0869ef5546c7809da80da72fe3806eb1b2 Mon Sep 17 00:00:00 2001 From: Hai Huang Date: Tue, 13 Apr 2021 18:52:02 -0400 Subject: [PATCH 10/21] Add license blurb for fr-1051. Committer: Hai Huang On branch fr-1051 Your branch is up to date with 'origin/fr-1051'. Changes to be committed: modified: src/benchmark_adjust_repetitions.cc modified: src/benchmark_adjust_repetitions.h --- src/benchmark_adjust_repetitions.cc | 14 ++++++++++++++ src/benchmark_adjust_repetitions.h | 14 ++++++++++++++ 2 files changed, 28 insertions(+) diff --git a/src/benchmark_adjust_repetitions.cc b/src/benchmark_adjust_repetitions.cc index 31a1cd6b08..665d561b3b 100644 --- a/src/benchmark_adjust_repetitions.cc +++ b/src/benchmark_adjust_repetitions.cc @@ -1,3 +1,17 @@ +// Copyright 2015 Google Inc. All rights reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + #include "benchmark_adjust_repetitions.h" #include "benchmark_api_internal.h" diff --git a/src/benchmark_adjust_repetitions.h b/src/benchmark_adjust_repetitions.h index eeb69ff65c..2b82ecc40a 100644 --- a/src/benchmark_adjust_repetitions.h +++ b/src/benchmark_adjust_repetitions.h @@ -1,3 +1,17 @@ +// Copyright 2015 Google Inc. All rights reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + #ifndef BENCHMARK_ADJUST_REPETITIONS_H #define BENCHMARK_ADJUST_REPETITIONS_H From 70fe24ac765c799e45ec37f1bb3ada9d60eca951 Mon Sep 17 00:00:00 2001 From: Hai Huang Date: Mon, 19 Apr 2021 13:03:55 -0400 Subject: [PATCH 11/21] Switch to std::shuffle() for fr-1105. Committer: Hai Huang On branch fr-1051 Your branch is up to date with 'origin/fr-1051'. Changes to be committed: modified: src/benchmark.cc --- src/benchmark.cc | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/src/benchmark.cc b/src/benchmark.cc index 05f9a04540..649863479a 100644 --- a/src/benchmark.cc +++ b/src/benchmark.cc @@ -333,11 +333,13 @@ void RunBenchmarks(const std::vector& benchmarks, benchmark_indices[i] = i; } + std::random_device rd; + std::mt19937 g(rd()); // 'run_results_vector' and 'benchmarks' are parallel arrays. std::vector run_results_vector(benchmarks.size()); for (int64_t i = 0; i < outer_repetitions; i++) { if (FLAGS_benchmark_enable_random_interleaving) { - std::random_shuffle(benchmark_indices.begin(), benchmark_indices.end()); + std::shuffle(benchmark_indices.begin(), benchmark_indices.end(), g); } for (size_t j : benchmark_indices) { // Repetitions will be automatically adjusted under random interleaving. From 4966e9021bba26ebe945cf37df5221d7faa97d49 Mon Sep 17 00:00:00 2001 From: Hai Huang Date: Thu, 29 Apr 2021 02:48:03 -0400 Subject: [PATCH 12/21] Change to 1e-9 in fr-1105 Committer: Hai Huang On branch fr-1051 Your branch is up to date with 'origin/fr-1051'. 
Changes to be committed: modified: src/benchmark_adjust_repetitions.cc --- src/benchmark_adjust_repetitions.cc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/benchmark_adjust_repetitions.cc b/src/benchmark_adjust_repetitions.cc index 665d561b3b..e5c618a53a 100644 --- a/src/benchmark_adjust_repetitions.cc +++ b/src/benchmark_adjust_repetitions.cc @@ -22,7 +22,7 @@ namespace internal { namespace { -constexpr double kNanosecondInSecond = 0.000000001; +constexpr double kNanosecondInSecond = 1e-9; } // namespace From 9399f14c9acab50c1ede9542394a5e824ffad83b Mon Sep 17 00:00:00 2001 From: Hai Huang Date: Thu, 29 Apr 2021 03:20:23 -0400 Subject: [PATCH 13/21] Fix broken build caused by bad merge for fr-1105. Committer: Hai Huang On branch fr-1051 Your branch is up to date with 'origin/fr-1051'. Changes to be committed: modified: src/benchmark_api_internal.cc modified: src/benchmark_runner.cc --- src/benchmark_api_internal.cc | 4 ++-- src/benchmark_runner.cc | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/src/benchmark_api_internal.cc b/src/benchmark_api_internal.cc index dcfc483f5b..8a34200e33 100644 --- a/src/benchmark_api_internal.cc +++ b/src/benchmark_api_internal.cc @@ -160,9 +160,9 @@ State BenchmarkInstance::Run( IterationCount iters, int thread_id, internal::ThreadTimer* timer, internal::ThreadManager* manager, internal::PerfCountersMeasurement* perf_counters_measurement) const { - State st(iters, arg, thread_id, threads, timer, manager, + State st(iters, args_, thread_id, threads_, timer, manager, perf_counters_measurement); - benchmark->Run(st); + benchmark_->Run(st); return st; } diff --git a/src/benchmark_runner.cc b/src/benchmark_runner.cc index a397d49812..77ff163b63 100644 --- a/src/benchmark_runner.cc +++ b/src/benchmark_runner.cc @@ -170,7 +170,7 @@ class BenchmarkRunner { (b.aggregation_report_mode() & internal::ARM_DisplayReportAggregatesOnly); run_results->file_report_aggregates_only = - 
(b.aggregation_report_mode() & + (b.aggregation_report_mode() & internal::ARM_FileReportAggregatesOnly); CHECK(b.threads() == 1 || !perf_counters_measurement.IsValid()) << "Perf counters are not supported in multi-threaded cases.\n"; @@ -263,7 +263,7 @@ class BenchmarkRunner { // By using KeepRunningBatch a benchmark can iterate more times than // requested, so take the iteration count from i.results. - i.iters = i.results.iterations / b.threads; + i.iters = i.results.iterations / b.threads(); // Base decisions off of real time if requested by this benchmark. i.seconds = i.results.cpu_time_used; From bb8e0e95a0275e1614a5ae186c20f45a0d9e0dfd Mon Sep 17 00:00:00 2001 From: Hai Huang Date: Mon, 10 May 2021 14:56:27 -0400 Subject: [PATCH 14/21] Fix build breakage for fr-1051. Committer: Hai Huang On branch fr-1051 Your branch is up to date with 'origin/fr-1051'. Changes to be committed: modified: src/benchmark.cc modified: src/benchmark_api_internal.cc modified: src/benchmark_api_internal.h modified: src/benchmark_register.cc modified: src/benchmark_runner.cc --- src/benchmark.cc | 4 ++-- src/benchmark_api_internal.cc | 4 ++-- src/benchmark_api_internal.h | 5 +++++ src/benchmark_register.cc | 2 +- src/benchmark_runner.cc | 7 +++---- 5 files changed, 13 insertions(+), 9 deletions(-) diff --git a/src/benchmark.cc b/src/benchmark.cc index 300b3153a3..cbe8df9470 100644 --- a/src/benchmark.cc +++ b/src/benchmark.cc @@ -158,7 +158,7 @@ namespace internal { DEFINE_kvpairs(benchmark_context, {}); std::map* global_context = nullptr; - + // Performance measurements always come with random variances. Defines a // factor by which the required number of iterations is overestimated in order // to reduce the probability that the minimum time requirement will not be met. @@ -371,7 +371,7 @@ void RunBenchmarks(const std::vector& benchmarks, for (size_t j : benchmark_indices) { // Repetitions will be automatically adjusted under random interleaving. 
if (!FLAGS_benchmark_enable_random_interleaving || - i < benchmarks[j].random_interleaving_repetitions()) { + i < benchmarks[j].RandomInterleavingRepetitions()) { RunBenchmark(benchmarks[j], outer_repetitions, inner_repetitions, &complexity_reports, &run_results_vector[j]); } diff --git a/src/benchmark_api_internal.cc b/src/benchmark_api_internal.cc index 7ad74708c7..0d38e2d8b9 100644 --- a/src/benchmark_api_internal.cc +++ b/src/benchmark_api_internal.cc @@ -73,7 +73,7 @@ BenchmarkInstance::BenchmarkInstance(Benchmark* benchmark, } name_.time_type += "real_time"; } - + if (!benchmark_.thread_counts_.empty()) { name_.threads = StrFormat("threads:%d", threads_); } @@ -85,7 +85,7 @@ double BenchmarkInstance::MinTime() const { // random_interleaving_repetitions(). Dividing // total execution time by random_interleaving_repetitions() gives // the adjusted min_time per repetition. - return min_time_ * GetRepetitions() / random_interleaving_repetitions(); + return min_time_ * GetRepetitions() / RandomInterleavingRepetitions(); } return min_time_; } diff --git a/src/benchmark_api_internal.h b/src/benchmark_api_internal.h index 02441aca88..33a0864976 100644 --- a/src/benchmark_api_internal.h +++ b/src/benchmark_api_internal.h @@ -73,6 +73,11 @@ class BenchmarkInstance { bool use_manual_time_; BigO complexity_; BigOFunc* complexity_lambda_; + std::vector statistics_; + int repetitions_; + double min_time_; + IterationCount iterations_; + int threads_; UserCounters counters_; mutable int64_t random_interleaving_repetitions_ = -1; }; diff --git a/src/benchmark_register.cc b/src/benchmark_register.cc index e5e5db63a5..1f0dcd1d0e 100644 --- a/src/benchmark_register.cc +++ b/src/benchmark_register.cc @@ -159,7 +159,7 @@ bool BenchmarkFamilies::FindBenchmarks( const auto full_name = instance.name().str(); if ((re.Match(full_name) && !isNegativeFilter) || (!re.Match(full_name) && isNegativeFilter)) { - instance.set_last_benchmark_instance(&args == &family->args_.back()); + 
instance.last_benchmark_instance = (&args == &family->args_.back()); benchmarks->push_back(std::move(instance)); } } diff --git a/src/benchmark_runner.cc b/src/benchmark_runner.cc index 50135b2db1..0b8a906b17 100644 --- a/src/benchmark_runner.cc +++ b/src/benchmark_runner.cc @@ -152,7 +152,6 @@ class BenchmarkRunner { outer_repetitions(outer_repetitions_), inner_repetitions(inner_repetitions_), repeats(b.repetitions() != 0 ? b.repetitions() : inner_repetitions), - min_time(!IsZero(b.min_time()) ? b.min_time() : FLAGS_benchmark_min_time), has_explicit_iteration_count(b.iterations() != 0), pool(b.threads() - 1), iters(has_explicit_iteration_count ? b.iterations() : 1), @@ -352,7 +351,7 @@ class BenchmarkRunner { // If random interleaving is enabled and the repetitions is not // initialized, do it now. if (FLAGS_benchmark_enable_random_interleaving && - !b.random_interleaving_repetitions_initialized()) { + !b.RandomInterleavingRepetitionsInitialized()) { InternalRandomInterleavingRepetitionsInput input; input.total_execution_time_per_repetition = exec_end - exec_start; input.time_used_per_repetition = i.seconds; @@ -360,7 +359,7 @@ class BenchmarkRunner { input.min_time_per_repetition = GetMinTime(); input.max_overhead = FLAGS_benchmark_random_interleaving_max_overhead; input.max_repetitions = GetRepetitions(); - b.init_random_interleaving_repetitions( + b.InitRandomInterleavingRepetitions( ComputeRandomInterleavingRepetitions(input)); // If the number of repetitions changed, need to rerun the last trial // because iters may also change. Note that we only need to do this @@ -368,7 +367,7 @@ class BenchmarkRunner { // run is not enough for the already adjusted b.min_time(). // Otherwise, we will still skip the rerun. 
rerun_trial = - b.random_interleaving_repetitions() < GetRepetitions() && + b.RandomInterleavingRepetitions() < GetRepetitions() && i.seconds < b.min_time() && !has_explicit_iteration_count; } From 32e86fcb4aecec8722b59ff8a71c7382b37b5aec Mon Sep 17 00:00:00 2001 From: Hai Huang Date: Mon, 10 May 2021 15:22:36 -0400 Subject: [PATCH 15/21] Print out reports as they come in if random interleaving is disabled (fr-1051) Committer: Hai Huang On branch fr-1051 Your branch is up to date with 'origin/fr-1051'. Changes to be committed: modified: src/benchmark.cc --- src/benchmark.cc | 51 +++++++++++++++++++++++++++++++----------------- 1 file changed, 33 insertions(+), 18 deletions(-) diff --git a/src/benchmark.cc b/src/benchmark.cc index cbe8df9470..50f6ebbbef 100644 --- a/src/benchmark.cc +++ b/src/benchmark.cc @@ -360,6 +360,17 @@ void RunBenchmarks(const std::vector& benchmarks, benchmark_indices[i] = i; } + auto report = [](BenchmarkReporter* reporter, bool report_aggregates_only, + const RunResults& run_results) { + assert(reporter); + // If there are no aggregates, do output non-aggregates. + report_aggregates_only &= !run_results.aggregates_only.empty(); + if (!report_aggregates_only) + reporter->ReportRuns(run_results.non_aggregates); + if (!run_results.aggregates_only.empty()) + reporter->ReportRuns(run_results.aggregates_only); + }; + std::random_device rd; std::mt19937 g(rd()); // 'run_results_vector' and 'benchmarks' are parallel arrays. @@ -374,30 +385,34 @@ void RunBenchmarks(const std::vector& benchmarks, i < benchmarks[j].RandomInterleavingRepetitions()) { RunBenchmark(benchmarks[j], outer_repetitions, inner_repetitions, &complexity_reports, &run_results_vector[j]); + if (!FLAGS_benchmark_enable_random_interleaving) { + // Print out reports as they come in. 
+ const RunResults& run_results = run_results_vector.at(j); + report(display_reporter, run_results.display_report_aggregates_only, + run_results); + if (file_reporter) + report(file_reporter, run_results.file_report_aggregates_only, + run_results); + + flushStreams(display_reporter); + flushStreams(file_reporter); + } } } } - auto report = [](BenchmarkReporter* reporter, bool report_aggregates_only, - const RunResults& run_results) { - assert(reporter); - // If there are no aggregates, do output non-aggregates. - report_aggregates_only &= !run_results.aggregates_only.empty(); - if (!report_aggregates_only) - reporter->ReportRuns(run_results.non_aggregates); - if (!run_results.aggregates_only.empty()) - reporter->ReportRuns(run_results.aggregates_only); - }; - - for (const RunResults& run_results : run_results_vector) { - report(display_reporter, run_results.display_report_aggregates_only, - run_results); - if (file_reporter) - report(file_reporter, run_results.file_report_aggregates_only, + if (FLAGS_benchmark_enable_random_interleaving) { + // Print out all reports at the end of the test. + for (const RunResults& run_results : run_results_vector) { + report(display_reporter, run_results.display_report_aggregates_only, run_results); + if (file_reporter) + report(file_reporter, run_results.file_report_aggregates_only, + run_results); - flushStreams(display_reporter); - flushStreams(file_reporter); + flushStreams(display_reporter); + flushStreams(file_reporter); + } } } display_reporter->Finalize(); From 1bc21736f42fa735e84307dc88a647cfaea6e8f0 Mon Sep 17 00:00:00 2001 From: Hai Huang Date: Wed, 12 May 2021 13:00:40 -0400 Subject: [PATCH 16/21] size_t, int64_t --> int in benchmark_runner for fr-1051. Committer: Hai Huang On branch fr-1051 Your branch is up to date with 'origin/fr-1051'. 
Changes to be committed: modified: src/benchmark_runner.cc modified: src/benchmark_runner.h --- src/benchmark_runner.cc | 37 ++++++++++++++++++------------------- src/benchmark_runner.h | 2 +- 2 files changed, 19 insertions(+), 20 deletions(-) diff --git a/src/benchmark_runner.cc b/src/benchmark_runner.cc index 0b8a906b17..1320136e30 100644 --- a/src/benchmark_runner.cc +++ b/src/benchmark_runner.cc @@ -71,7 +71,7 @@ BenchmarkReporter::Run CreateRunReport( const internal::ThreadManager::Result& results, IterationCount memory_iterations, const MemoryManager::Result& memory_result, double seconds, - int64_t repetition_index) { + int repetition_index) { // Create report about this benchmark run. BenchmarkReporter::Run report; @@ -142,13 +142,13 @@ void RunInThread(const BenchmarkInstance* b, IterationCount iters, class BenchmarkRunner { public: BenchmarkRunner(const benchmark::internal::BenchmarkInstance& b_, - const size_t outer_repetitions_, - const size_t inner_repetitions_, + int outer_repetitions_, + int inner_repetitions_, std::vector* complexity_reports_, RunResults* run_results_) : b(b_), complexity_reports(*complexity_reports_), - run_results(run_results_), + run_results(*run_results_), outer_repetitions(outer_repetitions_), inner_repetitions(inner_repetitions_), repeats(b.repetitions() != 0 ? b.repetitions() : inner_repetitions), @@ -160,16 +160,16 @@ class BenchmarkRunner { perf_counters_measurement_ptr(perf_counters_measurement.IsValid() ? 
&perf_counters_measurement : nullptr) { - run_results->display_report_aggregates_only = + run_results.display_report_aggregates_only = (FLAGS_benchmark_report_aggregates_only || FLAGS_benchmark_display_aggregates_only); - run_results->file_report_aggregates_only = + run_results.file_report_aggregates_only = FLAGS_benchmark_report_aggregates_only; if (b.aggregation_report_mode() != internal::ARM_Unspecified) { - run_results->display_report_aggregates_only = + run_results.display_report_aggregates_only = (b.aggregation_report_mode() & internal::ARM_DisplayReportAggregatesOnly); - run_results->file_report_aggregates_only = + run_results.file_report_aggregates_only = (b.aggregation_report_mode() & internal::ARM_FileReportAggregatesOnly); CHECK(b.threads() == 1 || !perf_counters_measurement.IsValid()) @@ -179,18 +179,17 @@ class BenchmarkRunner { << "Perf counters were requested but could not be set up."; } - for (size_t repetition_num = 0; repetition_num < repeats; - repetition_num++) { + for (int repetition_num = 0; repetition_num < repeats; repetition_num++) { DoOneRepetition(repetition_num); } // Calculate additional statistics - run_results->aggregates_only = ComputeStats(run_results->non_aggregates); + run_results.aggregates_only = ComputeStats(run_results.non_aggregates); // Maybe calculate complexity report if ((b.complexity() != oNone) && b.last_benchmark_instance) { auto additional_run_stats = ComputeBigO(complexity_reports); - run_results->aggregates_only.insert(run_results->aggregates_only.end(), + run_results.aggregates_only.insert(run_results.aggregates_only.end(), additional_run_stats.begin(), additional_run_stats.end()); complexity_reports.clear(); @@ -201,11 +200,11 @@ class BenchmarkRunner { const benchmark::internal::BenchmarkInstance& b; std::vector& complexity_reports; - RunResults* run_results = nullptr; + RunResults& run_results; - const size_t outer_repetitions; - const size_t inner_repetitions; - const size_t repeats; + const int 
outer_repetitions; + const int inner_repetitions; + const int repeats; const bool has_explicit_iteration_count; std::vector pool; @@ -317,7 +316,7 @@ class BenchmarkRunner { !b.use_manual_time()); } - void DoOneRepetition(int64_t repetition_index) { + void DoOneRepetition(int repetition_index) { const bool is_the_first_repetition = repetition_index == 0; IterationResults i; @@ -410,14 +409,14 @@ class BenchmarkRunner { if (!report.error_occurred && b.complexity() != oNone) complexity_reports.push_back(report); - run_results->non_aggregates.push_back(report); + run_results.non_aggregates.push_back(report); } }; } // end namespace void RunBenchmark(const benchmark::internal::BenchmarkInstance& b, - const size_t outer_repetitions, const size_t inner_repetitions, + const int outer_repetitions, const int inner_repetitions, std::vector* complexity_reports, RunResults* run_results) { internal::BenchmarkRunner r(b, outer_repetitions, inner_repetitions, diff --git a/src/benchmark_runner.h b/src/benchmark_runner.h index 326cd79979..e29aa32306 100644 --- a/src/benchmark_runner.h +++ b/src/benchmark_runner.h @@ -43,7 +43,7 @@ struct RunResults { }; void RunBenchmark(const benchmark::internal::BenchmarkInstance& b, - const size_t outer_repetitions, const size_t inner_repetitions, + int outer_repetitions, int inner_repetitions, std::vector* complexity_reports, RunResults* run_results); From ce7220ac86b1cd035c7b10bec626b840f5c8f81e Mon Sep 17 00:00:00 2001 From: Hai Huang Date: Fri, 14 May 2021 00:19:18 -0400 Subject: [PATCH 17/21] Address comments from dominichamon for fr-1051 Committer: Hai Huang On branch fr-1051 Your branch is up to date with 'origin/fr-1051'. 
Changes to be committed: modified: src/benchmark.cc modified: src/benchmark_adjust_repetitions.cc modified: src/benchmark_adjust_repetitions.h modified: src/benchmark_api_internal.cc modified: src/benchmark_api_internal.h modified: test/benchmark_random_interleaving_gtest.cc --- src/benchmark.cc | 65 ++++++++++----------- src/benchmark_adjust_repetitions.cc | 8 +-- src/benchmark_adjust_repetitions.h | 4 +- src/benchmark_api_internal.cc | 4 +- src/benchmark_api_internal.h | 8 +-- test/benchmark_random_interleaving_gtest.cc | 2 +- 6 files changed, 45 insertions(+), 46 deletions(-) diff --git a/src/benchmark.cc b/src/benchmark.cc index 50f6ebbbef..46cf073d7c 100644 --- a/src/benchmark.cc +++ b/src/benchmark.cc @@ -64,7 +64,7 @@ namespace { // Attempt to make each repetition run for at least this much of time. constexpr double kDefaultMinTimeTotalSecs = 0.5; -constexpr int64_t kRandomInterleavingDefaultRepetitions = 12; +constexpr int kRandomInterleavingDefaultRepetitions = 12; } // namespace @@ -173,12 +173,12 @@ double GetMinTime() { } // Wraps --benchmark_repetitions and return valid default value if not supplied. -int64_t GetRepetitions() { - const int64_t default_repetitions = +int GetRepetitions() { + const int default_repetitions = FLAGS_benchmark_enable_random_interleaving ? kRandomInterleavingDefaultRepetitions : 1; - const int64_t flag_repetitions = + const int flag_repetitions = do_not_read_flag_directly::FLAGS_benchmark_repetitions; return flag_repetitions >= 0 ? flag_repetitions : default_repetitions; } @@ -351,35 +351,48 @@ void RunBenchmarks(const std::vector& benchmarks, // {Random order of A, B, C, ...}, {Random order of A, B, C, ...}, ... // That is, repetitions is outside of RunBenchmark(), hence the name // outer_repetitions. - int64_t inner_repetitions = + int inner_repetitions = FLAGS_benchmark_enable_random_interleaving ? 1 : GetRepetitions(); - int64_t outer_repetitions = + int outer_repetitions = FLAGS_benchmark_enable_random_interleaving ? 
GetRepetitions() : 1; std::vector benchmark_indices(benchmarks.size()); for (size_t i = 0; i < benchmarks.size(); ++i) { benchmark_indices[i] = i; } - auto report = [](BenchmarkReporter* reporter, bool report_aggregates_only, - const RunResults& run_results) { - assert(reporter); - // If there are no aggregates, do output non-aggregates. - report_aggregates_only &= !run_results.aggregates_only.empty(); - if (!report_aggregates_only) - reporter->ReportRuns(run_results.non_aggregates); - if (!run_results.aggregates_only.empty()) - reporter->ReportRuns(run_results.aggregates_only); + auto report = [flushStreams, display_reporter, file_reporter]( + const RunResults& run_results) { + auto report_one = [](BenchmarkReporter* reporter, + bool aggregates_only, + const RunResults& results) { + assert(reporter); + // If there are no aggregates, do output non-aggregates. + aggregates_only &= !results.aggregates_only.empty(); + if (!aggregates_only) + reporter->ReportRuns(results.non_aggregates); + if (!results.aggregates_only.empty()) + reporter->ReportRuns(results.aggregates_only); + }; + + report_one(display_reporter, run_results.display_report_aggregates_only, + run_results); + if (file_reporter) + report_one(file_reporter, run_results.file_report_aggregates_only, + run_results); + + flushStreams(display_reporter); + flushStreams(file_reporter); }; std::random_device rd; std::mt19937 g(rd()); // 'run_results_vector' and 'benchmarks' are parallel arrays. std::vector run_results_vector(benchmarks.size()); - for (int64_t i = 0; i < outer_repetitions; i++) { + for (int i = 0; i < outer_repetitions; i++) { if (FLAGS_benchmark_enable_random_interleaving) { std::shuffle(benchmark_indices.begin(), benchmark_indices.end(), g); } - for (size_t j : benchmark_indices) { + for (int j : benchmark_indices) { // Repetitions will be automatically adjusted under random interleaving. 
if (!FLAGS_benchmark_enable_random_interleaving || i < benchmarks[j].RandomInterleavingRepetitions()) { @@ -388,14 +401,7 @@ void RunBenchmarks(const std::vector& benchmarks, if (!FLAGS_benchmark_enable_random_interleaving) { // Print out reports as they come in. const RunResults& run_results = run_results_vector.at(j); - report(display_reporter, run_results.display_report_aggregates_only, - run_results); - if (file_reporter) - report(file_reporter, run_results.file_report_aggregates_only, - run_results); - - flushStreams(display_reporter); - flushStreams(file_reporter); + report(run_results); } } } @@ -404,14 +410,7 @@ void RunBenchmarks(const std::vector& benchmarks, if (FLAGS_benchmark_enable_random_interleaving) { // Print out all reports at the end of the test. for (const RunResults& run_results : run_results_vector) { - report(display_reporter, run_results.display_report_aggregates_only, - run_results); - if (file_reporter) - report(file_reporter, run_results.file_report_aggregates_only, - run_results); - - flushStreams(display_reporter); - flushStreams(file_reporter); + report(run_results); } } } diff --git a/src/benchmark_adjust_repetitions.cc b/src/benchmark_adjust_repetitions.cc index e5c618a53a..2847927628 100644 --- a/src/benchmark_adjust_repetitions.cc +++ b/src/benchmark_adjust_repetitions.cc @@ -26,7 +26,7 @@ constexpr double kNanosecondInSecond = 1e-9; } // namespace -size_t ComputeRandomInterleavingRepetitions( +int ComputeRandomInterleavingRepetitions( InternalRandomInterleavingRepetitionsInput input) { // Find the repetitions such that total overhead is bounded. Let // n = desired number of repetitions, i.e., the output of this method. 
@@ -96,7 +96,7 @@ size_t ComputeRandomInterleavingRepetitions( double n = (1 + input.max_overhead) * e / (h + r); n = std::min(std::max(n, 1.0), static_cast(input.max_repetitions)); - size_t n_size_t = static_cast(n); + int n_int = static_cast(n); VLOG(2) << "Computed random interleaving repetitions" << "\n input.total_execution_time_per_repetition: " @@ -116,9 +116,9 @@ size_t ComputeRandomInterleavingRepetitions( << "\n m: " << m << "\n e: " << e << "\n n: " << n - << "\n n_size_t: " << n_size_t; + << "\n n_int: " << n_int; - return n_size_t; + return n_int; } } // internal diff --git a/src/benchmark_adjust_repetitions.h b/src/benchmark_adjust_repetitions.h index 2b82ecc40a..21a666afe0 100644 --- a/src/benchmark_adjust_repetitions.h +++ b/src/benchmark_adjust_repetitions.h @@ -28,12 +28,12 @@ struct InternalRandomInterleavingRepetitionsInput { double real_time_used_per_repetition; double min_time_per_repetition; double max_overhead; - size_t max_repetitions; + int max_repetitions; }; // Should be called right after the first repetition is completed to estimate // the number of iterations. -size_t ComputeRandomInterleavingRepetitions( +int ComputeRandomInterleavingRepetitions( InternalRandomInterleavingRepetitionsInput input); } // end namespace internal diff --git a/src/benchmark_api_internal.cc b/src/benchmark_api_internal.cc index 0d38e2d8b9..06a4344330 100644 --- a/src/benchmark_api_internal.cc +++ b/src/benchmark_api_internal.cc @@ -90,7 +90,7 @@ double BenchmarkInstance::MinTime() const { return min_time_; } -int64_t BenchmarkInstance::RandomInterleavingRepetitions() const { +int BenchmarkInstance::RandomInterleavingRepetitions() const { return random_interleaving_repetitions_ < 0 ? 
GetRepetitions() : random_interleaving_repetitions_; @@ -101,7 +101,7 @@ bool BenchmarkInstance::RandomInterleavingRepetitionsInitialized() const { } void BenchmarkInstance::InitRandomInterleavingRepetitions( - int64_t repetitions) const { + int repetitions) const { random_interleaving_repetitions_ = repetitions; } diff --git a/src/benchmark_api_internal.h b/src/benchmark_api_internal.h index 33a0864976..aff5528a57 100644 --- a/src/benchmark_api_internal.h +++ b/src/benchmark_api_internal.h @@ -32,13 +32,13 @@ class BenchmarkInstance { // Returns number of repetitions for Random Interleaving. This will be // initialized later once we finish the first repetition, if Random // Interleaving is enabled. See also ComputeRandominterleavingrepetitions(). - int64_t RandomInterleavingRepetitions() const; + int RandomInterleavingRepetitions() const; // Returns true if repetitions for Random Interleaving is initialized. bool RandomInterleavingRepetitionsInitialized() const; // Initializes number of repetitions for random interleaving. 
- void InitRandomInterleavingRepetitions(int64_t repetitions) const; + void InitRandomInterleavingRepetitions(int repetitions) const; const BenchmarkName& name() const { return name_; } AggregationReportMode aggregation_report_mode() const { @@ -79,7 +79,7 @@ class BenchmarkInstance { IterationCount iterations_; int threads_; UserCounters counters_; - mutable int64_t random_interleaving_repetitions_ = -1; + mutable int random_interleaving_repetitions_ = -1; }; bool FindBenchmarksInternal(const std::string& re, @@ -92,7 +92,7 @@ ConsoleReporter::OutputOptions GetOutputOptions(bool force_no_color = false); double GetMinTime(); -int64_t GetRepetitions(); +int GetRepetitions(); } // end namespace internal } // end namespace benchmark diff --git a/test/benchmark_random_interleaving_gtest.cc b/test/benchmark_random_interleaving_gtest.cc index 1b597fcfb9..5e8329a4e6 100644 --- a/test/benchmark_random_interleaving_gtest.cc +++ b/test/benchmark_random_interleaving_gtest.cc @@ -163,7 +163,7 @@ TEST_F(BenchmarkTest, Match1WithRandomInterleavingAndZeroOverhead) { InternalRandomInterleavingRepetitionsInput CreateInput( double total, double time, double real_time, double min_time, - double overhead, size_t repetitions) { + double overhead, int repetitions) { InternalRandomInterleavingRepetitionsInput input; input.total_execution_time_per_repetition = total; input.time_used_per_repetition = time; From 81ac7fe3d1ae74eab725540d2de5dc0e4e6ceb85 Mon Sep 17 00:00:00 2001 From: Hai Huang Date: Sat, 15 May 2021 20:04:53 -0400 Subject: [PATCH 18/21] benchmark_indices --> size_t to make CI pass: fr-1051 Committer: Hai Huang On branch fr-1051 Your branch is up to date with 'origin/fr-1051'.
Changes to be committed: modified: src/benchmark.cc --- src/benchmark.cc | 81 ++++++++++++++++++++++++------------------------ 1 file changed, 41 insertions(+), 40 deletions(-) diff --git a/src/benchmark.cc b/src/benchmark.cc index 46cf073d7c..272794e147 100644 --- a/src/benchmark.cc +++ b/src/benchmark.cc @@ -302,6 +302,39 @@ void State::FinishKeepRunning() { namespace internal { namespace { +// Flushes streams after invoking reporter methods that write to them. This +// ensures users get timely updates even when streams are not line-buffered. +void FlushStreams(BenchmarkReporter* reporter) { + if (!reporter) return; + std::flush(reporter->GetOutputStream()); + std::flush(reporter->GetErrorStream()); +}; + +// Reports in both display and file reporters. +void Report(BenchmarkReporter* display_reporter, + BenchmarkReporter* file_reporter, const RunResults& run_results) { + auto report_one = [](BenchmarkReporter* reporter, + bool aggregates_only, + const RunResults& results) { + assert(reporter); + // If there are no aggregates, do output non-aggregates. + aggregates_only &= !results.aggregates_only.empty(); + if (!aggregates_only) + reporter->ReportRuns(results.non_aggregates); + if (!results.aggregates_only.empty()) + reporter->ReportRuns(results.aggregates_only); + }; + + report_one(display_reporter, run_results.display_report_aggregates_only, + run_results); + if (file_reporter) + report_one(file_reporter, run_results.file_report_aggregates_only, + run_results); + + FlushStreams(display_reporter); + FlushStreams(file_reporter); +}; + void RunBenchmarks(const std::vector& benchmarks, BenchmarkReporter* display_reporter, BenchmarkReporter* file_reporter) { @@ -330,18 +363,10 @@ void RunBenchmarks(const std::vector& benchmarks, // Keep track of running times of all instances of current benchmark std::vector complexity_reports; - // We flush streams after invoking reporter methods that write to them. 
This - // ensures users get timely updates even when streams are not line-buffered. - auto flushStreams = [](BenchmarkReporter* reporter) { - if (!reporter) return; - std::flush(reporter->GetOutputStream()); - std::flush(reporter->GetErrorStream()); - }; - if (display_reporter->ReportContext(context) && (!file_reporter || file_reporter->ReportContext(context))) { - flushStreams(display_reporter); - flushStreams(file_reporter); + FlushStreams(display_reporter); + FlushStreams(file_reporter); // Without random interleaving, benchmarks are executed in the order of: // A, A, ..., A, B, B, ..., B, C, C, ..., C, ... @@ -360,30 +385,6 @@ void RunBenchmarks(const std::vector& benchmarks, benchmark_indices[i] = i; } - auto report = [flushStreams, display_reporter, file_reporter]( - const RunResults& run_results) { - auto report_one = [](BenchmarkReporter* reporter, - bool aggregates_only, - const RunResults& results) { - assert(reporter); - // If there are no aggregates, do output non-aggregates. - aggregates_only &= !results.aggregates_only.empty(); - if (!aggregates_only) - reporter->ReportRuns(results.non_aggregates); - if (!results.aggregates_only.empty()) - reporter->ReportRuns(results.aggregates_only); - }; - - report_one(display_reporter, run_results.display_report_aggregates_only, - run_results); - if (file_reporter) - report_one(file_reporter, run_results.file_report_aggregates_only, - run_results); - - flushStreams(display_reporter); - flushStreams(file_reporter); - }; - std::random_device rd; std::mt19937 g(rd()); // 'run_results_vector' and 'benchmarks' are parallel arrays. @@ -392,7 +393,7 @@ void RunBenchmarks(const std::vector& benchmarks, if (FLAGS_benchmark_enable_random_interleaving) { std::shuffle(benchmark_indices.begin(), benchmark_indices.end(), g); } - for (int j : benchmark_indices) { + for (size_t j : benchmark_indices) { // Repetitions will be automatically adjusted under random interleaving. 
if (!FLAGS_benchmark_enable_random_interleaving || i < benchmarks[j].RandomInterleavingRepetitions()) { @@ -400,8 +401,7 @@ void RunBenchmarks(const std::vector& benchmarks, &complexity_reports, &run_results_vector[j]); if (!FLAGS_benchmark_enable_random_interleaving) { // Print out reports as they come in. - const RunResults& run_results = run_results_vector.at(j); - report(run_results); + Report(display_reporter, file_reporter, run_results_vector.at(j)); } } } @@ -410,14 +410,14 @@ void RunBenchmarks(const std::vector& benchmarks, if (FLAGS_benchmark_enable_random_interleaving) { // Print out all reports at the end of the test. for (const RunResults& run_results : run_results_vector) { - report(run_results); + Report(display_reporter, file_reporter, run_results); } } } display_reporter->Finalize(); if (file_reporter) file_reporter->Finalize(); - flushStreams(display_reporter); - flushStreams(file_reporter); + FlushStreams(display_reporter); + FlushStreams(file_reporter); } // Disable deprecated warnings temporarily because we need to reference @@ -567,6 +567,7 @@ void PrintUsageAndExit() { " [--benchmark_filter=]\n" " [--benchmark_min_time=]\n" " [--benchmark_repetitions=]\n" + " [--benchmark_enable_random_interleaving={true|false}]\n" " [--benchmark_report_aggregates_only={true|false}]\n" " [--benchmark_display_aggregates_only={true|false}]\n" " [--benchmark_format=]\n" From 086b15ef01e27c5aa48d9591a16854d8fd5d48e9 Mon Sep 17 00:00:00 2001 From: Hai Huang Date: Sat, 15 May 2021 23:06:29 -0400 Subject: [PATCH 19/21] Fix min_time not initialized issue for fr-1051. Committer: Hai Huang On branch fr-1051 Your branch is up to date with 'origin/fr-1051'. 
Changes to be committed: modified: src/benchmark_api_internal.cc modified: src/benchmark_api_internal.h --- src/benchmark_api_internal.cc | 8 ++++++-- src/benchmark_api_internal.h | 5 +---- 2 files changed, 7 insertions(+), 6 deletions(-) diff --git a/src/benchmark_api_internal.cc b/src/benchmark_api_internal.cc index 06a4344330..b7ddabf36c 100644 --- a/src/benchmark_api_internal.cc +++ b/src/benchmark_api_internal.cc @@ -2,6 +2,7 @@ #include +#include "check.h" #include "string_util.h" DECLARE_bool(benchmark_enable_random_interleaving); @@ -23,9 +24,12 @@ BenchmarkInstance::BenchmarkInstance(Benchmark* benchmark, complexity_lambda_(benchmark_.complexity_lambda_), statistics_(benchmark_.statistics_), repetitions_(benchmark_.repetitions_), - min_time_(benchmark_.min_time_), + min_time_(!IsZero(benchmark_.min_time_) ? benchmark_.min_time_ + : GetMinTime()), iterations_(benchmark_.iterations_), threads_(thread_count) { + CHECK(!IsZero(min_time_)) << "min_time must be non-zero."; + name_.function_name = benchmark_.name_; size_t arg_i = 0; @@ -79,7 +83,7 @@ BenchmarkInstance::BenchmarkInstance(Benchmark* benchmark, } } -double BenchmarkInstance::MinTime() const { +double BenchmarkInstance::min_time() const { if (FLAGS_benchmark_enable_random_interleaving) { // Random Interleaving will automatically adjust // random_interleaving_repetitions(). Dividing diff --git a/src/benchmark_api_internal.h b/src/benchmark_api_internal.h index aff5528a57..39ea407738 100644 --- a/src/benchmark_api_internal.h +++ b/src/benchmark_api_internal.h @@ -26,9 +26,6 @@ class BenchmarkInstance { BenchmarkInstance(Benchmark* benchmark, const std::vector& args, int threads); - // Returns the min time to run a microbenchmark in RunBenchmark(). - double MinTime() const; - // Returns number of repetitions for Random Interleaving. This will be // initialized later once we finish the first repetition, if Random // Interleaving is enabled. See also ComputeRandominterleavingrepetitions(). 
@@ -52,7 +49,7 @@ class BenchmarkInstance { BigOFunc& complexity_lambda() const { return *complexity_lambda_; } const std::vector& statistics() const { return statistics_; } int repetitions() const { return repetitions_; } - double min_time() const { return min_time_; } + double min_time() const; IterationCount iterations() const { return iterations_; } int threads() const { return threads_; } From ee70382bcff174e1135d637b7994bd786c4ed498 Mon Sep 17 00:00:00 2001 From: Hai Huang Date: Tue, 18 May 2021 16:59:57 -0400 Subject: [PATCH 20/21] min_time --> MinTime in fr-1051. Committer: Hai Huang On branch fr-1051 Your branch is up to date with 'origin/fr-1051'. Changes to be committed: modified: src/benchmark_api_internal.cc modified: src/benchmark_api_internal.h modified: src/benchmark_runner.cc --- src/benchmark_api_internal.cc | 6 +++--- src/benchmark_api_internal.h | 4 ++-- src/benchmark_runner.cc | 14 +++++++------- 3 files changed, 12 insertions(+), 12 deletions(-) diff --git a/src/benchmark_api_internal.cc b/src/benchmark_api_internal.cc index b7ddabf36c..ddd46bee63 100644 --- a/src/benchmark_api_internal.cc +++ b/src/benchmark_api_internal.cc @@ -83,7 +83,7 @@ BenchmarkInstance::BenchmarkInstance(Benchmark* benchmark, } } -double BenchmarkInstance::min_time() const { +double BenchmarkInstance::MinTime() const { if (FLAGS_benchmark_enable_random_interleaving) { // Random Interleaving will automatically adjust // random_interleaving_repetitions(). 
Dividing @@ -105,8 +105,8 @@ bool BenchmarkInstance::RandomInterleavingRepetitionsInitialized() const { } void BenchmarkInstance::InitRandomInterleavingRepetitions( - int repetitions) const { - random_interleaving_repetitions_ = repetitions; + int reps) const { + random_interleaving_repetitions_ = reps; } State BenchmarkInstance::Run( diff --git a/src/benchmark_api_internal.h b/src/benchmark_api_internal.h index 39ea407738..0ff8dafbe6 100644 --- a/src/benchmark_api_internal.h +++ b/src/benchmark_api_internal.h @@ -35,7 +35,7 @@ class BenchmarkInstance { bool RandomInterleavingRepetitionsInitialized() const; // Initializes number of repetitions for random interleaving. - void InitRandomInterleavingRepetitions(int repetitions) const; + void InitRandomInterleavingRepetitions(int reps) const; const BenchmarkName& name() const { return name_; } AggregationReportMode aggregation_report_mode() const { @@ -49,7 +49,7 @@ class BenchmarkInstance { BigOFunc& complexity_lambda() const { return *complexity_lambda_; } const std::vector& statistics() const { return statistics_; } int repetitions() const { return repetitions_; } - double min_time() const; + double MinTime() const; IterationCount iterations() const { return iterations_; } int threads() const { return threads_; } diff --git a/src/benchmark_runner.cc b/src/benchmark_runner.cc index 1320136e30..a84eae8194 100644 --- a/src/benchmark_runner.cc +++ b/src/benchmark_runner.cc @@ -279,13 +279,13 @@ class BenchmarkRunner { // See how much iterations should be increased by. // Note: Avoid division by zero with max(seconds, 1ns). double multiplier = - b.min_time() * kSafetyMultiplier / std::max(i.seconds, 1e-9); + b.MinTime() * kSafetyMultiplier / std::max(i.seconds, 1e-9); // If our last run was at least 10% of FLAGS_benchmark_min_time then we // use the multiplier directly. // Otherwise we use at most 10 times expansion. // NOTE: When the last run was at least 10% of the min time the max // expansion should be 14x. 
- bool is_significant = (i.seconds / b.min_time()) > 0.1; + bool is_significant = (i.seconds / b.MinTime()) > 0.1; multiplier = is_significant ? multiplier : std::min(10.0, multiplier); if (multiplier <= 1.0) multiplier = 2.0; @@ -306,13 +306,13 @@ class BenchmarkRunner { // or because an error was reported. return i.results.has_error_ || i.iters >= kMaxIterations || // Too many iterations already. - i.seconds >= b.min_time() || // The elapsed time is large enough. + i.seconds >= b.MinTime() || // The elapsed time is large enough. // CPU time is specified but the // elapsed real time greatly exceeds // the minimum time. Note that user // provided timers are except from this // sanity check. - ((i.results.real_time_used >= 5 * b.min_time()) && + ((i.results.real_time_used >= 5 * b.MinTime()) && !b.use_manual_time()); } @@ -362,12 +362,12 @@ class BenchmarkRunner { ComputeRandomInterleavingRepetitions(input)); // If the number of repetitions changed, need to rerun the last trial // because iters may also change. Note that we only need to do this - // if accumulated_time < b.min_time(), i.e., the iterations we have - // run is not enough for the already adjusted b.min_time(). + // if accumulated_time < b.MinTime(), i.e., the iterations we have + // run is not enough for the already adjusted b.MinTime(). // Otherwise, we will still skip the rerun. rerun_trial = b.RandomInterleavingRepetitions() < GetRepetitions() && - i.seconds < b.min_time() && !has_explicit_iteration_count; + i.seconds < b.MinTime() && !has_explicit_iteration_count; } if (!rerun_trial) break; // Good, let's report them! From 736875b29812fb47af1941a4beec1d54fd8fa348 Mon Sep 17 00:00:00 2001 From: Hai Huang Date: Thu, 20 May 2021 01:03:58 -0400 Subject: [PATCH 21/21] Add doc for random interleaving for fr-1051 Committer: Hai Huang On branch fr-1051 Your branch is up to date with 'origin/fr-1051'. 
Changes to be committed: modified: README.md new file: docs/random_interleaving.md --- README.md | 8 +++++--- docs/random_interleaving.md | 26 ++++++++++++++++++++++++++ 2 files changed, 31 insertions(+), 3 deletions(-) create mode 100644 docs/random_interleaving.md diff --git a/README.md b/README.md index a853115b36..f32e3d9d6a 100644 --- a/README.md +++ b/README.md @@ -179,7 +179,7 @@ BENCHMARK_MAIN(); ``` To run the benchmark, compile and link against the `benchmark` library -(libbenchmark.a/.so). If you followed the build steps above, this library will +(libbenchmark.a/.so). If you followed the build steps above, this library will be under the build directory you created. ```bash @@ -299,6 +299,8 @@ too (`-lkstat`). [Setting the Time Unit](#setting-the-time-unit) +[Random Interleaving](docs/random_interleaving.md) + [User-Requested Performance Counters](docs/perf_counters.md) [Preventing Optimization](#preventing-optimization) @@ -399,8 +401,8 @@ Write benchmark results to a file with the `--benchmark_out=` option (or set `BENCHMARK_OUT`). Specify the output format with `--benchmark_out_format={json|console|csv}` (or set `BENCHMARK_OUT_FORMAT={json|console|csv}`). Note that the 'csv' reporter is -deprecated and the saved `.csv` file -[is not parsable](https://github.com/google/benchmark/issues/794) by csv +deprecated and the saved `.csv` file +[is not parsable](https://github.com/google/benchmark/issues/794) by csv parsers. Specifying `--benchmark_out` does not suppress the console output. diff --git a/docs/random_interleaving.md b/docs/random_interleaving.md new file mode 100644 index 0000000000..2471b46bb0 --- /dev/null +++ b/docs/random_interleaving.md @@ -0,0 +1,26 @@ + + +# Random Interleaving + +[Random Interleaving](https://github.com/google/benchmark/issues/1051) is a +technique to lower run-to-run variance. 
It breaks the execution of a +microbenchmark into multiple chunks and randomly interleaves them with chunks +from other microbenchmarks in the same benchmark test. Data shows it is able to +lower run-to-run variance by +[40%](https://github.com/google/benchmark/issues/1051) on average. + +To use, set `--benchmark_enable_random_interleaving=true`. + +It's a known issue that random interleaving may increase the benchmark execution +time if: + +1. A benchmark has costly setup and / or teardown. Random interleaving will run + setup and teardown many times and may increase test execution time + significantly. +2. The time to run a single benchmark iteration is larger than the desired time + per repetition (i.e., `benchmark_min_time / benchmark_repetitions`). + +The overhead of random interleaving can be controlled by +`--benchmark_random_interleaving_max_overhead`. The default value is 0.4, meaning +the total execution time under random interleaving is limited by 1.4 x original +total execution time. Set it to `inf` for unlimited overhead.