Add benchmarks

thepinetree · thepinetree · commit 490280fb3610 · 2021-01-20T14:01:21.000-05:00
diff --git a/benchmark/runner/ssb_benchmark.cpp b/benchmark/runner/ssb_benchmark.cpp
@@ -0,0 +1,121 @@
+#include <tbb/task_scheduler_init.h>
+
+#include "benchmark/benchmark.h"
+#include "common/scoped_timer.h"
+#include "execution/execution_util.h"
+#include "execution/vm/module.h"
+#include "main/db_main.h"
+#include "settings/settings_manager.h"
+#include "test_util/tpch/workload.h"
+
+namespace noisepage::tpch {
+class SSBBenchmark : public benchmark::Fixture {  // Google Benchmark fixture for the SSB workload
+ public:
+  const bool print_exec_info_ = true;  // forwarded to TimeQuery as print_output; also gates the per-query summary
+  const double threshold_ = 0.1;  // running average counts as stable once it moves by at most this much per run
+  const uint64_t min_iterations_per_query_ = 10;  // lower bound on runs per query
+  const uint64_t max_iterations_per_query_ = 10;  // upper bound on runs per query (equal to the minimum here)
+  const int32_t threads_ = tbb::task_scheduler_init::default_num_threads();  // passed to the Workload constructor
+  const execution::vm::ExecutionMode mode_ = execution::vm::ExecutionMode::Interpret;  // mode handed to TimeQuery
+  std::unique_ptr<DBMain> db_main_;
+  std::unique_ptr<tpch::Workload> ssb_workload_;
+
+  const std::string ssb_table_root_ = "/Users/dpatra/Research/NoisePage-Support/SSB/SF0.01/";  // NOTE(review): absolute path on one developer's machine; must be edited to run elsewhere
+  const std::string ssb_database_name_ = "ssb_db";
+
+  void SetUp(const benchmark::State &state) final {  // fixture hook: brings up TPL, DBMain, metrics and the workload
+    execution::ExecutionUtil::InitTPL();
+
+    // Build a DBMain with GC, the GC thread, the catalog and the settings manager enabled
+    std::unordered_map<settings::Param, settings::ParamInfo> param_map;
+    settings::SettingsManager::ConstructParamMap(param_map);
+    auto db_main_builder = DBMain::Builder().SetUseGC(true)
+                                            .SetUseCatalog(true)
+                                            .SetUseGCThread(true)
+                                            .SetUseSettingsManager(true)
+                                            .SetSettingsParameterMap(std::move(param_map));
+    db_main_ = db_main_builder.Build();
+
+    // Enable the EXECUTION_PIPELINE metric and set its sample interval to 0
+    auto metrics_manager = db_main_->GetMetricsManager();
+    metrics_manager->EnableMetric(metrics::MetricsComponent::EXECUTION_PIPELINE);
+    metrics_manager->SetMetricSampleInterval(metrics::MetricsComponent::EXECUTION_PIPELINE, 0);
+
+    // Load the SSB tables and compile the queries
+    ssb_workload_ =
+        std::make_unique<tpch::Workload>(common::ManagedPointer<DBMain>(db_main_), ssb_database_name_,
+                                         ssb_table_root_, Workload::BenchmarkType::SSB, threads_);
+  }
+
+  void TearDown(const benchmark::State &state) final {  // fixture hook: shuts TPL down, then releases DBMain
+    execution::ExecutionUtil::ShutdownTPL();
+    // Free DBMain here so that the loggers are not needed after teardown
+    db_main_.reset();
+  }
+};
+
+/**
+ * Runs every SSB query repeatedly until its running-average time stabilizes.
+ *
+ * Each query is timed at least min_iterations_per_query_ times and is then re-run while the running average
+ * still moves by more than threshold_ per run, up to max_iterations_per_query_ runs. The summed query time is
+ * reported as the manual iteration time and the number of query executions as the items processed.
+ */
+// NOLINTNEXTLINE
+BENCHMARK_DEFINE_F(SSBBenchmark, StabilizeBenchmark)(benchmark::State &state) {
+  // Run benchmark for each query independently
+  const auto num_queries = ssb_workload_->GetQueryNum();
+
+  for (auto _ : state) {
+    // Overall totals for this benchmark iteration
+    uint64_t queries_run = 0;
+    uint64_t total_time = 0;
+    for (uint32_t i = 0; i < num_queries; i++) {
+      // Single query running totals; the time is kept as an integer so it can be added to total_time exactly
+      uint64_t query_time = 0;
+      uint64_t iterations = 0;
+      double avg = 0;
+      // Absolute change of the running average caused by the latest run. It is computed explicitly rather than
+      // with an unqualified abs(): depending on the headers in scope that name can resolve to the C int overload,
+      // which would truncate every difference below 1.0 to 0 and end the stabilization loop too early.
+      double avg_delta = 0;
+      // Iterate at least until min_iterations_per_query and at most until max_iterations_per_query and until average
+      // stabilizes
+      while ((iterations < min_iterations_per_query_) ||
+             ((avg_delta > threshold_) && (iterations < max_iterations_per_query_))) {
+        const double old_avg = avg;
+        query_time += ssb_workload_->TimeQuery(i, mode_, print_exec_info_);
+        iterations++;
+        avg = static_cast<double>(query_time) / static_cast<double>(iterations);
+        avg_delta = (avg > old_avg) ? (avg - old_avg) : (old_avg - avg);
+      }
+
+      if (print_exec_info_) {
+        std::cout << ssb_workload_->GetQueryName(i) << " took " << iterations
+                  << " iterations with an average execution time of " << avg << std::endl;
+      }
+
+      queries_run += iterations;
+      total_time += query_time;
+    }
+    // NOTE(review): benchmark::State::SetIterationTime expects seconds; the unit returned by TimeQuery is not
+    // visible from here - confirm that no conversion is needed.
+    state.SetIterationTime(total_time);
+    state.SetItemsProcessed(queries_run);
+  }
+
+  // Free the workload here so we don't need to use the loggers anymore
+  ssb_workload_.reset();
+}
+
+// NOLINTNEXTLINE
+BENCHMARK_DEFINE_F(SSBBenchmark, RuntimeBenchmark)(benchmark::State &state) {
+  // Every pass below executes each query of the workload exactly once
+  auto query_count = ssb_workload_->GetQueryNum();
+
+  for (auto _ : state) {
+    // Aggregates handed to the benchmark framework for this iteration
+    uint64_t elapsed_total = 0;
+    uint64_t executed = 0;
+    // Make min_iterations_per_query_ full passes over the query set
+    uint64_t pass = 0;
+    while (pass < min_iterations_per_query_) {
+      uint32_t query_idx = 0;
+      while (query_idx < query_count) {
+        elapsed_total += ssb_workload_->TimeQuery(query_idx, mode_, print_exec_info_);
+        executed++;
+        query_idx++;
+      }
+      pass++;
+    }
+    // Summed query time becomes the manual iteration time, the execution count the items processed
+    state.SetIterationTime(elapsed_total);
+    state.SetItemsProcessed(executed);
+  }
+
+  // Release the workload at this point so that the loggers are no longer needed afterwards
+  ssb_workload_.reset();
+}
+
+// BENCHMARK_REGISTER_F(SSBBenchmark, StabilizeBenchmark)->Unit(benchmark::kMillisecond)->UseManualTime()->Iterations(1);
+BENCHMARK_REGISTER_F(SSBBenchmark, RuntimeBenchmark)->Unit(benchmark::kMillisecond)->UseManualTime()->Iterations(1);  // one manually-timed iteration, reported in ms
+}  // namespace noisepage::tpch
diff --git a/benchmark/runner/tpch_benchmark.cpp b/benchmark/runner/tpch_benchmark.cpp
@@ -0,0 +1,121 @@
+#include <tbb/task_scheduler_init.h>
+
+#include "benchmark/benchmark.h"
+#include "common/scoped_timer.h"
+#include "execution/execution_util.h"
+#include "execution/vm/module.h"
+#include "main/db_main.h"
+#include "settings/settings_manager.h"
+#include "test_util/tpch/workload.h"
+
+namespace noisepage::tpch {
+class TPCHBenchmark : public benchmark::Fixture {  // Google Benchmark fixture for the TPCH workload
+ public:
+  const bool print_exec_info_ = true;  // forwarded to TimeQuery as print_output; also gates the per-query summary
+  const double threshold_ = 0.1;  // running average counts as stable once it moves by at most this much per run
+  const uint64_t min_iterations_per_query_ = 10;  // lower bound on runs per query
+  const uint64_t max_iterations_per_query_ = 10;  // upper bound on runs per query (equal to the minimum here)
+  const int32_t threads_ = tbb::task_scheduler_init::default_num_threads();  // passed to the Workload constructor
+  const execution::vm::ExecutionMode mode_ = execution::vm::ExecutionMode::Interpret;  // mode handed to TimeQuery
+
+  std::unique_ptr<DBMain> db_main_;
+  std::unique_ptr<tpch::Workload> tpch_workload_;
+
+  const std::string tpch_table_root_ = "/Users/dpatra/Research/NoisePage-Support/TPCH/SF0.01/";  // NOTE(review): absolute path on one developer's machine; must be edited to run elsewhere
+  const std::string tpch_database_name_ = "tpch_db";
+
+  void SetUp(const benchmark::State &state) final {  // fixture hook: brings up TPL, DBMain, metrics and the workload
+    execution::ExecutionUtil::InitTPL();
+
+    // Build a DBMain with GC, the GC thread, the catalog and the settings manager enabled
+    std::unordered_map<settings::Param, settings::ParamInfo> param_map;
+    settings::SettingsManager::ConstructParamMap(param_map);
+    auto db_main_builder = DBMain::Builder().SetUseGC(true)
+                                            .SetUseCatalog(true)
+                                            .SetUseGCThread(true)
+                                            .SetUseSettingsManager(true)
+                                            .SetSettingsParameterMap(std::move(param_map));
+    db_main_ = db_main_builder.Build();
+
+    // Enable the EXECUTION_PIPELINE metric and set its sample interval to 0
+    auto metrics_manager = db_main_->GetMetricsManager();
+    metrics_manager->EnableMetric(metrics::MetricsComponent::EXECUTION_PIPELINE);
+    metrics_manager->SetMetricSampleInterval(metrics::MetricsComponent::EXECUTION_PIPELINE, 0);
+
+    // Load the TPCH tables and compile the queries
+    tpch_workload_ = std::make_unique<tpch::Workload>(common::ManagedPointer<DBMain>(db_main_), tpch_database_name_,
+                                                      tpch_table_root_, tpch::Workload::BenchmarkType::TPCH, threads_);
+  }
+
+  void TearDown(const benchmark::State &state) final {  // fixture hook: shuts TPL down, then releases DBMain
+    execution::ExecutionUtil::ShutdownTPL();
+    // Free DBMain here so that the loggers are not needed after teardown
+    db_main_.reset();
+  }
+};
+
+/**
+ * Runs every TPCH query repeatedly until its running-average time stabilizes.
+ *
+ * Each query is timed at least min_iterations_per_query_ times and is then re-run while the running average
+ * still moves by more than threshold_ per run, up to max_iterations_per_query_ runs. The summed query time is
+ * reported as the manual iteration time and the number of query executions as the items processed.
+ */
+// NOLINTNEXTLINE
+BENCHMARK_DEFINE_F(TPCHBenchmark, StabilizeBenchmark)(benchmark::State &state) {
+  // Run benchmark for each query independently
+  const auto num_queries = tpch_workload_->GetQueryNum();
+
+  for (auto _ : state) {
+    // Overall totals for this benchmark iteration
+    uint64_t queries_run = 0;
+    uint64_t total_time = 0;
+    for (uint32_t i = 0; i < num_queries; i++) {
+      // Single query running totals; the time is kept as an integer so it can be added to total_time exactly
+      uint64_t query_time = 0;
+      uint64_t iterations = 0;
+      double avg = 0;
+      // Absolute change of the running average caused by the latest run. It is computed explicitly rather than
+      // with an unqualified abs(): depending on the headers in scope that name can resolve to the C int overload,
+      // which would truncate every difference below 1.0 to 0 and end the stabilization loop too early.
+      double avg_delta = 0;
+      // Iterate at least until min_iterations_per_query and at most until max_iterations_per_query and until average
+      // stabilizes
+      while ((iterations < min_iterations_per_query_) ||
+             ((avg_delta > threshold_) && (iterations < max_iterations_per_query_))) {
+        const double old_avg = avg;
+        query_time += tpch_workload_->TimeQuery(i, mode_, print_exec_info_);
+        iterations++;
+        avg = static_cast<double>(query_time) / static_cast<double>(iterations);
+        avg_delta = (avg > old_avg) ? (avg - old_avg) : (old_avg - avg);
+      }
+
+      if (print_exec_info_) {
+        std::cout << tpch_workload_->GetQueryName(i) << " took " << iterations
+                  << " iterations with an average execution time of " << avg << std::endl;
+      }
+
+      queries_run += iterations;
+      total_time += query_time;
+    }
+    // NOTE(review): benchmark::State::SetIterationTime expects seconds; the unit returned by TimeQuery is not
+    // visible from here - confirm that no conversion is needed.
+    state.SetIterationTime(total_time);
+    state.SetItemsProcessed(queries_run);
+  }
+
+  // Free the workload here so we don't need to use the loggers anymore
+  tpch_workload_.reset();
+}
+
+// NOLINTNEXTLINE
+BENCHMARK_DEFINE_F(TPCHBenchmark, RuntimeBenchmark)(benchmark::State &state) {
+  // Every pass below executes each query of the workload exactly once
+  auto query_count = tpch_workload_->GetQueryNum();
+
+  for (auto _ : state) {
+    // Aggregates handed to the benchmark framework for this iteration
+    uint64_t elapsed_total = 0;
+    uint64_t executed = 0;
+    // Make min_iterations_per_query_ full passes over the query set
+    uint64_t pass = 0;
+    while (pass < min_iterations_per_query_) {
+      uint32_t query_idx = 0;
+      while (query_idx < query_count) {
+        elapsed_total += tpch_workload_->TimeQuery(query_idx, mode_, print_exec_info_);
+        executed++;
+        query_idx++;
+      }
+      pass++;
+    }
+    // Summed query time becomes the manual iteration time, the execution count the items processed
+    state.SetIterationTime(elapsed_total);
+    state.SetItemsProcessed(executed);
+  }
+
+  // Release the workload at this point so that the loggers are no longer needed afterwards
+  tpch_workload_.reset();
+}
+
+// BENCHMARK_REGISTER_F(TPCHBenchmark, StabilizeBenchmark)->Unit(benchmark::kMillisecond)->UseManualTime()->Iterations(1);
+BENCHMARK_REGISTER_F(TPCHBenchmark, RuntimeBenchmark)->Unit(benchmark::kMillisecond)->UseManualTime()->Iterations(1);  // one manually-timed iteration, reported in ms
+}  // namespace noisepage::tpch
diff --git a/benchmark/runner/tpch_runner.cpp b/benchmark/runner/tpch_runner.cpp
@@ -66,7 +66,7 @@ BENCHMARK_DEFINE_F(TPCHRunner, Runner)(benchmark::State &state) {
       UNREACHABLE("Unimplemented Benchmark Type");
   }
   workload_ = std::make_unique<tpch::Workload>(common::ManagedPointer<DBMain>(db_main_), tpch_database_name_,
-                                               table_root, type_);
+                                               table_root, type_, total_num_threads_);
 
   int8_t num_thread_start;
   uint32_t query_num_start, repeat_num;
diff --git a/src/execution/compiler/executable_query.cpp b/src/execution/compiler/executable_query.cpp
@@ -160,9 +160,13 @@ void ExecutableQuery::Run(common::ManagedPointer<exec::ExecutionContext> exec_ct
   exec_ctx->SetPipelineOperatingUnits(GetPipelineOperatingUnits());
   exec_ctx->SetQueryId(query_id_);
 
-  // Now run through fragments.
-  for (const auto &fragment : fragments_) {
-    fragment->Run(query_state.get(), mode);
+  double elapsed_ms;
+  {
+    util::ScopedTimer timer(&elapsed_ms);
+    // Now run through fragments.
+    for (const auto &fragment : fragments_) {
+      fragment->Run(query_state.get(), mode);
+    }
   }
 
   // We do not currently re-use ExecutionContexts. However, this is unset to help ensure
diff --git a/src/execution/sql/aggregation_hash_table.cpp b/src/execution/sql/aggregation_hash_table.cpp
@@ -1,7 +1,6 @@
 #include "execution/sql/aggregation_hash_table.h"
 
 #include <tbb/parallel_for_each.h>
-#include <tbb/task_scheduler_init.h>
 
 #include <algorithm>
 #include <memory>
@@ -670,7 +669,7 @@ void AggregationHashTable::ExecuteParallelPartitionedScan(void *query_state, Thr
   util::Timer<std::milli> timer;
   timer.Start();
 
-  size_t num_threads = tbb::task_scheduler_init::default_num_threads();
+  size_t num_threads = exec_ctx_->GetExecutionSettings().GetNumberOfParallelExecutionThreads();
   size_t num_tasks = nonempty_parts.size();
   size_t concurrent_estimate = std::min(num_threads, num_tasks);
   exec_ctx_->SetNumConcurrentEstimate(concurrent_estimate);
diff --git a/src/execution/sql/join_hash_table.cpp b/src/execution/sql/join_hash_table.cpp
@@ -2,7 +2,6 @@
 
 #include <llvm/ADT/STLExtras.h>
 #include <tbb/parallel_for_each.h>
-#include <tbb/task_scheduler_init.h>
 
 #include <algorithm>
 #include <limits>
@@ -593,7 +592,7 @@ void JoinHashTable::MergeParallel(ThreadStateContainer *thread_state_container,
     EXECUTION_LOG_TRACE("JHT: Estimated {} elements >= {} element parallel threshold. Using parallel merge.",
                         num_elem_estimate, DEFAULT_MIN_SIZE_FOR_PARALLEL_MERGE);
 
-    size_t num_threads = tbb::task_scheduler_init::default_num_threads();
+    size_t num_threads = exec_ctx_->GetExecutionSettings().GetNumberOfParallelExecutionThreads();
     size_t num_tasks = tl_join_tables.size();
     auto estimate = std::min(num_threads, num_tasks);
     exec_ctx_->SetNumConcurrentEstimate(estimate);
diff --git a/src/execution/sql/sorter.cpp b/src/execution/sql/sorter.cpp
@@ -2,7 +2,6 @@
 
 #include <llvm/ADT/STLExtras.h>
 #include <tbb/parallel_for_each.h>
-#include <tbb/task_scheduler_init.h>
 
 #include <algorithm>
 #include <queue>
@@ -234,9 +233,8 @@ void Sorter::SortParallel(ThreadStateContainer *thread_state_container, std::siz
   util::StageTimer<std::milli> timer;
   timer.EnterStage("Parallel Sort Thread-Local Instances");
 
-  tbb::task_scheduler_init sched;
   {
-    size_t num_threads = tbb::task_scheduler_init::default_num_threads();
+    size_t num_threads = exec_ctx_->GetExecutionSettings().GetNumberOfParallelExecutionThreads();
     size_t num_tasks = tl_sorters.size();
     size_t num_concurrent = std::min(num_threads, num_tasks);
     exec_ctx_->SetNumConcurrentEstimate(num_concurrent);
@@ -360,7 +358,7 @@ void Sorter::SortParallel(ThreadStateContainer *thread_state_container, std::siz
   };
 
   {
-    size_t num_threads = tbb::task_scheduler_init::default_num_threads();
+    size_t num_threads = exec_ctx_->GetExecutionSettings().GetNumberOfParallelExecutionThreads();
     size_t num_tasks = merge_work.size();
     size_t concurrent = std::min(num_threads, num_tasks);
     exec_ctx_->SetNumConcurrentEstimate(concurrent);
diff --git a/test/include/test_util/tpch/workload.h b/test/include/test_util/tpch/workload.h
@@ -39,15 +39,39 @@ class Workload {
   enum class BenchmarkType : uint32_t { TPCH, SSB };
 
   Workload(common::ManagedPointer<DBMain> db_main, const std::string &db_name, const std::string &table_root,
-           enum BenchmarkType type);
+           enum BenchmarkType type, int64_t threads);
 
   /**
    * Function to invoke for a single worker thread to invoke the TPCH queries
-   * @param worker_id 1-indexed thread id
-   */
+   * @param worker_id 1-indexed thread id
+   * @param execution_us_per_worker max execution time for single worker
+   * @param avg_interval_us interval timing
+   * @param query_num number of queries to run
+   * @param mode execution mode
+   */
   void Execute(int8_t worker_id, uint64_t execution_us_per_worker, uint64_t avg_interval_us, uint32_t query_num,
                execution::vm::ExecutionMode mode);
-  uint32_t GetQueryNum() { return query_and_plan_.size(); }
+
+  /**
+   * Function to invoke a single query of the workload and collect its runtime
+   * @param query_ind index of query into query_and_plan_
+   * @param mode execution mode
+   * @param print_output boolean flag to determine whether timing output should be printed (defaults to false)
+   * @return time taken to run query (the time unit is not stated in this header)
+   */
+  uint64_t TimeQuery(int32_t query_ind, execution::vm::ExecutionMode mode, bool print_output = false);
+
+  /**
+   * Function to get the number of queries held by this workload
+   * @return number of entries in query_and_plan_
+   */
+  uint32_t GetQueryNum() const { return query_and_plan_.size(); }
+
+  /**
+   * Function to get the name of a query
+   * @param query_ind index of the query into query_names_ (not bounds-checked)
+   * @return copy of the name stored at that index
+   */
+  std::string GetQueryName(int32_t query_ind) const { return query_names_[query_ind]; }
 
  private:
   void GenerateTables(execution::exec::ExecutionContext *exec_ctx, const std::string &dir_name,
@@ -67,6 +91,7 @@ class Workload {
   std::vector<
       std::tuple<std::unique_ptr<execution::compiler::ExecutableQuery>, std::unique_ptr<planner::AbstractPlanNode>>>
       query_and_plan_;
+  std::vector<std::string> query_names_;
 };
 
 }  // namespace noisepage::tpch
diff --git a/test/test_util/tpch/workload.cpp b/test/test_util/tpch/workload.cpp
diff --git a/util/execution/tpl.cpp b/util/execution/tpl.cpp
diff --git a/util/include/execution/table_generator/schema_reader.h b/util/include/execution/table_generator/schema_reader.h

Original file line number	Diff line number	Diff line change
`@@ -66,7 +66,7 @@ BENCHMARK_DEFINE_F(TPCHRunner, Runner)(benchmark::State &state) {`
`66`	`66`	`UNREACHABLE("Unimplemented Benchmark Type");`
`67`	`67`	`}`
`68`	`68`	`workload_ = std::make_unique<tpch::Workload>(common::ManagedPointer<DBMain>(db_main_), tpch_database_name_,`
`69`		`- table_root, type_);`
	`69`	`+ table_root, type_, total_num_threads_);`
`70`	`70`
`71`	`71`	`int8_t num_thread_start;`
`72`	`72`	`uint32_t query_num_start, repeat_num;`