From 79799e59b1a0c7bde8b85ea955593b2d63d2a46c Mon Sep 17 00:00:00 2001 From: Hyunseok Seo Date: Mon, 22 Apr 2024 22:07:03 +0900 Subject: [PATCH] GH-39664: [C++][Acero] Ensure Acero benchmarks present a metric for identifying throughput (#40884) ### Rationale for this change Acero benchmarks sometimes output metrics such as `items/s`, `bytes/s`, `batches/s`, and `rows/s`. However, there is inconsistency in how these metrics are presented across different benchmarks. We are undertaking an effort to standardize the output of these metrics to ensure uniformity and clarity in performance measurement across all Acero benchmarks. ### What changes are included in this PR? `rows/s` has a similar meaning to `items/s`. - `bytes/s` and `items/s`: aggregate - `bytes/s` and `rows/s`: asof_join - `batches/s` and `rows/s`: project, filter, expression ### Are these changes tested? Yes. ### Are there any user-facing changes? No. * GitHub Issue: #39664 Authored-by: Hyunseok Seo Signed-off-by: Weston Pace --- cpp/src/arrow/acero/aggregate_benchmark.cc | 19 +++++++++++++++++++ cpp/src/arrow/acero/asof_join_benchmark.cc | 2 +- 2 files changed, 20 insertions(+), 1 deletion(-) diff --git a/cpp/src/arrow/acero/aggregate_benchmark.cc b/cpp/src/arrow/acero/aggregate_benchmark.cc index 4db7e443227d9..854862e3e48ca 100644 --- a/cpp/src/arrow/acero/aggregate_benchmark.cc +++ b/cpp/src/arrow/acero/aggregate_benchmark.cc @@ -29,6 +29,7 @@ #include "arrow/util/benchmark_util.h" #include "arrow/util/bit_util.h" #include "arrow/util/bitmap_reader.h" +#include "arrow/util/byte_size.h" #include "arrow/util/string.h" namespace arrow { @@ -50,6 +51,7 @@ namespace acero { #include using arrow::internal::ToChars; +using arrow::util::TotalBufferSize; #ifdef ARROW_WITH_BENCHMARKS_REFERENCE @@ -371,9 +373,11 @@ static void BenchmarkGroupBy(benchmark::State& state, std::vector agg for (std::size_t arg_idx = 0; arg_idx < arguments.size(); arg_idx++) { aggregates[arg_idx].target = {FieldRef(static_cast(arg_idx))}; } + int64_t total_bytes = TotalBufferSize(*batch); for (auto _ : state) { ABORT_NOT_OK(BatchGroupBy(batch, aggregates, key_refs)); } + state.SetBytesProcessed(total_bytes * state.iterations()); } #define GROUP_BY_BENCHMARK(Name, Impl) \ @@ -578,6 +582,8 @@ static void SumKernel(benchmark::State& state) { for (auto _ : state) { ABORT_NOT_OK(Sum(array).status()); } + + state.SetItemsProcessed(state.iterations() * array_size); } static void SumKernelArgs(benchmark::internal::Benchmark* bench) { @@ -611,6 +617,8 @@ void ModeKernel(benchmark::State& state, int min, int max) { for (auto _ : state) { ABORT_NOT_OK(Mode(array).status()); } + + state.SetItemsProcessed(state.iterations() * array_size); } template @@ -625,13 +633,18 @@ void ModeKernelNarrow(benchmark::State& state) { template <> void ModeKernelNarrow(benchmark::State& state) { + using CType = typename TypeTraits::CType; + RegressionArgs args(state); + const int64_t array_size = args.size / sizeof(CType); auto rand = random::RandomArrayGenerator(1924); auto array = rand.Boolean(args.size * 8, 0.5, args.null_proportion); for (auto _ : state) { ABORT_NOT_OK(Mode(array).status()); } + + state.SetItemsProcessed(state.iterations() * array_size); } template @@ -668,6 +681,8 @@ static void MinMaxKernelBench(benchmark::State& state) { for (auto _ : state) { ABORT_NOT_OK(MinMax(array).status()); } + + state.SetItemsProcessed(state.iterations() * array_size); } static void MinMaxKernelBenchArgs(benchmark::internal::Benchmark* bench) { @@ -698,6 +713,8 @@ static void CountKernelBenchInt64(benchmark::State& state) { for (auto _ : state) { ABORT_NOT_OK(Count(array->Slice(1, array_size)).status()); } + + state.SetItemsProcessed(state.iterations() * array_size); } BENCHMARK(CountKernelBenchInt64)->Args({1 * 1024 * 1024, 2}); // 1M with 50% null. @@ -718,6 +735,8 @@ void VarianceKernelBench(benchmark::State& state) { for (auto _ : state) { ABORT_NOT_OK(Variance(array, options).status()); } + + state.SetItemsProcessed(state.iterations() * array_size); } static void VarianceKernelBenchArgs(benchmark::internal::Benchmark* bench) { diff --git a/cpp/src/arrow/acero/asof_join_benchmark.cc b/cpp/src/arrow/acero/asof_join_benchmark.cc index 02116b09fc1fd..ed2ac2258eb6c 100644 --- a/cpp/src/arrow/acero/asof_join_benchmark.cc +++ b/cpp/src/arrow/acero/asof_join_benchmark.cc @@ -91,7 +91,7 @@ static void TableJoinOverhead(benchmark::State& state, ASSERT_OK(DeclarationToStatus(std::move(join_node), /*use_threads=*/false)); } - state.counters["input_rows_per_second"] = benchmark::Counter( + state.counters["rows_per_second"] = benchmark::Counter( static_cast(state.iterations() * (left_table_stats.rows + right_hand_rows)), benchmark::Counter::kIsRate);