diff --git a/cpp/benchmarks/CMakeLists.txt b/cpp/benchmarks/CMakeLists.txt
index 03f11cc957b..549cb8e5d5d 100644
--- a/cpp/benchmarks/CMakeLists.txt
+++ b/cpp/benchmarks/CMakeLists.txt
@@ -344,11 +344,18 @@ ConfigureNVBench(CSV_WRITER_NVBENCH io/csv/csv_writer.cpp)
 
 # ##################################################################################################
 # * ast benchmark ---------------------------------------------------------------------------------
-ConfigureNVBench(AST_NVBENCH ast/transform.cpp)
+ConfigureNVBench(AST_NVBENCH ast/polynomials.cpp ast/transform.cpp)
 
 # ##################################################################################################
 # * binaryop benchmark ----------------------------------------------------------------------------
-ConfigureNVBench(BINARYOP_NVBENCH binaryop/binaryop.cpp binaryop/compiled_binaryop.cpp)
+ConfigureNVBench(
+  BINARYOP_NVBENCH binaryop/binaryop.cpp binaryop/compiled_binaryop.cpp binaryop/polynomials.cpp
+)
+
+# ##################################################################################################
+# * transform benchmark
+# ---------------------------------------------------------------------------------
+ConfigureNVBench(TRANSFORM_NVBENCH transform/polynomials.cpp)
 
 # ##################################################################################################
 # * nvtext benchmark -------------------------------------------------------------------
diff --git a/cpp/benchmarks/ast/polynomials.cpp b/cpp/benchmarks/ast/polynomials.cpp
new file mode 100644
index 00000000000..b8e4ca46b72
--- /dev/null
+++ b/cpp/benchmarks/ast/polynomials.cpp
@@ -0,0 +1,94 @@
+/*
+ * Copyright (c) 2025, NVIDIA CORPORATION.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <benchmarks/common/generate_input.hpp>
+
+#include <cudf/ast/expressions.hpp>
+#include <cudf/column/column.hpp>
+#include <cudf/detail/nvtx/ranges.hpp>
+#include <cudf/table/table.hpp>
+#include <cudf/transform.hpp>
+#include <cudf/types.hpp>
+
+#include <rmm/cuda_stream_view.hpp>
+
+#include <thrust/iterator/counting_iterator.h>
+#include <nvbench/nvbench.cuh>
+
+#include <random>
+
+template <typename key_type>
+static void BM_ast_polynomials(nvbench::state& state)
+{
+  auto const num_rows = static_cast<cudf::size_type>(state.get_int64("num_rows"));
+  auto const order    = static_cast<cudf::size_type>(state.get_int64("order"));
+
+  CUDF_EXPECTS(order > 0, "Polynomial order must be greater than 0");
+
+  data_profile profile;
+  profile.set_distribution_params<key_type>(cudf::type_to_id<key_type>(),
+                                            distribution_id::NORMAL,
+                                            static_cast<key_type>(0),
+                                            static_cast<key_type>(1));
+  auto table = create_random_table({cudf::type_to_id<key_type>()}, row_count{num_rows}, profile);
+  auto column_view = table->get_column(0);
+
+  std::vector<cudf::numeric_scalar<key_type>> constants;
+  {
+    std::random_device random_device;
+    std::mt19937 generator;
+    std::uniform_real_distribution<key_type> distribution{0, 1};
+
+    std::transform(thrust::make_counting_iterator(0),
+                   thrust::make_counting_iterator(order + 1),
+                   std::back_inserter(constants),
+                   [&](int) { return distribution(generator); });
+  }
+
+  cudf::ast::tree tree{};
+
+  auto& column_ref = tree.push(cudf::ast::column_reference{0});
+
+  // computes polynomials: (((ax + b)x + c)x + d)x + e... = ax**4 + bx**3 + cx**2 + dx + e....
+  tree.push(cudf::ast::literal{constants[0]});
+
+  for (cudf::size_type i = 0; i < order; i++) {
+    auto& product =
+      tree.push(cudf::ast::operation{cudf::ast::ast_operator::MUL, tree.back(), column_ref});
+    auto& constant = tree.push(cudf::ast::literal{constants[i + 1]});
+    tree.push(cudf::ast::operation{cudf::ast::ast_operator::ADD, product, constant});
+  }
+
+  // Use the number of bytes read from global memory
+  state.add_global_memory_reads<key_type>(num_rows);
+  state.add_global_memory_writes<key_type>(num_rows);
+
+  state.exec(nvbench::exec_tag::sync, [&](nvbench::launch& launch) {
+    cudf::scoped_range range{"benchmark_iteration"};
+    cudf::compute_column(*table, tree.back(), launch.get_stream().get_stream());
+  });
+}
+
+#define AST_POLYNOMIAL_BENCHMARK_DEFINE(name, key_type)                          \
+  static void name(::nvbench::state& st) { ::BM_ast_polynomials<key_type>(st); } \
+  NVBENCH_BENCH(name)                                                            \
+    .set_name(#name)                                                             \
+    .add_int64_axis("num_rows", {100'000, 1'000'000, 10'000'000, 100'000'000})   \
+    .add_int64_axis("order", {1, 2, 4, 8, 16, 32})
+
+AST_POLYNOMIAL_BENCHMARK_DEFINE(ast_polynomials_float32, float);
+
+AST_POLYNOMIAL_BENCHMARK_DEFINE(ast_polynomials_float64, double);
diff --git a/cpp/benchmarks/binaryop/polynomials.cpp b/cpp/benchmarks/binaryop/polynomials.cpp
new file mode 100644
index 00000000000..782ae1db927
--- /dev/null
+++ b/cpp/benchmarks/binaryop/polynomials.cpp
@@ -0,0 +1,101 @@
+/*
+ * Copyright (c) 2025, NVIDIA CORPORATION.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <benchmarks/common/generate_input.hpp>
+
+#include <cudf/binaryop.hpp>
+#include <cudf/column/column.hpp>
+#include <cudf/column/column_factories.hpp>
+#include <cudf/detail/nvtx/ranges.hpp>
+#include <cudf/scalar/scalar.hpp>
+#include <cudf/table/table.hpp>
+
+#include <rmm/cuda_stream_view.hpp>
+
+#include <thrust/iterator/counting_iterator.h>
+
+#include <nvbench/nvbench.cuh>
+#include <random>
+
+template <typename key_type>
+static void BM_binaryop_polynomials(nvbench::state& state)
+{
+  auto const num_rows{static_cast<cudf::size_type>(state.get_int64("num_rows"))};
+  auto const order{static_cast<cudf::size_type>(state.get_int64("order"))};
+
+  CUDF_EXPECTS(order > 0, "Polynomial order must be greater than 0");
+
+  data_profile profile;
+  profile.set_distribution_params<key_type>(cudf::type_to_id<key_type>(),
+                                            distribution_id::NORMAL,
+                                            static_cast<key_type>(0),
+                                            static_cast<key_type>(1));
+  auto table = create_random_table({cudf::type_to_id<key_type>()}, row_count{num_rows}, profile);
+  auto column_view = table->get_column(0);
+
+  std::vector<cudf::numeric_scalar<key_type>> constants;
+  {
+    std::random_device random_device;
+    std::mt19937 generator;
+    std::uniform_real_distribution<key_type> distribution{0, 1};
+
+    std::transform(thrust::make_counting_iterator(0),
+                   thrust::make_counting_iterator(order + 1),
+                   std::back_inserter(constants),
+                   [&](int) { return cudf::numeric_scalar<key_type>(distribution(generator)); });
+  }
+
+  // Use the number of bytes read from global memory
+  state.add_global_memory_reads<key_type>(num_rows);
+  state.add_global_memory_writes<key_type>(num_rows);
+
+  state.exec(nvbench::exec_tag::sync, [&](nvbench::launch& launch) {
+    // computes polynomials: (((ax + b)x + c)x + d)x + e... = ax**4 + bx**3 + cx**2 + dx + e....
+    cudf::scoped_range range{"benchmark_iteration"};
+    rmm::cuda_stream_view stream{launch.get_stream().get_stream()};
+    std::vector<std::unique_ptr<cudf::column>> intermediates;
+
+    auto result = cudf::make_column_from_scalar(constants[0], num_rows, stream);
+
+    for (cudf::size_type i = 0; i < order; i++) {
+      auto product = cudf::binary_operation(result->view(),
+                                            column_view,
+                                            cudf::binary_operator::MUL,
+                                            cudf::data_type{cudf::type_to_id<key_type>()},
+                                            stream);
+      auto sum = cudf::binary_operation(product->view(),
+                                        constants[i + 1],
+                                        cudf::binary_operator::ADD,
+                                        cudf::data_type{cudf::type_to_id<key_type>()},
+                                        stream);
+      intermediates.push_back(std::move(product));
+      intermediates.push_back(std::move(result));
+      result = std::move(sum);
+    }
+  });
+}
+
+#define BINARYOP_POLYNOMIALS_BENCHMARK_DEFINE(name, key_type)                         \
+                                                                                      \
+  static void name(::nvbench::state& st) { ::BM_binaryop_polynomials<key_type>(st); } \
+  NVBENCH_BENCH(name)                                                                 \
+    .set_name(#name)                                                                  \
+    .add_int64_axis("num_rows", {100'000, 1'000'000, 10'000'000, 100'000'000})        \
+    .add_int64_axis("order", {1, 2, 4, 8, 16, 32})
+
+BINARYOP_POLYNOMIALS_BENCHMARK_DEFINE(binaryop_polynomials_float32, float);
+
+BINARYOP_POLYNOMIALS_BENCHMARK_DEFINE(binaryop_polynomials_float64, double);
diff --git a/cpp/benchmarks/transform/polynomials.cpp b/cpp/benchmarks/transform/polynomials.cpp
new file mode 100644
index 00000000000..f23789f5460
--- /dev/null
+++ b/cpp/benchmarks/transform/polynomials.cpp
@@ -0,0 +1,108 @@
+/*
+ * Copyright (c) 2025, NVIDIA CORPORATION.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <benchmarks/common/generate_input.hpp>
+
+#include <cudf/column/column.hpp>
+#include <cudf/detail/nvtx/ranges.hpp>
+#include <cudf/table/table.hpp>
+#include <cudf/transform.hpp>
+#include <cudf/utilities/error.hpp>
+
+#include <thrust/iterator/counting_iterator.h>
+
+#include <nvbench/nvbench.cuh>
+
+#include <random>
+#include <string>
+
+template <typename key_type>
+static void BM_transform_polynomials(nvbench::state& state)
+{
+  auto const num_rows{static_cast<cudf::size_type>(state.get_int64("num_rows"))};
+  auto const order{static_cast<cudf::size_type>(state.get_int64("order"))};
+
+  CUDF_EXPECTS(order > 0, "Polynomial order must be greater than 0");
+
+  data_profile profile;
+  profile.set_distribution_params<key_type>(cudf::type_to_id<key_type>(),
+                                            distribution_id::NORMAL,
+                                            static_cast<key_type>(0),
+                                            static_cast<key_type>(1));
+  auto table = create_random_table({cudf::type_to_id<key_type>()}, row_count{num_rows}, profile);
+  auto column_view = table->get_column(0);
+
+  std::vector<key_type> constants;
+
+  {
+    std::random_device random_device;
+    std::mt19937 generator;
+    std::uniform_real_distribution<key_type> distribution{0, 1};
+
+    std::transform(thrust::make_counting_iterator(0),
+                   thrust::make_counting_iterator(order + 1),
+                   std::back_inserter(constants),
+                   [&](int) { return distribution(generator); });
+  }
+
+  // Use the number of bytes read from global memory
+  state.add_global_memory_reads<key_type>(num_rows);
+  state.add_global_memory_writes<key_type>(num_rows);
+
+  state.exec(nvbench::exec_tag::sync, [&](nvbench::launch& launch) {
+    // computes polynomials: (((ax + b)x + c)x + d)x + e... = ax**4 + bx**3 + cx**2 + dx + e....
+
+    cudf::scoped_range range{"benchmark_iteration"};
+
+    std::string expr = std::to_string(constants[0]);
+
+    for (cudf::size_type i = 0; i < order; i++) {
+      expr = "( " + expr + " ) * x + " + std::to_string(constants[i + 1]);
+    }
+
+    static_assert(std::is_same_v<key_type, float> || std::is_same_v<key_type, double>);
+    std::string type = std::is_same_v<key_type, float> ? "float" : "double";
+
+    std::string udf = R"***(
+__device__ inline void compute_polynomial (
+  )***" + type + R"***(* out,
+  )***" + type + R"***( x
+)
+{
+  *out = )***" + expr +
+                      R"***(;
+}
+)***";
+
+    cudf::transform(column_view,
+                    udf,
+                    cudf::data_type{cudf::type_to_id<key_type>()},
+                    false,
+                    launch.get_stream().get_stream());
+  });
+}
+
+#define TRANSFORM_POLYNOMIALS_BENCHMARK_DEFINE(name, key_type)                         \
+                                                                                       \
+  static void name(::nvbench::state& st) { ::BM_transform_polynomials<key_type>(st); } \
+  NVBENCH_BENCH(name)                                                                  \
+    .set_name(#name)                                                                   \
+    .add_int64_axis("num_rows", {100'000, 1'000'000, 10'000'000, 100'000'000})         \
+    .add_int64_axis("order", {1, 2, 4, 8, 16, 32})
+
+TRANSFORM_POLYNOMIALS_BENCHMARK_DEFINE(transform_polynomials_float32, float);
+
+TRANSFORM_POLYNOMIALS_BENCHMARK_DEFINE(transform_polynomials_float64, double);