diff --git a/cpp/benchmarks/ast/transform.cpp b/cpp/benchmarks/ast/transform.cpp index f44f26e4d2c..7fe61054a26 100644 --- a/cpp/benchmarks/ast/transform.cpp +++ b/cpp/benchmarks/ast/transform.cpp @@ -16,16 +16,29 @@ #include +#include + +#include +#include +#include +#include #include #include +#include +#include #include #include #include +#include #include +#include +#include +#include +#include #include #include #include @@ -86,7 +99,71 @@ static void BM_ast_transform(nvbench::state& state) auto const& expression_tree_root = expressions.back(); // Use the number of bytes read from global memory - state.add_global_memory_reads(table_size * (tree_levels + 1)); + state.add_global_memory_reads(static_cast(table_size) * (tree_levels + 1)); + state.add_global_memory_writes(table_size); + + state.exec(nvbench::exec_tag::sync, + [&](nvbench::launch&) { cudf::compute_column(table, expression_tree_root); }); +} + +template +static void BM_string_compare_ast_transform(nvbench::state& state) +{ + auto const string_width = static_cast(state.get_int64("string_width")); + auto const num_rows = static_cast(state.get_int64("num_rows")); + auto const num_comparisons = static_cast(state.get_int64("num_comparisons")); + auto const hit_rate = static_cast(state.get_int64("hit_rate")); + + CUDF_EXPECTS(num_comparisons > 0, "benchmarks require 1 or more comparisons"); + + // Create table data + auto const num_cols = num_comparisons * 2; + std::vector> columns; + std::for_each( + thrust::make_counting_iterator(0), thrust::make_counting_iterator(num_cols), [&](size_t) { + columns.emplace_back(create_string_column(num_rows, string_width, hit_rate)); + }); + + cudf::table table{std::move(columns)}; + cudf::table_view const table_view = table.view(); + + int64_t const chars_size = std::accumulate( + table_view.begin(), + table_view.end(), + static_cast(0), + [](int64_t size, auto& column) -> int64_t { + return size + cudf::strings_column_view{column}.chars_size(cudf::get_default_stream()); + }); + + // Create column references + auto column_refs = std::vector(); + std::transform(thrust::make_counting_iterator(0), + thrust::make_counting_iterator(num_cols), + std::back_inserter(column_refs), + [](auto const& column_id) { return cudf::ast::column_reference(column_id); }); + + // Create expression trees + std::list expressions; + + // Construct AST tree (a == b && c == d && e == f && ...) + + expressions.emplace_back(cudf::ast::operation(cmp_op, column_refs[0], column_refs[1])); + + std::for_each(thrust::make_counting_iterator(1), + thrust::make_counting_iterator(num_comparisons), + [&](size_t idx) { + auto const& lhs = expressions.back(); + auto const& rhs = expressions.emplace_back( + cudf::ast::operation(cmp_op, column_refs[idx * 2], column_refs[idx * 2 + 1])); + expressions.emplace_back(cudf::ast::operation(reduce_op, lhs, rhs)); + }); + + auto const& expression_tree_root = expressions.back(); + + // Use the number of bytes read from global memory + state.add_element_count(chars_size, "chars_size"); + state.add_global_memory_reads(chars_size); + state.add_global_memory_writes(num_rows); state.exec(nvbench::exec_tag::sync, [&](nvbench::launch&) { cudf::compute_column(table, expression_tree_root); }); @@ -115,3 +192,19 @@ AST_TRANSFORM_BENCHMARK_DEFINE( ast_int32_imbalanced_reuse_nulls, int32_t, TreeType::IMBALANCED_LEFT, true, true); AST_TRANSFORM_BENCHMARK_DEFINE( ast_double_imbalanced_unique_nulls, double, TreeType::IMBALANCED_LEFT, false, true); + +#define AST_STRING_COMPARE_TRANSFORM_BENCHMARK_DEFINE(name, cmp_op, reduce_op) \ + static void name(::nvbench::state& st) \ + { \ + ::BM_string_compare_ast_transform(st); \ + } \ + NVBENCH_BENCH(name) \ + .set_name(#name) \ + .add_int64_axis("string_width", {32, 64, 128, 256}) \ + .add_int64_axis("num_rows", {32768, 262144, 2097152}) \ + .add_int64_axis("num_comparisons", {1, 2, 3, 4}) \ + .add_int64_axis("hit_rate", {50, 100}) + +AST_STRING_COMPARE_TRANSFORM_BENCHMARK_DEFINE(ast_string_equal_logical_and, + cudf::ast::ast_operator::EQUAL, + cudf::ast::ast_operator::LOGICAL_AND); diff --git a/cpp/benchmarks/binaryop/binaryop.cpp b/cpp/benchmarks/binaryop/binaryop.cpp index 7d267a88764..35e41c6c2a4 100644 --- a/cpp/benchmarks/binaryop/binaryop.cpp +++ b/cpp/benchmarks/binaryop/binaryop.cpp @@ -17,12 +17,18 @@ #include #include +#include +#include #include #include +#include + #include #include +#include +#include // This set of benchmarks is designed to be a comparison for the AST benchmarks @@ -44,7 +50,8 @@ static void BM_binaryop_transform(nvbench::state& state) cudf::table_view table{*source_table}; // Use the number of bytes read from global memory - state.add_global_memory_reads(table_size * (tree_levels + 1)); + state.add_global_memory_reads(static_cast(table_size) * (tree_levels + 1)); + state.add_global_memory_writes(table_size); state.exec(nvbench::exec_tag::sync, [&](nvbench::launch&) { // Execute tree that chains additions like (((a + b) + c) + d) @@ -64,11 +71,65 @@ static void BM_binaryop_transform(nvbench::state& state) }); } +template +static void BM_string_compare_binaryop_transform(nvbench::state& state) +{ + auto const string_width = static_cast(state.get_int64("string_width")); + auto const num_rows = static_cast(state.get_int64("num_rows")); + auto const num_comparisons = static_cast(state.get_int64("num_comparisons")); + auto const hit_rate = static_cast(state.get_int64("hit_rate")); + + CUDF_EXPECTS(num_comparisons > 0, "benchmarks require 1 or more comparisons"); + + // Create table data + auto const num_cols = num_comparisons * 2; + std::vector> columns; + std::for_each( + thrust::make_counting_iterator(0), thrust::make_counting_iterator(num_cols), [&](size_t) { + columns.emplace_back(create_string_column(num_rows, string_width, hit_rate)); + }); + + cudf::table table{std::move(columns)}; + cudf::table_view const table_view = table.view(); + + int64_t const chars_size = std::accumulate( + table_view.begin(), table_view.end(), static_cast(0), [](int64_t size, auto& column) { + return size + cudf::strings_column_view{column}.chars_size(cudf::get_default_stream()); + }); + + // Create column references + + // Use the number of bytes read from global memory + state.add_element_count(chars_size, "chars_size"); + state.add_global_memory_reads(chars_size); + state.add_global_memory_writes(num_rows); + + // Construct binary operations (a == b && c == d && e == f && ...) + auto constexpr bool_type = cudf::data_type{cudf::type_id::BOOL8}; + + state.exec(nvbench::exec_tag::sync, [&](nvbench::launch& launch) { + rmm::cuda_stream_view stream{launch.get_stream().get_stream()}; + std::unique_ptr reduction = + cudf::binary_operation(table.get_column(0), table.get_column(1), cmp_op, bool_type, stream); + std::for_each( + thrust::make_counting_iterator(1), + thrust::make_counting_iterator(num_comparisons), + [&](size_t idx) { + std::unique_ptr comparison = cudf::binary_operation( + table.get_column(idx * 2), table.get_column(idx * 2 + 1), cmp_op, bool_type, stream); + std::unique_ptr reduced = + cudf::binary_operation(*comparison, *reduction, reduce_op, bool_type, stream); + stream.synchronize(); + reduction = std::move(reduced); + }); + }); +} + #define BINARYOP_TRANSFORM_BENCHMARK_DEFINE(name, key_type, tree_type, reuse_columns) \ \ static void name(::nvbench::state& st) \ { \ - BM_binaryop_transform(st); \ + ::BM_binaryop_transform(st); \ } \ NVBENCH_BENCH(name) \ .add_int64_axis("tree_levels", {1, 2, 5, 10}) \ @@ -86,3 +147,20 @@ BINARYOP_TRANSFORM_BENCHMARK_DEFINE(binaryop_double_imbalanced_unique, double, TreeType::IMBALANCED_LEFT, false); + +#define STRING_COMPARE_BINARYOP_TRANSFORM_BENCHMARK_DEFINE(name, cmp_op, reduce_op) \ + \ + static void name(::nvbench::state& st) \ + { \ + ::BM_string_compare_binaryop_transform(st); \ + } \ + NVBENCH_BENCH(name) \ + .set_name(#name) \ + .add_int64_axis("string_width", {32, 64, 128, 256}) \ + .add_int64_axis("num_rows", {32768, 262144, 2097152}) \ + .add_int64_axis("num_comparisons", {1, 2, 3, 4}) \ + .add_int64_axis("hit_rate", {50, 100}) + +STRING_COMPARE_BINARYOP_TRANSFORM_BENCHMARK_DEFINE(string_compare_binaryop_transform, + cudf::binary_operator::EQUAL, + cudf::binary_operator::LOGICAL_AND); diff --git a/cpp/benchmarks/binaryop/compiled_binaryop.cpp b/cpp/benchmarks/binaryop/compiled_binaryop.cpp index bc0ff69bce9..cd3c3871a2e 100644 --- a/cpp/benchmarks/binaryop/compiled_binaryop.cpp +++ b/cpp/benchmarks/binaryop/compiled_binaryop.cpp @@ -39,7 +39,7 @@ void BM_compiled_binaryop(nvbench::state& state, cudf::binary_operator binop) // use number of bytes read and written to global memory state.add_global_memory_reads(table_size); state.add_global_memory_reads(table_size); - state.add_global_memory_reads(table_size); + state.add_global_memory_writes(table_size); state.exec(nvbench::exec_tag::sync, [&](nvbench::launch&) { cudf::binary_operation(lhs, rhs, binop, output_dtype); }); diff --git a/cpp/benchmarks/common/generate_input.cu b/cpp/benchmarks/common/generate_input.cu index dc258e32dc5..bdce8a31176 100644 --- a/cpp/benchmarks/common/generate_input.cu +++ b/cpp/benchmarks/common/generate_input.cu @@ -17,13 +17,17 @@ #include "generate_input.hpp" #include "random_distribution_factory.cuh" +#include + #include #include +#include #include #include #include #include #include +#include #include #include #include @@ -918,6 +922,58 @@ std::unique_ptr create_sequence_table(std::vector co return std::make_unique(std::move(columns)); } +std::unique_ptr create_string_column(cudf::size_type num_rows, + cudf::size_type row_width, + int32_t hit_rate) +{ + // build input table using the following data + auto raw_data = cudf::test::strings_column_wrapper( + { + "123 abc 4567890 DEFGHI 0987 5W43", // matches both patterns; + "012345 6789 01234 56789 0123 456", // the rest do not match + "abc 4567890 DEFGHI 0987 Wxyz 123", + "abcdefghijklmnopqrstuvwxyz 01234", + "", + "AbcéDEFGHIJKLMNOPQRSTUVWXYZ 01", + "9876543210,abcdefghijklmnopqrstU", + "9876543210,abcdefghijklmnopqrstU", + "123 édf 4567890 DéFG 0987 X5", + "1", + }) + .release(); + + if (row_width / 32 > 1) { + std::vector columns; + for (int i = 0; i < row_width / 32; ++i) { + columns.push_back(raw_data->view()); + } + raw_data = cudf::strings::concatenate(cudf::table_view(columns)); + } + auto data_view = raw_data->view(); + + // compute number of rows in n_rows that should match + auto const num_matches = (static_cast(num_rows) * hit_rate) / 100; + + // Create a randomized gather-map to build a column out of the strings in data. + data_profile gather_profile = + data_profile_builder().cardinality(0).null_probability(0.0).distribution( + cudf::type_id::INT32, distribution_id::UNIFORM, 1, data_view.size() - 1); + auto gather_table = + create_random_table({cudf::type_id::INT32}, row_count{num_rows}, gather_profile); + gather_table->get_column(0).set_null_mask(rmm::device_buffer{}, 0); + + // Create scatter map by placing 0-index values throughout the gather-map + auto scatter_data = cudf::sequence(num_matches, + cudf::numeric_scalar(0), + cudf::numeric_scalar(num_rows / num_matches)); + auto zero_scalar = cudf::numeric_scalar(0); + auto table = cudf::scatter({zero_scalar}, scatter_data->view(), gather_table->view()); + auto gather_map = table->view().column(0); + table = cudf::gather(cudf::table_view({data_view}), gather_map); + + return std::move(table->release().front()); +} + std::pair create_random_null_mask( cudf::size_type size, std::optional null_probability, unsigned seed) { diff --git a/cpp/benchmarks/common/generate_input.hpp b/cpp/benchmarks/common/generate_input.hpp index 68d3dc492f5..57834fd11d2 100644 --- a/cpp/benchmarks/common/generate_input.hpp +++ b/cpp/benchmarks/common/generate_input.hpp @@ -670,6 +670,18 @@ std::unique_ptr create_random_column(cudf::type_id dtype_id, data_profile const& data_params = data_profile{}, unsigned seed = 1); +/** + * @brief Deterministically generates a large string column filled with data with the given + * parameters. + * + * @param num_rows Number of rows in the output column + * @param row_width Width of each string in the column + * @param hit_rate The hit rate percentage, ranging from 0 to 100 + */ +std::unique_ptr create_string_column(cudf::size_type num_rows, + cudf::size_type row_width, + int32_t hit_rate); + /** * @brief Generate sequence columns starting with value 0 in first row and increasing by 1 in * subsequent rows. diff --git a/cpp/benchmarks/string/contains.cpp b/cpp/benchmarks/string/contains.cpp index ae6c8b844c8..a73017dda18 100644 --- a/cpp/benchmarks/string/contains.cpp +++ b/cpp/benchmarks/string/contains.cpp @@ -17,10 +17,6 @@ #include #include -#include - -#include -#include #include #include #include @@ -28,57 +24,6 @@ #include -std::unique_ptr build_input_column(cudf::size_type n_rows, - cudf::size_type row_width, - int32_t hit_rate) -{ - // build input table using the following data - auto raw_data = cudf::test::strings_column_wrapper( - { - "123 abc 4567890 DEFGHI 0987 5W43", // matches both patterns; - "012345 6789 01234 56789 0123 456", // the rest do not match - "abc 4567890 DEFGHI 0987 Wxyz 123", - "abcdefghijklmnopqrstuvwxyz 01234", - "", - "AbcéDEFGHIJKLMNOPQRSTUVWXYZ 01", - "9876543210,abcdefghijklmnopqrstU", - "9876543210,abcdefghijklmnopqrstU", - "123 édf 4567890 DéFG 0987 X5", - "1", - }) - .release(); - - if (row_width / 32 > 1) { - std::vector columns; - for (int i = 0; i < row_width / 32; ++i) { - columns.push_back(raw_data->view()); - } - raw_data = cudf::strings::concatenate(cudf::table_view(columns)); - } - auto data_view = raw_data->view(); - - // compute number of rows in n_rows that should match - auto matches = static_cast(n_rows * hit_rate) / 100; - - // Create a randomized gather-map to build a column out of the strings in data. - data_profile gather_profile = - data_profile_builder().cardinality(0).null_probability(0.0).distribution( - cudf::type_id::INT32, distribution_id::UNIFORM, 1, data_view.size() - 1); - auto gather_table = - create_random_table({cudf::type_id::INT32}, row_count{n_rows}, gather_profile); - gather_table->get_column(0).set_null_mask(rmm::device_buffer{}, 0); - - // Create scatter map by placing 0-index values throughout the gather-map - auto scatter_data = cudf::sequence( - matches, cudf::numeric_scalar(0), cudf::numeric_scalar(n_rows / matches)); - auto zero_scalar = cudf::numeric_scalar(0); - auto table = cudf::scatter({zero_scalar}, scatter_data->view(), gather_table->view()); - auto gather_map = table->view().column(0); - table = cudf::gather(cudf::table_view({data_view}), gather_map); - - return std::move(table->release().front()); -} - // longer pattern lengths demand more working memory per string std::string patterns[] = {"^\\d+ [a-z]+", "[A-Z ]+\\d+ +\\d+[A-Z]+\\d+$", "5W43"}; @@ -94,7 +39,7 @@ static void bench_contains(nvbench::state& state) state.skip("Skip benchmarks greater than size_type limit"); } - auto col = build_input_column(n_rows, row_width, hit_rate); + auto col = create_string_column(n_rows, row_width, hit_rate); auto input = cudf::strings_column_view(col->view()); auto pattern = patterns[pattern_index]; diff --git a/cpp/benchmarks/string/find.cpp b/cpp/benchmarks/string/find.cpp index a9c620e4bf0..996bdcf0332 100644 --- a/cpp/benchmarks/string/find.cpp +++ b/cpp/benchmarks/string/find.cpp @@ -19,7 +19,6 @@ #include -#include #include #include #include @@ -29,10 +28,6 @@ #include -std::unique_ptr build_input_column(cudf::size_type n_rows, - cudf::size_type row_width, - int32_t hit_rate); - static void bench_find_string(nvbench::state& state) { auto const n_rows = static_cast(state.get_int64("num_rows")); @@ -46,7 +41,7 @@ static void bench_find_string(nvbench::state& state) } auto const stream = cudf::get_default_stream(); - auto const col = build_input_column(n_rows, row_width, hit_rate); + auto const col = create_string_column(n_rows, row_width, hit_rate); auto const input = cudf::strings_column_view(col->view()); std::vector h_targets({"5W", "5W43", "0987 5W43"}); diff --git a/cpp/benchmarks/string/like.cpp b/cpp/benchmarks/string/like.cpp index 99cef640dc3..105ae65cbe8 100644 --- a/cpp/benchmarks/string/like.cpp +++ b/cpp/benchmarks/string/like.cpp @@ -18,68 +18,12 @@ #include -#include -#include -#include #include #include #include #include -namespace { -std::unique_ptr build_input_column(cudf::size_type n_rows, - cudf::size_type row_width, - int32_t hit_rate) -{ - // build input table using the following data - auto raw_data = cudf::test::strings_column_wrapper( - { - "123 abc 4567890 DEFGHI 0987 5W43", // matches always; - "012345 6789 01234 56789 0123 456", // the rest do not match - "abc 4567890 DEFGHI 0987 Wxyz 123", - "abcdefghijklmnopqrstuvwxyz 01234", - "", - "AbcéDEFGHIJKLMNOPQRSTUVWXYZ 01", - "9876543210,abcdefghijklmnopqrstU", - "9876543210,abcdefghijklmnopqrstU", - "123 édf 4567890 DéFG 0987 X5", - "1", - }) - .release(); - if (row_width / 32 > 1) { - std::vector columns; - for (int i = 0; i < row_width / 32; ++i) { - columns.push_back(raw_data->view()); - } - raw_data = cudf::strings::concatenate(cudf::table_view(columns)); - } - auto data_view = raw_data->view(); - - // compute number of rows in n_rows that should match - auto matches = static_cast(n_rows * hit_rate) / 100; - - // Create a randomized gather-map to build a column out of the strings in data. - data_profile gather_profile = - data_profile_builder().cardinality(0).null_probability(0.0).distribution( - cudf::type_id::INT32, distribution_id::UNIFORM, 1, data_view.size() - 1); - auto gather_table = - create_random_table({cudf::type_id::INT32}, row_count{n_rows}, gather_profile); - gather_table->get_column(0).set_null_mask(rmm::device_buffer{}, 0); - - // Create scatter map by placing 0-index values throughout the gather-map - auto scatter_data = cudf::sequence( - matches, cudf::numeric_scalar(0), cudf::numeric_scalar(n_rows / matches)); - auto zero_scalar = cudf::numeric_scalar(0); - auto table = cudf::scatter({zero_scalar}, scatter_data->view(), gather_table->view()); - auto gather_map = table->view().column(0); - table = cudf::gather(cudf::table_view({data_view}), gather_map); - - return std::move(table->release().front()); -} - -} // namespace - static void bench_like(nvbench::state& state) { auto const n_rows = static_cast(state.get_int64("num_rows")); @@ -91,7 +35,7 @@ static void bench_like(nvbench::state& state) state.skip("Skip benchmarks greater than size_type limit"); } - auto col = build_input_column(n_rows, row_width, hit_rate); + auto col = create_string_column(n_rows, row_width, hit_rate); auto input = cudf::strings_column_view(col->view()); // This pattern forces reading the entire target string (when matched expected)