Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Added strings AST vs BINARY_OP benchmarks #17128

Merged
merged 26 commits into from
Oct 28, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
26 commits
Select commit Hold shift + click to select a range
f22f03a
deduplicated comparison string column generator
lamarrr Oct 18, 2024
7d44a85
added strings ast vs binary_op benchmarks
lamarrr Oct 18, 2024
bf32805
Merge branch 'branch-24.12' into strings-benchmarks
lamarrr Oct 18, 2024
b7a3d55
fixed random string generator documentation
lamarrr Oct 21, 2024
8ca5189
fixed include style
lamarrr Oct 21, 2024
cf364b1
refactored create_string_column identifier names
lamarrr Oct 21, 2024
65eb972
Merge branch 'branch-24.12' into strings-benchmarks
lamarrr Oct 21, 2024
2565339
refactored chars_size accumulation
lamarrr Oct 22, 2024
27b7c79
removed checked indexing
lamarrr Oct 22, 2024
e6f7d53
refactored chars_size accumulation
lamarrr Oct 22, 2024
10f82c2
removed checked indexing
lamarrr Oct 22, 2024
ef6763d
fixed possible overflow for calculating string matches
lamarrr Oct 22, 2024
533dfa0
fixed code formatting
lamarrr Oct 22, 2024
0626aaf
added assert for checking comparison count
lamarrr Oct 22, 2024
94d392b
fixed chars_size calculation
lamarrr Oct 22, 2024
5add488
parameterized string comparison benchmarks comparison and reduce oper…
lamarrr Oct 22, 2024
5e506a3
parameterized string comparison benchmarks comparison and reduce oper…
lamarrr Oct 22, 2024
d775da9
Merge branch 'branch-24.12' into strings-benchmarks
lamarrr Oct 23, 2024
aa1e554
removed storing of intermediates in binary_op strings benchmark
lamarrr Oct 23, 2024
70da2cf
Merge branch 'strings-benchmarks' of https://github.com/lamarrr/cudf …
lamarrr Oct 23, 2024
4914439
Merge branch 'branch-24.12' into strings-benchmarks
lamarrr Oct 24, 2024
6bd3f3c
updated ast and binops benchmarks
lamarrr Oct 25, 2024
accf956
updated ast and binops benchmarks
lamarrr Oct 25, 2024
0db10fe
Merge branch 'branch-24.12' into strings-benchmarks
lamarrr Oct 25, 2024
7d7fcd2
fixed ast and binops benchmarks throughput calculation integer overflow
lamarrr Oct 26, 2024
f7f6084
Merge branch 'branch-24.12' into strings-benchmarks
lamarrr Oct 28, 2024
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
95 changes: 94 additions & 1 deletion cpp/benchmarks/ast/transform.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -16,16 +16,29 @@

#include <benchmarks/common/generate_input.hpp>

#include <cudf_test/column_wrapper.hpp>

#include <cudf/ast/expressions.hpp>
#include <cudf/column/column.hpp>
#include <cudf/strings/strings_column_view.hpp>
#include <cudf/table/table.hpp>
#include <cudf/transform.hpp>
#include <cudf/types.hpp>
#include <cudf/utilities/default_stream.hpp>
#include <cudf/utilities/error.hpp>

#include <rmm/cuda_stream_view.hpp>

#include <thrust/iterator/counting_iterator.h>

#include <nvbench/nvbench.cuh>
#include <nvbench/types.cuh>

#include <algorithm>
#include <cstddef>
#include <cstdint>
#include <cstdio>
#include <iterator>
#include <list>
#include <memory>
#include <optional>
Expand Down Expand Up @@ -86,7 +99,71 @@ static void BM_ast_transform(nvbench::state& state)
auto const& expression_tree_root = expressions.back();

// Use the number of bytes read from global memory
state.add_global_memory_reads<key_type>(table_size * (tree_levels + 1));
state.add_global_memory_reads<key_type>(static_cast<size_t>(table_size) * (tree_levels + 1));
state.add_global_memory_writes<key_type>(table_size);

state.exec(nvbench::exec_tag::sync,
[&](nvbench::launch&) { cudf::compute_column(table, expression_tree_root); });
}

template <cudf::ast::ast_operator cmp_op, cudf::ast::ast_operator reduce_op>
static void BM_string_compare_ast_transform(nvbench::state& state)
{
auto const string_width = static_cast<cudf::size_type>(state.get_int64("string_width"));
auto const num_rows = static_cast<cudf::size_type>(state.get_int64("num_rows"));
auto const num_comparisons = static_cast<cudf::size_type>(state.get_int64("num_comparisons"));
auto const hit_rate = static_cast<cudf::size_type>(state.get_int64("hit_rate"));

CUDF_EXPECTS(num_comparisons > 0, "benchmarks require 1 or more comparisons");

// Create table data
auto const num_cols = num_comparisons * 2;
std::vector<std::unique_ptr<cudf::column>> columns;
std::for_each(
thrust::make_counting_iterator(0), thrust::make_counting_iterator(num_cols), [&](size_t) {
columns.emplace_back(create_string_column(num_rows, string_width, hit_rate));
});

cudf::table table{std::move(columns)};
cudf::table_view const table_view = table.view();

int64_t const chars_size = std::accumulate(
table_view.begin(),
table_view.end(),
static_cast<int64_t>(0),
[](int64_t size, auto& column) -> int64_t {
return size + cudf::strings_column_view{column}.chars_size(cudf::get_default_stream());
});

// Create column references
auto column_refs = std::vector<cudf::ast::column_reference>();
std::transform(thrust::make_counting_iterator(0),
thrust::make_counting_iterator(num_cols),
std::back_inserter(column_refs),
[](auto const& column_id) { return cudf::ast::column_reference(column_id); });

// Create expression trees
std::list<cudf::ast::operation> expressions;

// Construct AST tree (a == b && c == d && e == f && ...)

expressions.emplace_back(cudf::ast::operation(cmp_op, column_refs[0], column_refs[1]));

std::for_each(thrust::make_counting_iterator(1),
thrust::make_counting_iterator(num_comparisons),
[&](size_t idx) {
auto const& lhs = expressions.back();
auto const& rhs = expressions.emplace_back(
cudf::ast::operation(cmp_op, column_refs[idx * 2], column_refs[idx * 2 + 1]));
expressions.emplace_back(cudf::ast::operation(reduce_op, lhs, rhs));
});

auto const& expression_tree_root = expressions.back();

// Use the number of bytes read from global memory
state.add_element_count(chars_size, "chars_size");
state.add_global_memory_reads<nvbench::uint8_t>(chars_size);
state.add_global_memory_writes<nvbench::int32_t>(num_rows);

state.exec(nvbench::exec_tag::sync,
[&](nvbench::launch&) { cudf::compute_column(table, expression_tree_root); });
Expand Down Expand Up @@ -115,3 +192,19 @@ AST_TRANSFORM_BENCHMARK_DEFINE(
ast_int32_imbalanced_reuse_nulls, int32_t, TreeType::IMBALANCED_LEFT, true, true);
AST_TRANSFORM_BENCHMARK_DEFINE(
ast_double_imbalanced_unique_nulls, double, TreeType::IMBALANCED_LEFT, false, true);

// Defines a benchmark entry point `name` that forwards to
// BM_string_compare_ast_transform<cmp_op, reduce_op> and registers it with nvbench under the
// same name. The registration sweeps four int64 axes: string_width, num_rows, num_comparisons
// and hit_rate. The expansion deliberately ends without a semicolon; the invocation supplies it.
#define AST_STRING_COMPARE_TRANSFORM_BENCHMARK_DEFINE(name, cmp_op, reduce_op) \
static void name(::nvbench::state& st) \
{ \
::BM_string_compare_ast_transform<cmp_op, reduce_op>(st); \
} \
NVBENCH_BENCH(name) \
.set_name(#name) \
.add_int64_axis("string_width", {32, 64, 128, 256}) \
.add_int64_axis("num_rows", {32768, 262144, 2097152}) \
.add_int64_axis("num_comparisons", {1, 2, 3, 4}) \
.add_int64_axis("hit_rate", {50, 100})

// Registers the string benchmark that compares column pairs with EQUAL and combines the
// per-pair results with LOGICAL_AND.
AST_STRING_COMPARE_TRANSFORM_BENCHMARK_DEFINE(ast_string_equal_logical_and,
cudf::ast::ast_operator::EQUAL,
cudf::ast::ast_operator::LOGICAL_AND);
82 changes: 80 additions & 2 deletions cpp/benchmarks/binaryop/binaryop.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -17,12 +17,18 @@
#include <benchmarks/common/generate_input.hpp>

#include <cudf/binaryop.hpp>
#include <cudf/column/column.hpp>
#include <cudf/strings/strings_column_view.hpp>
#include <cudf/table/table_view.hpp>
#include <cudf/types.hpp>

#include <thrust/iterator/counting_iterator.h>

#include <nvbench/nvbench.cuh>

#include <algorithm>
#include <cstddef>
#include <memory>

// This set of benchmarks is designed to be a comparison for the AST benchmarks

Expand All @@ -44,7 +50,8 @@ static void BM_binaryop_transform(nvbench::state& state)
cudf::table_view table{*source_table};

// Use the number of bytes read from global memory
state.add_global_memory_reads<key_type>(table_size * (tree_levels + 1));
state.add_global_memory_reads<key_type>(static_cast<size_t>(table_size) * (tree_levels + 1));
state.add_global_memory_writes<key_type>(table_size);

state.exec(nvbench::exec_tag::sync, [&](nvbench::launch&) {
// Execute tree that chains additions like (((a + b) + c) + d)
Expand All @@ -64,11 +71,65 @@ static void BM_binaryop_transform(nvbench::state& state)
});
}

/**
 * @brief Benchmarks a chain of string-column comparisons evaluated with `cudf::binary_operation`.
 *
 * Builds `2 * num_comparisons` string columns, compares them pairwise with `cmp_op` and folds
 * the per-pair results together with `reduce_op`, i.e. `(a cmp b) reduce (c cmp d) reduce ...`.
 *
 * Axes read from `state`: `string_width`, `num_rows`, `num_comparisons` (must be > 0) and
 * `hit_rate`.
 *
 * @tparam cmp_op    Binary operator applied to each pair of string columns
 * @tparam reduce_op Binary operator used to combine the per-pair results
 * @param state      nvbench state providing the axes and receiving the throughput counters
 */
template <cudf::binary_operator cmp_op, cudf::binary_operator reduce_op>
static void BM_string_compare_binaryop_transform(nvbench::state& state)
{
  auto const string_width    = static_cast<cudf::size_type>(state.get_int64("string_width"));
  auto const num_rows        = static_cast<cudf::size_type>(state.get_int64("num_rows"));
  auto const num_comparisons = static_cast<cudf::size_type>(state.get_int64("num_comparisons"));
  auto const hit_rate        = static_cast<cudf::size_type>(state.get_int64("hit_rate"));

  CUDF_EXPECTS(num_comparisons > 0, "benchmarks require 1 or more comparisons");

  // Every comparison consumes a left and a right operand column.
  auto const num_cols = num_comparisons * 2;
  std::vector<std::unique_ptr<cudf::column>> columns;
  for (cudf::size_type col = 0; col < num_cols; ++col) {
    columns.emplace_back(create_string_column(num_rows, string_width, hit_rate));
  }

  cudf::table table{std::move(columns)};
  cudf::table_view const input_view = table.view();

  // Total character bytes across all input columns; this is what the benchmark reads.
  int64_t chars_size = 0;
  for (auto const& column : input_view) {
    chars_size += cudf::strings_column_view{column}.chars_size(cudf::get_default_stream());
  }

  // Report the memory traffic used for the throughput figures.
  // NOTE(review): the result column is BOOL8 (see bool_type below) while the writes are
  // counted as int32_t elements - confirm this accounting is intended.
  state.add_element_count(chars_size, "chars_size");
  state.add_global_memory_reads<nvbench::uint8_t>(chars_size);
  state.add_global_memory_writes<nvbench::int32_t>(num_rows);

  // Output type of every comparison and of every reduction step.
  auto constexpr bool_type = cudf::data_type{cudf::type_id::BOOL8};

  state.exec(nvbench::exec_tag::sync, [&](nvbench::launch& launch) {
    rmm::cuda_stream_view stream{launch.get_stream().get_stream()};

    // Seed the running result with the first pair: (col0 cmp col1).
    std::unique_ptr<cudf::column> reduction =
      cudf::binary_operation(table.get_column(0), table.get_column(1), cmp_op, bool_type, stream);

    // Fold each remaining pair into the running result.
    for (cudf::size_type pair = 1; pair < num_comparisons; ++pair) {
      std::unique_ptr<cudf::column> comparison = cudf::binary_operation(
        table.get_column(pair * 2), table.get_column(pair * 2 + 1), cmp_op, bool_type, stream);
      std::unique_ptr<cudf::column> reduced =
        cudf::binary_operation(*comparison, *reduction, reduce_op, bool_type, stream);
      // Synchronize before the previous running result is replaced (and thereby released).
      stream.synchronize();
      reduction = std::move(reduced);
    }
  });
}

#define BINARYOP_TRANSFORM_BENCHMARK_DEFINE(name, key_type, tree_type, reuse_columns) \
\
static void name(::nvbench::state& st) \
{ \
BM_binaryop_transform<key_type, tree_type, reuse_columns>(st); \
::BM_binaryop_transform<key_type, tree_type, reuse_columns>(st); \
} \
NVBENCH_BENCH(name) \
.add_int64_axis("tree_levels", {1, 2, 5, 10}) \
Expand All @@ -86,3 +147,20 @@ BINARYOP_TRANSFORM_BENCHMARK_DEFINE(binaryop_double_imbalanced_unique,
double,
TreeType::IMBALANCED_LEFT,
false);

// Defines a benchmark entry point `name` that forwards to
// BM_string_compare_binaryop_transform<cmp_op, reduce_op> and registers it with nvbench under
// the same name. The registration sweeps four int64 axes: string_width, num_rows,
// num_comparisons and hit_rate. The expansion deliberately ends without a semicolon; the
// invocation supplies it.
#define STRING_COMPARE_BINARYOP_TRANSFORM_BENCHMARK_DEFINE(name, cmp_op, reduce_op) \
\
static void name(::nvbench::state& st) \
{ \
::BM_string_compare_binaryop_transform<cmp_op, reduce_op>(st); \
} \
NVBENCH_BENCH(name) \
.set_name(#name) \
.add_int64_axis("string_width", {32, 64, 128, 256}) \
.add_int64_axis("num_rows", {32768, 262144, 2097152}) \
.add_int64_axis("num_comparisons", {1, 2, 3, 4}) \
.add_int64_axis("hit_rate", {50, 100})

// Registers the string benchmark that compares column pairs with EQUAL and combines the
// per-pair results with LOGICAL_AND.
STRING_COMPARE_BINARYOP_TRANSFORM_BENCHMARK_DEFINE(string_compare_binaryop_transform,
cudf::binary_operator::EQUAL,
cudf::binary_operator::LOGICAL_AND);
2 changes: 1 addition & 1 deletion cpp/benchmarks/binaryop/compiled_binaryop.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -39,7 +39,7 @@ void BM_compiled_binaryop(nvbench::state& state, cudf::binary_operator binop)
// use number of bytes read and written to global memory
state.add_global_memory_reads<TypeLhs>(table_size);
state.add_global_memory_reads<TypeRhs>(table_size);
state.add_global_memory_reads<TypeOut>(table_size);
state.add_global_memory_writes<TypeOut>(table_size);

state.exec(nvbench::exec_tag::sync,
[&](nvbench::launch&) { cudf::binary_operation(lhs, rhs, binop, output_dtype); });
Expand Down
56 changes: 56 additions & 0 deletions cpp/benchmarks/common/generate_input.cu
Original file line number Diff line number Diff line change
Expand Up @@ -17,13 +17,17 @@
#include "generate_input.hpp"
#include "random_distribution_factory.cuh"

#include <cudf_test/column_wrapper.hpp>

#include <cudf/column/column.hpp>
#include <cudf/column/column_factories.hpp>
#include <cudf/copying.hpp>
#include <cudf/detail/gather.hpp>
#include <cudf/detail/valid_if.cuh>
#include <cudf/filling.hpp>
#include <cudf/null_mask.hpp>
#include <cudf/scalar/scalar_factories.hpp>
#include <cudf/strings/combine.hpp>
#include <cudf/table/table.hpp>
#include <cudf/types.hpp>
#include <cudf/utilities/default_stream.hpp>
Expand Down Expand Up @@ -918,6 +922,58 @@ std::unique_ptr<cudf::table> create_sequence_table(std::vector<cudf::type_id> co
return std::make_unique<cudf::table>(std::move(columns));
}

/**
 * @brief Builds a strings column by gathering rows from a small fixed set of base strings.
 *
 * The base strings are at most 32 bytes wide; when `row_width / 32 > 1` that many copies of
 * the base column are concatenated to widen every entry. A random gather map with bounds
 * `[1, base_size - 1]` then picks a base string per output row, and
 * `num_rows * hit_rate / 100` evenly spaced rows are overwritten to select base string 0
 * (the "hit" string).
 *
 * When the computed number of hit rows is zero (`hit_rate == 0`, `num_rows == 0`, or
 * `num_rows * hit_rate < 100`) no rows are overwritten. Previously this case divided by zero
 * while computing the spacing between hit rows.
 *
 * NOTE(review): the random gather map is created without an explicit seed, so repeated calls
 * presumably yield identical columns - confirm against `create_random_table`.
 *
 * @param num_rows  Number of rows in the output column
 * @param row_width Nominal string width; only `row_width / 32` is used, as a repeat count
 * @param hit_rate  Percentage (0 to 100) of rows forced to the "hit" string
 * @return The generated strings column
 */
std::unique_ptr<cudf::column> create_string_column(cudf::size_type num_rows,
                                                   cudf::size_type row_width,
                                                   int32_t hit_rate)
{
  // Base strings the output rows are gathered from.
  auto raw_data = cudf::test::strings_column_wrapper(
                    {
                      "123 abc 4567890 DEFGHI 0987 5W43",  // index 0: written to every hit row
                      "012345 6789 01234 56789 0123 456",  // indices 1+: picked by the random map
                      "abc 4567890 DEFGHI 0987 Wxyz 123",
                      "abcdefghijklmnopqrstuvwxyz 01234",
                      "",
                      "AbcéDEFGHIJKLMNOPQRSTUVWXYZ 01",
                      "9876543210,abcdefghijklmnopqrstU",
                      "9876543210,abcdefghijklmnopqrstU",
                      "123 édf 4567890 DéFG 0987 X5",
                      "1",
                    })
                    .release();

  // Widen the base strings by concatenating `row_width / 32` copies of the base column.
  auto const repeat_count = row_width / 32;
  if (repeat_count > 1) {
    std::vector<cudf::column_view> columns;
    columns.reserve(repeat_count);
    for (cudf::size_type i = 0; i < repeat_count; ++i) {
      columns.push_back(raw_data->view());
    }
    raw_data = cudf::strings::concatenate(cudf::table_view(columns));
  }
  auto const data_view = raw_data->view();

  // Number of output rows that must be the hit string; computed in 64 bits to avoid overflow.
  auto const num_matches = (static_cast<int64_t>(num_rows) * hit_rate) / 100;

  // Create a randomized gather-map that never selects index 0 (the hit string).
  data_profile gather_profile =
    data_profile_builder().cardinality(0).null_probability(0.0).distribution(
      cudf::type_id::INT32, distribution_id::UNIFORM, 1, data_view.size() - 1);
  auto gather_table =
    create_random_table({cudf::type_id::INT32}, row_count{num_rows}, gather_profile);
  gather_table->get_column(0).set_null_mask(rmm::device_buffer{}, 0);

  // Start from the purely random map; it is replaced below when hit rows are requested.
  std::unique_ptr<cudf::table> scattered_table;
  auto gather_map = gather_table->view().column(0);

  // Guard against num_matches == 0: the stride below would otherwise divide by zero.
  if (num_matches != 0) {
    // Place 0-index values at evenly spaced positions throughout the gather-map.
    auto const stride = static_cast<int32_t>(num_rows / num_matches);
    auto scatter_data = cudf::sequence(static_cast<cudf::size_type>(num_matches),
                                       cudf::numeric_scalar<int32_t>(0),
                                       cudf::numeric_scalar<int32_t>(stride));
    auto zero_scalar  = cudf::numeric_scalar<int32_t>(0);
    scattered_table = cudf::scatter({zero_scalar}, scatter_data->view(), gather_table->view());
    gather_map      = scattered_table->view().column(0);
  }

  auto gathered = cudf::gather(cudf::table_view({data_view}), gather_map);

  // front() yields an lvalue reference into the released vector, so move the column out.
  return std::move(gathered->release().front());
}

std::pair<rmm::device_buffer, cudf::size_type> create_random_null_mask(
cudf::size_type size, std::optional<double> null_probability, unsigned seed)
{
Expand Down
12 changes: 12 additions & 0 deletions cpp/benchmarks/common/generate_input.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -670,6 +670,18 @@ std::unique_ptr<cudf::column> create_random_column(cudf::type_id dtype_id,
data_profile const& data_params = data_profile{},
unsigned seed = 1);

/**
 * @brief Generates a strings column whose rows are gathered from a small fixed set of base
 * strings.
 *
 * The base strings do not all have the same length (one of them is empty), so `row_width` is
 * a nominal width rather than an exact one: the implementation only uses `row_width / 32`,
 * as the number of copies of the base strings that are concatenated per row.
 *
 * NOTE(review): whether repeated calls return identical columns depends on how the random
 * gather map is seeded by `create_random_table` - confirm before relying on the columns
 * differing from each other.
 *
 * @param num_rows Number of rows in the output column
 * @param row_width Nominal width of each string in the column; see the note above
 * @param hit_rate The hit rate percentage, ranging from 0 to 100; `num_rows * hit_rate / 100`
 * rows are set to the first base string
 * @return The generated strings column
 */
std::unique_ptr<cudf::column> create_string_column(cudf::size_type num_rows,
cudf::size_type row_width,
int32_t hit_rate);

/**
* @brief Generate sequence columns starting with value 0 in first row and increasing by 1 in
* subsequent rows.
Expand Down
Loading
Loading