Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Handle empty child columns in row_bit_count() #8791

Merged
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
15 changes: 13 additions & 2 deletions cpp/src/transform/row_bit_count.cu
Original file line number Diff line number Diff line change
Expand Up @@ -334,10 +334,21 @@ template <>
__device__ size_type row_size_functor::operator()<string_view>(column_device_view const& col,
row_span const& span)
{
column_device_view const& offsets = col.child(strings_column_view::offsets_column_index);
auto const num_rows{span.row_end - span.row_start};
if (num_rows == 0) {
// For empty columns, the `span` cannot have a row size.
return 0;
}

auto const& offsets = col.child(strings_column_view::offsets_column_index);
auto const row_start{span.row_start + col.offset()};
auto const row_end{span.row_end + col.offset()};
mythrocks marked this conversation as resolved.
Show resolved Hide resolved
if (row_start == row_end) {
// Empty row contributes 0 bits to row_bit_count().
// Note: Validity bit doesn't count either. There are no rows in the child column
// corresponding to this span.
return 0;
}

auto const offsets_size = sizeof(offset_type) * CHAR_BIT;
auto const validity_size = col.nullable() ? 1 : 0;
Expand Down Expand Up @@ -434,7 +445,7 @@ __global__ void compute_row_sizes(device_span<column_device_view const> cols,
size += cudf::type_dispatcher(col.type(), row_size_functor{}, col, cur_span);

// if this is a list column, update the working span from our offsets
if (col.type().id() == type_id::LIST) {
if (col.type().id() == type_id::LIST && col.size() > 0) {
column_device_view const& offsets = col.child(lists_column_view::offsets_column_index);
auto const base_offset = offsets.data<offset_type>()[col.offset()];
cur_span.row_start =
Expand Down
56 changes: 56 additions & 0 deletions cpp/tests/transform/row_bit_count_test.cu
Original file line number Diff line number Diff line change
Expand Up @@ -453,6 +453,62 @@ TEST_F(RowBitCount, NestedTypes)
}
}

// Verifies row_bit_count() on a list<string> column whose child strings column
// contains null entries.
TEST_F(RowBitCount, NullsInStringsList)
{
  using offsets_wrapper = cudf::test::fixed_width_column_wrapper<offset_type>;

  // clang-format off
  auto strings = std::vector<std::string>{ "daïs", "def", "", "z", "bananas", "warp", "", "zing" };
  auto valids  = std::vector<bool>{ 1, 0, 0, 1, 0, 1, 1, 1 };
  // clang-format on

  // Four list rows of two strings each; the lists column itself has no null mask.
  auto lists_col = cudf::make_lists_column(
    4,
    offsets_wrapper{0, 2, 4, 6, 8}.release(),
    cudf::test::strings_column_wrapper{strings.begin(), strings.end(), valids.begin()}.release(),
    0,
    {});

  // NOTE(review): the expected sizes are consistent with 32 bits for the list offset plus,
  // for each of the two child strings, 32 offset bits + 1 validity bit + 8 bits per UTF-8
  // byte of a non-null string (null strings appear to contribute no character bits) --
  // confirm against the row_bit_count() implementation.
  auto const expected =
    cudf::test::fixed_width_column_wrapper<offset_type>{138, 106, 130, 130};
  auto const result = cudf::row_bit_count(table_view{{lists_col->view()}});

  CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(result->view(), expected);
}

// Verifies row_bit_count() on a list<string> column whose child column is empty.
TEST_F(RowBitCount, EmptyChildColumnInListOfStrings)
{
  // All four list rows are empty (every offset is 0), so no row refers to any
  // string and the child strings column may itself be an empty column.
  auto list_offsets = cudf::test::fixed_width_column_wrapper<offset_type>{0, 0, 0, 0, 0};
  auto const lists  = cudf::make_lists_column(
    4,
    list_offsets.release(),
    cudf::make_empty_column(cudf::data_type{cudf::type_id::STRING}),
    0,
    {});

  // Every row is expected to report 32 bits.
  auto const expected = cudf::test::fixed_width_column_wrapper<offset_type>{32, 32, 32, 32};
  auto const actual   = cudf::row_bit_count(table_view{{lists->view()}});

  CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(actual->view(), expected);
}

// Verifies row_bit_count() on a list<list> column whose child lists column is empty.
TEST_F(RowBitCount, EmptyChildColumnInListOfLists)
{
  // Test with a list<list> column with 4 empty list rows.
  // Note: Since there are no elements in any of the lists,
  // the lists column's child can be empty.
  auto empty_child_lists_column = [] {
    // The exemplar only supplies the column type; empty_like() is applied to it to
    // obtain the child column used below.
    auto exemplar = cudf::test::lists_column_wrapper<int32_t>{{0, 1, 2}, {3, 4, 5}};
    return cudf::empty_like(exemplar);
  };

  auto offsets   = cudf::test::fixed_width_column_wrapper<offset_type>{0, 0, 0, 0, 0};
  auto lists_col = cudf::make_lists_column(4, offsets.release(), empty_child_lists_column(), 0, {});

  // Each row is expected to report exactly the size of one offset, in bits. The value is
  // derived from offset_type rather than hard-coded, and cast so the expected column is
  // built from offset_type elements.
  auto constexpr offset_nbits = static_cast<offset_type>(sizeof(offset_type) * CHAR_BIT);

  auto const expected = cudf::test::fixed_width_column_wrapper<offset_type>{
    offset_nbits, offset_nbits, offset_nbits, offset_nbits};
  auto const result = cudf::row_bit_count(table_view{{lists_col->view()}});

  CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(result->view(), expected);
}

struct sum_functor {
size_type const* s0;
size_type const* s1;
Expand Down