Skip to content
This repository was archived by the owner on May 10, 2024. It is now read-only.
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
15 changes: 13 additions & 2 deletions src/parquet/arrow/arrow-reader-writer-test.cc
Original file line number Diff line number Diff line change
Expand Up @@ -536,6 +536,13 @@ class TestParquetIO : public ::testing::Test {
*out = MakeSimpleTable(lists->Slice(3, size - 6), nullable_lists);
}

// Prepare table of empty lists, with null values array (ARROW-2744)
void PrepareEmptyListsTable(int64_t size, std::shared_ptr<Table>* out) {
std::shared_ptr<Array> lists;
ASSERT_OK(MakeEmptyListsArray(size, &lists));
*out = MakeSimpleTable(lists, true /* nullable_lists */);
}

void PrepareListOfListTable(int64_t size, bool nullable_parent_lists,
bool nullable_lists, bool nullable_elements,
int64_t null_count, std::shared_ptr<Table>* out) {
Expand Down Expand Up @@ -713,6 +720,12 @@ TYPED_TEST(TestParquetIO, SingleColumnTableOptionalReadWrite) {
ASSERT_NO_FATAL_FAILURE(this->CheckRoundTrip(table));
}

TYPED_TEST(TestParquetIO, SingleEmptyListsColumnReadWrite) {
std::shared_ptr<Table> table;
ASSERT_NO_FATAL_FAILURE(this->PrepareEmptyListsTable(SMALL_SIZE, &table));
ASSERT_NO_FATAL_FAILURE(this->CheckRoundTrip(table));
}

TYPED_TEST(TestParquetIO, SingleNullableListNullableColumnReadWrite) {
std::shared_ptr<Table> table;
ASSERT_NO_FATAL_FAILURE(this->PrepareListTable(SMALL_SIZE, true, true, 10, &table));
Expand Down Expand Up @@ -1524,8 +1537,6 @@ void MakeDoubleTable(int num_columns, int num_rows, int nchunks,

void MakeListArray(int num_rows, std::shared_ptr<::DataType>* out_type,
std::shared_ptr<Array>* out_array) {
::arrow::Int32Builder offset_builder;

std::vector<int32_t> length_draws;
randint(num_rows, 0, 100, &length_draws);

Expand Down
27 changes: 27 additions & 0 deletions src/parquet/arrow/test-util.h
Original file line number Diff line number Diff line change
Expand Up @@ -394,6 +394,33 @@ Status MakeListArray(const std::shared_ptr<Array>& values, int64_t size,
return Status::OK();
}

// Make an array containing only empty lists, with a null values array
Status MakeEmptyListsArray(int64_t size, std::shared_ptr<Array>* out_array) {
// Allocate an offsets buffer containing only zeroes
std::shared_ptr<Buffer> offsets_buffer;
const int64_t offsets_nbytes = (size + 1) * sizeof(int32_t);
RETURN_NOT_OK(::arrow::AllocateBuffer(::arrow::default_memory_pool(), offsets_nbytes,
&offsets_buffer));
memset(offsets_buffer->mutable_data(), 0, offsets_nbytes);

auto value_field = ::arrow::field("item", ::arrow::float64(),
false /* nullable_values */);
auto list_type = ::arrow::list(value_field);

std::vector<std::shared_ptr<Buffer>> child_buffers = {nullptr /* null bitmap */,
nullptr /* values */ };
auto child_data = ::arrow::ArrayData::Make(value_field->type(), 0,
std::move(child_buffers));

std::vector<std::shared_ptr<Buffer>> buffers = {nullptr /* bitmap */,
offsets_buffer };
auto array_data = ::arrow::ArrayData::Make(list_type, size, std::move(buffers));
array_data->child_data.push_back(child_data);

*out_array = ::arrow::MakeArray(array_data);
return Status::OK();
}

static std::shared_ptr<::arrow::Column> MakeColumn(const std::string& name,
const std::shared_ptr<Array>& array,
bool nullable) {
Expand Down
17 changes: 14 additions & 3 deletions src/parquet/arrow/writer.cc
Original file line number Diff line number Diff line change
Expand Up @@ -411,8 +411,13 @@ Status ArrowColumnWriter::TypedWriteBatch(const Array& array, int64_t num_levels
using ArrowCType = typename ArrowType::c_type;

const auto& data = static_cast<const PrimitiveArray&>(array);
auto values =
reinterpret_cast<const ArrowCType*>(data.values()->data()) + data.offset();
const ArrowCType* values = nullptr;
// The values buffer may be null if the array is empty (ARROW-2744)
if (data.values() != nullptr) {
values = reinterpret_cast<const ArrowCType*>(data.values()->data()) + data.offset();
} else {
DCHECK_EQ(data.length(), 0);
}

if (writer_->descr()->schema_node()->is_required() || (data.null_count() == 0)) {
// no nulls, just dump the data
Expand Down Expand Up @@ -706,7 +711,13 @@ Status ArrowColumnWriter::TypedWriteBatch<BooleanType, ::arrow::BooleanType>(
RETURN_NOT_OK(ctx_->GetScratchData<bool>(array.length(), &buffer));

const auto& data = static_cast<const BooleanArray&>(array);
auto values = reinterpret_cast<const uint8_t*>(data.values()->data());
const uint8_t* values = nullptr;
// The values buffer may be null if the array is empty (ARROW-2744)
if (data.values() != nullptr) {
values = reinterpret_cast<const uint8_t*>(data.values()->data());
} else {
DCHECK_EQ(data.length(), 0);
}

int buffer_idx = 0;
int64_t offset = array.offset();
Expand Down