diff --git a/src/parquet/arrow/arrow-reader-writer-test.cc b/src/parquet/arrow/arrow-reader-writer-test.cc
index 6d7e1ebb..e0ff7aab 100644
--- a/src/parquet/arrow/arrow-reader-writer-test.cc
+++ b/src/parquet/arrow/arrow-reader-writer-test.cc
@@ -536,6 +536,13 @@ class TestParquetIO : public ::testing::Test {
*out = MakeSimpleTable(lists->Slice(3, size - 6), nullable_lists);
}
+ // Prepare table of empty lists, with null values array (ARROW-2744)
+ void PrepareEmptyListsTable(int64_t size, std::shared_ptr
* out) {
+ std::shared_ptr lists;
+ ASSERT_OK(MakeEmptyListsArray(size, &lists));
+ *out = MakeSimpleTable(lists, true /* nullable_lists */);
+ }
+
void PrepareListOfListTable(int64_t size, bool nullable_parent_lists,
bool nullable_lists, bool nullable_elements,
int64_t null_count, std::shared_ptr* out) {
@@ -713,6 +720,12 @@ TYPED_TEST(TestParquetIO, SingleColumnTableOptionalReadWrite) {
ASSERT_NO_FATAL_FAILURE(this->CheckRoundTrip(table));
}
+TYPED_TEST(TestParquetIO, SingleEmptyListsColumnReadWrite) {
+ std::shared_ptr table;
+ ASSERT_NO_FATAL_FAILURE(this->PrepareEmptyListsTable(SMALL_SIZE, &table));
+ ASSERT_NO_FATAL_FAILURE(this->CheckRoundTrip(table));
+}
+
TYPED_TEST(TestParquetIO, SingleNullableListNullableColumnReadWrite) {
std::shared_ptr table;
ASSERT_NO_FATAL_FAILURE(this->PrepareListTable(SMALL_SIZE, true, true, 10, &table));
@@ -1524,8 +1537,6 @@ void MakeDoubleTable(int num_columns, int num_rows, int nchunks,
void MakeListArray(int num_rows, std::shared_ptr<::DataType>* out_type,
std::shared_ptr* out_array) {
- ::arrow::Int32Builder offset_builder;
-
std::vector length_draws;
randint(num_rows, 0, 100, &length_draws);
diff --git a/src/parquet/arrow/test-util.h b/src/parquet/arrow/test-util.h
index 7264324d..c70e0eff 100644
--- a/src/parquet/arrow/test-util.h
+++ b/src/parquet/arrow/test-util.h
@@ -394,6 +394,33 @@ Status MakeListArray(const std::shared_ptr& values, int64_t size,
return Status::OK();
}
+// Make an array containing only empty lists, with a null values array
+Status MakeEmptyListsArray(int64_t size, std::shared_ptr* out_array) {
+ // Allocate an offsets buffer containing only zeroes
+ std::shared_ptr offsets_buffer;
+ const int64_t offsets_nbytes = (size + 1) * sizeof(int32_t);
+ RETURN_NOT_OK(::arrow::AllocateBuffer(::arrow::default_memory_pool(), offsets_nbytes,
+ &offsets_buffer));
+ memset(offsets_buffer->mutable_data(), 0, offsets_nbytes);
+
+ auto value_field = ::arrow::field("item", ::arrow::float64(),
+ false /* nullable_values */);
+ auto list_type = ::arrow::list(value_field);
+
+ std::vector> child_buffers = {nullptr /* null bitmap */,
+ nullptr /* values */ };
+ auto child_data = ::arrow::ArrayData::Make(value_field->type(), 0,
+ std::move(child_buffers));
+
+ std::vector> buffers = {nullptr /* bitmap */,
+ offsets_buffer };
+ auto array_data = ::arrow::ArrayData::Make(list_type, size, std::move(buffers));
+ array_data->child_data.push_back(child_data);
+
+ *out_array = ::arrow::MakeArray(array_data);
+ return Status::OK();
+}
+
static std::shared_ptr<::arrow::Column> MakeColumn(const std::string& name,
const std::shared_ptr& array,
bool nullable) {
diff --git a/src/parquet/arrow/writer.cc b/src/parquet/arrow/writer.cc
index 50b4649e..f7727381 100644
--- a/src/parquet/arrow/writer.cc
+++ b/src/parquet/arrow/writer.cc
@@ -411,8 +411,13 @@ Status ArrowColumnWriter::TypedWriteBatch(const Array& array, int64_t num_levels
using ArrowCType = typename ArrowType::c_type;
const auto& data = static_cast(array);
- auto values =
- reinterpret_cast(data.values()->data()) + data.offset();
+ const ArrowCType* values = nullptr;
+ // The values buffer may be null if the array is empty (ARROW-2744)
+ if (data.values() != nullptr) {
+ values = reinterpret_cast(data.values()->data()) + data.offset();
+ } else {
+ DCHECK_EQ(data.length(), 0);
+ }
if (writer_->descr()->schema_node()->is_required() || (data.null_count() == 0)) {
// no nulls, just dump the data
@@ -706,7 +711,13 @@ Status ArrowColumnWriter::TypedWriteBatch(
RETURN_NOT_OK(ctx_->GetScratchData(array.length(), &buffer));
const auto& data = static_cast(array);
- auto values = reinterpret_cast(data.values()->data());
+ const uint8_t* values = nullptr;
+ // The values buffer may be null if the array is empty (ARROW-2744)
+ if (data.values() != nullptr) {
+ values = reinterpret_cast(data.values()->data());
+ } else {
+ DCHECK_EQ(data.length(), 0);
+ }
int buffer_idx = 0;
int64_t offset = array.offset();