Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions cpp/src/arrow/array-test.cc
Original file line number Diff line number Diff line change
Expand Up @@ -2919,6 +2919,8 @@ TEST_F(TestListArray, TestAppendNull) {

auto values = result_->values();
ASSERT_EQ(0, values->length());
// Values buffer should be non-null
ASSERT_NE(nullptr, values->data()->buffers[1]);
}

void ValidateBasicListArray(const ListArray* result, const vector<int32_t>& values,
Expand Down
58 changes: 38 additions & 20 deletions cpp/src/arrow/array.cc
Original file line number Diff line number Diff line change
Expand Up @@ -664,12 +664,30 @@ namespace internal {
struct ValidateVisitor {
Status Visit(const NullArray&) { return Status::OK(); }

Status Visit(const PrimitiveArray&) { return Status::OK(); }
Status Visit(const PrimitiveArray& array) {
if (array.data()->buffers.size() != 2) {
return Status::Invalid("number of buffers was != 2");
}
if (array.values() == nullptr) {
return Status::Invalid("values was null");
}
return Status::OK();
}

Status Visit(const Decimal128Array&) { return Status::OK(); }
Status Visit(const Decimal128Array& array) {
if (array.data()->buffers.size() != 2) {
return Status::Invalid("number of buffers was != 2");
}
if (array.values() == nullptr) {
return Status::Invalid("values was null");
}
return Status::OK();
}

Status Visit(const BinaryArray&) {
// TODO(wesm): what to do here?
Status Visit(const BinaryArray& array) {
if (array.data()->buffers.size() != 3) {
return Status::Invalid("number of buffers was != 3");
}
return Status::OK();
}

Expand All @@ -688,24 +706,24 @@ struct ValidateVisitor {
<< " isn't large enough for length: " << array.length();
return Status::Invalid(ss.str());
}

if (!array.values()) {
return Status::Invalid("values was null");
}

const int32_t last_offset = array.value_offset(array.length());
if (last_offset > 0) {
if (!array.values()) {
return Status::Invalid("last offset was non-zero and values was null");
}
if (array.values()->length() != last_offset) {
std::stringstream ss;
ss << "Final offset invariant not equal to values length: " << last_offset
<< "!=" << array.values()->length();
return Status::Invalid(ss.str());
}
if (array.values()->length() != last_offset) {
std::stringstream ss;
ss << "Final offset invariant not equal to values length: " << last_offset
<< "!=" << array.values()->length();
return Status::Invalid(ss.str());
}

const Status child_valid = ValidateArray(*array.values());
if (!child_valid.ok()) {
std::stringstream ss;
ss << "Child array invalid: " << child_valid.ToString();
return Status::Invalid(ss.str());
}
const Status child_valid = ValidateArray(*array.values());
if (!child_valid.ok()) {
std::stringstream ss;
ss << "Child array invalid: " << child_valid.ToString();
return Status::Invalid(ss.str());
}

int32_t prev_offset = array.value_offset(0);
Expand Down
9 changes: 9 additions & 0 deletions cpp/src/arrow/builder.cc
Original file line number Diff line number Diff line change
Expand Up @@ -54,6 +54,7 @@ Status TrimBuffer(const int64_t bytes_filled, ResizableBuffer* buffer) {
// zero the padding
buffer->ZeroPadding();
} else {
// Null buffers are allowed in place of 0-byte buffers
DCHECK_EQ(bytes_filled, 0);
}
return Status::OK();
Expand Down Expand Up @@ -1336,6 +1337,10 @@ Status ListBuilder::FinishInternal(std::shared_ptr<ArrayData>* out) {
if (values_) {
items = values_->data();
} else {
if (value_builder_->length() == 0) {
// Try to make sure we get a non-null values buffer (ARROW-2744)
RETURN_NOT_OK(value_builder_->Resize(0));
}
RETURN_NOT_OK(value_builder_->FinishInternal(&items));
}

Expand Down Expand Up @@ -1632,6 +1637,10 @@ Status StructBuilder::FinishInternal(std::shared_ptr<ArrayData>* out) {

(*out)->child_data.resize(field_builders_.size());
for (size_t i = 0; i < field_builders_.size(); ++i) {
if (length_ == 0) {
// Try to make sure the child buffers are initialized
RETURN_NOT_OK(field_builders_[i]->Resize(0));
}
RETURN_NOT_OK(field_builders_[i]->FinishInternal(&(*out)->child_data[i]));
}

Expand Down
2 changes: 2 additions & 0 deletions cpp/src/arrow/ipc/test-common.h
Original file line number Diff line number Diff line change
Expand Up @@ -100,6 +100,7 @@ Status MakeRandomInt32Array(int64_t length, bool include_nulls, MemoryPool* pool
std::shared_ptr<PoolBuffer> data;
RETURN_NOT_OK(test::MakeRandomInt32PoolBuffer(length, pool, &data));
Int32Builder builder(int32(), pool);
RETURN_NOT_OK(builder.Init(length));
if (include_nulls) {
std::shared_ptr<PoolBuffer> valid_bytes;
RETURN_NOT_OK(test::MakeRandomBytePoolBuffer(length, pool, &valid_bytes));
Expand Down Expand Up @@ -127,6 +128,7 @@ Status MakeRandomListArray(const std::shared_ptr<Array>& child_array, int num_li
std::vector<int32_t> offsets(
num_lists + 1, 0); // +1 so we can shift for nulls. See partial sum below.
const uint32_t seed = static_cast<uint32_t>(child_array->length());

if (num_lists > 0) {
test::rand_uniform_int(num_lists, seed, 0, max_list_size, list_sizes.data());
// make sure sizes are consistent with null
Expand Down
12 changes: 10 additions & 2 deletions python/pyarrow/tests/test_parquet.py
Original file line number Diff line number Diff line change
Expand Up @@ -158,7 +158,7 @@ def test_pandas_parquet_2_0_rountrip(tmpdir, chunk_size):


@parquet
def test_chunked_table_write(tmpdir):
def test_chunked_table_write():
# ARROW-232
df = alltypes_sample(size=10)

Expand All @@ -176,7 +176,7 @@ def test_chunked_table_write(tmpdir):


@parquet
def test_empty_table_roundtrip(tmpdir):
def test_empty_table_roundtrip():
df = alltypes_sample(size=10)
# The nanosecond->us conversion is a nuisance, so we just avoid it here
del df['datetime']
Expand All @@ -192,6 +192,14 @@ def test_empty_table_roundtrip(tmpdir):
_check_roundtrip(table, version='2.0')


@parquet
def test_empty_lists_table_roundtrip():
# ARROW-2744: Shouldn't crash when writing an array of empty lists
arr = pa.array([[], []], type=pa.list_(pa.int32()))
table = pa.Table.from_arrays([arr], ["A"])
_check_roundtrip(table)


@parquet
def test_pandas_parquet_datetime_tz():
import pyarrow.parquet as pq
Expand Down