Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

GH-41114: [C++] Add is_validity_defined_by_bitmap() predicate #41115

Merged
merged 5 commits into from
Apr 23, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 4 additions & 3 deletions cpp/src/arrow/array/array_test.cc
Original file line number Diff line number Diff line change
Expand Up @@ -603,11 +603,11 @@ void AssertAppendScalar(MemoryPool* pool, const std::shared_ptr<Scalar>& scalar)
ASSERT_EQ(out->length(), 9);

auto out_type_id = out->type()->id();
const bool has_validity = internal::HasValidityBitmap(out_type_id);
const bool can_check_nulls = internal::may_have_validity_bitmap(out_type_id);
// For a dictionary builder, the output dictionary won't necessarily be the same
const bool can_check_values = !is_dictionary(out_type_id);

if (has_validity) {
if (can_check_nulls) {
ASSERT_EQ(out->null_count(), 4);
} else {
ASSERT_EQ(out->null_count(), 0);
Expand Down Expand Up @@ -855,7 +855,8 @@ TEST_F(TestArray, TestAppendArraySlice) {
span.SetMembers(*nulls->data());
ASSERT_OK(builder->AppendArraySlice(span, 0, 4));
ASSERT_EQ(12, builder->length());
const bool has_validity_bitmap = internal::HasValidityBitmap(scalar->type->id());
const bool has_validity_bitmap =
internal::may_have_validity_bitmap(scalar->type->id());
if (has_validity_bitmap) {
ASSERT_EQ(4, builder->null_count());
}
Expand Down
18 changes: 7 additions & 11 deletions cpp/src/arrow/array/builder_nested.h
Original file line number Diff line number Diff line change
Expand Up @@ -181,13 +181,11 @@ class ARROW_EXPORT VarLengthListLikeBuilder : public ArrayBuilder {
if constexpr (is_list_view(TYPE::type_id)) {
sizes = array.GetValues<offset_type>(2);
}
const bool all_valid = !array.MayHaveLogicalNulls();
const uint8_t* validity = array.HasValidityBitmap() ? array.buffers[0].data : NULLPTR;
static_assert(internal::may_have_validity_bitmap(TYPE::type_id));
const uint8_t* validity = array.MayHaveNulls() ? array.buffers[0].data : NULLPTR;
ARROW_RETURN_NOT_OK(Reserve(length));
for (int64_t row = offset; row < offset + length; row++) {
const bool is_valid =
all_valid || (validity && bit_util::GetBit(validity, array.offset + row)) ||
array.IsValid(row);
const bool is_valid = !validity || bit_util::GetBit(validity, array.offset + row);
int64_t size = 0;
if (is_valid) {
if constexpr (is_list_view(TYPE::type_id)) {
Expand Down Expand Up @@ -569,13 +567,11 @@ class ARROW_EXPORT MapBuilder : public ArrayBuilder {

Status AppendArraySlice(const ArraySpan& array, int64_t offset,
int64_t length) override {
const int32_t* offsets = array.GetValues<int32_t>(1);
const bool all_valid = !array.MayHaveLogicalNulls();
const uint8_t* validity = array.HasValidityBitmap() ? array.buffers[0].data : NULLPTR;
const auto* offsets = array.GetValues<int32_t>(1);
static_assert(internal::may_have_validity_bitmap(MapType::type_id));
const uint8_t* validity = array.MayHaveNulls() ? array.buffers[0].data : NULLPTR;
for (int64_t row = offset; row < offset + length; row++) {
const bool is_valid =
all_valid || (validity && bit_util::GetBit(validity, array.offset + row)) ||
array.IsValid(row);
const bool is_valid = !validity || bit_util::GetBit(validity, array.offset + row);
if (is_valid) {
ARROW_RETURN_NOT_OK(Append());
const int64_t slot_length = offsets[row + 1] - offsets[row];
Expand Down
2 changes: 1 addition & 1 deletion cpp/src/arrow/array/concatenate.cc
Original file line number Diff line number Diff line change
Expand Up @@ -317,7 +317,7 @@ class ConcatenateImpl {
}

Status Concatenate(std::shared_ptr<ArrayData>* out) && {
if (out_->null_count != 0 && internal::HasValidityBitmap(out_->type->id())) {
if (out_->null_count != 0 && internal::may_have_validity_bitmap(out_->type->id())) {
RETURN_NOT_OK(ConcatenateBitmaps(Bitmaps(0), pool_, &out_->buffers[0]));
}
RETURN_NOT_OK(VisitTypeInline(*out_->type, this));
Expand Down
6 changes: 3 additions & 3 deletions cpp/src/arrow/array/data.cc
Original file line number Diff line number Diff line change
Expand Up @@ -53,7 +53,7 @@ static inline void AdjustNonNullable(Type::type type_id, int64_t length,
if (type_id == Type::NA) {
*null_count = length;
(*buffers)[0] = nullptr;
} else if (internal::HasValidityBitmap(type_id)) {
} else if (internal::may_have_validity_bitmap(type_id)) {
if (*null_count == 0) {
// In case there are no nulls, don't keep an allocated null bitmap around
(*buffers)[0] = nullptr;
Expand Down Expand Up @@ -345,7 +345,7 @@ void FillZeroLengthArray(const DataType* type, ArraySpan* span) {
span->buffers[i].size = 0;
}

if (!HasValidityBitmap(type->id())) {
if (!may_have_validity_bitmap(type->id())) {
span->buffers[0] = {};
}

Expand Down Expand Up @@ -380,7 +380,7 @@ void ArraySpan::FillFromScalar(const Scalar& value) {

if (type_id == Type::NA) {
this->null_count = 1;
} else if (!internal::HasValidityBitmap(type_id)) {
} else if (!internal::may_have_validity_bitmap(type_id)) {
this->null_count = 0;
} else {
// Populate null count and validity bitmap
Expand Down
1 change: 1 addition & 0 deletions cpp/src/arrow/array/data.h
Original file line number Diff line number Diff line change
Expand Up @@ -46,6 +46,7 @@ ARROW_EXPORT bool IsNullRunEndEncoded(const ArrayData& data, int64_t i);
ARROW_EXPORT bool UnionMayHaveLogicalNulls(const ArrayData& data);
ARROW_EXPORT bool RunEndEncodedMayHaveLogicalNulls(const ArrayData& data);
ARROW_EXPORT bool DictionaryMayHaveLogicalNulls(const ArrayData& data);

} // namespace internal

// When slicing, we do not know the null count of the sliced range without
Expand Down
2 changes: 1 addition & 1 deletion cpp/src/arrow/array/util.cc
Original file line number Diff line number Diff line change
Expand Up @@ -95,7 +95,7 @@ class ArrayDataEndianSwapper {
Status SwapType(const DataType& type) {
RETURN_NOT_OK(VisitTypeInline(type, this));
RETURN_NOT_OK(SwapChildren(type.fields()));
if (internal::HasValidityBitmap(type.id())) {
if (internal::may_have_validity_bitmap(type.id())) {
// Copy null bitmap
out_->buffers[0] = data_->buffers[0];
}
Expand Down
2 changes: 1 addition & 1 deletion cpp/src/arrow/array/validate.cc
Original file line number Diff line number Diff line change
Expand Up @@ -550,7 +550,7 @@ struct ValidateArrayImpl {
if (full_validation) {
if (data.null_count != kUnknownNullCount) {
int64_t actual_null_count;
if (HasValidityBitmap(data.type->id()) && data.buffers[0]) {
if (may_have_validity_bitmap(data.type->id()) && data.buffers[0]) {
// Do not call GetNullCount() as it would also set the `null_count` member
actual_null_count = data.length - CountSetBits(data.buffers[0]->data(),
data.offset, data.length);
Expand Down
2 changes: 1 addition & 1 deletion cpp/src/arrow/c/bridge.cc
Original file line number Diff line number Diff line change
Expand Up @@ -576,7 +576,7 @@ struct ArrayExporter {
// Store buffer pointers
size_t n_buffers = data->buffers.size();
auto buffers_begin = data->buffers.begin();
if (n_buffers > 0 && !internal::HasValidityBitmap(data->type->id())) {
if (n_buffers > 0 && !internal::may_have_validity_bitmap(data->type->id())) {
--n_buffers;
++buffers_begin;
}
Expand Down
2 changes: 1 addition & 1 deletion cpp/src/arrow/c/bridge_test.cc
Original file line number Diff line number Diff line change
Expand Up @@ -565,7 +565,7 @@ struct ArrayExportChecker {

auto expected_n_buffers = static_cast<int64_t>(expected_data.buffers.size());
auto expected_buffers = expected_data.buffers.data();
if (!internal::HasValidityBitmap(expected_data.type->id())) {
if (!internal::may_have_validity_bitmap(expected_data.type->id())) {
--expected_n_buffers;
++expected_buffers;
}
Expand Down
2 changes: 1 addition & 1 deletion cpp/src/arrow/compute/exec.cc
Original file line number Diff line number Diff line change
Expand Up @@ -480,7 +480,7 @@ struct NullGeneralization {
if (dtype_id == Type::NA) {
return ALL_NULL;
}
if (!arrow::internal::HasValidityBitmap(dtype_id)) {
if (!arrow::internal::may_have_validity_bitmap(dtype_id)) {
return ALL_VALID;
}
if (value.is_scalar()) {
Expand Down
2 changes: 1 addition & 1 deletion cpp/src/arrow/integration/json_internal.cc
Original file line number Diff line number Diff line change
Expand Up @@ -1849,7 +1849,7 @@ class ArrayReader {
Result<std::shared_ptr<ArrayData>> Parse() {
ARROW_ASSIGN_OR_RAISE(length_, GetMemberInt<int32_t>(obj_, "count"));

if (::arrow::internal::HasValidityBitmap(type_->id())) {
if (::arrow::internal::may_have_validity_bitmap(type_->id())) {
// Null and union types don't have a validity bitmap
RETURN_NOT_OK(ParseValidityBitmap());
}
Expand Down
5 changes: 3 additions & 2 deletions cpp/src/arrow/ipc/metadata_internal.cc
Original file line number Diff line number Diff line change
Expand Up @@ -109,8 +109,9 @@ flatbuf::MetadataVersion MetadataVersionToFlatbuffer(MetadataVersion version) {
bool HasValidityBitmap(Type::type type_id, MetadataVersion version) {
// In V4, null types have no validity bitmap
// In V5 and later, null and union types have no validity bitmap
return (version < MetadataVersion::V5) ? (type_id != Type::NA)
: ::arrow::internal::HasValidityBitmap(type_id);
return (version < MetadataVersion::V5)
? (type_id != Type::NA)
: ::arrow::internal::may_have_validity_bitmap(type_id);
}

namespace {
Expand Down
5 changes: 4 additions & 1 deletion cpp/src/arrow/type.h
Original file line number Diff line number Diff line change
Expand Up @@ -2482,7 +2482,7 @@ Result<std::shared_ptr<Schema>> UnifySchemas(

namespace internal {

constexpr bool HasValidityBitmap(Type::type id) {
constexpr bool may_have_validity_bitmap(Type::type id) {
switch (id) {
case Type::NA:
case Type::DENSE_UNION:
Expand All @@ -2494,6 +2494,9 @@ constexpr bool HasValidityBitmap(Type::type id) {
}
}

ARROW_DEPRECATED("Deprecated in 17.0.0. Use may_have_validity_bitmap() instead.")
constexpr bool HasValidityBitmap(Type::type id) { return may_have_validity_bitmap(id); }

ARROW_EXPORT
std::string ToString(Type::type id);

Expand Down
Loading