-
Notifications
You must be signed in to change notification settings - Fork 3.6k
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
GH-35141: [C++] Versions of IsNull/IsValid that don't branch on type #35149
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -180,25 +180,46 @@ struct ARROW_EXPORT ArrayData { | |
|
||
std::shared_ptr<ArrayData> Copy() const { return std::make_shared<ArrayData>(*this); } | ||
|
||
bool IsNull(int64_t i) const { return !IsValid(i); } | ||
inline bool IsNull(int64_t i) const { return !IsValid(i); } | ||
|
||
bool IsValid(int64_t i) const { | ||
inline bool IsValid(int64_t i) const { | ||
if (buffers[0] != NULLPTR) { | ||
return bit_util::GetBit(buffers[0]->data(), i + offset); | ||
} | ||
const auto type = this->type->id(); | ||
if (type == Type::SPARSE_UNION) { | ||
return !internal::IsNullSparseUnion(*this, i); | ||
} | ||
if (type == Type::DENSE_UNION) { | ||
} else if (type == Type::DENSE_UNION) { | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. nit: this changed line seems to be flagged by clang-tidy. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Not in my editor. Where did you see this? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. But this check is not enabled in Arrow's .clang-tidy and I don't think putting an else after a return is always bad. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. In this specific function, the use of There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Sounds good. I was overwhelmed by this kind of warning in the internal toolchain from my employer. :) |
||
return !internal::IsNullDenseUnion(*this, i); | ||
} | ||
if (type == Type::RUN_END_ENCODED) { | ||
} else if (type == Type::RUN_END_ENCODED) { | ||
return !internal::IsNullRunEndEncoded(*this, i); | ||
} | ||
return null_count.load() != length; | ||
} | ||
|
||
template <typename ArrowType> | ||
bool IsNullFast(int64_t i) const { | ||
return !IsValidFast<ArrowType>(i); | ||
} | ||
|
||
template <typename ArrowType> | ||
bool IsValidFast(int64_t i) const { | ||
if constexpr (ArrowType::type_id == Type::NA) { | ||
return false; | ||
} else if constexpr (ArrowType::type_id == Type::SPARSE_UNION) { | ||
return !internal::IsNullSparseUnion(*this, i); | ||
} else if constexpr (ArrowType::type_id == Type::DENSE_UNION) { | ||
return !internal::IsNullDenseUnion(*this, i); | ||
} else if constexpr (ArrowType::type_id == Type::RUN_END_ENCODED) { | ||
return !internal::IsNullRunEndEncoded(*this, i); | ||
} else { | ||
if (buffers[0] != NULLPTR) { | ||
return bit_util::GetBit(buffers[0]->data(), i + offset); | ||
} | ||
return null_count.load() != length; | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Maybe a dumb question: if the validity bitmap does not exist, shouldn't There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
|
||
} | ||
} | ||
|
||
// Access a buffer's data as a typed C pointer | ||
template <typename T> | ||
inline const T* GetValues(int i, int64_t absolute_offset) const { | ||
|
@@ -434,16 +455,36 @@ struct ARROW_EXPORT ArraySpan { | |
inline bool IsValid(int64_t i) const { | ||
if (this->buffers[0].data != NULLPTR) { | ||
return bit_util::GetBit(this->buffers[0].data, i + this->offset); | ||
} | ||
const auto type = this->type->id(); | ||
if (type == Type::SPARSE_UNION) { | ||
return !IsNullSparseUnion(i); | ||
} else if (type == Type::DENSE_UNION) { | ||
return !IsNullDenseUnion(i); | ||
} else if (type == Type::RUN_END_ENCODED) { | ||
return !IsNullRunEndEncoded(i); | ||
} | ||
return this->null_count != this->length; | ||
} | ||
|
||
template <typename ArrowType> | ||
inline bool IsNullFast(int64_t i) const { | ||
return !IsValidFast<ArrowType>(i); | ||
} | ||
|
||
template <typename ArrowType> | ||
inline bool IsValidFast(int64_t i) const { | ||
if constexpr (ArrowType::type_id == Type::NA) { | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Do we need to add a DCHECK to check that There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I was tempted, but it adds too high of an overhead for this function as it's used from tight loops and is expected to be fully inlined. Even considering that it would be affecting only debug builds. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Maybe we just need to care about release performance? An array using outerside There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Debug perf can make CI run much slower. Imagine changing the code from There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. @mapleFU note that all the special branches perform runtime type checks, so this is not completely unchecked. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Okay, though it's still a bit weird to me. I think we can hear others' ideas. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. To be more clear: the body of these member functions in the } else if constexpr (ArrowType::type_id == Type::SPARSE_UNION) {
return !IsNullSparseUnion(i);
} else if constexpr (ArrowType::type_id == Type::DENSE_UNION) {
return !IsNullDenseUnion(i);
} else if constexpr (ArrowType::type_id == Type::RUN_END_ENCODED) {
return !IsNullRunEndEncoded(i); |
||
return false; | ||
} else if constexpr (ArrowType::type_id == Type::SPARSE_UNION) { | ||
return !IsNullSparseUnion(i); | ||
} else if constexpr (ArrowType::type_id == Type::DENSE_UNION) { | ||
return !IsNullDenseUnion(i); | ||
} else if constexpr (ArrowType::type_id == Type::RUN_END_ENCODED) { | ||
return !IsNullRunEndEncoded(i); | ||
} else { | ||
const auto type = this->type->id(); | ||
if (type == Type::SPARSE_UNION) { | ||
return !IsNullSparseUnion(i); | ||
} | ||
if (type == Type::DENSE_UNION) { | ||
return !IsNullDenseUnion(i); | ||
} | ||
if (type == Type::RUN_END_ENCODED) { | ||
return !IsNullRunEndEncoded(i); | ||
if (this->buffers[0].data != NULLPTR) { | ||
return bit_util::GetBit(this->buffers[0].data, i + this->offset); | ||
} | ||
return this->null_count != this->length; | ||
} | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Why change it to if/else? I don't think it's necessary here.
clang-tidy has a check about this style ( https://clang.llvm.org/extra/clang-tidy/checks/readability/else-after-return.html ). Although we don't enable this check, I think just returning without the if/else is OK.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
More compact. In sync with the `if constexpr` used in the other cases. Arrow's `.clang-tidy` doesn't complain about this. No extra indentation levels are introduced because of this.