Skip to content

Commit

Permalink
review comments: use c array, revert preset
Browse files Browse the repository at this point in the history
  • Loading branch information
bkietz committed Jun 15, 2023
1 parent b542319 commit eadcefc
Show file tree
Hide file tree
Showing 3 changed files with 20 additions and 15 deletions.
1 change: 0 additions & 1 deletion cpp/CMakePresets.json
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,6 @@
"hidden": true,
"generator": "Ninja",
"cacheVariables": {
"CMAKE_EXPORT_COMPILE_COMMANDS": "ON",
"ARROW_BUILD_STATIC": "OFF"
}
},
Expand Down
31 changes: 18 additions & 13 deletions cpp/src/arrow/array/data.cc
Original file line number Diff line number Diff line change
Expand Up @@ -228,7 +228,7 @@ void ArraySpan::SetMembers(const ArrayData& data) {
namespace {

template <typename offset_type>
void SetOffsetsForScalar(ArraySpan* span, uint8_t* buffer, int64_t value_size,
void SetOffsetsForScalar(ArraySpan* span, uint8_t* buffer, offset_type value_size,
int buffer_index = 1) {
auto* offsets = reinterpret_cast<offset_type*>(buffer);
offsets[0] = 0;
Expand Down Expand Up @@ -298,7 +298,7 @@ void FillZeroLengthArray(const DataType* type, ArraySpan* span) {
void ArraySpan::FillFromScalar(const Scalar& value) {
static uint8_t kTrueBit = 0x01;
static uint8_t kFalseBit = 0x00;
auto* scratch_space = const_cast<Scalar&>(value).scratch_space_.data();
auto* scratch_space = const_cast<Scalar&>(value).scratch_space_;

this->type = value.type.get();
this->length = 1;
Expand Down Expand Up @@ -338,10 +338,10 @@ void ArraySpan::FillFromScalar(const Scalar& value) {
data_size = scalar.value->size();
}
if (is_binary_like(type_id)) {
SetOffsetsForScalar<int32_t>(this, scratch_space, data_size);
SetOffsetsForScalar(this, scratch_space, static_cast<int32_t>(data_size));
} else {
// is_large_binary_like
SetOffsetsForScalar<int64_t>(this, scratch_space, data_size);
SetOffsetsForScalar(this, scratch_space, data_size);
}
this->buffers[2].data = const_cast<uint8_t*>(data_buffer);
this->buffers[2].size = data_size;
Expand All @@ -366,9 +366,9 @@ void ArraySpan::FillFromScalar(const Scalar& value) {
}

if (type_id == Type::LIST || type_id == Type::MAP) {
SetOffsetsForScalar<int32_t>(this, scratch_space, value_length);
SetOffsetsForScalar(this, scratch_space, static_cast<int32_t>(value_length));
} else if (type_id == Type::LARGE_LIST) {
SetOffsetsForScalar<int64_t>(this, scratch_space, value_length);
SetOffsetsForScalar(this, scratch_space, value_length);
} else {
// FIXED_SIZE_LIST: does not have a second buffer
this->buffers[1] = {};
Expand All @@ -381,24 +381,29 @@ void ArraySpan::FillFromScalar(const Scalar& value) {
this->child_data[i].FillFromScalar(*scalar.value[i]);
}
} else if (is_union(type_id)) {
// Dense union needs scratch space to store both offsets and a type code
struct UnionScratchSpace {
uint8_t type_code;
alignas(int32_t) uint8_t offsets[sizeof(int32_t) * 2];
};
auto* union_scratch_space = new (scratch_space) UnionScratchSpace{};

// First buffer is kept null since unions have no validity vector
this->buffers[0] = {};

this->buffers[1].data = scratch_space;
this->buffers[1].data = &union_scratch_space->type_code;
this->buffers[1].size = 1;
auto* type_codes = reinterpret_cast<int8_t*>(scratch_space);
type_codes[0] = checked_cast<const UnionScalar&>(value).type_code;
new (&union_scratch_space->type_code)
int8_t{checked_cast<const UnionScalar&>(value).type_code};

this->child_data.resize(this->type->num_fields());
if (type_id == Type::DENSE_UNION) {
const auto& scalar = checked_cast<const DenseUnionScalar&>(value);
// Has offset; start 4 bytes in so it's aligned to a 32-bit boundaries
SetOffsetsForScalar<int32_t>(this, scratch_space, 1, 2);
SetOffsetsForScalar(this, union_scratch_space->offsets, static_cast<int32_t>(1), 2);
// We can't "see" the other arrays in the union, but we put the "active"
// union array in the right place and fill zero-length arrays for the
// others
const std::vector<int>& child_ids =
checked_cast<const UnionType*>(this->type)->child_ids();
const auto& child_ids = checked_cast<const UnionType*>(this->type)->child_ids();
DCHECK_GE(scalar.type_code, 0);
DCHECK_LT(scalar.type_code, static_cast<int>(child_ids.size()));
for (int i = 0; i < static_cast<int>(this->child_data.size()); ++i) {
Expand Down
3 changes: 2 additions & 1 deletion cpp/src/arrow/scalar.h
Original file line number Diff line number Diff line change
Expand Up @@ -112,10 +112,11 @@ struct ARROW_EXPORT Scalar : public std::enable_shared_from_this<Scalar>,
Scalar(std::shared_ptr<DataType> type, bool is_valid)
: type(std::move(type)), is_valid(is_valid) {}

private:
// 16 bytes of scratch space to enable ArraySpan to be a view onto any
// Scalar- including binary scalars where we need to create a buffer
// that looks like two 32-bit or 64-bit offsets.
alignas(int64_t) std::array<uint8_t, sizeof(int64_t) * 2> scratch_space_;
alignas(int64_t) mutable uint8_t scratch_space_[sizeof(int64_t) * 2];
friend struct ArraySpan;
};

Expand Down

0 comments on commit eadcefc

Please sign in to comment.