Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 3 additions & 3 deletions cpp/examples/arrow/compute_register_example.cc
Original file line number Diff line number Diff line change
Expand Up @@ -69,9 +69,9 @@ std::unique_ptr<cp::FunctionOptions> ExampleFunctionOptionsType::Copy(
return std::unique_ptr<cp::FunctionOptions>(new ExampleFunctionOptions());
}

// NOTE(review): this is a flattened diff hunk (3 deletions followed by 3 additions), so
// two versions of the function header and first statement are interleaved below; the
// span is not a single compilable definition.
// Removed version: receives an ExecBatch and assigns the first argument's array data
// to the output Datum's mutable array.
arrow::Status ExampleFunctionImpl(cp::KernelContext* ctx, const cp::ExecBatch& batch,
arrow::Datum* out) {
*out->mutable_array() = *batch[0].array();
// Added version: receives an ExecSpan and stores the first argument, converted with
// ToArrayData(), into out->value.
arrow::Status ExampleFunctionImpl(cp::KernelContext* ctx, const cp::ExecSpan& batch,
cp::ExecResult* out) {
out->value = batch[0].array.ToArrayData();
// Shared tail of both versions: unconditionally reports success.
return arrow::Status::OK();
}

Expand Down
20 changes: 13 additions & 7 deletions cpp/examples/arrow/udf_example.cc
Original file line number Diff line number Diff line change
Expand Up @@ -59,11 +59,17 @@ const cp::FunctionDoc func_doc{
{"x", "y", "z"},
"UDFOptions"};

// NOTE(review): flattened diff hunk. The first five lines below appear to be the removed
// version and the remaining lines the added version; together they are not one
// compilable function.
// Removed version: computes x + y + z by invoking the "add" compute function twice.
arrow::Status SampleFunction(cp::KernelContext* ctx, const cp::ExecBatch& batch,
arrow::Datum* out) {
// temp = x + y; return temp + z
ARROW_ASSIGN_OR_RAISE(auto temp, cp::CallFunction("add", {batch[0], batch[1]}));
return cp::CallFunction("add", {temp, batch[2]}).Value(out);
// Added version: sums the three arguments element-wise with a plain loop over raw
// int64 values instead of calling back into the function registry.
arrow::Status SampleFunction(cp::KernelContext* ctx, const cp::ExecSpan& batch,
cp::ExecResult* out) {
// return x + y + z
// Buffer index 1 of each argument is read as int64 values.
const int64_t* x = batch[0].array.GetValues<int64_t>(1);
const int64_t* y = batch[1].array.GetValues<int64_t>(1);
const int64_t* z = batch[2].array.GetValues<int64_t>(1);
// Writes directly into buffer 1 of the output span; presumably that buffer is already
// allocated by the caller (kernel registered with MemAllocation::PREALLOCATE) -- confirm.
int64_t* out_values = out->array_span()->GetValues<int64_t>(1);
// One output value per row of the batch; validity bitmaps are not consulted here.
for (int64_t i = 0; i < batch.length; ++i) {
*out_values++ = *x++ + *y++ + *z++;
}
return arrow::Status::OK();
}

arrow::Status Execute() {
Expand All @@ -74,8 +80,8 @@ arrow::Status Execute() {
cp::InputType::Array(arrow::int64())},
arrow::int64(), SampleFunction);

kernel.mem_allocation = cp::MemAllocation::NO_PREALLOCATE;
kernel.null_handling = cp::NullHandling::COMPUTED_NO_PREALLOCATE;
kernel.mem_allocation = cp::MemAllocation::PREALLOCATE;
kernel.null_handling = cp::NullHandling::INTERSECTION;

ARROW_RETURN_NOT_OK(func->AddKernel(std::move(kernel)));

Expand Down
12 changes: 6 additions & 6 deletions cpp/src/arrow/adapters/orc/util.cc
Original file line number Diff line number Diff line change
Expand Up @@ -586,8 +586,8 @@ Status WriteGenericBatch(const Array& array, int64_t orc_offset,
batch->hasNulls = true;
}
Appender<DataType, BatchType> appender{array_, batch, orc_offset, 0};
ArrayDataVisitor<DataType> visitor;
RETURN_NOT_OK(visitor.Visit(*(array_.data()), &appender));
ArraySpanVisitor<DataType> visitor;
RETURN_NOT_OK(visitor.Visit(*array_.data(), &appender));
return Status::OK();
}

Expand All @@ -608,8 +608,8 @@ Status WriteTimestampBatch(const Array& array, int64_t orc_offset,
0,
conversion_factor_from_second,
conversion_factor_to_nano};
ArrayDataVisitor<DataType> visitor;
RETURN_NOT_OK(visitor.Visit(*(array_.data()), &appender));
ArraySpanVisitor<DataType> visitor;
RETURN_NOT_OK(visitor.Visit(*array_.data(), &appender));
return Status::OK();
}

Expand All @@ -621,8 +621,8 @@ Status WriteFixedSizeBinaryBatch(const Array& array, int64_t orc_offset,
batch->hasNulls = true;
}
FixedSizeBinaryAppender appender{array_, batch, orc_offset, 0, array_.byte_width()};
ArrayDataVisitor<FixedSizeBinaryType> visitor;
RETURN_NOT_OK(visitor.Visit(*(array_.data()), &appender));
ArraySpanVisitor<FixedSizeBinaryType> visitor;
RETURN_NOT_OK(visitor.Visit(*array_.data(), &appender));
return Status::OK();
}

Expand Down
6 changes: 3 additions & 3 deletions cpp/src/arrow/array/array_binary_test.cc
Original file line number Diff line number Diff line change
Expand Up @@ -859,7 +859,7 @@ TEST(TestChunkedStringBuilder, BasicOperation) {
}

// ----------------------------------------------------------------------
// ArrayDataVisitor<binary-like> tests
// ArraySpanVisitor<binary-like> tests

struct BinaryAppender {
Status VisitNull() {
Expand All @@ -885,7 +885,7 @@ class TestBaseBinaryDataVisitor : public ::testing::Test {
void TestBasics() {
auto array = ArrayFromJSON(type_, R"(["foo", null, "bar"])");
BinaryAppender appender;
ArrayDataVisitor<TypeClass> visitor;
ArraySpanVisitor<TypeClass> visitor;
ASSERT_OK(visitor.Visit(*array->data(), &appender));
ASSERT_THAT(appender.data, ::testing::ElementsAreArray({"foo", "(null)", "bar"}));
ARROW_UNUSED(visitor); // Workaround weird MSVC warning
Expand All @@ -894,7 +894,7 @@ class TestBaseBinaryDataVisitor : public ::testing::Test {
void TestSliced() {
auto array = ArrayFromJSON(type_, R"(["ab", null, "cd", "ef"])")->Slice(1, 2);
BinaryAppender appender;
ArrayDataVisitor<TypeClass> visitor;
ArraySpanVisitor<TypeClass> visitor;
ASSERT_OK(visitor.Visit(*array->data(), &appender));
ASSERT_THAT(appender.data, ::testing::ElementsAreArray({"(null)", "cd"}));
ARROW_UNUSED(visitor); // Workaround weird MSVC warning
Expand Down
4 changes: 2 additions & 2 deletions cpp/src/arrow/array/array_dict.cc
Original file line number Diff line number Diff line change
Expand Up @@ -290,8 +290,8 @@ class DictionaryUnifierImpl : public DictionaryUnifier {

Status GetResultWithIndexType(const std::shared_ptr<DataType>& index_type,
std::shared_ptr<Array>* out_dict) override {
int64_t dict_length = memo_table_.size();
if (!internal::IntegersCanFit(Datum(dict_length), *index_type).ok()) {
Int64Scalar dict_length(memo_table_.size());
if (!internal::IntegersCanFit(dict_length, *index_type).ok()) {
return Status::Invalid(
"These dictionaries cannot be combined. The unified dictionary requires a "
"larger index type.");
Expand Down
7 changes: 4 additions & 3 deletions cpp/src/arrow/array/array_nested.cc
Original file line number Diff line number Diff line change
Expand Up @@ -569,7 +569,7 @@ const ArrayVector& StructArray::fields() const {
return boxed_fields_;
}

std::shared_ptr<Array> StructArray::field(int i) const {
const std::shared_ptr<Array>& StructArray::field(int i) const {
std::shared_ptr<Array> result = internal::atomic_load(&boxed_fields_[i]);
if (!result) {
std::shared_ptr<ArrayData> field_data;
Expand All @@ -578,10 +578,11 @@ std::shared_ptr<Array> StructArray::field(int i) const {
} else {
field_data = data_->child_data[i];
}
result = MakeArray(field_data);
std::shared_ptr<Array> result = MakeArray(field_data);
internal::atomic_store(&boxed_fields_[i], result);
return boxed_fields_[i];
}
return result;
return boxed_fields_[i];
}

std::shared_ptr<Array> StructArray::GetFieldByName(const std::string& name) const {
Expand Down
2 changes: 1 addition & 1 deletion cpp/src/arrow/array/array_nested.h
Original file line number Diff line number Diff line change
Expand Up @@ -378,7 +378,7 @@ class ARROW_EXPORT StructArray : public Array {
// Return a shared pointer in case the requestor desires to share ownership
// with this array. The returned array has its offset, length and null
// count adjusted.
std::shared_ptr<Array> field(int pos) const;
const std::shared_ptr<Array>& field(int pos) const;
Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Returning `std::shared_ptr<Array>` by value makes a shared_ptr copy that is often not needed


const ArrayVector& fields() const;

Expand Down
37 changes: 21 additions & 16 deletions cpp/src/arrow/array/array_test.cc
Original file line number Diff line number Diff line change
Expand Up @@ -696,6 +696,7 @@ TEST_F(TestArray, TestMakeEmptyArray) {

TEST_F(TestArray, TestAppendArraySlice) {
auto scalars = GetScalars();
ArraySpan span;
for (const auto& scalar : scalars) {
ARROW_SCOPED_TRACE(*scalar->type);
ASSERT_OK_AND_ASSIGN(auto array, MakeArrayFromScalar(*scalar, 16));
Expand All @@ -704,31 +705,33 @@ TEST_F(TestArray, TestAppendArraySlice) {
std::unique_ptr<arrow::ArrayBuilder> builder;
ASSERT_OK(MakeBuilder(pool_, scalar->type, &builder));

ASSERT_OK(builder->AppendArraySlice(*array->data(), 0, 4));
span.SetMembers(*array->data());
ASSERT_OK(builder->AppendArraySlice(span, 0, 4));
ASSERT_EQ(4, builder->length());
ASSERT_OK(builder->AppendArraySlice(*array->data(), 0, 0));
ASSERT_OK(builder->AppendArraySlice(span, 0, 0));
ASSERT_EQ(4, builder->length());
ASSERT_OK(builder->AppendArraySlice(*array->data(), 1, 0));
ASSERT_OK(builder->AppendArraySlice(span, 1, 0));
ASSERT_EQ(4, builder->length());
ASSERT_OK(builder->AppendArraySlice(*array->data(), 1, 4));
ASSERT_OK(builder->AppendArraySlice(span, 1, 4));
ASSERT_EQ(8, builder->length());

ASSERT_OK(builder->AppendArraySlice(*nulls->data(), 0, 4));
span.SetMembers(*nulls->data());
ASSERT_OK(builder->AppendArraySlice(span, 0, 4));
ASSERT_EQ(12, builder->length());
if (!is_union(scalar->type->id())) {
ASSERT_EQ(4, builder->null_count());
}
ASSERT_OK(builder->AppendArraySlice(*nulls->data(), 0, 0));
ASSERT_OK(builder->AppendArraySlice(span, 0, 0));
ASSERT_EQ(12, builder->length());
if (!is_union(scalar->type->id())) {
ASSERT_EQ(4, builder->null_count());
}
ASSERT_OK(builder->AppendArraySlice(*nulls->data(), 1, 0));
ASSERT_OK(builder->AppendArraySlice(span, 1, 0));
ASSERT_EQ(12, builder->length());
if (!is_union(scalar->type->id())) {
ASSERT_EQ(4, builder->null_count());
}
ASSERT_OK(builder->AppendArraySlice(*nulls->data(), 1, 4));
ASSERT_OK(builder->AppendArraySlice(span, 1, 4));
ASSERT_EQ(16, builder->length());
if (!is_union(scalar->type->id())) {
ASSERT_EQ(8, builder->null_count());
Expand All @@ -746,13 +749,15 @@ TEST_F(TestArray, TestAppendArraySlice) {
{
ASSERT_OK_AND_ASSIGN(auto array, MakeArrayOfNull(null(), 16));
NullBuilder builder(pool_);
ASSERT_OK(builder.AppendArraySlice(*array->data(), 0, 4));

span.SetMembers(*array->data());
ASSERT_OK(builder.AppendArraySlice(span, 0, 4));
ASSERT_EQ(4, builder.length());
ASSERT_OK(builder.AppendArraySlice(*array->data(), 0, 0));
ASSERT_OK(builder.AppendArraySlice(span, 0, 0));
ASSERT_EQ(4, builder.length());
ASSERT_OK(builder.AppendArraySlice(*array->data(), 1, 0));
ASSERT_OK(builder.AppendArraySlice(span, 1, 0));
ASSERT_EQ(4, builder.length());
ASSERT_OK(builder.AppendArraySlice(*array->data(), 1, 4));
ASSERT_OK(builder.AppendArraySlice(span, 1, 4));
ASSERT_EQ(8, builder.length());
std::shared_ptr<Array> result;
ASSERT_OK(builder.Finish(&result));
Expand Down Expand Up @@ -2257,23 +2262,23 @@ struct FWBinaryAppender {
std::vector<util::string_view> data;
};

TEST_F(TestFWBinaryArray, ArrayDataVisitor) {
TEST_F(TestFWBinaryArray, ArraySpanVisitor) {
auto type = fixed_size_binary(3);

auto array = ArrayFromJSON(type, R"(["abc", null, "def"])");
FWBinaryAppender appender;
ArrayDataVisitor<FixedSizeBinaryType> visitor;
ArraySpanVisitor<FixedSizeBinaryType> visitor;
ASSERT_OK(visitor.Visit(*array->data(), &appender));
ASSERT_THAT(appender.data, ::testing::ElementsAreArray({"abc", "(null)", "def"}));
ARROW_UNUSED(visitor); // Workaround weird MSVC warning
}

TEST_F(TestFWBinaryArray, ArrayDataVisitorSliced) {
TEST_F(TestFWBinaryArray, ArraySpanVisitorSliced) {
auto type = fixed_size_binary(3);

auto array = ArrayFromJSON(type, R"(["abc", null, "def", "ghi"])")->Slice(1, 2);
FWBinaryAppender appender;
ArrayDataVisitor<FixedSizeBinaryType> visitor;
ArraySpanVisitor<FixedSizeBinaryType> visitor;
ASSERT_OK(visitor.Visit(*array->data(), &appender));
ASSERT_THAT(appender.data, ::testing::ElementsAreArray({"(null)", "def"}));
ARROW_UNUSED(visitor); // Workaround weird MSVC warning
Expand Down
2 changes: 1 addition & 1 deletion cpp/src/arrow/array/builder_base.h
Original file line number Diff line number Diff line change
Expand Up @@ -147,7 +147,7 @@ class ARROW_EXPORT ArrayBuilder {
/// \brief Append a range of values from an array.
///
/// The given array must be the same type as the builder.
virtual Status AppendArraySlice(const ArrayData& array, int64_t offset,
virtual Status AppendArraySlice(const ArraySpan& array, int64_t offset,
int64_t length) {
return Status::NotImplemented("AppendArraySlice for builder for ", *type());
}
Expand Down
4 changes: 2 additions & 2 deletions cpp/src/arrow/array/builder_binary.h
Original file line number Diff line number Diff line change
Expand Up @@ -278,7 +278,7 @@ class BaseBinaryBuilder : public ArrayBuilder {
return Status::OK();
}

Status AppendArraySlice(const ArrayData& array, int64_t offset,
Status AppendArraySlice(const ArraySpan& array, int64_t offset,
int64_t length) override {
auto bitmap = array.GetValues<uint8_t>(0, 0);
auto offsets = array.GetValues<offset_type>(1);
Expand Down Expand Up @@ -516,7 +516,7 @@ class ARROW_EXPORT FixedSizeBinaryBuilder : public ArrayBuilder {
Status AppendEmptyValue() final;
Status AppendEmptyValues(int64_t length) final;

Status AppendArraySlice(const ArrayData& array, int64_t offset,
Status AppendArraySlice(const ArraySpan& array, int64_t offset,
int64_t length) override {
return AppendValues(
array.GetValues<uint8_t>(1, 0) + ((array.offset + offset) * byte_width_), length,
Expand Down
9 changes: 5 additions & 4 deletions cpp/src/arrow/array/builder_dict.h
Original file line number Diff line number Diff line change
Expand Up @@ -366,10 +366,11 @@ class DictionaryBuilderBase : public ArrayBuilder {
return Status::OK();
}

Status AppendArraySlice(const ArrayData& array, int64_t offset, int64_t length) final {
Status AppendArraySlice(const ArraySpan& array, int64_t offset, int64_t length) final {
// Visit the indices and insert the unpacked values.
const auto& dict_ty = internal::checked_cast<const DictionaryType&>(*array.type);
const typename TypeTraits<T>::ArrayType dict(array.dictionary);
// See if possible to avoid using ToArrayData here
const typename TypeTraits<T>::ArrayType dict(array.dictionary().ToArrayData());
ARROW_RETURN_NOT_OK(Reserve(length));
switch (dict_ty.index_type()->id()) {
case Type::UINT8:
Expand Down Expand Up @@ -490,10 +491,10 @@ class DictionaryBuilderBase : public ArrayBuilder {
protected:
template <typename c_type>
Status AppendArraySliceImpl(const typename TypeTraits<T>::ArrayType& dict,
const ArrayData& array, int64_t offset, int64_t length) {
const ArraySpan& array, int64_t offset, int64_t length) {
const c_type* values = array.GetValues<c_type>(1) + offset;
return VisitBitBlocks(
array.buffers[0], array.offset + offset, length,
array.buffers[0].data, array.offset + offset, length,
[&](const int64_t position) {
const int64_t index = static_cast<int64_t>(values[position]);
if (dict.IsValid(index)) {
Expand Down
26 changes: 13 additions & 13 deletions cpp/src/arrow/array/builder_nested.h
Original file line number Diff line number Diff line change
Expand Up @@ -126,15 +126,15 @@ class BaseListBuilder : public ArrayBuilder {
return Status::OK();
}

Status AppendArraySlice(const ArrayData& array, int64_t offset,
Status AppendArraySlice(const ArraySpan& array, int64_t offset,
int64_t length) override {
const offset_type* offsets = array.GetValues<offset_type>(1);
const uint8_t* validity = array.MayHaveNulls() ? array.buffers[0]->data() : NULLPTR;
const uint8_t* validity = array.MayHaveNulls() ? array.buffers[0].data : NULLPTR;
for (int64_t row = offset; row < offset + length; row++) {
if (!validity || bit_util::GetBit(validity, array.offset + row)) {
ARROW_RETURN_NOT_OK(Append());
int64_t slot_length = offsets[row + 1] - offsets[row];
ARROW_RETURN_NOT_OK(value_builder_->AppendArraySlice(*array.child_data[0],
ARROW_RETURN_NOT_OK(value_builder_->AppendArraySlice(array.child_data[0],
offsets[row], slot_length));
} else {
ARROW_RETURN_NOT_OK(AppendNull());
Expand Down Expand Up @@ -296,18 +296,18 @@ class ARROW_EXPORT MapBuilder : public ArrayBuilder {

Status AppendEmptyValues(int64_t length) final;

Status AppendArraySlice(const ArrayData& array, int64_t offset,
Status AppendArraySlice(const ArraySpan& array, int64_t offset,
int64_t length) override {
const int32_t* offsets = array.GetValues<int32_t>(1);
const uint8_t* validity = array.MayHaveNulls() ? array.buffers[0]->data() : NULLPTR;
const uint8_t* validity = array.MayHaveNulls() ? array.buffers[0].data : NULLPTR;
for (int64_t row = offset; row < offset + length; row++) {
if (!validity || bit_util::GetBit(validity, array.offset + row)) {
ARROW_RETURN_NOT_OK(Append());
const int64_t slot_length = offsets[row + 1] - offsets[row];
ARROW_RETURN_NOT_OK(key_builder_->AppendArraySlice(
*array.child_data[0]->child_data[0], offsets[row], slot_length));
array.child_data[0].child_data[0], offsets[row], slot_length));
ARROW_RETURN_NOT_OK(item_builder_->AppendArraySlice(
*array.child_data[0]->child_data[1], offsets[row], slot_length));
array.child_data[0].child_data[1], offsets[row], slot_length));
} else {
ARROW_RETURN_NOT_OK(AppendNull());
}
Expand Down Expand Up @@ -425,12 +425,12 @@ class ARROW_EXPORT FixedSizeListBuilder : public ArrayBuilder {

Status AppendEmptyValues(int64_t length) final;

Status AppendArraySlice(const ArrayData& array, int64_t offset, int64_t length) final {
const uint8_t* validity = array.MayHaveNulls() ? array.buffers[0]->data() : NULLPTR;
Status AppendArraySlice(const ArraySpan& array, int64_t offset, int64_t length) final {
const uint8_t* validity = array.MayHaveNulls() ? array.buffers[0].data : NULLPTR;
for (int64_t row = offset; row < offset + length; row++) {
if (!validity || bit_util::GetBit(validity, array.offset + row)) {
ARROW_RETURN_NOT_OK(value_builder_->AppendArraySlice(
*array.child_data[0], list_size_ * (array.offset + row), list_size_));
array.child_data[0], list_size_ * (array.offset + row), list_size_));
ARROW_RETURN_NOT_OK(Append());
} else {
ARROW_RETURN_NOT_OK(AppendNull());
Expand Down Expand Up @@ -532,13 +532,13 @@ class ARROW_EXPORT StructBuilder : public ArrayBuilder {
return Status::OK();
}

Status AppendArraySlice(const ArrayData& array, int64_t offset,
Status AppendArraySlice(const ArraySpan& array, int64_t offset,
int64_t length) override {
for (int i = 0; static_cast<size_t>(i) < children_.size(); i++) {
ARROW_RETURN_NOT_OK(children_[i]->AppendArraySlice(*array.child_data[i],
ARROW_RETURN_NOT_OK(children_[i]->AppendArraySlice(array.child_data[i],
array.offset + offset, length));
}
const uint8_t* validity = array.MayHaveNulls() ? array.buffers[0]->data() : NULLPTR;
const uint8_t* validity = array.MayHaveNulls() ? array.buffers[0].data : NULLPTR;
ARROW_RETURN_NOT_OK(Reserve(length));
UnsafeAppendToBitmap(validity, array.offset + offset, length);
return Status::OK();
Expand Down
Loading