diff --git a/cpp/src/arrow/array-test.cc b/cpp/src/arrow/array-test.cc index 743b40c79f1d1..e350b842fb2e4 100644 --- a/cpp/src/arrow/array-test.cc +++ b/cpp/src/arrow/array-test.cc @@ -1156,51 +1156,32 @@ TEST_F(TestBinaryBuilder, TestScalarAppend) { } TEST_F(TestBinaryBuilder, TestCapacityReserve) { - vector strings = {"a", "bb", "cc", "ddddd", "eeeee"}; - int64_t N = static_cast(strings.size()); + vector strings = {"aaaaa", "bbbbbbbbbb", "ccccccccccccccc", "dddddddddddddddddddd", "eeeeeeeeee"}; + int N = static_cast(strings.size()); + int reps = 10; int64_t length = 0; - int64_t data_length = 0; - int64_t capacity = N; - - ASSERT_OK(builder_->Reserve(capacity)); + int64_t capacity = 1000; + + ASSERT_OK(builder_->ReserveData(capacity)); - ASSERT_EQ(builder_->length(), length); - ASSERT_EQ(builder_->capacity(), BitUtil::NextPower2(capacity)); - ASSERT_EQ(builder_->value_data_length(), data_length); - ASSERT_EQ(builder_->value_data_capacity(), capacity); + ASSERT_EQ(length, builder_->value_data_length()); + ASSERT_EQ(BitUtil::RoundUpToMultipleOf64(capacity), builder_->value_data_capacity()); - for(const string& str : strings) { - ASSERT_OK(builder_->Append(str)); - length++; - data_length += static_cast(str.size()); + for (int j = 0; j < reps; ++j) { + for (int i = 0; i < N; ++i) { + ASSERT_OK(builder_->Append(strings[i])); + length += static_cast(strings[i].size()); - ASSERT_EQ(builder_->length(), length); - ASSERT_EQ(builder_->capacity(), BitUtil::NextPower2(capacity)); - ASSERT_EQ(builder_->value_data_length(), data_length); - if (data_length <= capacity) { - ASSERT_EQ(builder_->value_data_capacity(), capacity); - } else { - ASSERT_EQ(builder_->value_data_capacity(), data_length); + ASSERT_EQ(length, builder_->value_data_length()); + ASSERT_EQ(BitUtil::RoundUpToMultipleOf64(capacity), builder_->value_data_capacity()); } } - - int extra_capacity = 20; - - ASSERT_OK(builder_->Reserve(extra_capacity)); - ASSERT_OK(builder_->ReserveData(extra_capacity)); - - ASSERT_EQ(builder_->length(), length); - ASSERT_EQ(builder_->capacity(), BitUtil::NextPower2(length + extra_capacity)); - ASSERT_EQ(builder_->value_data_length(), data_length); - ASSERT_EQ(builder_->value_data_capacity(), data_length + extra_capacity); - Done(); - - ASSERT_EQ(result_->length(), N); - ASSERT_EQ(result_->null_count(), 0); - ASSERT_EQ(result_->value_data()->size(), data_length); - ASSERT_EQ(result_->value_data()->capacity(), BitUtil::RoundUpToMultipleOf64(data_length + extra_capacity)); + ASSERT_EQ(reps * N, result_->length()); + ASSERT_EQ(0, result_->null_count()); + ASSERT_EQ(reps * 60, result_->value_data()->size()); + ASSERT_EQ(BitUtil::RoundUpToMultipleOf64(capacity), result_->value_data()->capacity()); } TEST_F(TestBinaryBuilder, TestZeroLength) { diff --git a/cpp/src/arrow/builder.cc b/cpp/src/arrow/builder.cc index 63588867dfce0..f85da527f1b19 100644 --- a/cpp/src/arrow/builder.cc +++ b/cpp/src/arrow/builder.cc @@ -1208,7 +1208,7 @@ ArrayBuilder* ListBuilder::value_builder() const { // String and binary BinaryBuilder::BinaryBuilder(const std::shared_ptr& type, MemoryPool* pool) - : ArrayBuilder(type, pool), offsets_builder_(pool), value_data_builder_(pool), data_capacity_(0) {} + : ArrayBuilder(type, pool), offsets_builder_(pool), value_data_builder_(pool) {} BinaryBuilder::BinaryBuilder(MemoryPool* pool) : BinaryBuilder(binary(), pool) {} @@ -1226,14 +1226,12 @@ Status BinaryBuilder::Resize(int64_t capacity) { return ArrayBuilder::Resize(capacity); } -Status BinaryBuilder::ReserveData(int64_t capacity) { - if (value_data_length() + capacity > data_capacity_) { - if (value_data_length() + capacity > std::numeric_limits::max()) { +Status BinaryBuilder::ReserveData(int64_t elements) { + if (value_data_length() + elements > value_data_capacity()) { + if (value_data_length() + elements > std::numeric_limits::max()) { return Status::Invalid("Cannot reserve capacity larger than 2^31 - 1 in length for binary data"); - } - - RETURN_NOT_OK(value_data_builder_.Resize(value_data_length() + capacity)); - data_capacity_ = value_data_length() + capacity; + } + RETURN_NOT_OK(value_data_builder_.Reserve(elements)); } return Status::OK(); } @@ -1253,9 +1251,6 @@ Status BinaryBuilder::Append(const uint8_t* value, int32_t length) { RETURN_NOT_OK(Reserve(1)); RETURN_NOT_OK(AppendNextOffset()); RETURN_NOT_OK(value_data_builder_.Append(value, length)); - if (data_capacity_ < value_data_length()) { - data_capacity_ = value_data_length(); - } UnsafeAppendToBitmap(true); return Status::OK(); } diff --git a/cpp/src/arrow/builder.h b/cpp/src/arrow/builder.h index a708122c2930b..886fdc7226eb6 100644 --- a/cpp/src/arrow/builder.h +++ b/cpp/src/arrow/builder.h @@ -682,13 +682,15 @@ class ARROW_EXPORT BinaryBuilder : public ArrayBuilder { Status Init(int64_t elements) override; Status Resize(int64_t capacity) override; - Status ReserveData(int64_t capacity); + /// Ensures there is enough space for adding the number of value elements + /// by checking value buffer capacity and resizing if necessary. + Status ReserveData(int64_t elements); Status FinishInternal(std::shared_ptr* out) override; /// \return size of values buffer so far int64_t value_data_length() const { return value_data_builder_.length(); } /// \return capacity of values buffer - int64_t value_data_capacity() const { return data_capacity_; } + int64_t value_data_capacity() const { return value_data_builder_.capacity(); } /// Temporary access to a value. /// @@ -699,7 +701,6 @@ class ARROW_EXPORT BinaryBuilder : public ArrayBuilder { TypedBufferBuilder offsets_builder_; TypedBufferBuilder value_data_builder_; - int64_t data_capacity_; static constexpr int64_t kMaximumCapacity = std::numeric_limits::max() - 1; Status AppendNextOffset();