Skip to content

Commit

Permalink
Merge pull request #3 from xuepanchen/xuepanchen-arrow-1712
Browse files Browse the repository at this point in the history
Update ReserveData(int64_t) method for BinaryBuilder
  • Loading branch information
xuepanchen authored Jan 19, 2018
2 parents b002e0b + d3c8202 commit 8e4c892
Show file tree
Hide file tree
Showing 3 changed files with 64 additions and 5 deletions.
48 changes: 48 additions & 0 deletions cpp/src/arrow/array-test.cc
Original file line number Diff line number Diff line change
Expand Up @@ -1154,6 +1154,54 @@ TEST_F(TestBinaryBuilder, TestScalarAppend) {
}
}
}

TEST_F(TestBinaryBuilder, TestCapacityReserve) {
vector<string> strings = {"a", "bb", "cc", "ddddd", "eeeee"};
int64_t N = static_cast<int>(strings.size());
int64_t length = 0;
int64_t data_length = 0;
int64_t capacity = N;

ASSERT_OK(builder_->Reserve(capacity));
ASSERT_OK(builder_->ReserveData(capacity));

ASSERT_EQ(builder_->length(), length);
ASSERT_EQ(builder_->capacity(), BitUtil::NextPower2(capacity));
ASSERT_EQ(builder_->value_data_length(), data_length);
ASSERT_EQ(builder_->value_data_capacity(), capacity);

for(const string& str : strings) {
ASSERT_OK(builder_->Append(str));
length++;
data_length += static_cast<int>(str.size());

ASSERT_EQ(builder_->length(), length);
ASSERT_EQ(builder_->capacity(), BitUtil::NextPower2(capacity));
ASSERT_EQ(builder_->value_data_length(), data_length);
if (data_length <= capacity) {
ASSERT_EQ(builder_->value_data_capacity(), capacity);
} else {
ASSERT_EQ(builder_->value_data_capacity(), data_length);
}
}

int extra_capacity = 20;

ASSERT_OK(builder_->Reserve(extra_capacity));
ASSERT_OK(builder_->ReserveData(extra_capacity));

ASSERT_EQ(builder_->length(), length);
ASSERT_EQ(builder_->capacity(), BitUtil::NextPower2(length + extra_capacity));
ASSERT_EQ(builder_->value_data_length(), data_length);
ASSERT_EQ(builder_->value_data_capacity(), data_length + extra_capacity);

Done();

ASSERT_EQ(result_->length(), N);
ASSERT_EQ(result_->null_count(), 0);
ASSERT_EQ(result_->value_data()->size(), data_length);
ASSERT_EQ(result_->value_data()->capacity(), BitUtil::RoundUpToMultipleOf64(data_length + extra_capacity));
}

TEST_F(TestBinaryBuilder, TestZeroLength) {
// All buffers are null
Expand Down
16 changes: 13 additions & 3 deletions cpp/src/arrow/builder.cc
Original file line number Diff line number Diff line change
Expand Up @@ -1208,7 +1208,7 @@ ArrayBuilder* ListBuilder::value_builder() const {
// String and binary

BinaryBuilder::BinaryBuilder(const std::shared_ptr<DataType>& type, MemoryPool* pool)
: ArrayBuilder(type, pool), offsets_builder_(pool), value_data_builder_(pool) {}
: ArrayBuilder(type, pool), offsets_builder_(pool), value_data_builder_(pool), data_capacity_(0) {}

BinaryBuilder::BinaryBuilder(MemoryPool* pool) : BinaryBuilder(binary(), pool) {}

Expand All @@ -1227,8 +1227,15 @@ Status BinaryBuilder::Resize(int64_t capacity) {
}

Status BinaryBuilder::ReserveData(int64_t capacity) {
DCHECK_LT(capacity, std::numeric_limits<int32_t>::max());
return value_data_builder_.Resize(capacity * sizeof(int64_t));
if (value_data_length() + capacity > data_capacity_) {
if (value_data_length() + capacity > std::numeric_limits<int32_t>::max()) {
return Status::Invalid("Cannot reserve capacity larger than 2^31 - 1 in length for binary data");
}

RETURN_NOT_OK(value_data_builder_.Resize(value_data_length() + capacity));
data_capacity_ = value_data_length() + capacity;
}
return Status::OK();
}

Status BinaryBuilder::AppendNextOffset() {
Expand All @@ -1246,6 +1253,9 @@ Status BinaryBuilder::Append(const uint8_t* value, int32_t length) {
RETURN_NOT_OK(Reserve(1));
RETURN_NOT_OK(AppendNextOffset());
RETURN_NOT_OK(value_data_builder_.Append(value, length));
if (data_capacity_ < value_data_length()) {
data_capacity_ = value_data_length();
}
UnsafeAppendToBitmap(true);
return Status::OK();
}
Expand Down
5 changes: 3 additions & 2 deletions cpp/src/arrow/builder.h
Original file line number Diff line number Diff line change
Expand Up @@ -682,13 +682,13 @@ class ARROW_EXPORT BinaryBuilder : public ArrayBuilder {

Status Init(int64_t elements) override;
Status Resize(int64_t capacity) override;
Status ReserveData(int64_t bytes);
Status ReserveData(int64_t capacity);
Status FinishInternal(std::shared_ptr<ArrayData>* out) override;

/// \return size of values buffer so far
int64_t value_data_length() const { return value_data_builder_.length(); }
/// \return capacity of values buffer
int64_t value_data_capacity() const { return value_data_builder_.capacity(); }
int64_t value_data_capacity() const { return data_capacity_; }

/// Temporary access to a value.
///
Expand All @@ -699,6 +699,7 @@ class ARROW_EXPORT BinaryBuilder : public ArrayBuilder {
TypedBufferBuilder<int32_t> offsets_builder_;
TypedBufferBuilder<uint8_t> value_data_builder_;

int64_t data_capacity_;
static constexpr int64_t kMaximumCapacity = std::numeric_limits<int32_t>::max() - 1;

Status AppendNextOffset();
Expand Down

0 comments on commit 8e4c892

Please sign in to comment.