Skip to content

Commit

Permalink
ARROW-1712: [C++] Remove unneeded data member in BinaryBuilder and mo…
Browse files Browse the repository at this point in the history
…dify test case
  • Loading branch information
xuepanchen committed Jan 20, 2018
1 parent 5a5b70e commit bc5db7d
Show file tree
Hide file tree
Showing 3 changed files with 28 additions and 51 deletions.
55 changes: 18 additions & 37 deletions cpp/src/arrow/array-test.cc
Original file line number Diff line number Diff line change
Expand Up @@ -1156,51 +1156,32 @@ TEST_F(TestBinaryBuilder, TestScalarAppend) {
}

TEST_F(TestBinaryBuilder, TestCapacityReserve) {
vector<string> strings = {"a", "bb", "cc", "ddddd", "eeeee"};
int64_t N = static_cast<int>(strings.size());
vector<string> strings = {"aaaaa", "bbbbbbbbbb", "ccccccccccccccc", "dddddddddddddddddddd", "eeeeeeeeee"};
int N = static_cast<int>(strings.size());
int reps = 10;
int64_t length = 0;
int64_t data_length = 0;
int64_t capacity = N;

ASSERT_OK(builder_->Reserve(capacity));
int64_t capacity = 1000;


ASSERT_OK(builder_->ReserveData(capacity));

ASSERT_EQ(builder_->length(), length);
ASSERT_EQ(builder_->capacity(), BitUtil::NextPower2(capacity));
ASSERT_EQ(builder_->value_data_length(), data_length);
ASSERT_EQ(builder_->value_data_capacity(), capacity);
ASSERT_EQ(length, builder_->value_data_length());
ASSERT_EQ(BitUtil::RoundUpToMultipleOf64(capacity), builder_->value_data_capacity());

for(const string& str : strings) {
ASSERT_OK(builder_->Append(str));
length++;
data_length += static_cast<int>(str.size());
for (int j = 0; j < reps; ++j) {
for (int i = 0; i < N; ++i) {
ASSERT_OK(builder_->Append(strings[i]));
length += static_cast<int>(strings[i].size());

ASSERT_EQ(builder_->length(), length);
ASSERT_EQ(builder_->capacity(), BitUtil::NextPower2(capacity));
ASSERT_EQ(builder_->value_data_length(), data_length);
if (data_length <= capacity) {
ASSERT_EQ(builder_->value_data_capacity(), capacity);
} else {
ASSERT_EQ(builder_->value_data_capacity(), data_length);
ASSERT_EQ(length, builder_->value_data_length());
ASSERT_EQ(BitUtil::RoundUpToMultipleOf64(capacity), builder_->value_data_capacity());
}
}

int extra_capacity = 20;

ASSERT_OK(builder_->Reserve(extra_capacity));
ASSERT_OK(builder_->ReserveData(extra_capacity));

ASSERT_EQ(builder_->length(), length);
ASSERT_EQ(builder_->capacity(), BitUtil::NextPower2(length + extra_capacity));
ASSERT_EQ(builder_->value_data_length(), data_length);
ASSERT_EQ(builder_->value_data_capacity(), data_length + extra_capacity);

Done();

ASSERT_EQ(result_->length(), N);
ASSERT_EQ(result_->null_count(), 0);
ASSERT_EQ(result_->value_data()->size(), data_length);
ASSERT_EQ(result_->value_data()->capacity(), BitUtil::RoundUpToMultipleOf64(data_length + extra_capacity));
ASSERT_EQ(reps * N, result_->length());
ASSERT_EQ(0, result_->null_count());
ASSERT_EQ(reps * 60, result_->value_data()->size());
ASSERT_EQ(BitUtil::RoundUpToMultipleOf64(capacity), result_->value_data()->capacity());
}

TEST_F(TestBinaryBuilder, TestZeroLength) {
Expand Down
17 changes: 6 additions & 11 deletions cpp/src/arrow/builder.cc
Original file line number Diff line number Diff line change
Expand Up @@ -1208,7 +1208,7 @@ ArrayBuilder* ListBuilder::value_builder() const {
// String and binary

BinaryBuilder::BinaryBuilder(const std::shared_ptr<DataType>& type, MemoryPool* pool)
: ArrayBuilder(type, pool), offsets_builder_(pool), value_data_builder_(pool), data_capacity_(0) {}
: ArrayBuilder(type, pool), offsets_builder_(pool), value_data_builder_(pool) {}

BinaryBuilder::BinaryBuilder(MemoryPool* pool) : BinaryBuilder(binary(), pool) {}

Expand All @@ -1226,14 +1226,12 @@ Status BinaryBuilder::Resize(int64_t capacity) {
return ArrayBuilder::Resize(capacity);
}

Status BinaryBuilder::ReserveData(int64_t capacity) {
if (value_data_length() + capacity > data_capacity_) {
if (value_data_length() + capacity > std::numeric_limits<int32_t>::max()) {
Status BinaryBuilder::ReserveData(int64_t elements) {
if (value_data_length() + elements > value_data_capacity()) {
if (value_data_length() + elements > std::numeric_limits<int32_t>::max()) {
return Status::Invalid("Cannot reserve capacity larger than 2^31 - 1 in length for binary data");
}

RETURN_NOT_OK(value_data_builder_.Resize(value_data_length() + capacity));
data_capacity_ = value_data_length() + capacity;
}
RETURN_NOT_OK(value_data_builder_.Reserve(elements));
}
return Status::OK();
}
Expand All @@ -1253,9 +1251,6 @@ Status BinaryBuilder::Append(const uint8_t* value, int32_t length) {
RETURN_NOT_OK(Reserve(1));
RETURN_NOT_OK(AppendNextOffset());
RETURN_NOT_OK(value_data_builder_.Append(value, length));
if (data_capacity_ < value_data_length()) {
data_capacity_ = value_data_length();
}
UnsafeAppendToBitmap(true);
return Status::OK();
}
Expand Down
7 changes: 4 additions & 3 deletions cpp/src/arrow/builder.h
Original file line number Diff line number Diff line change
Expand Up @@ -682,13 +682,15 @@ class ARROW_EXPORT BinaryBuilder : public ArrayBuilder {

Status Init(int64_t elements) override;
Status Resize(int64_t capacity) override;
Status ReserveData(int64_t capacity);
/// Ensures there is enough space for adding the number of value elements
/// by checking value buffer capacity and resizing if necessary.
Status ReserveData(int64_t elements);
Status FinishInternal(std::shared_ptr<ArrayData>* out) override;

/// \return size of values buffer so far
int64_t value_data_length() const { return value_data_builder_.length(); }
/// \return capacity of values buffer
int64_t value_data_capacity() const { return data_capacity_; }
int64_t value_data_capacity() const { return value_data_builder_.capacity(); }

/// Temporary access to a value.
///
Expand All @@ -699,7 +701,6 @@ class ARROW_EXPORT BinaryBuilder : public ArrayBuilder {
TypedBufferBuilder<int32_t> offsets_builder_;
TypedBufferBuilder<uint8_t> value_data_builder_;

int64_t data_capacity_;
static constexpr int64_t kMaximumCapacity = std::numeric_limits<int32_t>::max() - 1;

Status AppendNextOffset();
Expand Down

0 comments on commit bc5db7d

Please sign in to comment.