Skip to content

Commit

Permalink
ARROW-19: Add an externalized MemoryPool interface for use in builder…
Browse files Browse the repository at this point in the history
… classes

Memory management will be an ongoing concern, but this is a stride in the right direction. Applications requiring custom memory management will be able to implement a subclass of MemoryPool; we can evolve its API as user needs evolve.

Author: Wes McKinney <wesm@apache.org>

Closes #8 from wesm/ARROW-19 and squashes the following commits:

08d3895 [Wes McKinney] Some include cleanup
e319a36 [Wes McKinney] cpplint fixes
abca6eb [Wes McKinney] Add a MemoryPool abstract interface, change builder instances to request memory from pool via Buffer subclass
  • Loading branch information
wesm committed Mar 3, 2016
1 parent 1000d11 commit e418020
Show file tree
Hide file tree
Showing 25 changed files with 301 additions and 79 deletions.
2 changes: 1 addition & 1 deletion cpp/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -434,7 +434,7 @@ if (UNIX)
add_custom_target(lint ${BUILD_SUPPORT_DIR}/cpplint.py
--verbose=2
--linelength=90
--filter=-whitespace/comments,-readability/todo,-build/header_guard
--filter=-whitespace/comments,-readability/todo,-build/header_guard,-build/c++11
`find ${CMAKE_CURRENT_SOURCE_DIR}/src -name \\*.cc -or -name \\*.h`)
endif (UNIX)

Expand Down
10 changes: 8 additions & 2 deletions cpp/src/arrow/array-test.cc
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@
#include <gtest/gtest.h>

#include <cstdint>
#include <cstdlib>
#include <memory>
#include <string>
#include <vector>
Expand All @@ -28,6 +29,8 @@
#include "arrow/types/integer.h"
#include "arrow/types/primitive.h"
#include "arrow/util/buffer.h"
#include "arrow/util/memory-pool.h"
#include "arrow/util/status.h"

using std::string;
using std::vector;
Expand All @@ -41,8 +44,10 @@ static TypePtr int32_nn = TypePtr(new Int32Type(false));
class TestArray : public ::testing::Test {
public:
void SetUp() {
auto data = std::make_shared<OwnedMutableBuffer>();
auto nulls = std::make_shared<OwnedMutableBuffer>();
pool_ = GetDefaultMemoryPool();

auto data = std::make_shared<PoolBuffer>(pool_);
auto nulls = std::make_shared<PoolBuffer>(pool_);

ASSERT_OK(data->Resize(400));
ASSERT_OK(nulls->Resize(128));
Expand All @@ -51,6 +56,7 @@ class TestArray : public ::testing::Test {
}

protected:
MemoryPool* pool_;
std::unique_ptr<Int32Array> arr_;
};

Expand Down
1 change: 0 additions & 1 deletion cpp/src/arrow/array.h
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,6 @@
#define ARROW_ARRAY_H

#include <cstdint>
#include <cstdlib>
#include <memory>

#include "arrow/type.h"
Expand Down
2 changes: 1 addition & 1 deletion cpp/src/arrow/builder.cc
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,7 @@ Status ArrayBuilder::Init(int64_t capacity) {

if (nullable_) {
int64_t to_alloc = util::ceil_byte(capacity) / 8;
nulls_ = std::make_shared<OwnedMutableBuffer>();
nulls_ = std::make_shared<PoolBuffer>(pool_);
RETURN_NOT_OK(nulls_->Resize(to_alloc));
null_bits_ = nulls_->mutable_data();
memset(null_bits_, 0, to_alloc);
Expand Down
22 changes: 13 additions & 9 deletions cpp/src/arrow/builder.h
Original file line number Diff line number Diff line change
Expand Up @@ -23,25 +23,27 @@
#include <vector>

#include "arrow/type.h"
#include "arrow/util/buffer.h"
#include "arrow/util/macros.h"
#include "arrow/util/status.h"

namespace arrow {

class Array;
class MemoryPool;
class PoolBuffer;

static constexpr int64_t MIN_BUILDER_CAPACITY = 1 << 8;

// Base class for all data array builders
class ArrayBuilder {
public:
explicit ArrayBuilder(const TypePtr& type)
: type_(type),
nullable_(type_->nullable),
nulls_(nullptr), null_bits_(nullptr),
length_(0),
capacity_(0) {}
explicit ArrayBuilder(MemoryPool* pool, const TypePtr& type) :
pool_(pool),
type_(type),
nullable_(type_->nullable),
nulls_(nullptr), null_bits_(nullptr),
length_(0),
capacity_(0) {}

virtual ~ArrayBuilder() {}

Expand Down Expand Up @@ -71,18 +73,20 @@ class ArrayBuilder {
// this function responsibly.
Status Advance(int64_t elements);

const std::shared_ptr<OwnedMutableBuffer>& nulls() const { return nulls_;}
const std::shared_ptr<PoolBuffer>& nulls() const { return nulls_;}

// Creates new array object to hold the contents of the builder and transfers
// ownership of the data
virtual Status ToArray(Array** out) = 0;

protected:
MemoryPool* pool_;

TypePtr type_;
bool nullable_;

// If the type is not nullable, then null_ is nullptr after initialization
std::shared_ptr<OwnedMutableBuffer> nulls_;
std::shared_ptr<PoolBuffer> nulls_;
uint8_t* null_bits_;

// Array length, so far. Also, the index of the next element to be added
Expand Down
13 changes: 7 additions & 6 deletions cpp/src/arrow/types/construct.cc
Original file line number Diff line number Diff line change
Expand Up @@ -32,12 +32,13 @@ class ArrayBuilder;
// Initially looked at doing this with vtables, but shared pointers makes it
// difficult

#define BUILDER_CASE(ENUM, BuilderType) \
case TypeEnum::ENUM: \
*out = static_cast<ArrayBuilder*>(new BuilderType(type)); \
#define BUILDER_CASE(ENUM, BuilderType) \
case TypeEnum::ENUM: \
*out = static_cast<ArrayBuilder*>(new BuilderType(pool, type)); \
return Status::OK();

Status make_builder(const TypePtr& type, ArrayBuilder** out) {
Status make_builder(MemoryPool* pool, const TypePtr& type,
ArrayBuilder** out) {
switch (type->type) {
BUILDER_CASE(UINT8, UInt8Builder);
BUILDER_CASE(INT8, Int8Builder);
Expand All @@ -59,10 +60,10 @@ Status make_builder(const TypePtr& type, ArrayBuilder** out) {
{
ListType* list_type = static_cast<ListType*>(type.get());
ArrayBuilder* value_builder;
RETURN_NOT_OK(make_builder(list_type->value_type, &value_builder));
RETURN_NOT_OK(make_builder(pool, list_type->value_type, &value_builder));

// The ListBuilder takes ownership of the value_builder
ListBuilder* builder = new ListBuilder(type, value_builder);
ListBuilder* builder = new ListBuilder(pool, type, value_builder);
*out = static_cast<ArrayBuilder*>(builder);
return Status::OK();
}
Expand Down
4 changes: 3 additions & 1 deletion cpp/src/arrow/types/construct.h
Original file line number Diff line number Diff line change
Expand Up @@ -23,9 +23,11 @@
namespace arrow {

class ArrayBuilder;
class MemoryPool;
class Status;

Status make_builder(const TypePtr& type, ArrayBuilder** out);
Status make_builder(MemoryPool* pool, const TypePtr& type,
ArrayBuilder** out);

} // namespace arrow

Expand Down
2 changes: 1 addition & 1 deletion cpp/src/arrow/types/list-test.cc
Original file line number Diff line number Diff line change
Expand Up @@ -76,7 +76,7 @@ class TestListBuilder : public TestBuilder {
type_ = TypePtr(new ListType(value_type_));

ArrayBuilder* tmp;
ASSERT_OK(make_builder(type_, &tmp));
ASSERT_OK(make_builder(pool_, type_, &tmp));
builder_.reset(static_cast<ListBuilder*>(tmp));
}

Expand Down
7 changes: 5 additions & 2 deletions cpp/src/arrow/types/list.h
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,8 @@

namespace arrow {

class MemoryPool;

struct ListType : public DataType {
// List can contain any other logical value type
TypePtr value_type;
Expand Down Expand Up @@ -100,8 +102,9 @@ class ListArray : public Array {
// have been appended to the child array)
class ListBuilder : public Int32Builder {
public:
ListBuilder(const TypePtr& type, ArrayBuilder* value_builder)
: Int32Builder(type) {
ListBuilder(MemoryPool* pool, const TypePtr& type,
ArrayBuilder* value_builder)
: Int32Builder(pool, type) {
value_builder_.reset(value_builder);
}

Expand Down
5 changes: 2 additions & 3 deletions cpp/src/arrow/types/primitive-test.cc
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,6 @@
#include <gtest/gtest.h>

#include <cstdint>
#include <cstdlib>
#include <memory>
#include <string>
#include <vector>
Expand Down Expand Up @@ -104,10 +103,10 @@ class TestPrimitiveBuilder : public TestBuilder {
type_nn_ = Attrs::type(false);

ArrayBuilder* tmp;
ASSERT_OK(make_builder(type_, &tmp));
ASSERT_OK(make_builder(pool_, type_, &tmp));
builder_.reset(static_cast<BuilderType*>(tmp));

ASSERT_OK(make_builder(type_nn_, &tmp));
ASSERT_OK(make_builder(pool_, type_nn_, &tmp));
builder_nn_.reset(static_cast<BuilderType*>(tmp));
}

Expand Down
12 changes: 8 additions & 4 deletions cpp/src/arrow/types/primitive.h
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@

#include <cstdint>
#include <cstring>
#include <memory>
#include <string>

#include "arrow/array.h"
Expand All @@ -31,6 +32,8 @@

namespace arrow {

class MemoryPool;

template <typename Derived>
struct PrimitiveType : public DataType {
explicit PrimitiveType(bool nullable = true)
Expand Down Expand Up @@ -113,8 +116,9 @@ class PrimitiveBuilder : public ArrayBuilder {
public:
typedef typename Type::c_type T;

explicit PrimitiveBuilder(const TypePtr& type)
: ArrayBuilder(type), values_(nullptr) {
explicit PrimitiveBuilder(MemoryPool* pool, const TypePtr& type) :
ArrayBuilder(pool, type),
values_(nullptr) {
elsize_ = sizeof(T);
}

Expand All @@ -139,7 +143,7 @@ class PrimitiveBuilder : public ArrayBuilder {
Status Init(int64_t capacity) {
RETURN_NOT_OK(ArrayBuilder::Init(capacity));

values_ = std::make_shared<OwnedMutableBuffer>();
values_ = std::make_shared<PoolBuffer>(pool_);
return values_->Resize(capacity * elsize_);
}

Expand Down Expand Up @@ -231,7 +235,7 @@ class PrimitiveBuilder : public ArrayBuilder {
}

protected:
std::shared_ptr<OwnedMutableBuffer> values_;
std::shared_ptr<PoolBuffer> values_;
int64_t elsize_;
};

Expand Down
29 changes: 13 additions & 16 deletions cpp/src/arrow/types/string-test.cc
Original file line number Diff line number Diff line change
Expand Up @@ -31,12 +31,9 @@
#include "arrow/types/test-common.h"
#include "arrow/util/status.h"

using std::string;
using std::unique_ptr;
using std::vector;

namespace arrow {

class Buffer;

TEST(TypesTest, TestCharType) {
CharType t1(5);
Expand All @@ -45,7 +42,7 @@ TEST(TypesTest, TestCharType) {
ASSERT_TRUE(t1.nullable);
ASSERT_EQ(t1.size, 5);

ASSERT_EQ(t1.ToString(), string("char(5)"));
ASSERT_EQ(t1.ToString(), std::string("char(5)"));

// Test copy constructor
CharType t2 = t1;
Expand All @@ -63,7 +60,7 @@ TEST(TypesTest, TestVarcharType) {
ASSERT_EQ(t1.size, 5);
ASSERT_EQ(t1.physical_type.size, 6);

ASSERT_EQ(t1.ToString(), string("varchar(5)"));
ASSERT_EQ(t1.ToString(), std::string("varchar(5)"));

// Test copy constructor
VarcharType t2 = t1;
Expand All @@ -78,7 +75,7 @@ TEST(TypesTest, TestStringType) {
StringType str_nn(false);

ASSERT_EQ(str.type, TypeEnum::STRING);
ASSERT_EQ(str.name(), string("string"));
ASSERT_EQ(str.name(), std::string("string"));
ASSERT_TRUE(str.nullable);
ASSERT_FALSE(str_nn.nullable);
}
Expand Down Expand Up @@ -111,11 +108,11 @@ class TestStringContainer : public ::testing::Test {
}

protected:
vector<int32_t> offsets_;
vector<char> chars_;
vector<uint8_t> nulls_;
std::vector<int32_t> offsets_;
std::vector<char> chars_;
std::vector<uint8_t> nulls_;

vector<string> expected_;
std::vector<std::string> expected_;

std::shared_ptr<Buffer> value_buf_;
std::shared_ptr<Buffer> offsets_buf_;
Expand Down Expand Up @@ -175,7 +172,7 @@ class TestStringBuilder : public TestBuilder {
type_ = TypePtr(new StringType());

ArrayBuilder* tmp;
ASSERT_OK(make_builder(type_, &tmp));
ASSERT_OK(make_builder(pool_, type_, &tmp));
builder_.reset(static_cast<StringBuilder*>(tmp));
}

Expand All @@ -188,17 +185,17 @@ class TestStringBuilder : public TestBuilder {
protected:
TypePtr type_;

unique_ptr<StringBuilder> builder_;
unique_ptr<StringArray> result_;
std::unique_ptr<StringBuilder> builder_;
std::unique_ptr<StringArray> result_;
};

TEST_F(TestStringBuilder, TestAttrs) {
ASSERT_FALSE(builder_->value_builder()->nullable());
}

TEST_F(TestStringBuilder, TestScalarAppend) {
vector<string> strings = {"a", "bb", "", "", "ccc"};
vector<uint8_t> is_null = {0, 0, 0, 1, 0};
std::vector<std::string> strings = {"a", "bb", "", "", "ccc"};
std::vector<uint8_t> is_null = {0, 0, 0, 1, 0};

int N = strings.size();
int reps = 1000;
Expand Down
9 changes: 6 additions & 3 deletions cpp/src/arrow/types/string.h
Original file line number Diff line number Diff line change
Expand Up @@ -27,12 +27,13 @@
#include "arrow/type.h"
#include "arrow/types/integer.h"
#include "arrow/types/list.h"
#include "arrow/util/buffer.h"
#include "arrow/util/status.h"

namespace arrow {

class ArrayBuilder;
class Buffer;
class MemoryPool;

struct CharType : public DataType {
int size;
Expand Down Expand Up @@ -148,8 +149,9 @@ class StringArray : public ListArray {

class StringBuilder : public ListBuilder {
public:
explicit StringBuilder(const TypePtr& type) :
ListBuilder(type, static_cast<ArrayBuilder*>(new UInt8Builder(value_type_))) {
explicit StringBuilder(MemoryPool* pool, const TypePtr& type) :
ListBuilder(pool, type,
static_cast<ArrayBuilder*>(new UInt8Builder(pool, value_type_))) {
byte_builder_ = static_cast<UInt8Builder*>(value_builder_.get());
}

Expand All @@ -171,6 +173,7 @@ class StringBuilder : public ListBuilder {
}

protected:
std::shared_ptr<ListBuilder> list_builder_;
UInt8Builder* byte_builder_;

static TypePtr value_type_;
Expand Down
1 change: 1 addition & 0 deletions cpp/src/arrow/types/struct.cc
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@

#include "arrow/types/struct.h"

#include <cstdlib>
#include <memory>
#include <sstream>
#include <string>
Expand Down
Loading

0 comments on commit e418020

Please sign in to comment.