Skip to content

Commit

Permalink
ARROW-17836: [C++] Allow specifying alignment of buffers (apache#14225)
Browse files Browse the repository at this point in the history
Lead-authored-by: Sasha Krassovsky <krassovskysasha@gmail.com>
Co-authored-by: Antoine Pitrou <antoine@python.org>
Signed-off-by: Antoine Pitrou <antoine@python.org>
  • Loading branch information
save-buffer and pitrou authored Nov 25, 2022
1 parent 63f013c commit 2078af7
Show file tree
Hide file tree
Showing 36 changed files with 827 additions and 392 deletions.
12 changes: 8 additions & 4 deletions cpp/src/arrow/array/builder_adaptive.cc
Original file line number Diff line number Diff line change
Expand Up @@ -33,8 +33,11 @@ namespace arrow {

using internal::AdaptiveIntBuilderBase;

AdaptiveIntBuilderBase::AdaptiveIntBuilderBase(uint8_t start_int_size, MemoryPool* pool)
: ArrayBuilder(pool), start_int_size_(start_int_size), int_size_(start_int_size) {}
// Construct the common base of the adaptive integer builders.
// `pool` and `alignment` are forwarded unchanged to ArrayBuilder; both the
// starting integer size and the current integer size are initialised to
// `start_int_size`.
AdaptiveIntBuilderBase::AdaptiveIntBuilderBase(uint8_t start_int_size, MemoryPool* pool,
int64_t alignment)
: ArrayBuilder(pool, alignment),
start_int_size_(start_int_size),
int_size_(start_int_size) {}

void AdaptiveIntBuilderBase::Reset() {
ArrayBuilder::Reset();
Expand Down Expand Up @@ -125,8 +128,9 @@ std::shared_ptr<DataType> AdaptiveIntBuilder::type() const {
return nullptr;
}

AdaptiveIntBuilder::AdaptiveIntBuilder(uint8_t start_int_size, MemoryPool* pool)
: AdaptiveIntBuilderBase(start_int_size, pool) {}
// Pure delegation: all three arguments are handed to AdaptiveIntBuilderBase and
// no additional state is initialised here.
AdaptiveIntBuilder::AdaptiveIntBuilder(uint8_t start_int_size, MemoryPool* pool,
int64_t alignment)
: AdaptiveIntBuilderBase(start_int_size, pool, alignment) {}

Status AdaptiveIntBuilder::FinishInternal(std::shared_ptr<ArrayData>* out) {
RETURN_NOT_OK(CommitPendingData());
Expand Down
16 changes: 10 additions & 6 deletions cpp/src/arrow/array/builder_adaptive.h
Original file line number Diff line number Diff line change
Expand Up @@ -39,10 +39,12 @@ namespace internal {

class ARROW_EXPORT AdaptiveIntBuilderBase : public ArrayBuilder {
public:
AdaptiveIntBuilderBase(uint8_t start_int_size, MemoryPool* pool);
AdaptiveIntBuilderBase(uint8_t start_int_size, MemoryPool* pool,
int64_t alignment = kDefaultBufferAlignment);

explicit AdaptiveIntBuilderBase(MemoryPool* pool)
: AdaptiveIntBuilderBase(sizeof(uint8_t), pool) {}
/// \brief Construct with a starting integer size of sizeof(uint8_t)
/// \param[in] pool forwarded to the three-argument constructor
/// \param[in] alignment forwarded to the three-argument constructor;
/// defaults to kDefaultBufferAlignment
explicit AdaptiveIntBuilderBase(MemoryPool* pool,
int64_t alignment = kDefaultBufferAlignment)
: AdaptiveIntBuilderBase(sizeof(uint8_t), pool, alignment) {}

/// \brief Append multiple nulls
/// \param[in] length the number of nulls to append
Expand Down Expand Up @@ -173,10 +175,12 @@ class ARROW_EXPORT AdaptiveUIntBuilder : public internal::AdaptiveIntBuilderBase
class ARROW_EXPORT AdaptiveIntBuilder : public internal::AdaptiveIntBuilderBase {
public:
explicit AdaptiveIntBuilder(uint8_t start_int_size,
MemoryPool* pool = default_memory_pool());
MemoryPool* pool = default_memory_pool(),
int64_t alignment = kDefaultBufferAlignment);

explicit AdaptiveIntBuilder(MemoryPool* pool = default_memory_pool())
: AdaptiveIntBuilder(sizeof(uint8_t), pool) {}
/// \brief Construct with a starting integer size of sizeof(uint8_t)
/// \param[in] pool forwarded to the three-argument constructor;
/// defaults to default_memory_pool()
/// \param[in] alignment forwarded to the three-argument constructor;
/// defaults to kDefaultBufferAlignment
explicit AdaptiveIntBuilder(MemoryPool* pool = default_memory_pool(),
int64_t alignment = kDefaultBufferAlignment)
: AdaptiveIntBuilder(sizeof(uint8_t), pool, alignment) {}

using ArrayBuilder::Advance;
using internal::AdaptiveIntBuilderBase::Reset;
Expand Down
4 changes: 3 additions & 1 deletion cpp/src/arrow/array/builder_base.h
Original file line number Diff line number Diff line change
Expand Up @@ -69,7 +69,8 @@ constexpr int64_t kListMaximumElements = std::numeric_limits<int32_t>::max() - 1
/// For example, ArrayBuilder* pointing to BinaryBuilder should be downcast before use.
class ARROW_EXPORT ArrayBuilder {
public:
explicit ArrayBuilder(MemoryPool* pool) : pool_(pool), null_bitmap_builder_(pool) {}
/// \brief Construct the builder base
/// \param[in] pool stored in pool_ and also passed to the null bitmap builder
/// \param[in] alignment stored in alignment_ and also passed to the null
/// bitmap builder; defaults to kDefaultBufferAlignment
explicit ArrayBuilder(MemoryPool* pool, int64_t alignment = kDefaultBufferAlignment)
: pool_(pool), alignment_(alignment), null_bitmap_builder_(pool, alignment) {}

ARROW_DEFAULT_MOVE_AND_ASSIGN(ArrayBuilder);

Expand Down Expand Up @@ -283,6 +284,7 @@ class ARROW_EXPORT ArrayBuilder {
const char* message);

MemoryPool* pool_;
int64_t alignment_;

TypedBufferBuilder<bool> null_bitmap_builder_;
int64_t null_count_ = 0;
Expand Down
6 changes: 3 additions & 3 deletions cpp/src/arrow/array/builder_binary.cc
Original file line number Diff line number Diff line change
Expand Up @@ -44,10 +44,10 @@ using internal::checked_cast;
// Fixed width binary

FixedSizeBinaryBuilder::FixedSizeBinaryBuilder(const std::shared_ptr<DataType>& type,
MemoryPool* pool)
: ArrayBuilder(pool),
MemoryPool* pool, int64_t alignment)
: ArrayBuilder(pool, alignment),
byte_width_(checked_cast<const FixedSizeBinaryType&>(*type).byte_width()),
byte_builder_(pool) {}
byte_builder_(pool, alignment) {}

void FixedSizeBinaryBuilder::CheckValueSize(int64_t size) {
DCHECK_EQ(size, byte_width_) << "Appending wrong size to FixedSizeBinaryBuilder";
Expand Down
10 changes: 7 additions & 3 deletions cpp/src/arrow/array/builder_binary.h
Original file line number Diff line number Diff line change
Expand Up @@ -54,8 +54,11 @@ class BaseBinaryBuilder : public ArrayBuilder {
using TypeClass = TYPE;
using offset_type = typename TypeClass::offset_type;

explicit BaseBinaryBuilder(MemoryPool* pool = default_memory_pool())
: ArrayBuilder(pool), offsets_builder_(pool), value_data_builder_(pool) {}
/// \brief Construct a binary builder
///
/// The same pool and alignment are used for the ArrayBuilder base, the
/// offsets builder and the value data builder.
/// \param[in] pool defaults to default_memory_pool()
/// \param[in] alignment defaults to kDefaultBufferAlignment
explicit BaseBinaryBuilder(MemoryPool* pool = default_memory_pool(),
int64_t alignment = kDefaultBufferAlignment)
: ArrayBuilder(pool, alignment),
offsets_builder_(pool, alignment),
value_data_builder_(pool, alignment) {}

BaseBinaryBuilder(const std::shared_ptr<DataType>& type, MemoryPool* pool)
: BaseBinaryBuilder(pool) {}
Expand Down Expand Up @@ -464,7 +467,8 @@ class ARROW_EXPORT FixedSizeBinaryBuilder : public ArrayBuilder {
using TypeClass = FixedSizeBinaryType;

explicit FixedSizeBinaryBuilder(const std::shared_ptr<DataType>& type,
MemoryPool* pool = default_memory_pool());
MemoryPool* pool = default_memory_pool(),
int64_t alignment = kDefaultBufferAlignment);

Status Append(const uint8_t* value) {
ARROW_RETURN_NOT_OK(Reserve(1));
Expand Down
8 changes: 4 additions & 4 deletions cpp/src/arrow/array/builder_decimal.cc
Original file line number Diff line number Diff line change
Expand Up @@ -36,8 +36,8 @@ class MemoryPool;
// Decimal128Builder

Decimal128Builder::Decimal128Builder(const std::shared_ptr<DataType>& type,
MemoryPool* pool)
: FixedSizeBinaryBuilder(type, pool),
MemoryPool* pool, int64_t alignment)
: FixedSizeBinaryBuilder(type, pool, alignment),
decimal_type_(internal::checked_pointer_cast<Decimal128Type>(type)) {}

Status Decimal128Builder::Append(Decimal128 value) {
Expand Down Expand Up @@ -71,8 +71,8 @@ Status Decimal128Builder::FinishInternal(std::shared_ptr<ArrayData>* out) {
// Decimal256Builder

Decimal256Builder::Decimal256Builder(const std::shared_ptr<DataType>& type,
MemoryPool* pool)
: FixedSizeBinaryBuilder(type, pool),
MemoryPool* pool, int64_t alignment)
: FixedSizeBinaryBuilder(type, pool, alignment),
decimal_type_(internal::checked_pointer_cast<Decimal256Type>(type)) {}

Status Decimal256Builder::Append(const Decimal256& value) {
Expand Down
6 changes: 4 additions & 2 deletions cpp/src/arrow/array/builder_decimal.h
Original file line number Diff line number Diff line change
Expand Up @@ -39,7 +39,8 @@ class ARROW_EXPORT Decimal128Builder : public FixedSizeBinaryBuilder {
using ValueType = Decimal128;

explicit Decimal128Builder(const std::shared_ptr<DataType>& type,
MemoryPool* pool = default_memory_pool());
MemoryPool* pool = default_memory_pool(),
int64_t alignment = kDefaultBufferAlignment);

using FixedSizeBinaryBuilder::Append;
using FixedSizeBinaryBuilder::AppendValues;
Expand Down Expand Up @@ -69,7 +70,8 @@ class ARROW_EXPORT Decimal256Builder : public FixedSizeBinaryBuilder {
using ValueType = Decimal256;

explicit Decimal256Builder(const std::shared_ptr<DataType>& type,
MemoryPool* pool = default_memory_pool());
MemoryPool* pool = default_memory_pool(),
int64_t alignment = kDefaultBufferAlignment);

using FixedSizeBinaryBuilder::Append;
using FixedSizeBinaryBuilder::AppendValues;
Expand Down
49 changes: 28 additions & 21 deletions cpp/src/arrow/array/builder_dict.h
Original file line number Diff line number Diff line change
Expand Up @@ -146,37 +146,40 @@ class DictionaryBuilderBase : public ArrayBuilder {
!is_fixed_size_binary_type<T1>::value,
const std::shared_ptr<DataType>&>
value_type,
MemoryPool* pool = default_memory_pool())
: ArrayBuilder(pool),
MemoryPool* pool = default_memory_pool(),
int64_t alignment = kDefaultBufferAlignment)
: ArrayBuilder(pool, alignment),
memo_table_(new internal::DictionaryMemoTable(pool, value_type)),
delta_offset_(0),
byte_width_(-1),
indices_builder_(start_int_size, pool),
indices_builder_(start_int_size, pool, alignment),
value_type_(value_type) {}

template <typename T1 = T>
explicit DictionaryBuilderBase(
enable_if_t<!is_fixed_size_binary_type<T1>::value, const std::shared_ptr<DataType>&>
value_type,
MemoryPool* pool = default_memory_pool())
: ArrayBuilder(pool),
MemoryPool* pool = default_memory_pool(),
int64_t alignment = kDefaultBufferAlignment)
: ArrayBuilder(pool, alignment),
memo_table_(new internal::DictionaryMemoTable(pool, value_type)),
delta_offset_(0),
byte_width_(-1),
indices_builder_(pool),
indices_builder_(pool, alignment),
value_type_(value_type) {}

template <typename T1 = T>
explicit DictionaryBuilderBase(
const std::shared_ptr<DataType>& index_type,
enable_if_t<!is_fixed_size_binary_type<T1>::value, const std::shared_ptr<DataType>&>
value_type,
MemoryPool* pool = default_memory_pool())
: ArrayBuilder(pool),
MemoryPool* pool = default_memory_pool(),
int64_t alignment = kDefaultBufferAlignment)
: ArrayBuilder(pool, alignment),
memo_table_(new internal::DictionaryMemoTable(pool, value_type)),
delta_offset_(0),
byte_width_(-1),
indices_builder_(index_type, pool),
indices_builder_(index_type, pool, alignment),
value_type_(value_type) {}

template <typename B = BuilderType, typename T1 = T>
Expand All @@ -185,35 +188,38 @@ class DictionaryBuilderBase : public ArrayBuilder {
is_fixed_size_binary_type<T1>::value,
const std::shared_ptr<DataType>&>
value_type,
MemoryPool* pool = default_memory_pool())
: ArrayBuilder(pool),
MemoryPool* pool = default_memory_pool(),
int64_t alignment = kDefaultBufferAlignment)
: ArrayBuilder(pool, alignment),
memo_table_(new internal::DictionaryMemoTable(pool, value_type)),
delta_offset_(0),
byte_width_(static_cast<const T1&>(*value_type).byte_width()),
indices_builder_(start_int_size, pool),
indices_builder_(start_int_size, pool, alignment),
value_type_(value_type) {}

template <typename T1 = T>
explicit DictionaryBuilderBase(
enable_if_fixed_size_binary<T1, const std::shared_ptr<DataType>&> value_type,
MemoryPool* pool = default_memory_pool())
: ArrayBuilder(pool),
MemoryPool* pool = default_memory_pool(),
int64_t alignment = kDefaultBufferAlignment)
: ArrayBuilder(pool, alignment),
memo_table_(new internal::DictionaryMemoTable(pool, value_type)),
delta_offset_(0),
byte_width_(static_cast<const T1&>(*value_type).byte_width()),
indices_builder_(pool),
indices_builder_(pool, alignment),
value_type_(value_type) {}

template <typename T1 = T>
explicit DictionaryBuilderBase(
const std::shared_ptr<DataType>& index_type,
enable_if_fixed_size_binary<T1, const std::shared_ptr<DataType>&> value_type,
MemoryPool* pool = default_memory_pool())
: ArrayBuilder(pool),
MemoryPool* pool = default_memory_pool(),
int64_t alignment = kDefaultBufferAlignment)
: ArrayBuilder(pool, alignment),
memo_table_(new internal::DictionaryMemoTable(pool, value_type)),
delta_offset_(0),
byte_width_(static_cast<const T1&>(*value_type).byte_width()),
indices_builder_(index_type, pool),
indices_builder_(index_type, pool, alignment),
value_type_(value_type) {}

template <typename T1 = T>
Expand All @@ -223,12 +229,13 @@ class DictionaryBuilderBase : public ArrayBuilder {

// This constructor doesn't check for errors. Use InsertMemoValues instead.
explicit DictionaryBuilderBase(const std::shared_ptr<Array>& dictionary,
MemoryPool* pool = default_memory_pool())
: ArrayBuilder(pool),
MemoryPool* pool = default_memory_pool(),
int64_t alignment = kDefaultBufferAlignment)
: ArrayBuilder(pool, alignment),
memo_table_(new internal::DictionaryMemoTable(pool, dictionary)),
delta_offset_(0),
byte_width_(-1),
indices_builder_(pool),
indices_builder_(pool, alignment),
value_type_(dictionary->type()) {}

~DictionaryBuilderBase() override = default;
Expand Down
12 changes: 7 additions & 5 deletions cpp/src/arrow/array/builder_nested.h
Original file line number Diff line number Diff line change
Expand Up @@ -51,14 +51,16 @@ class BaseListBuilder : public ArrayBuilder {
/// Use this constructor to incrementally build the value array along with offsets and
/// null bitmap.
BaseListBuilder(MemoryPool* pool, std::shared_ptr<ArrayBuilder> const& value_builder,
const std::shared_ptr<DataType>& type)
: ArrayBuilder(pool),
offsets_builder_(pool),
const std::shared_ptr<DataType>& type,
int64_t alignment = kDefaultBufferAlignment)
: ArrayBuilder(pool, alignment),
offsets_builder_(pool, alignment),
value_builder_(value_builder),
value_field_(type->field(0)->WithType(NULLPTR)) {}

BaseListBuilder(MemoryPool* pool, std::shared_ptr<ArrayBuilder> const& value_builder)
: BaseListBuilder(pool, value_builder, list(value_builder->type())) {}
/// \brief Construct a list builder whose type is derived from the value builder
///
/// Delegates to the constructor taking an explicit type, passing
/// list(value_builder->type()). Note that value_builder is dereferenced
/// here, so it must not be null.
/// \param[in] pool forwarded to the delegated constructor
/// \param[in] value_builder builder for the list's values
/// \param[in] alignment forwarded to the delegated constructor;
/// defaults to kDefaultBufferAlignment
BaseListBuilder(MemoryPool* pool, std::shared_ptr<ArrayBuilder> const& value_builder,
int64_t alignment = kDefaultBufferAlignment)
: BaseListBuilder(pool, value_builder, list(value_builder->type()), alignment) {}

Status Resize(int64_t capacity) override {
if (capacity > maximum_elements()) {
Expand Down
9 changes: 5 additions & 4 deletions cpp/src/arrow/array/builder_primitive.cc
Original file line number Diff line number Diff line change
Expand Up @@ -44,11 +44,12 @@ Status NullBuilder::FinishInternal(std::shared_ptr<ArrayData>* out) {
return Status::OK();
}

BooleanBuilder::BooleanBuilder(MemoryPool* pool)
: ArrayBuilder(pool), data_builder_(pool) {}
// The same pool and alignment are used for both the ArrayBuilder base and the
// data builder.
BooleanBuilder::BooleanBuilder(MemoryPool* pool, int64_t alignment)
: ArrayBuilder(pool, alignment), data_builder_(pool, alignment) {}

BooleanBuilder::BooleanBuilder(const std::shared_ptr<DataType>& type, MemoryPool* pool)
: BooleanBuilder(pool) {
// Type-taking overload: delegates to the (pool, alignment) constructor, then
// checks that the supplied type is the boolean type. The type is not otherwise
// used or stored here.
BooleanBuilder::BooleanBuilder(const std::shared_ptr<DataType>& type, MemoryPool* pool,
int64_t alignment)
: BooleanBuilder(pool, alignment) {
// NOTE(review): ARROW_CHECK_EQ presumably aborts on mismatch rather than
// returning an error -- confirm against the macro definition.
ARROW_CHECK_EQ(Type::BOOL, type->id());
}

Expand Down
27 changes: 18 additions & 9 deletions cpp/src/arrow/array/builder_primitive.h
Original file line number Diff line number Diff line change
Expand Up @@ -31,10 +31,13 @@ namespace arrow {

class ARROW_EXPORT NullBuilder : public ArrayBuilder {
public:
explicit NullBuilder(MemoryPool* pool = default_memory_pool()) : ArrayBuilder(pool) {}
/// \brief Construct a NullBuilder
/// \param[in] pool forwarded to ArrayBuilder; defaults to default_memory_pool()
/// \param[in] alignment forwarded to ArrayBuilder so the base records the
/// requested value instead of silently falling back to the default;
/// defaults to kDefaultBufferAlignment
explicit NullBuilder(MemoryPool* pool = default_memory_pool(),
                     int64_t alignment = kDefaultBufferAlignment)
    : ArrayBuilder(pool, alignment) {}
explicit NullBuilder(const std::shared_ptr<DataType>& type,
MemoryPool* pool = default_memory_pool())
: NullBuilder(pool) {}
MemoryPool* pool = default_memory_pool(),
int64_t alignment = kDefaultBufferAlignment)
: NullBuilder(pool, alignment) {}

/// \brief Append the specified number of null elements
Status AppendNulls(int64_t length) final {
Expand Down Expand Up @@ -82,11 +85,15 @@ class NumericBuilder : public ArrayBuilder {

template <typename T1 = T>
explicit NumericBuilder(
enable_if_parameter_free<T1, MemoryPool*> pool = default_memory_pool())
: ArrayBuilder(pool), type_(TypeTraits<T>::type_singleton()), data_builder_(pool) {}
enable_if_parameter_free<T1, MemoryPool*> pool = default_memory_pool(),
int64_t alignment = kDefaultBufferAlignment)
: ArrayBuilder(pool, alignment),
type_(TypeTraits<T>::type_singleton()),
data_builder_(pool, alignment) {}

NumericBuilder(const std::shared_ptr<DataType>& type, MemoryPool* pool)
: ArrayBuilder(pool), type_(type), data_builder_(pool) {}
/// \brief Construct a numeric builder for an explicitly supplied type
/// \param[in] type stored as the builder's type
/// \param[in] pool used for both the ArrayBuilder base and the data builder
/// \param[in] alignment used for both the ArrayBuilder base and the data
/// builder; defaults to kDefaultBufferAlignment
NumericBuilder(const std::shared_ptr<DataType>& type, MemoryPool* pool,
int64_t alignment = kDefaultBufferAlignment)
: ArrayBuilder(pool, alignment), type_(type), data_builder_(pool, alignment) {}

/// Append a single scalar and increase the size if necessary.
Status Append(const value_type val) {
Expand Down Expand Up @@ -347,10 +354,12 @@ class ARROW_EXPORT BooleanBuilder : public ArrayBuilder {
using TypeClass = BooleanType;
using value_type = bool;

explicit BooleanBuilder(MemoryPool* pool = default_memory_pool());
explicit BooleanBuilder(MemoryPool* pool = default_memory_pool(),
int64_t alignment = kDefaultBufferAlignment);

BooleanBuilder(const std::shared_ptr<DataType>& type,
MemoryPool* pool = default_memory_pool());
MemoryPool* pool = default_memory_pool(),
int64_t alignment = kDefaultBufferAlignment);

/// Write nulls as uint8_t* (0 value indicates null) into pre-allocated memory
Status AppendNulls(int64_t length) final {
Expand Down
20 changes: 12 additions & 8 deletions cpp/src/arrow/array/builder_time.h
Original file line number Diff line number Diff line change
Expand Up @@ -36,25 +36,29 @@ class ARROW_EXPORT DayTimeIntervalBuilder : public NumericBuilder<DayTimeInterva
public:
using DayMilliseconds = DayTimeIntervalType::DayMilliseconds;

explicit DayTimeIntervalBuilder(MemoryPool* pool = default_memory_pool())
: DayTimeIntervalBuilder(day_time_interval(), pool) {}
/// \brief Construct using the type returned by day_time_interval()
///
/// Delegates to the type-taking constructor.
/// \param[in] pool defaults to default_memory_pool()
/// \param[in] alignment defaults to kDefaultBufferAlignment
explicit DayTimeIntervalBuilder(MemoryPool* pool = default_memory_pool(),
int64_t alignment = kDefaultBufferAlignment)
: DayTimeIntervalBuilder(day_time_interval(), pool, alignment) {}

explicit DayTimeIntervalBuilder(std::shared_ptr<DataType> type,
MemoryPool* pool = default_memory_pool())
: NumericBuilder<DayTimeIntervalType>(type, pool) {}
MemoryPool* pool = default_memory_pool(),
int64_t alignment = kDefaultBufferAlignment)
: NumericBuilder<DayTimeIntervalType>(type, pool, alignment) {}
};

class ARROW_EXPORT MonthDayNanoIntervalBuilder
: public NumericBuilder<MonthDayNanoIntervalType> {
public:
using MonthDayNanos = MonthDayNanoIntervalType::MonthDayNanos;

explicit MonthDayNanoIntervalBuilder(MemoryPool* pool = default_memory_pool())
: MonthDayNanoIntervalBuilder(month_day_nano_interval(), pool) {}
/// \brief Construct using the type returned by month_day_nano_interval()
///
/// Delegates to the type-taking constructor.
/// \param[in] pool defaults to default_memory_pool()
/// \param[in] alignment defaults to kDefaultBufferAlignment
explicit MonthDayNanoIntervalBuilder(MemoryPool* pool = default_memory_pool(),
int64_t alignment = kDefaultBufferAlignment)
: MonthDayNanoIntervalBuilder(month_day_nano_interval(), pool, alignment) {}

explicit MonthDayNanoIntervalBuilder(std::shared_ptr<DataType> type,
MemoryPool* pool = default_memory_pool())
: NumericBuilder<MonthDayNanoIntervalType>(type, pool) {}
MemoryPool* pool = default_memory_pool(),
int64_t alignment = kDefaultBufferAlignment)
: NumericBuilder<MonthDayNanoIntervalType>(type, pool, alignment) {}
};

/// @}
Expand Down
Loading

0 comments on commit 2078af7

Please sign in to comment.