From 6ff3048dc63a35b660c9e5043077a0174987b2a6 Mon Sep 17 00:00:00 2001 From: Micah Kornfield Date: Wed, 11 May 2016 09:00:00 +0000 Subject: [PATCH 1/7] ARROW-185: Make padding and alignment for all buffers be 64 bytes --- cpp/src/arrow/builder.cc | 11 ++++++++--- cpp/src/arrow/ipc/adapter.cc | 10 +++++++++- cpp/src/arrow/ipc/ipc-adapter-test.cc | 6 +++--- cpp/src/arrow/types/list.h | 3 +++ cpp/src/arrow/types/primitive.cc | 7 +------ cpp/src/arrow/util/buffer.cc | 17 +++++++++++++++++ cpp/src/arrow/util/buffer.h | 25 +++++++++++++++---------- cpp/src/arrow/util/memory-pool-test.cc | 1 + cpp/src/arrow/util/memory-pool.cc | 13 +++++++++++-- 9 files changed, 68 insertions(+), 25 deletions(-) diff --git a/cpp/src/arrow/builder.cc b/cpp/src/arrow/builder.cc index 87c1219025d37..1fba96169228f 100644 --- a/cpp/src/arrow/builder.cc +++ b/cpp/src/arrow/builder.cc @@ -45,12 +45,14 @@ Status ArrayBuilder::AppendToBitmap(const uint8_t* valid_bytes, int32_t length) } Status ArrayBuilder::Init(int32_t capacity) { - capacity_ = capacity; int32_t to_alloc = util::ceil_byte(capacity) / 8; null_bitmap_ = std::make_shared(pool_); RETURN_NOT_OK(null_bitmap_->Resize(to_alloc)); + // Buffers might allocate more then necessary to satisfy padding requirements + const int byte_capacity = null_bitmap_->capacity(); + capacity_ = capacity; null_bitmap_data_ = null_bitmap_->mutable_data(); - memset(null_bitmap_data_, 0, to_alloc); + memset(null_bitmap_data_, 0, byte_capacity); return Status::OK(); } @@ -60,8 +62,11 @@ Status ArrayBuilder::Resize(int32_t new_bits) { int32_t old_bytes = null_bitmap_->size(); RETURN_NOT_OK(null_bitmap_->Resize(new_bytes)); null_bitmap_data_ = null_bitmap_->mutable_data(); + // The buffer might be overpadded to deal with padding according to the spec + const int32_t byte_capacity = null_bitmap_->capacity(); + capacity_ = new_bits; if (old_bytes < new_bytes) { - memset(null_bitmap_data_ + old_bytes, 0, new_bytes - old_bytes); + memset(null_bitmap_data_ + old_bytes, 0, byte_capacity - old_bytes); } return Status::OK(); } diff --git a/cpp/src/arrow/ipc/adapter.cc b/cpp/src/arrow/ipc/adapter.cc index 34700080746e7..c4fe99ed3e25e 100644 --- a/cpp/src/arrow/ipc/adapter.cc +++ b/cpp/src/arrow/ipc/adapter.cc @@ -115,6 +115,8 @@ Status VisitArray(const Array* arr, std::vector* field_nodes } else if (arr->type_enum() == Type::STRUCT) { // TODO(wesm) return Status::NotImplemented("Struct type"); + } else { + return Status::NotImplemented("Unrecognized type"); } return Status::OK(); } @@ -142,7 +144,13 @@ class RowBatchWriter { int64_t size = 0; // The buffer might be null if we are handling zero row lengths. - if (buffer) { size = buffer->size(); } + if (buffer) { + // We use capacity here, because size might not reflect the padding + // requirements of buffers but capacity always should. + size = buffer->capacity(); + // check that padding is appropriate + DCHECK_EQ(size % 64, 0); + } // TODO(wesm): We currently have no notion of shared memory page id's, // but we've included it in the metadata IDL for when we have it in the // future. Use page=0 for now diff --git a/cpp/src/arrow/ipc/ipc-adapter-test.cc b/cpp/src/arrow/ipc/ipc-adapter-test.cc index 3b147343f772a..eb47ac6fee8a1 100644 --- a/cpp/src/arrow/ipc/ipc-adapter-test.cc +++ b/cpp/src/arrow/ipc/ipc-adapter-test.cc @@ -197,8 +197,8 @@ INSTANTIATE_TEST_CASE_P(RoundTripTests, TestWriteRowBatch, void TestGetRowBatchSize(std::shared_ptr batch) { MockMemorySource mock_source(1 << 16); - int64_t mock_header_location; - int64_t size; + int64_t mock_header_location = -1; + int64_t size = -1; ASSERT_OK(WriteRowBatch(&mock_source, batch.get(), 0, &mock_header_location)); ASSERT_OK(GetRowBatchSize(batch.get(), &size)); ASSERT_EQ(mock_source.GetExtentBytesWritten(), size); @@ -270,7 +270,7 @@ TEST_F(RecursionLimits, WriteLimit) { } TEST_F(RecursionLimits, ReadLimit) { - int64_t header_location; + int64_t header_location = -1; std::shared_ptr schema; ASSERT_OK(WriteToMmap(64, true, &header_location, &schema)); diff --git a/cpp/src/arrow/types/list.h b/cpp/src/arrow/types/list.h index e2302d917b8f6..a020b8ad22668 100644 --- a/cpp/src/arrow/types/list.h +++ b/cpp/src/arrow/types/list.h @@ -20,6 +20,7 @@ #include #include +#include #include #include "arrow/array.h" @@ -113,12 +114,14 @@ class ListBuilder : public ArrayBuilder { values_(values) {} Status Init(int32_t elements) override { + DCHECK_LT(elements, std::numeric_limits::max()); RETURN_NOT_OK(ArrayBuilder::Init(elements)); // one more then requested for offsets return offset_builder_.Resize((elements + 1) * sizeof(int32_t)); } Status Resize(int32_t capacity) override { + DCHECK_LT(capacity, std::numeric_limits::max()); // one more then requested for offsets RETURN_NOT_OK(offset_builder_.Resize((capacity + 1) * sizeof(int32_t))); return ArrayBuilder::Resize(capacity); diff --git a/cpp/src/arrow/types/primitive.cc b/cpp/src/arrow/types/primitive.cc index 9102c530e25da..125df830ef4f5 100644 --- a/cpp/src/arrow/types/primitive.cc +++ b/cpp/src/arrow/types/primitive.cc @@ -76,7 +76,6 @@ Status PrimitiveBuilder::Init(int32_t capacity) { int64_t nbytes = type_traits::bytes_required(capacity); RETURN_NOT_OK(data_->Resize(nbytes)); - memset(data_->mutable_data(), 0, nbytes); raw_data_ = reinterpret_cast(data_->mutable_data()); return Status::OK(); @@ -92,14 +91,10 @@ Status PrimitiveBuilder::Resize(int32_t capacity) { } else { RETURN_NOT_OK(ArrayBuilder::Resize(capacity)); - int64_t old_bytes = data_->size(); - int64_t new_bytes = type_traits::bytes_required(capacity); + const int64_t new_bytes = type_traits::bytes_required(capacity); RETURN_NOT_OK(data_->Resize(new_bytes)); raw_data_ = reinterpret_cast(data_->mutable_data()); - - memset(data_->mutable_data() + old_bytes, 0, new_bytes - old_bytes); } - capacity_ = capacity; return Status::OK(); } diff --git a/cpp/src/arrow/util/buffer.cc b/cpp/src/arrow/util/buffer.cc index bc9c22c10de44..5ae8129af48b4 100644 --- a/cpp/src/arrow/util/buffer.cc +++ b/cpp/src/arrow/util/buffer.cc @@ -19,15 +19,31 @@ #include +#include "arrow/util/logging.h" #include "arrow/util/memory-pool.h" #include "arrow/util/status.h" namespace arrow { +namespace { +int64_t RoundUpToMultipleOf64(int64_t num) { + DCHECK_GE(num, 0); + constexpr int64_t round_to = 64; + constexpr int64_t multiple_bitmask = round_to - 1; + int64_t remainder = num & multiple_bitmask; + int rounded = num; + if (remainder) { rounded += 64 - remainder; } + // handle overflow. This should result in a malloc error upstream + if (rounded > 0) { num = rounded; } + return num; +} +} // namespace + Buffer::Buffer(const std::shared_ptr& parent, int64_t offset, int64_t size) { data_ = parent->data() + offset; size_ = size; parent_ = parent; + capacity_ = size; } Buffer::~Buffer() {} @@ -48,6 +64,7 @@ PoolBuffer::~PoolBuffer() { Status PoolBuffer::Reserve(int64_t new_capacity) { if (!mutable_data_ || new_capacity > capacity_) { uint8_t* new_data; + new_capacity = RoundUpToMultipleOf64(new_capacity); if (mutable_data_) { RETURN_NOT_OK(pool_->Allocate(new_capacity, &new_data)); memcpy(new_data, mutable_data_, size_); diff --git a/cpp/src/arrow/util/buffer.h b/cpp/src/arrow/util/buffer.h index 5ef0076953cea..ef1e43860858a 100644 --- a/cpp/src/arrow/util/buffer.h +++ b/cpp/src/arrow/util/buffer.h @@ -36,15 +36,20 @@ class Status; // Buffer classes // Immutable API for a chunk of bytes which may or may not be owned by the -// class instance +// class instance. Buffers have two related notions of length: size and +// capacity. Size is the number of bytes that might have valid data. +// Capacity is the number of bytes that where allocated for the buffer in +// total. +// The following invariant is always true: Size < Capacity class Buffer : public std::enable_shared_from_this { public: - Buffer(const uint8_t* data, int64_t size) : data_(data), size_(size) {} + Buffer(const uint8_t* data, int64_t size) : data_(data), size_(size), capacity_(size) {} virtual ~Buffer(); // An offset into data that is owned by another buffer, but we want to be // able to retain a valid pointer to it even after other shared_ptr's to the // parent buffer have been destroyed + // TODO(emkornfield) how will this play with 64 byte alignment/padding? Buffer(const std::shared_ptr& parent, int64_t offset, int64_t size); std::shared_ptr get_shared_ptr() { return shared_from_this(); } @@ -63,6 +68,7 @@ class Buffer : public std::enable_shared_from_this { (data_ == other.data_ || !memcmp(data_, other.data_, size_))); } + int64_t capacity() const { return capacity_; } const uint8_t* data() const { return data_; } int64_t size() const { return size_; } @@ -76,6 +82,7 @@ class Buffer : public std::enable_shared_from_this { protected: const uint8_t* data_; int64_t size_; + int64_t capacity_; // nullptr by default, but may be set std::shared_ptr parent_; @@ -113,10 +120,7 @@ class ResizableBuffer : public MutableBuffer { virtual Status Reserve(int64_t new_capacity) = 0; protected: - ResizableBuffer(uint8_t* data, int64_t size) - : MutableBuffer(data, size), capacity_(size) {} - - int64_t capacity_; + ResizableBuffer(uint8_t* data, int64_t size) : MutableBuffer(data, size) {} }; // A Buffer whose lifetime is tied to a particular MemoryPool @@ -125,8 +129,8 @@ class PoolBuffer : public ResizableBuffer { explicit PoolBuffer(MemoryPool* pool = nullptr); virtual ~PoolBuffer(); - virtual Status Resize(int64_t new_size); - virtual Status Reserve(int64_t new_capacity); + Status Resize(int64_t new_size) override; + Status Reserve(int64_t new_capacity) override; private: MemoryPool* pool_; @@ -138,10 +142,11 @@ class BufferBuilder { public: explicit BufferBuilder(MemoryPool* pool) : pool_(pool), capacity_(0), size_(0) {} + // Resizes the buffer to the nearest multiple of 64 bytes per Layout.md Status Resize(int32_t elements) { if (capacity_ == 0) { buffer_ = std::make_shared(pool_); } - capacity_ = elements; - RETURN_NOT_OK(buffer_->Resize(capacity_)); + RETURN_NOT_OK(buffer_->Resize(elements)); + capacity_ = buffer_->capacity(); data_ = buffer_->mutable_data(); return Status::OK(); } diff --git a/cpp/src/arrow/util/memory-pool-test.cc b/cpp/src/arrow/util/memory-pool-test.cc index e4600a9bd9b27..4ab9736c2b468 100644 --- a/cpp/src/arrow/util/memory-pool-test.cc +++ b/cpp/src/arrow/util/memory-pool-test.cc @@ -31,6 +31,7 @@ TEST(DefaultMemoryPool, MemoryTracking) { uint8_t* data; ASSERT_OK(pool->Allocate(100, &data)); + EXPECT_EQ(0, reinterpret_cast(data) % 64); ASSERT_EQ(100, pool->bytes_allocated()); pool->Free(data, 100); diff --git a/cpp/src/arrow/util/memory-pool.cc b/cpp/src/arrow/util/memory-pool.cc index 961554fe06bcc..b21d61bf1e8ee 100644 --- a/cpp/src/arrow/util/memory-pool.cc +++ b/cpp/src/arrow/util/memory-pool.cc @@ -17,6 +17,7 @@ #include "arrow/util/memory-pool.h" +#include #include #include #include @@ -44,14 +45,22 @@ class InternalMemoryPool : public MemoryPool { }; Status InternalMemoryPool::Allocate(int64_t size, uint8_t** out) { + constexpr size_t kAlignment = 64; std::lock_guard guard(pool_lock_); - *out = static_cast(std::malloc(size)); - if (*out == nullptr) { + // TODO(emkornfield) find something compatible with windows + const int result = posix_memalign(reinterpret_cast(out), kAlignment, size); + if (result == ENOMEM) { std::stringstream ss; ss << "malloc of size " << size << " failed"; return Status::OutOfMemory(ss.str()); } + if (result == EINVAL) { + std::stringstream ss; + ss << "invalid alignment parameter: " << kAlignment; + return Status::Invalid(ss.str()); + } + bytes_allocated_ += size; return Status::OK(); From 05653cbbda8069ca0fdcc225517c082bed91fd99 Mon Sep 17 00:00:00 2001 From: Micah Kornfield Date: Fri, 13 May 2016 08:01:00 +0000 Subject: [PATCH 2/7] add back in memsets because they make valgrind happy --- cpp/src/arrow/types/primitive.cc | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/cpp/src/arrow/types/primitive.cc b/cpp/src/arrow/types/primitive.cc index 125df830ef4f5..57a3f1e4e150b 100644 --- a/cpp/src/arrow/types/primitive.cc +++ b/cpp/src/arrow/types/primitive.cc @@ -76,6 +76,8 @@ Status PrimitiveBuilder::Init(int32_t capacity) { int64_t nbytes = type_traits::bytes_required(capacity); RETURN_NOT_OK(data_->Resize(nbytes)); + // TODO(emkornfield) valgrind complains without this + memset(data_->mutable_data(), 0, nbytes); raw_data_ = reinterpret_cast(data_->mutable_data()); return Status::OK(); @@ -90,10 +92,12 @@ Status PrimitiveBuilder::Resize(int32_t capacity) { RETURN_NOT_OK(Init(capacity)); } else { RETURN_NOT_OK(ArrayBuilder::Resize(capacity)); - + const int64_t old_bytes = data_->size(); const int64_t new_bytes = type_traits::bytes_required(capacity); RETURN_NOT_OK(data_->Resize(new_bytes)); raw_data_ = reinterpret_cast(data_->mutable_data()); + + memset(data_->mutable_data() + old_bytes, 0, new_bytes - old_bytes); } return Status::OK(); } From 11b3fd793cf70904e6c825e3149f61ffa0f3dd00 Mon Sep 17 00:00:00 2001 From: Micah Kornfield Date: Mon, 16 May 2016 08:56:33 +0000 Subject: [PATCH 3/7] replace cython string conversion with string builder --- python/src/pyarrow/adapters/pandas.cc | 41 ++++++--------------------- 1 file changed, 9 insertions(+), 32 deletions(-) diff --git a/python/src/pyarrow/adapters/pandas.cc b/python/src/pyarrow/adapters/pandas.cc index b39fde92034aa..5159d86865caa 100644 --- a/python/src/pyarrow/adapters/pandas.cc +++ b/python/src/pyarrow/adapters/pandas.cc @@ -147,17 +147,12 @@ class ArrowSerializer { Status ConvertObjectStrings(std::shared_ptr* out) { PyObject** objects = reinterpret_cast(PyArray_DATA(arr_)); + arrow::TypePtr string_type(new arrow::StringType()); + arrow::StringBuilder string_builder(pool_, string_type); + RETURN_ARROW_NOT_OK(string_builder.Resize(length_)); - auto offsets_buffer = std::make_shared(pool_); - RETURN_ARROW_NOT_OK(offsets_buffer->Resize(sizeof(int32_t) * (length_ + 1))); - int32_t* offsets = reinterpret_cast(offsets_buffer->mutable_data()); - - arrow::BufferBuilder data_builder(pool_); arrow::Status s; PyObject* obj; - int length; - int offset = 0; - int64_t null_count = 0; for (int64_t i = 0; i < length_; ++i) { obj = objects[i]; if (PyUnicode_Check(obj)) { @@ -166,38 +161,20 @@ class ArrowSerializer { PyErr_Clear(); return Status::TypeError("failed converting unicode to UTF8"); } - length = PyBytes_GET_SIZE(obj); - s = data_builder.Append( - reinterpret_cast(PyBytes_AS_STRING(obj)), length); + const int32_t length = PyBytes_GET_SIZE(obj); + s = string_builder.Append(PyBytes_AS_STRING(obj), length); Py_DECREF(obj); if (!s.ok()) { return Status::ArrowError(s.ToString()); } - util::set_bit(null_bitmap_data_, i); } else if (PyBytes_Check(obj)) { - length = PyBytes_GET_SIZE(obj); - RETURN_ARROW_NOT_OK(data_builder.Append( - reinterpret_cast(PyBytes_AS_STRING(obj)), length)); - util::set_bit(null_bitmap_data_, i); + const int32_t length = PyBytes_GET_SIZE(obj); + RETURN_ARROW_NOT_OK(string_builder.Append(PyBytes_AS_STRING(obj), length)); } else { - // NULL - // No change to offset - length = 0; - ++null_count; + string_builder.AppendNull(); } - offsets[i] = offset; - offset += length; } - // End offset - offsets[length_] = offset; - - std::shared_ptr data_buffer = data_builder.Finish(); - - auto values = std::make_shared(data_buffer->size(), - data_buffer); - *out = std::shared_ptr( - new arrow::StringArray(length_, offsets_buffer, values, null_count, - null_bitmap_)); + *out = std::shared_ptr(string_builder.Finish()); return Status::OK(); } From 75432676e05de70aadf306a931a03d184bdf8246 Mon Sep 17 00:00:00 2001 From: Micah Kornfield Date: Tue, 17 May 2016 09:55:48 +0000 Subject: [PATCH 4/7] cleanup --- cpp/src/arrow/ipc/adapter.cc | 12 ++++++++- cpp/src/arrow/util/bit-util-test.cc | 10 ++++++++ cpp/src/arrow/util/bit-util.h | 4 +++ cpp/src/arrow/util/buffer.cc | 11 ++++----- cpp/src/arrow/util/buffer.h | 11 ++++++--- cpp/src/arrow/util/memory-pool.cc | 38 +++++++++++++++++------------ 6 files changed, 61 insertions(+), 25 deletions(-) diff --git a/cpp/src/arrow/ipc/adapter.cc b/cpp/src/arrow/ipc/adapter.cc index c4fe99ed3e25e..45cc288cd6b9e 100644 --- a/cpp/src/arrow/ipc/adapter.cc +++ b/cpp/src/arrow/ipc/adapter.cc @@ -43,6 +43,15 @@ namespace flatbuf = apache::arrow::flatbuf; namespace ipc { +namespace { +Status CheckMultipleOf64(int64_t size) { + if (util::is_multiple_of_64(size)) { return Status::OK(); } + return Status::Invalid( + "Attempted to write a buffer that " + "wasn't a multiple of 64 bytes"); +} +} + static bool IsPrimitive(const DataType* type) { DCHECK(type != nullptr); switch (type->type) { @@ -149,7 +158,7 @@ class RowBatchWriter { // requirements of buffers but capacity always should. size = buffer->capacity(); // check that padding is appropriate - DCHECK_EQ(size % 64, 0); + RETURN_NOT_OK(CheckMultipleOf64(size)); } // TODO(wesm): We currently have no notion of shared memory page id's, // but we've included it in the metadata IDL for when we have it in the @@ -313,6 +322,7 @@ class RowBatchReader::Impl { Status GetBuffer(int buffer_index, std::shared_ptr* out) { BufferMetadata metadata = metadata_->buffer(buffer_index); + RETURN_NOT_OK(CheckMultipleOf64(metadata.length)); return source_->ReadAt(metadata.offset, metadata.length, out); } diff --git a/cpp/src/arrow/util/bit-util-test.cc b/cpp/src/arrow/util/bit-util-test.cc index 26554d2c9069c..e1d8a0808b41a 100644 --- a/cpp/src/arrow/util/bit-util-test.cc +++ b/cpp/src/arrow/util/bit-util-test.cc @@ -21,6 +21,16 @@ namespace arrow { +TEST(UtilTests, TestIsMultipleOf64) { + using util::is_multiple_of_64; + EXPECT_TRUE(is_multiple_of_64(64)); + EXPECT_TRUE(is_multiple_of_64(0)); + EXPECT_TRUE(is_multiple_of_64(128)); + EXPECT_TRUE(is_multiple_of_64(192)); + EXPECT_FALSE(is_multiple_of_64(23)); + EXPECT_FALSE(is_multiple_of_64(32)); +} + TEST(UtilTests, TestNextPower2) { using util::next_power2; diff --git a/cpp/src/arrow/util/bit-util.h b/cpp/src/arrow/util/bit-util.h index 1f0f08c4d88ef..a6c8dd904d8e0 100644 --- a/cpp/src/arrow/util/bit-util.h +++ b/cpp/src/arrow/util/bit-util.h @@ -71,6 +71,10 @@ static inline int64_t next_power2(int64_t n) { return n; } +static inline bool is_multiple_of_64(int64_t n) { + return (n & 63) == 0; +} + void bytes_to_bits(const std::vector& bytes, uint8_t* bits); Status bytes_to_bits(const std::vector&, std::shared_ptr*); diff --git a/cpp/src/arrow/util/buffer.cc b/cpp/src/arrow/util/buffer.cc index 5ae8129af48b4..ec5df08973aeb 100644 --- a/cpp/src/arrow/util/buffer.cc +++ b/cpp/src/arrow/util/buffer.cc @@ -29,12 +29,11 @@ namespace { int64_t RoundUpToMultipleOf64(int64_t num) { DCHECK_GE(num, 0); constexpr int64_t round_to = 64; - constexpr int64_t multiple_bitmask = round_to - 1; - int64_t remainder = num & multiple_bitmask; - int rounded = num; - if (remainder) { rounded += 64 - remainder; } - // handle overflow. This should result in a malloc error upstream - if (rounded > 0) { num = rounded; } + constexpr int64_t force_carry_addend = round_to - 1; + constexpr int64_t truncate_bitmask = ~(round_to - 1); + constexpr int64_t max_roundable_num = std::numeric_limits::max() - round_to; + if (num <= max_roundable_num) { return (num + force_carry_addend) & truncate_bitmask; } + // handle overflow case. This should result in a malloc error upstream return num; } } // namespace diff --git a/cpp/src/arrow/util/buffer.h b/cpp/src/arrow/util/buffer.h index ef1e43860858a..f845d67761fe4 100644 --- a/cpp/src/arrow/util/buffer.h +++ b/cpp/src/arrow/util/buffer.h @@ -49,7 +49,10 @@ class Buffer : public std::enable_shared_from_this { // An offset into data that is owned by another buffer, but we want to be // able to retain a valid pointer to it even after other shared_ptr's to the // parent buffer have been destroyed - // TODO(emkornfield) how will this play with 64 byte alignment/padding? + // + // This method makes no assertions about alignment or padding of the buffer but + // in general we expected buffers to be aligned and padded to 64 bytes. In the future + // we might add utility methods to help determine if a buffer satisfies this contract. Buffer(const std::shared_ptr& parent, int64_t offset, int64_t size); std::shared_ptr get_shared_ptr() { return shared_from_this(); } @@ -112,11 +115,13 @@ class MutableBuffer : public Buffer { class ResizableBuffer : public MutableBuffer { public: // Change buffer reported size to indicated size, allocating memory if - // necessary + // necessary. This will ensure that the capacity of the buffer is a multiple + // of 64 bytes as defined in Layout.md. virtual Status Resize(int64_t new_size) = 0; // Ensure that buffer has enough memory allocated to fit the indicated - // capacity. Does not change buffer's reported size + // capacity (and meets the 64 byte padding requirement in Layout.md). + // It does not change buffer's reported size. virtual Status Reserve(int64_t new_capacity) = 0; protected: diff --git a/cpp/src/arrow/util/memory-pool.cc b/cpp/src/arrow/util/memory-pool.cc index b21d61bf1e8ee..0a58e5aa21f72 100644 --- a/cpp/src/arrow/util/memory-pool.cc +++ b/cpp/src/arrow/util/memory-pool.cc @@ -26,6 +26,28 @@ namespace arrow { +namespace { +// Allocate memory according to the alignment requirements for Arrow +// (as of May 2016 64 bytes) +Status AllocateAligned(int64_t size, uint8_t** out) { + // TODO(emkornfield) find something compatible with windows + constexpr size_t kAlignment = 64; + const int result = posix_memalign(reinterpret_cast(out), kAlignment, size); + if (result == ENOMEM) { + std::stringstream ss; + ss << "malloc of size " << size << " failed"; + return Status::OutOfMemory(ss.str()); + } + + if (result == EINVAL) { + std::stringstream ss; + ss << "invalid alignment parameter: " << kAlignment; + return Status::Invalid(ss.str()); + } + return Status::OK(); +} +} // namespace + MemoryPool::~MemoryPool() {} class InternalMemoryPool : public MemoryPool { @@ -45,22 +67,8 @@ class InternalMemoryPool : public MemoryPool { }; Status InternalMemoryPool::Allocate(int64_t size, uint8_t** out) { - constexpr size_t kAlignment = 64; std::lock_guard guard(pool_lock_); - // TODO(emkornfield) find something compatible with windows - const int result = posix_memalign(reinterpret_cast(out), kAlignment, size); - if (result == ENOMEM) { - std::stringstream ss; - ss << "malloc of size " << size << " failed"; - return Status::OutOfMemory(ss.str()); - } - - if (result == EINVAL) { - std::stringstream ss; - ss << "invalid alignment parameter: " << kAlignment; - return Status::Invalid(ss.str()); - } - + RETURN_NOT_OK(AllocateAligned(size, out)); bytes_allocated_ += size; return Status::OK(); From c140e0454c31471de2da53eece99d3ed6387e3af Mon Sep 17 00:00:00 2001 From: Micah Kornfield Date: Tue, 17 May 2016 10:01:13 +0000 Subject: [PATCH 5/7] fix lint --- cpp/src/arrow/util/buffer.cc | 1 + 1 file changed, 1 insertion(+) diff --git a/cpp/src/arrow/util/buffer.cc b/cpp/src/arrow/util/buffer.cc index ec5df08973aeb..703ef8384ac07 100644 --- a/cpp/src/arrow/util/buffer.cc +++ b/cpp/src/arrow/util/buffer.cc @@ -18,6 +18,7 @@ #include "arrow/util/buffer.h" #include +#include #include "arrow/util/logging.h" #include "arrow/util/memory-pool.h" From 1d006d8193868f31a6153da837e57f20b7e40a4b Mon Sep 17 00:00:00 2001 From: Micah Kornfield Date: Tue, 17 May 2016 10:05:16 +0000 Subject: [PATCH 6/7] fix warning --- cpp/src/arrow/types/list.cc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cpp/src/arrow/types/list.cc b/cpp/src/arrow/types/list.cc index fc3331139c6d8..645a2121139e2 100644 --- a/cpp/src/arrow/types/list.cc +++ b/cpp/src/arrow/types/list.cc @@ -47,7 +47,7 @@ bool ListArray::Equals(const std::shared_ptr& arr) const { Status ListArray::Validate() const { if (length_ < 0) { return Status::Invalid("Length was negative"); } if (!offset_buf_) { return Status::Invalid("offset_buf_ was null"); } - if (offset_buf_->size() / sizeof(int32_t) < length_) { + if (offset_buf_->size() / (int)sizeof(int32_t) < length_) { std::stringstream ss; ss << "offset buffer size (bytes): " << offset_buf_->size() << " isn't large enough for length: " << length_; From e3cca144d3c336bedbbe6e9da60cbd6aa83c66bd Mon Sep 17 00:00:00 2001 From: Micah Kornfield Date: Tue, 17 May 2016 10:21:44 +0000 Subject: [PATCH 7/7] fix cast style --- cpp/src/arrow/types/list.cc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cpp/src/arrow/types/list.cc b/cpp/src/arrow/types/list.cc index 645a2121139e2..76e7fe5f4d429 100644 --- a/cpp/src/arrow/types/list.cc +++ b/cpp/src/arrow/types/list.cc @@ -47,7 +47,7 @@ bool ListArray::Equals(const std::shared_ptr& arr) const { Status ListArray::Validate() const { if (length_ < 0) { return Status::Invalid("Length was negative"); } if (!offset_buf_) { return Status::Invalid("offset_buf_ was null"); } - if (offset_buf_->size() / (int)sizeof(int32_t) < length_) { + if (offset_buf_->size() / static_cast(sizeof(int32_t)) < length_) { std::stringstream ss; ss << "offset buffer size (bytes): " << offset_buf_->size() << " isn't large enough for length: " << length_;