diff --git a/cpp/src/arrow/chunked_array.cc b/cpp/src/arrow/chunked_array.cc index c36b736d5d5df..ce4cc7afcb57d 100644 --- a/cpp/src/arrow/chunked_array.cc +++ b/cpp/src/arrow/chunked_array.cc @@ -48,6 +48,7 @@ ChunkedArray::ChunkedArray(ArrayVector chunks, std::shared_ptr type) type_(std::move(type)), length_(0), null_count_(0), + device_type_(DeviceAllocationType::kCPU), chunk_resolver_{chunks_} { if (type_ == nullptr) { ARROW_CHECK_GT(chunks_.size(), 0) @@ -55,9 +56,14 @@ ChunkedArray::ChunkedArray(ArrayVector chunks, std::shared_ptr type) type_ = chunks_[0]->type(); } + if (chunks_.size() > 0) { + device_type_ = chunks[0]->device_type(); + } + for (const auto& chunk : chunks_) { length_ += chunk->length(); null_count_ += chunk->null_count(); + DCHECK_EQ(device_type_, chunk->device_type()); } } @@ -93,6 +99,9 @@ bool ChunkedArray::Equals(const ChunkedArray& other, const EqualOptions& opts) c if (null_count_ != other.null_count()) { return false; } + if (device_type_ != other.device_type()) { + return false; + } // We cannot toggle check_metadata here yet, so we don't check it if (!type_->Equals(*other.type_, /*check_metadata=*/false)) { return false; @@ -148,6 +157,9 @@ bool ChunkedArray::ApproxEquals(const ChunkedArray& other, if (null_count_ != other.null_count()) { return false; } + if (device_type_ != other.device_type()) { + return false; + } // We cannot toggle check_metadata here yet, so we don't check it if (!type_->Equals(*other.type_, /*check_metadata=*/false)) { return false; diff --git a/cpp/src/arrow/chunked_array.h b/cpp/src/arrow/chunked_array.h index 5d300861d85c2..7fda020f7288b 100644 --- a/cpp/src/arrow/chunked_array.h +++ b/cpp/src/arrow/chunked_array.h @@ -25,6 +25,7 @@ #include "arrow/chunk_resolver.h" #include "arrow/compare.h" +#include "arrow/device.h" #include "arrow/result.h" #include "arrow/status.h" #include "arrow/type_fwd.h" @@ -182,11 +183,21 @@ class ARROW_EXPORT ChunkedArray { /// \return Status Status ValidateFull() const; + /// \brief Return the device_type that this chunked array's data is allocated + /// on. + /// + /// This just delegates to calling device_type on the underlying ArrayData + /// object which backs this Array. + /// + /// \return DeviceAllocationType + DeviceAllocationType device_type() const { return device_type_; } + protected: ArrayVector chunks_; std::shared_ptr type_; int64_t length_; int64_t null_count_; + DeviceAllocationType device_type_; private: template diff --git a/python/pyarrow/includes/libarrow.pxd b/python/pyarrow/includes/libarrow.pxd index 6f510cfc0c06c..3855a36fa3674 100644 --- a/python/pyarrow/includes/libarrow.pxd +++ b/python/pyarrow/includes/libarrow.pxd @@ -983,6 +983,8 @@ cdef extern from "arrow/api.h" namespace "arrow" nogil: CResult[vector[shared_ptr[CChunkedArray]]] Flatten(CMemoryPool* pool) + CDeviceAllocationType device_type() + CStatus Validate() const CStatus ValidateFull() const diff --git a/python/pyarrow/lib.pxd b/python/pyarrow/lib.pxd index a7c3b496a0045..deba7463344c0 100644 --- a/python/pyarrow/lib.pxd +++ b/python/pyarrow/lib.pxd @@ -510,6 +510,7 @@ cdef class ChunkedArray(_PandasConvertible): cdef void init(self, const shared_ptr[CChunkedArray]& chunked_array) cdef getitem(self, int64_t i) + cdef void _assert_cpu(self) except * cdef class _Tabular(_PandasConvertible): diff --git a/python/pyarrow/table.pxi b/python/pyarrow/table.pxi index 6d34c71c9df40..6f680fe1016d4 100644 --- a/python/pyarrow/table.pxi +++ b/python/pyarrow/table.pxi @@ -1407,6 +1407,28 @@ cdef class ChunkedArray(_PandasConvertible): self.init(c_chunked_array) return self + @property + def device_type(self): + """ + The device type where the chunks in the ChunkedArray reside. + + Returns + ------- + DeviceAllocationType + """ + return _wrap_device_allocation_type(self.sp_chunked_array.get().device_type()) + + @property + def is_cpu(self): + """ + Whether the ChunkedArrays's chunks are CPU-accessible. + """ + return self.device_type == DeviceAllocationType.CPU + + cdef void _assert_cpu(self) except *: + if self.sp_chunked_array.get().device_type() != CDeviceAllocationType_kCPU: + raise NotImplementedError("Implemented only for data on CPU device") + def chunked_array(arrays, type=None): """