diff --git a/python/pyarrow/array.pxi b/python/pyarrow/array.pxi
index 64a6ceaa6eaa4..a9af1419ae496 100644
--- a/python/pyarrow/array.pxi
+++ b/python/pyarrow/array.pxi
@@ -956,6 +956,7 @@ cdef class Array(_PandasConvertible):
         +"two-and-a-half"
         """
+        self._assert_cpu()
         cdef c_string result
         with nogil:
             result = self.ap.Diff(deref(other.ap))
@@ -982,6 +983,7 @@ cdef class Array(_PandasConvertible):
         -------
         cast : Array
         """
+        self._assert_cpu()
         return _pc().cast(self, target_type, safe=safe, options=options,
                           memory_pool=memory_pool)
@@ -1000,6 +1002,7 @@ cdef class Array(_PandasConvertible):
         -------
         view : Array
         """
+        self._assert_cpu()
         cdef DataType type = ensure_type(target_type)
         cdef shared_ptr[CArray] result
         with nogil:
@@ -1022,6 +1025,7 @@ cdef class Array(_PandasConvertible):
         sum : Scalar
             A scalar containing the sum value.
         """
+        self._assert_cpu()
         options = _pc().ScalarAggregateOptions(**kwargs)
         return _pc().call_function('sum', [self], options)
@@ -1034,6 +1038,7 @@ cdef class Array(_PandasConvertible):
         unique : Array
             An array of the same data type, with deduplicated elements.
         """
+        self._assert_cpu()
         return _pc().call_function('unique', [self])
 
     def dictionary_encode(self, null_encoding='mask'):
@@ -1052,6 +1057,7 @@ cdef class Array(_PandasConvertible):
         encoded : DictionaryArray
             A dictionary-encoded version of this array.
         """
+        self._assert_cpu()
         options = _pc().DictionaryEncodeOptions(null_encoding)
         return _pc().call_function('dictionary_encode', [self], options)
@@ -1064,6 +1070,7 @@ cdef class Array(_PandasConvertible):
         StructArray
             An array of structs
         """
+        self._assert_cpu()
         return _pc().call_function('value_counts', [self])
 
     @staticmethod
@@ -1105,6 +1112,7 @@ cdef class Array(_PandasConvertible):
                      memory_pool=memory_pool)
 
     def __reduce__(self):
+        self._assert_cpu()
         return _restore_array, \
             (_reduce_array_data(self.sp_array.get().data().get()),)
@@ -1172,6 +1180,7 @@ cdef class Array(_PandasConvertible):
 
     @property
     def null_count(self):
+        self._assert_cpu()
         return self.sp_array.get().null_count()
 
     @property
@@ -1191,9 +1200,8 @@ cdef class Array(_PandasConvertible):
         The dictionary of dictionary arrays will always be counted in their
         entirety even if the array only references a portion of the dictionary.
         """
-        cdef:
-            CResult[int64_t] c_size_res
-
+        self._assert_cpu()
+        cdef CResult[int64_t] c_size_res
         with nogil:
             c_size_res = ReferencedBufferSize(deref(self.ap))
             size = GetResultValue(c_size_res)
@@ -1210,16 +1218,17 @@ cdef class Array(_PandasConvertible):
         If a buffer is referenced multiple times then it will only be
         counted once.
         """
-        cdef:
-            int64_t total_buffer_size
-
+        self._assert_cpu()
+        cdef int64_t total_buffer_size
         total_buffer_size = TotalBufferSize(deref(self.ap))
         return total_buffer_size
 
     def __sizeof__(self):
+        self._assert_cpu()
         return super(Array, self).__sizeof__() + self.nbytes
 
     def __iter__(self):
+        self._assert_cpu()
         for i in range(len(self)):
             yield self.getitem(i)
@@ -1252,6 +1261,8 @@ cdef class Array(_PandasConvertible):
             If the array should be rendered as a single line of text
            or if each element should be on its own line.
         """
+        self._assert_cpu()
+
         cdef:
             c_string result
             PrettyPrintOptions options
@@ -1307,6 +1318,8 @@ cdef class Array(_PandasConvertible):
         -------
         bool
         """
+        self._assert_cpu()
+        other._assert_cpu()
         return self.ap.Equals(deref(other.ap))
 
     def __len__(self):
@@ -1331,6 +1344,7 @@ cdef class Array(_PandasConvertible):
         -------
         array : boolean Array
         """
+        self._assert_cpu()
         options = _pc().NullOptions(nan_is_null=nan_is_null)
         return _pc().call_function('is_null', [self], options)
@@ -1342,12 +1356,14 @@ cdef class Array(_PandasConvertible):
         -------
         array : boolean Array
         """
+        self._assert_cpu()
         return _pc().call_function('is_nan', [self])
 
     def is_valid(self):
         """
         Return BooleanArray indicating the non-null values.
         """
+        self._assert_cpu()
         return _pc().is_valid(self)
 
     def fill_null(self, fill_value):
@@ -1364,6 +1380,7 @@ cdef class Array(_PandasConvertible):
         result : Array
             A new array with nulls replaced by the given value.
         """
+        self._assert_cpu()
         return _pc().fill_null(self, fill_value)
 
     def __getitem__(self, key):
@@ -1380,12 +1397,14 @@ cdef class Array(_PandasConvertible):
         -------
         value : Scalar (index) or Array (slice)
         """
+        self._assert_cpu()
         if isinstance(key, slice):
             return _normalize_slice(self, key)
 
         return self.getitem(_normalize_index(key, self.length()))
 
     cdef getitem(self, int64_t i):
+        self._assert_cpu()
         return Scalar.wrap(GetResultValue(self.ap.GetScalar(i)))
 
     def slice(self, offset=0, length=None):
@@ -1404,8 +1423,7 @@ cdef class Array(_PandasConvertible):
         -------
         sliced : RecordBatch
         """
-        cdef:
-            shared_ptr[CArray] result
+        cdef shared_ptr[CArray] result
 
         if offset < 0:
             raise IndexError('Offset must be non-negative')
@@ -1436,12 +1454,14 @@ cdef class Array(_PandasConvertible):
         taken : Array
             An array with the same datatype, containing the taken values.
         """
+        self._assert_cpu()
         return _pc().take(self, indices)
 
     def drop_null(self):
         """
         Remove missing values from an array.
         """
+        self._assert_cpu()
         return _pc().drop_null(self)
 
     def filter(self, object mask, *, null_selection_behavior='drop'):
@@ -1463,6 +1483,7 @@ cdef class Array(_PandasConvertible):
             An array of the same type, with only the elements selected by
             the boolean mask.
         """
+        self._assert_cpu()
         return _pc().filter(self, mask,
                             null_selection_behavior=null_selection_behavior)
@@ -1488,6 +1509,7 @@ cdef class Array(_PandasConvertible):
         index : Int64Scalar
             The index of the value in the array (-1 if not found).
         """
+        self._assert_cpu()
         return _pc().index(self, value, start, end, memory_pool=memory_pool)
 
     def sort(self, order="ascending", **kwargs):
@@ -1507,6 +1529,7 @@ cdef class Array(_PandasConvertible):
         -------
         result : Array
         """
+        self._assert_cpu()
         indices = _pc().sort_indices(
             self,
             options=_pc().SortOptions(sort_keys=[("", order)], **kwargs)
@@ -1514,9 +1537,12 @@ cdef class Array(_PandasConvertible):
         return self.take(indices)
 
     def _to_pandas(self, options, types_mapper=None, **kwargs):
+        self._assert_cpu()
         return _array_like_to_pandas(self, options, types_mapper=types_mapper)
 
     def __array__(self, dtype=None, copy=None):
+        self._assert_cpu()
+
         if copy is False:
             try:
                 values = self.to_numpy(zero_copy_only=True)
@@ -1566,6 +1592,8 @@ cdef class Array(_PandasConvertible):
         -------
         array : numpy.ndarray
         """
+        self._assert_cpu()
+
         cdef:
             PyObject* out
             PandasOptions c_options
@@ -1604,6 +1632,7 @@ cdef class Array(_PandasConvertible):
         -------
         lst : list
         """
+        self._assert_cpu()
         return [x.as_py() for x in self]
 
     def tolist(self):
@@ -1629,6 +1658,7 @@ cdef class Array(_PandasConvertible):
         ArrowInvalid
         """
         if full:
+            self._assert_cpu()
             with nogil:
                 check_status(self.ap.ValidateFull())
         else:
@@ -1737,6 +1767,8 @@ cdef class Array(_PandasConvertible):
             A pair of PyCapsules containing a C ArrowSchema and ArrowArray,
             respectively.
         """
+        self._assert_cpu()
+
         cdef:
             ArrowArray* c_array
             ArrowSchema* c_schema
@@ -1885,6 +1917,28 @@ cdef class Array(_PandasConvertible):
         device = GetResultValue(ExportDevice(self.sp_array))
         return device.device_type, device.device_id
 
+    @property
+    def device_type(self):
+        """
+        The device type where the array resides.
+
+        Returns
+        -------
+        DeviceAllocationType
+        """
+        return _wrap_device_allocation_type(self.sp_array.get().device_type())
+
+    @property
+    def is_cpu(self):
+        """
+        Whether the array is CPU-accessible.
+        """
+        return self.device_type == DeviceAllocationType.CPU
+
+    cdef void _assert_cpu(self) except *:
+        if self.sp_array.get().device_type() != CDeviceAllocationType_kCPU:
+            raise NotImplementedError("Implemented only for data on CPU device")
+
 
 cdef _array_like_to_pandas(obj, options, types_mapper):
     cdef:
diff --git a/python/pyarrow/includes/libarrow.pxd b/python/pyarrow/includes/libarrow.pxd
index 53ad95f2430be..5d9e7fcaeb25b 100644
--- a/python/pyarrow/includes/libarrow.pxd
+++ b/python/pyarrow/includes/libarrow.pxd
@@ -234,6 +234,7 @@ cdef extern from "arrow/api.h" namespace "arrow" nogil:
         CStatus Validate() const
         CStatus ValidateFull() const
         CResult[shared_ptr[CArray]] View(const shared_ptr[CDataType]& type)
+        CDeviceAllocationType device_type()
 
     shared_ptr[CArray] MakeArray(const shared_ptr[CArrayData]& data)
     CResult[shared_ptr[CArray]] MakeArrayOfNull(
diff --git a/python/pyarrow/lib.pxd b/python/pyarrow/lib.pxd
index 1bc639cc8d2ba..082d8470cdbb0 100644
--- a/python/pyarrow/lib.pxd
+++ b/python/pyarrow/lib.pxd
@@ -286,6 +286,7 @@ cdef class Array(_PandasConvertible):
     cdef void init(self, const shared_ptr[CArray]& sp_array) except *
     cdef getitem(self, int64_t i)
     cdef int64_t length(self)
+    cdef void _assert_cpu(self) except *
 
 
 cdef class Tensor(_Weakrefable):
diff --git a/python/pyarrow/tests/test_array.py b/python/pyarrow/tests/test_array.py
index becf00ead829d..62a3eebb8c039 100644
--- a/python/pyarrow/tests/test_array.py
+++ b/python/pyarrow/tests/test_array.py
@@ -4012,3 +4012,90 @@ def test_swapped_byte_order_fails(numpy_native_dtype):
     # Struct type array
     with pytest.raises(pa.ArrowNotImplementedError):
         pa.StructArray.from_arrays([np_arr], names=['a'])
+
+
+def test_non_cpu_array():
+    cuda = pytest.importorskip("pyarrow.cuda")
+    ctx = cuda.Context(0)
+
+    data = np.arange(4, dtype=np.int32)
+    validity = np.array([True, False, True, False], dtype=np.bool_)
+    cuda_data_buf = ctx.buffer_from_data(data)
+    cuda_validity_buf = ctx.buffer_from_data(validity)
+    arr = pa.Array.from_buffers(pa.int32(), 4, [None, cuda_data_buf])
+    arr2 = pa.Array.from_buffers(pa.int32(), 4, [None, cuda_data_buf])
+    arr_with_nulls = pa.Array.from_buffers(
+        pa.int32(), 4, [cuda_validity_buf, cuda_data_buf])
+
+    # Supported
+    arr.validate()
+    assert arr.offset == 0
+    assert arr.buffers() == [None, cuda_data_buf]
+    assert arr.device_type == pa.DeviceAllocationType.CUDA
+    assert arr.is_cpu is False
+    assert len(arr) == 4
+    assert arr.slice(2, 2).offset == 2
+
+    # TODO support DLPack for CUDA
+    with pytest.raises(NotImplementedError):
+        arr.__dlpack__()
+    with pytest.raises(NotImplementedError):
+        arr.__dlpack_device__()
+
+    # Not Supported
+    with pytest.raises(NotImplementedError):
+        arr.diff(arr2)
+    with pytest.raises(NotImplementedError):
+        arr.cast(pa.int64())
+    with pytest.raises(NotImplementedError):
+        arr.view(pa.int64())
+    with pytest.raises(NotImplementedError):
+        arr.sum()
+    with pytest.raises(NotImplementedError):
+        arr.unique()
+    with pytest.raises(NotImplementedError):
+        arr.dictionary_encode()
+    with pytest.raises(NotImplementedError):
+        arr.value_counts()
+    with pytest.raises(NotImplementedError):
+        arr_with_nulls.null_count
+    with pytest.raises(NotImplementedError):
+        arr.nbytes
+    with pytest.raises(NotImplementedError):
+        arr.get_total_buffer_size()
+    with pytest.raises(NotImplementedError):
+        [i for i in iter(arr)]
+    with pytest.raises(NotImplementedError):
+        repr(arr)
+    with pytest.raises(NotImplementedError):
+        str(arr)
+    with pytest.raises(NotImplementedError):
+        arr == arr2
+    with pytest.raises(NotImplementedError):
+        arr.is_null()
+    with pytest.raises(NotImplementedError):
+        arr.is_nan()
+    with pytest.raises(NotImplementedError):
+        arr.is_valid()
+    with pytest.raises(NotImplementedError):
+        arr.fill_null(0)
+    with pytest.raises(NotImplementedError):
+        arr[0]
+    with pytest.raises(NotImplementedError):
+        arr.take([0])
+    with pytest.raises(NotImplementedError):
+        arr.drop_null()
+    with pytest.raises(NotImplementedError):
+        arr.filter([True, True, False, False])
+    with pytest.raises(NotImplementedError):
+        arr.index(0)
+    with pytest.raises(NotImplementedError):
+        arr.sort()
+    with pytest.raises(NotImplementedError):
+        arr.__array__()
+    with pytest.raises(NotImplementedError):
+        arr.to_numpy()
+    with pytest.raises(NotImplementedError):
+        arr.tolist()
+    with pytest.raises(NotImplementedError):
+        arr.validate(full=True)
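
For context, a rough usage sketch of what the patch enables (not part of the diff; it assumes a pyarrow build with CUDA support and an available GPU, mirroring test_non_cpu_array above): a device-resident array reports its device through the new device_type / is_cpu properties, while operations that still require CPU-resident buffers hit the new Array._assert_cpu() guard and raise NotImplementedError.

import numpy as np
import pyarrow as pa
from pyarrow import cuda   # requires a CUDA-enabled pyarrow build

ctx = cuda.Context(0)
cuda_buf = ctx.buffer_from_data(np.arange(4, dtype=np.int32))
arr = pa.Array.from_buffers(pa.int32(), 4, [None, cuda_buf])

print(arr.device_type)   # DeviceAllocationType.CUDA
print(arr.is_cpu)        # False

# Compute-style operations remain CPU-only for now and raise through the guard.
try:
    arr.sum()
except NotImplementedError as exc:
    print(exc)           # "Implemented only for data on CPU device"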