From feada2795c86eb7ce8afba129df529c91e7e6946 Mon Sep 17 00:00:00 2001 From: Dewey Dunnington Date: Fri, 22 Mar 2024 12:10:15 -0300 Subject: [PATCH 01/19] make device type/id part of the struct --- python/src/nanoarrow/_lib.pyx | 43 +++++++++++++++++++++++++++++++---- 1 file changed, 39 insertions(+), 4 deletions(-) diff --git a/python/src/nanoarrow/_lib.pyx b/python/src/nanoarrow/_lib.pyx index a83e029c0..8ae9d74dc 100644 --- a/python/src/nanoarrow/_lib.pyx +++ b/python/src/nanoarrow/_lib.pyx @@ -34,7 +34,6 @@ generally have better autocomplete + documentation available to IDEs). from libc.stdint cimport uintptr_t, uint8_t, int64_t from libc.string cimport memcpy from libc.stdio cimport snprintf -from libc.errno cimport ENOMEM from cpython.bytes cimport PyBytes_FromStringAndSize from cpython.pycapsule cimport PyCapsule_New, PyCapsule_GetPointer, PyCapsule_IsValid from cpython cimport ( @@ -1027,6 +1026,8 @@ cdef class CArray: cdef object _base cdef ArrowArray* _ptr cdef CSchema _schema + cdef ArrowDeviceType _device_type + cdef int _device_id @staticmethod def allocate(CSchema schema): @@ -1038,6 +1039,12 @@ cdef class CArray: self._base = base self._ptr = addr self._schema = schema + self._device_type = ARROW_DEVICE_CPU + self._device_id = 0 + + cdef _set_device(self, ArrowDeviceType device_type, int64_t device_id): + self._device_type = device_type + self._device_id = device_id @staticmethod def _import_from_c_capsule(schema_capsule, array_capsule): @@ -1095,7 +1102,9 @@ cdef class CArray: c_array_out.offset = c_array_out.offset + start c_array_out.length = stop - start - return CArray(base, c_array_out, self._schema) + cdef CArray out = CArray(base, c_array_out, self._schema) + out._set_device(self._device_type, self._device_id) + return out def __arrow_c_array__(self, requested_schema=None): """ @@ -1115,6 +1124,11 @@ cdef class CArray: """ self._assert_valid() + if self._device_type != ARROW_DEVICE_CPU: + raise ValueError( + "Can't invoke __arrow_c_aray__ on 
non-CPU array " + f"with device_type {self._device_type}") + if requested_schema is not None: raise NotImplementedError("requested_schema") @@ -1137,10 +1151,22 @@ cdef class CArray: if self._ptr.release == NULL: raise RuntimeError("CArray is released") + def view(self): + device = CDevice.resolve(self._device_type, self._device_id) + return CArrayView.from_array(self, device) + @property def schema(self): return self._schema + @property + def device_type(self): + return self._device_type + + @property + def device_id(self): + return self._device_id + @property def length(self): self._assert_valid() @@ -1175,7 +1201,13 @@ cdef class CArray: self._assert_valid() if i < 0 or i >= self._ptr.n_children: raise IndexError(f"{i} out of range [0, {self._ptr.n_children})") - return CArray(self._base, self._ptr.children[i], self._schema.child(i)) + cdef CArray out = CArray( + self._base, + self._ptr.children[i], + self._schema.child(i) + ) + out._set_device(self._device_type, self._device_id) + return out @property def children(self): @@ -1185,8 +1217,11 @@ cdef class CArray: @property def dictionary(self): self._assert_valid() + cdef CArray out if self._ptr.dictionary != NULL: - return CArray(self, self._ptr.dictionary, self._schema.dictionary) + out = CArray(self, self._ptr.dictionary, self._schema.dictionary) + out._set_device(self._device_type, self._device_id) + return out else: return None From e940caa85202515f25a5178dc8a54c6e839fc2a7 Mon Sep 17 00:00:00 2001 From: Dewey Dunnington Date: Fri, 22 Mar 2024 12:18:17 -0300 Subject: [PATCH 02/19] slightly better connection with the cdevice array --- python/src/nanoarrow/_lib.pyx | 7 ++++++- python/src/nanoarrow/device.py | 2 +- 2 files changed, 7 insertions(+), 2 deletions(-) diff --git a/python/src/nanoarrow/_lib.pyx b/python/src/nanoarrow/_lib.pyx index 8ae9d74dc..a2a3e140b 100644 --- a/python/src/nanoarrow/_lib.pyx +++ b/python/src/nanoarrow/_lib.pyx @@ -2375,7 +2375,12 @@ cdef class CDeviceArray: @property def 
array(self): - return CArray(self, &self._ptr.array, self._schema) + cdef CArray out = CArray(self, &self._ptr.array, self._schema) + out._set_device(self._device_type, self._device_id) + return out + + def __arrow_c_array__(self, requested_schema=None): + return self.array.__arrow_c_array__(requested_schema=requested_schema) def __repr__(self): return _repr_utils.device_array_repr(self) diff --git a/python/src/nanoarrow/device.py b/python/src/nanoarrow/device.py index 2bc5d408c..776dc41cf 100644 --- a/python/src/nanoarrow/device.py +++ b/python/src/nanoarrow/device.py @@ -15,7 +15,7 @@ # specific language governing permissions and limitations # under the License. -from nanoarrow._lib import CDEVICE_CPU, CDevice, CDeviceArray +from nanoarrow._lib import CDEVICE_CPU, CDevice, CDeviceArray, CArray from nanoarrow.c_lib import c_array From 0d8bbe97b398400f4be8fb02d212c930d7b46307 Mon Sep 17 00:00:00 2001 From: Dewey Dunnington Date: Fri, 22 Mar 2024 12:35:22 -0300 Subject: [PATCH 03/19] start --- python/src/nanoarrow/_lib.pyx | 59 ++++++++++++++++++++++++++-------- python/src/nanoarrow/device.py | 2 +- 2 files changed, 46 insertions(+), 15 deletions(-) diff --git a/python/src/nanoarrow/_lib.pyx b/python/src/nanoarrow/_lib.pyx index a2a3e140b..f865f7ebc 100644 --- a/python/src/nanoarrow/_lib.pyx +++ b/python/src/nanoarrow/_lib.pyx @@ -163,10 +163,11 @@ cdef object alloc_c_array_view(ArrowArrayView** c_array_view) noexcept: return PyCapsule_New(c_array_view[0], 'nanoarrow_array_view', &pycapsule_array_view_deleter) -cdef void arrow_array_release(ArrowArray* array) noexcept with gil: - Py_DECREF(array.private_data) - array.private_data = NULL - array.release = NULL +cdef void arrow_array_release(ArrowArray* array) noexcept nogil: + with gil: + Py_DECREF(array.private_data) + array.private_data = NULL + array.release = NULL cdef void c_array_shallow_copy(object base, const ArrowArray* c_array, @@ -182,7 +183,7 @@ cdef void c_array_shallow_copy(object base, const 
ArrowArray* c_array, c_array_out.release = arrow_array_release -cdef object alloc_c_array_shallow_copy(object base, const ArrowArray* c_array) noexcept: +cdef object alloc_c_array_shallow_copy(object base, const ArrowArray* c_array): """Make a shallow copy of an ArrowArray To more safely implement export of an ArrowArray whose address may be @@ -197,6 +198,30 @@ cdef object alloc_c_array_shallow_copy(object base, const ArrowArray* c_array) n return array_capsule +cdef void c_device_array_shallow_copy(object base, const ArrowDeviceArray* c_array, + ArrowDeviceArray* c_array_out) noexcept: + # shallow copy + memcpy(c_array_out, c_array, sizeof(ArrowDeviceArray)) + c_array_out.array.release = NULL + c_array_out.array.private_data = NULL + + # track original base + c_array_out.array.private_data = base + Py_INCREF(base) + c_array_out.array.release = arrow_array_release + + +cdef object alloc_c_device_array_shallow_copy(object base, const ArrowDeviceArray* c_array): + """Make a shallow copy of an ArrowDeviceArray + + See :func:`arrow_c_array_shallow_copy()` + """ + cdef ArrowDeviceArray* c_array_out + array_capsule = alloc_c_device_array(&c_array_out) + c_device_array_shallow_copy(base, c_array, c_array_out) + return array_capsule + + cdef void pycapsule_buffer_deleter(object stream_capsule) noexcept: cdef ArrowBuffer* buffer = PyCapsule_GetPointer( stream_capsule, 'nanoarrow_buffer' @@ -206,16 +231,17 @@ cdef void pycapsule_buffer_deleter(object stream_capsule) noexcept: ArrowFree(buffer) -cdef object alloc_c_buffer(ArrowBuffer** c_buffer) noexcept: +cdef object alloc_c_buffer(ArrowBuffer** c_buffer): c_buffer[0] = ArrowMalloc(sizeof(ArrowBuffer)) ArrowBufferInit(c_buffer[0]) return PyCapsule_New(c_buffer[0], 'nanoarrow_buffer', &pycapsule_buffer_deleter) -cdef void c_deallocate_pybuffer(ArrowBufferAllocator* allocator, uint8_t* ptr, int64_t size) noexcept with gil: - cdef Py_buffer* buffer = allocator.private_data - PyBuffer_Release(buffer) - ArrowFree(buffer) - +cdef 
void c_deallocate_pybuffer(ArrowBufferAllocator* allocator, uint8_t* ptr, int64_t size) noexcept nogil: + cdef Py_buffer* buffer + with gil: + buffer = allocator.private_data + PyBuffer_Release(buffer) + ArrowFree(buffer) cdef ArrowBufferAllocator c_pybuffer_deallocator(Py_buffer* buffer): # This should probably be changed in nanoarrow C; however, currently, the deallocator @@ -2375,9 +2401,14 @@ cdef class CDeviceArray: @property def array(self): - cdef CArray out = CArray(self, &self._ptr.array, self._schema) - out._set_device(self._device_type, self._device_id) - return out + # TODO: We loose access to the sync_event here, so we probably need to + # synchronize (or propatate it, or somehow prevent data access downstream) + cdef CArray array = CArray(self, &self._ptr.array, self._schema) + array._set_device(self._device_type, self._device_id) + return array + + def view(self): + return self.array.view() def __arrow_c_array__(self, requested_schema=None): return self.array.__arrow_c_array__(requested_schema=requested_schema) diff --git a/python/src/nanoarrow/device.py b/python/src/nanoarrow/device.py index 776dc41cf..2bc5d408c 100644 --- a/python/src/nanoarrow/device.py +++ b/python/src/nanoarrow/device.py @@ -15,7 +15,7 @@ # specific language governing permissions and limitations # under the License. 
-from nanoarrow._lib import CDEVICE_CPU, CDevice, CDeviceArray, CArray +from nanoarrow._lib import CDEVICE_CPU, CDevice, CDeviceArray from nanoarrow.c_lib import c_array From 569618ffcf99e01614dee7e37a8d08556381ad83 Mon Sep 17 00:00:00 2001 From: Dewey Dunnington Date: Mon, 25 Mar 2024 12:29:47 -0300 Subject: [PATCH 04/19] fix export to array --- python/src/nanoarrow/_lib.pyx | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/python/src/nanoarrow/_lib.pyx b/python/src/nanoarrow/_lib.pyx index f865f7ebc..7bc9a2ffd 100644 --- a/python/src/nanoarrow/_lib.pyx +++ b/python/src/nanoarrow/_lib.pyx @@ -2404,7 +2404,7 @@ cdef class CDeviceArray: # TODO: We loose access to the sync_event here, so we probably need to # synchronize (or propatate it, or somehow prevent data access downstream) cdef CArray array = CArray(self, &self._ptr.array, self._schema) - array._set_device(self._device_type, self._device_id) + array._set_device(self._ptr.device_type, self._ptr.device_id) return array def view(self): From 10f167b7a2d7f77c4de66e7d4729d2bdf90d27ff Mon Sep 17 00:00:00 2001 From: Dewey Dunnington Date: Mon, 25 Mar 2024 12:32:29 -0300 Subject: [PATCH 05/19] maybe implement device protocol export --- python/src/nanoarrow/_lib.pyx | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/python/src/nanoarrow/_lib.pyx b/python/src/nanoarrow/_lib.pyx index 7bc9a2ffd..e274a506c 100644 --- a/python/src/nanoarrow/_lib.pyx +++ b/python/src/nanoarrow/_lib.pyx @@ -2413,5 +2413,12 @@ cdef class CDeviceArray: def __arrow_c_array__(self, requested_schema=None): return self.array.__arrow_c_array__(requested_schema=requested_schema) + def __arrow_c_device_array__(self, requested_schema=None): + if requested_schema is not None: + raise NotImplementedError("requested_schema") + + device_array_capsule = alloc_c_device_array_shallow_copy(self._base, self._ptr) + return self._schema.__arrow_c_schema__(), device_array_capsule + def __repr__(self): return 
_repr_utils.device_array_repr(self) From 414bbc44d3e84ecac2807713438d6988ff4d5245 Mon Sep 17 00:00:00 2001 From: Dewey Dunnington Date: Mon, 25 Mar 2024 13:52:21 -0300 Subject: [PATCH 06/19] import/export device array --- python/src/nanoarrow/_lib.pyx | 31 ++++++++++++++++++++++++++ python/src/nanoarrow/device.py | 19 +++++++++++----- python/tests/test_device.py | 40 +++++++++++++++++++++++++++++----- 3 files changed, 79 insertions(+), 11 deletions(-) diff --git a/python/src/nanoarrow/_lib.pyx b/python/src/nanoarrow/_lib.pyx index e274a506c..f2543aeb6 100644 --- a/python/src/nanoarrow/_lib.pyx +++ b/python/src/nanoarrow/_lib.pyx @@ -2391,6 +2391,10 @@ cdef class CDeviceArray: self._ptr = addr self._schema = schema + @property + def schema(self): + return self._schema + @property def device_type(self): return self._ptr.device_type @@ -2420,5 +2424,32 @@ cdef class CDeviceArray: device_array_capsule = alloc_c_device_array_shallow_copy(self._base, self._ptr) return self._schema.__arrow_c_schema__(), device_array_capsule + @staticmethod + def _import_from_c_capsule(schema_capsule, device_array_capsule): + """ + Import from a ArrowSchema and ArrowArray PyCapsule tuple. + + Parameters + ---------- + schema_capsule : PyCapsule + A valid PyCapsule with name 'arrow_schema' containing an + ArrowSchema pointer. + device_array_capsule : PyCapsule + A valid PyCapsule with name 'arrow_device_array' containing an + ArrowArray pointer. + """ + cdef: + CSchema out_schema + CDeviceArray out + + out_schema = CSchema._import_from_c_capsule(schema_capsule) + out = CDeviceArray( + device_array_capsule, + PyCapsule_GetPointer(device_array_capsule, 'arrow_device_array'), + out_schema + ) + + return out + def __repr__(self): return _repr_utils.device_array_repr(self) diff --git a/python/src/nanoarrow/device.py b/python/src/nanoarrow/device.py index 2bc5d408c..86f526d02 100644 --- a/python/src/nanoarrow/device.py +++ b/python/src/nanoarrow/device.py @@ -16,7 +16,7 @@ # under the License. 
from nanoarrow._lib import CDEVICE_CPU, CDevice, CDeviceArray -from nanoarrow.c_lib import c_array +from nanoarrow.c_lib import c_array, c_schema def cpu(): @@ -27,11 +27,20 @@ def resolve(device_type, device_id): return CDevice.resolve(device_type, device_id) -def c_device_array(obj): - if isinstance(obj, CDeviceArray): +def c_device_array(obj, schema=None): + if schema is not None: + schema = c_schema(schema) + + if isinstance(obj, CDeviceArray) and schema is None: return obj - # Only CPU for now - cpu_array = c_array(obj) + if hasattr(obj, "__arrow_c_device_array__"): + schema_capsule = None if schema is None else schema.__arrow_c_schema__() + schema_capsule, device_array_capsule = obj.__arrow_c_device_array__( + requested_schema=schema_capsule + ) + return CDeviceArray._import_from_c_capsule(schema_capsule, device_array_capsule) + # Attempt to create a CPU array and wrap it + cpu_array = c_array(obj, schema=schema) return cpu()._array_init(cpu_array._addr(), cpu_array.schema) diff --git a/python/tests/test_device.py b/python/tests/test_device.py index 93028816e..d5265e286 100644 --- a/python/tests/test_device.py +++ b/python/tests/test_device.py @@ -15,12 +15,9 @@ # specific language governing permissions and limitations # under the License. 
-import pytest - +import nanoarrow as na from nanoarrow import device -pa = pytest.importorskip("pyarrow") - def test_cpu_device(): cpu = device.cpu() @@ -31,12 +28,43 @@ def test_cpu_device(): cpu = device.resolve(1, 0) assert cpu.device_type == 1 - pa_array = pa.array([1, 2, 3]) - darray = device.c_device_array(pa_array) +def test_c_device_array(): + # Unrecognized arguments should be passed to c_array() to generate CPU array + darray = device.c_device_array([1, 2, 3], na.int32()) + assert darray.device_type == 1 assert darray.device_id == 0 + assert darray.schema.format == "i" assert darray.array.length == 3 + assert darray.array.device_type == device.cpu().device_type + assert darray.array.device_id == device.cpu().device_id assert "device_type: 1" in repr(darray) + # A CDeviceArray should be returned as is assert device.c_device_array(darray) is darray + + # A CPU device array should be able to export to a regular array + array = na.c_array(darray) + assert array.schema.format == "i" + assert array.buffers == darray.array.buffers + + +# Wrapper to prevent c_device_array() from returning early when it detects the +# input is already a CDeviceArray +class DeviceArrayWrapper: + def __init__(self, obj): + self.obj = obj + + def __arrow_c_device_array__(self, requested_schema=None): + return self.obj.__arrow_c_device_array__(requested_schema=requested_schema) + + +def test_c_device_array_protocol(): + darray = device.c_device_array([1, 2, 3], na.int32()) + wrapper = DeviceArrayWrapper(darray) + + darray2 = device.c_device_array(wrapper) + assert darray2.schema.format == "i" + assert darray2.array.length == 3 + assert darray2.array.buffers == darray.array.buffers From b6e76d3c5919171ea1d2cfbc66d1adb0cb7eb93d Mon Sep 17 00:00:00 2001 From: Dewey Dunnington Date: Mon, 25 Mar 2024 16:02:51 -0300 Subject: [PATCH 07/19] coverage --- python/src/nanoarrow/c_lib.py | 2 +- python/tests/test_c_array.py | 8 ++++++++ python/tests/test_device.py | 32 
+++++++++++++++++++++----------- 3 files changed, 30 insertions(+), 12 deletions(-) diff --git a/python/src/nanoarrow/c_lib.py b/python/src/nanoarrow/c_lib.py index 68a53b782..0acc0a9d0 100644 --- a/python/src/nanoarrow/c_lib.py +++ b/python/src/nanoarrow/c_lib.py @@ -427,7 +427,7 @@ def c_array_view(obj, schema=None) -> CArrayView: if isinstance(obj, CArrayView) and schema is None: return obj - return CArrayView.from_array(c_array(obj, schema)) + return c_array(obj, schema).view() def c_buffer(obj, schema=None) -> CBuffer: diff --git a/python/tests/test_c_array.py b/python/tests/test_c_array.py index 75ab2aa7b..1536d159e 100644 --- a/python/tests/test_c_array.py +++ b/python/tests/test_c_array.py @@ -39,6 +39,8 @@ def test_c_array_from_c_array(): assert c_array_from_c_array.length == c_array.length assert c_array_from_c_array.buffers == c_array.buffers + assert list(c_array.view().buffer(1)) == [1, 2, 3] + def test_c_array_from_capsule_protocol(): class CArrayWrapper: @@ -54,6 +56,8 @@ def __arrow_c_array__(self, *args, **kwargs): assert c_array_from_protocol.length == c_array.length assert c_array_from_protocol.buffers == c_array.buffers + assert list(c_array_from_protocol.view().buffer(1)) == [1, 2, 3] + def test_c_array_from_old_pyarrow(): # Simulate a pyarrow Array with no __arrow_c_array__ @@ -73,6 +77,8 @@ def _export_to_c(self, *args): assert c_array.length == 3 assert c_array.schema.format == "i" + assert list(c_array.view().buffer(1)) == [1, 2, 3] + # Make sure that this heuristic won't result in trying to import # something else that has an _export_to_c method with pytest.raises(TypeError, match="Can't convert object of type DataType"): @@ -97,6 +103,8 @@ def test_c_array_from_bare_capsule(): assert c_array_from_capsule.length == c_array.length assert c_array_from_capsule.buffers == c_array.buffers + assert list(c_array_from_capsule.view().buffer(1)) == [1, 2, 3] + def test_c_array_type_not_supported(): with pytest.raises(TypeError, match="Can't convert 
object of type NoneType"): diff --git a/python/tests/test_device.py b/python/tests/test_device.py index d5265e286..b074045b7 100644 --- a/python/tests/test_device.py +++ b/python/tests/test_device.py @@ -15,6 +15,8 @@ # specific language governing permissions and limitations # under the License. +import pytest + import nanoarrow as na from nanoarrow import device @@ -35,11 +37,17 @@ def test_c_device_array(): assert darray.device_type == 1 assert darray.device_id == 0 + assert "device_type: 1" in repr(darray) + assert darray.schema.format == "i" + assert darray.array.length == 3 assert darray.array.device_type == device.cpu().device_type assert darray.array.device_id == device.cpu().device_id - assert "device_type: 1" in repr(darray) + + darray_view = darray.view() + assert darray_view.length == 3 + assert list(darray_view.buffer(1)) == [1, 2, 3] # A CDeviceArray should be returned as is assert device.c_device_array(darray) is darray @@ -50,21 +58,23 @@ def test_c_device_array(): assert array.buffers == darray.array.buffers -# Wrapper to prevent c_device_array() from returning early when it detects the -# input is already a CDeviceArray -class DeviceArrayWrapper: - def __init__(self, obj): - self.obj = obj - - def __arrow_c_device_array__(self, requested_schema=None): - return self.obj.__arrow_c_device_array__(requested_schema=requested_schema) +def test_c_device_array_protocol(): + # Wrapper to prevent c_device_array() from returning early when it detects the + # input is already a CDeviceArray + class CDeviceArrayWrapper: + def __init__(self, obj): + self.obj = obj + def __arrow_c_device_array__(self, requested_schema=None): + return self.obj.__arrow_c_device_array__(requested_schema=requested_schema) -def test_c_device_array_protocol(): darray = device.c_device_array([1, 2, 3], na.int32()) - wrapper = DeviceArrayWrapper(darray) + wrapper = CDeviceArrayWrapper(darray) darray2 = device.c_device_array(wrapper) assert darray2.schema.format == "i" assert 
darray2.array.length == 3 assert darray2.array.buffers == darray.array.buffers + + with pytest.raises(NotImplementedError): + device.c_device_array(wrapper, na.int64()) From 0a96880f59cd9a5be6d6deede5b7a8f4e77e5d63 Mon Sep 17 00:00:00 2001 From: Dewey Dunnington Date: Mon, 25 Mar 2024 16:19:47 -0300 Subject: [PATCH 08/19] clean up --- python/src/nanoarrow/_lib.pyx | 21 ++++++++++----------- python/tests/test_device.py | 2 +- 2 files changed, 11 insertions(+), 12 deletions(-) diff --git a/python/src/nanoarrow/_lib.pyx b/python/src/nanoarrow/_lib.pyx index f2543aeb6..07ba85d7b 100644 --- a/python/src/nanoarrow/_lib.pyx +++ b/python/src/nanoarrow/_lib.pyx @@ -163,11 +163,10 @@ cdef object alloc_c_array_view(ArrowArrayView** c_array_view) noexcept: return PyCapsule_New(c_array_view[0], 'nanoarrow_array_view', &pycapsule_array_view_deleter) -cdef void arrow_array_release(ArrowArray* array) noexcept nogil: - with gil: - Py_DECREF(array.private_data) - array.private_data = NULL - array.release = NULL +cdef void arrow_array_release(ArrowArray* array) noexcept with gil: + Py_DECREF(array.private_data) + array.private_data = NULL + array.release = NULL cdef void c_array_shallow_copy(object base, const ArrowArray* c_array, @@ -236,12 +235,12 @@ cdef object alloc_c_buffer(ArrowBuffer** c_buffer): ArrowBufferInit(c_buffer[0]) return PyCapsule_New(c_buffer[0], 'nanoarrow_buffer', &pycapsule_buffer_deleter) -cdef void c_deallocate_pybuffer(ArrowBufferAllocator* allocator, uint8_t* ptr, int64_t size) noexcept nogil: - cdef Py_buffer* buffer - with gil: - buffer = allocator.private_data - PyBuffer_Release(buffer) - ArrowFree(buffer) + +cdef void c_deallocate_pybuffer(ArrowBufferAllocator* allocator, uint8_t* ptr, int64_t size) noexcept with gil: + cdef Py_buffer* buffer = allocator.private_data + PyBuffer_Release(buffer) + ArrowFree(buffer) + cdef ArrowBufferAllocator c_pybuffer_deallocator(Py_buffer* buffer): # This should probably be changed in nanoarrow C; however, currently, 
the deallocator diff --git a/python/tests/test_device.py b/python/tests/test_device.py index b074045b7..d852cd01c 100644 --- a/python/tests/test_device.py +++ b/python/tests/test_device.py @@ -32,7 +32,7 @@ def test_cpu_device(): def test_c_device_array(): - # Unrecognized arguments should be passed to c_array() to generate CPU array + # Unrecognized arguments should be passed to c_array() to generate CPU array darray = device.c_device_array([1, 2, 3], na.int32()) assert darray.device_type == 1 From 83349acc01e3c596436b80ea9b63defe1111c384 Mon Sep 17 00:00:00 2001 From: Dewey Dunnington Date: Mon, 25 Mar 2024 16:23:29 -0300 Subject: [PATCH 09/19] add note about sync --- python/src/nanoarrow/_lib.pyx | 2 ++ 1 file changed, 2 insertions(+) diff --git a/python/src/nanoarrow/_lib.pyx b/python/src/nanoarrow/_lib.pyx index 07ba85d7b..57910aefa 100644 --- a/python/src/nanoarrow/_lib.pyx +++ b/python/src/nanoarrow/_lib.pyx @@ -2420,6 +2420,8 @@ cdef class CDeviceArray: if requested_schema is not None: raise NotImplementedError("requested_schema") + # TODO: evaluate whether we need to synchronize here or whether we should + # move device arrays instead of shallow-copying them device_array_capsule = alloc_c_device_array_shallow_copy(self._base, self._ptr) return self._schema.__arrow_c_schema__(), device_array_capsule From 6aa1caca3e8820b8ef38ad8694ad41d717f9cd4a Mon Sep 17 00:00:00 2001 From: Dewey Dunnington Date: Mon, 25 Mar 2024 20:36:56 -0300 Subject: [PATCH 10/19] fix documentation --- python/src/nanoarrow/_lib.pyx | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/python/src/nanoarrow/_lib.pyx b/python/src/nanoarrow/_lib.pyx index 57910aefa..b6eb1217d 100644 --- a/python/src/nanoarrow/_lib.pyx +++ b/python/src/nanoarrow/_lib.pyx @@ -2437,7 +2437,7 @@ cdef class CDeviceArray: ArrowSchema pointer. device_array_capsule : PyCapsule A valid PyCapsule with name 'arrow_device_array' containing an - ArrowArray pointer. + ArrowDeviceArray pointer. 
""" cdef: CSchema out_schema From 25a8dfc768f37ebfe9ceea3e5f1914e7c49ca2e7 Mon Sep 17 00:00:00 2001 From: Dewey Dunnington Date: Wed, 3 Apr 2024 16:26:34 -0300 Subject: [PATCH 11/19] Apply suggestions from code review Co-authored-by: Dane Pitkin <48041712+danepitkin@users.noreply.github.com> --- python/src/nanoarrow/_lib.pyx | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/python/src/nanoarrow/_lib.pyx b/python/src/nanoarrow/_lib.pyx index b6eb1217d..e31013d2e 100644 --- a/python/src/nanoarrow/_lib.pyx +++ b/python/src/nanoarrow/_lib.pyx @@ -1151,7 +1151,7 @@ cdef class CArray: if self._device_type != ARROW_DEVICE_CPU: raise ValueError( - "Can't invoke __arrow_c_aray__ on non-CPU array " + "Can't invoke __arrow_c_array__ on non-CPU array " f"with device_type {self._device_type}") if requested_schema is not None: @@ -2404,8 +2404,8 @@ cdef class CDeviceArray: @property def array(self): - # TODO: We loose access to the sync_event here, so we probably need to - # synchronize (or propatate it, or somehow prevent data access downstream) + # TODO: We lose access to the sync_event here, so we probably need to + # synchronize (or propagate it, or somehow prevent data access downstream) cdef CArray array = CArray(self, &self._ptr.array, self._schema) array._set_device(self._ptr.device_type, self._ptr.device_id) return array @@ -2428,7 +2428,7 @@ cdef class CDeviceArray: @staticmethod def _import_from_c_capsule(schema_capsule, device_array_capsule): """ - Import from a ArrowSchema and ArrowArray PyCapsule tuple. + Import from an ArrowSchema and ArrowArray PyCapsule tuple. 
Parameters ---------- From 0c88a302d70a1140f4391771ba474c9e1f8e5cb6 Mon Sep 17 00:00:00 2001 From: Dewey Dunnington Date: Wed, 3 Apr 2024 23:01:33 -0300 Subject: [PATCH 12/19] expose enum for device types --- python/src/nanoarrow/_lib.pyx | 43 ++++++++++++++++++++- python/src/nanoarrow/_repr_utils.py | 9 +++-- python/src/nanoarrow/device.py | 2 +- python/src/nanoarrow/nanoarrow_device_c.pxd | 10 +++++ python/tests/test_device.py | 12 +++--- 5 files changed, 65 insertions(+), 11 deletions(-) diff --git a/python/src/nanoarrow/_lib.pyx b/python/src/nanoarrow/_lib.pyx index e31013d2e..6c48b772d 100644 --- a/python/src/nanoarrow/_lib.pyx +++ b/python/src/nanoarrow/_lib.pyx @@ -50,6 +50,7 @@ from cpython.ref cimport Py_INCREF, Py_DECREF from nanoarrow_c cimport * from nanoarrow_device_c cimport * +from enum import Enum from sys import byteorder as sys_byteorder from struct import unpack_from, iter_unpack, calcsize, Struct from nanoarrow import _repr_utils @@ -523,6 +524,32 @@ cdef class CArrowTimeUnit: NANO = NANOARROW_TIME_UNIT_NANO +class DeviceType(Enum): + """ + An enum-like wrapper providing access to the device constant values + defined in the Arrow C Device interface. Unlike the other enum + accessors, this Python Enum is defined in Cython so that we can use + the built-in functionality to do better printing of device identifiers + for classes defined in Cython. Unlike the other enums, users don't + typically need to specify these (but would probably like them printed + nicely). 
+ """ + + CPU = ARROW_DEVICE_CPU + CUDA = ARROW_DEVICE_CUDA + CUDA_HOST = ARROW_DEVICE_CUDA_HOST + OPENCL = ARROW_DEVICE_OPENCL + VULKAN = ARROW_DEVICE_VULKAN + METAL = ARROW_DEVICE_METAL + VPI = ARROW_DEVICE_VPI + ROCM = ARROW_DEVICE_ROCM + ROCM_HOST = ARROW_DEVICE_ROCM_HOST + EXT_DEV = ARROW_DEVICE_EXT_DEV + CUDA_MANAGED = ARROW_DEVICE_CUDA_MANAGED + ONEAPI = ARROW_DEVICE_ONEAPI + WEBGPU = ARROW_DEVICE_WEBGPU + HEXAGON = ARROW_DEVICE_HEXAGON + cdef class CDevice: """ArrowDevice wrapper @@ -554,6 +581,10 @@ cdef class CDevice: @property def device_type(self): + return DeviceType(self._ptr.device_type) + + @property + def device_type_id(self): return self._ptr.device_type @property @@ -561,8 +592,8 @@ cdef class CDevice: return self._ptr.device_id @staticmethod - def resolve(ArrowDeviceType device_type, int64_t device_id): - if device_type == ARROW_DEVICE_CPU: + def resolve(device_type, int64_t device_id): + if int(device_type) == ARROW_DEVICE_CPU: return CDEVICE_CPU else: raise ValueError(f"Device not found for type {device_type}/{device_id}") @@ -1186,6 +1217,10 @@ cdef class CArray: @property def device_type(self): + return DeviceType(self._device_type) + + @property + def device_type_id(self): return self._device_type @property @@ -2396,6 +2431,10 @@ cdef class CDeviceArray: @property def device_type(self): + return DeviceType(self._ptr.device_type) + + @property + def device_type_id(self): return self._ptr.device_type @property diff --git a/python/src/nanoarrow/_repr_utils.py b/python/src/nanoarrow/_repr_utils.py index 99b11fde0..3209a3413 100644 --- a/python/src/nanoarrow/_repr_utils.py +++ b/python/src/nanoarrow/_repr_utils.py @@ -169,7 +169,7 @@ def buffer_view_repr(buffer_view, max_char_width=80): prefix = f"{buffer_view.data_type}" prefix += f"[{buffer_view.size_bytes} b]" - if buffer_view.device.device_type == 1: + if buffer_view.device.device_type_id == 1: return ( prefix + " " @@ -232,7 +232,10 @@ def device_array_repr(device_array): class_label = 
make_class_label(device_array, module="nanoarrow.device") title_line = f"<{class_label}>" - device_type = f"- device_type: {device_array.device_type}" + device_type = ( + f"- device_type: {device_array.device_type.name} " + f"<{device_array.device_type_id}>" + ) device_id = f"- device_id: {device_array.device_id}" array = f"- array: {array_repr(device_array.array, indent=2)}" return "\n".join((title_line, device_type, device_id, array)) @@ -242,6 +245,6 @@ def device_repr(device): class_label = make_class_label(device, module="nanoarrow.device") title_line = f"<{class_label}>" - device_type = f"- device_type: {device.device_type}" + device_type = f"- device_type: {device.device_type.name} <{device.device_type_id}>" device_id = f"- device_id: {device.device_id}" return "\n".join([title_line, device_type, device_id]) diff --git a/python/src/nanoarrow/device.py b/python/src/nanoarrow/device.py index 86f526d02..1f8cc4681 100644 --- a/python/src/nanoarrow/device.py +++ b/python/src/nanoarrow/device.py @@ -15,7 +15,7 @@ # specific language governing permissions and limitations # under the License. 
-from nanoarrow._lib import CDEVICE_CPU, CDevice, CDeviceArray +from nanoarrow._lib import CDEVICE_CPU, CDevice, CDeviceArray, DeviceType from nanoarrow.c_lib import c_array, c_schema diff --git a/python/src/nanoarrow/nanoarrow_device_c.pxd b/python/src/nanoarrow/nanoarrow_device_c.pxd index f2a65a905..5c8a12ef8 100644 --- a/python/src/nanoarrow/nanoarrow_device_c.pxd +++ b/python/src/nanoarrow/nanoarrow_device_c.pxd @@ -26,7 +26,17 @@ cdef extern from "nanoarrow_device.h" nogil: int32_t ARROW_DEVICE_CPU int32_t ARROW_DEVICE_CUDA int32_t ARROW_DEVICE_CUDA_HOST + int32_t ARROW_DEVICE_OPENCL + int32_t ARROW_DEVICE_VULKAN int32_t ARROW_DEVICE_METAL + int32_t ARROW_DEVICE_VPI + int32_t ARROW_DEVICE_ROCM + int32_t ARROW_DEVICE_ROCM_HOST + int32_t ARROW_DEVICE_EXT_DEV + int32_t ARROW_DEVICE_CUDA_MANAGED + int32_t ARROW_DEVICE_ONEAPI + int32_t ARROW_DEVICE_WEBGPU + int32_t ARROW_DEVICE_HEXAGON struct ArrowDeviceArray: ArrowArray array diff --git a/python/tests/test_device.py b/python/tests/test_device.py index d852cd01c..2dd894b6b 100644 --- a/python/tests/test_device.py +++ b/python/tests/test_device.py @@ -23,21 +23,23 @@ def test_cpu_device(): cpu = device.cpu() - assert cpu.device_type == 1 + assert cpu.device_type_id == 1 + assert cpu.device_type == device.DeviceType.CPU assert cpu.device_id == 0 - assert "device_type: 1" in repr(cpu) + assert "device_type: CPU <1>" in repr(cpu) cpu = device.resolve(1, 0) - assert cpu.device_type == 1 + assert cpu.device_type_id == 1 def test_c_device_array(): # Unrecognized arguments should be passed to c_array() to generate CPU array darray = device.c_device_array([1, 2, 3], na.int32()) - assert darray.device_type == 1 + assert darray.device_type_id == 1 + assert darray.device_type == device.DeviceType.CPU assert darray.device_id == 0 - assert "device_type: 1" in repr(darray) + assert "device_type: CPU <1>" in repr(darray) assert darray.schema.format == "i" From 9c4f22dfc06bf5c626894ad111e47a76978fa3ee Mon Sep 17 00:00:00 2001 
From: Dewey Dunnington Date: Wed, 3 Apr 2024 23:04:04 -0300 Subject: [PATCH 13/19] CDevice -> Device --- python/src/nanoarrow/_lib.pyx | 18 +++++++++--------- python/src/nanoarrow/array.py | 14 +++++++------- python/src/nanoarrow/device.py | 4 ++-- python/tests/test_array.py | 2 +- 4 files changed, 19 insertions(+), 19 deletions(-) diff --git a/python/src/nanoarrow/_lib.pyx b/python/src/nanoarrow/_lib.pyx index 6c48b772d..d561e1b18 100644 --- a/python/src/nanoarrow/_lib.pyx +++ b/python/src/nanoarrow/_lib.pyx @@ -551,7 +551,7 @@ class DeviceType(Enum): HEXAGON = ARROW_DEVICE_HEXAGON -cdef class CDevice: +cdef class Device: """ArrowDevice wrapper The ArrowDevice structure is a nanoarrow internal struct (i.e., @@ -601,7 +601,7 @@ cdef class CDevice: # Cache the CPU device # The CPU device is statically allocated (so base is None) -CDEVICE_CPU = CDevice(None, ArrowDeviceCpu()) +CDEVICE_CPU = Device(None, ArrowDeviceCpu()) cdef class CSchema: @@ -1208,7 +1208,7 @@ cdef class CArray: raise RuntimeError("CArray is released") def view(self): - device = CDevice.resolve(self._device_type, self._device_id) + device = Device.resolve(self._device_type, self._device_id) return CArrayView.from_array(self, device) @property @@ -1301,14 +1301,14 @@ cdef class CArrayView: cdef object _base cdef object _array_base cdef ArrowArrayView* _ptr - cdef CDevice _device + cdef Device _device def __cinit__(self, object base, uintptr_t addr): self._base = base self._ptr = addr self._device = CDEVICE_CPU - def _set_array(self, CArray array, CDevice device=CDEVICE_CPU): + def _set_array(self, CArray array, Device device=CDEVICE_CPU): cdef Error error = Error() cdef int code @@ -1444,7 +1444,7 @@ cdef class CArrayView: return CArrayView(base, c_array_view) @staticmethod - def from_array(CArray array, CDevice device=CDEVICE_CPU): + def from_array(CArray array, Device device=CDEVICE_CPU): out = CArrayView.from_schema(array._schema) return out._set_array(array, device) @@ -1491,7 +1491,7 @@ cdef 
class CBufferView: cdef object _base cdef ArrowBufferView _ptr cdef ArrowType _data_type - cdef CDevice _device + cdef Device _device cdef Py_ssize_t _element_size_bits cdef Py_ssize_t _shape cdef Py_ssize_t _strides @@ -1499,7 +1499,7 @@ cdef class CBufferView: def __cinit__(self, object base, uintptr_t addr, int64_t size_bytes, ArrowType data_type, - Py_ssize_t element_size_bits, CDevice device): + Py_ssize_t element_size_bits, Device device): self._base = base self._ptr.data.data = addr self._ptr.size_bytes = size_bytes @@ -1701,7 +1701,7 @@ cdef class CBuffer: cdef ArrowType _data_type cdef int _element_size_bits cdef char _format[32] - cdef CDevice _device + cdef Device _device cdef CBufferView _view cdef int _get_buffer_count diff --git a/python/src/nanoarrow/array.py b/python/src/nanoarrow/array.py index 78756e150..521cbb588 100644 --- a/python/src/nanoarrow/array.py +++ b/python/src/nanoarrow/array.py @@ -22,8 +22,8 @@ CDEVICE_CPU, CArray, CBuffer, - CDevice, CMaterializedArrayStream, + Device, ) from nanoarrow.c_lib import c_array, c_array_stream, c_array_view from nanoarrow.iterator import iter_py, iter_tuples @@ -65,7 +65,7 @@ def __init__(self): self._device = None @property - def device(self) -> CDevice: + def device(self) -> Device: return self._device @property @@ -121,7 +121,7 @@ class Array: :func:`c_array_stream`. schema : schema-like, optional An optional schema, passed to :func:`c_array_stream`. - device : CDevice, optional + device : Device, optional The device associated with the buffers held by this Array. Defaults to the CPU device. 
@@ -139,10 +139,10 @@ class Array: def __init__(self, obj, schema=None, device=None) -> None: if device is None: self._device = CDEVICE_CPU - elif isinstance(device, CDevice): + elif isinstance(device, Device): self._device = device else: - raise TypeError("device must be CDevice") + raise TypeError("device must be Device") if isinstance(obj, CMaterializedArrayStream) and schema is None: self._data = obj @@ -186,7 +186,7 @@ def __arrow_c_array__(self, requested_schema=None): self._assert_one_chunk("export ArrowArray") @property - def device(self) -> CDevice: + def device(self) -> Device: """Get the device on which the buffers for this array are allocated. Examples @@ -195,7 +195,7 @@ def device(self) -> CDevice: >>> import nanoarrow as na >>> array = na.Array([1, 2, 3], na.int32()) >>> array.device - + - device_type: 1 - device_id: 0 """ diff --git a/python/src/nanoarrow/device.py b/python/src/nanoarrow/device.py index 1f8cc4681..ea914b3a2 100644 --- a/python/src/nanoarrow/device.py +++ b/python/src/nanoarrow/device.py @@ -15,7 +15,7 @@ # specific language governing permissions and limitations # under the License. 
-from nanoarrow._lib import CDEVICE_CPU, CDevice, CDeviceArray, DeviceType +from nanoarrow._lib import CDEVICE_CPU, CDeviceArray, Device, DeviceType from nanoarrow.c_lib import c_array, c_schema @@ -24,7 +24,7 @@ def cpu(): def resolve(device_type, device_id): - return CDevice.resolve(device_type, device_id) + return Device.resolve(device_type, device_id) def c_device_array(obj, schema=None): diff --git a/python/tests/test_array.py b/python/tests/test_array.py index fe590e607..ee88d20d0 100644 --- a/python/tests/test_array.py +++ b/python/tests/test_array.py @@ -31,7 +31,7 @@ def test_array_construct(): array2 = na.Array(array._data) assert array2._data is array._data - with pytest.raises(TypeError, match="device must be CDevice"): + with pytest.raises(TypeError, match="device must be Device"): na.Array([], na.int32(), device=1234) with pytest.raises(NotImplementedError): From 5c1b424ce52bf79350b26eed17f9dbafacba334a Mon Sep 17 00:00:00 2001 From: Dewey Dunnington Date: Wed, 3 Apr 2024 23:06:50 -0300 Subject: [PATCH 14/19] lint --- python/src/nanoarrow/device.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/python/src/nanoarrow/device.py b/python/src/nanoarrow/device.py index ea914b3a2..37a36b829 100644 --- a/python/src/nanoarrow/device.py +++ b/python/src/nanoarrow/device.py @@ -15,7 +15,7 @@ # specific language governing permissions and limitations # under the License. 
-from nanoarrow._lib import CDEVICE_CPU, CDeviceArray, Device, DeviceType +from nanoarrow._lib import CDEVICE_CPU, CDeviceArray, Device from nanoarrow.c_lib import c_array, c_schema From 306f543a999db6a7370ce5fcab880b30acce41ac Mon Sep 17 00:00:00 2001 From: Dewey Dunnington Date: Wed, 3 Apr 2024 23:16:56 -0300 Subject: [PATCH 15/19] cpu device id is -1 --- .../nanoarrow_device/src/nanoarrow/nanoarrow_device.c | 4 ++-- .../src/nanoarrow/nanoarrow_device_test.cc | 2 +- python/src/nanoarrow/device.py | 2 +- python/tests/test_device.py | 8 ++++---- 4 files changed, 8 insertions(+), 8 deletions(-) diff --git a/extensions/nanoarrow_device/src/nanoarrow/nanoarrow_device.c b/extensions/nanoarrow_device/src/nanoarrow/nanoarrow_device.c index 0c76d961e..3896283fa 100644 --- a/extensions/nanoarrow_device/src/nanoarrow/nanoarrow_device.c +++ b/extensions/nanoarrow_device/src/nanoarrow/nanoarrow_device.c @@ -115,7 +115,7 @@ struct ArrowDevice* ArrowDeviceCpu(void) { void ArrowDeviceInitCpu(struct ArrowDevice* device) { device->device_type = ARROW_DEVICE_CPU; - device->device_id = 0; + device->device_id = -1; device->array_init = NULL; device->array_move = NULL; device->buffer_init = &ArrowDeviceCpuBufferInit; @@ -135,7 +135,7 @@ struct ArrowDevice* ArrowDeviceCuda(ArrowDeviceType device_type, int64_t device_ #endif struct ArrowDevice* ArrowDeviceResolve(ArrowDeviceType device_type, int64_t device_id) { - if (device_type == ARROW_DEVICE_CPU && device_id == 0) { + if (device_type == ARROW_DEVICE_CPU) { return ArrowDeviceCpu(); } diff --git a/extensions/nanoarrow_device/src/nanoarrow/nanoarrow_device_test.cc b/extensions/nanoarrow_device/src/nanoarrow/nanoarrow_device_test.cc index f437b3698..8ed39a24b 100644 --- a/extensions/nanoarrow_device/src/nanoarrow/nanoarrow_device_test.cc +++ b/extensions/nanoarrow_device/src/nanoarrow/nanoarrow_device_test.cc @@ -28,7 +28,7 @@ TEST(NanoarrowDevice, CheckRuntime) { TEST(NanoarrowDevice, CpuDevice) { struct ArrowDevice* cpu = 
ArrowDeviceCpu(); EXPECT_EQ(cpu->device_type, ARROW_DEVICE_CPU); - EXPECT_EQ(cpu->device_id, 0); + EXPECT_EQ(cpu->device_id, -1); EXPECT_EQ(cpu, ArrowDeviceCpu()); void* sync_event = nullptr; diff --git a/python/src/nanoarrow/device.py b/python/src/nanoarrow/device.py index 37a36b829..138138343 100644 --- a/python/src/nanoarrow/device.py +++ b/python/src/nanoarrow/device.py @@ -15,7 +15,7 @@ # specific language governing permissions and limitations # under the License. -from nanoarrow._lib import CDEVICE_CPU, CDeviceArray, Device +from nanoarrow._lib import CDEVICE_CPU, CDeviceArray, Device, DeviceType # noqa: F401 from nanoarrow.c_lib import c_array, c_schema diff --git a/python/tests/test_device.py b/python/tests/test_device.py index 2dd894b6b..1158337a2 100644 --- a/python/tests/test_device.py +++ b/python/tests/test_device.py @@ -25,11 +25,11 @@ def test_cpu_device(): cpu = device.cpu() assert cpu.device_type_id == 1 assert cpu.device_type == device.DeviceType.CPU - assert cpu.device_id == 0 + assert cpu.device_id == -1 assert "device_type: CPU <1>" in repr(cpu) - cpu = device.resolve(1, 0) - assert cpu.device_type_id == 1 + cpu2 = device.resolve(1, 0) + assert cpu2 is cpu def test_c_device_array(): @@ -38,7 +38,7 @@ def test_c_device_array(): assert darray.device_type_id == 1 assert darray.device_type == device.DeviceType.CPU - assert darray.device_id == 0 + assert darray.device_id == -1 assert "device_type: CPU <1>" in repr(darray) assert darray.schema.format == "i" From 7b798a396a415ad34a3dff376f479222a54c3ad8 Mon Sep 17 00:00:00 2001 From: Dewey Dunnington Date: Wed, 3 Apr 2024 23:19:36 -0300 Subject: [PATCH 16/19] cdevicd_cpu -> device_cpu --- python/src/nanoarrow/_lib.pyx | 18 +++++++++--------- python/src/nanoarrow/array.py | 6 +++--- python/src/nanoarrow/device.py | 4 ++-- 3 files changed, 14 insertions(+), 14 deletions(-) diff --git a/python/src/nanoarrow/_lib.pyx b/python/src/nanoarrow/_lib.pyx index d561e1b18..4bb97011f 100644 --- 
a/python/src/nanoarrow/_lib.pyx +++ b/python/src/nanoarrow/_lib.pyx @@ -594,14 +594,14 @@ cdef class Device: @staticmethod def resolve(device_type, int64_t device_id): if int(device_type) == ARROW_DEVICE_CPU: - return CDEVICE_CPU + return DEVICE_CPU else: raise ValueError(f"Device not found for type {device_type}/{device_id}") # Cache the CPU device # The CPU device is statically allocated (so base is None) -CDEVICE_CPU = Device(None, ArrowDeviceCpu()) +DEVICE_CPU = Device(None, ArrowDeviceCpu()) cdef class CSchema: @@ -1306,13 +1306,13 @@ cdef class CArrayView: def __cinit__(self, object base, uintptr_t addr): self._base = base self._ptr = addr - self._device = CDEVICE_CPU + self._device = DEVICE_CPU - def _set_array(self, CArray array, Device device=CDEVICE_CPU): + def _set_array(self, CArray array, Device device=DEVICE_CPU): cdef Error error = Error() cdef int code - if device is CDEVICE_CPU: + if device is DEVICE_CPU: code = ArrowArrayViewSetArray(self._ptr, array._ptr, &error.c_error) else: code = ArrowArrayViewSetArrayMinimal(self._ptr, array._ptr, &error.c_error) @@ -1444,7 +1444,7 @@ cdef class CArrayView: return CArrayView(base, c_array_view) @staticmethod - def from_array(CArray array, Device device=CDEVICE_CPU): + def from_array(CArray array, Device device=DEVICE_CPU): out = CArrayView.from_schema(array._schema) return out._set_array(array, device) @@ -1659,7 +1659,7 @@ cdef class CBufferView: self._do_releasebuffer(buffer) cdef _do_getbuffer(self, Py_buffer *buffer, int flags): - if self._device is not CDEVICE_CPU: + if self._device is not DEVICE_CPU: raise RuntimeError("CBufferView is not a CPU buffer") if flags & PyBUF_WRITABLE: @@ -1710,7 +1710,7 @@ cdef class CBuffer: self._ptr = NULL self._data_type = NANOARROW_TYPE_BINARY self._element_size_bits = 0 - self._device = CDEVICE_CPU + self._device = DEVICE_CPU # Set initial format to "B" (Cython makes this hard) self._format[0] = 66 self._format[1] = 0 @@ -1747,7 +1747,7 @@ cdef class CBuffer: cdef 
CBuffer out = CBuffer() out._base = alloc_c_buffer(&out._ptr) out._set_format(c_buffer_set_pybuffer(obj, &out._ptr)) - out._device = CDEVICE_CPU + out._device = DEVICE_CPU out._populate_view() return out diff --git a/python/src/nanoarrow/array.py b/python/src/nanoarrow/array.py index 521cbb588..d3cb938b5 100644 --- a/python/src/nanoarrow/array.py +++ b/python/src/nanoarrow/array.py @@ -19,7 +19,7 @@ from typing import Iterable, Tuple from nanoarrow._lib import ( - CDEVICE_CPU, + DEVICE_CPU, CArray, CBuffer, CMaterializedArrayStream, @@ -138,7 +138,7 @@ class Array: def __init__(self, obj, schema=None, device=None) -> None: if device is None: - self._device = CDEVICE_CPU + self._device = DEVICE_CPU elif isinstance(device, Device): self._device = device else: @@ -164,7 +164,7 @@ def _assert_one_chunk(self, op): raise ValueError(f"Can't {op} with non-contiguous Array") def _assert_cpu(self, op): - if self._device != CDEVICE_CPU: + if self._device != DEVICE_CPU: raise ValueError(f"Can't {op} with Array on non-CPU device") def __arrow_c_stream__(self, requested_schema=None): diff --git a/python/src/nanoarrow/device.py b/python/src/nanoarrow/device.py index 138138343..7bf0dcea0 100644 --- a/python/src/nanoarrow/device.py +++ b/python/src/nanoarrow/device.py @@ -15,12 +15,12 @@ # specific language governing permissions and limitations # under the License. 
-from nanoarrow._lib import CDEVICE_CPU, CDeviceArray, Device, DeviceType # noqa: F401 +from nanoarrow._lib import DEVICE_CPU, CDeviceArray, Device, DeviceType # noqa: F401 from nanoarrow.c_lib import c_array, c_schema def cpu(): - return CDEVICE_CPU + return DEVICE_CPU def resolve(device_type, device_id): From 10fbcc53e79c910d98fd88ab9b6518ae27c94aba Mon Sep 17 00:00:00 2001 From: Dewey Dunnington Date: Wed, 3 Apr 2024 23:22:33 -0300 Subject: [PATCH 17/19] format --- python/src/nanoarrow/array.py | 8 +------- 1 file changed, 1 insertion(+), 7 deletions(-) diff --git a/python/src/nanoarrow/array.py b/python/src/nanoarrow/array.py index d3cb938b5..17ec55899 100644 --- a/python/src/nanoarrow/array.py +++ b/python/src/nanoarrow/array.py @@ -18,13 +18,7 @@ from functools import cached_property from typing import Iterable, Tuple -from nanoarrow._lib import ( - DEVICE_CPU, - CArray, - CBuffer, - CMaterializedArrayStream, - Device, -) +from nanoarrow._lib import DEVICE_CPU, CArray, CBuffer, CMaterializedArrayStream, Device from nanoarrow.c_lib import c_array, c_array_stream, c_array_view from nanoarrow.iterator import iter_py, iter_tuples from nanoarrow.schema import Schema From ebcbd5e01f8a58613d7eb912c4380c8116a44df1 Mon Sep 17 00:00:00 2001 From: Dewey Dunnington Date: Wed, 3 Apr 2024 23:24:46 -0300 Subject: [PATCH 18/19] doctest --- python/src/nanoarrow/array.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/python/src/nanoarrow/array.py b/python/src/nanoarrow/array.py index 17ec55899..af2e3cd47 100644 --- a/python/src/nanoarrow/array.py +++ b/python/src/nanoarrow/array.py @@ -190,8 +190,8 @@ def device(self) -> Device: >>> array = na.Array([1, 2, 3], na.int32()) >>> array.device - - device_type: 1 - - device_id: 0 + - device_type: CPU <1> + - device_id: -1 """ return self._device From 4ce9e8bc6017c0c07c5bc21ea4f160de2f2b8c39 Mon Sep 17 00:00:00 2001 From: Dewey Dunnington Date: Wed, 3 Apr 2024 23:55:48 -0300 Subject: [PATCH 19/19] typo --- 
python/src/nanoarrow/_lib.pyx | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/python/src/nanoarrow/_lib.pyx b/python/src/nanoarrow/_lib.pyx index 4bb97011f..d9bda9d91 100644 --- a/python/src/nanoarrow/_lib.pyx +++ b/python/src/nanoarrow/_lib.pyx @@ -526,7 +526,7 @@ cdef class CArrowTimeUnit: class DeviceType(Enum): """ - An enum-like wrapper providing access to the device constant values + An enumerator providing access to the device constant values defined in the Arrow C Device interface. Unlike the other enum accessors, this Python Enum is defined in Cython so that we can use the bulit-in functionality to do better printing of device identifiers