diff --git a/dpctl/memory/_memory.pyx b/dpctl/memory/_memory.pyx index 044a5b55a1..5fe4fe6b58 100644 --- a/dpctl/memory/_memory.pyx +++ b/dpctl/memory/_memory.pyx @@ -679,17 +679,13 @@ cdef class _Memory: cdef class MemoryUSMShared(_Memory): """ MemoryUSMShared(nbytes, alignment=0, queue=None, copy=False) - allocates nbytes of USM shared memory. - Non-positive alignments are not used (malloc_shared is used instead). - For the queue=None case the ``dpctl.SyclQueue()`` is used to allocate - memory. + An object representing allocation of SYCL USM-shared memory. - MemoryUSMShared(usm_obj) constructor creates instance from `usm_obj` - expected to implement `__sycl_usm_array_interface__` protocol and to expose - a contiguous block of USM shared allocation. Use `copy=True` to - perform a copy if USM type of the allocation represented by the argument - is other than 'shared'. + Non-positive ``alignment`` values are ignored and + the allocator ``malloc_shared`` is used for allocation instead. + If ``queue`` is ``None`` a cached default-constructed + :class:`dpctl.SyclQueue` is used to allocate memory. """ def __cinit__(self, other, *, Py_ssize_t alignment=0, SyclQueue queue=None, int copy=False): @@ -720,17 +716,13 @@ cdef class MemoryUSMShared(_Memory): cdef class MemoryUSMHost(_Memory): """ MemoryUSMHost(nbytes, alignment=0, queue=None, copy=False) - allocates nbytes of USM host memory. - Non-positive alignments are not used (malloc_host is used instead). - For the queue=None case the ``dpctl.SyclQueue()`` is used to allocate - memory. + An object representing allocation of SYCL USM-host memory. - MemoryUSMDevice(usm_obj) constructor create instance from `usm_obj` - expected to implement `__sycl_usm_array_interface__` protocol and to expose - a contiguous block of USM host allocation. Use `copy=True` to - perform a copy if USM type of the allocation represented by the argument - is other than 'host'. 
+ Non-positive ``alignment`` values are ignored and + the allocator ``malloc_host`` is used for allocation instead. + If ``queue`` is ``None`` a cached default-constructed + :class:`dpctl.SyclQueue` is used to allocate memory. """ def __cinit__(self, other, *, Py_ssize_t alignment=0, SyclQueue queue=None, int copy=False): @@ -762,17 +754,13 @@ cdef class MemoryUSMHost(_Memory): cdef class MemoryUSMDevice(_Memory): """ MemoryUSMDevice(nbytes, alignment=0, queue=None, copy=False) - allocates nbytes of USM device memory. - Non-positive alignments are not used (malloc_device is used instead). - For the queue=None case the ``dpctl.SyclQueue()`` is used to allocate - memory. + An object representing allocation of SYCL USM-device memory. - MemoryUSMDevice(usm_obj) constructor create instance from `usm_obj` - expected to implement `__sycl_usm_array_interface__` protocol and exposing - a contiguous block of USM device allocation. Use `copy=True` to - perform a copy if USM type of the allocation represented by the argument - is other than 'device'. + Non-positive ``alignment`` values are ignored and + the allocator ``malloc_device`` is used for allocation instead. + If ``queue`` is ``None`` a cached default-constructed + :class:`dpctl.SyclQueue` is used to allocate memory. 
""" def __cinit__(self, other, *, Py_ssize_t alignment=0, SyclQueue queue=None, int copy=False): diff --git a/dpctl/memory/_sycl_usm_array_interface_utils.pxi b/dpctl/memory/_sycl_usm_array_interface_utils.pxi index 0812b2015f..f0ad9f4297 100644 --- a/dpctl/memory/_sycl_usm_array_interface_utils.pxi +++ b/dpctl/memory/_sycl_usm_array_interface_utils.pxi @@ -88,27 +88,38 @@ cdef object _pointers_from_shape_and_stride( Returns: tuple(min_disp, nbytes) """ + cdef Py_ssize_t nelems = 1 + cdef Py_ssize_t min_disp = 0 + cdef Py_ssize_t max_disp = 0 + cdef int i + cdef Py_ssize_t sh_i = 0 + cdef Py_ssize_t str_i = 0 if (nd > 0): if (ary_strides is None): nelems = 1 for si in ary_shape: sh_i = int(si) - if (sh_i <= 0): + if (sh_i < 0): raise ValueError("Array shape elements need to be positive") nelems = nelems * sh_i - return (ary_offset, nelems * itemsize) + return (ary_offset, max(nelems, 1) * itemsize) else: min_disp = ary_offset max_disp = ary_offset for i in range(nd): str_i = int(ary_strides[i]) sh_i = int(ary_shape[i]) - if (sh_i <= 0): + if (sh_i < 0): raise ValueError("Array shape elements need to be positive") - if (str_i > 0): - max_disp += str_i * (sh_i - 1) + if (sh_i > 0): + if (str_i > 0): + max_disp += str_i * (sh_i - 1) + else: + min_disp += str_i * (sh_i - 1) else: - min_disp += str_i * (sh_i - 1); + nelems = 0 + if nelems == 0: + return (ary_offset, itemsize) return (min_disp, (max_disp - min_disp + 1) * itemsize) elif (nd == 0): return (ary_offset, itemsize) diff --git a/dpctl/tensor/_ctors.py b/dpctl/tensor/_ctors.py index cecbdadb58..616bbb3498 100644 --- a/dpctl/tensor/_ctors.py +++ b/dpctl/tensor/_ctors.py @@ -24,6 +24,7 @@ import dpctl.tensor._tensor_impl as ti import dpctl.utils from dpctl.tensor._device import normalize_queue_device +from dpctl.tensor._usmarray import _is_object_with_buffer_protocol __doc__ = "Implementation of creation functions in :module:`dpctl.tensor`" @@ -66,11 +67,12 @@ def _array_info_dispatch(obj): return _empty_tuple, 
complex, _host_set if isinstance(obj, (list, tuple, range)): return _array_info_sequence(obj) - if any( - isinstance(obj, s) - for s in [np.integer, np.floating, np.complexfloating, np.bool_] - ): - return _empty_tuple, obj.dtype, _host_set + if _is_object_with_buffer_protocol(obj): + np_obj = np.array(obj) + return np_obj.shape, np_obj.dtype, _host_set + if hasattr(obj, "__sycl_usm_array_interface__"): + usm_ar = _usm_ndarray_from_suai(obj) + return usm_ar.shape, usm_ar.dtype, frozenset([usm_ar.sycl_queue]) raise ValueError(type(obj)) @@ -219,6 +221,18 @@ def _map_to_device_dtype(dt, q): raise RuntimeError(f"Unrecognized data type '{dt}' encountered.") +def _usm_ndarray_from_suai(obj): + sua_iface = getattr(obj, "__sycl_usm_array_interface__") + membuf = dpm.as_usm_memory(obj) + ary = dpt.usm_ndarray( + sua_iface["shape"], + dtype=sua_iface["typestr"], + buffer=membuf, + strides=sua_iface.get("strides", None), + ) + return ary + + def _asarray_from_numpy_ndarray( ary, dtype=None, usm_type=None, sycl_queue=None, order="K" ): @@ -276,17 +290,6 @@ def _asarray_from_numpy_ndarray( return res -def _is_object_with_buffer_protocol(obj): - "Returns `True` if object support Python buffer protocol" - try: - # use context manager to ensure - # buffer is instantly released - with memoryview(obj): - return True - except TypeError: - return False - - def _ensure_native_dtype_device_support(dtype, dev) -> None: """Check that dtype is natively supported by device. 
@@ -318,6 +321,122 @@ def _ensure_native_dtype_device_support(dtype, dev) -> None: ) +def _usm_types_walker(o, usm_types_list): + if isinstance(o, dpt.usm_ndarray): + usm_types_list.append(o.usm_type) + return + if hasattr(o, "__sycl_usm_array_interface__"): + usm_ar = _usm_ndarray_from_suai(o) + usm_types_list.append(usm_ar.usm_type) + return + if isinstance(o, (list, tuple)): + for el in o: + _usm_types_walker(el, usm_types_list) + return + raise TypeError + + +def _device_copy_walker(seq_o, res, events): + if isinstance(seq_o, dpt.usm_ndarray): + exec_q = res.sycl_queue + ht_ev, _ = ti._copy_usm_ndarray_into_usm_ndarray( + src=seq_o, dst=res, sycl_queue=exec_q + ) + events.append(ht_ev) + return + if hasattr(seq_o, "__sycl_usm_array_interface__"): + usm_ar = _usm_ndarray_from_suai(seq_o) + exec_q = res.sycl_queue + ht_ev, _ = ti._copy_usm_ndarray_into_usm_ndarray( + src=usm_ar, dst=res, sycl_queue=exec_q + ) + events.append(ht_ev) + return + if isinstance(seq_o, (list, tuple)): + for i, el in enumerate(seq_o): + _device_copy_walker(el, res[i], events) + return + raise TypeError + + +def _copy_through_host_walker(seq_o, usm_res): + if isinstance(seq_o, dpt.usm_ndarray): + usm_res[...] = dpt.asnumpy(seq_o).copy() + return + if hasattr(seq_o, "__sycl_usm_array_interface__"): + usm_ar = _usm_ndarray_from_suai(seq_o) + usm_res[...] = dpt.asnumpy(usm_ar).copy() + return + if isinstance(seq_o, (list, tuple)): + for i, el in enumerate(seq_o): + _copy_through_host_walker(el, usm_res[i]) + return + usm_res[...] 
= np.asarray(seq_o) + + +def _asarray_from_seq( + seq_obj, + seq_shape, + seq_dt, + seq_dev, + dtype=None, + usm_type=None, + sycl_queue=None, + order="C", +): + "Copies content of sequence `seq_obj` into a newly allocated array" + if usm_type is None: + usm_types_in_seq = [] + _usm_types_walker(seq_obj, usm_types_in_seq) + usm_type = dpctl.utils.get_coerced_usm_type(usm_types_in_seq) + dpctl.utils.validate_usm_type(usm_type) + if sycl_queue is None: + exec_q = seq_dev + alloc_q = seq_dev + else: + exec_q = dpctl.utils.get_execution_queue( + ( + sycl_queue, + seq_dev, + ) + ) + alloc_q = sycl_queue + if dtype is None: + dtype = _map_to_device_dtype(seq_dt, alloc_q) + else: + _mapped_dt = _map_to_device_dtype(dtype, alloc_q) + if _mapped_dt != dtype: + raise ValueError( + f"Device {alloc_q.sycl_device} " + f"does not support {dtype} natively." + ) + dtype = _mapped_dt + if order in "KA": + order = "C" + if isinstance(exec_q, dpctl.SyclQueue): + res = dpt.empty( + seq_shape, + dtype=dtype, + usm_type=usm_type, + sycl_queue=alloc_q, + order=order, + ) + ht_events = [] + _device_copy_walker(seq_obj, res, ht_events) + dpctl.SyclEvent.wait_for(ht_events) + return res + else: + res = dpt.empty( + seq_shape, + dtype=dtype, + usm_type=usm_type, + sycl_queue=alloc_q, + order=order, + ) + _copy_through_host_walker(seq_obj, res) + return res + + def asarray( obj, dtype=None, @@ -327,7 +446,9 @@ def asarray( sycl_queue=None, order="K", ): - """ + """ asarray(obj, dtype=None, copy=None, device=None, \ + usm_type=None, sycl_queue=None, order="K") + Converts `obj` to :class:`dpctl.tensor.usm_ndarray`. Args: @@ -347,7 +468,7 @@ def asarray( allocations if possible, but allowed to perform a copy otherwise. Default: `None`. order ("C","F","A","K", optional): memory layout of the output array. - Default: "C" + Default: "K" device (optional): array API concept of device where the output array is created. 
`device` can be `None`, a oneAPI filter selector string, an instance of :class:`dpctl.SyclDevice` corresponding to a @@ -407,14 +528,7 @@ def asarray( order=order, ) if hasattr(obj, "__sycl_usm_array_interface__"): - sua_iface = getattr(obj, "__sycl_usm_array_interface__") - membuf = dpm.as_usm_memory(obj) - ary = dpt.usm_ndarray( - sua_iface["shape"], - dtype=sua_iface["typestr"], - buffer=membuf, - strides=sua_iface.get("strides", None), - ) + ary = _usm_ndarray_from_suai(obj) return _asarray_from_usm_ndarray( ary, dtype=dtype, @@ -452,7 +566,7 @@ def asarray( raise ValueError( "Converting Python sequence to usm_ndarray requires a copy" ) - _, _, devs = _array_info_sequence(obj) + seq_shape, seq_dt, devs = _array_info_sequence(obj) if devs == _host_set: return _asarray_from_numpy_ndarray( np.asarray(obj, dtype=dtype, order=order), @@ -461,7 +575,17 @@ def asarray( sycl_queue=sycl_queue, order=order, ) - # for sequences + elif len(devs) == 1: + return _asarray_from_seq( + obj, + seq_shape, + seq_dt, + list(devs)[0], + dtype=dtype, + usm_type=usm_type, + sycl_queue=sycl_queue, + order=order, + ) raise NotImplementedError( "Converting Python sequences is not implemented" ) diff --git a/dpctl/tensor/_usmarray.pyx b/dpctl/tensor/_usmarray.pyx index 570490dd8d..d04da080e5 100644 --- a/dpctl/tensor/_usmarray.pyx +++ b/dpctl/tensor/_usmarray.pyx @@ -1629,3 +1629,8 @@ cdef api object UsmNDArray_MakeFromPtr( offset=offset ) return arr + + +def _is_object_with_buffer_protocol(o): + "Returns True if object support Python buffer protocol" + return _is_buffer(o) diff --git a/dpctl/tests/test_tensor_asarray.py b/dpctl/tests/test_tensor_asarray.py index a309b4e3a4..7a3b54ae83 100644 --- a/dpctl/tests/test_tensor_asarray.py +++ b/dpctl/tests/test_tensor_asarray.py @@ -234,3 +234,90 @@ def test_asarray_cross_device(): x = dpt.empty(10, dtype="i8", sycl_queue=q) y = dpt.asarray(x, sycl_queue=qprof) assert y.sycl_queue == qprof + + +def test_asarray_seq_of_arrays_simple(): + 
get_queue_or_skip() + r = dpt.arange(10) + m = dpt.asarray( + [ + r, + ] + * 4 + ) + assert m.shape == (4,) + r.shape + assert m.dtype == r.dtype + assert m.device == r.device + + +def test_asarray_seq_of_arrays(): + get_queue_or_skip() + m = dpt.ones((2, 4), dtype="i4") + w = dpt.zeros(4) + v = dpt.full(4, -1) + ar = dpt.asarray([m, [w, v]]) + assert ar.shape == (2, 2, 4) + assert ar.device == m.device + assert ar.device == w.device + assert ar.device == v.device + + +def test_asarray_seq_of_array_different_queue(): + get_queue_or_skip() + m = dpt.ones((2, 4), dtype="i4") + w = dpt.zeros(4) + v = dpt.full(4, -1) + qprof = dpctl.SyclQueue(property="enable_profiling") + ar = dpt.asarray([m, [w, v]], sycl_queue=qprof) + assert ar.shape == (2, 2, 4) + assert ar.sycl_queue == qprof + + +def test_asarray_seq_of_suai(): + get_queue_or_skip() + + class Dummy: + def __init__(self, obj, iface): + self.obj = obj + self.__sycl_usm_array_interface__ = iface + + o = dpt.empty(0, usm_type="shared") + d = Dummy(o, o.__sycl_usm_array_interface__) + x = dpt.asarray(d) + assert x.shape == (0,) + assert x.usm_type == o.usm_type + assert x._pointer == o._pointer + assert x.sycl_queue == o.sycl_queue + + x = dpt.asarray([d, d]) + assert x.shape == (2, 0) + assert x.usm_type == o.usm_type + assert x.sycl_queue == o.sycl_queue + + +def test_asarray_seq_of_suai_different_queue(): + q = get_queue_or_skip() + + class Dummy: + def __init__(self, obj, iface): + self.obj = obj + self.__sycl_usm_array_interface__ = iface + + @property + def shape(self): + return self.__sycl_usm_array_interface__["shape"] + + q2 = dpctl.SyclQueue() + assert q != q2 + o = dpt.empty((2, 2), usm_type="shared", sycl_queue=q2) + d = Dummy(o, o.__sycl_usm_array_interface__) + + x = dpt.asarray(d, sycl_queue=q) + assert x.sycl_queue == q + assert x.shape == d.shape + x = dpt.asarray([d], sycl_queue=q) + assert x.sycl_queue == q + assert x.shape == (1,) + d.shape + x = dpt.asarray([d, d], sycl_queue=q) + assert 
x.sycl_queue == q + assert x.shape == (2,) + d.shape