Skip to content

Commit

Permalink
Merge pull request #1073 from IntelPython/fix-gh-1071-from-dlpack
Browse files Browse the repository at this point in the history
Fix gh 1071 from dlpack
  • Loading branch information
oleksandr-pavlyk authored Feb 16, 2023
2 parents 8e63098 + 84b0232 commit 6e58a41
Show file tree
Hide file tree
Showing 4 changed files with 69 additions and 25 deletions.
55 changes: 38 additions & 17 deletions dpctl/tensor/_dlpack.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -71,6 +71,7 @@ cdef extern from 'dlpack/dlpack.h' nogil:
kDLFloat
kDLBfloat
kDLComplex
kDLBool

ctypedef struct DLDataType:
uint8_t code
Expand Down Expand Up @@ -244,7 +245,7 @@ cpdef to_dlpack_capsule(usm_ndarray usm_ary) except+:
dl_tensor.dtype.lanes = <uint16_t>1
dl_tensor.dtype.bits = <uint8_t>(ary_dt.itemsize * 8)
if (ary_dtk == "b"):
dl_tensor.dtype.code = <uint8_t>kDLUInt
dl_tensor.dtype.code = <uint8_t>kDLBool
elif (ary_dtk == "u"):
dl_tensor.dtype.code = <uint8_t>kDLUInt
elif (ary_dtk == "i"):
Expand Down Expand Up @@ -311,14 +312,17 @@ cpdef usm_ndarray from_dlpack_capsule(object py_caps) except +:
cdef DLManagedTensor *dlm_tensor = NULL
cdef bytes usm_type
cdef size_t sz = 1
cdef size_t alloc_sz = 1
cdef int i
cdef int device_id = -1
cdef int element_bytesize = 0
cdef Py_ssize_t offset_min = 0
cdef Py_ssize_t offset_max = 0
cdef int64_t stride_i
cdef char *mem_ptr = NULL
cdef Py_ssize_t mem_ptr_delta = 0
cdef Py_ssize_t element_offset = 0
cdef int64_t stride_i = -1
cdef int64_t shape_i = -1

if not cpython.PyCapsule_IsValid(py_caps, 'dltensor'):
if cpython.PyCapsule_IsValid(py_caps, 'used_dltensor'):
Expand Down Expand Up @@ -370,22 +374,22 @@ cpdef usm_ndarray from_dlpack_capsule(object py_caps) except +:
raise BufferError(
"Can not import DLPack tensor with lanes != 1"
)
offset_min = 0
if dlm_tensor.dl_tensor.strides is NULL:
for i in range(dlm_tensor.dl_tensor.ndim):
sz = sz * dlm_tensor.dl_tensor.shape[i]
offset_max = sz - 1
else:
offset_min = 0
offset_max = 0
for i in range(dlm_tensor.dl_tensor.ndim):
stride_i = dlm_tensor.dl_tensor.strides[i]
if stride_i > 0:
offset_max = offset_max + stride_i * (
dlm_tensor.dl_tensor.shape[i] - 1
)
else:
offset_min = offset_min + stride_i * (
dlm_tensor.dl_tensor.shape[i] - 1
)
shape_i = dlm_tensor.dl_tensor.shape[i]
if shape_i > 1:
shape_i -= 1
if stride_i > 0:
offset_max = offset_max + stride_i * shape_i
else:
offset_min = offset_min + stride_i * shape_i
sz = offset_max - offset_min + 1
if sz == 0:
sz = 1
Expand All @@ -401,14 +405,29 @@ cpdef usm_ndarray from_dlpack_capsule(object py_caps) except +:
if dlm_tensor.dl_tensor.data is NULL:
usm_mem = dpmem.MemoryUSMDevice(sz, q)
else:
mem_ptr = <char *>dlm_tensor.dl_tensor.data + dlm_tensor.dl_tensor.byte_offset
mem_ptr = mem_ptr - (element_offset * element_bytesize)
usm_mem = c_dpmem._Memory.create_from_usm_pointer_size_qref(
mem_ptr_delta = dlm_tensor.dl_tensor.byte_offset - (
element_offset * element_bytesize
)
mem_ptr = <char *>dlm_tensor.dl_tensor.data
alloc_sz = dlm_tensor.dl_tensor.byte_offset + <uint64_t>(
(offset_max + 1) * element_bytesize)
tmp = c_dpmem._Memory.create_from_usm_pointer_size_qref(
<DPCTLSyclUSMRef> mem_ptr,
sz,
max(alloc_sz, <uint64_t>element_bytesize),
(<c_dpctl.SyclQueue>q).get_queue_ref(),
memory_owner=dlm_holder
)
if mem_ptr_delta == 0:
usm_mem = tmp
else:
alloc_sz = dlm_tensor.dl_tensor.byte_offset + <uint64_t>(
(offset_max * element_bytesize + mem_ptr_delta))
usm_mem = c_dpmem._Memory.create_from_usm_pointer_size_qref(
<DPCTLSyclUSMRef> (mem_ptr + (element_bytesize - mem_ptr_delta)),
max(alloc_sz, <uint64_t>element_bytesize),
(<c_dpctl.SyclQueue>q).get_queue_ref(),
memory_owner=tmp
)
py_shape = list()
for i in range(dlm_tensor.dl_tensor.ndim):
py_shape.append(dlm_tensor.dl_tensor.shape[i])
Expand All @@ -426,8 +445,10 @@ cpdef usm_ndarray from_dlpack_capsule(object py_caps) except +:
ary_dt = np.dtype("f" + str(element_bytesize))
elif (dlm_tensor.dl_tensor.dtype.code == kDLComplex):
ary_dt = np.dtype("c" + str(element_bytesize))
elif (dlm_tensor.dl_tensor.dtype.code == kDLBool):
ary_dt = np.dtype("?")
else:
raise ValueError(
raise BufferError(
"Can not import DLPack tensor with type code {}.".format(
<object>dlm_tensor.dl_tensor.dtype.code
)
Expand All @@ -441,7 +462,7 @@ cpdef usm_ndarray from_dlpack_capsule(object py_caps) except +:
)
return res_ary
else:
raise ValueError(
raise BufferError(
"The DLPack tensor resides on unsupported device."
)

Expand Down
2 changes: 1 addition & 1 deletion dpctl/tensor/include/dlpack/README.md
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
# DLPack header

The header `dlpack.h` downloaded from `https://github.com/dmlc/dlpack.git` remote at tag 0.7 commit [`e2bdd3bee8`](https://github.com/dmlc/dlpack/commit/e2bdd3bee8cb6501558042633fa59144cc8b7f5f).
The header `dlpack.h` downloaded from `https://github.com/dmlc/dlpack.git` remote at tag v0.8 commit [`365b823`](https://github.com/dmlc/dlpack/commit/365b823cedb281cd0240ca601aba9b78771f91a3).

The file can also be viewed using the GitHub web interface at https://github.com/dmlc/dlpack/blob/365b823cedb281cd0240ca601aba9b78771f91a3/include/dlpack/dlpack.h

Expand Down
11 changes: 7 additions & 4 deletions dpctl/tensor/include/dlpack/dlpack.h
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@
#endif

/*! \brief The current version of dlpack */
#define DLPACK_VERSION 70
#define DLPACK_VERSION 80

/*! \brief The current ABI version of dlpack */
#define DLPACK_ABI_VERSION 1
Expand Down Expand Up @@ -126,6 +126,8 @@ typedef enum {
* (C/C++/Python layout: compact struct per complex number)
*/
kDLComplex = 5U,
/*! \brief boolean */
kDLBool = 6U,
} DLDataTypeCode;

/*!
Expand All @@ -134,10 +136,11 @@ typedef enum {
* export an array with non-native endianness
*
* Examples
* - float: type_code = 2, bits = 32, lanes=1
* - float4(vectorized 4 float): type_code = 2, bits = 32, lanes=4
* - int8: type_code = 0, bits = 8, lanes=1
* - float: type_code = 2, bits = 32, lanes = 1
* - float4(vectorized 4 float): type_code = 2, bits = 32, lanes = 4
* - int8: type_code = 0, bits = 8, lanes = 1
* - std::complex<float>: type_code = 5, bits = 64, lanes = 1
* - bool: type_code = 6, bits = 8, lanes = 1 (as per common array library convention, the underlying storage size of bool is 8 bits)
*/
typedef struct {
/*!
Expand Down
26 changes: 23 additions & 3 deletions dpctl/tests/test_usm_ndarray_dlpack.py
Original file line number Diff line number Diff line change
Expand Up @@ -101,9 +101,7 @@ def test_from_dlpack(shape, typestr, usm_type):
X = dpt.empty(shape, dtype=typestr, usm_type=usm_type, device=sycl_dev)
Y = dpt.from_dlpack(X)
assert X.shape == Y.shape
assert X.dtype == Y.dtype or (
str(X.dtype) == "bool" and str(Y.dtype) == "uint8"
)
assert X.dtype == Y.dtype
assert X.sycl_device == Y.sycl_device
assert X.usm_type == Y.usm_type
assert X._pointer == Y._pointer
Expand All @@ -113,6 +111,28 @@ def test_from_dlpack(shape, typestr, usm_type):
assert V.strides == W.strides


@pytest.mark.parametrize("mod", [2, 5])
def test_from_dlpack_strides(mod, typestr, usm_type):
    """Check ``dpt.from_dlpack`` on negatively strided 1-D views.

    For each device returned by ``dpctl.get_devices()`` a base array of
    ``3 * mod`` elements is allocated. It is then sliced with step
    ``-mod`` starting ``start + 1`` elements from the end, for every
    ``start`` in ``range(mod)``. Each view is passed through
    ``dpt.from_dlpack`` and the result must agree with the view on
    shape, dtype, SYCL device, USM type and data pointer. For results
    with non-zero ``ndim`` the reversed array is imported once more and
    its strides must be preserved.

    ``mod`` is parametrized over 2 and 5. ``typestr`` and ``usm_type``
    are supplied from outside this function (presumably pytest
    parametrization/fixtures defined elsewhere in the test module --
    confirm there).
    """
    for device in dpctl.get_devices():
        skip_if_dtype_not_supported(typestr, device)
        base = dpt.empty(
            3 * mod, dtype=typestr, usm_type=usm_type, device=device
        )
        for start in range(mod):
            # Walk backwards through the base array, so the exported
            # view has a negative stride.
            view = base[-start - 1 :: -mod]
            imported = dpt.from_dlpack(view)
            assert view.shape == imported.shape
            assert view.dtype == imported.dtype
            assert view.sycl_device == imported.sycl_device
            assert view.usm_type == imported.usm_type
            assert view._pointer == imported._pointer
            if not imported.ndim:
                continue
            # Reverse the imported array and import it again; the
            # strides must survive the second round trip.
            flipped = imported[::-1]
            reimported = dpt.from_dlpack(flipped)
            assert flipped.strides == reimported.strides


def test_from_dlpack_input_validation():
vstr = dpt._dlpack.get_build_dlpack_version()
assert type(vstr) is str
Expand Down

0 comments on commit 6e58a41

Please sign in to comment.