Skip to content

Commit

Permalink
Merge pull request #441 from IntelPython/fix-slicing-bugs
Browse files Browse the repository at this point in the history
Fix for slicing bugs
  • Loading branch information
oleksandr-pavlyk authored May 17, 2021
2 parents 5f531ad + d240a38 commit 31f95d6
Show file tree
Hide file tree
Showing 6 changed files with 131 additions and 15 deletions.
5 changes: 4 additions & 1 deletion dpctl/tensor/_device.py
Original file line number Diff line number Diff line change
Expand Up @@ -65,7 +65,10 @@ def create_device(cls, dev):
"targeting this device".format(dev)
)
else:
obj.sycl_queue_ = dpctl.SyclQueue(dev)
if dev is None:
obj.sycl_queue_ = dpctl.SyclQueue()
else:
obj.sycl_queue_ = dpctl.SyclQueue(dev)
return obj

@property
Expand Down
25 changes: 21 additions & 4 deletions dpctl/tensor/_slicing.pxi
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,23 @@
import numbers


cdef Py_ssize_t _slice_len(
    Py_ssize_t sl_start,
    Py_ssize_t sl_stop,
    Py_ssize_t sl_step
):
    """
    Compute len(range(sl_start, sl_stop, sl_step)).

    The arguments are expected to be a normalized (start, stop, step)
    triple, such as the one produced by ``slice.indices``; ``sl_step``
    must be non-zero.

    Returns 0 for every empty range, including the cases where the
    bounds are on the "wrong" side for the sign of the step
    (e.g. start=5, stop=0, step=1), which would otherwise produce a
    negative length.
    """
    if sl_step > 0:
        if sl_start >= sl_stop:
            # ascending step can never reach a stop at or below start
            return 0
        # 1 + argmax k such that sl_start + sl_step*k < sl_stop
        return 1 + ((sl_stop - sl_start - 1) // sl_step)
    if sl_start <= sl_stop:
        # descending step can never reach a stop at or above start
        return 0
    # 1 + argmax k such that sl_start + sl_step*k > sl_stop
    # (numerator and denominator are both non-positive here, so the
    # quotient is non-negative)
    return 1 + ((sl_stop - sl_start + 1) // sl_step)


cdef object _basic_slice_meta(object ind, tuple shape,
tuple strides, Py_ssize_t offset):
"""
Expand All @@ -33,9 +50,9 @@ cdef object _basic_slice_meta(object ind, tuple shape,
return ((1,) + shape, (0,) + strides, offset)
elif isinstance(ind, slice):
sl_start, sl_stop, sl_step = ind.indices(shape[0])
sh0 = (sl_stop - sl_start) // sl_step
sh0 = _slice_len(sl_start, sl_stop, sl_step)
str0 = sl_step * strides[0]
new_strides = strides if (sl_step == 1) else (str0,) + strides[1:]
new_strides = strides if (sl_step == 1 or sh0 == 0) else (str0,) + strides[1:]
return (
(sh0, ) + shape[1:],
new_strides,
Expand Down Expand Up @@ -101,8 +118,8 @@ cdef object _basic_slice_meta(object ind, tuple shape,
elif isinstance(ind_i, slice):
k_new = k + 1
sl_start, sl_stop, sl_step = ind_i.indices(shape[k])
sh_i = (sl_stop - sl_start) // sl_step
str_i = sl_step * strides[k]
sh_i = _slice_len(sl_start, sl_stop, sl_step)
str_i = (1 if sh_i == 0 else sl_step) * strides[k]
new_shape.append(sh_i)
new_strides.append(str_i)
new_offset = new_offset + sl_start * strides[k]
Expand Down
23 changes: 19 additions & 4 deletions dpctl/tensor/_stride_utils.pxi
Original file line number Diff line number Diff line change
Expand Up @@ -72,7 +72,7 @@ cdef int _from_input_shape_strides(

# 0-d array
if (nd == 0):
contig[0] = USM_ARRAY_C_CONTIGUOUS
contig[0] = (USM_ARRAY_C_CONTIGUOUS | USM_ARRAY_F_CONTIGUOUS)
nelems[0] = 1
min_disp[0] = 0
max_disp[0] = 0
Expand All @@ -88,17 +88,28 @@ cdef int _from_input_shape_strides(
shape_arr[i] = <Py_ssize_t> shape[i]
elem_count *= shape_arr[i]
if elem_count == 0:
contig[0] = USM_ARRAY_C_CONTIGUOUS
contig[0] = (USM_ARRAY_C_CONTIGUOUS | USM_ARRAY_F_CONTIGUOUS)
nelems[0] = 1
min_disp[0] = 0
max_disp[0] = 0
strides_ptr[0] = <Py_ssize_t *>(<size_t>0)
if strides is None:
strides_ptr[0] = <Py_ssize_t *>(<size_t>0)
else:
strides_arr = <Py_ssize_t*>PyMem_Malloc(nd * sizeof(Py_ssize_t))
if (not strides_arr):
PyMem_Free(shape_ptr[0]);
shape_ptr[0] = <Py_ssize_t *>(<size_t>0)
return ERROR_MALLOC
strides_ptr[0] = strides_arr
for i in range(0, nd):
strides_arr[i] = <Py_ssize_t> strides[i]
return 0
nelems[0] = elem_count

if (strides is None):
# no need to allocate and populate strides
if (int(order) not in [ord('C'), ord('F'), ord('c'), ord('f')]):
PyMem_Free(shape_ptr[0]);
shape_ptr[0] = <Py_ssize_t *>(<size_t>0)
return ERROR_INCORRECT_ORDER
if order == <char> ord('C') or order == <char> ord('c'):
contig[0] = USM_ARRAY_C_CONTIGUOUS
Expand All @@ -112,6 +123,8 @@ cdef int _from_input_shape_strides(
and len(strides) == nd):
strides_arr = <Py_ssize_t*>PyMem_Malloc(nd * sizeof(Py_ssize_t))
if (not strides_arr):
PyMem_Free(shape_ptr[0]);
shape_ptr[0] = <Py_ssize_t *>(<size_t>0)
return ERROR_MALLOC
strides_ptr[0] = strides_arr
for i in range(0, nd):
Expand Down Expand Up @@ -143,6 +156,8 @@ cdef int _from_input_shape_strides(
contig[0] = 0 # non-contiguous
return 0
else:
PyMem_Free(shape_ptr[0]);
shape_ptr[0] = <Py_ssize_t *>(<size_t>0)
return ERROR_UNEXPECTED_STRIDES
# return ERROR_INTERNAL

Expand Down
8 changes: 4 additions & 4 deletions dpctl/tensor/_types.pxi
Original file line number Diff line number Diff line change
Expand Up @@ -41,7 +41,7 @@ cdef str _make_typestr(int typenum):
Make typestring from type number
"""
cdef type_to_str = ['|b1', '|i1', '|u1', '|i2', '|u2',
'|i4', '|u4', '', '', '|i8', '|u8',
'|i4', '|u4', '|i4', '|u4', '|i8', '|u8',
'|f4', '|f8', '', '|c8', '|c16', '']

if (typenum < 0):
Expand All @@ -63,8 +63,8 @@ cdef int type_bytesize(int typenum):
NPY_USHORT=4 : 2
NPY_INT=5 : 4
NPY_UINT=6 : 4
NPY_LONG=7 :
NPY_ULONG=8 :
NPY_LONG=7 : 4
NPY_ULONG=8 : 4
NPY_LONGLONG=9 : 8
NPY_ULONGLONG=10 : 8
NPY_FLOAT=11 : 4
Expand All @@ -76,7 +76,7 @@ cdef int type_bytesize(int typenum):
NPY_HALF=23 : 2
"""
cdef int *type_to_bytesize = [
1, 1, 1, 2, 2, 4, 4, 8, 8, 8, 8, 4, 8, -1, 8, 16, -1]
1, 1, 1, 2, 2, 4, 4, 4, 4, 8, 8, 4, 8, -1, 8, 16, -1]

if typenum < 0:
return -1
Expand Down
3 changes: 2 additions & 1 deletion dpctl/tensor/_usmarray.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -475,7 +475,8 @@ cdef class usm_ndarray:
cdef usm_ndarray res

res = usm_ndarray.__new__(
usm_ndarray, _meta[0],
usm_ndarray,
_meta[0],
dtype=_make_typestr(self.typenum_),
strides=_meta[1],
buffer=self.base_,
Expand Down
82 changes: 81 additions & 1 deletion dpctl/tests/test_usm_ndarray_ctor.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,12 +17,14 @@
import numbers

import numpy as np
import numpy.lib.stride_tricks as np_st
import pytest

import dpctl

# import dpctl.memory as dpmem
import dpctl.tensor as dpt
from dpctl.tensor._usmarray import Device


@pytest.mark.parametrize(
Expand Down Expand Up @@ -112,6 +114,8 @@ def test_properties():
(2, 2, None, 3, 4),
(Ellipsis,),
(None, slice(0, None, 2), Ellipsis, slice(0, None, 3)),
(None, slice(1, None, 2), Ellipsis, slice(1, None, 3)),
(None, slice(None, -1, -2), Ellipsis, slice(2, None, 3)),
(
slice(None, None, -1),
slice(None, None, -1),
Expand All @@ -121,10 +125,86 @@ def test_properties():
],
)
def test_basic_slice(ind):
X = dpt.usm_ndarray((2 * 3, 2 * 4, 3 * 5, 3 * 7), dtype="u1")
X = dpt.usm_ndarray((2 * 3, 2 * 4, 3 * 5, 2 * 7), dtype="u1")
Xnp = np.empty(X.shape, dtype=X.dtype)
S = X[ind]
Snp = Xnp[ind]
assert S.shape == Snp.shape
assert S.strides == Snp.strides
assert S.dtype == X.dtype


def _from_numpy(np_ary, device=None, usm_type="shared"):
if type(np_ary) is np.ndarray:
if np_ary.flags["FORC"]:
x = np_ary
else:
x = np.ascontiguous(np_ary)
R = dpt.usm_ndarray(
np_ary.shape,
dtype=np_ary.dtype,
buffer=usm_type,
buffer_ctor_kwargs={
"queue": Device.create_device(device).sycl_queue
},
)
R.usm_data.copy_from_host(x.reshape((-1)).view("|u1"))
return R
else:
raise ValueError("Expected numpy.ndarray, got {}".format(type(np_ary)))


def _to_numpy(usm_ary):
    """
    Build a NumPy array on the host that mirrors ``usm_ary``.

    The USM buffer behind ``usm_ary`` is copied to the host with
    ``copy_to_host()`` and a strided NumPy view with the same shape,
    dtype and (byte-scaled) strides is returned.

    Raises:
        ValueError: if ``usm_ary`` is not exactly a ``dpt.usm_ndarray``.
    """
    if type(usm_ary) is not dpt.usm_ndarray:
        raise ValueError(
            "Expected dpctl.tensor.usm_ndarray, got {}".format(type(usm_ary))
        )

    usm_mem = usm_ary.usm_data
    total_bytes = usm_mem.nbytes
    host_copy = usm_mem.copy_to_host().view(usm_ary.dtype)
    itemsize = usm_ary.itemsize
    # The interface offset is scaled by the item size to get a byte offset
    # (presumably the offset is expressed in elements -- TODO confirm).
    byte_offset = (
        usm_ary.__sycl_usm_array_interface__["offset"] * itemsize
    )
    raw_bytes = np.ndarray((total_bytes,), dtype="u1", buffer=host_copy)
    typed_tail = raw_bytes[byte_offset:].view(usm_ary.dtype)
    # usm_ary.strides are multiplied by the item size to obtain byte strides.
    byte_strides = tuple(itemsize * step for step in usm_ary.strides)
    return np_st.as_strided(
        typed_tail, shape=usm_ary.shape, strides=byte_strides
    )


def test_slice_constructor_1d():
    """Slicing a 1-d usm_ndarray must select the same data as NumPy."""
    host_ary = np.arange(37, dtype="i4")
    # NOTE(review): requests a "gpu" device -- presumably the test needs
    # such a device to be available; confirm how this is handled in CI.
    usm_ary = _from_numpy(host_ary, device="gpu", usm_type="device")
    cases = (
        slice(1, None, 2),
        slice(0, None, 3),
        slice(1, None, 3),
        slice(2, None, 3),
        slice(None, None, -1),
        slice(-2, 2, -2),
        slice(-1, 1, -2),
        slice(None, None, -13),
    )
    for ind in cases:
        actual = _to_numpy(usm_ary[ind])
        expected = host_ary[ind]
        assert np.array_equal(actual, expected), "Failed for {}".format(ind)


def test_slice_constructor_3d():
    """Slicing a 3-d usm_ndarray must select the same data as NumPy."""
    # The host array is created with np.empty; its contents are whatever
    # the allocation holds and are copied as-is into the USM array.
    host_ary = np.empty((37, 24, 35), dtype="i4")
    # NOTE(review): requests a "gpu" device -- presumably the test needs
    # such a device to be available; confirm how this is handled in CI.
    usm_ary = _from_numpy(host_ary, device="gpu", usm_type="device")
    cases = (
        slice(1, None, 2),
        slice(0, None, 3),
        slice(1, None, 3),
        slice(2, None, 3),
        slice(None, None, -1),
        slice(-2, 2, -2),
        slice(-1, 1, -2),
        slice(None, None, -13),
        (slice(None, None, -2), Ellipsis, None, 15),
    )
    for ind in cases:
        actual = _to_numpy(usm_ary[ind])
        expected = host_ary[ind]
        assert np.array_equal(actual, expected), "Failed for {}".format(ind)

0 comments on commit 31f95d6

Please sign in to comment.