From 3203b9c340cd3646781949d5d908a8ffd18e5269 Mon Sep 17 00:00:00 2001 From: Nikita Grigorian Date: Fri, 26 Aug 2022 19:58:44 -0700 Subject: [PATCH 01/14] Implemented dpt.eye constructor --- dpctl/tensor/__init__.py | 2 ++ dpctl/tensor/_ctors.py | 65 ++++++++++++++++++++++++++++++++++++++++ 2 files changed, 67 insertions(+) diff --git a/dpctl/tensor/__init__.py b/dpctl/tensor/__init__.py index 5fc9329077..7178547a4a 100644 --- a/dpctl/tensor/__init__.py +++ b/dpctl/tensor/__init__.py @@ -29,6 +29,7 @@ empty_like, full, full_like, + eye, linspace, ones, ones_like, @@ -62,6 +63,7 @@ "zeros", "ones", "full", + "eye", "linspace", "empty_like", "zeros_like", diff --git a/dpctl/tensor/_ctors.py b/dpctl/tensor/_ctors.py index 0f660c8cee..1d0f368d43 100644 --- a/dpctl/tensor/_ctors.py +++ b/dpctl/tensor/_ctors.py @@ -1035,3 +1035,68 @@ def linspace( ) hev.wait() return res + +def eye( + n_rows, + n_cols=None, + /, + *, + k=0, + dtype=None, + order="C", + device=None, + usm_type="device", + sycl_queue=None + ): + """ + eye(n_rows, n_cols = None, /, *, k = 0, dtype = None, \ + device = None, usm_type="device", sycl_queue=None) -> usm_ndarray + + Creates `usm_ndarray` where the `k`th diagonal elements are one and others are zero. + + Args: + n_rows: number of rows in the output array. + n_cols (optional): number of columns in the output array. If None, + n_cols = n_rows. Default: `None`. + k: index of the diagonal, with 0 as the main diagonal. A positive value of k + is an upper diagonal, a negative value is a low diagonal. Default: `0`. + dtype (optional): data type of the array. Can be typestring, + a `numpy.dtype` object, `numpy` char string, or a numpy + scalar type. Default: None + order ("C" or F"): memory layout for the array. Default: "C" + device (optional): array API concept of device where the output array + is created. `device` can be `None`, a oneAPI filter selector string, + an instance of :class:`dpctl.SyclDevice` corresponding to a + non-partitioned SYCL device, an instance of + :class:`dpctl.SyclQueue`, or a `Device` object returnedby + `dpctl.tensor.usm_array.device`. Default: `None`. + usm_type ("device"|"shared"|"host", optional): The type of SYCL USM + allocation for the output array. Default: `"device"`. + sycl_queue (:class:`dpctl.SyclQueue`, optional): The SYCL queue to use + for output array allocation and copying. `sycl_queue` and `device` + are exclusive keywords, i.e. use one or another. If both are + specified, a `TypeError` is raised unless both imply the same + underlying SYCL queue to be used. If both are `None`, the + `dpctl.SyclQueue()` is used for allocation and copying. + Default: `None`. + """ + if n_cols is None: + n_cols = n_rows + #allocate a 1D array of zeros, length equal to n_cols * n_rows + x = zeros((n_rows * n_cols,), dtype=dtype, order=order, device=device, usm_type=usm_type, sycl_queue=sycl_queue) + if k > -n_rows and k < n_cols: + #find the length of an arbitrary diagonal + l = min(n_cols, n_rows, n_cols-k, n_rows+k) + #i is the first element of the diagonal, j is the last, s is the step size + if order == "C": + s = n_cols+1 + i = k if k >= 0 else n_cols*-k + else: + s = n_rows+1 + i = n_rows*k if k > 0 else -k + #last index + 1 prevents slice from excluding the last element + j = i+((l-1)*s)+1 + x[i:j:s] = 1 + #copy=False ensures no wasted memory copying the array + #and as the order parameter is the same, a copy should never be necessary + return dpt.reshape(x, (n_rows, n_cols), order=order, copy=False) From 7e1b918a6c962755bd3693b9b8bb87e6a32c7aab Mon Sep 17 00:00:00 2001 From: Nikita Grigorian Date: Mon, 29 Aug 2022 11:10:45 -0700 Subject: [PATCH 02/14] Added dpt.eye test, type error for k keyword --- dpctl/tensor/_ctors.py | 9 +++++++-- dpctl/tests/test_usm_ndarray_ctor.py | 23 +++++++++++++++++++++++ 2 files changed, 30 insertions(+), 2 deletions(-) diff --git a/dpctl/tensor/_ctors.py b/dpctl/tensor/_ctors.py index 1d0f368d43..43031f4d0f 100644 --- a/dpctl/tensor/_ctors.py +++ b/dpctl/tensor/_ctors.py @@ -1083,11 +1083,16 @@ def eye( if n_cols is None: n_cols = n_rows #allocate a 1D array of zeros, length equal to n_cols * n_rows + k_dt = type(k) + if not np.issubdtype(k_dt, np.integer): + raise TypeError( + "k keyword must be an integer, got {type}".format(type=k_dt) + ) x = zeros((n_rows * n_cols,), dtype=dtype, order=order, device=device, usm_type=usm_type, sycl_queue=sycl_queue) if k > -n_rows and k < n_cols: - #find the length of an arbitrary diagonal + #find the length of the diagonal l = min(n_cols, n_rows, n_cols-k, n_rows+k) - #i is the first element of the diagonal, j is the last, s is the step size + #i is the first index of the diagonal in 1D index space, j is the last, s is the step size if order == "C": s = n_cols+1 i = k if k >= 0 else n_cols*-k diff --git a/dpctl/tests/test_usm_ndarray_ctor.py b/dpctl/tests/test_usm_ndarray_ctor.py index 26ea45bc76..a6a8fa003c 100644 --- a/dpctl/tests/test_usm_ndarray_ctor.py +++ b/dpctl/tests/test_usm_ndarray_ctor.py @@ -16,6 +16,7 @@ import ctypes import numbers +from typing import Type import numpy as np import pytest @@ -1276,6 +1277,8 @@ def test_common_arg_validation(): dpt.ones_like(X, order=order) with pytest.raises(ValueError): dpt.full_like(X, 1, order=order) + with pytest.raises(ValueError): + dpt.eye(4, order=order) X = dict() # test for type validation with pytest.raises(TypeError): @@ -1286,3 +1289,23 @@ def test_common_arg_validation(): dpt.ones_like(X) with pytest.raises(TypeError): dpt.full_like(X, 1) + with pytest.raises(TypeError): + dpt.eye(4, k=1.2) + +@pytest.mark.parametrize("shapes", [(0,), (1,), (7,), (6, 1), (3, 9), (10,5)]) +@pytest.mark.parametrize("k", np.arange(-4, 5, 1)) +@pytest.mark.parametrize("orders", ["C", "F"]) +def test_eye(shapes, k, orders): + try: + q = dpctl.SyclQueue() + except dpctl.SyclQueueCreationError: + pytest.skip("Queue could not be created") + + shape=shapes + k=k + order=orders + + Xnp = np.eye(*shape, k=k, order=order) + X = dpt.eye(*shape, k=k, order=order, sycl_queue=q) + + np.testing.assert_array_equal(Xnp, dpt.asnumpy(X)) From 717e60c4e641bd90b35468ef552839242614d8e3 Mon Sep 17 00:00:00 2001 From: Nikita Grigorian Date: Mon, 29 Aug 2022 11:50:27 -0700 Subject: [PATCH 03/14] Formatting of test and imports corrected --- dpctl/tensor/__init__.py | 2 +- dpctl/tests/test_usm_ndarray_ctor.py | 6 +++--- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/dpctl/tensor/__init__.py b/dpctl/tensor/__init__.py index 7178547a4a..6c9ac88fc2 100644 --- a/dpctl/tensor/__init__.py +++ b/dpctl/tensor/__init__.py @@ -27,9 +27,9 @@ asarray, empty, empty_like, + eye, full, full_like, - eye, linspace, ones, ones_like, diff --git a/dpctl/tests/test_usm_ndarray_ctor.py b/dpctl/tests/test_usm_ndarray_ctor.py index a6a8fa003c..f6720af1d9 100644 --- a/dpctl/tests/test_usm_ndarray_ctor.py +++ b/dpctl/tests/test_usm_ndarray_ctor.py @@ -1301,9 +1301,9 @@ def test_eye(shapes, k, orders): except dpctl.SyclQueueCreationError: pytest.skip("Queue could not be created") - shape=shapes - k=k - order=orders + shape = shapes + k = k + order = orders Xnp = np.eye(*shape, k=k, order=order) X = dpt.eye(*shape, k=k, order=order, sycl_queue=q) From e694d17999e323c755b1a2af1c5eabf88d7542e1 Mon Sep 17 00:00:00 2001 From: Nikita Grigorian Date: Mon, 29 Aug 2022 12:41:20 -0700 Subject: [PATCH 04/14] Formatting, documentation adjusted --- dpctl/tensor/_ctors.py | 125 +++++++++++++++------------ dpctl/tests/test_usm_ndarray_ctor.py | 4 +- 2 files changed, 71 insertions(+), 58 deletions(-) diff --git a/dpctl/tensor/_ctors.py b/dpctl/tensor/_ctors.py index 43031f4d0f..40091657ad 100644 --- a/dpctl/tensor/_ctors.py +++ b/dpctl/tensor/_ctors.py @@ -1036,6 +1036,7 @@ def linspace( hev.wait() return res + def eye( n_rows, n_cols=None, @@ -1047,61 +1048,73 @@ def eye( device=None, usm_type="device", sycl_queue=None - ): - """ - eye(n_rows, n_cols = None, /, *, k = 0, dtype = None, \ - device = None, usm_type="device", sycl_queue=None) -> usm_ndarray +): + """ + eye(n_rows, n_cols = None, /, *, k = 0, dtype = None, \ + device = None, usm_type="device", sycl_queue=None) -> usm_ndarray - Creates `usm_ndarray` where the `k`th diagonal elements are one and others are zero. + Creates `usm_ndarray` with ones on the `k`th diagonal. - Args: - n_rows: number of rows in the output array. - n_cols (optional): number of columns in the output array. If None, - n_cols = n_rows. Default: `None`. - k: index of the diagonal, with 0 as the main diagonal. A positive value of k - is an upper diagonal, a negative value is a low diagonal. Default: `0`. - dtype (optional): data type of the array. Can be typestring, - a `numpy.dtype` object, `numpy` char string, or a numpy - scalar type. Default: None - order ("C" or F"): memory layout for the array. Default: "C" - device (optional): array API concept of device where the output array - is created. `device` can be `None`, a oneAPI filter selector string, - an instance of :class:`dpctl.SyclDevice` corresponding to a - non-partitioned SYCL device, an instance of - :class:`dpctl.SyclQueue`, or a `Device` object returnedby - `dpctl.tensor.usm_array.device`. Default: `None`. - usm_type ("device"|"shared"|"host", optional): The type of SYCL USM - allocation for the output array. Default: `"device"`. - sycl_queue (:class:`dpctl.SyclQueue`, optional): The SYCL queue to use - for output array allocation and copying. `sycl_queue` and `device` - are exclusive keywords, i.e. use one or another. If both are - specified, a `TypeError` is raised unless both imply the same - underlying SYCL queue to be used. If both are `None`, the - `dpctl.SyclQueue()` is used for allocation and copying. - Default: `None`. - """ - if n_cols is None: - n_cols = n_rows - #allocate a 1D array of zeros, length equal to n_cols * n_rows - k_dt = type(k) - if not np.issubdtype(k_dt, np.integer): - raise TypeError( - "k keyword must be an integer, got {type}".format(type=k_dt) - ) - x = zeros((n_rows * n_cols,), dtype=dtype, order=order, device=device, usm_type=usm_type, sycl_queue=sycl_queue) - if k > -n_rows and k < n_cols: - #find the length of the diagonal - l = min(n_cols, n_rows, n_cols-k, n_rows+k) - #i is the first index of the diagonal in 1D index space, j is the last, s is the step size - if order == "C": - s = n_cols+1 - i = k if k >= 0 else n_cols*-k - else: - s = n_rows+1 - i = n_rows*k if k > 0 else -k - #last index + 1 prevents slice from excluding the last element - j = i+((l-1)*s)+1 - x[i:j:s] = 1 - #copy=False ensures no wasted memory copying the array - #and as the order parameter is the same, a copy should never be necessary - return dpt.reshape(x, (n_rows, n_cols), order=order, copy=False) + Args: + n_rows: number of rows in the output array. + n_cols (optional): number of columns in the output array. If None, + n_cols = n_rows. Default: `None`. + k: index of the diagonal, with 0 as the main diagonal. + A positive value of k is a superdiagonal, a negative value + is a subdiagonal. + Raises `TypeError` if k is not an integer. + Default: `0`. + dtype (optional): data type of the array. Can be typestring, + a `numpy.dtype` object, `numpy` char string, or a numpy + scalar type. Default: None + order ("C" or F"): memory layout for the array. Default: "C" + device (optional): array API concept of device where the output array + is created. `device` can be `None`, a oneAPI filter selector string, + an instance of :class:`dpctl.SyclDevice` corresponding to a + non-partitioned SYCL device, an instance of + :class:`dpctl.SyclQueue`, or a `Device` object returnedby + `dpctl.tensor.usm_array.device`. Default: `None`. + usm_type ("device"|"shared"|"host", optional): The type of SYCL USM + allocation for the output array. Default: `"device"`. + sycl_queue (:class:`dpctl.SyclQueue`, optional): The SYCL queue to use + for output array allocation and copying. `sycl_queue` and `device` + are exclusive keywords, i.e. use one or another. If both are + specified, a `TypeError` is raised unless both imply the same + underlying SYCL queue to be used. If both are `None`, the + `dpctl.SyclQueue()` is used for allocation and copying. + Default: `None`. + """ + if n_cols is None: + n_cols = n_rows + # allocate a 1D array of zeros, length equal to n_cols * n_rows + k_dt = type(k) + if not np.issubdtype(k_dt, np.integer): + raise TypeError( + "k keyword must be an integer, got {type}".format(type=k_dt) + ) + x = zeros( + (n_rows * n_cols,), + dtype=dtype, + order=order, + device=device, + usm_type=usm_type, + sycl_queue=sycl_queue + ) + if k > -n_rows and k < n_cols: + # find the length of the diagonal + L = min( + n_cols, + n_rows, + n_cols-k, + n_rows+k) + # i is the first index of diagonal, j is the last, s is the step size + if order == "C": + s = n_cols + 1 + i = k if k >= 0 else n_cols*-k + else: + s = n_rows + 1 + i = n_rows*k if k > 0 else -k + j = i + s*(L-1) + 1 + x[i:j:s] = 1 + # copy=False ensures no wasted memory copying the array + return dpt.reshape(x, (n_rows, n_cols), order=order, copy=False) diff --git a/dpctl/tests/test_usm_ndarray_ctor.py b/dpctl/tests/test_usm_ndarray_ctor.py index f6720af1d9..9b3b0318da 100644 --- a/dpctl/tests/test_usm_ndarray_ctor.py +++ b/dpctl/tests/test_usm_ndarray_ctor.py @@ -16,7 +16,6 @@ import ctypes import numbers -from typing import Type import numpy as np import pytest @@ -1292,7 +1291,8 @@ def test_common_arg_validation(): with pytest.raises(TypeError): dpt.eye(4, k=1.2) -@pytest.mark.parametrize("shapes", [(0,), (1,), (7,), (6, 1), (3, 9), (10,5)]) + +@pytest.mark.parametrize("shapes", [(0,), (1,), (7,), (6, 1), (3, 9), (10, 5)]) @pytest.mark.parametrize("k", np.arange(-4, 5, 1)) @pytest.mark.parametrize("orders", ["C", "F"]) def test_eye(shapes, k, orders): From 58bb59e8a57314f238b18ddeeec0170e62662204 Mon Sep 17 00:00:00 2001 From: Nikita Grigorian Date: Mon, 29 Aug 2022 12:45:56 -0700 Subject: [PATCH 05/14] Trailing comma added --- dpctl/tensor/_ctors.py | 16 ++++++---------- 1 file changed, 6 insertions(+), 10 deletions(-) diff --git a/dpctl/tensor/_ctors.py b/dpctl/tensor/_ctors.py index 40091657ad..e368272e6c 100644 --- a/dpctl/tensor/_ctors.py +++ b/dpctl/tensor/_ctors.py @@ -1047,7 +1047,7 @@ def eye( order="C", device=None, usm_type="device", - sycl_queue=None + sycl_queue=None, ): """ eye(n_rows, n_cols = None, /, *, k = 0, dtype = None, \ @@ -1098,23 +1098,19 @@ def eye( order=order, device=device, usm_type=usm_type, - sycl_queue=sycl_queue + sycl_queue=sycl_queue, ) if k > -n_rows and k < n_cols: # find the length of the diagonal - L = min( - n_cols, - n_rows, - n_cols-k, - n_rows+k) + L = min(n_cols, n_rows, n_cols - k, n_rows + k) # i is the first index of diagonal, j is the last, s is the step size if order == "C": s = n_cols + 1 - i = k if k >= 0 else n_cols*-k + i = k if k >= 0 else n_cols * -k else: s = n_rows + 1 - i = n_rows*k if k > 0 else -k - j = i + s*(L-1) + 1 + i = n_rows * k if k > 0 else -k + j = i + s * (L - 1) + 1 x[i:j:s] = 1 # copy=False ensures no wasted memory copying the array return dpt.reshape(x, (n_rows, n_cols), order=order, copy=False) From b4b233a3f4e7f279d6e56c0e168cb104e86fd3d0 Mon Sep 17 00:00:00 2001 From: Nikita Grigorian Date: Mon, 29 Aug 2022 12:54:12 -0700 Subject: [PATCH 06/14] Pre-commit corrections --- dpctl/tensor/_ctors.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dpctl/tensor/_ctors.py b/dpctl/tensor/_ctors.py index e368272e6c..5ea583483f 100644 --- a/dpctl/tensor/_ctors.py +++ b/dpctl/tensor/_ctors.py @@ -1099,7 +1099,7 @@ def eye( device=device, usm_type=usm_type, sycl_queue=sycl_queue, - ) + ) if k > -n_rows and k < n_cols: # find the length of the diagonal L = min(n_cols, n_rows, n_cols - k, n_rows + k) From d3e41ec13300e4506da311125760d7a23d99cb1f Mon Sep 17 00:00:00 2001 From: Nikita Grigorian Date: Tue, 30 Aug 2022 15:09:38 -0700 Subject: [PATCH 07/14] dptcl.tensor.eye dtype test and order validation --- dpctl/tensor/_ctors.py | 12 ++++++++---- dpctl/tests/test_usm_ndarray_ctor.py | 25 +++++++++++++------------ 2 files changed, 21 insertions(+), 16 deletions(-) diff --git a/dpctl/tensor/_ctors.py b/dpctl/tensor/_ctors.py index 5ea583483f..1e5be9e220 100644 --- a/dpctl/tensor/_ctors.py +++ b/dpctl/tensor/_ctors.py @@ -1087,11 +1087,15 @@ def eye( if n_cols is None: n_cols = n_rows # allocate a 1D array of zeros, length equal to n_cols * n_rows - k_dt = type(k) - if not np.issubdtype(k_dt, np.integer): - raise TypeError( - "k keyword must be an integer, got {type}".format(type=k_dt) + if not isinstance(order, str) or len(order) == 0 or order[0] not in "CcFf": + raise ValueError( + "Unrecognized order keyword value, expecting 'F' or 'C'." ) + else: + order = order[0].upper() + n_rows = operator.index(n_rows) + n_cols = operator.index(n_cols) + k = operator.index(k) x = zeros( (n_rows * n_cols,), dtype=dtype, diff --git a/dpctl/tests/test_usm_ndarray_ctor.py b/dpctl/tests/test_usm_ndarray_ctor.py index 9b3b0318da..dcd1267eb9 100644 --- a/dpctl/tests/test_usm_ndarray_ctor.py +++ b/dpctl/tests/test_usm_ndarray_ctor.py @@ -1288,24 +1288,25 @@ def test_common_arg_validation(): dpt.ones_like(X) with pytest.raises(TypeError): dpt.full_like(X, 1) - with pytest.raises(TypeError): - dpt.eye(4, k=1.2) -@pytest.mark.parametrize("shapes", [(0,), (1,), (7,), (6, 1), (3, 9), (10, 5)]) -@pytest.mark.parametrize("k", np.arange(-4, 5, 1)) -@pytest.mark.parametrize("orders", ["C", "F"]) -def test_eye(shapes, k, orders): +@pytest.mark.parametrize("dtype", _all_dtypes) +def test_eye(dtype): + X = dpt.eye(4, 5, dtype=dtype) + Xnp = np.eye(4, 5, dtype=dtype) + assert X.dtype == Xnp.dtype + assert np.array_equal(Xnp, dpt.asnumpy(X)) + + +@pytest.mark.parametrize("shape", [(7,), (6, 1), (10, 5), (3, 9)]) +@pytest.mark.parametrize("k", np.arange(-2, 2, 1)) +@pytest.mark.parametrize("order", ["C", "F"]) +def test_eye_shapes(shape, k, order): try: q = dpctl.SyclQueue() except dpctl.SyclQueueCreationError: pytest.skip("Queue could not be created") - - shape = shapes - k = k - order = orders - Xnp = np.eye(*shape, k=k, order=order) X = dpt.eye(*shape, k=k, order=order, sycl_queue=q) - np.testing.assert_array_equal(Xnp, dpt.asnumpy(X)) + assert np.array_equal(Xnp, dpt.asnumpy(X)) From 3a7d4f311904fb1f143e0cdf1c4c0d0fac446366 Mon Sep 17 00:00:00 2001 From: Nikita Grigorian Date: Mon, 12 Sep 2022 10:30:04 -0700 Subject: [PATCH 08/14] Rewrote eye constructor to use a dedicated kernel --- dpctl/tensor/_ctors.py | 43 +++--- dpctl/tensor/libtensor/source/tensor_py.cpp | 153 ++++++++++++++++++++ 2 files changed, 173 insertions(+), 23 deletions(-) diff --git a/dpctl/tensor/_ctors.py b/dpctl/tensor/_ctors.py index 1e5be9e220..475436f1d1 100644 --- a/dpctl/tensor/_ctors.py +++ b/dpctl/tensor/_ctors.py @@ -1084,9 +1084,6 @@ def eye( `dpctl.SyclQueue()` is used for allocation and copying. Default: `None`. """ - if n_cols is None: - n_cols = n_rows - # allocate a 1D array of zeros, length equal to n_cols * n_rows if not isinstance(order, str) or len(order) == 0 or order[0] not in "CcFf": raise ValueError( "Unrecognized order keyword value, expecting 'F' or 'C'." @@ -1094,27 +1091,27 @@ def eye( else: order = order[0].upper() n_rows = operator.index(n_rows) - n_cols = operator.index(n_cols) + n_cols = n_rows if n_cols is None else operator.index(n_cols) k = operator.index(k) - x = zeros( - (n_rows * n_cols,), + if k >= n_cols or -k >= n_rows: + return dpt.zeros( + (n_rows, n_cols), + dtype=dtype, + order=order, + device=device, + usm_type=usm_type, + sycl_queue=sycl_queue, + ) + sycl_queue = normalize_queue_device(sycl_queue=sycl_queue, device=device) + dpctl.utils.validate_usm_type(usm_type, allow_none=False) + res = dpt.usm_ndarray( + (n_rows, n_cols), dtype=dtype, + buffer=usm_type, order=order, - device=device, - usm_type=usm_type, - sycl_queue=sycl_queue, + buffer_ctor_kwargs={"queue": sycl_queue}, ) - if k > -n_rows and k < n_cols: - # find the length of the diagonal - L = min(n_cols, n_rows, n_cols - k, n_rows + k) - # i is the first index of diagonal, j is the last, s is the step size - if order == "C": - s = n_cols + 1 - i = k if k >= 0 else n_cols * -k - else: - s = n_rows + 1 - i = n_rows * k if k > 0 else -k - j = i + s * (L - 1) + 1 - x[i:j:s] = 1 - # copy=False ensures no wasted memory copying the array - return dpt.reshape(x, (n_rows, n_cols), order=order, copy=False) + if n_rows != 0 and n_cols != 0: + hev, _ = ti._eye(k, dst=res, sycl_queue=sycl_queue) + hev.wait() + return res diff --git a/dpctl/tensor/libtensor/source/tensor_py.cpp b/dpctl/tensor/libtensor/source/tensor_py.cpp index 2f0c4e2770..d13faeb496 100644 --- a/dpctl/tensor/libtensor/source/tensor_py.cpp +++ b/dpctl/tensor/libtensor/source/tensor_py.cpp @@ -45,6 +45,7 @@ template class copy_cast_spec_kernel; template class copy_for_reshape_generic_kernel; template class linear_sequence_step_kernel; template class linear_sequence_affine_kernel; +template class eye_kernel; static dpctl::tensor::detail::usm_ndarray_types array_types; @@ -1742,6 +1743,144 @@ usm_ndarray_full(py::object py_value, } } +/* ================ Eye ================== */ + +typedef sycl::event (*eye_fn_ptr_t)(sycl::queue, + size_t nelems, // num_elements + py::ssize_t start, + py::ssize_t end, + py::ssize_t step, + char *, // dst_data_ptr + const std::vector &); + +static eye_fn_ptr_t eye_dispatch_vector[_ns::num_types]; + +template class EyeFunctor +{ +private: + Ty *p = nullptr; + py::ssize_t start_v; + py::ssize_t end_v; + py::ssize_t step_v; + +public: + EyeFunctor(char *dst_p, + const py::ssize_t v0, + const py::ssize_t v1, + const py::ssize_t dv) + : p(reinterpret_cast(dst_p)), start_v(v0), end_v(v1), step_v(dv) + { + } + + void operator()(sycl::id<1> wiid) const + { + Ty set_v = 0; + py::ssize_t i = static_cast(wiid.get(0)); + if (i >= start_v and i <= end_v) { + if ((i - start_v) % step_v == 0) { + set_v = 1; + } + } + p[i] = set_v; + } +}; + +template +sycl::event eye_impl(sycl::queue exec_q, + size_t nelems, + const py::ssize_t start, + const py::ssize_t end, + const py::ssize_t step, + char *array_data, + const std::vector &depends) +{ + sycl::event eye_event = exec_q.submit([&](sycl::handler &cgh) { + cgh.depends_on(depends); + cgh.parallel_for>( + sycl::range<1>{nelems}, + EyeFunctor(array_data, start, end, step)); + }); + + return eye_event; +} + +template struct EyeFactory +{ + fnT get() + { + fnT f = eye_impl; + return f; + } +}; + +std::pair +eye(py::ssize_t k, + dpctl::tensor::usm_ndarray dst, + sycl::queue exec_q, + const std::vector &depends = {}) +{ + // dst must be 2D + + if (dst.get_ndim() != 2) { + throw py::value_error( + "usm_ndarray_eye: Expecting 2D array to populate"); + } + + sycl::queue dst_q = dst.get_queue(); + if (dst_q != exec_q && dst_q.get_context() != exec_q.get_context()) { + throw py::value_error( + "Execution queue context is not the same as allocation context"); + } + + int dst_typenum = dst.get_typenum(); + int dst_typeid = array_types.typenum_to_lookup_id(dst_typenum); + + const py::ssize_t nelem = dst.get_size(); + const py::ssize_t rows = dst.get_shape(0); + const py::ssize_t cols = dst.get_shape(1); + if (rows == 0 || cols == 0) { + // nothing to do + return std::make_pair(sycl::event{}, sycl::event{}); + } + + bool is_dst_c_contig = ((dst.get_flags() & USM_ARRAY_C_CONTIGUOUS) != 0); + bool is_dst_f_contig = ((dst.get_flags() & USM_ARRAY_F_CONTIGUOUS) != 0); + if (!is_dst_c_contig && !is_dst_f_contig) { + throw py::value_error("USM array is not contiguous"); + } + + py::ssize_t start; + if (is_dst_c_contig) { + start = (k < 0) ? -k * cols : k; + } + else { + start = (k < 0) ? -k : k * rows; + } + + py::ssize_t step; + if (dst.get_strides_raw() == nullptr) { + step = (is_dst_c_contig) ? cols + 1 : rows + 1; + } + else { + const py::ssize_t *strides = dst.get_strides_raw(); + step = strides[0] + strides[1]; + } + + const py::ssize_t length = std::min({rows, cols, rows + k, cols - k}); + const py::ssize_t end = start + step * (length - 1) + 1; + + char *dst_data = dst.get_data(); + sycl::event eye_event; + + auto fn = eye_dispatch_vector[dst_typeid]; + + eye_event = fn(exec_q, static_cast(nelem), start, end, step, + dst_data, depends); + + return std::make_pair(keep_args_alive(exec_q, {dst}, {eye_event}), + eye_event); +} + // populate dispatch tables void init_copy_and_cast_dispatch_tables(void) { @@ -1796,6 +1935,10 @@ void init_copy_for_reshape_dispatch_vector(void) dvb3; dvb3.populate_dispatch_vector(full_contig_dispatch_vector); + DispatchVectorBuilder + dvb4; + dvb4.populate_dispatch_vector(eye_dispatch_vector); + return; } @@ -1901,6 +2044,16 @@ PYBIND11_MODULE(_tensor_impl, m) py::arg("fill_value"), py::arg("dst"), py::arg("sycl_queue"), py::arg("depends") = py::list()); + m.def("_eye", &eye, + "Fills input 2D contiguous usm_ndarray `dst` with " + "zeros outside of the diagonal " + "specified by " + "the diagonal index `k` " + "which is filled with ones." + "Returns a tuple of events: (ht_event, comp_event)", + py::arg("k"), py::arg("dst"), py::arg("sycl_queue"), + py::arg("depends") = py::list()); + m.def("default_device_fp_type", [](sycl::queue q) -> std::string { return get_default_device_fp_type(q.get_device()); }); From 93bb3e121c8790f04d93bc89f00a831a850377cd Mon Sep 17 00:00:00 2001 From: Nikita Grigorian Date: Mon, 12 Sep 2022 10:37:07 -0700 Subject: [PATCH 09/14] Pre-commit changes --- dpctl/tensor/libtensor/source/tensor_py.cpp | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/dpctl/tensor/libtensor/source/tensor_py.cpp b/dpctl/tensor/libtensor/source/tensor_py.cpp index d13faeb496..b74408c52e 100644 --- a/dpctl/tensor/libtensor/source/tensor_py.cpp +++ b/dpctl/tensor/libtensor/source/tensor_py.cpp @@ -1935,8 +1935,7 @@ void init_copy_for_reshape_dispatch_vector(void) dvb3; dvb3.populate_dispatch_vector(full_contig_dispatch_vector); - DispatchVectorBuilder - dvb4; + DispatchVectorBuilder dvb4; dvb4.populate_dispatch_vector(eye_dispatch_vector); return; From 9be71623ff79f2e7e6014ca8942c711e902ab8f3 Mon Sep 17 00:00:00 2001 From: Nikita Grigorian Date: Mon, 12 Sep 2022 11:23:48 -0700 Subject: [PATCH 10/14] Rewrote test for dptcl.tensor.eye --- dpctl/tests/test_usm_ndarray_ctor.py | 42 +++++++++++++--------------- 1 file changed, 20 insertions(+), 22 deletions(-) diff --git a/dpctl/tests/test_usm_ndarray_ctor.py b/dpctl/tests/test_usm_ndarray_ctor.py index dcd1267eb9..3bac739d58 100644 --- a/dpctl/tests/test_usm_ndarray_ctor.py +++ b/dpctl/tests/test_usm_ndarray_ctor.py @@ -1256,6 +1256,24 @@ def test_full_like(dt, usm_kind): assert np.array_equal(dpt.asnumpy(Y), np.ones(X.shape, dtype=X.dtype)) +@pytest.mark.parametrize("dtype", _all_dtypes) +@pytest.mark.parametrize("usm_kind", ["shared", "device", "host"]) +def test_eye(dtype, usm_kind): + try: + q = dpctl.SyclQueue() + except dpctl.SyclQueueCreationError: + pytest.skip("Queue could not be created") + + if dtype in ["f8", "c16"] and q.sycl_device.has_aspect_fp64 is False: + pytest.skip( + "Device does not support double precision floating point type" + ) + X = dpt.eye(4, 5, k=1, dtype=dtype, usm_type=usm_kind, sycl_queue=q) + Xnp = np.eye(4, 5, k=1, dtype=dtype) + assert X.dtype == Xnp.dtype + assert np.array_equal(Xnp, dpt.asnumpy(X)) + + def test_common_arg_validation(): order = "I" # invalid order must raise ValueError @@ -1267,6 +1285,8 @@ def test_common_arg_validation(): dpt.ones(10, order=order) with pytest.raises(ValueError): dpt.full(10, 1, order=order) + with pytest.raises(ValueError): + dpt.eye(10, order=order) X = dpt.empty(10) with pytest.raises(ValueError): dpt.empty_like(X, order=order) @@ -1288,25 +1308,3 @@ def test_common_arg_validation(): dpt.ones_like(X) with pytest.raises(TypeError): dpt.full_like(X, 1) - - -@pytest.mark.parametrize("dtype", _all_dtypes) -def test_eye(dtype): - X = dpt.eye(4, 5, dtype=dtype) - Xnp = np.eye(4, 5, dtype=dtype) - assert X.dtype == Xnp.dtype - assert np.array_equal(Xnp, dpt.asnumpy(X)) - - -@pytest.mark.parametrize("shape", [(7,), (6, 1), (10, 5), (3, 9)]) -@pytest.mark.parametrize("k", np.arange(-2, 2, 1)) -@pytest.mark.parametrize("order", ["C", "F"]) -def test_eye_shapes(shape, k, order): - try: - q = dpctl.SyclQueue() - except dpctl.SyclQueueCreationError: - pytest.skip("Queue could not be created") - Xnp = np.eye(*shape, k=k, order=order) - X = dpt.eye(*shape, k=k, order=order, sycl_queue=q) - - assert np.array_equal(Xnp, dpt.asnumpy(X)) From 8cf376e079f3b237fbdfa0ec47770d3a849b0ad1 Mon Sep 17 00:00:00 2001 From: Nikita Grigorian Date: Mon, 12 Sep 2022 12:13:54 -0700 Subject: [PATCH 11/14] Removed duplicate in test_common_arg_validation --- dpctl/tests/test_usm_ndarray_ctor.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/dpctl/tests/test_usm_ndarray_ctor.py b/dpctl/tests/test_usm_ndarray_ctor.py index 3bac739d58..c77933d56a 100644 --- a/dpctl/tests/test_usm_ndarray_ctor.py +++ b/dpctl/tests/test_usm_ndarray_ctor.py @@ -1296,8 +1296,6 @@ def test_common_arg_validation(): dpt.ones_like(X, order=order) with pytest.raises(ValueError): dpt.full_like(X, 1, order=order) - with pytest.raises(ValueError): - dpt.eye(4, order=order) X = dict() # test for type validation with pytest.raises(TypeError): From 95e0cf8a010a81bb041a9b955ddd82f801f9c320 Mon Sep 17 00:00:00 2001 From: Nikita Grigorian Date: Mon, 12 Sep 2022 13:45:53 -0700 Subject: [PATCH 12/14] Eye default dtype now uses _get_dtype --- dpctl/tensor/_ctors.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/dpctl/tensor/_ctors.py b/dpctl/tensor/_ctors.py index 475436f1d1..f68e3ca502 100644 --- a/dpctl/tensor/_ctors.py +++ b/dpctl/tensor/_ctors.py @@ -1102,8 +1102,9 @@ def eye( usm_type=usm_type, sycl_queue=sycl_queue, ) - sycl_queue = normalize_queue_device(sycl_queue=sycl_queue, device=device) dpctl.utils.validate_usm_type(usm_type, allow_none=False) + sycl_queue = normalize_queue_device(sycl_queue=sycl_queue, device=device) + dtype = _get_dtype(dtype, sycl_queue) res = dpt.usm_ndarray( (n_rows, n_cols), dtype=dtype, From 739d652946cf9550dd4df7954d580d2a4f81d7b9 Mon Sep 17 00:00:00 2001 From: Nikita Grigorian Date: Mon, 12 Sep 2022 13:54:26 -0700 Subject: [PATCH 13/14] eye uses queues_are_compatible, endpoint fix --- dpctl/tensor/libtensor/source/tensor_py.cpp | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/dpctl/tensor/libtensor/source/tensor_py.cpp b/dpctl/tensor/libtensor/source/tensor_py.cpp index b74408c52e..b0c526b54e 100644 --- a/dpctl/tensor/libtensor/source/tensor_py.cpp +++ b/dpctl/tensor/libtensor/source/tensor_py.cpp @@ -1827,9 +1827,9 @@ eye(py::ssize_t k, } sycl::queue dst_q = dst.get_queue(); - if (dst_q != exec_q && dst_q.get_context() != exec_q.get_context()) { - throw py::value_error( - "Execution queue context is not the same as allocation context"); + if (!dpctl::utils::queues_are_compatible(exec_q, {dst_q})) { + throw py::value_error("Execution queue is not compatible with the " + "allocation queue"); } int dst_typenum = dst.get_typenum(); @@ -1857,17 +1857,17 @@ eye(py::ssize_t k, start = (k < 0) ? -k : k * rows; } + const py::ssize_t *strides = dst.get_strides_raw(); py::ssize_t step; - if (dst.get_strides_raw() == nullptr) { + if (strides == nullptr) { step = (is_dst_c_contig) ? cols + 1 : rows + 1; } else { - const py::ssize_t *strides = dst.get_strides_raw(); step = strides[0] + strides[1]; } const py::ssize_t length = std::min({rows, cols, rows + k, cols - k}); - const py::ssize_t end = start + step * (length - 1) + 1; + const py::ssize_t end = start + step * (length - 1); char *dst_data = dst.get_data(); sycl::event eye_event; From ba3df7639cbff2b800002dfdca47e99f0a86af3d Mon Sep 17 00:00:00 2001 From: Nikita Grigorian Date: Mon, 12 Sep 2022 17:31:00 -0700 Subject: [PATCH 14/14] eye doc-string corrections --- dpctl/tensor/_ctors.py | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/dpctl/tensor/_ctors.py b/dpctl/tensor/_ctors.py index f68e3ca502..e3bb8fe3ec 100644 --- a/dpctl/tensor/_ctors.py +++ b/dpctl/tensor/_ctors.py @@ -1058,12 +1058,12 @@ def eye( Args: n_rows: number of rows in the output array. n_cols (optional): number of columns in the output array. If None, - n_cols = n_rows. Default: `None`. + n_cols = n_rows. Default: `None`. k: index of the diagonal, with 0 as the main diagonal. - A positive value of k is a superdiagonal, a negative value - is a subdiagonal. - Raises `TypeError` if k is not an integer. - Default: `0`. + A positive value of k is a superdiagonal, a negative value + is a subdiagonal. + Raises `TypeError` if k is not an integer. + Default: `0`. dtype (optional): data type of the array. Can be typestring, a `numpy.dtype` object, `numpy` char string, or a numpy scalar type. Default: None