From 0bce9e00da53f2f5281593a470593571f97a55da Mon Sep 17 00:00:00 2001
From: Natalia Polina <natalia.polina@intel.com>
Date: Sun, 26 Feb 2023 18:59:38 -0800
Subject: [PATCH 01/16] Use meshgrid() function from dpctl.tensor. (#1300)

* Use meshgrid() function from dpctl.tensor.

* Added more tests for meshgrid() function.
---
 dpnp/dpnp_algo/dpnp_algo_arraycreation.pyx | 48 ----------------------
 dpnp/dpnp_container.py                     |  9 ++++
 dpnp/dpnp_iface_arraycreation.py           | 23 ++++++-----
 tests/skipped_tests.tbl                    |  6 ---
 tests/skipped_tests_gpu.tbl                |  6 ---
 tests/test_arraycreation.py                | 16 +++++++-
 tests/test_sycl_queue.py                   | 14 +++++++
 tests/test_usm_type.py                     | 10 +++++
 8 files changed, 60 insertions(+), 72 deletions(-)

diff --git a/dpnp/dpnp_algo/dpnp_algo_arraycreation.pyx b/dpnp/dpnp_algo/dpnp_algo_arraycreation.pyx
index cb44a08db59..50b8bb84070 100644
--- a/dpnp/dpnp_algo/dpnp_algo_arraycreation.pyx
+++ b/dpnp/dpnp_algo/dpnp_algo_arraycreation.pyx
@@ -41,7 +41,6 @@ __all__ += [
     "dpnp_identity",
     "dpnp_linspace",
     "dpnp_logspace",
-    "dpnp_meshgrid",
     "dpnp_ptp",
     "dpnp_trace",
     "dpnp_tri",
@@ -222,53 +221,6 @@ cpdef utils.dpnp_descriptor dpnp_logspace(start, stop, num, endpoint, base, dtyp
     return dpnp.get_dpnp_descriptor(dpnp.astype(dpnp.power(base, temp), dtype))
 
 
-cpdef list dpnp_meshgrid(xi, copy, sparse, indexing):
-    input_count = len(xi)
-
-    # simple case
-    if input_count == 0:
-        return []
-
-    # simple case
-    if input_count == 1:
-        return [dpnp_copy(dpnp.get_dpnp_descriptor(xi[0])).get_pyobj()]
-
-    shape_mult = 1
-    for i in range(input_count):
-        shape_mult = shape_mult * xi[i].size
-
-    shape_list = []
-    for i in range(input_count):
-        shape_list.append(xi[i].size)
-    if indexing == "xy":
-        temp = shape_list[0]
-        shape_list[0] = shape_list[1]
-        shape_list[1] = temp
-
-    steps = []
-    for i in range(input_count):
-        shape_mult = shape_mult // shape_list[i]
-        steps.append(shape_mult)
-    if indexing == "xy":
-        temp = steps[0]
-        steps[0] = steps[1]
-        steps[1] = temp
-
-    shape = tuple(shape_list)
-
-    cdef utils.dpnp_descriptor res_item
-    result = []
-    for i in range(input_count):
-        res_item = utils_py.create_output_descriptor_py(shape, xi[i].dtype, None)
-
-        for j in range(res_item.size):
-            res_item.get_pyobj()[j] = xi[i][(j // steps[i]) % xi[i].size]
-
-        result.append(res_item.get_pyobj())
-
-    return result
-
-
 cpdef dpnp_ptp(utils.dpnp_descriptor arr, axis=None):
     cdef shape_type_c shape_arr = arr.shape
     cdef shape_type_c output_shape
diff --git a/dpnp/dpnp_container.py b/dpnp/dpnp_container.py
index 75e20f8a0cb..2adb2b9b7f2 100644
--- a/dpnp/dpnp_container.py
+++ b/dpnp/dpnp_container.py
@@ -180,6 +180,15 @@ def eye(N,
     return dpnp_array(array_obj.shape, buffer=array_obj, order=order)
 
 
+def meshgrid(*xi, indexing="xy"):
+    """Creates list of `dpnp_array` coordinate matrices from vectors."""
+    if len(xi) == 0:
+        return []
+    arrays = tuple(x.get_array() if isinstance(x, dpnp_array) else x for x in xi)
+    arrays_obj = dpt.meshgrid(*arrays, indexing=indexing)
+    return [dpnp_array._create_from_usm_ndarray(array_obj) for array_obj in arrays_obj]
+
+
 def ones(shape,
          *,
          dtype=None,
diff --git a/dpnp/dpnp_iface_arraycreation.py b/dpnp/dpnp_iface_arraycreation.py
index 5b062a346b9..8d892edce6d 100644
--- a/dpnp/dpnp_iface_arraycreation.py
+++ b/dpnp/dpnp_iface_arraycreation.py
@@ -1010,8 +1010,10 @@ def meshgrid(*xi, copy=True, sparse=False, indexing='xy'):
 
     Limitations
     -----------
+    Each array instance from `xi` is supported as either :class:`dpnp.dpnp_array` or :class:`dpctl.tensor.usm_ndarray`.
     Parameter ``copy`` is supported only with default value ``True``.
     Parameter ``sparse`` is supported only with default value ``False``.
+    Otherwise the function will be executed sequentially on CPU.
 
     Examples
     --------
@@ -1045,17 +1047,16 @@ def meshgrid(*xi, copy=True, sparse=False, indexing='xy'):
 
     """
 
-    if not use_origin_backend():
-        # original limitation
-        if indexing not in ["ij", "xy"]:
-            checker_throw_value_error("meshgrid", "indexing", indexing, "'ij' or 'xy'")
-
-        if copy is not True:
-            checker_throw_value_error("meshgrid", "copy", copy, True)
-        if sparse is not False:
-            checker_throw_value_error("meshgrid", "sparse", sparse, False)
-
-        return dpnp_meshgrid(xi, copy, sparse, indexing)
+    if not all((isinstance(x, (dpnp.ndarray, dpt.usm_ndarray)) for x in xi)):
+        pass
+    elif indexing not in ["ij", "xy"]:
+        pass
+    elif copy is not True:
+        pass
+    elif sparse is not False:
+        pass
+    else:
+        return dpnp_container.meshgrid(*xi, indexing=indexing)
 
     return call_origin(numpy.meshgrid, xi, copy, sparse, indexing)
 
diff --git a/tests/skipped_tests.tbl b/tests/skipped_tests.tbl
index 2f0334077a0..097f98c3250 100644
--- a/tests/skipped_tests.tbl
+++ b/tests/skipped_tests.tbl
@@ -406,16 +406,10 @@ tests/third_party/cupy/creation_tests/test_ranges.py::TestMeshgrid_param_3_{copy
 tests/third_party/cupy/creation_tests/test_ranges.py::TestMeshgrid_param_3_{copy=False, indexing='ij', sparse=True}::test_meshgrid1
 tests/third_party/cupy/creation_tests/test_ranges.py::TestMeshgrid_param_3_{copy=False, indexing='ij', sparse=True}::test_meshgrid2
 tests/third_party/cupy/creation_tests/test_ranges.py::TestMeshgrid_param_3_{copy=False, indexing='ij', sparse=True}::test_meshgrid3
-tests/third_party/cupy/creation_tests/test_ranges.py::TestMeshgrid_param_4_{copy=True, indexing='xy', sparse=False}::test_meshgrid1
-tests/third_party/cupy/creation_tests/test_ranges.py::TestMeshgrid_param_4_{copy=True, indexing='xy', sparse=False}::test_meshgrid2
-tests/third_party/cupy/creation_tests/test_ranges.py::TestMeshgrid_param_4_{copy=True, indexing='xy', sparse=False}::test_meshgrid3
 tests/third_party/cupy/creation_tests/test_ranges.py::TestMeshgrid_param_5_{copy=True, indexing='xy', sparse=True}::test_meshgrid0
 tests/third_party/cupy/creation_tests/test_ranges.py::TestMeshgrid_param_5_{copy=True, indexing='xy', sparse=True}::test_meshgrid1
 tests/third_party/cupy/creation_tests/test_ranges.py::TestMeshgrid_param_5_{copy=True, indexing='xy', sparse=True}::test_meshgrid2
 tests/third_party/cupy/creation_tests/test_ranges.py::TestMeshgrid_param_5_{copy=True, indexing='xy', sparse=True}::test_meshgrid3
-tests/third_party/cupy/creation_tests/test_ranges.py::TestMeshgrid_param_6_{copy=True, indexing='ij', sparse=False}::test_meshgrid1
-tests/third_party/cupy/creation_tests/test_ranges.py::TestMeshgrid_param_6_{copy=True, indexing='ij', sparse=False}::test_meshgrid2
-tests/third_party/cupy/creation_tests/test_ranges.py::TestMeshgrid_param_6_{copy=True, indexing='ij', sparse=False}::test_meshgrid3
 tests/third_party/cupy/creation_tests/test_ranges.py::TestMeshgrid_param_7_{copy=True, indexing='ij', sparse=True}::test_meshgrid0
 tests/third_party/cupy/creation_tests/test_ranges.py::TestMeshgrid_param_7_{copy=True, indexing='ij', sparse=True}::test_meshgrid1
 tests/third_party/cupy/creation_tests/test_ranges.py::TestMeshgrid_param_7_{copy=True, indexing='ij', sparse=True}::test_meshgrid2
diff --git a/tests/skipped_tests_gpu.tbl b/tests/skipped_tests_gpu.tbl
index e6598904e16..c517b5cf9de 100644
--- a/tests/skipped_tests_gpu.tbl
+++ b/tests/skipped_tests_gpu.tbl
@@ -581,16 +581,10 @@ tests/third_party/cupy/creation_tests/test_ranges.py::TestMeshgrid_param_3_{copy
 tests/third_party/cupy/creation_tests/test_ranges.py::TestMeshgrid_param_3_{copy=False, indexing='ij', sparse=True}::test_meshgrid1
 tests/third_party/cupy/creation_tests/test_ranges.py::TestMeshgrid_param_3_{copy=False, indexing='ij', sparse=True}::test_meshgrid2
 tests/third_party/cupy/creation_tests/test_ranges.py::TestMeshgrid_param_3_{copy=False, indexing='ij', sparse=True}::test_meshgrid3
-tests/third_party/cupy/creation_tests/test_ranges.py::TestMeshgrid_param_4_{copy=True, indexing='xy', sparse=False}::test_meshgrid1
-tests/third_party/cupy/creation_tests/test_ranges.py::TestMeshgrid_param_4_{copy=True, indexing='xy', sparse=False}::test_meshgrid2
-tests/third_party/cupy/creation_tests/test_ranges.py::TestMeshgrid_param_4_{copy=True, indexing='xy', sparse=False}::test_meshgrid3
 tests/third_party/cupy/creation_tests/test_ranges.py::TestMeshgrid_param_5_{copy=True, indexing='xy', sparse=True}::test_meshgrid0
 tests/third_party/cupy/creation_tests/test_ranges.py::TestMeshgrid_param_5_{copy=True, indexing='xy', sparse=True}::test_meshgrid1
 tests/third_party/cupy/creation_tests/test_ranges.py::TestMeshgrid_param_5_{copy=True, indexing='xy', sparse=True}::test_meshgrid2
 tests/third_party/cupy/creation_tests/test_ranges.py::TestMeshgrid_param_5_{copy=True, indexing='xy', sparse=True}::test_meshgrid3
-tests/third_party/cupy/creation_tests/test_ranges.py::TestMeshgrid_param_6_{copy=True, indexing='ij', sparse=False}::test_meshgrid1
-tests/third_party/cupy/creation_tests/test_ranges.py::TestMeshgrid_param_6_{copy=True, indexing='ij', sparse=False}::test_meshgrid2
-tests/third_party/cupy/creation_tests/test_ranges.py::TestMeshgrid_param_6_{copy=True, indexing='ij', sparse=False}::test_meshgrid3
 tests/third_party/cupy/creation_tests/test_ranges.py::TestMeshgrid_param_7_{copy=True, indexing='ij', sparse=True}::test_meshgrid0
 tests/third_party/cupy/creation_tests/test_ranges.py::TestMeshgrid_param_7_{copy=True, indexing='ij', sparse=True}::test_meshgrid1
 tests/third_party/cupy/creation_tests/test_ranges.py::TestMeshgrid_param_7_{copy=True, indexing='ij', sparse=True}::test_meshgrid2
diff --git a/tests/test_arraycreation.py b/tests/test_arraycreation.py
index 63435bca11f..71e6a7b7d07 100644
--- a/tests/test_arraycreation.py
+++ b/tests/test_arraycreation.py
@@ -507,7 +507,21 @@ def test_dpctl_tensor_input(func, args):
     new_args = [eval(val, {'x0' : x0}) for val in args]
     X = getattr(dpt, func)(*new_args)
     Y = getattr(dpnp, func)(*new_args)
-    if func is 'empty_like':
+    if func == 'empty_like':
         assert X.shape == Y.shape
     else:
         assert_array_equal(X, Y)
+
+
+@pytest.mark.parametrize("arrays",
+                         [[], [[1]], [[1, 2, 3], [4, 5, 6]], [[1, 2], [3, 4], [5, 6]]],
+                         ids=['[]', '[[1]]', '[[1, 2, 3], [4, 5, 6]]', '[[1, 2], [3, 4], [5, 6]]'])
+@pytest.mark.parametrize("dtype", get_all_dtypes(no_float16=False))
+@pytest.mark.parametrize("indexing",
+                         ["ij", "xy"],
+                         ids=["ij", "xy"])
+def test_meshgrid(arrays, dtype, indexing):
+    func = lambda xp, xi: xp.meshgrid(*xi, indexing=indexing)
+    a = tuple(numpy.array(array, dtype=dtype) for array in arrays)
+    ia = tuple(dpnp.array(array, dtype=dtype) for array in arrays)
+    assert_array_equal(func(numpy, a), func(dpnp, ia))
diff --git a/tests/test_sycl_queue.py b/tests/test_sycl_queue.py
index 1bffa18111b..77c02e96bed 100644
--- a/tests/test_sycl_queue.py
+++ b/tests/test_sycl_queue.py
@@ -177,6 +177,20 @@ def test_tril_triu(func, device):
     assert_sycl_queue_equal(x.sycl_queue, x0.sycl_queue)
 
 
+@pytest.mark.parametrize("device_x",
+                          valid_devices,
+                          ids=[device.filter_string for device in valid_devices])
+@pytest.mark.parametrize("device_y",
+                          valid_devices,
+                          ids=[device.filter_string for device in valid_devices])
+def test_meshgrid(device_x, device_y):
+    x = dpnp.arange(100, device = device_x)
+    y = dpnp.arange(100, device = device_y)
+    z = dpnp.meshgrid(x, y)
+    assert_sycl_queue_equal(z[0].sycl_queue, x.sycl_queue)
+    assert_sycl_queue_equal(z[1].sycl_queue, y.sycl_queue)
+
+
 @pytest.mark.usefixtures("allow_fall_back_on_numpy")
 @pytest.mark.parametrize(
     "func,data",
diff --git a/tests/test_usm_type.py b/tests/test_usm_type.py
index 1a33a1d655d..605cbb4f3e4 100644
--- a/tests/test_usm_type.py
+++ b/tests/test_usm_type.py
@@ -117,3 +117,13 @@ def test_coerced_usm_types_logic_op(op, usm_type_x, usm_type_y):
     assert x.usm_type == zx.usm_type == usm_type_x
     assert y.usm_type == zy.usm_type == usm_type_y
     assert z.usm_type == du.get_coerced_usm_type([usm_type_x, usm_type_y])
+
+
+@pytest.mark.parametrize("usm_type_x", list_of_usm_types, ids=list_of_usm_types)
+@pytest.mark.parametrize("usm_type_y", list_of_usm_types, ids=list_of_usm_types)
+def test_meshgrid(usm_type_x, usm_type_y):
+    x = dp.arange(100, usm_type = usm_type_x)
+    y = dp.arange(100, usm_type = usm_type_y)
+    z = dp.meshgrid(x, y)
+    assert z[0].usm_type == usm_type_x
+    assert z[1].usm_type == usm_type_y

From bcbff63ce9f62d2890659f12ec06a8da1e2cbb6e Mon Sep 17 00:00:00 2001
From: Anton <100830759+antonwolfy@users.noreply.github.com>
Date: Mon, 27 Feb 2023 14:44:23 +0100
Subject: [PATCH 02/16] Simplify asserts in dlpack tests (#1310)

---
 tests/test_sycl_queue.py | 6 ++----
 1 file changed, 2 insertions(+), 4 deletions(-)

diff --git a/tests/test_sycl_queue.py b/tests/test_sycl_queue.py
index 77c02e96bed..3b86f06ed2e 100644
--- a/tests/test_sycl_queue.py
+++ b/tests/test_sycl_queue.py
@@ -862,8 +862,7 @@ def test_from_dlpack(arr_dtype, shape, device):
     Y = dpnp.from_dlpack(X)
     assert_array_equal(X, Y)
     assert X.__dlpack_device__() == Y.__dlpack_device__()
-    assert X.sycl_device == Y.sycl_device
-    assert X.sycl_context == Y.sycl_context
+    assert_sycl_queue_equal(X.sycl_queue, Y.sycl_queue)
     assert X.usm_type == Y.usm_type
     if Y.ndim:
         V = Y[::-1]
@@ -882,6 +881,5 @@ def test_from_dlpack_with_dpt(arr_dtype, device):
     assert_array_equal(X, Y)
     assert isinstance(Y, dpnp.dpnp_array.dpnp_array)
     assert X.__dlpack_device__() == Y.__dlpack_device__()
-    assert X.sycl_device == Y.sycl_device
-    assert X.sycl_context == Y.sycl_context
     assert X.usm_type == Y.usm_type
+    assert_sycl_queue_equal(X.sycl_queue, Y.sycl_queue)

From 44e9fa9185080a0b1e69523622b2139f4671c73e Mon Sep 17 00:00:00 2001
From: samaid <55950596+samaid@users.noreply.github.com>
Date: Tue, 28 Feb 2023 02:42:18 -0600
Subject: [PATCH 03/16] Enable Google Analytics (#1320)

* Enable Google Analytics

* Update conf.py

Fixed typo in module name

---------

Co-authored-by: Anton <100830759+antonwolfy@users.noreply.github.com>
---
 .github/workflows/build-sphinx.yml | 4 +++-
 doc/conf.py                        | 6 +++++-
 2 files changed, 8 insertions(+), 2 deletions(-)

diff --git a/.github/workflows/build-sphinx.yml b/.github/workflows/build-sphinx.yml
index 178e1835fe2..95db7b640f1 100644
--- a/.github/workflows/build-sphinx.yml
+++ b/.github/workflows/build-sphinx.yml
@@ -67,7 +67,9 @@ jobs:
           channels: intel, conda-forge
 
       - name: Install sphinx dependencies
-        run: conda install sphinx sphinx_rtd_theme
+        run: |
+          conda install sphinx sphinx_rtd_theme
+          pip install sphinxcontrib-googleanalytics
 
       - name: Install dpnp dependencies
         run: |
diff --git a/doc/conf.py b/doc/conf.py
index 999b2504bd6..532715c4e44 100644
--- a/doc/conf.py
+++ b/doc/conf.py
@@ -27,7 +27,7 @@
 # -- Project information -----------------------------------------------------
 
 project = 'dpnp'
-copyright = '2020-2022, Intel'
+copyright = '2020-2023, Intel Corporation'
 author = 'Intel'
 
 # The short X.Y version
@@ -54,8 +54,12 @@
     'sphinx.ext.napoleon',
     'sphinx.ext.autodoc',
     'sphinx.ext.autosummary',
+    'sphinxcontrib.googleanalytics',
 ]
 
+googleanalytics_id = 'G-554F8VNE28'
+googleanalytics_enabled = True
+
 # Add any paths that contain templates here, relative to this directory.
 templates_path = ['_templates']
 

From d1a90bca1cb0a3825eeb3ecd0a44af015ce88688 Mon Sep 17 00:00:00 2001
From: Anton <100830759+antonwolfy@users.noreply.github.com>
Date: Tue, 28 Feb 2023 11:19:13 +0100
Subject: [PATCH 04/16] Unwrap dpnp array while calling dpctl indexer operators
 (#1321)

---
 dpnp/dpnp_array.py | 8 ++++++++
 1 file changed, 8 insertions(+)

diff --git a/dpnp/dpnp_array.py b/dpnp/dpnp_array.py
index c50ed979272..5c4a5551d08 100644
--- a/dpnp/dpnp_array.py
+++ b/dpnp/dpnp_array.py
@@ -174,6 +174,9 @@ def __ge__(self, other):
  # '__getattribute__',
 
     def __getitem__(self, key):
+        if isinstance(key, dpnp_array):
+            key = key.get_array()
+
         item = self._array_obj.__getitem__(key)
         if not isinstance(item, dpt.usm_ndarray):
             raise RuntimeError(
@@ -290,6 +293,11 @@ def __rtruediv__(self, other):
  # '__setattr__',
 
     def __setitem__(self, key, val):
+        if isinstance(key, dpnp_array):
+            key = key.get_array()
+        if isinstance(val, dpnp_array):
+            val = val.get_array()
+
         self._array_obj.__setitem__(key, val)
 
  # '__setstate__',

From f85a362ae34131cbdd399eae53a9400eeecde26f Mon Sep 17 00:00:00 2001
From: Natalia Polina <natalia.polina@intel.com>
Date: Tue, 28 Feb 2023 05:33:04 -0800
Subject: [PATCH 05/16] Use linspace() function from dpctl.tensor. (#1281)

* Use linspace() function from dpctl.tensor

* Convert file cupy/creation_tests/test_ranges.py to Unix line endings

* Added support for array input arguments to linspace() function.

* Updated linspace implementation for arrays as input argument.

* Fixed linspace() function for complex dtype.

* Removed extra copy in linspace() function.

* Added comments for linspace() function.

* Added skipping cross device tests for linspace() function on Windows.

* Added reason for skipping tests for linspace() function.

Co-authored-by: Oleksandr Pavlyk <oleksandr.pavlyk@intel.com>

---------

Co-authored-by: Oleksandr Pavlyk <oleksandr.pavlyk@intel.com>
Co-authored-by: Anton <100830759+antonwolfy@users.noreply.github.com>
---
 dpnp/dpnp_algo/dpnp_algo.pyx                  |   2 +
 dpnp/dpnp_algo/dpnp_algo_arraycreation.pyx    |  90 ++-
 dpnp/dpnp_container.py                        |  68 +-
 dpnp/dpnp_iface_arraycreation.py              |  39 +-
 tests/skipped_tests.tbl                       |   8 +-
 tests/skipped_tests_gpu.tbl                   |  14 +-
 tests/test_arraycreation.py                   |  45 ++
 tests/test_special.py                         |   2 +-
 tests/test_sycl_queue.py                      |  86 +-
 tests/test_usm_type.py                        |  13 +
 .../cupy/creation_tests/test_ranges.py        | 742 +++++++++---------
 11 files changed, 650 insertions(+), 459 deletions(-)

diff --git a/dpnp/dpnp_algo/dpnp_algo.pyx b/dpnp/dpnp_algo/dpnp_algo.pyx
index f12707ccc76..4737bcfd3c7 100644
--- a/dpnp/dpnp_algo/dpnp_algo.pyx
+++ b/dpnp/dpnp_algo/dpnp_algo.pyx
@@ -48,6 +48,8 @@ cimport dpnp.dpnp_utils as utils
 cimport numpy
 import numpy
 
+import operator
+
 
 __all__ = [
     "dpnp_astype",
diff --git a/dpnp/dpnp_algo/dpnp_algo_arraycreation.pyx b/dpnp/dpnp_algo/dpnp_algo_arraycreation.pyx
index 50b8bb84070..7b538118b93 100644
--- a/dpnp/dpnp_algo/dpnp_algo_arraycreation.pyx
+++ b/dpnp/dpnp_algo/dpnp_algo_arraycreation.pyx
@@ -189,31 +189,81 @@ cpdef utils.dpnp_descriptor dpnp_identity(n, result_dtype):
     return result
 
 
-# TODO this function should work through dpnp_arange_c
-cpdef tuple dpnp_linspace(start, stop, num, endpoint, retstep, dtype, axis):
-    cdef shape_type_c obj_shape = utils._object_to_tuple(num)
-    cdef utils.dpnp_descriptor result = utils_py.create_output_descriptor_py(obj_shape, dtype, None)
+def dpnp_linspace(start, stop, num, dtype=None, device=None, usm_type=None, sycl_queue=None, endpoint=True, retstep=False, axis=0):
+    usm_type_alloc, sycl_queue_alloc = utils_py.get_usm_allocations([start, stop])
 
-    if endpoint:
-        steps_count = num - 1
-    else:
-        steps_count = num
+    # Get sycl_queue.
+    if sycl_queue is None and device is None:
+        sycl_queue = sycl_queue_alloc
+    sycl_queue_normalized = dpnp.get_normalized_queue_device(sycl_queue=sycl_queue, device=device)
 
-    # if there are steps, then fill values
-    if steps_count > 0:
-        step = (dpnp.float64(stop) - start) / steps_count
-        for i in range(1, result.size):
-            result.get_pyobj()[i] = start + step * i
+    # Get temporary usm_type for getting dtype.
+    if usm_type is None:
+        _usm_type = "device" if usm_type_alloc is None else usm_type_alloc
     else:
-        step = dpnp.nan
+        _usm_type = usm_type
+
+    # Get dtype.
+    if not hasattr(start, "dtype") and not dpnp.isscalar(start):
+        start = dpnp.asarray(start, usm_type=_usm_type, sycl_queue=sycl_queue_normalized)
+    if not hasattr(stop, "dtype") and not dpnp.isscalar(stop):
+        stop = dpnp.asarray(stop, usm_type=_usm_type, sycl_queue=sycl_queue_normalized)
+    dt = numpy.result_type(start, stop, float(num))
+    dt = utils_py.map_dtype_to_device(dt, sycl_queue_normalized.sycl_device)
+    if dtype is None:
+        dtype = dt
+
+    if dpnp.isscalar(start) and dpnp.isscalar(stop):
+        # Call linspace() function for scalars.
+        res = dpnp_container.linspace(start,
+                                      stop,
+                                      num,
+                                      dtype=dt,
+                                      usm_type=_usm_type,
+                                      sycl_queue=sycl_queue_normalized,
+                                      endpoint=endpoint)
+    else:
+        num = operator.index(num)
+        if num < 0:
+            raise ValueError("Number of points must be non-negative")
+
+        # Get final usm_type and copy arrays if needed with current dtype, usm_type and sycl_queue.
+        # Do not need to copy usm_ndarray by usm_type if it is not explicitly stated.
+        if usm_type is None:
+            usm_type = _usm_type
+            if not hasattr(start, "usm_type"):
+                _start = dpnp.asarray(start, dtype=dt, usm_type=usm_type, sycl_queue=sycl_queue_normalized)
+            else:
+                _start = dpnp.asarray(start, dtype=dt, sycl_queue=sycl_queue_normalized)
+            if not hasattr(stop, "usm_type"):
+                _stop = dpnp.asarray(stop, dtype=dt, usm_type=usm_type, sycl_queue=sycl_queue_normalized)
+            else:
+                _stop = dpnp.asarray(stop, dtype=dt, sycl_queue=sycl_queue_normalized)
+        else:
+            _start = dpnp.asarray(start, dtype=dt, usm_type=usm_type, sycl_queue=sycl_queue_normalized)
+            _stop = dpnp.asarray(stop, dtype=dt, usm_type=usm_type, sycl_queue=sycl_queue_normalized)
 
-    # if result is not empty, then fiil first and last elements
-    if num > 0:
-        result.get_pyobj()[0] = start
-        if endpoint and result.size > 1:
-            result.get_pyobj()[result.size - 1] = stop
+        # FIXME: issue #1304. Mathematical operations with scalar don't follow data type.
+        _num = dpnp.asarray((num - 1) if endpoint else num, dtype=dt, usm_type=usm_type, sycl_queue=sycl_queue_normalized)
+
+        step = (_stop - _start) / _num
+
+        res = dpnp_container.arange(0,
+                                    stop=num,
+                                    step=1,
+                                    dtype=dt,
+                                    usm_type=usm_type,
+                                    sycl_queue=sycl_queue_normalized)
+
+        res = res.reshape((-1,) + (1,) * step.ndim)
+        res = res * step + _start
+
+        if endpoint and num > 1:
+            res[-1] = dpnp_container.full(step.shape, _stop)
 
-    return (result.get_pyobj(), step)
+    if numpy.issubdtype(dtype, dpnp.integer):
+        dpnp.floor(res, out=res)
+    return res.astype(dtype)
 
 
 cpdef utils.dpnp_descriptor dpnp_logspace(start, stop, num, endpoint, base, dtype, axis):
diff --git a/dpnp/dpnp_container.py b/dpnp/dpnp_container.py
index 2adb2b9b7f2..12d28074b8f 100644
--- a/dpnp/dpnp_container.py
+++ b/dpnp/dpnp_container.py
@@ -47,6 +47,7 @@
     "empty",
     "eye",
     "full",
+    "linspace",
     "ones"
     "tril",
     "triu",
@@ -126,6 +127,33 @@ def empty(shape,
     return dpnp_array(array_obj.shape, buffer=array_obj, order=order)
 
 
+def eye(N,
+        M=None,
+        /,
+        *,
+        k=0,
+        dtype=None,
+        order="C",
+        device=None,
+        usm_type="device",
+        sycl_queue=None):
+    """Validate input parameters before passing them into `dpctl.tensor` module"""
+    dpu.validate_usm_type(usm_type, allow_none=False)
+    sycl_queue_normalized = dpnp.get_normalized_queue_device(sycl_queue=sycl_queue, device=device)
+    if order is None:
+        order = 'C'
+
+    """Creates `dpnp_array` with ones on the `k`th diagonal."""
+    array_obj = dpt.eye(N,
+                        M,
+                        k=k,
+                        dtype=dtype,
+                        order=order,
+                        usm_type=usm_type,
+                        sycl_queue=sycl_queue_normalized)
+    return dpnp_array(array_obj.shape, buffer=array_obj, order=order)
+
+
 def full(shape,
          fill_value,
          *,
@@ -153,31 +181,29 @@ def full(shape,
     return dpnp_array(array_obj.shape, buffer=array_obj, order=order)
 
 
-def eye(N,
-        M=None,
-        /,
-        *,
-        k=0,
-        dtype=None,
-        order="C",
-        device=None,
-        usm_type="device",
-        sycl_queue=None):
+def linspace(start,
+             stop,
+             /,
+             num,
+             *,
+             dtype=None,
+             device=None,
+             usm_type="device",
+             sycl_queue=None,
+             endpoint=True):
     """Validate input parameters before passing them into `dpctl.tensor` module"""
     dpu.validate_usm_type(usm_type, allow_none=False)
     sycl_queue_normalized = dpnp.get_normalized_queue_device(sycl_queue=sycl_queue, device=device)
-    if order is None:
-        order = 'C'
 
-    """Creates `dpnp_array` with ones on the `k`th diagonal."""
-    array_obj = dpt.eye(N,
-                        M,
-                        k=k,
-                        dtype=dtype,
-                        order=order,
-                        usm_type=usm_type,
-                        sycl_queue=sycl_queue_normalized)
-    return dpnp_array(array_obj.shape, buffer=array_obj, order=order)
+    """Creates `dpnp_array` with evenly spaced numbers of specified interval."""
+    array_obj = dpt.linspace(start,
+                             stop,
+                             num,
+                             dtype=dtype,
+                             usm_type=usm_type,
+                             sycl_queue=sycl_queue_normalized,
+                             endpoint=endpoint)
+    return dpnp_array(array_obj.shape, buffer=array_obj)
 
 
 def meshgrid(*xi, indexing="xy"):
diff --git a/dpnp/dpnp_iface_arraycreation.py b/dpnp/dpnp_iface_arraycreation.py
index 8d892edce6d..257fd660fbb 100644
--- a/dpnp/dpnp_iface_arraycreation.py
+++ b/dpnp/dpnp_iface_arraycreation.py
@@ -50,6 +50,7 @@
 
 import dpnp.dpnp_container as dpnp_container
 import dpctl.tensor as dpt
+import dpctl
 
 
 __all__ = [
@@ -879,7 +880,18 @@ def identity(n, dtype=None, *, like=None):
     return call_origin(numpy.identity, n, dtype=dtype, like=like)
 
 
-def linspace(start, stop, num=50, endpoint=True, retstep=False, dtype=None, axis=0):
+def linspace(start,
+             stop,
+             /,
+             num,
+             *,
+             dtype=None,
+             device=None,
+             usm_type=None,
+             sycl_queue=None,
+             endpoint=True,
+             retstep=False,
+             axis=0):
     """
     Return evenly spaced numbers over a specified interval.
 
@@ -888,6 +900,8 @@ def linspace(start, stop, num=50, endpoint=True, retstep=False, dtype=None, axis
     Limitations
     -----------
     Parameter ``axis`` is supported only with default value ``0``.
+    Parameter ``retstep`` is supported only with default value ``False``.
+    Otherwise the function will be executed sequentially on CPU.
 
     See Also
     --------
@@ -913,16 +927,19 @@ def linspace(start, stop, num=50, endpoint=True, retstep=False, dtype=None, axis
 
     """
 
-    if not use_origin_backend():
-        if axis != 0:
-            checker_throw_value_error("linspace", "axis", axis, 0)
-
-        res = dpnp_linspace(start, stop, num, endpoint, retstep, dtype, axis)
-
-        if retstep:
-            return res
-        else:
-            return res[0]
+    if retstep is not False:
+        pass
+    elif axis != 0:
+        pass
+    else:
+        return dpnp_linspace(start,
+                             stop,
+                             num,
+                             dtype=dtype,
+                             device=device,
+                             usm_type=usm_type,
+                             sycl_queue=sycl_queue,
+                             endpoint=endpoint)
 
     return call_origin(numpy.linspace, start, stop, num, endpoint, retstep, dtype, axis)
 
diff --git a/tests/skipped_tests.tbl b/tests/skipped_tests.tbl
index 097f98c3250..d598ea2ca9f 100644
--- a/tests/skipped_tests.tbl
+++ b/tests/skipped_tests.tbl
@@ -418,12 +418,10 @@ tests/third_party/cupy/creation_tests/test_ranges.py::TestMgrid::test_mgrid3
 tests/third_party/cupy/creation_tests/test_ranges.py::TestOgrid::test_ogrid3
 tests/third_party/cupy/creation_tests/test_ranges.py::TestOgrid::test_ogrid4
 tests/third_party/cupy/creation_tests/test_ranges.py::TestOgrid::test_ogrid5
-tests/third_party/cupy/creation_tests/test_ranges.py::TestRanges::test_linspace_array_start_stop
 tests/third_party/cupy/creation_tests/test_ranges.py::TestRanges::test_linspace_array_start_stop_axis1
-tests/third_party/cupy/creation_tests/test_ranges.py::TestRanges::test_linspace_float_underflow
-tests/third_party/cupy/creation_tests/test_ranges.py::TestRanges::test_linspace_mixed_start_stop
-tests/third_party/cupy/creation_tests/test_ranges.py::TestRanges::test_linspace_mixed_start_stop2
-tests/third_party/cupy/creation_tests/test_ranges.py::TestRanges::test_linspace_start_stop_list
+tests/third_party/cupy/creation_tests/test_ranges.py::TestRanges::test_linspace_one_num_no_endopoint_with_retstep
+tests/third_party/cupy/creation_tests/test_ranges.py::TestRanges::test_linspace_with_retstep
+tests/third_party/cupy/creation_tests/test_ranges.py::TestRanges::test_linspace_zero_num_no_endopoint_with_retstep
 tests/third_party/cupy/indexing_tests/test_generate.py::TestAxisConcatenator::test_AxisConcatenator_init1
 tests/third_party/cupy/indexing_tests/test_generate.py::TestAxisConcatenator::test_len
 tests/third_party/cupy/indexing_tests/test_generate.py::TestC_::test_c_1
diff --git a/tests/skipped_tests_gpu.tbl b/tests/skipped_tests_gpu.tbl
index c517b5cf9de..3dedcff4af0 100644
--- a/tests/skipped_tests_gpu.tbl
+++ b/tests/skipped_tests_gpu.tbl
@@ -594,19 +594,13 @@ tests/third_party/cupy/creation_tests/test_ranges.py::TestMgrid::test_mgrid5
 tests/third_party/cupy/creation_tests/test_ranges.py::TestOgrid::test_ogrid3
 tests/third_party/cupy/creation_tests/test_ranges.py::TestOgrid::test_ogrid4
 tests/third_party/cupy/creation_tests/test_ranges.py::TestOgrid::test_ogrid5
-
-tests/third_party/cupy/creation_tests/test_ranges.py::TestRanges::test_linspace_array_start_stop
-tests/third_party/cupy/creation_tests/test_ranges.py::TestRanges::test_linspace_array_start_stop_axis1
 tests/third_party/cupy/creation_tests/test_ranges.py::TestRanges::test_arange_negative_size
 tests/third_party/cupy/creation_tests/test_ranges.py::TestRanges::test_arange_no_dtype_int
-
-tests/third_party/cupy/creation_tests/test_ranges.py::TestRanges::test_linspace_float_underflow
-tests/third_party/cupy/creation_tests/test_ranges.py::TestRanges::test_linspace_mixed_start_stop
-tests/third_party/cupy/creation_tests/test_ranges.py::TestRanges::test_linspace_mixed_start_stop2
-tests/third_party/cupy/creation_tests/test_ranges.py::TestRanges::test_linspace_start_stop_list
-
+tests/third_party/cupy/creation_tests/test_ranges.py::TestRanges::test_linspace_array_start_stop_axis1
+tests/third_party/cupy/creation_tests/test_ranges.py::TestRanges::test_linspace_one_num_no_endopoint_with_retstep
+tests/third_party/cupy/creation_tests/test_ranges.py::TestRanges::test_linspace_with_retstep
+tests/third_party/cupy/creation_tests/test_ranges.py::TestRanges::test_linspace_zero_num_no_endopoint_with_retstep
 tests/third_party/cupy/creation_tests/test_ranges.py::TestRanges::test_logspace_zero_num
-
 tests/third_party/cupy/fft_tests/test_fft.py::TestFft2_param_1_{axes=None, norm=None, s=(1, None), shape=(3, 4)}::test_fft2
 tests/third_party/cupy/fft_tests/test_fft.py::TestFft2_param_7_{axes=(), norm=None, s=None, shape=(3, 4)}::test_fft2
 tests/third_party/cupy/fft_tests/test_fft.py::TestFft2_param_7_{axes=(), norm=None, s=None, shape=(3, 4)}::test_ifft2
diff --git a/tests/test_arraycreation.py b/tests/test_arraycreation.py
index 71e6a7b7d07..7944ff21003 100644
--- a/tests/test_arraycreation.py
+++ b/tests/test_arraycreation.py
@@ -513,6 +513,51 @@ def test_dpctl_tensor_input(func, args):
         assert_array_equal(X, Y)
 
 
+@pytest.mark.parametrize("start",
+                         [0, -5, 10, -2.5, 9.7],
+                         ids=['0', '-5', '10', '-2.5', '9.7'])
+@pytest.mark.parametrize("stop",
+                         [0, 10, -2, 20.5, 1000],
+                         ids=['0', '10', '-2', '20.5', '1000'])
+@pytest.mark.parametrize("num",
+                         [5, numpy.array(10), dpnp.array(17), dpt.asarray(100)],
+                         ids=['5', 'numpy.array(10)', 'dpnp.array(17)', 'dpt.asarray(100)'])
+@pytest.mark.parametrize("dtype", get_all_dtypes(no_bool=True, no_float16=False))
+def test_linspace(start, stop, num, dtype):
+    func = lambda xp: xp.linspace(start, stop, num, dtype=dtype)
+
+    if numpy.issubdtype(dtype, dpnp.integer):
+        assert_allclose(func(numpy), func(dpnp), rtol=1)
+    else:
+        assert_allclose(func(numpy), func(dpnp), atol=numpy.finfo(dtype).eps)
+
+
+@pytest.mark.parametrize("start_dtype",
+                         [numpy.float64, numpy.float32, numpy.int64, numpy.int32],
+                         ids=['float64', 'float32', 'int64', 'int32'])
+@pytest.mark.parametrize("stop_dtype",
+                         [numpy.float64, numpy.float32, numpy.int64, numpy.int32],
+                         ids=['float64', 'float32', 'int64', 'int32'])
+def test_linspace_dtype(start_dtype, stop_dtype):
+    start = numpy.array([1, 2, 3], dtype=start_dtype)
+    stop = numpy.array([11, 7, -2], dtype=stop_dtype)
+    dpnp.linspace(start, stop, 10)
+
+
+@pytest.mark.parametrize("start",
+                         [dpnp.array(1), dpnp.array([2.6]), numpy.array([[-6.7, 3]]), [1, -4], (3, 5)])
+@pytest.mark.parametrize("stop",
+                         [dpnp.array([-4]), dpnp.array([[2.6], [- 4]]), numpy.array(2), [[-4.6]], (3,)])
+def test_linspace_arrays(start, stop):
+    func = lambda xp: xp.linspace(start, stop, 10)
+    assert func(numpy).shape == func(dpnp).shape
+
+
+def test_linspace_complex():
+    func = lambda xp: xp.linspace(0, 3 + 2j, num=1000)
+    assert_allclose(func(numpy), func(dpnp))
+
+
 @pytest.mark.parametrize("arrays",
                          [[], [[1]], [[1, 2, 3], [4, 5, 6]], [[1, 2], [3, 4], [5, 6]]],
                          ids=['[]', '[[1]]', '[[1, 2, 3], [4, 5, 6]]', '[[1, 2], [3, 4], [5, 6]]'])
diff --git a/tests/test_special.py b/tests/test_special.py
index da9938d75e9..21810661687 100644
--- a/tests/test_special.py
+++ b/tests/test_special.py
@@ -7,7 +7,7 @@ def test_erf():
     a = numpy.linspace(2.0, 3.0, num=10)
     ia = dpnp.linspace(2.0, 3.0, num=10)
 
-    numpy.testing.assert_array_equal(a, ia)
+    numpy.testing.assert_allclose(a, ia)
 
     expected = numpy.empty_like(a)
     for idx, val in enumerate(a):
diff --git a/tests/test_sycl_queue.py b/tests/test_sycl_queue.py
index 3b86f06ed2e..2197dbe5414 100644
--- a/tests/test_sycl_queue.py
+++ b/tests/test_sycl_queue.py
@@ -1,5 +1,6 @@
 import pytest
 from .helper import get_all_dtypes
+import sys
 
 import dpnp
 import dpctl
@@ -90,6 +91,9 @@ def vvsort(val, vec, size, xp):
         pytest.param("eye",
                      [4, 2],
                      {}),
+        pytest.param("linspace",
+                     [0, 4, 8],
+                     {}),
         pytest.param("ones",
                      [(2,2)],
                      {}),
@@ -134,13 +138,63 @@ def test_empty_like(device_x, device_y):
 
 
 @pytest.mark.parametrize(
-    "func, kwargs",
+    "func, args, kwargs",
+    [
+        pytest.param("full_like",
+                     ['x0'],
+                     {'fill_value': 5}),
+        pytest.param("ones_like",
+                     ['x0'],
+                     {}),
+        pytest.param("zeros_like",
+                     ['x0'],
+                     {}),
+        pytest.param("tril",
+                     ['x0.reshape((2,2))'],
+                     {}),
+        pytest.param("triu",
+                     ['x0.reshape((2,2))'],
+                     {}),
+        pytest.param("linspace",
+                     ['x0', '4', '4'],
+                     {}),
+        pytest.param("linspace",
+                     ['1', 'x0', '4'],
+                     {})
+    ])
+@pytest.mark.parametrize("device",
+                          valid_devices,
+                          ids=[device.filter_string for device in valid_devices])
+def test_array_creation_follow_device(func, args, kwargs, device):
+    x_orig = numpy.array([1, 2, 3, 4])
+    numpy_args = [eval(val, {'x0' : x_orig}) for val in args]
+    y_orig = getattr(numpy, func)(*numpy_args, **kwargs)
+
+    x = dpnp.array([1, 2, 3, 4], device=device)
+    dpnp_args = [eval(val, {'x0' : x}) for val in args]
+
+    y = getattr(dpnp, func)(*dpnp_args, **kwargs)
+    numpy.testing.assert_allclose(y_orig, y)
+    assert_sycl_queue_equal(y.sycl_queue, x.sycl_queue)
+
+
+@pytest.mark.parametrize(
+    "func, args, kwargs",
     [
         pytest.param("full_like",
+                     ['x0'],
                      {'fill_value': 5}),
         pytest.param("ones_like",
+                     ['x0'],
                      {}),
         pytest.param("zeros_like",
+                     ['x0'],
+                     {}),
+        pytest.param("linspace",
+                     ['x0', '4', '4'],
+                     {}),
+        pytest.param("linspace",
+                     ['1', 'x0', '4'],
                      {})
     ])
 @pytest.mark.parametrize("device_x",
@@ -149,34 +203,26 @@ def test_empty_like(device_x, device_y):
 @pytest.mark.parametrize("device_y",
                           valid_devices,
                           ids=[device.filter_string for device in valid_devices])
-def test_array_creation_like(func, kwargs, device_x, device_y):
-    x_orig = numpy.ndarray([1, 2, 3])
-    y_orig = getattr(numpy, func)(x_orig, **kwargs)
+def test_array_creation_cross_device(func, args, kwargs, device_x, device_y):
+    if func == 'linspace' and sys.platform.startswith('win'):  # compare by value, not identity
+        pytest.skip("CPU driver experiences an instability on Windows.")
 
-    x = dpnp.ndarray([1, 2, 3], device=device_x)
+    x_orig = numpy.array([1, 2, 3, 4])
+    numpy_args = [eval(val, {'x0' : x_orig}) for val in args]
+    y_orig = getattr(numpy, func)(*numpy_args, **kwargs)
 
-    y = getattr(dpnp, func)(x, **kwargs)
-    numpy.testing.assert_array_equal(y_orig, y)
-    assert_sycl_queue_equal(y.sycl_queue, x.sycl_queue)
+    x = dpnp.array([1, 2, 3, 4], device=device_x)
+    dpnp_args = [eval(val, {'x0' : x}) for val in args]
 
     dpnp_kwargs = dict(kwargs)
     dpnp_kwargs['device'] = device_y
+    
+    y = getattr(dpnp, func)(*dpnp_args, **dpnp_kwargs)
+    numpy.testing.assert_allclose(y_orig, y)
 
-    y = getattr(dpnp, func)(x, **dpnp_kwargs)
-    numpy.testing.assert_array_equal(y_orig, y)
     assert_sycl_queue_equal(y.sycl_queue, x.to_device(device_y).sycl_queue)
 
 
-@pytest.mark.parametrize("func", ["tril", "triu"], ids=["tril", "triu"])
-@pytest.mark.parametrize("device",
-                          valid_devices,
-                          ids=[device.filter_string for device in valid_devices])
-def test_tril_triu(func, device):
-    x0 = dpnp.ones((3,3), device=device)
-    x = getattr(dpnp, func)(x0)
-    assert_sycl_queue_equal(x.sycl_queue, x0.sycl_queue)
-
-
 @pytest.mark.parametrize("device_x",
                           valid_devices,
                           ids=[device.filter_string for device in valid_devices])
diff --git a/tests/test_usm_type.py b/tests/test_usm_type.py
index 605cbb4f3e4..9cbef140519 100644
--- a/tests/test_usm_type.py
+++ b/tests/test_usm_type.py
@@ -76,6 +76,10 @@ def test_coerced_usm_types_divide(usm_type_x, usm_type_y):
                      ['x0']),
         pytest.param("empty_like",
                      ['x0']),
+        pytest.param("linspace",
+                     ['x0[0:2]', '4', '4']),
+        pytest.param("linspace",
+                     ['0', 'x0[3:5]', '4']),
     ])
 @pytest.mark.parametrize("usm_type_x", list_of_usm_types, ids=list_of_usm_types)
 @pytest.mark.parametrize("usm_type_y", list_of_usm_types, ids=list_of_usm_types)
@@ -90,6 +94,15 @@ def test_array_creation(func, args, usm_type_x, usm_type_y):
     assert y.usm_type == usm_type_y
 
 
+@pytest.mark.parametrize("usm_type_start", list_of_usm_types, ids=list_of_usm_types)
+@pytest.mark.parametrize("usm_type_stop", list_of_usm_types, ids=list_of_usm_types)
+def test_linspace_arrays(usm_type_start, usm_type_stop):
+    start = dp.asarray([0, 0], usm_type=usm_type_start)
+    stop = dp.asarray([2, 4], usm_type=usm_type_stop)
+    res = dp.linspace(start, stop, 4)
+    assert res.usm_type == du.get_coerced_usm_type([usm_type_start, usm_type_stop])
+
+
 @pytest.mark.skip()
 @pytest.mark.parametrize("func", ["tril", "triu"], ids=["tril", "triu"])
 @pytest.mark.parametrize("usm_type", list_of_usm_types, ids=list_of_usm_types)
diff --git a/tests/third_party/cupy/creation_tests/test_ranges.py b/tests/third_party/cupy/creation_tests/test_ranges.py
index 4d5bc03f81b..ac94297354f 100644
--- a/tests/third_party/cupy/creation_tests/test_ranges.py
+++ b/tests/third_party/cupy/creation_tests/test_ranges.py
@@ -1,371 +1,371 @@
-import math
-import sys
-import unittest
-
-import numpy
-import pytest
-
-import dpnp as cupy
-from tests.third_party.cupy import testing
-
-
-@testing.gpu
-class TestRanges(unittest.TestCase):
-
-    @testing.for_all_dtypes(no_bool=True)
-    @testing.numpy_cupy_array_equal()
-    def test_arange(self, xp, dtype):
-        return xp.arange(10, dtype=dtype)
-
-    @testing.for_all_dtypes(no_bool=True)
-    @testing.numpy_cupy_array_equal()
-    def test_arange2(self, xp, dtype):
-        return xp.arange(5, 10, dtype=dtype)
-
-    @testing.for_all_dtypes(no_bool=True)
-    @testing.numpy_cupy_array_equal()
-    def test_arange3(self, xp, dtype):
-        return xp.arange(1, 11, 2, dtype=dtype)
-
-    @testing.for_all_dtypes(no_bool=True)
-    @testing.numpy_cupy_array_equal()
-    def test_arange4(self, xp, dtype):
-        return xp.arange(20, 2, -3, dtype=dtype)
-
-    @testing.for_all_dtypes(no_bool=True)
-    @testing.numpy_cupy_array_equal()
-    def test_arange5(self, xp, dtype):
-        return xp.arange(0, 100, None, dtype=dtype)
-
-    @testing.for_all_dtypes()
-    @testing.numpy_cupy_array_equal()
-    def test_arange6(self, xp, dtype):
-        return xp.arange(0, 2, dtype=dtype)
-
-    @testing.for_all_dtypes()
-    @testing.numpy_cupy_array_equal()
-    def test_arange7(self, xp, dtype):
-        return xp.arange(10, 11, dtype=dtype)
-
-    @testing.for_all_dtypes()
-    @testing.numpy_cupy_array_equal()
-    def test_arange8(self, xp, dtype):
-        return xp.arange(10, 8, -1, dtype=dtype)
-
-    def test_arange9(self):
-        for xp in (numpy, cupy):
-            with pytest.raises((ValueError, TypeError)):
-                xp.arange(10, dtype=xp.bool_)
-
-    @testing.numpy_cupy_array_equal()
-    def test_arange_no_dtype_int(self, xp):
-        return xp.arange(1, 11, 2)
-
-    @testing.numpy_cupy_array_equal()
-    def test_arange_no_dtype_float(self, xp):
-        return xp.arange(1.0, 11.0, 2.0)
-
-    @testing.numpy_cupy_array_equal()
-    def test_arange_negative_size(self, xp):
-        return xp.arange(3, 1)
-
-    @testing.for_all_dtypes(no_bool=True)
-    @testing.numpy_cupy_array_equal()
-    def test_linspace(self, xp, dtype):
-        return xp.linspace(0, 10, 5, dtype=dtype)
-
-    @testing.for_all_dtypes(no_bool=True)
-    @testing.numpy_cupy_array_equal()
-    def test_linspace2(self, xp, dtype):
-        return xp.linspace(10, 0, 5, dtype=dtype)
-
-    @testing.for_all_dtypes(no_bool=True)
-    @testing.numpy_cupy_array_equal()
-    def test_linspace_zero_num(self, xp, dtype):
-        return xp.linspace(0, 10, 0, dtype=dtype)
-
-    @testing.for_all_dtypes(no_bool=True)
-    @testing.numpy_cupy_array_equal()
-    def test_linspace_zero_num_no_endopoint_with_retstep(self, xp, dtype):
-        x, step = xp.linspace(0, 10, 0, dtype=dtype, endpoint=False,
-                              retstep=True)
-        self.assertTrue(math.isnan(step))
-        return x
-
-    @testing.with_requires('numpy>=1.18')
-    @testing.for_all_dtypes(no_bool=True)
-    @testing.numpy_cupy_array_equal()
-    def test_linspace_one_num_no_endopoint_with_retstep(self, xp, dtype):
-        start, stop = 3, 7
-        x, step = xp.linspace(start, stop, 1, dtype=dtype, endpoint=False,
-                              retstep=True)
-        self.assertEqual(step, stop - start)
-        return x
-
-    @testing.for_all_dtypes(no_bool=True)
-    @testing.numpy_cupy_array_equal()
-    def test_linspace_one_num(self, xp, dtype):
-        return xp.linspace(0, 2, 1, dtype=dtype)
-
-    @testing.for_all_dtypes(no_bool=True)
-    @testing.numpy_cupy_array_equal()
-    def test_linspace_no_endpoint(self, xp, dtype):
-        return xp.linspace(0, 10, 5, dtype=dtype, endpoint=False)
-
-    @testing.for_all_dtypes(no_bool=True)
-    @testing.numpy_cupy_array_equal()
-    def test_linspace_with_retstep(self, xp, dtype):
-        x, step = xp.linspace(0, 10, 5, dtype=dtype, retstep=True)
-        self.assertEqual(step, 2.5)
-        return x
-
-    @testing.numpy_cupy_allclose()
-    def test_linspace_no_dtype_int(self, xp):
-        return xp.linspace(0, 10)
-
-    @testing.numpy_cupy_allclose()
-    def test_linspace_no_dtype_float(self, xp):
-        return xp.linspace(0.0, 10.0)
-
-    @testing.numpy_cupy_allclose()
-    def test_linspace_float_args_with_int_dtype(self, xp):
-        return xp.linspace(0.1, 9.1, 11, dtype=int)
-
-    def test_linspace_neg_num(self):
-        for xp in (numpy, cupy):
-            with pytest.raises(ValueError):
-                xp.linspace(0, 10, -1)
-
-    @testing.numpy_cupy_allclose()
-    def test_linspace_float_overflow(self, xp):
-        return xp.linspace(0., sys.float_info.max / 5, 10, dtype=float)
-
-    @testing.numpy_cupy_array_equal()
-    def test_linspace_float_underflow(self, xp):
-        # find minimum subnormal number
-        x = sys.float_info.min
-        while x / 2 > 0:
-            x /= 2
-        return xp.linspace(0., x, 10, dtype=float)
-
-    @testing.with_requires('numpy>=1.16')
-    @testing.for_all_dtypes_combination(names=('dtype_range', 'dtype_out'),
-                                        no_bool=True, no_complex=True)
-    @testing.numpy_cupy_array_equal()
-    def test_linspace_array_start_stop(self, xp, dtype_range, dtype_out):
-        start = xp.array([0, 120], dtype=dtype_range)
-        stop = xp.array([100, 0], dtype=dtype_range)
-        return xp.linspace(start, stop, num=50, dtype=dtype_out)
-
-    @testing.with_requires('numpy>=1.16')
-    @testing.for_all_dtypes_combination(names=('dtype_range', 'dtype_out'),
-                                        no_bool=True, no_complex=True)
-    @testing.numpy_cupy_array_equal()
-    def test_linspace_mixed_start_stop(self, xp, dtype_range, dtype_out):
-        start = 0.0
-        if xp.dtype(dtype_range).kind in 'u':
-            stop = xp.array([100, 16], dtype=dtype_range)
-        else:
-            stop = xp.array([100, -100], dtype=dtype_range)
-        return xp.linspace(start, stop, num=50, dtype=dtype_out)
-
-    @testing.with_requires('numpy>=1.16')
-    @testing.for_all_dtypes_combination(names=('dtype_range', 'dtype_out'),
-                                        no_bool=True, no_complex=True)
-    @testing.numpy_cupy_array_equal()
-    def test_linspace_mixed_start_stop2(self, xp, dtype_range, dtype_out):
-        if xp.dtype(dtype_range).kind in 'u':
-            start = xp.array([160, 120], dtype=dtype_range)
-        else:
-            start = xp.array([-120, 120], dtype=dtype_range)
-        stop = 0
-        return xp.linspace(start, stop, num=50, dtype=dtype_out)
-
-    @testing.with_requires('numpy>=1.16')
-    @testing.for_all_dtypes_combination(names=('dtype_range', 'dtype_out'),
-                                        no_bool=True, no_complex=True)
-    @testing.numpy_cupy_array_equal()
-    def test_linspace_array_start_stop_axis1(self, xp, dtype_range, dtype_out):
-        start = xp.array([0, 120], dtype=dtype_range)
-        stop = xp.array([100, 0], dtype=dtype_range)
-        return xp.linspace(start, stop, num=50, dtype=dtype_out, axis=1)
-
-    @testing.with_requires('numpy>=1.16')
-    @testing.for_complex_dtypes()
-    @testing.numpy_cupy_array_equal()
-    def test_linspace_complex_start_stop(self, xp, dtype):
-        start = xp.array([0, 120], dtype=dtype)
-        stop = xp.array([100, 0], dtype=dtype)
-        return xp.linspace(start, stop, num=50, dtype=dtype)
-
-    @testing.with_requires('numpy>=1.16')
-    @testing.for_all_dtypes(no_bool=True)
-    @testing.numpy_cupy_array_equal()
-    def test_linspace_start_stop_list(self, xp, dtype):
-        start = [0, 0]
-        stop = [100, 16]
-        return xp.linspace(start, stop, num=50, dtype=dtype)
-
-    @pytest.mark.usefixtures("allow_fall_back_on_numpy")
-    @testing.for_all_dtypes(no_bool=True)
-    @testing.numpy_cupy_allclose()
-    def test_logspace(self, xp, dtype):
-        return xp.logspace(0, 2, 5, dtype=dtype)
-
-    @pytest.mark.usefixtures("allow_fall_back_on_numpy")
-    @testing.for_all_dtypes(no_bool=True)
-    @testing.numpy_cupy_allclose()
-    def test_logspace2(self, xp, dtype):
-        return xp.logspace(2, 0, 5, dtype=dtype)
-
-    @testing.for_all_dtypes(no_bool=True)
-    @testing.numpy_cupy_allclose()
-    def test_logspace_zero_num(self, xp, dtype):
-        return xp.logspace(0, 2, 0, dtype=dtype)
-
-    @pytest.mark.usefixtures("allow_fall_back_on_numpy")
-    @testing.for_all_dtypes(no_bool=True)
-    @testing.numpy_cupy_allclose()
-    def test_logspace_one_num(self, xp, dtype):
-        return xp.logspace(0, 2, 1, dtype=dtype)
-
-    @pytest.mark.usefixtures("allow_fall_back_on_numpy")
-    @testing.for_all_dtypes(no_bool=True)
-    @testing.numpy_cupy_allclose()
-    def test_logspace_no_endpoint(self, xp, dtype):
-        return xp.logspace(0, 2, 5, dtype=dtype, endpoint=False)
-
-    @pytest.mark.usefixtures("allow_fall_back_on_numpy")
-    @testing.numpy_cupy_allclose()
-    def test_logspace_no_dtype_int(self, xp):
-        return xp.logspace(0, 2)
-
-    @pytest.mark.usefixtures("allow_fall_back_on_numpy")
-    @testing.numpy_cupy_allclose()
-    def test_logspace_no_dtype_float(self, xp):
-        return xp.logspace(0.0, 2.0)
-
-    @pytest.mark.usefixtures("allow_fall_back_on_numpy")
-    @testing.numpy_cupy_allclose()
-    def test_logspace_float_args_with_int_dtype(self, xp):
-        return xp.logspace(0.1, 2.1, 11, dtype=int)
-
-    def test_logspace_neg_num(self):
-        for xp in (numpy, cupy):
-            with pytest.raises(ValueError):
-                xp.logspace(0, 10, -1)
-
-    @pytest.mark.usefixtures("allow_fall_back_on_numpy")
-    @testing.for_all_dtypes(no_bool=True)
-    @testing.numpy_cupy_allclose()
-    def test_logspace_base(self, xp, dtype):
-        return xp.logspace(0, 2, 5, base=2.0, dtype=dtype)
-
-
-@testing.parameterize(
-    *testing.product({
-        'indexing': ['xy', 'ij'],
-        'sparse': [False, True],
-        'copy': [False, True],
-    })
-)
-@testing.gpu
-class TestMeshgrid(unittest.TestCase):
-
-    @testing.for_all_dtypes()
-    def test_meshgrid0(self, dtype):
-        out = cupy.meshgrid(indexing=self.indexing, sparse=self.sparse,
-                            copy=self.copy)
-        assert(out == [])
-
-    @testing.for_all_dtypes()
-    @testing.numpy_cupy_array_equal()
-    def test_meshgrid1(self, xp, dtype):
-        x = xp.arange(2).astype(dtype)
-        return xp.meshgrid(x, indexing=self.indexing, sparse=self.sparse,
-                           copy=self.copy)
-
-    @testing.for_all_dtypes()
-    @testing.numpy_cupy_array_equal()
-    def test_meshgrid2(self, xp, dtype):
-        x = xp.arange(2).astype(dtype)
-        y = xp.arange(3).astype(dtype)
-        return xp.meshgrid(x, y, indexing=self.indexing, sparse=self.sparse,
-                           copy=self.copy)
-
-    @testing.for_all_dtypes()
-    @testing.numpy_cupy_array_equal()
-    def test_meshgrid3(self, xp, dtype):
-        x = xp.arange(2).astype(dtype)
-        y = xp.arange(3).astype(dtype)
-        z = xp.arange(4).astype(dtype)
-        return xp.meshgrid(x, y, z, indexing=self.indexing, sparse=self.sparse,
-                           copy=self.copy)
-
-
-@testing.gpu
-class TestMgrid(unittest.TestCase):
-
-    @testing.numpy_cupy_array_equal()
-    def test_mgrid0(self, xp):
-        return xp.mgrid[0:]
-
-    @testing.numpy_cupy_array_equal()
-    def test_mgrid1(self, xp):
-        return xp.mgrid[-10:10]
-
-    @testing.numpy_cupy_array_equal()
-    def test_mgrid2(self, xp):
-        return xp.mgrid[-10:10:10j]
-
-    @testing.numpy_cupy_array_equal()
-    def test_mgrid3(self, xp):
-        x = xp.zeros(10)[:, None]
-        y = xp.ones(10)[:, None]
-        return xp.mgrid[x:y:10j]
-
-    @testing.numpy_cupy_array_equal()
-    def test_mgrid4(self, xp):
-        # check len(keys) > 1
-        return xp.mgrid[-10:10:10j, -10:10:10j]
-
-    @testing.numpy_cupy_array_equal()
-    def test_mgrid5(self, xp):
-        # check len(keys) > 1
-        x = xp.zeros(10)[:, None]
-        y = xp.ones(10)[:, None]
-        return xp.mgrid[x:y:10j, x:y:10j]
-
-
-@testing.gpu
-class TestOgrid(unittest.TestCase):
-
-    @testing.numpy_cupy_array_equal()
-    def test_ogrid0(self, xp):
-        return xp.ogrid[0:]
-
-    @testing.numpy_cupy_array_equal()
-    def test_ogrid1(self, xp):
-        return xp.ogrid[-10:10]
-
-    @testing.numpy_cupy_array_equal()
-    def test_ogrid2(self, xp):
-        return xp.ogrid[-10:10:10j]
-
-    @testing.numpy_cupy_array_equal()
-    def test_ogrid3(self, xp):
-        x = xp.zeros(10)[:, None]
-        y = xp.ones(10)[:, None]
-        return xp.ogrid[x:y:10j]
-
-    @testing.numpy_cupy_array_equal()
-    def test_ogrid4(self, xp):
-        # check len(keys) > 1
-        return xp.ogrid[-10:10:10j, -10:10:10j]
-
-    @testing.numpy_cupy_array_equal()
-    def test_ogrid5(self, xp):
-        # check len(keys) > 1
-        x = xp.zeros(10)[:, None]
-        y = xp.ones(10)[:, None]
-        return xp.ogrid[x:y:10j, x:y:10j]
+import math
+import sys
+import unittest
+
+import numpy
+import pytest
+
+import dpnp as cupy
+from tests.third_party.cupy import testing
+
+
+@testing.gpu
+class TestRanges(unittest.TestCase):
+
+    @testing.for_all_dtypes(no_bool=True)
+    @testing.numpy_cupy_array_equal()
+    def test_arange(self, xp, dtype):
+        return xp.arange(10, dtype=dtype)
+
+    @testing.for_all_dtypes(no_bool=True)
+    @testing.numpy_cupy_array_equal()
+    def test_arange2(self, xp, dtype):
+        return xp.arange(5, 10, dtype=dtype)
+
+    @testing.for_all_dtypes(no_bool=True)
+    @testing.numpy_cupy_array_equal()
+    def test_arange3(self, xp, dtype):
+        return xp.arange(1, 11, 2, dtype=dtype)
+
+    @testing.for_all_dtypes(no_bool=True)
+    @testing.numpy_cupy_array_equal()
+    def test_arange4(self, xp, dtype):
+        return xp.arange(20, 2, -3, dtype=dtype)
+
+    @testing.for_all_dtypes(no_bool=True)
+    @testing.numpy_cupy_array_equal()
+    def test_arange5(self, xp, dtype):
+        return xp.arange(0, 100, None, dtype=dtype)
+
+    @testing.for_all_dtypes()
+    @testing.numpy_cupy_array_equal()
+    def test_arange6(self, xp, dtype):
+        return xp.arange(0, 2, dtype=dtype)
+
+    @testing.for_all_dtypes()
+    @testing.numpy_cupy_array_equal()
+    def test_arange7(self, xp, dtype):
+        return xp.arange(10, 11, dtype=dtype)
+
+    @testing.for_all_dtypes()
+    @testing.numpy_cupy_array_equal()
+    def test_arange8(self, xp, dtype):
+        return xp.arange(10, 8, -1, dtype=dtype)
+
+    def test_arange9(self):
+        for xp in (numpy, cupy):
+            with pytest.raises((ValueError, TypeError)):
+                xp.arange(10, dtype=xp.bool_)
+
+    @testing.numpy_cupy_array_equal()
+    def test_arange_no_dtype_int(self, xp):
+        return xp.arange(1, 11, 2)
+
+    @testing.numpy_cupy_array_equal()
+    def test_arange_no_dtype_float(self, xp):
+        return xp.arange(1.0, 11.0, 2.0)
+
+    @testing.numpy_cupy_array_equal()
+    def test_arange_negative_size(self, xp):
+        return xp.arange(3, 1)
+
+    @testing.for_all_dtypes(no_bool=True)
+    @testing.numpy_cupy_array_equal()
+    def test_linspace(self, xp, dtype):
+        return xp.linspace(0, 10, 5, dtype=dtype)
+
+    @testing.for_all_dtypes(no_bool=True)
+    @testing.numpy_cupy_array_equal()
+    def test_linspace2(self, xp, dtype):
+        return xp.linspace(10, 0, 5, dtype=dtype)
+
+    @testing.for_all_dtypes(no_bool=True)
+    @testing.numpy_cupy_array_equal()
+    def test_linspace_zero_num(self, xp, dtype):
+        return xp.linspace(0, 10, 0, dtype=dtype)
+
+    @testing.for_all_dtypes(no_bool=True)
+    @testing.numpy_cupy_array_equal()
+    def test_linspace_zero_num_no_endopoint_with_retstep(self, xp, dtype):
+        x, step = xp.linspace(0, 10, 0, dtype=dtype, endpoint=False,
+                              retstep=True)
+        self.assertTrue(math.isnan(step))
+        return x
+
+    @testing.with_requires('numpy>=1.18')
+    @testing.for_all_dtypes(no_bool=True)
+    @testing.numpy_cupy_array_equal()
+    def test_linspace_one_num_no_endopoint_with_retstep(self, xp, dtype):
+        start, stop = 3, 7
+        x, step = xp.linspace(start, stop, 1, dtype=dtype, endpoint=False,
+                              retstep=True)
+        self.assertEqual(step, stop - start)
+        return x
+
+    @testing.for_all_dtypes(no_bool=True)
+    @testing.numpy_cupy_array_equal()
+    def test_linspace_one_num(self, xp, dtype):
+        return xp.linspace(0, 2, 1, dtype=dtype)
+
+    @testing.for_all_dtypes(no_bool=True)
+    @testing.numpy_cupy_allclose()
+    def test_linspace_no_endpoint(self, xp, dtype):
+        return xp.linspace(0, 10, 5, dtype=dtype, endpoint=False)
+
+    @testing.for_all_dtypes(no_bool=True)
+    @testing.numpy_cupy_array_equal()
+    def test_linspace_with_retstep(self, xp, dtype):
+        x, step = xp.linspace(0, 10, 5, dtype=dtype, retstep=True)
+        self.assertEqual(step, 2.5)
+        return x
+
+    @testing.numpy_cupy_allclose()
+    def test_linspace_no_dtype_int(self, xp):
+        return xp.linspace(0, 10, 50)
+
+    @testing.numpy_cupy_allclose()
+    def test_linspace_no_dtype_float(self, xp):
+        return xp.linspace(0.0, 10.0, 50)
+
+    @testing.numpy_cupy_array_equal()
+    def test_linspace_float_args_with_int_dtype(self, xp):
+        return xp.linspace(0.1, 9.1, 11, dtype=int)
+
+    def test_linspace_neg_num(self):
+        for xp in (numpy, cupy):
+            with pytest.raises(ValueError):
+                xp.linspace(0, 10, -1)
+
+    @testing.numpy_cupy_allclose()
+    def test_linspace_float_overflow(self, xp):
+        return xp.linspace(0., sys.float_info.max / 5, 10, dtype=float)
+
+    @testing.numpy_cupy_array_equal()
+    def test_linspace_float_underflow(self, xp):
+        # find minimum subnormal number
+        x = sys.float_info.min
+        while x / 2 > 0:
+            x /= 2
+        return xp.linspace(0., x, 10, dtype=float)
+
+    @testing.with_requires('numpy>=1.16')
+    @testing.for_all_dtypes_combination(names=('dtype_range', 'dtype_out'),
+                                        no_bool=True, no_complex=True)
+    @testing.numpy_cupy_allclose()
+    def test_linspace_array_start_stop(self, xp, dtype_range, dtype_out):
+        start = xp.array([0, 120], dtype=dtype_range)
+        stop = xp.array([100, 0], dtype=dtype_range)
+        return xp.linspace(start, stop, num=50, dtype=dtype_out)
+
+    @testing.with_requires('numpy>=1.16')
+    @testing.for_all_dtypes_combination(names=('dtype_range', 'dtype_out'),
+                                        no_bool=True, no_complex=True)
+    @testing.numpy_cupy_array_equal()
+    def test_linspace_mixed_start_stop(self, xp, dtype_range, dtype_out):
+        start = 0.0
+        if xp.dtype(dtype_range).kind in 'u':
+            stop = xp.array([100, 16], dtype=dtype_range)
+        else:
+            stop = xp.array([100, -100], dtype=dtype_range)
+        return xp.linspace(start, stop, num=50, dtype=dtype_out)
+
+    @testing.with_requires('numpy>=1.16')
+    @testing.for_all_dtypes_combination(names=('dtype_range', 'dtype_out'),
+                                        no_bool=True, no_complex=True)
+    @testing.numpy_cupy_allclose()
+    def test_linspace_mixed_start_stop2(self, xp, dtype_range, dtype_out):
+        if xp.dtype(dtype_range).kind in 'u':
+            start = xp.array([160, 120], dtype=dtype_range)
+        else:
+            start = xp.array([-120, 120], dtype=dtype_range)
+        stop = 0
+        return xp.linspace(start, stop, num=50, dtype=dtype_out)
+
+    @testing.with_requires('numpy>=1.16')
+    @testing.for_all_dtypes_combination(names=('dtype_range', 'dtype_out'),
+                                        no_bool=True, no_complex=True)
+    @testing.numpy_cupy_array_equal()
+    def test_linspace_array_start_stop_axis1(self, xp, dtype_range, dtype_out):
+        start = xp.array([0, 120], dtype=dtype_range)
+        stop = xp.array([100, 0], dtype=dtype_range)
+        return xp.linspace(start, stop, num=50, dtype=dtype_out, axis=1)
+
+    @testing.with_requires('numpy>=1.16')
+    @testing.for_complex_dtypes()
+    @testing.numpy_cupy_array_equal()
+    def test_linspace_complex_start_stop(self, xp, dtype):
+        start = xp.array([0, 120], dtype=dtype)
+        stop = xp.array([100, 0], dtype=dtype)
+        return xp.linspace(start, stop, num=50, dtype=dtype)
+
+    @testing.with_requires('numpy>=1.16')
+    @testing.for_all_dtypes(no_bool=True)
+    @testing.numpy_cupy_array_equal()
+    def test_linspace_start_stop_list(self, xp, dtype):
+        start = [0, 0]
+        stop = [100, 16]
+        return xp.linspace(start, stop, num=50, dtype=dtype)
+
+    @pytest.mark.usefixtures("allow_fall_back_on_numpy")
+    @testing.for_all_dtypes(no_bool=True)
+    @testing.numpy_cupy_allclose()
+    def test_logspace(self, xp, dtype):
+        return xp.logspace(0, 2, 5, dtype=dtype)
+
+    @pytest.mark.usefixtures("allow_fall_back_on_numpy")
+    @testing.for_all_dtypes(no_bool=True)
+    @testing.numpy_cupy_allclose()
+    def test_logspace2(self, xp, dtype):
+        return xp.logspace(2, 0, 5, dtype=dtype)
+
+    @testing.for_all_dtypes(no_bool=True)
+    @testing.numpy_cupy_allclose()
+    def test_logspace_zero_num(self, xp, dtype):
+        return xp.logspace(0, 2, 0, dtype=dtype)
+
+    @pytest.mark.usefixtures("allow_fall_back_on_numpy")
+    @testing.for_all_dtypes(no_bool=True)
+    @testing.numpy_cupy_allclose()
+    def test_logspace_one_num(self, xp, dtype):
+        return xp.logspace(0, 2, 1, dtype=dtype)
+
+    @pytest.mark.usefixtures("allow_fall_back_on_numpy")
+    @testing.for_all_dtypes(no_bool=True)
+    @testing.numpy_cupy_allclose()
+    def test_logspace_no_endpoint(self, xp, dtype):
+        return xp.logspace(0, 2, 5, dtype=dtype, endpoint=False)
+
+    @pytest.mark.usefixtures("allow_fall_back_on_numpy")
+    @testing.numpy_cupy_allclose()
+    def test_logspace_no_dtype_int(self, xp):
+        return xp.logspace(0, 2)
+
+    @pytest.mark.usefixtures("allow_fall_back_on_numpy")
+    @testing.numpy_cupy_allclose()
+    def test_logspace_no_dtype_float(self, xp):
+        return xp.logspace(0.0, 2.0)
+
+    @pytest.mark.usefixtures("allow_fall_back_on_numpy")
+    @testing.numpy_cupy_allclose()
+    def test_logspace_float_args_with_int_dtype(self, xp):
+        return xp.logspace(0.1, 2.1, 11, dtype=int)
+
+    def test_logspace_neg_num(self):
+        for xp in (numpy, cupy):
+            with pytest.raises(ValueError):
+                xp.logspace(0, 10, -1)
+
+    @pytest.mark.usefixtures("allow_fall_back_on_numpy")
+    @testing.for_all_dtypes(no_bool=True)
+    @testing.numpy_cupy_allclose()
+    def test_logspace_base(self, xp, dtype):
+        return xp.logspace(0, 2, 5, base=2.0, dtype=dtype)
+
+
+@testing.parameterize(
+    *testing.product({
+        'indexing': ['xy', 'ij'],
+        'sparse': [False, True],
+        'copy': [False, True],
+    })
+)
+@testing.gpu
+class TestMeshgrid(unittest.TestCase):
+
+    @testing.for_all_dtypes()
+    def test_meshgrid0(self, dtype):
+        out = cupy.meshgrid(indexing=self.indexing, sparse=self.sparse,
+                            copy=self.copy)
+        assert(out == [])
+
+    @testing.for_all_dtypes()
+    @testing.numpy_cupy_array_equal()
+    def test_meshgrid1(self, xp, dtype):
+        x = xp.arange(2).astype(dtype)
+        return xp.meshgrid(x, indexing=self.indexing, sparse=self.sparse,
+                           copy=self.copy)
+
+    @testing.for_all_dtypes()
+    @testing.numpy_cupy_array_equal()
+    def test_meshgrid2(self, xp, dtype):
+        x = xp.arange(2).astype(dtype)
+        y = xp.arange(3).astype(dtype)
+        return xp.meshgrid(x, y, indexing=self.indexing, sparse=self.sparse,
+                           copy=self.copy)
+
+    @testing.for_all_dtypes()
+    @testing.numpy_cupy_array_equal()
+    def test_meshgrid3(self, xp, dtype):
+        x = xp.arange(2).astype(dtype)
+        y = xp.arange(3).astype(dtype)
+        z = xp.arange(4).astype(dtype)
+        return xp.meshgrid(x, y, z, indexing=self.indexing, sparse=self.sparse,
+                           copy=self.copy)
+
+
+@testing.gpu
+class TestMgrid(unittest.TestCase):
+
+    @testing.numpy_cupy_array_equal()
+    def test_mgrid0(self, xp):
+        return xp.mgrid[0:]
+
+    @testing.numpy_cupy_array_equal()
+    def test_mgrid1(self, xp):
+        return xp.mgrid[-10:10]
+
+    @testing.numpy_cupy_array_equal()
+    def test_mgrid2(self, xp):
+        return xp.mgrid[-10:10:10j]
+
+    @testing.numpy_cupy_array_equal()
+    def test_mgrid3(self, xp):
+        x = xp.zeros(10)[:, None]
+        y = xp.ones(10)[:, None]
+        return xp.mgrid[x:y:10j]
+
+    @testing.numpy_cupy_array_equal()
+    def test_mgrid4(self, xp):
+        # check len(keys) > 1
+        return xp.mgrid[-10:10:10j, -10:10:10j]
+
+    @testing.numpy_cupy_array_equal()
+    def test_mgrid5(self, xp):
+        # check len(keys) > 1
+        x = xp.zeros(10)[:, None]
+        y = xp.ones(10)[:, None]
+        return xp.mgrid[x:y:10j, x:y:10j]
+
+
+@testing.gpu
+class TestOgrid(unittest.TestCase):
+
+    @testing.numpy_cupy_array_equal()
+    def test_ogrid0(self, xp):
+        return xp.ogrid[0:]
+
+    @testing.numpy_cupy_array_equal()
+    def test_ogrid1(self, xp):
+        return xp.ogrid[-10:10]
+
+    @testing.numpy_cupy_array_equal()
+    def test_ogrid2(self, xp):
+        return xp.ogrid[-10:10:10j]
+
+    @testing.numpy_cupy_array_equal()
+    def test_ogrid3(self, xp):
+        x = xp.zeros(10)[:, None]
+        y = xp.ones(10)[:, None]
+        return xp.ogrid[x:y:10j]
+
+    @testing.numpy_cupy_array_equal()
+    def test_ogrid4(self, xp):
+        # check len(keys) > 1
+        return xp.ogrid[-10:10:10j, -10:10:10j]
+
+    @testing.numpy_cupy_array_equal()
+    def test_ogrid5(self, xp):
+        # check len(keys) > 1
+        x = xp.zeros(10)[:, None]
+        y = xp.ones(10)[:, None]
+        return xp.ogrid[x:y:10j, x:y:10j]

From 351f50bb64e346596ef3b46dfee7387e1bff549a Mon Sep 17 00:00:00 2001
From: vlad-perevezentsev <vladislav.perevezentsev@intel.com>
Date: Tue, 28 Feb 2023 15:59:07 +0100
Subject: [PATCH 06/16] Change dpnp.asfarray to run on Iris Xe (#1305)

* Change asfarray func and fix tests for it

* Change dtype check in dpnp.asfarray and fix remarks

* Small fix

* Update copyright

---------

Co-authored-by: Anton <100830759+antonwolfy@users.noreply.github.com>
---
 dpnp/dpnp_iface_manipulation.py | 11 ++++++-----
 tests/test_arraymanipulation.py | 30 ++++++++++++++----------------
 2 files changed, 20 insertions(+), 21 deletions(-)

diff --git a/dpnp/dpnp_iface_manipulation.py b/dpnp/dpnp_iface_manipulation.py
index 22f34f514ee..2b782b50049 100644
--- a/dpnp/dpnp_iface_manipulation.py
+++ b/dpnp/dpnp_iface_manipulation.py
@@ -2,7 +2,7 @@
 # distutils: language = c++
 # -*- coding: utf-8 -*-
 # *****************************************************************************
-# Copyright (c) 2016-2022, Intel Corporation
+# Copyright (c) 2016-2023, Intel Corporation
 # All rights reserved.
 #
 # Redistribution and use in source and binary forms, with or without
@@ -73,7 +73,7 @@
 ]
 
 
-def asfarray(x1, dtype=numpy.float64):
+def asfarray(x1, dtype=None):
     """
     Return an array converted to a float type.
 
@@ -82,14 +82,15 @@ def asfarray(x1, dtype=numpy.float64):
     Notes
     -----
     This function works exactly the same as :obj:`dpnp.array`.
+    If dtype is `None`, `bool` or one of the `int` dtypes, it is replaced with
+    the default floating type in DPNP depending on device capabilities.
 
     """
 
     x1_desc = dpnp.get_dpnp_descriptor(x1, copy_when_nondefault_queue=False)
     if x1_desc:
-        # behavior of original function: int types replaced with float64
-        if numpy.issubdtype(dtype, numpy.integer):
-            dtype = numpy.float64
+        if dtype is None or not numpy.issubdtype(dtype, numpy.inexact):
+            dtype = dpnp.default_float_type(sycl_queue=x1.sycl_queue)
 
         # if type is the same then same object should be returned
         if x1_desc.dtype == dtype:
diff --git a/tests/test_arraymanipulation.py b/tests/test_arraymanipulation.py
index c0cd3e6c2b1..f22e8175c3b 100644
--- a/tests/test_arraymanipulation.py
+++ b/tests/test_arraymanipulation.py
@@ -1,16 +1,15 @@
 import pytest
+from .helper import get_all_dtypes
 
 import dpnp
 import numpy
 
 
 @pytest.mark.usefixtures("allow_fall_back_on_numpy")
-@pytest.mark.parametrize("dtype",
-                         [numpy.float64, numpy.float32, numpy.int64, numpy.int32],
-                         ids=["float64", "float32", "int64", "int32"])
-@pytest.mark.parametrize("data",
-                         [[1, 2, 3], [1., 2., 3.]],
-                         ids=["[1, 2, 3]", "[1., 2., 3.]"])
+@pytest.mark.parametrize("dtype", get_all_dtypes())
+@pytest.mark.parametrize(
+    "data", [[1, 2, 3], [1.0, 2.0, 3.0]], ids=["[1, 2, 3]", "[1., 2., 3.]"]
+)
 def test_asfarray(dtype, data):
     expected = numpy.asfarray(data, dtype)
     result = dpnp.asfarray(data, dtype)
@@ -18,15 +17,12 @@ def test_asfarray(dtype, data):
     numpy.testing.assert_array_equal(result, expected)
 
 
-@pytest.mark.parametrize("dtype",
-                         [numpy.float64, numpy.float32, numpy.int64, numpy.int32],
-                         ids=["float64", "float32", "int64", "int32"])
-@pytest.mark.parametrize("data",
-                         [[1, 2, 3], [1., 2., 3.]],
-                         ids=["[1, 2, 3]", "[1., 2., 3.]"])
-def test_asfarray2(dtype, data):
-    expected = numpy.asfarray(numpy.array(data), dtype)
-    result = dpnp.asfarray(dpnp.array(data), dtype)
+@pytest.mark.parametrize("dtype", get_all_dtypes())
+@pytest.mark.parametrize("data", [[1.0, 2.0, 3.0]], ids=["[1., 2., 3.]"])
+@pytest.mark.parametrize("data_dtype", get_all_dtypes(no_none=True))
+def test_asfarray2(dtype, data, data_dtype):
+    expected = numpy.asfarray(numpy.array(data, dtype=data_dtype), dtype)
+    result = dpnp.asfarray(dpnp.array(data, dtype=data_dtype), dtype)
 
     numpy.testing.assert_array_equal(result, expected)
 
@@ -59,7 +55,9 @@ def test_concatenate(self):
         numpy.testing.assert_array_equal(dpnp.concatenate((r4, r3)), r4 + r3)
         # Mixed sequence types
         numpy.testing.assert_array_equal(dpnp.concatenate((tuple(r4), r3)), r4 + r3)
-        numpy.testing.assert_array_equal(dpnp.concatenate((dpnp.array(r4), r3)), r4 + r3)
+        numpy.testing.assert_array_equal(
+            dpnp.concatenate((dpnp.array(r4), r3)), r4 + r3
+        )
         # Explicit axis specification
         numpy.testing.assert_array_equal(dpnp.concatenate((r4, r3), 0), r4 + r3)
         # Including negative

From c1933c132636d45f9d556edacccbe3d3c402bb13 Mon Sep 17 00:00:00 2001
From: Anton <100830759+antonwolfy@users.noreply.github.com>
Date: Wed, 1 Mar 2023 13:45:57 +0100
Subject: [PATCH 07/16] Remove mixed host\dev implementation from dpnp.all()
 (#1301)

* Remove mixed host\dev implementation from dpnp.all()

* Reduce over group
---
 .gitignore                               |  4 +++
 dpnp/backend/kernels/dpnp_krnl_logic.cpp | 43 +++++++++++++++++-------
 dpnp/dpnp_array.py                       | 13 ++++---
 dpnp/dpnp_iface_logic.py                 | 37 +++++++++++---------
 tests/test_logic.py                      |  2 +-
 tests/test_usm_type.py                   |  1 -
 6 files changed, 66 insertions(+), 34 deletions(-)

diff --git a/.gitignore b/.gitignore
index fda4c163531..7ed68aab856 100644
--- a/.gitignore
+++ b/.gitignore
@@ -1,6 +1,7 @@
 # CMake build and local install directory
 build
 build_cython
+dpnp.egg-info
 
 # Byte-compiled / optimized / DLL files
 __pycache__/
@@ -14,6 +15,9 @@ coverage.xml
 # Backup files kept after git merge/rebase
 *.orig
 
+# Build examples
+example3
+
 *dpnp_backend*
 dpnp/**/*.cpython*.so
 dpnp/**/*.pyd
diff --git a/dpnp/backend/kernels/dpnp_krnl_logic.cpp b/dpnp/backend/kernels/dpnp_krnl_logic.cpp
index 157347aa90c..8f4b1255100 100644
--- a/dpnp/backend/kernels/dpnp_krnl_logic.cpp
+++ b/dpnp/backend/kernels/dpnp_krnl_logic.cpp
@@ -41,6 +41,8 @@ DPCTLSyclEventRef dpnp_all_c(DPCTLSyclQueueRef q_ref,
                              const size_t size,
                              const DPCTLEventVectorRef dep_event_vec_ref)
 {
+    static_assert(std::is_same_v<_ResultType, bool>, "Boolean result type is required");
+
     // avoid warning unused variable
     (void)dep_event_vec_ref;
 
@@ -52,38 +54,50 @@ DPCTLSyclEventRef dpnp_all_c(DPCTLSyclQueueRef q_ref,
     }
 
     sycl::queue q = *(reinterpret_cast<sycl::queue*>(q_ref));
-    sycl::event event;
 
-    DPNPC_ptr_adapter<_DataType> input1_ptr(q_ref, array1_in, size);
-    DPNPC_ptr_adapter<_ResultType> result1_ptr(q_ref, result1, 1, true, true);
-    const _DataType* array_in = input1_ptr.get_ptr();
-    _ResultType* result = result1_ptr.get_ptr();
+    const _DataType* array_in = static_cast<const _DataType*>(array1_in);
+    bool* result = static_cast<bool*>(result1);
 
-    result[0] = true;
+    auto fill_event = q.fill(result, true, 1);
 
     if (!size)
     {
-        return event_ref;
+        event_ref = reinterpret_cast<DPCTLSyclEventRef>(&fill_event);
+        return DPCTLEvent_Copy(event_ref);
     }
 
-    sycl::range<1> gws(size);
-    auto kernel_parallel_for_func = [=](sycl::id<1> global_id) {
-        size_t i = global_id[0];
+    constexpr size_t lws = 64;
+    constexpr size_t vec_sz = 8;
+
+    auto gws_range = sycl::range<1>(((size + lws * vec_sz - 1) / (lws * vec_sz)) * lws);
+    auto lws_range = sycl::range<1>(lws);
+    sycl::nd_range<1> gws(gws_range, lws_range);
 
-        if (!array_in[i])
+    auto kernel_parallel_for_func = [=](sycl::nd_item<1> nd_it) {
+        auto gr = nd_it.get_group();
+        const auto max_gr_size = gr.get_max_local_range()[0];
+        const size_t start =
+            vec_sz * (nd_it.get_group(0) * nd_it.get_local_range(0) + gr.get_group_id()[0] * max_gr_size);
+        const size_t end = sycl::min(start + vec_sz * max_gr_size, size);
+
+        // the work-group jointly scans [start, end), i.e. up to "vec_sz" elements per work-item
+        bool local_reduction = sycl::joint_none_of(
+            gr, &array_in[start], &array_in[end], [&](_DataType elem) { return elem == static_cast<_DataType>(0); });
+
+        if (gr.leader() && (local_reduction == false))
         {
             result[0] = false;
         }
     };
 
     auto kernel_func = [&](sycl::handler& cgh) {
+        cgh.depends_on(fill_event);
         cgh.parallel_for<class dpnp_all_c_kernel<_DataType, _ResultType>>(gws, kernel_parallel_for_func);
     };
 
-    event = q.submit(kernel_func);
+    auto event = q.submit(kernel_func);
 
     event_ref = reinterpret_cast<DPCTLSyclEventRef>(&event);
-
     return DPCTLEvent_Copy(event_ref);
 }
 
@@ -98,6 +112,7 @@ void dpnp_all_c(const void* array1_in, void* result1, const size_t size)
                                                                      size,
                                                                      dep_event_vec_ref);
     DPCTLEvent_WaitAndThrow(event_ref);
+    DPCTLEvent_Delete(event_ref);
 }
 
 template <typename _DataType, typename _ResultType>
@@ -751,6 +766,8 @@ void func_map_init_logic(func_map_t& fmap)
     fmap[DPNPFuncName::DPNP_FN_ALL_EXT][eft_LNG][eft_LNG] = {eft_LNG, (void*)dpnp_all_ext_c<int64_t, bool>};
     fmap[DPNPFuncName::DPNP_FN_ALL_EXT][eft_FLT][eft_FLT] = {eft_FLT, (void*)dpnp_all_ext_c<float, bool>};
     fmap[DPNPFuncName::DPNP_FN_ALL_EXT][eft_DBL][eft_DBL] = {eft_DBL, (void*)dpnp_all_ext_c<double, bool>};
+    fmap[DPNPFuncName::DPNP_FN_ALL_EXT][eft_C64][eft_C64] = {eft_C64, (void*)dpnp_all_ext_c<std::complex<float>, bool>};
+    fmap[DPNPFuncName::DPNP_FN_ALL_EXT][eft_C128][eft_C128] = {eft_C128, (void*)dpnp_all_ext_c<std::complex<double>, bool>};
 
     fmap[DPNPFuncName::DPNP_FN_ALLCLOSE][eft_INT][eft_INT] = {eft_BLN,
                                                               (void*)dpnp_allclose_default_c<int32_t, int32_t, bool>};
diff --git a/dpnp/dpnp_array.py b/dpnp/dpnp_array.py
index 5c4a5551d08..23b02974e79 100644
--- a/dpnp/dpnp_array.py
+++ b/dpnp/dpnp_array.py
@@ -339,19 +339,24 @@ def _create_from_usm_ndarray(usm_ary : dpt.usm_ndarray):
         res._array_obj = usm_ary
         return res
 
-    def all(self, axis=None, out=None, keepdims=False):
+    def all(self,
+            axis=None,
+            out=None,
+            keepdims=False,
+            *,
+            where=True):
         """
         Returns True if all elements evaluate to True.
 
-        Refer to `numpy.all` for full documentation.
+        Refer to :obj:`dpnp.all` for full documentation.
 
         See Also
         --------
-        :obj:`numpy.all` : equivalent function
+        :obj:`dpnp.all` : equivalent function
 
         """
 
-        return dpnp.all(self, axis, out, keepdims)
+        return dpnp.all(self, axis=axis, out=out, keepdims=keepdims, where=where)
 
     def any(self, axis=None, out=None, keepdims=False):
         """
diff --git a/dpnp/dpnp_iface_logic.py b/dpnp/dpnp_iface_logic.py
index e94b0f6c1ef..df78edfe22f 100644
--- a/dpnp/dpnp_iface_logic.py
+++ b/dpnp/dpnp_iface_logic.py
@@ -69,7 +69,13 @@
 ]
 
 
-def all(x1, axis=None, out=None, keepdims=False):
+def all(x1,
+        /,
+        axis=None,
+        out=None,
+        keepdims=False,
+        *,
+        where=True):
     """
     Test whether all array elements along a given axis evaluate to True.
 
@@ -80,9 +86,10 @@ def all(x1, axis=None, out=None, keepdims=False):
     Input array is supported as :obj:`dpnp.ndarray`.
     Otherwise the function will be executed sequentially on CPU.
     Input array data types are limited by supported DPNP :ref:`Data types`.
-    Parameter ``axis`` is supported only with default value ``None``.
-    Parameter ``out`` is supported only with default value ``None``.
-    Parameter ``keepdims`` is supported only with default value ``False``.
+    Parameter `axis` is supported only with default value `None`.
+    Parameter `out` is supported only with default value `None`.
+    Parameter `keepdims` is supported only with default value `False`.
+    Parameter `where` is supported only with default value `True`.
 
     See Also
     --------
@@ -95,15 +102,15 @@ def all(x1, axis=None, out=None, keepdims=False):
 
     Examples
     --------
-    >>> import dpnp as np
-    >>> x = np.array([[True, False], [True, True]])
-    >>> np.all(x)
+    >>> import dpnp as dp
+    >>> x = dp.array([[True, False], [True, True]])
+    >>> dp.all(x)
     False
-    >>> x2 = np.array([-1, 4, 5])
-    >>> np.all(x2)
+    >>> x2 = dp.array([-1, 4, 5])
+    >>> dp.all(x2)
     True
-    >>> x3 = np.array([1.0, np.nan])
-    >>> np.all(x3)
+    >>> x3 = dp.array([1.0, dp.nan])
+    >>> dp.all(x3)
     True
 
     """
@@ -116,13 +123,13 @@ def all(x1, axis=None, out=None, keepdims=False):
             pass
         elif keepdims is not False:
             pass
+        elif where is not True:
+            pass
         else:
             result_obj = dpnp_all(x1_desc).get_pyobj()
-            result = dpnp.convert_single_elem_array_to_scalar(result_obj)
-
-            return result
+            return dpnp.convert_single_elem_array_to_scalar(result_obj)
 
-    return call_origin(numpy.all, x1, axis, out, keepdims)
+    return call_origin(numpy.all, x1, axis=axis, out=out, keepdims=keepdims, where=where)
 
 
 def allclose(x1, x2, rtol=1.e-5, atol=1.e-8, **kwargs):
diff --git a/tests/test_logic.py b/tests/test_logic.py
index 425106fd2ef..5b131d3e770 100644
--- a/tests/test_logic.py
+++ b/tests/test_logic.py
@@ -10,7 +10,7 @@
 )
 
 
-@pytest.mark.parametrize("type", get_all_dtypes(no_complex=True))
+@pytest.mark.parametrize("type", get_all_dtypes())
 @pytest.mark.parametrize("shape",
                          [(0,), (4,), (2, 3), (2, 2, 2)],
                          ids=['(0,)', '(4,)', '(2,3)', '(2,2,2)'])
diff --git a/tests/test_usm_type.py b/tests/test_usm_type.py
index 9cbef140519..326d0313c93 100644
--- a/tests/test_usm_type.py
+++ b/tests/test_usm_type.py
@@ -103,7 +103,6 @@ def test_linspace_arrays(usm_type_start, usm_type_stop):
     assert res.usm_type == du.get_coerced_usm_type([usm_type_start, usm_type_stop])
 
 
-@pytest.mark.skip()
 @pytest.mark.parametrize("func", ["tril", "triu"], ids=["tril", "triu"])
 @pytest.mark.parametrize("usm_type", list_of_usm_types, ids=list_of_usm_types)
 def test_tril_triu(func, usm_type):

From 1c7b85f975074c29dae8cae66b7c8027436cbfd0 Mon Sep 17 00:00:00 2001
From: Anton <100830759+antonwolfy@users.noreply.github.com>
Date: Wed, 1 Mar 2023 15:24:15 +0100
Subject: [PATCH 08/16] Remove mixed host\dev implementation from dpnp.any()
 (#1302)

* Remove mixed host\dev implementation from dpnp.any()

* Reduce over group
---
 dpnp/backend/kernels/dpnp_krnl_logic.cpp | 43 +++++++++++++++++-------
 dpnp/dpnp_array.py                       | 13 ++++---
 dpnp/dpnp_iface_logic.py                 | 37 +++++++++++---------
 tests/test_logic.py                      |  2 +-
 4 files changed, 62 insertions(+), 33 deletions(-)

diff --git a/dpnp/backend/kernels/dpnp_krnl_logic.cpp b/dpnp/backend/kernels/dpnp_krnl_logic.cpp
index 8f4b1255100..d1a6767c2ad 100644
--- a/dpnp/backend/kernels/dpnp_krnl_logic.cpp
+++ b/dpnp/backend/kernels/dpnp_krnl_logic.cpp
@@ -233,6 +233,8 @@ DPCTLSyclEventRef dpnp_any_c(DPCTLSyclQueueRef q_ref,
                              const size_t size,
                              const DPCTLEventVectorRef dep_event_vec_ref)
 {
+    static_assert(std::is_same_v<_ResultType, bool>, "Boolean result type is required");
+
     // avoid warning unused variable
     (void)dep_event_vec_ref;
 
@@ -244,38 +246,50 @@ DPCTLSyclEventRef dpnp_any_c(DPCTLSyclQueueRef q_ref,
     }
 
     sycl::queue q = *(reinterpret_cast<sycl::queue*>(q_ref));
-    sycl::event event;
 
-    DPNPC_ptr_adapter<_DataType> input1_ptr(q_ref, array1_in, size);
-    DPNPC_ptr_adapter<_ResultType> result1_ptr(q_ref, result1, 1, true, true);
-    const _DataType* array_in = input1_ptr.get_ptr();
-    _ResultType* result = result1_ptr.get_ptr();
+    const _DataType* array_in = static_cast<const _DataType*>(array1_in);
+    bool* result = static_cast<bool*>(result1);
 
-    result[0] = false;
+    auto fill_event = q.fill(result, false, 1);
 
     if (!size)
     {
-        return event_ref;
+        event_ref = reinterpret_cast<DPCTLSyclEventRef>(&fill_event);
+        return DPCTLEvent_Copy(event_ref);
     }
 
-    sycl::range<1> gws(size);
-    auto kernel_parallel_for_func = [=](sycl::id<1> global_id) {
-        size_t i = global_id[0];
+    constexpr size_t lws = 64;
+    constexpr size_t vec_sz = 8;
 
-        if (array_in[i])
+    auto gws_range = sycl::range<1>(((size + lws * vec_sz - 1) / (lws * vec_sz)) * lws);
+    auto lws_range = sycl::range<1>(lws);
+    sycl::nd_range<1> gws(gws_range, lws_range);
+
+    auto kernel_parallel_for_func = [=](sycl::nd_item<1> nd_it) {
+        auto gr = nd_it.get_group();
+        const auto max_gr_size = gr.get_max_local_range()[0];
+        const size_t start =
+            vec_sz * (nd_it.get_group(0) * nd_it.get_local_range(0) + gr.get_group_id()[0] * max_gr_size);
+        const size_t end = sycl::min(start + vec_sz * max_gr_size, size);
+
+        // the work-group jointly scans [start, end), i.e. up to "vec_sz" elements per work-item
+        bool local_reduction = sycl::joint_any_of(
+            gr, &array_in[start], &array_in[end], [&](_DataType elem) { return elem != static_cast<_DataType>(0); });
+
+        if (gr.leader() && (local_reduction == true))
         {
             result[0] = true;
         }
     };
 
     auto kernel_func = [&](sycl::handler& cgh) {
+        cgh.depends_on(fill_event);
         cgh.parallel_for<class dpnp_any_c_kernel<_DataType, _ResultType>>(gws, kernel_parallel_for_func);
     };
 
-    event = q.submit(kernel_func);
+    auto event = q.submit(kernel_func);
 
     event_ref = reinterpret_cast<DPCTLSyclEventRef>(&event);
-
     return DPCTLEvent_Copy(event_ref);
 }
 
@@ -290,6 +304,7 @@ void dpnp_any_c(const void* array1_in, void* result1, const size_t size)
                                                                      size,
                                                                      dep_event_vec_ref);
     DPCTLEvent_WaitAndThrow(event_ref);
+    DPCTLEvent_Delete(event_ref);
 }
 
 template <typename _DataType, typename _ResultType>
@@ -846,6 +861,8 @@ void func_map_init_logic(func_map_t& fmap)
     fmap[DPNPFuncName::DPNP_FN_ANY_EXT][eft_LNG][eft_LNG] = {eft_LNG, (void*)dpnp_any_ext_c<int64_t, bool>};
     fmap[DPNPFuncName::DPNP_FN_ANY_EXT][eft_FLT][eft_FLT] = {eft_FLT, (void*)dpnp_any_ext_c<float, bool>};
     fmap[DPNPFuncName::DPNP_FN_ANY_EXT][eft_DBL][eft_DBL] = {eft_DBL, (void*)dpnp_any_ext_c<double, bool>};
+    fmap[DPNPFuncName::DPNP_FN_ANY_EXT][eft_C64][eft_C64] = {eft_C64, (void*)dpnp_any_ext_c<std::complex<float>, bool>};
+    fmap[DPNPFuncName::DPNP_FN_ANY_EXT][eft_C128][eft_C128] = {eft_C128, (void*)dpnp_any_ext_c<std::complex<double>, bool>};
 
     func_map_logic_1arg_1type_helper<eft_BLN, eft_INT, eft_LNG, eft_FLT, eft_DBL>(fmap);
     func_map_logic_2arg_2type_helper<eft_BLN, eft_INT, eft_LNG, eft_FLT, eft_DBL>(fmap);
diff --git a/dpnp/dpnp_array.py b/dpnp/dpnp_array.py
index 23b02974e79..c002d0db233 100644
--- a/dpnp/dpnp_array.py
+++ b/dpnp/dpnp_array.py
@@ -358,19 +358,24 @@ def all(self,
 
         return dpnp.all(self, axis=axis, out=out, keepdims=keepdims, where=where)
 
-    def any(self, axis=None, out=None, keepdims=False):
+    def any(self,
+            axis=None,
+            out=None,
+            keepdims=False,
+            *,
+            where=True):
         """
         Returns True if any of the elements of `a` evaluate to True.
 
-        Refer to `numpy.any` for full documentation.
+        Refer to :obj:`dpnp.any` for full documentation.
 
         See Also
         --------
-        :obj:`numpy.any` : equivalent function
+        :obj:`dpnp.any` : equivalent function
 
         """
 
-        return dpnp.any(self, axis, out, keepdims)
+        return dpnp.any(self, axis=axis, out=out, keepdims=keepdims, where=where)
 
     def argmax(self, axis=None, out=None):
         """
diff --git a/dpnp/dpnp_iface_logic.py b/dpnp/dpnp_iface_logic.py
index df78edfe22f..8d3ba904628 100644
--- a/dpnp/dpnp_iface_logic.py
+++ b/dpnp/dpnp_iface_logic.py
@@ -170,7 +170,13 @@ def allclose(x1, x2, rtol=1.e-5, atol=1.e-8, **kwargs):
     return call_origin(numpy.allclose, x1, x2, rtol=rtol, atol=atol, **kwargs)
 
 
-def any(x1, axis=None, out=None, keepdims=False):
+def any(x1,
+        /,
+        axis=None,
+        out=None,
+        keepdims=False,
+        *,
+        where=True):
     """
     Test whether any array element along a given axis evaluates to True.
 
@@ -181,9 +187,10 @@ def any(x1, axis=None, out=None, keepdims=False):
     Input array is supported as :obj:`dpnp.ndarray`.
     Otherwise the function will be executed sequentially on CPU.
     Input array data types are limited by supported DPNP :ref:`Data types`.
-    Parameter ``axis`` is supported only with default value ``None``.
-    Parameter ``out`` is supported only with default value ``None``.
-    Parameter ``keepdims`` is supported only with default value ``False``.
+    Parameter `axis` is supported only with default value `None`.
+    Parameter `out` is supported only with default value `None`.
+    Parameter `keepdims` is supported only with default value `False`.
+    Parameter `where` is supported only with default value `True`.
 
     See Also
     --------
@@ -196,15 +203,15 @@ def any(x1, axis=None, out=None, keepdims=False):
 
     Examples
     --------
-    >>> import dpnp as np
-    >>> x = np.array([[True, False], [True, True]])
-    >>> np.any(x)
+    >>> import dpnp as dp
+    >>> x = dp.array([[True, False], [True, True]])
+    >>> dp.any(x)
     True
-    >>> x2 = np.array([0, 0, 0])
-    >>> np.any(x2)
+    >>> x2 = dp.array([0, 0, 0])
+    >>> dp.any(x2)
     False
-    >>> x3 = np.array([1.0, np.nan])
-    >>> np.any(x3)
+    >>> x3 = dp.array([1.0, dp.nan])
+    >>> dp.any(x3)
     True
 
     """
@@ -217,13 +224,13 @@ def any(x1, axis=None, out=None, keepdims=False):
             pass
         elif keepdims is not False:
             pass
+        elif where is not True:
+            pass
         else:
             result_obj = dpnp_any(x1_desc).get_pyobj()
-            result = dpnp.convert_single_elem_array_to_scalar(result_obj)
-
-            return result
+            return dpnp.convert_single_elem_array_to_scalar(result_obj)
 
-    return call_origin(numpy.any, x1, axis, out, keepdims)
+    return call_origin(numpy.any, x1, axis=axis, out=out, keepdims=keepdims, where=where)
 
 
 def equal(x1,
diff --git a/tests/test_logic.py b/tests/test_logic.py
index 5b131d3e770..b2a545a118f 100644
--- a/tests/test_logic.py
+++ b/tests/test_logic.py
@@ -63,7 +63,7 @@ def test_allclose(type):
     assert_allclose(dpnp_res, np_res)
 
 
-@pytest.mark.parametrize("type", get_all_dtypes(no_complex=True))
+@pytest.mark.parametrize("type", get_all_dtypes())
 @pytest.mark.parametrize("shape",
                          [(0,), (4,), (2, 3), (2, 2, 2)],
                          ids=['(0,)', '(4,)', '(2,3)', '(2,2,2)'])

From 7ed9b400a312c9c91d7863a87a3b098689ad474b Mon Sep 17 00:00:00 2001
From: Anton <100830759+antonwolfy@users.noreply.github.com>
Date: Thu, 2 Mar 2023 22:36:06 +0100
Subject: [PATCH 09/16] Allocate a separate FFT descriptor per FFT compute
 function (#1322)

---
 dpnp/backend/kernels/dpnp_krnl_fft.cpp | 163 +++++++++++++------------
 1 file changed, 87 insertions(+), 76 deletions(-)

diff --git a/dpnp/backend/kernels/dpnp_krnl_fft.cpp b/dpnp/backend/kernels/dpnp_krnl_fft.cpp
index b3f9716d73f..611f71c045d 100644
--- a/dpnp/backend/kernels/dpnp_krnl_fft.cpp
+++ b/dpnp/backend/kernels/dpnp_krnl_fft.cpp
@@ -178,7 +178,6 @@ static void dpnp_fft_fft_mathlib_cmplx_to_cmplx_c(DPCTLSyclQueueRef q_ref,
                                                   const size_t shape_size,
                                                   const size_t input_size,
                                                   const size_t result_size,
-                                                  _Descriptor_type& desc,
                                                   size_t inverse,
                                                   const size_t norm)
 {
@@ -187,14 +186,15 @@ static void dpnp_fft_fft_mathlib_cmplx_to_cmplx_c(DPCTLSyclQueueRef q_ref,
     (void)input_size;
     (void)result_size;
 
-    if (!shape_size) {
+    if (!shape_size)
+    {
         return;
     }
 
     sycl::queue queue = *(reinterpret_cast<sycl::queue*>(q_ref));
 
-    _DataType_input* array_1 = static_cast<_DataType_input *>(const_cast<void *>(array1_in));
-    _DataType_output* result = static_cast<_DataType_output *>(result_out);
+    _DataType_input* array_1 = static_cast<_DataType_input*>(const_cast<void*>(array1_in));
+    _DataType_output* result = static_cast<_DataType_output*>(result_out);
 
     const size_t n_iter =
         std::accumulate(input_shape, input_shape + shape_size - 1, 1, std::multiplies<shape_elem_type>());
@@ -204,39 +204,49 @@ static void dpnp_fft_fft_mathlib_cmplx_to_cmplx_c(DPCTLSyclQueueRef q_ref,
     double backward_scale = 1.;
     double forward_scale = 1.;
 
-    if (norm == 0) { // norm = "backward"
+    if (norm == 0) // norm = "backward"
+    {
         backward_scale = 1. / shift;
-    } else if (norm == 1) { // norm = "forward"
+    }
+    else if (norm == 1) // norm = "forward"
+    {
         forward_scale = 1. / shift;
-    } else { // norm = "ortho"
-        if (inverse) {
+    }
+    else // norm = "ortho"
+    {
+        if (inverse)
+        {
             backward_scale = 1. / sqrt(shift);
-        } else {
+        }
+        else
+        {
             forward_scale = 1. / sqrt(shift);
         }
     }
 
-    desc.set_value(mkl_dft::config_param::BACKWARD_SCALE, backward_scale);
-    desc.set_value(mkl_dft::config_param::FORWARD_SCALE, forward_scale);
-    // enum value from math library C interface
-    // instead of mkl_dft::config_value::NOT_INPLACE
-    desc.set_value(mkl_dft::config_param::PLACEMENT, DFTI_NOT_INPLACE);
-    desc.commit(queue);
-
-    std::vector<sycl::event> fft_events;
-    fft_events.reserve(n_iter);
-
-    for (size_t i = 0; i < n_iter; ++i) {
-        if (inverse) {
-            fft_events.push_back(mkl_dft::compute_backward(desc, array_1 + i * shift, result + i * shift));
-        } else {
-            fft_events.push_back(mkl_dft::compute_forward(desc, array_1 + i * shift, result + i * shift));
+    std::vector<sycl::event> fft_events(n_iter);
+
+    for (size_t i = 0; i < n_iter; ++i)
+    {
+        std::unique_ptr<_Descriptor_type> desc = std::make_unique<_Descriptor_type>(shift);
+        desc->set_value(mkl_dft::config_param::BACKWARD_SCALE, backward_scale);
+        desc->set_value(mkl_dft::config_param::FORWARD_SCALE, forward_scale);
+        desc->set_value(mkl_dft::config_param::PLACEMENT, DFTI_NOT_INPLACE);
+        desc->commit(queue);
+
+        if (inverse)
+        {
+            fft_events[i] = mkl_dft::compute_backward<_Descriptor_type, _DataType_input, _DataType_output>(
+                *desc, array_1 + i * shift, result + i * shift);
+        }
+        else
+        {
+            fft_events[i] = mkl_dft::compute_forward<_Descriptor_type, _DataType_input, _DataType_output>(
+                *desc, array_1 + i * shift, result + i * shift);
         }
     }
 
     sycl::event::wait(fft_events);
-
-    return;
 }
 
 template <typename _KernelNameSpecialization1, typename _KernelNameSpecialization2, typename _KernelNameSpecialization3>
@@ -251,7 +261,6 @@ static DPCTLSyclEventRef dpnp_fft_fft_mathlib_real_to_cmplx_c(DPCTLSyclQueueRef
                                                               const size_t shape_size,
                                                               const size_t input_size,
                                                               const size_t result_size,
-                                                              _Descriptor_type& desc,
                                                               size_t inverse,
                                                               const size_t norm,
                                                               const size_t real)
@@ -260,14 +269,15 @@ static DPCTLSyclEventRef dpnp_fft_fft_mathlib_real_to_cmplx_c(DPCTLSyclQueueRef
     (void)input_size;
 
     DPCTLSyclEventRef event_ref = nullptr;
-    if (!shape_size) {
+    if (!shape_size)
+    {
         return event_ref;
     }
 
     sycl::queue queue = *(reinterpret_cast<sycl::queue*>(q_ref));
 
-    _DataType_input* array_1 = static_cast<_DataType_input *>(const_cast<void *>(array1_in));
-    _DataType_output* result = static_cast<_DataType_output *>(result_out);
+    _DataType_input* array_1 = static_cast<_DataType_input*>(const_cast<void*>(array1_in));
+    _DataType_output* result = static_cast<_DataType_output*>(result_out);
 
     const size_t n_iter =
         std::accumulate(input_shape, input_shape + shape_size - 1, 1, std::multiplies<shape_elem_type>());
@@ -278,38 +288,52 @@ static DPCTLSyclEventRef dpnp_fft_fft_mathlib_real_to_cmplx_c(DPCTLSyclQueueRef
     double backward_scale = 1.;
     double forward_scale = 1.;
 
-    if (norm == 0) { // norm = "backward"
-        if (inverse) {
+    if (norm == 0) // norm = "backward"
+    {
+        if (inverse)
+        {
             forward_scale = 1. / result_shift;
-        } else {
+        }
+        else
+        {
             backward_scale = 1. / result_shift;
         }
-    } else if (norm == 1) { // norm = "forward"
-        if (inverse) {
+    }
+    else if (norm == 1) // norm = "forward"
+    {
+        if (inverse)
+        {
             backward_scale = 1. / result_shift;
-        } else {
+        }
+        else
+        {
             forward_scale = 1. / result_shift;
         }
-    } else { // norm = "ortho"
+    }
+    else // norm = "ortho"
+    {
         forward_scale = 1. / sqrt(result_shift);
     }
 
-    desc.set_value(mkl_dft::config_param::BACKWARD_SCALE, backward_scale);
-    desc.set_value(mkl_dft::config_param::FORWARD_SCALE, forward_scale);
-    desc.set_value(mkl_dft::config_param::PLACEMENT, DFTI_NOT_INPLACE);
+    std::vector<sycl::event> fft_events(n_iter);
 
-    desc.commit(queue);
-
-    std::vector<sycl::event> fft_events;
-    fft_events.reserve(n_iter);
-
-    for (size_t i = 0; i < n_iter; ++i) {
-        fft_events.push_back(mkl_dft::compute_forward(desc, array_1 + i * input_shift, result + i * result_shift * 2));
+    for (size_t i = 0; i < n_iter; ++i)
+    {
+        std::unique_ptr<_Descriptor_type> desc = std::make_unique<_Descriptor_type>(input_shift);
+        desc->set_value(mkl_dft::config_param::BACKWARD_SCALE, backward_scale);
+        desc->set_value(mkl_dft::config_param::FORWARD_SCALE, forward_scale);
+        desc->set_value(mkl_dft::config_param::PLACEMENT, DFTI_NOT_INPLACE);
+        desc->commit(queue);
+
+        // "result" actually holds complex values, each twice as wide as '_DataType_output', so its offset is doubled
+        fft_events[i] = mkl_dft::compute_forward<_Descriptor_type, _DataType_input, _DataType_output>(
+            *desc, array_1 + i * input_shift, result + i * result_shift * 2);
     }
 
     sycl::event::wait(fft_events);
 
-    if (real) { // the output size of the rfft function is input_size/2 + 1 so we don't need to fill the second half of the output
+    if (real) // the output size of the rfft function is input_size/2 + 1 so we don't need to fill the second half of the output
+    {
         return event_ref;
     }
 
@@ -325,19 +349,22 @@ static DPCTLSyclEventRef dpnp_fft_fft_mathlib_real_to_cmplx_c(DPCTLSyclQueueRef
             size_t j = global_id[1];
             {
                 *(reinterpret_cast<std::complex<_DataType_output>*>(result) + result_shift * (i + 1) - (j + 1)) =
-                    std::conj(*(reinterpret_cast<std::complex<_DataType_output>*>(result) + result_shift * i + (j + 1)));
+                    std::conj(
+                        *(reinterpret_cast<std::complex<_DataType_output>*>(result) + result_shift * i + (j + 1)));
             }
         }
     };
 
     auto kernel_func = [&](sycl::handler& cgh) {
-        cgh.parallel_for<class dpnp_fft_fft_mathlib_real_to_cmplx_c_kernel<_DataType_input, _DataType_output, _Descriptor_type>>(
+        cgh.parallel_for<
+            class dpnp_fft_fft_mathlib_real_to_cmplx_c_kernel<_DataType_input, _DataType_output, _Descriptor_type>>(
             gws, kernel_parallel_for_func);
     };
 
     event = queue.submit(kernel_func);
 
-    if (inverse) {
+    if (inverse)
+    {
         event.wait();
         event = oneapi::mkl::vm::conj(queue,
                                       result_size,
@@ -346,7 +373,6 @@ static DPCTLSyclEventRef dpnp_fft_fft_mathlib_real_to_cmplx_c(DPCTLSyclQueueRef
     }
 
     event_ref = reinterpret_cast<DPCTLSyclEventRef>(&event);
-
     return DPCTLEvent_Copy(event_ref);
 }
 
@@ -375,43 +401,35 @@ DPCTLSyclEventRef dpnp_fft_fft_c(DPCTLSyclQueueRef q_ref,
     const size_t input_size =
         std::accumulate(input_shape, input_shape + shape_size, 1, std::multiplies<shape_elem_type>());
 
-    size_t dim = input_shape[shape_size - 1];
-
     if constexpr (std::is_same<_DataType_output, std::complex<float>>::value ||
                   std::is_same<_DataType_output, std::complex<double>>::value)
     {
         if constexpr (std::is_same<_DataType_input, std::complex<double>>::value &&
                       std::is_same<_DataType_output, std::complex<double>>::value)
         {
-            desc_dp_cmplx_t desc(dim);
             dpnp_fft_fft_mathlib_cmplx_to_cmplx_c<_DataType_input, _DataType_output, desc_dp_cmplx_t>(
-                q_ref, array1_in, result_out, input_shape, result_shape, shape_size, input_size, result_size, desc, inverse, norm);
+                q_ref, array1_in, result_out, input_shape, result_shape, shape_size, input_size, result_size, inverse, norm);
         }
         /* complex-to-complex, single precision */
         else if constexpr (std::is_same<_DataType_input, std::complex<float>>::value &&
                            std::is_same<_DataType_output, std::complex<float>>::value)
         {
-            desc_sp_cmplx_t desc(dim);
             dpnp_fft_fft_mathlib_cmplx_to_cmplx_c<_DataType_input, _DataType_output, desc_sp_cmplx_t>(
-                q_ref, array1_in, result_out, input_shape, result_shape, shape_size, input_size, result_size, desc, inverse, norm);
+                q_ref, array1_in, result_out, input_shape, result_shape, shape_size, input_size, result_size, inverse, norm);
         }
         /* real-to-complex, double precision */
         else if constexpr (std::is_same<_DataType_input, double>::value &&
                            std::is_same<_DataType_output, std::complex<double>>::value)
         {
-            desc_dp_real_t desc(dim);
-
             event_ref = dpnp_fft_fft_mathlib_real_to_cmplx_c<_DataType_input, double, desc_dp_real_t>(
-                q_ref, array1_in, result_out, input_shape, result_shape, shape_size, input_size, result_size, desc, inverse, norm, 0);
+                q_ref, array1_in, result_out, input_shape, result_shape, shape_size, input_size, result_size, inverse, norm, 0);
         }
         /* real-to-complex, single precision */
         else if constexpr (std::is_same<_DataType_input, float>::value &&
                            std::is_same<_DataType_output, std::complex<float>>::value)
         {
-            desc_sp_real_t desc(dim); // try: 2 * result_size
-
             event_ref = dpnp_fft_fft_mathlib_real_to_cmplx_c<_DataType_input, float, desc_sp_real_t>(
-                q_ref, array1_in, result_out, input_shape, result_shape, shape_size, input_size, result_size, desc, inverse, norm, 0);
+                q_ref, array1_in, result_out, input_shape, result_shape, shape_size, input_size, result_size, inverse, norm, 0);
         }
         else if constexpr (std::is_same<_DataType_input, int32_t>::value ||
                            std::is_same<_DataType_input, int64_t>::value)
@@ -428,9 +446,8 @@ DPCTLSyclEventRef dpnp_fft_fft_c(DPCTLSyclQueueRef q_ref,
             DPCTLEvent_WaitAndThrow(event_ref);
             DPCTLEvent_Delete(event_ref);
 
-            desc_dp_real_t desc(dim);
             event_ref = dpnp_fft_fft_mathlib_real_to_cmplx_c<double, double, desc_dp_real_t>(
-                q_ref, array1_copy, result_out, input_shape, result_shape, shape_size, input_size, result_size, desc, inverse, norm, 0);
+                q_ref, array1_copy, result_out, input_shape, result_shape, shape_size, input_size, result_size, inverse, norm, 0);
 
             DPCTLEvent_WaitAndThrow(event_ref);
             DPCTLEvent_Delete(event_ref);
@@ -537,26 +554,21 @@ DPCTLSyclEventRef dpnp_fft_rfft_c(DPCTLSyclQueueRef q_ref,
     const size_t input_size =
         std::accumulate(input_shape, input_shape + shape_size, 1, std::multiplies<shape_elem_type>());
 
-    size_t dim = input_shape[shape_size - 1];
-
     if constexpr (std::is_same<_DataType_output, std::complex<float>>::value ||
                   std::is_same<_DataType_output, std::complex<double>>::value)
     {
         if constexpr (std::is_same<_DataType_input, double>::value &&
-                           std::is_same<_DataType_output, std::complex<double>>::value)
+                      std::is_same<_DataType_output, std::complex<double>>::value)
         {
-            desc_dp_real_t desc(dim);
-
             event_ref = dpnp_fft_fft_mathlib_real_to_cmplx_c<_DataType_input, double, desc_dp_real_t>(
-                q_ref, array1_in, result_out, input_shape, result_shape, shape_size, input_size, result_size, desc, inverse, norm, 1);
+                q_ref, array1_in, result_out, input_shape, result_shape, shape_size, input_size, result_size, inverse, norm, 1);
         }
         /* real-to-complex, single precision */
         else if constexpr (std::is_same<_DataType_input, float>::value &&
                            std::is_same<_DataType_output, std::complex<float>>::value)
         {
-            desc_sp_real_t desc(dim); // try: 2 * result_size
             event_ref = dpnp_fft_fft_mathlib_real_to_cmplx_c<_DataType_input, float, desc_sp_real_t>(
-                q_ref, array1_in, result_out, input_shape, result_shape, shape_size, input_size, result_size, desc, inverse, norm, 1);
+                q_ref, array1_in, result_out, input_shape, result_shape, shape_size, input_size, result_size, inverse, norm, 1);
         }
         else if constexpr (std::is_same<_DataType_input, int32_t>::value ||
                            std::is_same<_DataType_input, int64_t>::value)
@@ -573,9 +585,8 @@ DPCTLSyclEventRef dpnp_fft_rfft_c(DPCTLSyclQueueRef q_ref,
             DPCTLEvent_WaitAndThrow(event_ref);
             DPCTLEvent_Delete(event_ref);
 
-            desc_dp_real_t desc(dim);
             event_ref = dpnp_fft_fft_mathlib_real_to_cmplx_c<double, double, desc_dp_real_t>(
-                q_ref, array1_copy, result_out, input_shape, result_shape, shape_size, input_size, result_size, desc, inverse, norm, 1);
+                q_ref, array1_copy, result_out, input_shape, result_shape, shape_size, input_size, result_size, inverse, norm, 1);
 
             DPCTLEvent_WaitAndThrow(event_ref);
             DPCTLEvent_Delete(event_ref);

From 1897cf4386a3e9180d3ba47fc6f8c4ae41d02881 Mon Sep 17 00:00:00 2001
From: Anton <100830759+antonwolfy@users.noreply.github.com>
Date: Fri, 3 Mar 2023 12:36:35 +0100
Subject: [PATCH 10/16] Add missing dtype aliases (#1309)

* Add missing type aliases

* Remove dpnp.object_ type alias

* Remove dpnp.longcomplex and dpnp.void

* Add dpnp.pi, dpnp.e, dpnp.inf constants

* Add remaining numpy constants
---
 dpnp/dpnp_algo/dpnp_algo.pyx            | 20 ++---
 dpnp/dpnp_algo/dpnp_algo_statistics.pyx |  4 +-
 dpnp/dpnp_array.py                      |  4 +-
 dpnp/dpnp_iface_linearalgebra.py        |  4 +-
 dpnp/dpnp_iface_logic.py                |  9 +--
 dpnp/dpnp_iface_manipulation.py         |  2 +-
 dpnp/dpnp_iface_trigonometric.py        | 34 +++------
 dpnp/dpnp_iface_types.py                | 99 +++++++++++++++++++++++--
 dpnp/dpnp_utils/dpnp_algo_utils.pyx     | 12 +--
 dpnp/linalg/dpnp_algo_linalg.pyx        | 22 +++---
 dpnp/linalg/dpnp_iface_linalg.py        |  6 +-
 dpnp/random/dpnp_algo_random.pyx        | 68 ++++++++---------
 dpnp/random/dpnp_iface_random.py        |  8 +-
 tests/test_arraycreation.py             |  4 +-
 tests/test_dparray.py                   |  4 +-
 tests/test_random_state.py              |  8 +-
 16 files changed, 189 insertions(+), 119 deletions(-)

diff --git a/dpnp/dpnp_algo/dpnp_algo.pyx b/dpnp/dpnp_algo/dpnp_algo.pyx
index 4737bcfd3c7..2fa9de34b99 100644
--- a/dpnp/dpnp_algo/dpnp_algo.pyx
+++ b/dpnp/dpnp_algo/dpnp_algo.pyx
@@ -233,11 +233,11 @@ cpdef dpnp_queue_is_cpu():
 Internal functions
 """
 cdef DPNPFuncType dpnp_dtype_to_DPNPFuncType(dtype):
-    dt_c = numpy.dtype(dtype).char
-    kind = numpy.dtype(dtype).kind
+    dt_c = dpnp.dtype(dtype).char
+    kind = dpnp.dtype(dtype).kind
     if isinstance(kind, int):
         kind = chr(kind)
-    itemsize = numpy.dtype(dtype).itemsize
+    itemsize = dpnp.dtype(dtype).itemsize
 
     if dt_c == 'd':
         return DPNP_FT_DOUBLE
@@ -266,19 +266,19 @@ cdef dpnp_DPNPFuncType_to_dtype(size_t type):
     TODO needs to use DPNPFuncType here
     """
     if type == <size_t > DPNP_FT_DOUBLE:
-        return numpy.float64
+        return dpnp.float64
     elif type == <size_t > DPNP_FT_FLOAT:
-        return numpy.float32
+        return dpnp.float32
     elif type == <size_t > DPNP_FT_LONG:
-        return numpy.int64
+        return dpnp.int64
     elif type == <size_t > DPNP_FT_INT:
-        return numpy.int32
+        return dpnp.int32
     elif type == <size_t > DPNP_FT_CMPLX64:
-        return numpy.complex64
+        return dpnp.complex64
     elif type == <size_t > DPNP_FT_CMPLX128:
-        return numpy.complex128
+        return dpnp.complex128
     elif type == <size_t > DPNP_FT_BOOL:
-        return numpy.bool_
+        return dpnp.bool
     else:
         utils.checker_throw_type_error("dpnp_DPNPFuncType_to_dtype", type)
 
diff --git a/dpnp/dpnp_algo/dpnp_algo_statistics.pyx b/dpnp/dpnp_algo/dpnp_algo_statistics.pyx
index 920068c7d63..5d21dcf8c74 100644
--- a/dpnp/dpnp_algo/dpnp_algo_statistics.pyx
+++ b/dpnp/dpnp_algo/dpnp_algo_statistics.pyx
@@ -1,7 +1,7 @@
 # cython: language_level=3
 # -*- coding: utf-8 -*-
 # *****************************************************************************
-# Copyright (c) 2016-2020, Intel Corporation
+# Copyright (c) 2016-2023, Intel Corporation
 # All rights reserved.
 #
 # Redistribution and use in source and binary forms, with or without
@@ -129,7 +129,7 @@ cpdef dpnp_average(utils.dpnp_descriptor x1):
     array_sum = dpnp_sum(x1).get_pyobj()
 
     """ Numpy interface inconsistency """
-    return_type = numpy.float32 if (x1.dtype == numpy.float32) else numpy.float64
+    return_type = dpnp.float32 if (x1.dtype == dpnp.float32) else dpnp.float64
 
     return (return_type(array_sum / x1.size))
 
diff --git a/dpnp/dpnp_array.py b/dpnp/dpnp_array.py
index c002d0db233..1536acdbe41 100644
--- a/dpnp/dpnp_array.py
+++ b/dpnp/dpnp_array.py
@@ -537,7 +537,7 @@ def conj(self):
 
         """
 
-        if not numpy.issubsctype(self.dtype, numpy.complex_):
+        if not dpnp.issubsctype(self.dtype, dpnp.complex_):
             return self
         else:
             return dpnp.conjugate(self)
@@ -550,7 +550,7 @@ def conjugate(self):
 
         """
 
-        if not numpy.issubsctype(self.dtype, numpy.complex_):
+        if not dpnp.issubsctype(self.dtype, dpnp.complex_):
             return self
         else:
             return dpnp.conjugate(self)
diff --git a/dpnp/dpnp_iface_linearalgebra.py b/dpnp/dpnp_iface_linearalgebra.py
index 1fd6eba2d9c..117a1b9b61d 100644
--- a/dpnp/dpnp_iface_linearalgebra.py
+++ b/dpnp/dpnp_iface_linearalgebra.py
@@ -2,7 +2,7 @@
 # distutils: language = c++
 # -*- coding: utf-8 -*-
 # *****************************************************************************
-# Copyright (c) 2016-2020, Intel Corporation
+# Copyright (c) 2016-2023, Intel Corporation
 # All rights reserved.
 #
 # Redistribution and use in source and binary forms, with or without
@@ -269,7 +269,7 @@ def matmul(x1, x2, out=None, **kwargs):
                 array2_size = x2_desc.size
                 cost_size = 4096  # 2D array shape(64, 64)
 
-                if ((x1_desc.dtype == numpy.float64) or (x1_desc.dtype == numpy.float32)):
+                if ((x1_desc.dtype == dpnp.float64) or (x1_desc.dtype == dpnp.float32)):
                     """
                     Floating point types are handled via original math library better than SYCL math library
                     """
diff --git a/dpnp/dpnp_iface_logic.py b/dpnp/dpnp_iface_logic.py
index 8d3ba904628..2104ee30602 100644
--- a/dpnp/dpnp_iface_logic.py
+++ b/dpnp/dpnp_iface_logic.py
@@ -511,9 +511,8 @@ def isfinite(x1, out=None, **kwargs):
 
     Examples
     --------
-    >>> import numpy
     >>> import dpnp as np
-    >>> x = np.array([-numpy.inf, 0., numpy.inf])
+    >>> x = np.array([-np.inf, 0., np.inf])
     >>> out = np.isfinite(x)
     >>> [i for i in out]
     [False, True, False]
@@ -556,9 +555,8 @@ def isinf(x1, out=None, **kwargs):
 
     Examples
     --------
-    >>> import numpy
     >>> import dpnp as np
-    >>> x = np.array([-numpy.inf, 0., numpy.inf])
+    >>> x = np.array([-np.inf, 0., np.inf])
     >>> out = np.isinf(x)
     >>> [i for i in out]
     [True, False, True]
@@ -602,9 +600,8 @@ def isnan(x1, out=None, **kwargs):
 
     Examples
     --------
-    >>> import numpy
     >>> import dpnp as np
-    >>> x = np.array([numpy.inf, 0., np.nan])
+    >>> x = np.array([np.inf, 0., np.nan])
     >>> out = np.isnan(x)
     >>> [i for i in out]
     [False, False, True]
diff --git a/dpnp/dpnp_iface_manipulation.py b/dpnp/dpnp_iface_manipulation.py
index 2b782b50049..adc2bdf15f3 100644
--- a/dpnp/dpnp_iface_manipulation.py
+++ b/dpnp/dpnp_iface_manipulation.py
@@ -89,7 +89,7 @@ def asfarray(x1, dtype=None):
 
     x1_desc = dpnp.get_dpnp_descriptor(x1, copy_when_nondefault_queue=False)
     if x1_desc:
-        if dtype is None or not numpy.issubdtype(dtype, numpy.inexact):
+        if dtype is None or not numpy.issubdtype(dtype, dpnp.inexact):
             dtype = dpnp.default_float_type(sycl_queue=x1.sycl_queue)
 
         # if type is the same then same object should be returned
diff --git a/dpnp/dpnp_iface_trigonometric.py b/dpnp/dpnp_iface_trigonometric.py
index c50ec260ada..098dd19648f 100644
--- a/dpnp/dpnp_iface_trigonometric.py
+++ b/dpnp/dpnp_iface_trigonometric.py
@@ -2,7 +2,7 @@
 # distutils: language = c++
 # -*- coding: utf-8 -*-
 # *****************************************************************************
-# Copyright (c) 2016-2020, Intel Corporation
+# Copyright (c) 2016-2023, Intel Corporation
 # All rights reserved.
 #
 # Redistribution and use in source and binary forms, with or without
@@ -136,9 +136,8 @@ def arccosh(x1):
 
     Examples
     --------
-    >>> import numpy
     >>> import dpnp as np
-    >>> x = np.array([numpy.e, 10.0])
+    >>> x = np.array([np.e, 10.0])
     >>> out = np.arccosh(x)
     >>> [i for i in out]
     [1.65745445, 2.99322285]
@@ -205,9 +204,8 @@ def arcsinh(x1):
 
     Examples
     --------
-    >>> import numpy
     >>> import dpnp as np
-    >>> x = np.array([numpy.e, 10.0])
+    >>> x = np.array([np.e, 10.0])
     >>> out = np.arcsinh(x)
     >>> [i for i in out]
     [1.72538256, 2.99822295]
@@ -384,9 +382,8 @@ def cos(x1, out=None, **kwargs):
 
     Examples
     --------
-    >>> import numpy
     >>> import dpnp as np
-    >>> x = np.array([0, numpy.pi/2, numpy.pi])
+    >>> x = np.array([0, np.pi/2, np.pi])
     >>> out = np.cos(x)
     >>> [i for i in out]
     [1.0, 6.123233995736766e-17, -1.0]
@@ -464,9 +461,8 @@ def degrees(x1):
 
     Examples
     --------
-    >>> import numpy
     >>> import dpnp as np
-    >>> rad = np.arange(6.) * numpy.pi/6
+    >>> rad = np.arange(6.) * np.pi/6
     >>> out = np.degrees(rad)
     >>> [i for i in out]
     [0.0, 30.0, 60.0, 90.0, 120.0, 150.0]
@@ -652,9 +648,8 @@ def log(x1, out=None, **kwargs):
 
     Examples
     --------
-    >>> import numpy
     >>> import dpnp as np
-    >>> x = np.array([1.0, numpy.e, numpy.e**2, 0.0])
+    >>> x = np.array([1.0, np.e, np.e**2, 0.0])
     >>> out = np.log(x)
     >>> [i for i in out]
     [0.0, 1.0, 2.0, -inf]
@@ -867,9 +862,8 @@ def sin(x1, out=None, **kwargs):
 
     Examples
     --------
-    >>> import numpy
     >>> import dpnp as np
-    >>> x = np.array([0, numpy.pi/2, numpy.pi])
+    >>> x = np.array([0, np.pi/2, np.pi])
     >>> out = np.sin(x)
     >>> [i for i in out]
     [0.0, 1.0, 1.2246467991473532e-16]
@@ -897,9 +891,8 @@ def sinh(x1):
 
     Examples
     --------
-    >>> import numpy
     >>> import dpnp as np
-    >>> x = np.array([0, numpy.pi/2, numpy.pi])
+    >>> x = np.array([0, np.pi/2, np.pi])
     >>> out = np.sinh(x)
     >>> [i for i in out]
     [0.0, 2.3012989, 11.548739]
@@ -991,9 +984,8 @@ def tan(x1, out=None, **kwargs):
 
     Examples
     --------
-    >>> import numpy
     >>> import dpnp as np
-    >>> x = np.array([-numpy.pi, numpy.pi/2, numpy.pi])
+    >>> x = np.array([-np.pi, np.pi/2, np.pi])
     >>> out = np.tan(x)
     >>> [i for i in out]
     [1.22460635e-16, 1.63317787e+16, -1.22460635e-16]
@@ -1021,9 +1013,8 @@ def tanh(x1):
 
     Examples
     --------
-    >>> import numpy
     >>> import dpnp as np
-    >>> x = np.array([-numpy.pi, numpy.pi/2, numpy.pi])
+    >>> x = np.array([-np.pi, np.pi/2, np.pi])
     >>> out = np.tanh(x)
     >>> [i for i in out]
     [-0.996272, 0.917152, 0.996272]
@@ -1055,11 +1046,10 @@ def unwrap(x1):
 
     Examples
     --------
-    >>> import numpy
     >>> import dpnp as np
-    >>> phase = np.linspace(0, numpy.pi, num=5)
+    >>> phase = np.linspace(0, np.pi, num=5)
     >>> for i in range(3, 5):
-    >>>     phase[i] += numpy.pi
+    ...     phase[i] += np.pi
     >>> out = np.unwrap(phase)
     >>> [i for i in out]
     [0.0, 0.78539816, 1.57079633, 5.49778714, 6.28318531]
diff --git a/dpnp/dpnp_iface_types.py b/dpnp/dpnp_iface_types.py
index a39cfa47cd1..0109f5bdd21 100644
--- a/dpnp/dpnp_iface_types.py
+++ b/dpnp/dpnp_iface_types.py
@@ -40,39 +40,105 @@
 __all__ = [
     "bool",
     "bool_",
+    "cdouble",
+    "complex_",
     "complex128",
     "complex64",
+    "complexfloating",
+    "cfloat",
+    "csingle",
+    "double",
     "dtype",
+    "e",
+    "euler_gamma",
     "float",
+    "float_",
     "float16",
     "float32",
     "float64",
+    "floating",
+    "inexact",
+    "Inf",
+    "inf",
+    "Infinity",
+    "infty",
     "int",
+    "int_",
     "int32",
     "int64",
     "integer",
+    "intc",
     "isscalar",
+    "issubdtype",
+    "issubsctype",
     "is_type_supported",
-    "longcomplex",
+    "NAN",
+    "NaN",
     "nan",
     "newaxis",
-    "void"
+    "NINF",
+    "NZERO",
+    "number",
+    "pi",
+    "PINF",
+    "PZERO",
+    "signedinteger",
+    "single",
+    "singlecomplex"
 ]
 
+
+# =============================================================================
+# Data types (borrowed from NumPy)
+# =============================================================================
 bool = numpy.bool_
 bool_ = numpy.bool_
+cdouble = numpy.cdouble
+complex_ = numpy.complex_
 complex128 = numpy.complex128
 complex64 = numpy.complex64
+complexfloating = numpy.complexfloating
+cfloat = numpy.cfloat
+csingle = numpy.csingle
+double = numpy.double
 dtype = numpy.dtype
+float = numpy.float_
+float_ = numpy.float_
 float16 = numpy.float16
 float32 = numpy.float32
 float64 = numpy.float64
-float = numpy.float_
+floating = numpy.floating
+inexact = numpy.inexact
+int = numpy.int_
+int_ = numpy.int_
 int32 = numpy.int32
 int64 = numpy.int64
 integer = numpy.integer
-int = numpy.int_
-longcomplex = numpy.longcomplex
+intc = numpy.intc
+number = numpy.number
+signedinteger = numpy.signedinteger
+single = numpy.single
+singlecomplex = numpy.singlecomplex
+
+
+# =============================================================================
+# Constants (borrowed from NumPy)
+# =============================================================================
+e = numpy.e
+euler_gamma = numpy.euler_gamma
+Inf = numpy.Inf
+inf = numpy.inf
+Infinity = numpy.Infinity
+infty = numpy.infty
+NAN = numpy.NAN
+NaN = numpy.NaN
+nan = numpy.nan
+newaxis = None
+NINF = numpy.NINF
+NZERO = numpy.NZERO
+pi = numpy.pi
+PINF = numpy.PINF
+PZERO = numpy.PZERO
 
 
 def isscalar(obj):
@@ -85,9 +151,26 @@ def isscalar(obj):
     return numpy.isscalar(obj)
 
 
-nan = numpy.nan
-newaxis = None
-void = numpy.void
+def issubdtype(arg1, arg2):
+    """
+    Returns True if first argument is a typecode lower/equal in type hierarchy.
+
+    For full documentation refer to :obj:`numpy.issubdtype`.
+
+    """
+
+    return numpy.issubdtype(arg1, arg2)
+
+
+def issubsctype(arg1, arg2):
+    """
+    Determine if the first argument is a subclass of the second argument.
+
+    For full documentation refer to :obj:`numpy.issubsctype`.
+
+    """
+
+    return numpy.issubsctype(arg1, arg2)
 
 
 def is_type_supported(obj_type):
diff --git a/dpnp/dpnp_utils/dpnp_algo_utils.pyx b/dpnp/dpnp_utils/dpnp_algo_utils.pyx
index 672aa19e4dc..a9438178876 100644
--- a/dpnp/dpnp_utils/dpnp_algo_utils.pyx
+++ b/dpnp/dpnp_utils/dpnp_algo_utils.pyx
@@ -268,18 +268,18 @@ def map_dtype_to_device(dtype, device):
     Map an input ``dtype`` with type ``device`` may use
     """
 
-    dtype = numpy.dtype(dtype)
+    dtype = dpnp.dtype(dtype)
     if not hasattr(dtype, 'char'):
         raise TypeError(f"Invalid type of input dtype={dtype}")
     elif not isinstance(device, dpctl.SyclDevice):
         raise TypeError(f"Invalid type of input device={device}")
 
     dtc = dtype.char
-    if dtc == "?" or numpy.issubdtype(dtype, numpy.integer):
+    if dtc == "?" or dpnp.issubdtype(dtype, dpnp.integer):
         # bool or integer type
         return dtype
 
-    if numpy.issubdtype(dtype, numpy.floating):
+    if dpnp.issubdtype(dtype, dpnp.floating):
         if dtc == "f":
             # float32 type
             return dtype
@@ -294,7 +294,7 @@ def map_dtype_to_device(dtype, device):
         # float32 is default floating type
         return dpnp.dtype("f4")
 
-    if numpy.issubdtype(dtype, numpy.complexfloating):
+    if dpnp.issubdtype(dtype, dpnp.complexfloating):
         if dtc == "F":
             # complex64 type
             return dtype
@@ -418,14 +418,14 @@ cdef tuple get_shape_dtype(object input_obj):
 
             # shape and dtype does not match with siblings.
             if ((return_shape != elem_shape) or (return_dtype != elem_dtype)):
-                return (elem_shape, numpy.dtype(numpy.object_))
+                return (elem_shape, dpnp.dtype(numpy.object_))
 
         list_shape.push_back(len(input_obj))
         list_shape.insert(list_shape.end(), return_shape.begin(), return_shape.end())
         return (list_shape, return_dtype)
 
     # assume scalar or object
-    return (return_shape, numpy.dtype(type(input_obj)))
+    return (return_shape, dpnp.dtype(type(input_obj)))
 
 
 cpdef find_common_type(object x1_obj, object x2_obj):
diff --git a/dpnp/linalg/dpnp_algo_linalg.pyx b/dpnp/linalg/dpnp_algo_linalg.pyx
index e6b239eb880..ddcaf677499 100644
--- a/dpnp/linalg/dpnp_algo_linalg.pyx
+++ b/dpnp/linalg/dpnp_algo_linalg.pyx
@@ -1,7 +1,7 @@
 # cython: language_level=3
 # -*- coding: utf-8 -*-
 # *****************************************************************************
-# Copyright (c) 2016-2022, Intel Corporation
+# Copyright (c) 2016-2023, Intel Corporation
 # All rights reserved.
 #
 # Redistribution and use in source and binary forms, with or without
@@ -119,10 +119,10 @@ cpdef object dpnp_cond(object input, object p):
         sqnorm = dpnp.dot(input, input)
         res = dpnp.sqrt(sqnorm)
         ret = dpnp.array([res])
-    elif p == numpy.inf:
+    elif p == dpnp.inf:
         dpnp_sum_val = dpnp.sum(dpnp.abs(input), axis=1)
         ret = dpnp.max(dpnp_sum_val)
-    elif p == -numpy.inf:
+    elif p == -dpnp.inf:
         dpnp_sum_val = dpnp.sum(dpnp.abs(input), axis=1)
         ret = dpnp.min(dpnp_sum_val)
     elif p == 1:
@@ -342,13 +342,13 @@ cpdef object dpnp_norm(object input, ord=None, axis=None):
     cdef long size_input = input.size
     cdef shape_type_c shape_input = input.shape
 
-    if input.dtype == numpy.float32:
-        res_type = numpy.float32
+    if input.dtype == dpnp.float32:
+        res_type = dpnp.float32
     else:
-        res_type = numpy.float64
+        res_type = dpnp.float64
 
     if size_input == 0:
-        return dpnp.array([numpy.nan], dtype=res_type)
+        return dpnp.array([dpnp.nan], dtype=res_type)
 
     if isinstance(axis, int):
         axis_ = tuple([axis])
@@ -368,9 +368,9 @@ cpdef object dpnp_norm(object input, ord=None, axis=None):
 
     len_axis = 1 if axis is None else len(axis_)
     if len_axis == 1:
-        if ord == numpy.inf:
+        if ord == dpnp.inf:
             return dpnp.array([dpnp.abs(input).max(axis=axis)])
-        elif ord == -numpy.inf:
+        elif ord == -dpnp.inf:
             return dpnp.array([dpnp.abs(input).min(axis=axis)])
         elif ord == 0:
             return input.dtype.type(dpnp.count_nonzero(input, axis=axis))
@@ -414,7 +414,7 @@ cpdef object dpnp_norm(object input, ord=None, axis=None):
                 col_axis -= 1
             dpnp_sum_val = dpnp.sum(dpnp.abs(input), axis=row_axis)
             ret = dpnp_sum_val.min(axis=col_axis)
-        elif ord == numpy.inf:
+        elif ord == dpnp.inf:
             if row_axis > col_axis:
                 row_axis -= 1
             dpnp_sum_val = dpnp.sum(dpnp.abs(input), axis=col_axis)
@@ -424,7 +424,7 @@ cpdef object dpnp_norm(object input, ord=None, axis=None):
                 col_axis -= 1
             dpnp_sum_val = dpnp.sum(dpnp.abs(input), axis=row_axis)
             ret = dpnp_sum_val.min(axis=col_axis)
-        elif ord == -numpy.inf:
+        elif ord == -dpnp.inf:
             if row_axis > col_axis:
                 row_axis -= 1
             dpnp_sum_val = dpnp.sum(dpnp.abs(input), axis=col_axis)
diff --git a/dpnp/linalg/dpnp_iface_linalg.py b/dpnp/linalg/dpnp_iface_linalg.py
index 43a26c1b530..6e6f55db8f9 100644
--- a/dpnp/linalg/dpnp_iface_linalg.py
+++ b/dpnp/linalg/dpnp_iface_linalg.py
@@ -2,7 +2,7 @@
 # distutils: language = c++
 # -*- coding: utf-8 -*-
 # *****************************************************************************
-# Copyright (c) 2016-2020, Intel Corporation
+# Copyright (c) 2016-2023, Intel Corporation
 # All rights reserved.
 #
 # Redistribution and use in source and binary forms, with or without
@@ -111,7 +111,7 @@ def cond(input, p=None):
     Limitations
     -----------
     Input array is supported as :obj:`dpnp.ndarray`.
-    Parameter p=[None, 1, -1, 2, -2, numpy.inf, -numpy.inf, 'fro'] is supported.
+    Parameter p=[None, 1, -1, 2, -2, dpnp.inf, -dpnp.inf, 'fro'] is supported.
 
     See Also
     --------
@@ -119,7 +119,7 @@ def cond(input, p=None):
     """
 
     if (not use_origin_backend(input)):
-        if p in [None, 1, -1, 2, -2, numpy.inf, -numpy.inf, 'fro']:
+        if p in [None, 1, -1, 2, -2, dpnp.inf, -dpnp.inf, 'fro']:
             result_obj = dpnp_cond(input, p)
             result = dpnp.convert_single_elem_array_to_scalar(result_obj)
 
diff --git a/dpnp/random/dpnp_algo_random.pyx b/dpnp/random/dpnp_algo_random.pyx
index 2e259625087..314906cee6d 100644
--- a/dpnp/random/dpnp_algo_random.pyx
+++ b/dpnp/random/dpnp_algo_random.pyx
@@ -1,7 +1,7 @@
 # cython: language_level=3
 # -*- coding: utf-8 -*-
 # *****************************************************************************
-# Copyright (c) 2016-2022, Intel Corporation
+# Copyright (c) 2016-2023, Intel Corporation
 # All rights reserved.
 #
 # Redistribution and use in source and binary forms, with or without
@@ -353,9 +353,9 @@ cdef class MT19937:
 
     cdef bint is_integer(self, value):
         if isinstance(value, numbers.Number):
-            return isinstance(value, int) or isinstance(value, numpy.integer)
+            return isinstance(value, int) or isinstance(value, dpnp.integer)
         # cover an element of dpnp array:
-        return numpy.ndim(value) == 0 and hasattr(value, "dtype") and numpy.issubdtype(value, numpy.integer)
+        return numpy.ndim(value) == 0 and hasattr(value, "dtype") and dpnp.issubdtype(value, dpnp.integer)
 
 
     cdef bint is_uint_range(self, value):
@@ -455,7 +455,7 @@ cpdef utils.dpnp_descriptor dpnp_rng_beta(double a, double b, size):
     """
 
     # convert string type names (array.dtype) to C enum DPNPFuncType
-    cdef DPNPFuncType param1_type = dpnp_dtype_to_DPNPFuncType(numpy.float64)
+    cdef DPNPFuncType param1_type = dpnp_dtype_to_DPNPFuncType(dpnp.float64)
 
     # get the FPTR data structure
     cdef DPNPFuncData kernel_data = get_dpnp_function_ptr(DPNP_FN_RNG_BETA_EXT, param1_type, param1_type)
@@ -488,7 +488,7 @@ cpdef utils.dpnp_descriptor dpnp_rng_binomial(int ntrial, double p, size):
 
     """
 
-    dtype = numpy.int32
+    dtype = dpnp.int32
     cdef DPNPFuncType param1_type
     cdef DPNPFuncData kernel_data
     cdef fptr_dpnp_rng_binomial_c_1out_t func
@@ -527,7 +527,7 @@ cpdef utils.dpnp_descriptor dpnp_rng_chisquare(int df, size):
     """
 
     # convert string type names (array.dtype) to C enum DPNPFuncType
-    cdef DPNPFuncType param1_type = dpnp_dtype_to_DPNPFuncType(numpy.float64)
+    cdef DPNPFuncType param1_type = dpnp_dtype_to_DPNPFuncType(dpnp.float64)
 
     # get the FPTR data structure
     cdef DPNPFuncData kernel_data = get_dpnp_function_ptr(DPNP_FN_RNG_CHISQUARE_EXT, param1_type, param1_type)
@@ -560,7 +560,7 @@ cpdef utils.dpnp_descriptor dpnp_rng_exponential(double beta, size):
 
     """
 
-    dtype = numpy.float64
+    dtype = dpnp.float64
     # convert string type names (array.dtype) to C enum DPNPFuncType
     cdef DPNPFuncType param1_type = dpnp_dtype_to_DPNPFuncType(dtype)
 
@@ -593,7 +593,7 @@ cpdef utils.dpnp_descriptor dpnp_rng_f(double df_num, double df_den, size):
     univariate F distribution.
     """
 
-    dtype = numpy.float64
+    dtype = dpnp.float64
     # convert string type names (array.dtype) to C enum DPNPFuncType
     cdef DPNPFuncType param1_type = dpnp_dtype_to_DPNPFuncType(dtype)
 
@@ -627,7 +627,7 @@ cpdef utils.dpnp_descriptor dpnp_rng_gamma(double shape, double scale, size):
 
     """
 
-    dtype = numpy.float64
+    dtype = dpnp.float64
     cdef DPNPFuncType param1_type
     cdef DPNPFuncData kernel_data
     cdef fptr_dpnp_rng_gamma_c_1out_t func
@@ -664,7 +664,7 @@ cpdef utils.dpnp_descriptor dpnp_rng_geometric(float p, size):
 
     """
 
-    dtype = numpy.int32
+    dtype = dpnp.int32
     cdef DPNPFuncType param1_type
     cdef DPNPFuncData kernel_data
     cdef fptr_dpnp_rng_geometric_c_1out_t func
@@ -702,7 +702,7 @@ cpdef utils.dpnp_descriptor dpnp_rng_gumbel(double loc, double scale, size):
 
     """
 
-    dtype = numpy.float64
+    dtype = dpnp.float64
     cdef DPNPFuncType param1_type
     cdef DPNPFuncData kernel_data
     cdef fptr_dpnp_rng_gumbel_c_1out_t func
@@ -737,7 +737,7 @@ cpdef utils.dpnp_descriptor dpnp_rng_hypergeometric(int l, int s, int m, size):
 
     """
 
-    dtype = numpy.int32
+    dtype = dpnp.int32
     cdef DPNPFuncType param1_type
     cdef DPNPFuncData kernel_data
     cdef fptr_dpnp_rng_hypergeometric_c_1out_t func
@@ -775,7 +775,7 @@ cpdef utils.dpnp_descriptor dpnp_rng_laplace(double loc, double scale, size):
 
     """
 
-    dtype = numpy.float64
+    dtype = dpnp.float64
     cdef DPNPFuncType param1_type
     cdef DPNPFuncData kernel_data
     cdef fptr_dpnp_rng_laplace_c_1out_t func
@@ -814,7 +814,7 @@ cpdef utils.dpnp_descriptor dpnp_rng_logistic(double loc, double scale, size):
     """
 
     # convert string type names (array.dtype) to C enum DPNPFuncType
-    cdef DPNPFuncType param1_type = dpnp_dtype_to_DPNPFuncType(numpy.float64)
+    cdef DPNPFuncType param1_type = dpnp_dtype_to_DPNPFuncType(dpnp.float64)
 
     # get the FPTR data structure
     cdef DPNPFuncData kernel_data = get_dpnp_function_ptr(DPNP_FN_RNG_LOGISTIC_EXT, param1_type, param1_type)
@@ -846,7 +846,7 @@ cpdef utils.dpnp_descriptor dpnp_rng_lognormal(double mean, double stddev, size)
 
     """
 
-    dtype = numpy.float64
+    dtype = dpnp.float64
     cdef DPNPFuncType param1_type
     cdef DPNPFuncData kernel_data
     cdef fptr_dpnp_rng_lognormal_c_1out_t func
@@ -886,7 +886,7 @@ cpdef utils.dpnp_descriptor dpnp_rng_multinomial(int ntrial, utils.dpnp_descript
 
     """
 
-    dtype = numpy.int32
+    dtype = dpnp.int32
     cdef DPNPFuncType param1_type
     cdef DPNPFuncData kernel_data
     cdef fptr_dpnp_rng_multinomial_c_1out_t func
@@ -934,7 +934,7 @@ cpdef utils.dpnp_descriptor dpnp_rng_multivariate_normal(utils.dpnp_descriptor m
 
     """
 
-    dtype = numpy.float64
+    dtype = dpnp.float64
     cdef int dimen
     cdef size_t mean_size
     cdef size_t cov_size
@@ -981,7 +981,7 @@ cpdef utils.dpnp_descriptor dpnp_rng_negative_binomial(double a, double p, size)
 
     """
 
-    dtype = numpy.int32
+    dtype = dpnp.int32
     cdef utils.dpnp_descriptor result
     cdef DPNPFuncType param1_type
     cdef DPNPFuncData kernel_data
@@ -1031,7 +1031,7 @@ cpdef utils.dpnp_descriptor dpnp_rng_noncentral_chisquare(double df, double nonc
     """
 
     # convert string type names (array.dtype) to C enum DPNPFuncType
-    cdef DPNPFuncType param1_type = dpnp_dtype_to_DPNPFuncType(numpy.float64)
+    cdef DPNPFuncType param1_type = dpnp_dtype_to_DPNPFuncType(dpnp.float64)
 
     # get the FPTR data structure
     cdef DPNPFuncData kernel_data = get_dpnp_function_ptr(DPNP_FN_RNG_NONCENTRAL_CHISQUARE_EXT, param1_type, param1_type)
@@ -1063,7 +1063,7 @@ cpdef utils.dpnp_descriptor dpnp_rng_pareto(double alpha, size):
 
     """
 
-    dtype = numpy.float64
+    dtype = dpnp.float64
     # convert string type names (array.dtype) to C enum DPNPFuncType
     cdef DPNPFuncType param1_type = dpnp_dtype_to_DPNPFuncType(dtype)
 
@@ -1098,7 +1098,7 @@ cpdef utils.dpnp_descriptor dpnp_rng_poisson(double lam, size):
 
     """
 
-    dtype = numpy.int32
+    dtype = dpnp.int32
     cdef shape_type_c result_shape
     cdef utils.dpnp_descriptor result
     cdef DPNPFuncType param1_type
@@ -1143,7 +1143,7 @@ cpdef utils.dpnp_descriptor dpnp_rng_power(double alpha, size):
     univariate power distribution of `alpha`.
     """
 
-    dtype = numpy.float64
+    dtype = dpnp.float64
     # convert string type names (array.dtype) to C enum DPNPFuncType
     cdef DPNPFuncType param1_type = dpnp_dtype_to_DPNPFuncType(dtype)
 
@@ -1177,7 +1177,7 @@ cpdef utils.dpnp_descriptor dpnp_rng_rayleigh(double scale, size):
 
     """
 
-    dtype = numpy.float64
+    dtype = dpnp.float64
     cdef shape_type_c result_shape
     cdef utils.dpnp_descriptor result
     cdef DPNPFuncType param1_type
@@ -1252,7 +1252,7 @@ cpdef dpnp_rng_srand(seed):
     """
 
     # convert string type names (array.dtype) to C enum DPNPFuncType
-    cdef DPNPFuncType param1_type = dpnp_dtype_to_DPNPFuncType(numpy.float64)
+    cdef DPNPFuncType param1_type = dpnp_dtype_to_DPNPFuncType(dpnp.float64)
 
     # get the FPTR data structure
     cdef DPNPFuncData kernel_data = get_dpnp_function_ptr(DPNP_FN_RNG_SRAND, param1_type, param1_type)
@@ -1271,7 +1271,7 @@ cpdef utils.dpnp_descriptor dpnp_rng_standard_cauchy(size):
     """
 
     # convert string type names (array.dtype) to C enum DPNPFuncType
-    cdef DPNPFuncType param1_type = dpnp_dtype_to_DPNPFuncType(numpy.float64)
+    cdef DPNPFuncType param1_type = dpnp_dtype_to_DPNPFuncType(dpnp.float64)
 
     # get the FPTR data structure
     cdef DPNPFuncData kernel_data = get_dpnp_function_ptr(DPNP_FN_RNG_STANDARD_CAUCHY_EXT, param1_type, param1_type)
@@ -1306,7 +1306,7 @@ cpdef utils.dpnp_descriptor dpnp_rng_standard_exponential(size):
     cdef fptr_dpnp_rng_standard_exponential_c_1out_t func
 
     # convert string type names (array.dtype) to C enum DPNPFuncType
-    cdef DPNPFuncType param1_type = dpnp_dtype_to_DPNPFuncType(numpy.float64)
+    cdef DPNPFuncType param1_type = dpnp_dtype_to_DPNPFuncType(dpnp.float64)
 
     # get the FPTR data structure
     cdef DPNPFuncData kernel_data = get_dpnp_function_ptr(DPNP_FN_RNG_STANDARD_EXPONENTIAL_EXT, param1_type, param1_type)
@@ -1338,7 +1338,7 @@ cpdef utils.dpnp_descriptor dpnp_rng_standard_gamma(double shape, size):
 
     """
 
-    dtype = numpy.float64
+    dtype = dpnp.float64
     cdef shape_type_c result_shape
     cdef utils.dpnp_descriptor result
     cdef DPNPFuncType param1_type
@@ -1385,7 +1385,7 @@ cpdef utils.dpnp_descriptor dpnp_rng_standard_t(double df, size):
     """
 
     # convert string type names (array.dtype) to C enum DPNPFuncType
-    cdef DPNPFuncType param1_type = dpnp_dtype_to_DPNPFuncType(numpy.float64)
+    cdef DPNPFuncType param1_type = dpnp_dtype_to_DPNPFuncType(dpnp.float64)
 
     # get the FPTR data structure
     cdef DPNPFuncData kernel_data = get_dpnp_function_ptr(DPNP_FN_RNG_STANDARD_T_EXT, param1_type, param1_type)
@@ -1417,7 +1417,7 @@ cpdef utils.dpnp_descriptor dpnp_rng_triangular(double left, double mode, double
 
     """
 
-    dtype = numpy.float64
+    dtype = dpnp.float64
     # convert string type names (array.dtype) to C enum DPNPFuncType
     cdef DPNPFuncType param1_type = dpnp_dtype_to_DPNPFuncType(dtype)
 
@@ -1452,7 +1452,7 @@ cpdef utils.dpnp_descriptor dpnp_rng_vonmises(double mu, double kappa, size):
     """
 
     # convert string type names (array.dtype) to C enum DPNPFuncType
-    cdef DPNPFuncType param1_type = dpnp_dtype_to_DPNPFuncType(numpy.float64)
+    cdef DPNPFuncType param1_type = dpnp_dtype_to_DPNPFuncType(dpnp.float64)
 
     # get the FPTR data structure
     cdef DPNPFuncData kernel_data = get_dpnp_function_ptr(DPNP_FN_RNG_VONMISES_EXT, param1_type, param1_type)
@@ -1484,7 +1484,7 @@ cpdef utils.dpnp_descriptor dpnp_rng_wald(double mean, double scale, size):
 
     """
 
-    dtype = numpy.float64
+    dtype = dpnp.float64
     # convert string type names (array.dtype) to C enum DPNPFuncType
     cdef DPNPFuncType param1_type = dpnp_dtype_to_DPNPFuncType(dtype)
 
@@ -1518,13 +1518,13 @@ cpdef utils.dpnp_descriptor dpnp_rng_weibull(double a, size):
 
     """
 
-    dtype = numpy.float64
+    dtype = dpnp.float64
     cdef DPNPFuncType param1_type
     cdef DPNPFuncData kernel_data
     cdef fptr_dpnp_rng_weibull_c_1out_t func
 
     # convert string type names (array.dtype) to C enum DPNPFuncType
-    param1_type = dpnp_dtype_to_DPNPFuncType(numpy.float64)
+    param1_type = dpnp_dtype_to_DPNPFuncType(dpnp.float64)
 
     # get the FPTR data structure
     kernel_data = get_dpnp_function_ptr(DPNP_FN_RNG_WEIBULL_EXT, param1_type, param1_type)
@@ -1557,7 +1557,7 @@ cpdef utils.dpnp_descriptor dpnp_rng_zipf(double a, size):
     """
 
     # convert string type names (array.dtype) to C enum DPNPFuncType
-    cdef DPNPFuncType param1_type = dpnp_dtype_to_DPNPFuncType(numpy.float64)
+    cdef DPNPFuncType param1_type = dpnp_dtype_to_DPNPFuncType(dpnp.float64)
 
     # get the FPTR data structure
     cdef DPNPFuncData kernel_data = get_dpnp_function_ptr(DPNP_FN_RNG_ZIPF_EXT, param1_type, param1_type)
diff --git a/dpnp/random/dpnp_iface_random.py b/dpnp/random/dpnp_iface_random.py
index ade85bb2fe1..a2e6f164dde 100644
--- a/dpnp/random/dpnp_iface_random.py
+++ b/dpnp/random/dpnp_iface_random.py
@@ -670,7 +670,7 @@ def multinomial(n, pvals, size=None):
         d = len(pvals)
         if n < 0:
             pass
-        elif n > numpy.iinfo(numpy.int32).max:
+        elif n > numpy.iinfo(dpnp.int32).max:
             pass
         elif pvals_sum > 1.0:
             pass
@@ -714,11 +714,11 @@ def multivariate_normal(mean, cov, size=None, check_valid='warn', tol=1e-8):
     """
 
     if not use_origin_backend(mean):
-        mean_ = dpnp.get_dpnp_descriptor(dpnp.array(mean, dtype=numpy.float64))
-        cov_ = dpnp.get_dpnp_descriptor(dpnp.array(cov, dtype=numpy.float64))
+        mean_ = dpnp.get_dpnp_descriptor(dpnp.array(mean, dtype=dpnp.float64))
+        cov_ = dpnp.get_dpnp_descriptor(dpnp.array(cov, dtype=dpnp.float64))
         if size is None:
             shape = []
-        elif isinstance(size, (int, numpy.integer)):
+        elif isinstance(size, (int, dpnp.integer)):
             shape = [size]
         else:
             shape = size
diff --git a/tests/test_arraycreation.py b/tests/test_arraycreation.py
index 7944ff21003..f8167b7d926 100644
--- a/tests/test_arraycreation.py
+++ b/tests/test_arraycreation.py
@@ -30,7 +30,7 @@
 @pytest.mark.parametrize("dtype", get_all_dtypes(no_bool=True, no_float16=False))
 def test_arange(start, stop, step, dtype):
     rtol_mult = 2
-    if numpy.issubdtype(dtype, numpy.float16):
+    if dpnp.issubdtype(dtype, dpnp.float16):
         # numpy casts to float32 type when computes float16 data
         rtol_mult = 4
 
@@ -51,7 +51,7 @@ def test_arange(start, stop, step, dtype):
     else:
         _dtype = dtype
 
-    if numpy.issubdtype(_dtype, numpy.floating) or numpy.issubdtype(_dtype, numpy.complexfloating):
+    if dpnp.issubdtype(_dtype, dpnp.floating) or dpnp.issubdtype(_dtype, dpnp.complexfloating):
         assert_allclose(exp_array, res_array, rtol=rtol_mult*numpy.finfo(_dtype).eps)
     else:
         assert_array_equal(exp_array, res_array)
diff --git a/tests/test_dparray.py b/tests/test_dparray.py
index 62a0120f8a3..028d1201c0b 100644
--- a/tests/test_dparray.py
+++ b/tests/test_dparray.py
@@ -139,11 +139,11 @@ def test_print_dpnp_special_character():
     expected = "[ 1.  0. nan  3.]"
     assert(result==expected)
 # inf
-    result = repr(dpnp.array([1., 0., numpy.inf, 3.]))
+    result = repr(dpnp.array([1., 0., dpnp.inf, 3.]))
     expected = "array([ 1.,  0., inf,  3.])"
     assert(result==expected)
 
-    result = str(dpnp.array([1., 0., numpy.inf, 3.]))
+    result = str(dpnp.array([1., 0., dpnp.inf, 3.]))
     expected = "[ 1.  0. inf  3.]"
     assert(result==expected)
 
diff --git a/tests/test_random_state.py b/tests/test_random_state.py
index 0d1752c744e..5ce1b759879 100644
--- a/tests/test_random_state.py
+++ b/tests/test_random_state.py
@@ -174,9 +174,9 @@ def test_fallback(self, loc, scale):
 
     @pytest.mark.parametrize("dtype",
                              [dpnp.float16, float, dpnp.integer, dpnp.int64, dpnp.int32, dpnp.int, int,
-                              dpnp.longcomplex, dpnp.complex128, dpnp.complex64, dpnp.bool, dpnp.bool_],
+                              numpy.longcomplex, dpnp.complex128, dpnp.complex64, dpnp.bool, dpnp.bool_],
                              ids=['dpnp.float16', 'float', 'dpnp.integer', 'dpnp.int64', 'dpnp.int32', 'dpnp.int', 'int',
-                                  'dpnp.longcomplex', 'dpnp.complex128', 'dpnp.complex64', 'dpnp.bool', 'dpnp.bool_'])
+                                  'numpy.longcomplex', 'dpnp.complex128', 'dpnp.complex64', 'dpnp.bool', 'dpnp.bool_'])
     def test_invalid_dtype(self, dtype):
         # dtype must be float32 or float64
         assert_raises(TypeError, RandomState().normal, dtype=dtype)
@@ -834,9 +834,9 @@ def test_fallback(self, low, high):
 
     @pytest.mark.parametrize("dtype",
                              [dpnp.float16, float, dpnp.integer, dpnp.int64, dpnp.int, int,
-                              dpnp.longcomplex, dpnp.complex128, dpnp.complex64, dpnp.bool, dpnp.bool_],
+                              numpy.longcomplex, dpnp.complex128, dpnp.complex64, dpnp.bool, dpnp.bool_],
                              ids=['dpnp.float16', 'float', 'dpnp.integer', 'dpnp.int64', 'dpnp.int', 'int',
-                                  'dpnp.longcomplex', 'dpnp.complex128', 'dpnp.complex64', 'dpnp.bool', 'dpnp.bool_'])
+                                  'numpy.longcomplex', 'dpnp.complex128', 'dpnp.complex64', 'dpnp.bool', 'dpnp.bool_'])
     def test_invalid_dtype(self, dtype):
         if dtype in (dpnp.int, dpnp.integer) and dtype == dpnp.dtype('int32'):
             pytest.skip("dtype is alias on dpnp.int32 on the target OS, so no error here")

From 4b9a5cdc8d6f4d8573129e2e6ead684b5b55321d Mon Sep 17 00:00:00 2001
From: Anton <100830759+antonwolfy@users.noreply.github.com>
Date: Fri, 3 Mar 2023 14:40:43 +0100
Subject: [PATCH 11/16] dpnp.power() doesn't work properly with a scalar
 (#1323)

* dpnp.power() doesn't work properly with a scalar

* skip tests with 0 value of complex128 on CPU

* State support of :class:`dpctl.tensor.usm_ndarray` in descriptions
---
 .../include/dpnp_gen_2arg_3type_tbl.hpp       |   8 +-
 dpnp/backend/kernels/dpnp_krnl_elemwise.cpp   |  59 +++------
 dpnp/dpnp_array.py                            |  11 +-
 dpnp/dpnp_iface_logic.py                      |  38 +++---
 dpnp/dpnp_iface_mathematical.py               |  98 ++++++++------
 tests/helper.py                               |  17 +++
 tests/skipped_tests.tbl                       |   1 -
 tests/skipped_tests_gpu.tbl                   |   1 -
 tests/test_mathematical.py                    | 123 ++++++++++++++----
 tests/test_sycl_queue.py                      |   8 +-
 tests/test_usm_type.py                        |  15 +++
 .../cupy/math_tests/test_arithmetic.py        |   2 +-
 12 files changed, 247 insertions(+), 134 deletions(-)

diff --git a/dpnp/backend/include/dpnp_gen_2arg_3type_tbl.hpp b/dpnp/backend/include/dpnp_gen_2arg_3type_tbl.hpp
index e345c6eefea..0964f18df81 100644
--- a/dpnp/backend/include/dpnp_gen_2arg_3type_tbl.hpp
+++ b/dpnp/backend/include/dpnp_gen_2arg_3type_tbl.hpp
@@ -176,11 +176,11 @@ MACRO_2ARG_3TYPES_OP(dpnp_multiply_c,
                      MACRO_UNPACK_TYPES(float, double, std::complex<float>, std::complex<double>))
 
 MACRO_2ARG_3TYPES_OP(dpnp_power_c,
-                     sycl::pow((double)input1_elem, (double)input2_elem),
-                     nullptr,
-                     std::false_type,
+                     static_cast<_DataType_output>(std::pow(input1_elem, input2_elem)),
+                     sycl::pow(x1, x2),
+                     MACRO_UNPACK_TYPES(float, double),
                      oneapi::mkl::vm::pow,
-                     MACRO_UNPACK_TYPES(float, double))
+                     MACRO_UNPACK_TYPES(float, double, std::complex<float>, std::complex<double>))
 
 MACRO_2ARG_3TYPES_OP(dpnp_subtract_c,
                      input1_elem - input2_elem,
diff --git a/dpnp/backend/kernels/dpnp_krnl_elemwise.cpp b/dpnp/backend/kernels/dpnp_krnl_elemwise.cpp
index 5133473d393..c8b32fa9809 100644
--- a/dpnp/backend/kernels/dpnp_krnl_elemwise.cpp
+++ b/dpnp/backend/kernels/dpnp_krnl_elemwise.cpp
@@ -1247,28 +1247,34 @@ static void func_map_elemwise_2arg_3type_core(func_map_t& fmap)
                                  func_type_map_t::find_type<FT1>,
                                  func_type_map_t::find_type<FTs>>}),
      ...);
+    ((fmap[DPNPFuncName::DPNP_FN_DIVIDE_EXT][FT1][FTs] =
+          {get_divide_res_type<FT1, FTs>(),
+           (void*)dpnp_divide_c_ext<func_type_map_t::find_type<get_divide_res_type<FT1, FTs>()>,
+                                    func_type_map_t::find_type<FT1>,
+                                    func_type_map_t::find_type<FTs>>,
+           get_divide_res_type<FT1, FTs, std::false_type>(),
+           (void*)dpnp_divide_c_ext<func_type_map_t::find_type<get_divide_res_type<FT1, FTs, std::false_type>()>,
+                                    func_type_map_t::find_type<FT1>,
+                                    func_type_map_t::find_type<FTs>>}),
+     ...);
     ((fmap[DPNPFuncName::DPNP_FN_MULTIPLY_EXT][FT1][FTs] =
           {populate_func_types<FT1, FTs>(),
            (void*)dpnp_multiply_c_ext<func_type_map_t::find_type<populate_func_types<FT1, FTs>()>,
                                       func_type_map_t::find_type<FT1>,
                                       func_type_map_t::find_type<FTs>>}),
      ...);
+    ((fmap[DPNPFuncName::DPNP_FN_POWER_EXT][FT1][FTs] =
+          {populate_func_types<FT1, FTs>(),
+           (void*)dpnp_power_c_ext<func_type_map_t::find_type<populate_func_types<FT1, FTs>()>,
+                                   func_type_map_t::find_type<FT1>,
+                                   func_type_map_t::find_type<FTs>>}),
+     ...);
     ((fmap[DPNPFuncName::DPNP_FN_SUBTRACT_EXT][FT1][FTs] =
           {populate_func_types<FT1, FTs>(),
            (void*)dpnp_subtract_c_ext<func_type_map_t::find_type<populate_func_types<FT1, FTs>()>,
                                       func_type_map_t::find_type<FT1>,
                                       func_type_map_t::find_type<FTs>>}),
      ...);
-    ((fmap[DPNPFuncName::DPNP_FN_DIVIDE_EXT][FT1][FTs] =
-          {get_divide_res_type<FT1, FTs>(),
-           (void*)dpnp_divide_c_ext<func_type_map_t::find_type<get_divide_res_type<FT1, FTs>()>,
-                                    func_type_map_t::find_type<FT1>,
-                                    func_type_map_t::find_type<FTs>>,
-           get_divide_res_type<FT1, FTs, std::false_type>(),
-           (void*)dpnp_divide_c_ext<func_type_map_t::find_type<get_divide_res_type<FT1, FTs, std::false_type>()>,
-                                    func_type_map_t::find_type<FT1>,
-                                    func_type_map_t::find_type<FTs>>}),
-     ...);
 }
 
 template <DPNPFuncType... FTs>
@@ -1855,39 +1861,6 @@ static void func_map_init_elemwise_2arg_3type(func_map_t& fmap)
     fmap[DPNPFuncName::DPNP_FN_POWER][eft_DBL][eft_DBL] = {eft_DBL,
                                                            (void*)dpnp_power_c_default<double, double, double>};
 
-    fmap[DPNPFuncName::DPNP_FN_POWER_EXT][eft_INT][eft_INT] = {eft_INT,
-                                                               (void*)dpnp_power_c_ext<int32_t, int32_t, int32_t>};
-    fmap[DPNPFuncName::DPNP_FN_POWER_EXT][eft_INT][eft_LNG] = {eft_LNG,
-                                                               (void*)dpnp_power_c_ext<int64_t, int32_t, int64_t>};
-    fmap[DPNPFuncName::DPNP_FN_POWER_EXT][eft_INT][eft_FLT] = {eft_DBL,
-                                                               (void*)dpnp_power_c_ext<double, int32_t, float>};
-    fmap[DPNPFuncName::DPNP_FN_POWER_EXT][eft_INT][eft_DBL] = {eft_DBL,
-                                                               (void*)dpnp_power_c_ext<double, int32_t, double>};
-    fmap[DPNPFuncName::DPNP_FN_POWER_EXT][eft_LNG][eft_INT] = {eft_LNG,
-                                                               (void*)dpnp_power_c_ext<int64_t, int64_t, int32_t>};
-    fmap[DPNPFuncName::DPNP_FN_POWER_EXT][eft_LNG][eft_LNG] = {eft_LNG,
-                                                               (void*)dpnp_power_c_ext<int64_t, int64_t, int64_t>};
-    fmap[DPNPFuncName::DPNP_FN_POWER_EXT][eft_LNG][eft_FLT] = {eft_DBL,
-                                                               (void*)dpnp_power_c_ext<double, int64_t, float>};
-    fmap[DPNPFuncName::DPNP_FN_POWER_EXT][eft_LNG][eft_DBL] = {eft_DBL,
-                                                               (void*)dpnp_power_c_ext<double, int64_t, double>};
-    fmap[DPNPFuncName::DPNP_FN_POWER_EXT][eft_FLT][eft_INT] = {eft_DBL,
-                                                               (void*)dpnp_power_c_ext<double, float, int32_t>};
-    fmap[DPNPFuncName::DPNP_FN_POWER_EXT][eft_FLT][eft_LNG] = {eft_DBL,
-                                                               (void*)dpnp_power_c_ext<double, float, int64_t>};
-    fmap[DPNPFuncName::DPNP_FN_POWER_EXT][eft_FLT][eft_FLT] = {eft_FLT,
-                                                               (void*)dpnp_power_c_ext<float, float, float>};
-    fmap[DPNPFuncName::DPNP_FN_POWER_EXT][eft_FLT][eft_DBL] = {eft_DBL,
-                                                               (void*)dpnp_power_c_ext<double, float, double>};
-    fmap[DPNPFuncName::DPNP_FN_POWER_EXT][eft_DBL][eft_INT] = {eft_DBL,
-                                                               (void*)dpnp_power_c_ext<double, double, int32_t>};
-    fmap[DPNPFuncName::DPNP_FN_POWER_EXT][eft_DBL][eft_LNG] = {eft_DBL,
-                                                               (void*)dpnp_power_c_ext<double, double, int64_t>};
-    fmap[DPNPFuncName::DPNP_FN_POWER_EXT][eft_DBL][eft_FLT] = {eft_DBL,
-                                                               (void*)dpnp_power_c_ext<double, double, float>};
-    fmap[DPNPFuncName::DPNP_FN_POWER_EXT][eft_DBL][eft_DBL] = {eft_DBL,
-                                                               (void*)dpnp_power_c_ext<double, double, double>};
-
     fmap[DPNPFuncName::DPNP_FN_SUBTRACT][eft_INT][eft_INT] = {
         eft_INT, (void*)dpnp_subtract_c_default<int32_t, int32_t, int32_t>};
     fmap[DPNPFuncName::DPNP_FN_SUBTRACT][eft_INT][eft_LNG] = {
diff --git a/dpnp/dpnp_array.py b/dpnp/dpnp_array.py
index 1536acdbe41..b3dfe7a339e 100644
--- a/dpnp/dpnp_array.py
+++ b/dpnp/dpnp_array.py
@@ -211,7 +211,11 @@ def __int__(self):
 
  # '__invert__',
  # '__ior__',
- # '__ipow__',
+
+    def __ipow__(self, other):
+        dpnp.power(self, other, out=self)
+        return self
+
  # '__irshift__',
  # '__isub__',
  # '__iter__',
@@ -279,7 +283,10 @@ def __rmul__(self, other):
         return dpnp.multiply(other, self)
 
  # '__ror__',
- # '__rpow__',
+ 
+    def __rpow__(self, other):
+        return dpnp.power(other, self)
+
  # '__rrshift__',
  # '__rshift__',
 
diff --git a/dpnp/dpnp_iface_logic.py b/dpnp/dpnp_iface_logic.py
index 2104ee30602..716b2ff8a0f 100644
--- a/dpnp/dpnp_iface_logic.py
+++ b/dpnp/dpnp_iface_logic.py
@@ -253,8 +253,8 @@ def equal(x1,
 
     Limitations
     -----------
-    Parameters `x1` and `x2` are supported as either :class:`dpnp.ndarray` or scalar,
-    but not both (at least either `x1` or `x2` should be as :class:`dpnp.ndarray`).
+    Parameters `x1` and `x2` are supported as either scalar, :class:`dpnp.ndarray`
+    or :class:`dpctl.tensor.usm_ndarray`, but both `x1` and `x2` can not be scalars at the same time.
     Parameters `out`, `where`, `dtype` and `subok` are supported with their default values.
     Otherwise the function will be executed sequentially on CPU.
     Input array data types are limited by supported DPNP :ref:`Data types`,
@@ -323,8 +323,8 @@ def greater(x1,
 
     Limitations
     -----------
-    Parameters `x1` and `x2` are supported as either :class:`dpnp.ndarray` or scalar,
-    but not both (at least either `x1` or `x2` should be as :class:`dpnp.ndarray`).
+    Parameters `x1` and `x2` are supported as either scalar, :class:`dpnp.ndarray`
+    or :class:`dpctl.tensor.usm_ndarray`, but both `x1` and `x2` can not be scalars at the same time.
     Parameters `out`, `where`, `dtype` and `subok` are supported with their default values.
     Otherwise the function will be executed sequentially on CPU.
     Input array data types are limited by supported DPNP :ref:`Data types`,
@@ -393,8 +393,8 @@ def greater_equal(x1,
 
     Limitations
     -----------
-    Parameters `x1` and `x2` are supported as either :class:`dpnp.ndarray` or scalar,
-    but not both (at least either `x1` or `x2` should be as :class:`dpnp.ndarray`).
+    Parameters `x1` and `x2` are supported as either scalar, :class:`dpnp.ndarray`
+    or :class:`dpctl.tensor.usm_ndarray`, but both `x1` and `x2` can not be scalars at the same time.
     Parameters `out`, `where`, `dtype` and `subok` are supported with their default values.
     Otherwise the function will be executed sequentially on CPU.
     Input array data types are limited by supported DPNP :ref:`Data types`,
@@ -638,8 +638,8 @@ def less(x1,
 
     Limitations
     -----------
-    Parameters `x1` and `x2` are supported as either :class:`dpnp.ndarray` or scalar,
-    but not both (at least either `x1` or `x2` should be as :class:`dpnp.ndarray`).
+    Parameters `x1` and `x2` are supported as either scalar, :class:`dpnp.ndarray`
+    or :class:`dpctl.tensor.usm_ndarray`, but both `x1` and `x2` can not be scalars at the same time.
     Parameters `out`, `where`, `dtype` and `subok` are supported with their default values.
     Otherwise the function will be executed sequentially on CPU.
     Input array data types are limited by supported DPNP :ref:`Data types`,
@@ -708,8 +708,8 @@ def less_equal(x1,
 
     Limitations
     -----------
-    Parameters `x1` and `x2` are supported as either :class:`dpnp.ndarray` or scalar,
-    but not both (at least either `x1` or `x2` should be as :class:`dpnp.ndarray`).
+    Parameters `x1` and `x2` are supported as either scalar, :class:`dpnp.ndarray`
+    or :class:`dpctl.tensor.usm_ndarray`, but both `x1` and `x2` can not be scalars at the same time.
     Parameters `out`, `where`, `dtype` and `subok` are supported with their default values.
     Otherwise the function will be executed sequentially on CPU.
     Input array data types are limited by supported DPNP :ref:`Data types`,
@@ -778,8 +778,8 @@ def logical_and(x1,
 
     Limitations
     -----------
-    Parameters `x1` and `x2` are supported as either :class:`dpnp.ndarray` or scalar,
-    but not both (at least either `x1` or `x2` should be as :class:`dpnp.ndarray`).
+    Parameters `x1` and `x2` are supported as either scalar, :class:`dpnp.ndarray`
+    or :class:`dpctl.tensor.usm_ndarray`, but both `x1` and `x2` can not be scalars at the same time.
     Parameters `out`, `where`, `dtype` and `subok` are supported with their default values.
     Otherwise the function will be executed sequentially on CPU.
     Input array data types are limited by supported DPNP :ref:`Data types`,
@@ -847,7 +847,7 @@ def logical_not(x,
 
     Limitations
     -----------
-    Parameters `x` is only supported as :class:`dpnp.ndarray`.
+    Parameter `x` is only supported as either :class:`dpnp.ndarray` or :class:`dpctl.tensor.usm_ndarray`.
     Parameters `out`, `where`, `dtype` and `subok` are supported with their default values.
     Otherwise the function will be executed sequentially on CPU.
     Input array data type is limited by supported DPNP :ref:`Data types`,
@@ -904,8 +904,8 @@ def logical_or(x1,
 
     Limitations
     -----------
-    Parameters `x1` and `x2` are supported as either :class:`dpnp.ndarray` or scalar,
-    but not both (at least either `x1` or `x2` should be as :class:`dpnp.ndarray`).
+    Parameters `x1` and `x2` are supported as either scalar, :class:`dpnp.ndarray`
+    or :class:`dpctl.tensor.usm_ndarray`, but both `x1` and `x2` can not be scalars at the same time.
     Parameters `out`, `where`, `dtype` and `subok` are supported with their default values.
     Otherwise the function will be executed sequentially on CPU.
     Input array data types are limited by supported DPNP :ref:`Data types`,
@@ -973,8 +973,8 @@ def logical_xor(x1,
 
     Limitations
     -----------
-    Parameters `x1` and `x2` are supported as either :class:`dpnp.ndarray` or scalar,
-    but not both (at least either `x1` or `x2` should be as :class:`dpnp.ndarray`).
+    Parameters `x1` and `x2` are supported as either scalar, :class:`dpnp.ndarray`
+    or :class:`dpctl.tensor.usm_ndarray`, but both `x1` and `x2` can not be scalars at the same time.
     Parameters `out`, `where`, `dtype` and `subok` are supported with their default values.
     Otherwise the function will be executed sequentially on CPU.
     Input array data types are limited by supported DPNP :ref:`Data types`,
@@ -1042,8 +1042,8 @@ def not_equal(x1,
 
     Limitations
     -----------
-    Parameters `x1` and `x2` are supported as either :class:`dpnp.ndarray` or scalar,
-    but not both (at least either `x1` or `x2` should be as :class:`dpnp.ndarray`).
+    Parameters `x1` and `x2` are supported as either scalar, :class:`dpnp.ndarray`
+    or :class:`dpctl.tensor.usm_ndarray`, but both `x1` and `x2` can not be scalars at the same time.
     Parameters `out`, `where`, `dtype` and `subok` are supported with their default values.
     Otherwise the function will be executed sequentially on CPU.
     Input array data types are limited by supported DPNP :ref:`Data types`,
diff --git a/dpnp/dpnp_iface_mathematical.py b/dpnp/dpnp_iface_mathematical.py
index feff53288cf..45f3b50accc 100644
--- a/dpnp/dpnp_iface_mathematical.py
+++ b/dpnp/dpnp_iface_mathematical.py
@@ -45,6 +45,7 @@
 
 import dpnp
 import numpy
+import dpctl.tensor as dpt
 
 
 __all__ = [
@@ -175,8 +176,8 @@ def add(x1,
 
     Limitations
     -----------
-    Parameters `x1` and `x2` are supported as either :class:`dpnp.ndarray` or scalar,
-    but not both (at least either `x1` or `x2` should be as :class:`dpnp.ndarray`).
+    Parameters `x1` and `x2` are supported as either scalar, :class:`dpnp.ndarray`
+    or :class:`dpctl.tensor.usm_ndarray`, but both `x1` and `x2` can not be scalars at the same time.
     Parameters `out`, `where`, `dtype` and `subok` are supported with their default values.
     Keyword arguments ``kwargs`` are currently unsupported.
     Otherwise the function will be executed sequentially on CPU.
@@ -565,8 +566,8 @@ def divide(x1,
     
     Limitations
     -----------
-    Parameters `x1` and `x2` are supported as either :class:`dpnp.ndarray` or scalar,
-    but not both (at least either `x1` or `x2` should be as :class:`dpnp.ndarray`).
+    Parameters `x1` and `x2` are supported as either scalar, :class:`dpnp.ndarray`
+    or :class:`dpctl.tensor.usm_ndarray`, but both `x1` and `x2` can not be scalars at the same time.
     Parameters `out`, `where`, `dtype` and `subok` are supported with their default values.
     Keyword arguments ``kwargs`` are currently unsupported.
     Otherwise the function will be executed sequentially on CPU.
@@ -1117,8 +1118,8 @@ def multiply(x1,
 
     Limitations
     -----------
-    Parameters `x1` and `x2` are supported as either :class:`dpnp.ndarray` or scalar,
-    but not both (at least either `x1` or `x2` should be as :class:`dpnp.ndarray`).
+    Parameters `x1` and `x2` are supported as either scalar, :class:`dpnp.ndarray`
+    or :class:`dpctl.tensor.usm_ndarray`, but both `x1` and `x2` can not be scalars at the same time.
     Parameters `out`, `where`, `dtype` and `subok` are supported with their default values.
     Keyword arguments ``kwargs`` are currently unsupported.
     Otherwise the functions will be executed sequentially on CPU.
@@ -1325,18 +1326,35 @@ def negative(x1, **kwargs):
     return call_origin(numpy.negative, x1, **kwargs)
 
 
-def power(x1, x2, dtype=None, out=None, where=True, **kwargs):
+def power(x1,
+          x2,
+          /,
+          out=None,
+          *,
+          where=True,
+          dtype=None,
+          subok=True,
+          **kwargs):
     """
     First array elements raised to powers from second array, element-wise.
 
+    An integer type (of either negative or positive value, but not zero)
+    raised to a negative integer power will return an array of zeroes.
+
     For full documentation refer to :obj:`numpy.power`.
 
+    Returns
+    -------
+    y : dpnp.ndarray
+        The bases in `x1` raised to the exponents in `x2`.
+    
     Limitations
     -----------
-    Parameters ``x1`` and ``x2`` are supported as either :obj:`dpnp.ndarray` or scalar.
-    Parameters ``dtype``, ``out`` and ``where`` are supported with their default values.
+    Parameters `x1` and `x2` are supported as either scalar, :class:`dpnp.ndarray`
+    or :class:`dpctl.tensor.usm_ndarray`, but both `x1` and `x2` can not be scalars at the same time.
+    Parameters `where`, `dtype` and `subok` are supported with their default values.
     Keyword arguments ``kwargs`` are currently unsupported.
-    Otherwise the functions will be executed sequentially on CPU.
+    Otherwise the function will be executed sequentially on CPU.
     Input array data types are limited by supported DPNP :ref:`Data types`.
 
     See Also
@@ -1348,40 +1366,44 @@ def power(x1, x2, dtype=None, out=None, where=True, **kwargs):
 
     Example
     -------
-    >>> import dpnp as np
-    >>> a = np.array([1, 2, 3, 4, 5])
-    >>> b = np.array([2, 2, 2, 2, 2])
-    >>> result = np.power(a, b)
+    >>> import dpnp as dp
+    >>> a = dp.array([1, 2, 3, 4, 5])
+    >>> b = dp.array([2, 2, 2, 2, 2])
+    >>> result = dp.power(a, b)
     >>> [x for x in result]
     [1, 4, 9, 16, 25]
 
     """
 
-    x1_is_scalar = dpnp.isscalar(x1)
-    x2_is_scalar = dpnp.isscalar(x2)
-    x1_desc = dpnp.get_dpnp_descriptor(x1, copy_when_strides=False, copy_when_nondefault_queue=False)
-    x2_desc = dpnp.get_dpnp_descriptor(x2, copy_when_strides=False, copy_when_nondefault_queue=False)
+    if where is not True:
+        pass
+    elif dtype is not None:
+        pass
+    elif subok is not True:
+        pass
+    elif dpnp.isscalar(x1) and dpnp.isscalar(x2):
+        # at least one of x1 and x2 has to be an array
+        pass
+    else:
+        # get USM type and queue to copy scalar from the host memory into a USM allocation
+        usm_type, queue = get_usm_allocations([x1, x2]) if dpnp.isscalar(x1) or dpnp.isscalar(x2) else (None, None)
 
-    if x1_desc and x2_desc and not kwargs:
-        if not x1_desc and not x1_is_scalar:
-            pass
-        elif not x2_desc and not x2_is_scalar:
-            pass
-        elif x1_is_scalar and x2_is_scalar:
-            pass
-        elif x1_desc and x1_desc.ndim == 0:
-            pass
-        elif x2_desc and x2_desc.ndim == 0:
-            pass
-        elif dtype is not None:
-            pass
-        elif not where:
-            pass
+        x1_desc = dpnp.get_dpnp_descriptor(x1, copy_when_strides=False, copy_when_nondefault_queue=False,
+                                           alloc_usm_type=usm_type, alloc_queue=queue)
+        x2_desc = dpnp.get_dpnp_descriptor(x2, copy_when_strides=False, copy_when_nondefault_queue=False,
+                                           alloc_usm_type=usm_type, alloc_queue=queue)
+
+        if out is not None:
+            if not isinstance(out, (dpnp.ndarray, dpt.usm_ndarray)):
+                raise TypeError("return array must be of supported array type")
+            out_desc = dpnp.get_dpnp_descriptor(out, copy_when_nondefault_queue=False)
         else:
-            out_desc = dpnp.get_dpnp_descriptor(out, copy_when_nondefault_queue=False) if out is not None else None
-            return dpnp_power(x1_desc, x2_desc, dtype, out_desc, where).get_pyobj()
+            out_desc = None
+
+        if x1_desc and x2_desc:
+            return dpnp_power(x1_desc, x2_desc, dtype=dtype, out=out_desc, where=where).get_pyobj()
 
-    return call_origin(numpy.power, x1, x2, dtype=dtype, out=out, where=where, **kwargs)
+    return call_origin(numpy.power, x1, x2, out=out, where=where, dtype=dtype, subok=subok, **kwargs)
 
 
 def prod(x1, axis=None, dtype=None, out=None, keepdims=False, initial=None, where=True):
@@ -1552,8 +1574,8 @@ def subtract(x1,
     
     Limitations
     -----------
-    Parameters `x1` and `x2` are supported as either :class:`dpnp.ndarray` or scalar,
-    but not both (at least either `x1` or `x2` should be as :class:`dpnp.ndarray`).
+    Parameters `x1` and `x2` are supported as either scalar, :class:`dpnp.ndarray`
+    or :class:`dpctl.tensor.usm_ndarray`, but both `x1` and `x2` can not be scalars at the same time.
     Parameters `out`, `where`, `dtype` and `subok` are supported with their default values.
     Keyword arguments ``kwargs`` are currently unsupported.
     Otherwise the function will be executed sequentially on CPU.
diff --git a/tests/helper.py b/tests/helper.py
index 17c62cecd28..bce21dcc29d 100644
--- a/tests/helper.py
+++ b/tests/helper.py
@@ -1,3 +1,5 @@
+from sys import platform
+
 import dpctl
 import dpnp
 
@@ -37,3 +39,18 @@ def get_all_dtypes(no_bool=False,
     if not no_none:
         dtypes.append(None)
     return dtypes
+
+
+def is_cpu_device(device=None):
+    """
+    Return True if a test is running on CPU device, False otherwise.
+    """
+    dev = dpctl.select_default_device() if device is None else device
+    return dev.has_aspect_cpu
+
+
+def is_win_platform():
+    """
+    Return True if a test is running on Windows OS, False otherwise.
+    """
+    return platform.startswith('win')
diff --git a/tests/skipped_tests.tbl b/tests/skipped_tests.tbl
index d598ea2ca9f..57a7d393e09 100644
--- a/tests/skipped_tests.tbl
+++ b/tests/skipped_tests.tbl
@@ -761,7 +761,6 @@ tests/third_party/cupy/math_tests/test_arithmetic.py::TestArithmeticModf::test_m
 tests/third_party/cupy/math_tests/test_arithmetic.py::TestArithmeticRaisesWithNumpyInput_param_10_{name='remainder', nargs=2}::test_raises_with_numpy_input
 tests/third_party/cupy/math_tests/test_arithmetic.py::TestArithmeticRaisesWithNumpyInput_param_11_{name='mod', nargs=2}::test_raises_with_numpy_input
 tests/third_party/cupy/math_tests/test_arithmetic.py::TestArithmeticRaisesWithNumpyInput_param_1_{name='angle', nargs=1}::test_raises_with_numpy_input
-tests/third_party/cupy/math_tests/test_arithmetic.py::TestArithmeticRaisesWithNumpyInput_param_5_{name='power', nargs=2}::test_raises_with_numpy_input
 tests/third_party/cupy/math_tests/test_arithmetic.py::TestArithmeticRaisesWithNumpyInput_param_8_{name='floor_divide', nargs=2}::test_raises_with_numpy_input
 
 tests/third_party/cupy/math_tests/test_explog.py::TestExplog::test_logaddexp
diff --git a/tests/skipped_tests_gpu.tbl b/tests/skipped_tests_gpu.tbl
index 3dedcff4af0..aebd577ed1a 100644
--- a/tests/skipped_tests_gpu.tbl
+++ b/tests/skipped_tests_gpu.tbl
@@ -976,7 +976,6 @@ tests/third_party/cupy/math_tests/test_arithmetic.py::TestArithmeticBinary2_para
 tests/third_party/cupy/math_tests/test_arithmetic.py::TestArithmeticRaisesWithNumpyInput_param_10_{name='remainder', nargs=2}::test_raises_with_numpy_input
 tests/third_party/cupy/math_tests/test_arithmetic.py::TestArithmeticRaisesWithNumpyInput_param_11_{name='mod', nargs=2}::test_raises_with_numpy_input
 tests/third_party/cupy/math_tests/test_arithmetic.py::TestArithmeticRaisesWithNumpyInput_param_1_{name='angle', nargs=1}::test_raises_with_numpy_input
-tests/third_party/cupy/math_tests/test_arithmetic.py::TestArithmeticRaisesWithNumpyInput_param_5_{name='power', nargs=2}::test_raises_with_numpy_input
 tests/third_party/cupy/math_tests/test_arithmetic.py::TestArithmeticRaisesWithNumpyInput_param_8_{name='floor_divide', nargs=2}::test_raises_with_numpy_input
 
 tests/third_party/cupy/math_tests/test_explog.py::TestExplog::test_logaddexp
diff --git a/tests/test_mathematical.py b/tests/test_mathematical.py
index 78f62890833..e58e129c03b 100644
--- a/tests/test_mathematical.py
+++ b/tests/test_mathematical.py
@@ -1,5 +1,9 @@
 import pytest
-from .helper import get_all_dtypes
+from .helper import (
+    get_all_dtypes,
+    is_cpu_device,
+    is_win_platform
+)
 
 import dpnp
 
@@ -8,6 +12,7 @@
     assert_allclose,
     assert_array_almost_equal,
     assert_array_equal,
+    assert_equal,
     assert_raises
 )
 
@@ -66,7 +71,7 @@ def test_diff(array):
 @pytest.mark.parametrize("dtype1", get_all_dtypes())
 @pytest.mark.parametrize("dtype2", get_all_dtypes())
 @pytest.mark.parametrize("func",
-                         ['add', 'multiply', 'subtract', 'divide'])
+                         ['add', 'divide', 'multiply', 'power', 'subtract'])
 @pytest.mark.parametrize("data",
                          [[[1, 2], [3, 4]]],
                          ids=['[[1, 2], [3, 4]]'])
@@ -84,7 +89,7 @@ def test_op_multiple_dtypes(dtype1, func, dtype2, data):
     else:
         result = getattr(dpnp, func)(dpnp_a, dpnp_b)
         expected = getattr(numpy, func)(np_a, np_b)
-        assert_array_equal(result, expected)
+        assert_allclose(result, expected)
 
 
 @pytest.mark.parametrize("rhs", [[[1, 2, 3], [4, 5, 6]], [2.0, 1.5, 1.0], 3, 0.3])
@@ -116,7 +121,7 @@ def _test_mathematical(self, name, dtype, lhs, rhs):
         else:
             result = getattr(dpnp, name)(a_dpnp, b_dpnp)
             expected = getattr(numpy, name)(a_np, b_np)
-            assert_allclose(result, expected, atol=1e-4)
+            assert_allclose(result, expected, rtol=1e-6)
 
     @pytest.mark.parametrize("dtype", get_all_dtypes())
     def test_add(self, dtype, lhs, rhs):
@@ -170,8 +175,7 @@ def test_multiply(self, dtype, lhs, rhs):
     def test_remainder(self, dtype, lhs, rhs):
         self._test_mathematical('remainder', dtype, lhs, rhs)
 
-    @pytest.mark.usefixtures("allow_fall_back_on_numpy")
-    @pytest.mark.parametrize("dtype", get_all_dtypes(no_bool=True, no_complex=True))
+    @pytest.mark.parametrize("dtype", get_all_dtypes())
     def test_power(self, dtype, lhs, rhs):
         self._test_mathematical('power', dtype, lhs, rhs)
 
@@ -186,7 +190,7 @@ def test_subtract(self, dtype, lhs, rhs):
                          ids=['bool', 'int', 'float'])
 @pytest.mark.parametrize("data_type", get_all_dtypes())
 @pytest.mark.parametrize("func",
-                         ['add', 'multiply', 'subtract', 'divide'])
+                         ['add', 'divide', 'multiply', 'power', 'subtract'])
 @pytest.mark.parametrize("val",
                          [0, 1, 5],
                          ids=['0', '1', '5'])
@@ -206,6 +210,13 @@ def test_op_with_scalar(array, val, func, data_type, val_type):
     dpnp_a = dpnp.array(array, dtype=data_type)
     val_ = val_type(val)
 
+    if func == 'power':
+        if val_ == 0 and numpy.issubdtype(data_type, numpy.complexfloating):
+            pytest.skip("(0j ** 0) is different: (NaN + NaNj) in dpnp and (1 + 0j) in numpy")
+        elif is_cpu_device() and data_type == dpnp.complex128:
+            # TODO: discuss the behavior with OneMKL team
+            pytest.skip("(0j ** 5) is different: (NaN + NaNj) in dpnp and (0j) in numpy")
+
     if func == 'subtract' and val_type == bool and data_type == dpnp.bool:
         with pytest.raises(TypeError):
             result = getattr(dpnp, func)(dpnp_a, val_)
@@ -216,11 +227,11 @@ def test_op_with_scalar(array, val, func, data_type, val_type):
     else:
         result = getattr(dpnp, func)(dpnp_a, val_)
         expected = getattr(numpy, func)(np_a, val_)
-        assert_allclose(result, expected)
+        assert_allclose(result, expected, rtol=1e-6)
 
         result = getattr(dpnp, func)(val_, dpnp_a)
         expected = getattr(numpy, func)(val_, np_a)
-        assert_allclose(result, expected)
+        assert_allclose(result, expected, rtol=1e-6)
 
 
 @pytest.mark.parametrize("shape",
@@ -275,6 +286,23 @@ def test_divide_scalar(shape, dtype):
     assert_allclose(result, expected)
 
 
+@pytest.mark.parametrize("shape",
+                         [(), (3, 2)],
+                         ids=['()', '(3, 2)'])
+@pytest.mark.parametrize("dtype", get_all_dtypes())
+def test_power_scalar(shape, dtype):
+    np_a = numpy.ones(shape, dtype=dtype)
+    dpnp_a = dpnp.ones(shape, dtype=dtype)
+
+    result = 4.2 ** dpnp_a ** -1.3
+    expected = 4.2 ** np_a ** -1.3
+    assert_allclose(result, expected, rtol=1e-6)
+
+    result **= dpnp_a
+    expected **= np_a
+    assert_allclose(result, expected, rtol=1e-6)
+
+
 @pytest.mark.usefixtures("allow_fall_back_on_numpy")
 @pytest.mark.parametrize("array", [[1, 2, 3, 4, 5],
                                    [1, 2, numpy.nan, 4, 5],
@@ -314,12 +342,11 @@ def test_negative(data, dtype):
     assert_array_equal(result, expected)
 
 
-@pytest.mark.usefixtures("allow_fall_back_on_numpy")
-@pytest.mark.parametrize("val_type", get_all_dtypes(no_bool=True, no_complex=True, no_none=True))
-@pytest.mark.parametrize("data_type", get_all_dtypes(no_bool=True, no_complex=True))
+@pytest.mark.parametrize("val_type", get_all_dtypes(no_none=True))
+@pytest.mark.parametrize("data_type", get_all_dtypes())
 @pytest.mark.parametrize("val",
-                         [0, 1, 5],
-                         ids=['0', '1', '5'])
+                         [1.5, 1, 5],
+                         ids=['1.5', '1', '5'])
 @pytest.mark.parametrize("array",
                          [[[0, 0], [0, 0]],
                           [[1, 2], [1, 2]],
@@ -335,9 +362,14 @@ def test_power(array, val, data_type, val_type):
     np_a = numpy.array(array, dtype=data_type)
     dpnp_a = dpnp.array(array, dtype=data_type)
     val_ = val_type(val)
+
+    if is_cpu_device() and dpnp.complex128 in (data_type, val_type):
+        # TODO: discuss the behavior with OneMKL team
+        pytest.skip("(0j ** 5) is different: (NaN + NaNj) in dpnp and (0j) in numpy")
+
     result = dpnp.power(dpnp_a, val_)
     expected = numpy.power(np_a, val_)
-    assert_array_equal(expected, result)
+    assert_allclose(expected, result, rtol=1e-6)
 
 
 class TestEdiff1d:
@@ -622,13 +654,10 @@ def test_power(self):
 
         assert_array_equal(expected, result)
 
-    @pytest.mark.parametrize("dtype",
-                             [numpy.float32, numpy.int64, numpy.int32],
-                             ids=['numpy.float32', 'numpy.int64', 'numpy.int32'])
+    @pytest.mark.parametrize("dtype", get_all_dtypes(no_complex=True, no_none=True))
     def test_invalid_dtype(self, dtype):
-
-        dp_array1 = dpnp.arange(10, dtype=dpnp.float64)
-        dp_array2 = dpnp.arange(5, 15, dtype=dpnp.float64)
+        dp_array1 = dpnp.arange(10, dtype=dpnp.complex64)
+        dp_array2 = dpnp.arange(5, 15, dtype=dpnp.complex64)
         dp_out = dpnp.empty(10, dtype=dtype)
 
         with pytest.raises(ValueError):
@@ -638,10 +667,60 @@ def test_invalid_dtype(self, dtype):
                              [(0,), (15, ), (2, 2)],
                              ids=['(0,)', '(15, )', '(2,2)'])
     def test_invalid_shape(self, shape):
-
         dp_array1 = dpnp.arange(10, dtype=dpnp.float64)
         dp_array2 = dpnp.arange(5, 15, dtype=dpnp.float64)
         dp_out = dpnp.empty(shape, dtype=dpnp.float64)
 
         with pytest.raises(ValueError):
             dpnp.power(dp_array1, dp_array2, out=dp_out)
+
+    @pytest.mark.parametrize("out",
+                             [4, (), [], (3, 7), [2, 4]],
+                             ids=['4', '()', '[]', '(3, 7)', '[2, 4]'])
+    def test_invalid_out(self, out):
+        a = dpnp.arange(10)
+
+        assert_raises(TypeError, dpnp.power, a, 2,  out)
+        assert_raises(TypeError, numpy.power, a.asnumpy(), 2,  out)
+
+    @pytest.mark.usefixtures("suppress_invalid_numpy_warnings")
+    def test_complex_values(self):
+        np_arr = numpy.array([0j, 1+1j, 0+2j, 1+2j, numpy.nan, numpy.inf])
+        dp_arr = dpnp.array(np_arr)
+        func = lambda x: x ** 2
+
+        # Linux: ((inf + 0j) ** 2) == (Inf + NaNj) in dpnp and == (NaN + NaNj) in numpy
+        # Win:   ((inf + 0j) ** 2) == (Inf + 0j)   in dpnp and == (Inf + NaNj) in numpy
+        if is_win_platform():
+            assert_equal(func(dp_arr)[5], numpy.inf)
+        else:
+            assert_equal(func(dp_arr)[5], (numpy.inf + 0j) * 1)
+        assert_allclose(func(np_arr)[:5], func(dp_arr).asnumpy()[:5], rtol=1e-6)
+
+    @pytest.mark.parametrize("val", [0, 1], ids=['0', '1'])
+    @pytest.mark.parametrize("dtype", [dpnp.int32, dpnp.int64])
+    def test_integer_power_of_0_or_1(self, val, dtype):
+        np_arr = numpy.arange(10, dtype=dtype)
+        dp_arr = dpnp.array(np_arr)
+        func = lambda x: 1 ** x
+
+        assert_equal(func(np_arr), func(dp_arr))
+
+    @pytest.mark.parametrize("dtype", [dpnp.int32, dpnp.int64])
+    def test_integer_to_negative_power(self, dtype):
+        ones = dpnp.ones(10, dtype=dtype)
+        a = dpnp.arange(2, 10, dtype=dtype)
+        b = dpnp.full(10, -2, dtype=dtype)
+
+        assert_array_equal(ones ** (-2), ones)
+        assert_equal(a ** (-3), 0) # positive integer to negative integer power
+        assert_equal(b ** (-4), 0) # negative integer to negative integer power
+
+    def test_float_to_inf(self):
+        a = numpy.array([1, 1, 2, 2, -2, -2, numpy.inf, -numpy.inf], dtype=numpy.float32)
+        b = numpy.array([numpy.inf, -numpy.inf, numpy.inf, -numpy.inf,
+                         numpy.inf, -numpy.inf, numpy.inf, -numpy.inf], dtype=numpy.float32)
+        numpy_res = a ** b
+        dpnp_res = dpnp.array(a) ** dpnp.array(b)
+
+        assert_allclose(numpy_res, dpnp_res.asnumpy())
diff --git a/tests/test_sycl_queue.py b/tests/test_sycl_queue.py
index 2197dbe5414..ea36a0992b2 100644
--- a/tests/test_sycl_queue.py
+++ b/tests/test_sycl_queue.py
@@ -1,6 +1,8 @@
 import pytest
-from .helper import get_all_dtypes
-import sys
+from .helper import (
+    get_all_dtypes,
+    is_win_platform
+)
 
 import dpnp
 import dpctl
@@ -204,7 +206,7 @@ def test_array_creation_follow_device(func, args, kwargs, device):
                           valid_devices,
                           ids=[device.filter_string for device in valid_devices])
 def test_array_creation_cross_device(func, args, kwargs, device_x, device_y):
-    if func is 'linspace' and sys.platform.startswith('win'):
+    if func is 'linspace' and is_win_platform():
         pytest.skip("CPU driver experiences an instability on Windows.")
 
     x_orig = numpy.array([1, 2, 3, 4])
diff --git a/tests/test_usm_type.py b/tests/test_usm_type.py
index 326d0313c93..5fec346a000 100644
--- a/tests/test_usm_type.py
+++ b/tests/test_usm_type.py
@@ -63,6 +63,21 @@ def test_coerced_usm_types_divide(usm_type_x, usm_type_y):
     assert z.usm_type == du.get_coerced_usm_type([usm_type_x, usm_type_y])
 
 
+@pytest.mark.parametrize("usm_type_x", list_of_usm_types, ids=list_of_usm_types)
+@pytest.mark.parametrize("usm_type_y", list_of_usm_types, ids=list_of_usm_types)
+def test_coerced_usm_types_power(usm_type_x, usm_type_y):
+    x = dp.arange(70, usm_type = usm_type_x).reshape((7, 5, 2))
+    y = dp.arange(70, usm_type = usm_type_y).reshape((7, 5, 2))
+
+    z = 2 ** x ** y ** 1.5
+    z **= x
+    z **= 1.7
+
+    assert x.usm_type == usm_type_x
+    assert y.usm_type == usm_type_y
+    assert z.usm_type == du.get_coerced_usm_type([usm_type_x, usm_type_y])
+
+
 @pytest.mark.parametrize(
     "func, args",
     [
diff --git a/tests/third_party/cupy/math_tests/test_arithmetic.py b/tests/third_party/cupy/math_tests/test_arithmetic.py
index 027722d8bef..39dc3e10f72 100644
--- a/tests/third_party/cupy/math_tests/test_arithmetic.py
+++ b/tests/third_party/cupy/math_tests/test_arithmetic.py
@@ -153,7 +153,7 @@ def check_binary(self, xp):
             is_int_float = lambda _x, _y: numpy.issubdtype(_x, numpy.integer) and numpy.issubdtype(_y, numpy.floating)
             is_same_type = lambda _x, _y, _type: numpy.issubdtype(_x, _type) and numpy.issubdtype(_y, _type)
 
-            if self.name in ('add', 'multiply', 'subtract'):
+            if self.name in ('add', 'multiply', 'power', 'subtract'):
                 if is_array_arg1 and is_array_arg2:
                     # If both inputs are arrays where one is of floating type and another - integer,
                     # NumPy will return an output array of always "float64" type,

From 648612da575dfcd9fd86fb3be9a76dc7eabc2dca Mon Sep 17 00:00:00 2001
From: Anton <100830759+antonwolfy@users.noreply.github.com>
Date: Fri, 3 Mar 2023 18:14:06 +0100
Subject: [PATCH 12/16] Add support of complex types for dpnp.abs() (#1324)

* Add support of complex types for dpnp.abs()

* Add test coverage

* State support of :class: in descriptions
---
 .../kernels/dpnp_krnl_mathematical.cpp        |  90 +++++++++------
 dpnp/backend/src/dpnp_fptr.hpp                |   6 +
 dpnp/dpnp_iface_mathematical.py               |  59 ++++++----
 tests/helper.py                               |  53 +++++++--
 tests/skipped_tests_gpu.tbl                   |   2 -
 tests/test_absolute.py                        | 103 +++++++++---------
 tests/test_strides.py                         |   4 +-
 7 files changed, 200 insertions(+), 117 deletions(-)

diff --git a/dpnp/backend/kernels/dpnp_krnl_mathematical.cpp b/dpnp/backend/kernels/dpnp_krnl_mathematical.cpp
index 32f8ffe465d..cbcd191fae6 100644
--- a/dpnp/backend/kernels/dpnp_krnl_mathematical.cpp
+++ b/dpnp/backend/kernels/dpnp_krnl_mathematical.cpp
@@ -1,5 +1,5 @@
 //*****************************************************************************
-// Copyright (c) 2016-2020, Intel Corporation
+// Copyright (c) 2016-2023, Intel Corporation
 // All rights reserved.
 //
 // Redistribution and use in source and binary forms, with or without
@@ -114,10 +114,10 @@ DPCTLSyclEventRef (*dpnp_around_ext_c)(DPCTLSyclQueueRef,
                                        const int,
                                        const DPCTLEventVectorRef) = dpnp_around_c<_DataType>;
 
-template <typename _KernelNameSpecialization>
+template <typename _KernelNameSpecialization1, typename _KernelNameSpecialization2>
 class dpnp_elemwise_absolute_c_kernel;
 
-template <typename _DataType>
+template <typename _DataType_input, typename _DataType_output>
 DPCTLSyclEventRef dpnp_elemwise_absolute_c(DPCTLSyclQueueRef q_ref,
                                            const void* input1_in,
                                            void* result1,
@@ -137,43 +137,63 @@ DPCTLSyclEventRef dpnp_elemwise_absolute_c(DPCTLSyclQueueRef q_ref,
     sycl::queue q = *(reinterpret_cast<sycl::queue*>(q_ref));
     sycl::event event;
 
-    DPNPC_ptr_adapter<_DataType> input1_ptr(q_ref, input1_in, size);
-    _DataType* array1 = input1_ptr.get_ptr();
-    DPNPC_ptr_adapter<_DataType> result1_ptr(q_ref, result1, size, false, true);
-    _DataType* result = result1_ptr.get_ptr();
+    _DataType_input* array1 = static_cast<_DataType_input*>(const_cast<void*>(input1_in));
+    _DataType_output* result = static_cast<_DataType_output*>(result1);
 
-    if constexpr (std::is_same<_DataType, double>::value || std::is_same<_DataType, float>::value)
+    if constexpr (is_any_v<_DataType_input, float, double, std::complex<float>, std::complex<double>>)
     {
-        // https://docs.oneapi.com/versions/latest/onemkl/abs.html
         event = oneapi::mkl::vm::abs(q, size, array1, result);
     }
     else
     {
-        sycl::range<1> gws(size);
-        auto kernel_parallel_for_func = [=](sycl::id<1> global_id) {
-            const size_t idx = global_id[0];
+        static_assert(is_any_v<_DataType_input, int32_t, int64_t>,
+                      "Integer types are only expected to pass in 'abs' kernel");
+        static_assert(std::is_same_v<_DataType_input, _DataType_output>, "Result type must match a type of input data");
+
+        constexpr size_t lws = 64;
+        constexpr unsigned int vec_sz = 8;
+        constexpr sycl::access::address_space global_space = sycl::access::address_space::global_space;
+
+        auto gws_range = sycl::range<1>(((size + lws * vec_sz - 1) / (lws * vec_sz)) * lws);
+        auto lws_range = sycl::range<1>(lws);
 
-            if (array1[idx] >= 0)
+        auto kernel_parallel_for_func = [=](sycl::nd_item<1> nd_it) {
+            auto sg = nd_it.get_sub_group();
+            const auto max_sg_size = sg.get_max_local_range()[0];
+            const size_t start =
+                vec_sz * (nd_it.get_group(0) * nd_it.get_local_range(0) + sg.get_group_id()[0] * max_sg_size);
+
+            if (start + static_cast<size_t>(vec_sz) * max_sg_size < size)
             {
-                result[idx] = array1[idx];
+                using input_ptrT = sycl::multi_ptr<_DataType_input, global_space>;
+                using result_ptrT = sycl::multi_ptr<_DataType_output, global_space>;
+
+                sycl::vec<_DataType_input, vec_sz> data_vec = sg.load<vec_sz>(input_ptrT(&array1[start]));
+
+                // sycl::abs() returns unsigned integers only, so explicit casting to signed ones is required
+                using result_absT = typename cl::sycl::detail::make_unsigned<_DataType_output>::type;
+                sycl::vec<_DataType_output, vec_sz> res_vec =
+                    dpnp_vec_cast<_DataType_output, result_absT, vec_sz>(sycl::abs(data_vec));
+
+                sg.store<vec_sz>(result_ptrT(&result[start]), res_vec);
             }
             else
             {
-                result[idx] = -1 * array1[idx];
+                for (size_t k = start + sg.get_local_id()[0]; k < size; k += max_sg_size)
+                {
+                    result[k] = std::abs(array1[k]);
+                }
             }
         };
 
         auto kernel_func = [&](sycl::handler& cgh) {
-            cgh.parallel_for<class dpnp_elemwise_absolute_c_kernel<_DataType>>(gws, kernel_parallel_for_func);
+            cgh.parallel_for<class dpnp_elemwise_absolute_c_kernel<_DataType_input, _DataType_output>>(
+                sycl::nd_range<1>(gws_range, lws_range), kernel_parallel_for_func);
         };
-
         event = q.submit(kernel_func);
     }
 
-    input1_ptr.depends_on(event);
-    result1_ptr.depends_on(event);
     event_ref = reinterpret_cast<DPCTLSyclEventRef>(&event);
-
     return DPCTLEvent_Copy(event_ref);
 }
 
@@ -182,28 +202,24 @@ void dpnp_elemwise_absolute_c(const void* input1_in, void* result1, size_t size)
 {
     DPCTLSyclQueueRef q_ref = reinterpret_cast<DPCTLSyclQueueRef>(&DPNP_QUEUE);
     DPCTLEventVectorRef dep_event_vec_ref = nullptr;
-    DPCTLSyclEventRef event_ref = dpnp_elemwise_absolute_c<_DataType>(q_ref,
-                                                                      input1_in,
-                                                                      result1,
-                                                                      size,
-                                                                      dep_event_vec_ref);
+    DPCTLSyclEventRef event_ref = dpnp_elemwise_absolute_c<_DataType, _DataType>(q_ref,
+                                                                                 input1_in,
+                                                                                 result1,
+                                                                                 size,
+                                                                                 dep_event_vec_ref);
     DPCTLEvent_WaitAndThrow(event_ref);
+    DPCTLEvent_Delete(event_ref);
 }
 
 template <typename _DataType>
 void (*dpnp_elemwise_absolute_default_c)(const void*, void*, size_t) = dpnp_elemwise_absolute_c<_DataType>;
 
-template <typename _DataType>
+template <typename _DataType_input, typename _DataType_output = _DataType_input>
 DPCTLSyclEventRef (*dpnp_elemwise_absolute_ext_c)(DPCTLSyclQueueRef,
                                                   const void*,
                                                   void*,
                                                   size_t,
-                                                  const DPCTLEventVectorRef) = dpnp_elemwise_absolute_c<_DataType>;
-
-// template void dpnp_elemwise_absolute_c<double>(void* array1_in, void* result1, size_t size);
-// template void dpnp_elemwise_absolute_c<float>(void* array1_in, void* result1, size_t size);
-// template void dpnp_elemwise_absolute_c<long>(void* array1_in, void* result1, size_t size);
-// template void dpnp_elemwise_absolute_c<int>(void* array1_in, void* result1, size_t size);
+                                                  const DPCTLEventVectorRef) = dpnp_elemwise_absolute_c<_DataType_input, _DataType_output>;
 
 template <typename _DataType_output, typename _DataType_input1, typename _DataType_input2>
 DPCTLSyclEventRef dpnp_cross_c(DPCTLSyclQueueRef q_ref,
@@ -1085,10 +1101,12 @@ void func_map_init_mathematical(func_map_t& fmap)
                                                                   (void*)dpnp_elemwise_absolute_ext_c<int32_t>};
     fmap[DPNPFuncName::DPNP_FN_ABSOLUTE_EXT][eft_LNG][eft_LNG] = {eft_LNG,
                                                                   (void*)dpnp_elemwise_absolute_ext_c<int64_t>};
-    fmap[DPNPFuncName::DPNP_FN_ABSOLUTE_EXT][eft_FLT][eft_FLT] = {eft_FLT,
-                                                                  (void*)dpnp_elemwise_absolute_ext_c<float>};
-    fmap[DPNPFuncName::DPNP_FN_ABSOLUTE_EXT][eft_DBL][eft_DBL] = {eft_DBL,
-                                                                  (void*)dpnp_elemwise_absolute_ext_c<double>};
+    fmap[DPNPFuncName::DPNP_FN_ABSOLUTE_EXT][eft_FLT][eft_FLT] = {eft_FLT, (void*)dpnp_elemwise_absolute_ext_c<float>};
+    fmap[DPNPFuncName::DPNP_FN_ABSOLUTE_EXT][eft_DBL][eft_DBL] = {eft_DBL, (void*)dpnp_elemwise_absolute_ext_c<double>};
+    fmap[DPNPFuncName::DPNP_FN_ABSOLUTE_EXT][eft_C64][eft_C64] = {
+        eft_FLT, (void*)dpnp_elemwise_absolute_ext_c<std::complex<float>, float>};
+    fmap[DPNPFuncName::DPNP_FN_ABSOLUTE_EXT][eft_C128][eft_C128] = {
+        eft_DBL, (void*)dpnp_elemwise_absolute_ext_c<std::complex<double>, double>};
 
     fmap[DPNPFuncName::DPNP_FN_AROUND][eft_INT][eft_INT] = {eft_INT, (void*)dpnp_around_default_c<int32_t>};
     fmap[DPNPFuncName::DPNP_FN_AROUND][eft_LNG][eft_LNG] = {eft_LNG, (void*)dpnp_around_default_c<int64_t>};
diff --git a/dpnp/backend/src/dpnp_fptr.hpp b/dpnp/backend/src/dpnp_fptr.hpp
index 742e6dff378..d6c48784e6b 100644
--- a/dpnp/backend/src/dpnp_fptr.hpp
+++ b/dpnp/backend/src/dpnp_fptr.hpp
@@ -163,6 +163,12 @@ struct is_any : std::disjunction<std::is_same<T, Ts>...> {};
 template <typename T, typename... Ts>
 struct are_same : std::conjunction<std::is_same<T, Ts>...> {};
 
+/**
+ * A template constant to check if type T matches any type from Ts.
+ */
+template <typename T, typename... Ts>
+constexpr auto is_any_v = is_any<T, Ts...>::value;
+
 /**
  * A template constat to check if both types T1 and T2 match every type from Ts sequence.
  */
diff --git a/dpnp/dpnp_iface_mathematical.py b/dpnp/dpnp_iface_mathematical.py
index 45f3b50accc..03d2a352775 100644
--- a/dpnp/dpnp_iface_mathematical.py
+++ b/dpnp/dpnp_iface_mathematical.py
@@ -117,7 +117,14 @@ def abs(*args, **kwargs):
     return dpnp.absolute(*args, **kwargs)
 
 
-def absolute(x1, **kwargs):
+def absolute(x,
+             /,
+             out=None,
+             *,
+             where=True,
+             dtype=None,
+             subok=True,
+             **kwargs):
     """
     Calculate the absolute value element-wise.
 
@@ -125,34 +132,48 @@ def absolute(x1, **kwargs):
 
     .. seealso:: :obj:`dpnp.abs` : Calculate the absolute value element-wise.
 
+    Returns
+    -------
+    y : dpnp.ndarray
+        An array containing the absolute value of each element in `x`.
+    
     Limitations
     -----------
-        Parameter ``x1`` is supported as :obj:`dpnp.ndarray`.
-        Dimension of input array is limited by ``x1.ndim != 0``.
-        Keyword arguments ``kwargs`` are currently unsupported.
-        Otherwise the functions will be executed sequentially on CPU.
-        Input array data types are limited by supported DPNP :ref:`Data types`.
+    Parameter `x` is only supported as either :class:`dpnp.ndarray` or :class:`dpctl.tensor.usm_ndarray`.
+    Parameters `out`, `where`, `dtype` and `subok` are supported only with their default values.
+    Keyword arguments ``kwargs`` are currently unsupported.
+    Otherwise the function will be executed sequentially on CPU.
+    Input array data types are limited by supported DPNP :ref:`Data types`.
 
     Examples
     --------
-    >>> import dpnp as np
-    >>> a = np.array([-1.2, 1.2])
-    >>> result = np.absolute(a)
+    >>> import dpnp as dp
+    >>> a = dp.array([-1.2, 1.2])
+    >>> result = dp.absolute(a)
     >>> [x for x in result]
     [1.2, 1.2]
 
     """
 
-    x1_desc = dpnp.get_dpnp_descriptor(x1, copy_when_nondefault_queue=False)
-    if x1_desc and not kwargs:
-        if not x1_desc.ndim:
-            pass
-        else:
-            result = dpnp_absolute(x1_desc).get_pyobj()
-
-            return result
-
-    return call_origin(numpy.absolute, x1, **kwargs)
+    if out is not None:
+        pass
+    elif where is not True:
+        pass
+    elif dtype is not None:
+        pass
+    elif subok is not True:
+        pass
+    elif dpnp.isscalar(x):
+        pass
+    else:
+        x_desc = dpnp.get_dpnp_descriptor(x, copy_when_nondefault_queue=False)
+        if x_desc:
+            if x_desc.dtype == dpnp.bool:
+                # return a copy of input array "x"
+                return dpnp.array(x, dtype=x.dtype, sycl_queue=x.sycl_queue, usm_type=x.usm_type)
+            return dpnp_absolute(x_desc).get_pyobj()
+
+    return call_origin(numpy.absolute, x, out=out, where=where, dtype=dtype, subok=subok, **kwargs)
 
 
 def add(x1,
diff --git a/tests/helper.py b/tests/helper.py
index bce21dcc29d..8432443d488 100644
--- a/tests/helper.py
+++ b/tests/helper.py
@@ -4,6 +4,48 @@
 import dpnp
 
 
+def get_complex_dtypes(device=None):
+    """
+    Build a list of complex types supported by DPNP based on device capabilities.
+    """
+
+    dev = dpctl.select_default_device() if device is None else device
+
+    # add complex types
+    dtypes = [dpnp.complex64]
+    if dev.has_aspect_fp64:
+        dtypes.append(dpnp.complex128)
+    return dtypes
+
+
+def get_float_dtypes(no_float16=True,
+                     device=None):
+    """
+    Build a list of floating types supported by DPNP based on device capabilities.
+    """
+
+    dev = dpctl.select_default_device() if device is None else device
+
+    # add float16 only when it is requested and the device reports fp16 support
+    dtypes = [dpnp.float16] if not no_float16 and dev.has_aspect_fp16 else []
+
+    dtypes.append(dpnp.float32)
+    if dev.has_aspect_fp64:
+        dtypes.append(dpnp.float64)
+    return dtypes
+
+
+def get_float_complex_dtypes(no_float16=True,
+                             device=None):
+    """
+    Build a list of floating and complex types supported by DPNP based on device capabilities.
+    """
+
+    dtypes = get_float_dtypes(no_float16, device)
+    dtypes.extend(get_complex_dtypes(device))
+    return dtypes
+
+
 def get_all_dtypes(no_bool=False,
                    no_float16=True,
                    no_complex=False,
@@ -22,18 +64,11 @@ def get_all_dtypes(no_bool=False,
     dtypes.extend([dpnp.int32, dpnp.int64])
 
     # add floating types
-    if not no_float16 and dev.has_aspect_fp16:
-        dtypes.append(dpnp.float16)
-
-    dtypes.append(dpnp.float32)
-    if dev.has_aspect_fp64:
-        dtypes.append(dpnp.float64)
+    dtypes.extend(get_float_dtypes(no_float16=no_float16, device=dev))
 
     # add complex types
     if not no_complex:
-        dtypes.append(dpnp.complex64)
-        if dev.has_aspect_fp64:
-            dtypes.append(dpnp.complex128)
+        dtypes.extend(get_complex_dtypes(dev))
 
     # add None value to validate a default dtype
     if not no_none:
diff --git a/tests/skipped_tests_gpu.tbl b/tests/skipped_tests_gpu.tbl
index aebd577ed1a..79debec15a2 100644
--- a/tests/skipped_tests_gpu.tbl
+++ b/tests/skipped_tests_gpu.tbl
@@ -12,7 +12,6 @@ tests/test_random.py::TestPermutationsTestShuffle::test_no_miss_numbers[int64]
 tests/test_random.py::TestPermutationsTestShuffle::test_shuffle1[lambda x: dpnp.array([])]
 tests/test_random.py::TestPermutationsTestShuffle::test_shuffle1[lambda x: dpnp.astype(dpnp.asarray(x), dpnp.float32)]
 
-tests/test_sycl_queue.py::test_1in_1out[opencl:gpu:0-abs-data0]
 tests/test_sycl_queue.py::test_1in_1out[opencl:gpu:0-ceil-data1]
 tests/test_sycl_queue.py::test_1in_1out[opencl:gpu:0-conjugate-data2]
 tests/test_sycl_queue.py::test_1in_1out[opencl:gpu:0-copy-data3]
@@ -22,7 +21,6 @@ tests/test_sycl_queue.py::test_1in_1out[opencl:gpu:0-ediff1d-data7]
 tests/test_sycl_queue.py::test_1in_1out[opencl:gpu:0-fabs-data8]
 tests/test_sycl_queue.py::test_1in_1out[opencl:gpu:0-floor-data9]
 
-tests/test_sycl_queue.py::test_1in_1out[level_zero:gpu:0-abs-data0]
 tests/test_sycl_queue.py::test_1in_1out[level_zero:gpu:0-ceil-data1]
 tests/test_sycl_queue.py::test_1in_1out[level_zero:gpu:0-conjugate-data2]
 tests/test_sycl_queue.py::test_1in_1out[level_zero:gpu:0-copy-data3]
diff --git a/tests/test_absolute.py b/tests/test_absolute.py
index aa145cc9202..81929ca3aa1 100644
--- a/tests/test_absolute.py
+++ b/tests/test_absolute.py
@@ -1,53 +1,58 @@
 import pytest
+from .helper import (
+    get_all_dtypes,
+    get_complex_dtypes,
+    get_float_complex_dtypes
+)
 
-import dpnp as inp
+import dpnp
 
 import numpy
-
-
-@pytest.mark.parametrize("type",
-                         [numpy.int64],
-                         ids=['int64'])
-def test_abs_int(type):
-    a = numpy.array([1, 0, 2, -3, -1, 2, 21, -9])
-    ia = inp.array(a)
-
-    result = inp.abs(ia)
-    expected = numpy.abs(a)
-    numpy.testing.assert_array_equal(expected, result)
-
-
-@pytest.mark.parametrize("type",
-                         [numpy.int64],
-                         ids=['int64'])
-def test_absolute_int(type):
-    a = numpy.array([1, 0, 2, -3, -1, 2, 21, -9])
-    ia = inp.array(a)
-
-    result = inp.absolute(ia)
-    expected = numpy.absolute(a)
-    numpy.testing.assert_array_equal(expected, result)
-
-
-@pytest.mark.parametrize("type",
-                         [numpy.float64],
-                         ids=['float64'])
-def test_absolute_float(type):
-    a = numpy.array([[-2., 3., 9.1], [-2., 5.0, -2], [1.0, -2., 5.0]])
-    ia = inp.array(a)
-
-    result = inp.absolute(ia)
-    expected = numpy.absolute(a)
-    numpy.testing.assert_array_equal(expected, result)
-
-
-@pytest.mark.parametrize("type",
-                         [numpy.float64],
-                         ids=['float64'])
-def test_absolute_float_3(type):
-    a = numpy.array([[[-2., 3.], [9.1, 0.2]], [[-2., 5.0], [-2, -1.2]], [[1.0, -2.], [5.0, -1.1]]])
-    ia = inp.array(a)
-
-    result = inp.absolute(ia)
-    expected = numpy.absolute(a)
-    numpy.testing.assert_array_equal(expected, result)
+from numpy.testing import (
+    assert_array_equal,
+    assert_equal
+)
+
+
+@pytest.mark.parametrize("func", ["abs", "absolute"])
+@pytest.mark.parametrize("dtype", get_all_dtypes())
+def test_abs(func, dtype):
+    a = numpy.array([1, 0, 2, -3, -1, 2, 21, -9], dtype=dtype)
+    ia = dpnp.array(a)
+
+    result = getattr(dpnp, func)(ia)
+    expected = getattr(numpy, func)(a)
+    assert_array_equal(expected, result)
+    assert_equal(result.dtype, expected.dtype)
+
+
+@pytest.mark.parametrize("stride", [-4, -2, -1, 1, 2, 4])
+@pytest.mark.parametrize("dtype", get_complex_dtypes())
+def test_abs_complex(stride, dtype):
+    np_arr = numpy.array([complex(numpy.nan , numpy.nan),
+                          complex(numpy.nan , numpy.inf),
+                          complex(numpy.inf , numpy.nan),
+                          complex(numpy.inf , numpy.inf),
+                          complex(0.        , numpy.inf),
+                          complex(numpy.inf , 0.),
+                          complex(0.        , 0.),
+                          complex(0.        , numpy.nan),
+                          complex(numpy.nan , 0.)], dtype=dtype)
+    dpnp_arr = dpnp.array(np_arr)
+    assert_equal(numpy.abs(np_arr[::stride]), dpnp.abs(dpnp_arr[::stride]))
+
+
+@pytest.mark.parametrize("arraysize", [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 13, 15, 17, 18, 19])
+@pytest.mark.parametrize("stride", [-4, -3, -2, -1, 1, 2, 3, 4])
+@pytest.mark.parametrize("astype", get_complex_dtypes())
+def test_abs_complex_avx(arraysize, stride, astype):
+    np_arr = numpy.ones(arraysize, dtype=astype)
+    dpnp_arr = dpnp.array(np_arr)
+    assert_equal(numpy.abs(np_arr[::stride]), dpnp.abs(dpnp_arr[::stride]))
+
+
+@pytest.mark.parametrize("dtype", get_float_complex_dtypes())
+def test_abs_values(dtype):
+    np_arr = numpy.array([numpy.nan, -numpy.nan, numpy.inf, -numpy.inf, 0., -0., -1.0, 1.0], dtype=dtype)
+    dpnp_arr = dpnp.array(np_arr)
+    assert_equal(numpy.abs(np_arr), dpnp.abs(dpnp_arr))
diff --git a/tests/test_strides.py b/tests/test_strides.py
index 02e8c868975..e56e9befeee 100644
--- a/tests/test_strides.py
+++ b/tests/test_strides.py
@@ -20,10 +20,10 @@ def _getattr(ex, str_):
 
 @pytest.mark.parametrize("func_name",
                          ['abs', ])
-@pytest.mark.parametrize("dtype", get_all_dtypes(no_bool=True, no_complex=True))
+@pytest.mark.parametrize("dtype", get_all_dtypes())
 def test_strides(func_name, dtype):
     shape = (4, 4)
-    a = numpy.arange(shape[0] * shape[1], dtype=dtype).reshape(shape)
+    a = numpy.ones(shape[0] * shape[1], dtype=dtype).reshape(shape)
     a_strides = a[0::2, 0::2]
     dpa = dpnp.array(a)
     dpa_strides = dpa[0::2, 0::2]

From 2224ce2f06b2e9a6e01fd3c3a0a7723310018c82 Mon Sep 17 00:00:00 2001
From: Anton <100830759+antonwolfy@users.noreply.github.com>
Date: Sun, 5 Mar 2023 11:42:56 +0100
Subject: [PATCH 13/16] Add parameter out in dpnp.dot() (#1327)

---
 dpnp/dpnp_algo/dpnp_algo_linearalgebra.pyx    | 35 +++++++---
 dpnp/dpnp_array.py                            |  3 +-
 dpnp/dpnp_iface_linearalgebra.py              | 67 +++++++++++--------
 tests/skipped_tests.tbl                       |  4 --
 tests/skipped_tests_gpu.tbl                   |  5 --
 tests/test_dot.py                             | 23 +++----
 tests/test_sycl_queue.py                      | 28 +++++---
 tests/test_usm_type.py                        | 19 ++++++
 .../cupy/linalg_tests/test_product.py         |  1 -
 9 files changed, 113 insertions(+), 72 deletions(-)

diff --git a/dpnp/dpnp_algo/dpnp_algo_linearalgebra.pyx b/dpnp/dpnp_algo/dpnp_algo_linearalgebra.pyx
index c738cc75b70..91c1da88405 100644
--- a/dpnp/dpnp_algo/dpnp_algo_linearalgebra.pyx
+++ b/dpnp/dpnp_algo/dpnp_algo_linearalgebra.pyx
@@ -1,7 +1,7 @@
 # cython: language_level=3
 # -*- coding: utf-8 -*-
 # *****************************************************************************
-# Copyright (c) 2016-2020, Intel Corporation
+# Copyright (c) 2016-2023, Intel Corporation
 # All rights reserved.
 #
 # Redistribution and use in source and binary forms, with or without
@@ -65,8 +65,9 @@ ctypedef c_dpctl.DPCTLSyclEventRef(*fptr_2in_1out_matmul_t)(c_dpctl.DPCTLSyclQue
                                                             const shape_elem_type *, const shape_elem_type * ,
                                                             const c_dpctl.DPCTLEventVectorRef)
 
-cpdef utils.dpnp_descriptor dpnp_dot(utils.dpnp_descriptor in_array1, utils.dpnp_descriptor in_array2):
-
+cpdef utils.dpnp_descriptor dpnp_dot(utils.dpnp_descriptor in_array1,
+                                     utils.dpnp_descriptor in_array2,
+                                     utils.dpnp_descriptor out=None):
     cdef shape_type_c shape1, shape2
 
     shape1 = in_array1.shape
@@ -78,6 +79,7 @@ cpdef utils.dpnp_descriptor dpnp_dot(utils.dpnp_descriptor in_array1, utils.dpnp
 
     # get the FPTR data structure
     cdef DPNPFuncData kernel_data = get_dpnp_function_ptr(DPNP_FN_DOT_EXT, param1_type, param2_type)
+    cdef utils.dpnp_descriptor result
 
     ndim1 = in_array1.ndim
     ndim2 = in_array2.ndim
@@ -89,7 +91,7 @@ cpdef utils.dpnp_descriptor dpnp_dot(utils.dpnp_descriptor in_array1, utils.dpnp
     elif ndim1 == 1 and ndim2 == 1:
         result_shape = ()
     elif ndim1 == 1:  # ndim2 > 1
-        result_shape = shape2[:-1]
+        result_shape = shape2[::-2] if ndim2 == 2 else shape2[::2]
     elif ndim2 == 1:  # ndim1 > 1
         result_shape = shape1[:-1]
     else:
@@ -101,13 +103,24 @@ cpdef utils.dpnp_descriptor dpnp_dot(utils.dpnp_descriptor in_array1, utils.dpnp
 
     result_sycl_device, result_usm_type, result_sycl_queue = utils.get_common_usm_allocation(in_array1, in_array2)
 
-    # create result array with type given by FPTR data
-    cdef utils.dpnp_descriptor result = utils.create_output_descriptor(result_shape,
-                                                                       kernel_data.return_type,
-                                                                       None,
-                                                                       device=result_sycl_device,
-                                                                       usm_type=result_usm_type,
-                                                                       sycl_queue=result_sycl_queue)
+    if out is None:
+        # create result array with type given by FPTR data
+        result = utils.create_output_descriptor(result_shape,
+                                                kernel_data.return_type,
+                                                None,
+                                                device=result_sycl_device,
+                                                usm_type=result_usm_type,
+                                                sycl_queue=result_sycl_queue)
+    else:
+        result_type = dpnp_DPNPFuncType_to_dtype(< size_t > kernel_data.return_type)
+        if out.dtype != result_type:
+            utils.checker_throw_value_error('dot', 'out.dtype', out.dtype, result_type)
+        if out.shape != result_shape:
+            utils.checker_throw_value_error('dot', 'out.shape', out.shape, result_shape)
+
+        result = out
+
+        utils.get_common_usm_allocation(in_array1, result)  # check USM allocation is common
 
     cdef shape_type_c result_strides = utils.strides_to_vector(result.strides, result.shape)
     cdef shape_type_c in_array1_shape = in_array1.shape
diff --git a/dpnp/dpnp_array.py b/dpnp/dpnp_array.py
index b3dfe7a339e..6c743f4e1c9 100644
--- a/dpnp/dpnp_array.py
+++ b/dpnp/dpnp_array.py
@@ -592,7 +592,8 @@ def diagonal(input, offset=0, axis1=0, axis2=1):
 
         return dpnp.diagonal(input, offset, axis1, axis2)
 
- # 'dot',
+    def dot(self, other, out=None):
+        return dpnp.dot(self, other, out)
 
     @property
     def dtype(self):
diff --git a/dpnp/dpnp_iface_linearalgebra.py b/dpnp/dpnp_iface_linearalgebra.py
index 117a1b9b61d..a989f745c0a 100644
--- a/dpnp/dpnp_iface_linearalgebra.py
+++ b/dpnp/dpnp_iface_linearalgebra.py
@@ -44,9 +44,9 @@
 from dpnp.dpnp_algo import *
 from dpnp.dpnp_utils import *
 import dpnp
-import dpnp.config as config
 
 import numpy
+import dpctl.tensor as dpt
 
 
 __all__ = [
@@ -62,18 +62,25 @@
 ]
 
 
-def dot(x1, x2, **kwargs):
+def dot(x1, x2, out=None, **kwargs):
     """
-    Returns the dot product of `x1` and `x2`.
+    Dot product of `x1` and `x2`.
 
     For full documentation refer to :obj:`numpy.dot`.
 
+    Returns
+    -------
+    y : dpnp.ndarray
+        Returns the dot product of `x1` and `x2`.
+        If `out` is given, then it is returned.
+    
     Limitations
     -----------
-        Parameters ``x1`` and ``x2`` are supported as :obj:`dpnp.ndarray` of the same type.
-        Keyword arguments ``kwargs`` are currently unsupported.
-        Otherwise the functions will be executed sequentially on CPU.
-        Input array data types are limited by supported DPNP :ref:`Data types`.
+    Parameters `x1` and `x2` are supported as either scalar, :class:`dpnp.ndarray`
+    or :class:`dpctl.tensor.usm_ndarray`, but both `x1` and `x2` can not be scalars at the same time.
+    Keyword argument ``kwargs`` is currently unsupported.
+    Otherwise the function will be executed sequentially on CPU.
+    Input array data types are limited by supported DPNP :ref:`Data types`.
 
     See Also
     --------
@@ -82,31 +89,37 @@ def dot(x1, x2, **kwargs):
 
     Examples
     --------
-    >>> import dpnp as np
-    >>> np.dot(3, 4)
-    12
-    >>> a = np.array([1, 2, 3])
-    >>> b = np.array([1, 2, 3])
-    >>> np.dot(a, b)
+    >>> import dpnp as dp
+    >>> a = dp.array([1, 2, 3])
+    >>> b = dp.array([1, 2, 3])
+    >>> dp.dot(a, b)
     14
 
     """
 
-    x1_desc = dpnp.get_dpnp_descriptor(x1, copy_when_strides=False, copy_when_nondefault_queue=False)
-    x2_desc = dpnp.get_dpnp_descriptor(x2, copy_when_strides=False, copy_when_nondefault_queue=False)
-    if x1_desc and x2_desc and not kwargs:
-        # TODO: remove fallback with scalars when muliply backend func will support strides
-        if(x1_desc.ndim == 0 and x2_desc.strides is not None
-                or x2_desc.ndim == 0 and x1_desc.strides is not None):
-            pass
-        elif (x1_desc.ndim >= 1 and x2_desc.ndim > 1 and x1_desc.shape[-1] != x2_desc.shape[-2]):
-            pass
-        elif (x1_desc.ndim > 0 and x2_desc.ndim == 1 and x1_desc.shape[-1] != x2_desc.shape[0]):
-            pass
-        else:
-            return dpnp_dot(x1_desc, x2_desc).get_pyobj()
+    if kwargs:
+        pass
+    elif dpnp.isscalar(x1) and dpnp.isscalar(x2):
+        # at least either x1 or x2 has to be an array
+        pass
+    else:
+        # get USM type and queue to copy scalar from the host memory into a USM allocation
+        usm_type, queue = get_usm_allocations([x1, x2]) if dpnp.isscalar(x1) or dpnp.isscalar(x2) else (None, None)
+
+        x1_desc = dpnp.get_dpnp_descriptor(x1, copy_when_strides=False, copy_when_nondefault_queue=False,
+                                           alloc_usm_type=usm_type, alloc_queue=queue)
+        x2_desc = dpnp.get_dpnp_descriptor(x2, copy_when_strides=False, copy_when_nondefault_queue=False,
+                                           alloc_usm_type=usm_type, alloc_queue=queue)
+        if x1_desc and x2_desc:
+            if out is not None:
+                if not isinstance(out, (dpnp.ndarray, dpt.usm_ndarray)):
+                    raise TypeError("return array must be of supported array type")
+                out_desc = dpnp.get_dpnp_descriptor(out, copy_when_nondefault_queue=False)
+            else:
+                out_desc = None
+            return dpnp_dot(x1_desc, x2_desc, out=out_desc).get_pyobj()
 
-    return call_origin(numpy.dot, x1, x2, **kwargs)
+    return call_origin(numpy.dot, x1, x2, out=out, **kwargs)
 
 
 def einsum(*args, **kwargs):
diff --git a/tests/skipped_tests.tbl b/tests/skipped_tests.tbl
index 57a7d393e09..995ddd682c7 100644
--- a/tests/skipped_tests.tbl
+++ b/tests/skipped_tests.tbl
@@ -610,10 +610,6 @@ tests/third_party/cupy/linalg_tests/test_einsum.py::TestEinSumLarge_param_9_{opt
 tests/third_party/cupy/linalg_tests/test_einsum.py::TestEinSumUnaryOperationWithScalar::test_scalar_float
 tests/third_party/cupy/linalg_tests/test_einsum.py::TestEinSumUnaryOperationWithScalar::test_scalar_int
 tests/third_party/cupy/linalg_tests/test_einsum.py::TestListArgEinSumError::test_invalid_sub1
-tests/third_party/cupy/linalg_tests/test_product.py::TestDot_param_64_{shape=((2,), (2, 4)), trans_a=True, trans_b=True}::test_dot
-tests/third_party/cupy/linalg_tests/test_product.py::TestDot_param_65_{shape=((2,), (2, 4)), trans_a=True, trans_b=False}::test_dot
-tests/third_party/cupy/linalg_tests/test_product.py::TestDot_param_66_{shape=((2,), (2, 4)), trans_a=False, trans_b=True}::test_dot
-tests/third_party/cupy/linalg_tests/test_product.py::TestDot_param_67_{shape=((2,), (2, 4)), trans_a=False, trans_b=False}::test_dot
 tests/third_party/cupy/linalg_tests/test_product.py::TestMatrixPower::test_matrix_power_invlarge
 tests/third_party/cupy/linalg_tests/test_product.py::TestMatrixPower::test_matrix_power_large
 tests/third_party/cupy/linalg_tests/test_product.py::TestMatrixPower::test_matrix_power_of_two
diff --git a/tests/skipped_tests_gpu.tbl b/tests/skipped_tests_gpu.tbl
index 79debec15a2..e3e0f889b2d 100644
--- a/tests/skipped_tests_gpu.tbl
+++ b/tests/skipped_tests_gpu.tbl
@@ -812,10 +812,6 @@ tests/third_party/cupy/linalg_tests/test_einsum.py::TestEinSumUnaryOperationWith
 tests/third_party/cupy/linalg_tests/test_einsum.py::TestEinSumUnaryOperationWithScalar::test_scalar_int
 tests/third_party/cupy/linalg_tests/test_einsum.py::TestListArgEinSumError::test_invalid_sub1
 tests/third_party/cupy/linalg_tests/test_product.py::TestProduct::test_transposed_dot
-tests/third_party/cupy/linalg_tests/test_product.py::TestDot_param_64_{shape=((2,), (2, 4)), trans_a=True, trans_b=True}::test_dot
-tests/third_party/cupy/linalg_tests/test_product.py::TestDot_param_65_{shape=((2,), (2, 4)), trans_a=True, trans_b=False}::test_dot
-tests/third_party/cupy/linalg_tests/test_product.py::TestDot_param_66_{shape=((2,), (2, 4)), trans_a=False, trans_b=True}::test_dot
-tests/third_party/cupy/linalg_tests/test_product.py::TestDot_param_67_{shape=((2,), (2, 4)), trans_a=False, trans_b=False}::test_dot
 tests/third_party/cupy/linalg_tests/test_product.py::TestMatrixPower::test_matrix_power_invlarge
 tests/third_party/cupy/linalg_tests/test_product.py::TestMatrixPower::test_matrix_power_large
 tests/third_party/cupy/linalg_tests/test_product.py::TestMatrixPower::test_matrix_power_of_two
@@ -827,7 +823,6 @@ tests/third_party/cupy/linalg_tests/test_product.py::TestProduct::test_transpose
 tests/third_party/cupy/linalg_tests/test_product.py::TestProduct::test_transposed_tensordot
 tests/third_party/cupy/linalg_tests/test_product.py::TestProduct::test_transposed_tensordot_with_int_axes
 tests/third_party/cupy/linalg_tests/test_product.py::TestProduct::test_transposed_tensordot_with_list_axes
-tests/third_party/cupy/linalg_tests/test_product.py::TestProduct::
 tests/third_party/cupy/linalg_tests/test_product.py::TestProduct::test_tensordot_zero_dim
 tests/third_party/cupy/linalg_tests/test_product.py::TestProduct::test_transposed_dot_with_out_f_contiguous
 tests/third_party/cupy/linalg_tests/test_product.py::TestProduct::test_transposed_multidim_vdot
diff --git a/tests/test_dot.py b/tests/test_dot.py
index ae6341ea909..b9cb5659973 100644
--- a/tests/test_dot.py
+++ b/tests/test_dot.py
@@ -1,13 +1,16 @@
 import pytest
+from .helper import get_all_dtypes
 
 import dpnp as inp
 
 import numpy
+from numpy.testing import (
+    assert_allclose,
+    assert_array_equal
+)
 
 
-@pytest.mark.parametrize("type",
-                         [numpy.float64, numpy.float32, numpy.int64, numpy.int32],
-                         ids=['float64', 'float32', 'int64', 'int32'])
+@pytest.mark.parametrize("type", get_all_dtypes(no_bool=True, no_complex=True))
 def test_dot_ones(type):
     n = 10**5
     a = numpy.ones(n, dtype=type)
@@ -17,12 +20,10 @@ def test_dot_ones(type):
 
     result = inp.dot(ia, ib)
     expected = numpy.dot(a, b)
-    numpy.testing.assert_array_equal(expected, result)
+    assert_array_equal(expected, result)
 
 
-@pytest.mark.parametrize("type",
-                         [numpy.float64, numpy.float32, numpy.int64, numpy.int32],
-                         ids=['float64', 'float32', 'int64', 'int32'])
+@pytest.mark.parametrize("type", get_all_dtypes(no_bool=True, no_complex=True))
 def test_dot_arange(type):
     n = 10**2
     m = 10**3
@@ -33,12 +34,10 @@ def test_dot_arange(type):
 
     result = inp.dot(ia, ib)
     expected = numpy.dot(a, b)
-    numpy.testing.assert_allclose(expected, result)
+    assert_allclose(expected, result)
 
 
-@pytest.mark.parametrize("type",
-                         [numpy.float64, numpy.float32, numpy.int64, numpy.int32],
-                         ids=['float64', 'float32', 'int64', 'int32'])
+@pytest.mark.parametrize("type", get_all_dtypes(no_bool=True, no_complex=True))
 def test_multi_dot(type):
     n = 16
     a = inp.reshape(inp.arange(n, dtype=type), (4, 4))
@@ -53,4 +52,4 @@ def test_multi_dot(type):
 
     result = inp.linalg.multi_dot([a, b, c, d])
     expected = numpy.linalg.multi_dot([a1, b1, c1, d1])
-    numpy.testing.assert_array_equal(expected, result)
+    assert_array_equal(expected, result)
diff --git a/tests/test_sycl_queue.py b/tests/test_sycl_queue.py
index ea36a0992b2..42cbe745951 100644
--- a/tests/test_sycl_queue.py
+++ b/tests/test_sycl_queue.py
@@ -297,7 +297,7 @@ def test_1in_1out(func, data, device):
     x = dpnp.array(data, device=device)
     result = getattr(dpnp, func)(x)
 
-    numpy.testing.assert_array_equal(result, expected)
+    assert_array_equal(result, expected)
 
     expected_queue = x.get_array().sycl_queue
     result_queue = result.get_array().sycl_queue
@@ -320,6 +320,9 @@ def test_1in_1out(func, data, device):
         pytest.param("divide",
                      [0., 1., 2., 3., 4.],
                      [4., 4., 4., 4., 4.]),
+        pytest.param("dot",
+                     [[0., 1., 2.], [3., 4., 5.]],
+                     [[4., 4.], [4., 4.], [4., 4.]]),
         pytest.param("floor_divide",
                      [1., 2., 3., 4.],
                      [2.5, 2.5, 2.5, 2.5]),
@@ -364,7 +367,7 @@ def test_2in_1out(func, data1, data2, device):
     x2 = dpnp.array(data2, device=device)
     result = getattr(dpnp, func)(x1, x2)
 
-    numpy.testing.assert_array_equal(result, expected)
+    assert_array_equal(result, expected)
 
     assert_sycl_queue_equal(result.sycl_queue, x1.sycl_queue)
     assert_sycl_queue_equal(result.sycl_queue, x2.sycl_queue)
@@ -539,6 +542,9 @@ def test_random_state(func, args, kwargs, device, usm_type):
         pytest.param("divide",
                      [0., 1., 2., 3., 4.],
                      [4., 4., 4., 4., 4.]),
+        pytest.param("dot",
+                     [[0., 1., 2.], [3., 4., 5.]],
+                     [[4., 4.], [4., 4.], [4., 4.]]),
         pytest.param("floor_divide",
                      [1., 2., 3., 4.],
                      [2.5, 2.5, 2.5, 2.5]),
@@ -571,20 +577,20 @@ def test_random_state(func, args, kwargs, device, usm_type):
 def test_out(func, data1, data2, device):
     x1_orig = numpy.array(data1)
     x2_orig = numpy.array(data2)
-    expected = numpy.empty(x1_orig.size)
-    numpy.add(x1_orig, x2_orig, out=expected)
+    np_out = getattr(numpy, func)(x1_orig, x2_orig)
+    expected = numpy.empty_like(np_out)
+    getattr(numpy, func)(x1_orig, x2_orig, out=expected)
 
     x1 = dpnp.array(data1, device=device)
     x2 = dpnp.array(data2, device=device)
-    result = dpnp.empty(x1.size, device=device)
-    dpnp.add(x1, x2, out=result)
+    dp_out = getattr(dpnp, func)(x1, x2)
+    result = dpnp.empty_like(dp_out)
+    getattr(dpnp, func)(x1, x2, out=result)
 
-    numpy.testing.assert_array_equal(result, expected)
+    assert_array_equal(result, expected)
 
-    expected_queue = x1.get_array().sycl_queue
-    result_queue = result.get_array().sycl_queue
-
-    assert_sycl_queue_equal(result_queue, expected_queue)
+    assert_sycl_queue_equal(result.sycl_queue, x1.sycl_queue)
+    assert_sycl_queue_equal(result.sycl_queue, x2.sycl_queue)
 
 
 @pytest.mark.parametrize("device",
diff --git a/tests/test_usm_type.py b/tests/test_usm_type.py
index 5fec346a000..96d55f6875c 100644
--- a/tests/test_usm_type.py
+++ b/tests/test_usm_type.py
@@ -154,3 +154,22 @@ def test_meshgrid(usm_type_x, usm_type_y):
     z = dp.meshgrid(x, y)
     assert z[0].usm_type == usm_type_x
     assert z[1].usm_type == usm_type_y
+
+@pytest.mark.parametrize(
+    "func,data1,data2",
+    [
+        pytest.param("dot",
+                     [[0., 1., 2.], [3., 4., 5.]],
+                     [[4., 4.], [4., 4.], [4., 4.]]),
+    ],
+)
+@pytest.mark.parametrize("usm_type_x", list_of_usm_types, ids=list_of_usm_types)
+@pytest.mark.parametrize("usm_type_y", list_of_usm_types, ids=list_of_usm_types)
+def test_2in_1out(func, data1, data2, usm_type_x, usm_type_y):
+    x = dp.array(data1, usm_type=usm_type_x)
+    y = dp.array(data2, usm_type=usm_type_y)
+    z = getattr(dp, func)(x, y)
+    # inputs keep the requested USM type; the result has the coerced one
+    assert x.usm_type == usm_type_x
+    assert y.usm_type == usm_type_y
+    assert z.usm_type == du.get_coerced_usm_type([usm_type_x, usm_type_y])
diff --git a/tests/third_party/cupy/linalg_tests/test_product.py b/tests/third_party/cupy/linalg_tests/test_product.py
index 2a97fa79b7c..d25cebbfa67 100644
--- a/tests/third_party/cupy/linalg_tests/test_product.py
+++ b/tests/third_party/cupy/linalg_tests/test_product.py
@@ -31,7 +31,6 @@
     'trans_a': [True, False],
     'trans_b': [True, False],
 }))
-@pytest.mark.usefixtures("allow_fall_back_on_numpy")
 @testing.gpu
 class TestDot(unittest.TestCase):
 

From cd2418421e9ae38f30855d7a80520db4b0f43db8 Mon Sep 17 00:00:00 2001
From: Anton <100830759+antonwolfy@users.noreply.github.com>
Date: Sun, 5 Mar 2023 14:19:35 +0100
Subject: [PATCH 14/16] Excess memcpy to shared memory in elementwise and
 bitwise functions (#1328)

---
 .github/workflows/conda-package.yml         |   1 +
 dpnp/backend/kernels/dpnp_krnl_bitwise.cpp  |  97 ++++++++-------
 dpnp/backend/kernels/dpnp_krnl_elemwise.cpp | 131 ++++++++++++--------
 3 files changed, 130 insertions(+), 99 deletions(-)

diff --git a/.github/workflows/conda-package.yml b/.github/workflows/conda-package.yml
index 64a14a85be5..52ac769b7fe 100644
--- a/.github/workflows/conda-package.yml
+++ b/.github/workflows/conda-package.yml
@@ -12,6 +12,7 @@ env:
   CHANNELS: '-c dppy/label/dev -c intel -c main --override-channels'
   TEST_SCOPE: >-
       test_arraycreation.py
+      test_dot.py
       test_dparray.py
       test_fft.py
       test_linalg.py
diff --git a/dpnp/backend/kernels/dpnp_krnl_bitwise.cpp b/dpnp/backend/kernels/dpnp_krnl_bitwise.cpp
index b64670be4e0..c082bd636bf 100644
--- a/dpnp/backend/kernels/dpnp_krnl_bitwise.cpp
+++ b/dpnp/backend/kernels/dpnp_krnl_bitwise.cpp
@@ -1,5 +1,5 @@
 //*****************************************************************************
-// Copyright (c) 2016-2020, Intel Corporation
+// Copyright (c) 2016-2023, Intel Corporation
 // All rights reserved.
 //
 // Redistribution and use in source and binary forms, with or without
@@ -148,53 +148,62 @@ static void func_map_init_bitwise_1arg_1type(func_map_t& fmap)
                                                                                                                        \
         sycl::queue q = *(reinterpret_cast<sycl::queue*>(q_ref));                                                      \
                                                                                                                        \
-        DPNPC_ptr_adapter<_DataType> input1_ptr(q_ref, input1_in, input1_size);                                        \
-        DPNPC_ptr_adapter<shape_elem_type> input1_shape_ptr(q_ref, input1_shape, input1_ndim, true);                   \
-        DPNPC_ptr_adapter<shape_elem_type> input1_strides_ptr(q_ref, input1_strides, input1_ndim, true);               \
+        _DataType* input1_data = static_cast<_DataType*>(const_cast<void*>(input1_in));                                \
+        _DataType* input2_data = static_cast<_DataType*>(const_cast<void*>(input2_in));                                \
+        _DataType* result = static_cast<_DataType*>(result_out);                                                       \
                                                                                                                        \
-        DPNPC_ptr_adapter<_DataType> input2_ptr(q_ref, input2_in, input2_size);                                        \
-        DPNPC_ptr_adapter<shape_elem_type> input2_shape_ptr(q_ref, input2_shape, input2_ndim, true);                   \
-        DPNPC_ptr_adapter<shape_elem_type> input2_strides_ptr(q_ref, input2_strides, input2_ndim, true);               \
+        shape_elem_type* input1_shape_offsets = new shape_elem_type[input1_ndim];                                      \
                                                                                                                        \
-        DPNPC_ptr_adapter<_DataType> result_ptr(q_ref, result_out, result_size, false, true);                          \
-        DPNPC_ptr_adapter<shape_elem_type> result_strides_ptr(q_ref, result_strides, result_ndim);                     \
+        get_shape_offsets_inkernel(input1_shape, input1_ndim, input1_shape_offsets);                                   \
+        bool use_strides = !array_equal(input1_strides, input1_ndim, input1_shape_offsets, input1_ndim);               \
+        delete[] input1_shape_offsets;                                                                                 \
                                                                                                                        \
-        _DataType* input1_data = input1_ptr.get_ptr();                                                                 \
-        shape_elem_type* input1_shape_data = input1_shape_ptr.get_ptr();                                               \
-        shape_elem_type* input1_strides_data = input1_strides_ptr.get_ptr();                                           \
+        shape_elem_type* input2_shape_offsets = new shape_elem_type[input2_ndim];                                      \
                                                                                                                        \
-        _DataType* input2_data = input2_ptr.get_ptr();                                                                 \
-        shape_elem_type* input2_shape_data = input2_shape_ptr.get_ptr();                                               \
-        shape_elem_type* input2_strides_data = input2_strides_ptr.get_ptr();                                           \
-                                                                                                                       \
-        _DataType* result = result_ptr.get_ptr();                                                                      \
-        shape_elem_type* result_strides_data = result_strides_ptr.get_ptr();                                           \
-                                                                                                                       \
-        const size_t input1_shape_size_in_bytes = input1_ndim * sizeof(shape_elem_type);                               \
-        shape_elem_type* input1_shape_offsets =                                                                        \
-            reinterpret_cast<shape_elem_type*>(sycl::malloc_shared(input1_shape_size_in_bytes, q));                    \
-        get_shape_offsets_inkernel(input1_shape_data, input1_ndim, input1_shape_offsets);                              \
-        bool use_strides = !array_equal(input1_strides_data, input1_ndim, input1_shape_offsets, input1_ndim);          \
-        sycl::free(input1_shape_offsets, q);                                                                           \
-                                                                                                                       \
-        const size_t input2_shape_size_in_bytes = input2_ndim * sizeof(shape_elem_type);                               \
-        shape_elem_type* input2_shape_offsets =                                                                        \
-            reinterpret_cast<shape_elem_type*>(sycl::malloc_shared(input2_shape_size_in_bytes, q));                    \
-        get_shape_offsets_inkernel(input2_shape_data, input2_ndim, input2_shape_offsets);                              \
-        use_strides =                                                                                                  \
-            use_strides || !array_equal(input2_strides_data, input2_ndim, input2_shape_offsets, input2_ndim);          \
-        sycl::free(input2_shape_offsets, q);                                                                           \
+        get_shape_offsets_inkernel(input2_shape, input2_ndim, input2_shape_offsets);                                   \
+        use_strides = use_strides || !array_equal(input2_strides, input2_ndim, input2_shape_offsets, input2_ndim);     \
+        delete[] input2_shape_offsets;                                                                                 \
                                                                                                                        \
         sycl::event event;                                                                                             \
         sycl::range<1> gws(result_size);                                                                               \
                                                                                                                        \
         if (use_strides)                                                                                               \
         {                                                                                                              \
+            if ((result_ndim != input1_ndim) || (result_ndim != input2_ndim))                                          \
+            {                                                                                                          \
+                throw std::runtime_error("Result ndim=" + std::to_string(result_ndim) +                                \
+                                         " mismatches with either input1 ndim=" + std::to_string(input1_ndim) +        \
+                                         " or input2 ndim=" + std::to_string(input2_ndim));                            \
+            }                                                                                                          \
+                                                                                                                       \
+            /* memory transfer optimization: a USM-host temporary speeds up the transfer to device */                  \
+            using usm_host_allocatorT = sycl::usm_allocator<shape_elem_type, sycl::usm::alloc::host>;                  \
+                                                                                                                       \
+            size_t strides_size = 3 * result_ndim;                                                                     \
+            shape_elem_type* dev_strides_data = sycl::malloc_device<shape_elem_type>(strides_size, q);                 \
+                                                                                                                       \
+            /* create USM-host temporary (std::vector) holding the packed strides */                                   \
+            auto strides_host_packed =                                                                                 \
+                std::vector<shape_elem_type, usm_host_allocatorT>(strides_size, usm_host_allocatorT(q));               \
+                                                                                                                       \
+            /* packed vector is concatenation of result_strides, input1_strides and input2_strides */                  \
+            std::copy(result_strides, result_strides + result_ndim, strides_host_packed.begin());                      \
+            std::copy(input1_strides, input1_strides + result_ndim, strides_host_packed.begin() + result_ndim);        \
+            std::copy(input2_strides, input2_strides + result_ndim, strides_host_packed.begin() + 2 * result_ndim);    \
+                                                                                                                       \
+            auto copy_strides_ev =                                                                                     \
+                q.copy<shape_elem_type>(strides_host_packed.data(), dev_strides_data, strides_host_packed.size());     \
+                                                                                                                       \
             auto kernel_parallel_for_func = [=](sycl::id<1> global_id) {                                               \
-                const size_t output_id = global_id[0]; /*for (size_t i = 0; i < result_size; ++i)*/                    \
+                const size_t output_id = global_id[0]; /* for (size_t i = 0; i < result_size; ++i) */                  \
                 {                                                                                                      \
+                    const shape_elem_type* result_strides_data = &dev_strides_data[0];                                 \
+                    const shape_elem_type* input1_strides_data = &dev_strides_data[result_ndim];                       \
+                    const shape_elem_type* input2_strides_data = &dev_strides_data[2 * result_ndim];                   \
+                                                                                                                       \
                     size_t input1_id = 0;                                                                              \
                     size_t input2_id = 0;                                                                              \
+                                                                                                                       \
                     for (size_t i = 0; i < result_ndim; ++i)                                                           \
                     {                                                                                                  \
                         const size_t output_xyz_id =                                                                   \
@@ -209,14 +218,19 @@ static void func_map_init_bitwise_1arg_1type(func_map_t& fmap)
                 }                                                                                                      \
             };                                                                                                         \
             auto kernel_func = [&](sycl::handler& cgh) {                                                               \
+                cgh.depends_on(copy_strides_ev);                                                                       \
                 cgh.parallel_for<class __name__##_strides_kernel<_DataType>>(gws, kernel_parallel_for_func);           \
             };                                                                                                         \
-            event = q.submit(kernel_func);                                                                             \
+                                                                                                                       \
+            q.submit(kernel_func).wait();                                                                              \
+                                                                                                                       \
+            sycl::free(dev_strides_data, q);                                                                           \
+            return event_ref;                                                                                          \
         }                                                                                                              \
         else                                                                                                           \
         {                                                                                                              \
             auto kernel_parallel_for_func = [=](sycl::id<1> global_id) {                                               \
-                size_t i = global_id[0]; /*for (size_t i = 0; i < result_size; ++i)*/                                  \
+                size_t i = global_id[0]; /* for (size_t i = 0; i < result_size; ++i) */                                \
                 const _DataType input1_elem = (input1_size == 1) ? input1_data[0] : input1_data[i];                    \
                 const _DataType input2_elem = (input2_size == 1) ? input2_data[0] : input2_data[i];                    \
                 result[i] = __operation__;                                                                             \
@@ -226,16 +240,8 @@ static void func_map_init_bitwise_1arg_1type(func_map_t& fmap)
             };                                                                                                         \
             event = q.submit(kernel_func);                                                                             \
         }                                                                                                              \
-        input1_ptr.depends_on(event);                                                                                  \
-        input1_shape_ptr.depends_on(event);                                                                            \
-        input1_strides_ptr.depends_on(event);                                                                          \
-        input2_ptr.depends_on(event);                                                                                  \
-        input2_shape_ptr.depends_on(event);                                                                            \
-        input2_strides_ptr.depends_on(event);                                                                          \
-        result_ptr.depends_on(event);                                                                                  \
-        result_strides_ptr.depends_on(event);                                                                          \
-        event_ref = reinterpret_cast<DPCTLSyclEventRef>(&event);                                                       \
                                                                                                                        \
+        event_ref = reinterpret_cast<DPCTLSyclEventRef>(&event);                                                       \
         return DPCTLEvent_Copy(event_ref);                                                                             \
     }                                                                                                                  \
                                                                                                                        \
@@ -278,6 +284,7 @@ static void func_map_init_bitwise_1arg_1type(func_map_t& fmap)
                                                           where,                                                       \
                                                           dep_event_vec_ref);                                          \
         DPCTLEvent_WaitAndThrow(event_ref);                                                                            \
+        DPCTLEvent_Delete(event_ref);                                                                                  \
     }                                                                                                                  \
                                                                                                                        \
     template <typename _DataType>                                                                                      \
diff --git a/dpnp/backend/kernels/dpnp_krnl_elemwise.cpp b/dpnp/backend/kernels/dpnp_krnl_elemwise.cpp
index c8b32fa9809..741a945fb09 100644
--- a/dpnp/backend/kernels/dpnp_krnl_elemwise.cpp
+++ b/dpnp/backend/kernels/dpnp_krnl_elemwise.cpp
@@ -70,35 +70,49 @@
                                                                                                                        \
         sycl::queue q = *(reinterpret_cast<sycl::queue*>(q_ref));                                                      \
                                                                                                                        \
-        DPNPC_ptr_adapter<_DataType_input> input1_ptr(q_ref, input1_in, input1_size);                                  \
-        DPNPC_ptr_adapter<shape_elem_type> input1_shape_ptr(q_ref, input1_shape, input1_ndim, true);                   \
-        DPNPC_ptr_adapter<shape_elem_type> input1_strides_ptr(q_ref, input1_strides, input1_ndim, true);               \
-                                                                                                                       \
-        DPNPC_ptr_adapter<_DataType_output> result_ptr(q_ref, result_out, result_size, false, true);                   \
-        DPNPC_ptr_adapter<shape_elem_type> result_strides_ptr(q_ref, result_strides, result_ndim);                     \
-                                                                                                                       \
-        _DataType_input* input1_data = input1_ptr.get_ptr();                                                           \
-        shape_elem_type* input1_shape_data = input1_shape_ptr.get_ptr();                                               \
-        shape_elem_type* input1_strides_data = input1_strides_ptr.get_ptr();                                           \
+        _DataType_input* input1_data = static_cast<_DataType_input*>(const_cast<void*>(input1_in));                    \
+        _DataType_output* result = static_cast<_DataType_output*>(result_out);                                         \
                                                                                                                        \
-        _DataType_output* result = result_ptr.get_ptr();                                                               \
-        shape_elem_type* result_strides_data = result_strides_ptr.get_ptr();                                           \
+        shape_elem_type* input1_shape_offsets = new shape_elem_type[input1_ndim];                                      \
                                                                                                                        \
-        const size_t input1_shape_size_in_bytes = input1_ndim * sizeof(shape_elem_type);                               \
-        shape_elem_type* input1_shape_offsets =                                                                        \
-            reinterpret_cast<shape_elem_type*>(sycl::malloc_shared(input1_shape_size_in_bytes, q));                    \
-        get_shape_offsets_inkernel(input1_shape_data, input1_ndim, input1_shape_offsets);                              \
-        bool use_strides = !array_equal(input1_strides_data, input1_ndim, input1_shape_offsets, input1_ndim);          \
-        sycl::free(input1_shape_offsets, q);                                                                           \
+        get_shape_offsets_inkernel(input1_shape, input1_ndim, input1_shape_offsets);                                   \
+        bool use_strides = !array_equal(input1_strides, input1_ndim, input1_shape_offsets, input1_ndim);               \
+        delete[] input1_shape_offsets;                                                                                 \
                                                                                                                        \
         sycl::event event;                                                                                             \
         sycl::range<1> gws(result_size);                                                                               \
                                                                                                                        \
         if (use_strides)                                                                                               \
         {                                                                                                              \
+            if (result_ndim != input1_ndim)                                                                            \
+            {                                                                                                          \
+                throw std::runtime_error("Result ndim=" + std::to_string(result_ndim) +                                \
+                                         " mismatches with input1 ndim=" + std::to_string(input1_ndim));               \
+            }                                                                                                          \
+                                                                                                                       \
+            /* memory transfer optimization: a USM-host temporary speeds up the transfer to device */                  \
+            using usm_host_allocatorT = sycl::usm_allocator<shape_elem_type, sycl::usm::alloc::host>;                  \
+                                                                                                                       \
+            size_t strides_size = 2 * result_ndim;                                                                     \
+            shape_elem_type* dev_strides_data = sycl::malloc_device<shape_elem_type>(strides_size, q);                 \
+                                                                                                                       \
+            /* create USM-host temporary (std::vector) holding the packed strides */                                   \
+            auto strides_host_packed =                                                                                 \
+                std::vector<shape_elem_type, usm_host_allocatorT>(strides_size, usm_host_allocatorT(q));               \
+                                                                                                                       \
+            /* packed vector is concatenation of result_strides and input1_strides */                                  \
+            std::copy(result_strides, result_strides + result_ndim, strides_host_packed.begin());                      \
+            std::copy(input1_strides, input1_strides + result_ndim, strides_host_packed.begin() + result_ndim);        \
+                                                                                                                       \
+            auto copy_strides_ev =                                                                                     \
+                q.copy<shape_elem_type>(strides_host_packed.data(), dev_strides_data, strides_host_packed.size());     \
+            copy_strides_ev.wait(); /* make sure the strides are on the device before the kernel reads them */         \
             auto kernel_parallel_for_func = [=](sycl::id<1> global_id) {                                               \
-                size_t output_id = global_id[0]; /*for (size_t i = 0; i < result_size; ++i)*/                          \
+                size_t output_id = global_id[0]; /* for (size_t i = 0; i < result_size; ++i) */                        \
                 {                                                                                                      \
+                    const shape_elem_type* result_strides_data = &dev_strides_data[0];                                 \
+                    const shape_elem_type* input1_strides_data = &dev_strides_data[result_ndim];                       \
+                                                                                                                       \
                     size_t input_id = 0;                                                                               \
                     for (size_t i = 0; i < input1_ndim; ++i)                                                           \
                     {                                                                                                  \
@@ -115,7 +129,11 @@
                 cgh.parallel_for<class __name__##_strides_kernel<_DataType_input, _DataType_output>>(                  \
                     gws, kernel_parallel_for_func);                                                                    \
             };                                                                                                         \
-            event = q.submit(kernel_func);                                                                             \
+                                                                                                                       \
+            q.submit(kernel_func).wait();                                                                              \
+                                                                                                                       \
+            sycl::free(dev_strides_data, q);                                                                           \
+            return event_ref;                                                                                          \
         }                                                                                                              \
         else                                                                                                           \
         {                                                                                                              \
@@ -143,14 +161,7 @@
             }                                                                                                          \
         }                                                                                                              \
                                                                                                                        \
-        input1_ptr.depends_on(event);                                                                                  \
-        input1_shape_ptr.depends_on(event);                                                                            \
-        input1_strides_ptr.depends_on(event);                                                                          \
-        result_ptr.depends_on(event);                                                                                  \
-        result_strides_ptr.depends_on(event);                                                                          \
-                                                                                                                       \
         event_ref = reinterpret_cast<DPCTLSyclEventRef>(&event);                                                       \
-                                                                                                                       \
         return DPCTLEvent_Copy(event_ref);                                                                             \
     }                                                                                                                  \
                                                                                                                        \
@@ -583,34 +594,49 @@ static void func_map_init_elemwise_1arg_2type(func_map_t& fmap)
                                                                                                                        \
         sycl::queue q = *(reinterpret_cast<sycl::queue*>(q_ref));                                                      \
                                                                                                                        \
-        DPNPC_ptr_adapter<_DataType> input1_ptr(q_ref, input1_in, input1_size);                                        \
-        DPNPC_ptr_adapter<shape_elem_type> input1_shape_ptr(q_ref, input1_shape, input1_ndim, true);                   \
-        DPNPC_ptr_adapter<shape_elem_type> input1_strides_ptr(q_ref, input1_strides, input1_ndim, true);               \
-        DPNPC_ptr_adapter<_DataType> result_ptr(q_ref, result_out, result_size, false, true);                          \
-        DPNPC_ptr_adapter<shape_elem_type> result_strides_ptr(q_ref, result_strides, result_ndim);                     \
-                                                                                                                       \
-        _DataType* input1_data = input1_ptr.get_ptr();                                                                 \
-        shape_elem_type* input1_shape_data = input1_shape_ptr.get_ptr();                                               \
-        shape_elem_type* input1_strides_data = input1_strides_ptr.get_ptr();                                           \
+        _DataType* input1_data = static_cast<_DataType*>(const_cast<void*>(input1_in));                                \
+        _DataType* result = static_cast<_DataType*>(result_out);                                                       \
                                                                                                                        \
-        _DataType* result = result_ptr.get_ptr();                                                                      \
-        shape_elem_type* result_strides_data = result_strides_ptr.get_ptr();                                           \
+        shape_elem_type* input1_shape_offsets = new shape_elem_type[input1_ndim];                                      \
                                                                                                                        \
-        const size_t input1_shape_size_in_bytes = input1_ndim * sizeof(shape_elem_type);                               \
-        shape_elem_type* input1_shape_offsets =                                                                        \
-            reinterpret_cast<shape_elem_type*>(sycl::malloc_shared(input1_shape_size_in_bytes, q));                    \
-        get_shape_offsets_inkernel(input1_shape_data, input1_ndim, input1_shape_offsets);                              \
-        bool use_strides = !array_equal(input1_strides_data, input1_ndim, input1_shape_offsets, input1_ndim);          \
-        sycl::free(input1_shape_offsets, q);                                                                           \
+        get_shape_offsets_inkernel(input1_shape, input1_ndim, input1_shape_offsets);                                   \
+        bool use_strides = !array_equal(input1_strides, input1_ndim, input1_shape_offsets, input1_ndim);               \
+        delete[] input1_shape_offsets;                                                                                 \
                                                                                                                        \
         sycl::event event;                                                                                             \
         sycl::range<1> gws(result_size);                                                                               \
                                                                                                                        \
         if (use_strides)                                                                                               \
         {                                                                                                              \
+            if (result_ndim != input1_ndim)                                                                            \
+            {                                                                                                          \
+                throw std::runtime_error("Result ndim=" + std::to_string(result_ndim) +                                \
+                                         " mismatches with input1 ndim=" + std::to_string(input1_ndim));               \
+            }                                                                                                          \
+                                                                                                                       \
+            /* memory transfer optimization: a USM-host temporary speeds up the transfer to the device */              \
+            using usm_host_allocatorT = sycl::usm_allocator<shape_elem_type, sycl::usm::alloc::host>;                  \
+                                                                                                                       \
+            size_t strides_size = 2 * result_ndim;                                                                     \
+            shape_elem_type* dev_strides_data = sycl::malloc_device<shape_elem_type>(strides_size, q);                 \
+                                                                                                                       \
+            /* create host temporary for packed strides, a std::vector backed by the USM-host allocator */             \
+            auto strides_host_packed =                                                                                 \
+                std::vector<shape_elem_type, usm_host_allocatorT>(strides_size, usm_host_allocatorT(q));               \
+                                                                                                                       \
+            /* packed vector is concatenation of result_strides and input1_strides */                                  \
+            std::copy(result_strides, result_strides + result_ndim, strides_host_packed.begin());                      \
+            std::copy(input1_strides, input1_strides + result_ndim, strides_host_packed.begin() + result_ndim);        \
+                                                                                                                       \
+            auto copy_strides_ev =                                                                                     \
+                q.copy<shape_elem_type>(strides_host_packed.data(), dev_strides_data, strides_host_packed.size());     \
+                                                                                                                       \
             auto kernel_parallel_for_func = [=](sycl::id<1> global_id) {                                               \
-                size_t output_id = global_id[0]; /*for (size_t i = 0; i < result_size; ++i)*/                          \
+                size_t output_id = global_id[0]; /* for (size_t i = 0; i < result_size; ++i) */                        \
                 {                                                                                                      \
+                    const shape_elem_type* result_strides_data = &dev_strides_data[0];                                 \
+                    const shape_elem_type* input1_strides_data = &dev_strides_data[1];                                 \
+                                                                                                                       \
                     size_t input_id = 0;                                                                               \
                     for (size_t i = 0; i < input1_ndim; ++i)                                                           \
                     {                                                                                                  \
@@ -626,12 +652,16 @@ static void func_map_init_elemwise_1arg_2type(func_map_t& fmap)
             auto kernel_func = [&](sycl::handler& cgh) {                                                               \
                 cgh.parallel_for<class __name__##_strides_kernel<_DataType>>(gws, kernel_parallel_for_func);           \
             };                                                                                                         \
-            event = q.submit(kernel_func);                                                                             \
+                                                                                                                       \
+            q.submit(kernel_func).wait();                                                                              \
+                                                                                                                       \
+            sycl::free(dev_strides_data, q);                                                                           \
+            return event_ref;                                                                                          \
         }                                                                                                              \
         else                                                                                                           \
         {                                                                                                              \
             auto kernel_parallel_for_func = [=](sycl::id<1> global_id) {                                               \
-                size_t i = global_id[0]; /*for (size_t i = 0; i < result_size; ++i)*/                                  \
+                size_t i = global_id[0]; /* for (size_t i = 0; i < result_size; ++i) */                                \
                 {                                                                                                      \
                     const _DataType input_elem = input1_data[i];                                                       \
                     result[i] = __operation1__;                                                                        \
@@ -651,14 +681,7 @@ static void func_map_init_elemwise_1arg_2type(func_map_t& fmap)
             }                                                                                                          \
         }                                                                                                              \
                                                                                                                        \
-        input1_ptr.depends_on(event);                                                                                  \
-        input1_shape_ptr.depends_on(event);                                                                            \
-        input1_strides_ptr.depends_on(event);                                                                          \
-        result_ptr.depends_on(event);                                                                                  \
-        result_strides_ptr.depends_on(event);                                                                          \
-                                                                                                                       \
         event_ref = reinterpret_cast<DPCTLSyclEventRef>(&event);                                                       \
-                                                                                                                       \
         return DPCTLEvent_Copy(event_ref);                                                                             \
     }                                                                                                                  \
                                                                                                                        \

From bb01f05d4e9c3059c06bc03d578c377e47a3398f Mon Sep 17 00:00:00 2001
From: vlad-perevezentsev <vladislav.perevezentsev@intel.com>
Date: Mon, 6 Mar 2023 01:26:57 +0100
Subject: [PATCH 15/16] dpnp.where() doesn't work with 1 argument (#1326)

* Fix where operator to support passing 1 arg

* Unskip and fix tests for where operator

* Add support of dpnp.where() with x and y arguments

* Update dpnp/backend/include/dpnp_iface_fptr.hpp

* Use dpctl.tensor.nonzero() implementation

---------

Co-authored-by: Anton Volkov <antonwolfy@gmail.com>
Co-authored-by: Anton <100830759+antonwolfy@users.noreply.github.com>
---
 dpnp/backend/include/dpnp_iface.hpp           |  52 ++++
 dpnp/backend/include/dpnp_iface_fptr.hpp      |   1 +
 dpnp/backend/kernels/dpnp_krnl_indexing.cpp   |   1 +
 dpnp/backend/kernels/dpnp_krnl_searching.cpp  | 257 +++++++++++++++++-
 dpnp/dpnp_algo/dpnp_algo.pxd                  |   2 +
 dpnp/dpnp_algo/dpnp_algo_searching.pyx        | 106 +++++++-
 dpnp/dpnp_iface_searching.py                  |  67 ++++-
 tests/skipped_tests.tbl                       |   7 +-
 tests/skipped_tests_gpu.tbl                   |   9 -
 .../cupy/sorting_tests/test_search.py         |   5 +-
 10 files changed, 484 insertions(+), 23 deletions(-)

diff --git a/dpnp/backend/include/dpnp_iface.hpp b/dpnp/backend/include/dpnp_iface.hpp
index 7a80b40a3d2..a124b9e07a1 100644
--- a/dpnp/backend/include/dpnp_iface.hpp
+++ b/dpnp/backend/include/dpnp_iface.hpp
@@ -57,6 +57,7 @@ typedef ssize_t shape_elem_type;
 
 #include <dpctl_sycl_interface.h>
 
+#include "dpnp_iface_fptr.hpp"
 #include "dpnp_iface_fft.hpp"
 #include "dpnp_iface_random.hpp"
 
@@ -1683,6 +1684,57 @@ INP_DLLEXPORT void dpnp_var_c(void* array,
                               size_t naxis,
                               size_t ddof);
 
+/**
+ * @ingroup BACKEND_API
+ * @brief Element-wise selection: result[i] = condition[i] ? input1[i] : input2[i].
+ * @return Copy of the kernel event, or nullptr if an input is empty or the kernel has already been waited for.
+ * @param [in]  q_ref               Reference to SYCL queue.
+ * @param [out] result_out          Output array, elements of type _DataType_output.
+ * @param [in]  result_size         Size of output array.
+ * @param [in]  result_ndim         Number of output array dimensions.
+ * @param [in]  result_shape        Shape of output array.
+ * @param [in]  result_strides      Strides of output array.
+ * @param [in]  condition_in        Condition array, read as bool elements.
+ * @param [in]  condition_size      Size of condition array.
+ * @param [in]  condition_ndim      Number of condition array dimensions.
+ * @param [in]  condition_shape     Shape of condition array.
+ * @param [in]  condition_strides   Strides of condition array.
+ * @param [in]  input1_in           First input array, elements of type _DataType_input1.
+ * @param [in]  input1_size         Size of first input array.
+ * @param [in]  input1_ndim         Number of first input array dimensions.
+ * @param [in]  input1_shape        Shape of first input array.
+ * @param [in]  input1_strides      Strides of first input array.
+ * @param [in]  input2_in           Second input array, elements of type _DataType_input2.
+ * @param [in]  input2_size         Size of second input array.
+ * @param [in]  input2_ndim         Number of second input array dimensions.
+ * @param [in]  input2_shape        Shape of second input array.
+ * @param [in]  input2_strides      Strides of second input array.
+ * @param [in]  dep_event_vec_ref   Reference to vector of SYCL events (not used by the implementation).
+ */
+template <typename _DataType_output, typename _DataType_input1, typename _DataType_input2>
+INP_DLLEXPORT DPCTLSyclEventRef dpnp_where_c(DPCTLSyclQueueRef q_ref,
+                                             void* result_out,
+                                             const size_t result_size,
+                                             const size_t result_ndim,
+                                             const shape_elem_type* result_shape,
+                                             const shape_elem_type* result_strides,
+                                             const void* condition_in,
+                                             const size_t condition_size,
+                                             const size_t condition_ndim,
+                                             const shape_elem_type* condition_shape,
+                                             const shape_elem_type* condition_strides,
+                                             const void* input1_in,
+                                             const size_t input1_size,
+                                             const size_t input1_ndim,
+                                             const shape_elem_type* input1_shape,
+                                             const shape_elem_type* input1_strides,
+                                             const void* input2_in,
+                                             const size_t input2_size,
+                                             const size_t input2_ndim,
+                                             const shape_elem_type* input2_shape,
+                                             const shape_elem_type* input2_strides,
+                                             const DPCTLEventVectorRef dep_event_vec_ref);
+
 /**
  * @ingroup BACKEND_API
  * @brief Implementation of invert function
diff --git a/dpnp/backend/include/dpnp_iface_fptr.hpp b/dpnp/backend/include/dpnp_iface_fptr.hpp
index fb154fcabfa..3a393708255 100644
--- a/dpnp/backend/include/dpnp_iface_fptr.hpp
+++ b/dpnp/backend/include/dpnp_iface_fptr.hpp
@@ -377,6 +377,7 @@ enum class DPNPFuncName : size_t
     DPNP_FN_VANDER_EXT,                   /**< Used in numpy.vander() impl, requires extra parameters */
     DPNP_FN_VAR,                          /**< Used in numpy.var() impl  */
     DPNP_FN_VAR_EXT,                      /**< Used in numpy.var() impl, requires extra parameters */
+    DPNP_FN_WHERE_EXT,                    /**< Used in numpy.where() impl, requires extra parameters */
     DPNP_FN_ZEROS,                        /**< Used in numpy.zeros() impl */
     DPNP_FN_ZEROS_LIKE,                   /**< Used in numpy.zeros_like() impl */
     DPNP_FN_LAST,                         /**< The latest element of the enumeration */
diff --git a/dpnp/backend/kernels/dpnp_krnl_indexing.cpp b/dpnp/backend/kernels/dpnp_krnl_indexing.cpp
index 0b80ac678d3..5c3fb76e23b 100644
--- a/dpnp/backend/kernels/dpnp_krnl_indexing.cpp
+++ b/dpnp/backend/kernels/dpnp_krnl_indexing.cpp
@@ -1021,6 +1021,7 @@ void func_map_init_indexing_func(func_map_t& fmap)
     fmap[DPNPFuncName::DPNP_FN_NONZERO][eft_FLT][eft_FLT] = {eft_FLT, (void*)dpnp_nonzero_default_c<float>};
     fmap[DPNPFuncName::DPNP_FN_NONZERO][eft_DBL][eft_DBL] = {eft_DBL, (void*)dpnp_nonzero_default_c<double>};
 
+    fmap[DPNPFuncName::DPNP_FN_NONZERO_EXT][eft_BLN][eft_BLN] = {eft_BLN, (void*)dpnp_nonzero_ext_c<bool>};
     fmap[DPNPFuncName::DPNP_FN_NONZERO_EXT][eft_INT][eft_INT] = {eft_INT, (void*)dpnp_nonzero_ext_c<int32_t>};
     fmap[DPNPFuncName::DPNP_FN_NONZERO_EXT][eft_LNG][eft_LNG] = {eft_LNG, (void*)dpnp_nonzero_ext_c<int64_t>};
     fmap[DPNPFuncName::DPNP_FN_NONZERO_EXT][eft_FLT][eft_FLT] = {eft_FLT, (void*)dpnp_nonzero_ext_c<float>};
diff --git a/dpnp/backend/kernels/dpnp_krnl_searching.cpp b/dpnp/backend/kernels/dpnp_krnl_searching.cpp
index 39156ea07c4..fef5f78d15d 100644
--- a/dpnp/backend/kernels/dpnp_krnl_searching.cpp
+++ b/dpnp/backend/kernels/dpnp_krnl_searching.cpp
@@ -1,5 +1,5 @@
 //*****************************************************************************
-// Copyright (c) 2016-2020, Intel Corporation
+// Copyright (c) 2016-2023, Intel Corporation
 // All rights reserved.
 //
 // Redistribution and use in source and binary forms, with or without
@@ -27,6 +27,7 @@
 
 #include <dpnp_iface.hpp>
 #include "dpnp_fptr.hpp"
+#include "dpnp_iterator.hpp"
 #include "dpnpc_memory_adapter.hpp"
 #include "queue_sycl.hpp"
 
@@ -139,6 +140,258 @@ DPCTLSyclEventRef (*dpnp_argmin_ext_c)(DPCTLSyclQueueRef,
                                        size_t,
                                        const DPCTLEventVectorRef) = dpnp_argmin_c<_DataType, _idx_DataType>;
 
+
+/* kernel name tags for the broadcast, strided and contiguous paths of dpnp_where_c */
+template <typename _DataType_output, typename _DataType_input1, typename _DataType_input2>
+class dpnp_where_c_broadcast_kernel;
+
+template <typename _DataType_output, typename _DataType_input1, typename _DataType_input2>
+class dpnp_where_c_strides_kernel;
+
+template <typename _DataType_output, typename _DataType_input1, typename _DataType_input2>
+class dpnp_where_c_kernel;
+
+/**
+ * Element-wise selection: result[i] = condition[i] ? input1[i] : input2[i].
+ *
+ * Operands of different shapes are read through DPNPC_id iterators broadcast to result_shape. Operands of equal
+ * shapes use strided indexing when some strides differ from the offsets computed from the operand shape, and the
+ * flat result index otherwise. The broadcast and strided paths wait for the kernel and return nullptr, the flat
+ * path returns a copy of the kernel event. If any operand is empty, nothing is submitted and nullptr is returned.
+ *
+ * @throws std::runtime_error on the strided path when operands and result differ in the number of dimensions.
+ */
+template <typename _DataType_output, typename _DataType_input1, typename _DataType_input2>
+DPCTLSyclEventRef dpnp_where_c(DPCTLSyclQueueRef q_ref,
+                               void* result_out,
+                               const size_t result_size,
+                               const size_t result_ndim,
+                               const shape_elem_type* result_shape,
+                               const shape_elem_type* result_strides,
+                               const void* condition_in,
+                               const size_t condition_size,
+                               const size_t condition_ndim,
+                               const shape_elem_type* condition_shape,
+                               const shape_elem_type* condition_strides,
+                               const void* input1_in,
+                               const size_t input1_size,
+                               const size_t input1_ndim,
+                               const shape_elem_type* input1_shape,
+                               const shape_elem_type* input1_strides,
+                               const void* input2_in,
+                               const size_t input2_size,
+                               const size_t input2_ndim,
+                               const shape_elem_type* input2_shape,
+                               const shape_elem_type* input2_strides,
+                               const DPCTLEventVectorRef dep_event_vec_ref)
+{
+    /* dependencies are not used by this implementation, the cast only silences the unused parameter warning */
+    (void)dep_event_vec_ref;
+
+    DPCTLSyclEventRef event_ref = nullptr;
+
+    if (!condition_size || !input1_size || !input2_size)
+    {
+        return event_ref;
+    }
+
+    sycl::queue q = *(reinterpret_cast<sycl::queue*>(q_ref));
+
+    bool* condition_data = static_cast<bool*>(const_cast<void*>(condition_in));
+    _DataType_input1* input1_data = static_cast<_DataType_input1*>(const_cast<void*>(input1_in));
+    _DataType_input2* input2_data = static_cast<_DataType_input2*>(const_cast<void*>(input2_in));
+    _DataType_output* result = static_cast<_DataType_output*>(result_out);
+
+    /* broadcasting is needed as soon as any two operand shapes differ */
+    bool use_broadcasting = !array_equal(input1_shape, input1_ndim, input2_shape, input2_ndim);
+    use_broadcasting = use_broadcasting || !array_equal(condition_shape, condition_ndim, input1_shape, input1_ndim);
+    use_broadcasting = use_broadcasting || !array_equal(condition_shape, condition_ndim, input2_shape, input2_ndim);
+
+    /* strided indexing is needed when some strides differ from the offsets computed from the operand shape */
+    auto has_custom_strides = [](const shape_elem_type* shape, const shape_elem_type* strides, const size_t ndim) {
+        std::vector<shape_elem_type> shape_offsets(ndim);
+        get_shape_offsets_inkernel(shape, ndim, shape_offsets.data());
+        return !array_equal(strides, ndim, shape_offsets.data(), ndim);
+    };
+    const bool use_strides = has_custom_strides(condition_shape, condition_strides, condition_ndim) ||
+                             has_custom_strides(input1_shape, input1_strides, input1_ndim) ||
+                             has_custom_strides(input2_shape, input2_strides, input2_ndim);
+
+    sycl::event event;
+    sycl::range<1> gws(result_size);
+
+    if (use_broadcasting)
+    {
+        /* the iterators live in memory from dpnp_memory_alloc_c, the kernel reads them via the captured pointers */
+        using condition_it_t = DPNPC_id<bool>;
+        auto* condition_it = reinterpret_cast<condition_it_t*>(dpnp_memory_alloc_c(q_ref, sizeof(condition_it_t)));
+        new (condition_it) condition_it_t(q_ref, condition_data, condition_shape, condition_strides, condition_ndim);
+        condition_it->broadcast_to_shape(result_shape, result_ndim);
+
+        using input1_it_t = DPNPC_id<_DataType_input1>;
+        auto* input1_it = reinterpret_cast<input1_it_t*>(dpnp_memory_alloc_c(q_ref, sizeof(input1_it_t)));
+        new (input1_it) input1_it_t(q_ref, input1_data, input1_shape, input1_strides, input1_ndim);
+        input1_it->broadcast_to_shape(result_shape, result_ndim);
+
+        using input2_it_t = DPNPC_id<_DataType_input2>;
+        auto* input2_it = reinterpret_cast<input2_it_t*>(dpnp_memory_alloc_c(q_ref, sizeof(input2_it_t)));
+        new (input2_it) input2_it_t(q_ref, input2_data, input2_shape, input2_strides, input2_ndim);
+        input2_it->broadcast_to_shape(result_shape, result_ndim);
+
+        auto kernel_parallel_for_func = [=](sycl::id<1> global_id) {
+            const size_t i = global_id[0]; /* for (size_t i = 0; i < result_size; ++i) */
+            {
+                const bool condition = (*condition_it)[i];
+                const _DataType_output input1_elem = (*input1_it)[i];
+                const _DataType_output input2_elem = (*input2_it)[i];
+                result[i] = (condition) ? input1_elem : input2_elem;
+            }
+        };
+        auto kernel_func = [&](sycl::handler& cgh) {
+            cgh.parallel_for<class dpnp_where_c_broadcast_kernel<_DataType_output, _DataType_input1, _DataType_input2>>(
+                gws, kernel_parallel_for_func);
+        };
+
+        q.submit(kernel_func).wait();
+
+        /* the kernel has finished: destroy the iterators and release the memory that backs them */
+        condition_it->~DPNPC_id();
+        input1_it->~DPNPC_id();
+        input2_it->~DPNPC_id();
+        dpnp_memory_free_c(q_ref, condition_it);
+        dpnp_memory_free_c(q_ref, input1_it);
+        dpnp_memory_free_c(q_ref, input2_it);
+
+        return event_ref;
+    }
+    else if (use_strides)
+    {
+        if ((result_ndim != condition_ndim) || (result_ndim != input1_ndim) || (result_ndim != input2_ndim))
+        {
+            throw std::runtime_error("Result ndim=" + std::to_string(result_ndim) +
+                                     " mismatches with either condition ndim=" + std::to_string(condition_ndim) +
+                                     " or input1 ndim=" + std::to_string(input1_ndim) +
+                                     " or input2 ndim=" + std::to_string(input2_ndim));
+        }
+
+        /* memory transfer optimization: a USM-host temporary speeds up the transfer to the device */
+        using usm_host_allocatorT = sycl::usm_allocator<shape_elem_type, sycl::usm::alloc::host>;
+
+        size_t strides_size = 4 * result_ndim;
+        shape_elem_type* dev_strides_data = sycl::malloc_device<shape_elem_type>(strides_size, q);
+
+        /* host temporary for the packed strides, a std::vector backed by the USM-host allocator */
+        std::vector<shape_elem_type, usm_host_allocatorT> strides_host_packed(strides_size, usm_host_allocatorT(q));
+
+        /* packed vector is concatenation of result_strides, condition_strides, input1_strides and input2_strides */
+        std::copy(result_strides, result_strides + result_ndim, strides_host_packed.begin());
+        std::copy(condition_strides, condition_strides + result_ndim, strides_host_packed.begin() + result_ndim);
+        std::copy(input1_strides, input1_strides + result_ndim, strides_host_packed.begin() + 2 * result_ndim);
+        std::copy(input2_strides, input2_strides + result_ndim, strides_host_packed.begin() + 3 * result_ndim);
+
+        auto copy_strides_ev =
+            q.copy<shape_elem_type>(strides_host_packed.data(), dev_strides_data, strides_host_packed.size());
+
+        auto kernel_parallel_for_func = [=](sycl::id<1> global_id) {
+            const size_t output_id = global_id[0]; /* for (size_t i = 0; i < result_size; ++i) */
+            {
+                /* each array owns a slice of result_ndim elements in the packed strides */
+                const shape_elem_type* result_strides_data = &dev_strides_data[0];
+                const shape_elem_type* condition_strides_data = &dev_strides_data[result_ndim];
+                const shape_elem_type* input1_strides_data = &dev_strides_data[2 * result_ndim];
+                const shape_elem_type* input2_strides_data = &dev_strides_data[3 * result_ndim];
+
+                size_t condition_id = 0;
+                size_t input1_id = 0;
+                size_t input2_id = 0;
+
+                for (size_t i = 0; i < result_ndim; ++i)
+                {
+                    const size_t output_xyz_id =
+                        get_xyz_id_by_id_inkernel(output_id, result_strides_data, result_ndim, i);
+                    condition_id += output_xyz_id * condition_strides_data[i];
+                    input1_id    += output_xyz_id * input1_strides_data[i];
+                    input2_id    += output_xyz_id * input2_strides_data[i];
+                }
+
+                const bool condition = condition_data[condition_id];
+                const _DataType_output input1_elem = input1_data[input1_id];
+                const _DataType_output input2_elem = input2_data[input2_id];
+                result[output_id] = (condition) ? input1_elem : input2_elem;
+            }
+        };
+        auto kernel_func = [&](sycl::handler& cgh) {
+            cgh.depends_on(copy_strides_ev);
+            cgh.parallel_for<class dpnp_where_c_strides_kernel<_DataType_output, _DataType_input1, _DataType_input2>>(
+                gws, kernel_parallel_for_func);
+        };
+
+        q.submit(kernel_func).wait();
+
+        sycl::free(dev_strides_data, q);
+        return event_ref;
+    }
+    else
+    {
+        auto kernel_parallel_for_func = [=](sycl::id<1> global_id) {
+            const size_t i = global_id[0]; /* for (size_t i = 0; i < result_size; ++i) */
+            const bool condition = condition_data[i];
+            const _DataType_output input1_elem = input1_data[i];
+            const _DataType_output input2_elem = input2_data[i];
+            result[i] = (condition) ? input1_elem : input2_elem;
+        };
+        auto kernel_func = [&](sycl::handler& cgh) {
+            cgh.parallel_for<class dpnp_where_c_kernel<_DataType_output, _DataType_input1, _DataType_input2>>(
+                gws, kernel_parallel_for_func);
+        };
+        event = q.submit(kernel_func);
+    }
+
+    event_ref = reinterpret_cast<DPCTLSyclEventRef>(&event);
+    return DPCTLEvent_Copy(event_ref); /* 'event' is a local object, so a copy is handed to the caller */
+}
+
+template <typename _DataType_output, typename _DataType_input1, typename _DataType_input2> /* pointer to dpnp_where_c */
+DPCTLSyclEventRef (*dpnp_where_ext_c)(DPCTLSyclQueueRef,      /* q_ref */
+                                      void*,                  /* result_out */
+                                      const size_t,           /* result_size */
+                                      const size_t,           /* result_ndim */
+                                      const shape_elem_type*, /* result_shape */
+                                      const shape_elem_type*, /* result_strides */
+                                      const void*,            /* condition_in */
+                                      const size_t,           /* condition_size */
+                                      const size_t,           /* condition_ndim */
+                                      const shape_elem_type*, /* condition_shape */
+                                      const shape_elem_type*, /* condition_strides */
+                                      const void*,            /* input1_in */
+                                      const size_t,           /* input1_size */
+                                      const size_t,           /* input1_ndim */
+                                      const shape_elem_type*, /* input1_shape */
+                                      const shape_elem_type*, /* input1_strides */
+                                      const void*,            /* input2_in */
+                                      const size_t,           /* input2_size */
+                                      const size_t,           /* input2_ndim */
+                                      const shape_elem_type*, /* input2_shape */
+                                      const shape_elem_type*, /* input2_strides */
+                                      const DPCTLEventVectorRef) = dpnp_where_c<_DataType_output, _DataType_input1, _DataType_input2>;
+
+template <DPNPFuncType FT1, DPNPFuncType... FTs> /* FT1: type of input1, FTs: types paired with it as input2 */
+static void func_map_searching_2arg_3type_core(func_map_t& fmap)
+{
+    ((fmap[DPNPFuncName::DPNP_FN_WHERE_EXT][FT1][FTs] = /* one DPNP_FN_WHERE_EXT entry per (FT1, FT) pair */
+          {populate_func_types<FT1, FTs>(), /* type stored with the entry, also used as the output type below */
+           (void*)dpnp_where_ext_c<func_type_map_t::find_type<populate_func_types<FT1, FTs>()>,
+                                   func_type_map_t::find_type<FT1>,
+                                   func_type_map_t::find_type<FTs>>}),
+     ...); /* fold over the comma operator: repeats the assignment for every FT in FTs */
+}
+
+template <DPNPFuncType... FTs> /* FTs: the types accepted for input1 and input2 */
+static void func_map_searching_2arg_3type_helper(func_map_t& fmap)
+{
+    ((func_map_searching_2arg_3type_core<FTs, FTs...>(fmap)), ...); /* pairs each type of FTs with all of FTs */
+}
+
 void func_map_init_searching(func_map_t& fmap)
 {
     fmap[DPNPFuncName::DPNP_FN_ARGMAX][eft_INT][eft_INT] = {eft_INT, (void*)dpnp_argmax_default_c<int32_t, int32_t>};
@@ -177,5 +430,7 @@ void func_map_init_searching(func_map_t& fmap)
     fmap[DPNPFuncName::DPNP_FN_ARGMIN_EXT][eft_DBL][eft_INT] = {eft_INT, (void*)dpnp_argmin_ext_c<double, int32_t>};
     fmap[DPNPFuncName::DPNP_FN_ARGMIN_EXT][eft_DBL][eft_LNG] = {eft_LNG, (void*)dpnp_argmin_ext_c<double, int64_t>};
 
+    func_map_searching_2arg_3type_helper<eft_BLN, eft_INT, eft_LNG, eft_FLT, eft_DBL, eft_C64, eft_C128>(fmap);
+
     return;
 }
diff --git a/dpnp/dpnp_algo/dpnp_algo.pxd b/dpnp/dpnp_algo/dpnp_algo.pxd
index 9bf161b0aaf..9edf6255ef5 100644
--- a/dpnp/dpnp_algo/dpnp_algo.pxd
+++ b/dpnp/dpnp_algo/dpnp_algo.pxd
@@ -356,6 +356,7 @@ cdef extern from "dpnp_iface_fptr.hpp" namespace "DPNPFuncName":  # need this na
         DPNP_FN_VANDER_EXT
         DPNP_FN_VAR
         DPNP_FN_VAR_EXT
+        DPNP_FN_WHERE_EXT
         DPNP_FN_ZEROS
         DPNP_FN_ZEROS_LIKE
 
@@ -578,6 +579,7 @@ Searching functions
 """
 cpdef dpnp_descriptor dpnp_argmax(dpnp_descriptor array1)
 cpdef dpnp_descriptor dpnp_argmin(dpnp_descriptor array1)
+cpdef dpnp_descriptor dpnp_where(dpnp_descriptor cond_obj, dpnp_descriptor x_obj, dpnp_descriptor y_obj)
 
 """
 Trigonometric functions
diff --git a/dpnp/dpnp_algo/dpnp_algo_searching.pyx b/dpnp/dpnp_algo/dpnp_algo_searching.pyx
index 59ce8475181..44621b5cca0 100644
--- a/dpnp/dpnp_algo/dpnp_algo_searching.pyx
+++ b/dpnp/dpnp_algo/dpnp_algo_searching.pyx
@@ -1,7 +1,7 @@
 # cython: language_level=3
 # -*- coding: utf-8 -*-
 # *****************************************************************************
-# Copyright (c) 2016-2020, Intel Corporation
+# Copyright (c) 2016-2023, Intel Corporation
 # All rights reserved.
 #
 # Redistribution and use in source and binary forms, with or without
@@ -36,7 +36,8 @@ and the rest of the library
 
 __all__ += [
     "dpnp_argmax",
-    "dpnp_argmin"
+    "dpnp_argmin",
+    "dpnp_where"
 ]
 
 
@@ -45,6 +46,29 @@ ctypedef c_dpctl.DPCTLSyclEventRef(*custom_search_1in_1out_func_ptr_t)(c_dpctl.D
                                                                        void * , void * , size_t,
                                                                        const c_dpctl.DPCTLEventVectorRef)
 
+ctypedef c_dpctl.DPCTLSyclEventRef(*where_func_ptr_t)(c_dpctl.DPCTLSyclQueueRef,
+                                                      void *,
+                                                      const size_t,
+                                                      const size_t,
+                                                      const shape_elem_type * ,
+                                                      const shape_elem_type * ,
+                                                      void *,
+                                                      const size_t,
+                                                      const size_t,
+                                                      const shape_elem_type * ,
+                                                      const shape_elem_type * ,
+                                                      void *,
+                                                      const size_t,
+                                                      const size_t,
+                                                      const shape_elem_type * ,
+                                                      const shape_elem_type * ,
+                                                      void *,
+                                                      const size_t,
+                                                      const size_t,
+                                                      const shape_elem_type * ,
+                                                      const shape_elem_type * ,
+                                                      const c_dpctl.DPCTLEventVectorRef) except +
+
 
 cpdef utils.dpnp_descriptor dpnp_argmax(utils.dpnp_descriptor in_array1):
     cdef DPNPFuncType param1_type = dpnp_dtype_to_DPNPFuncType(in_array1.dtype)
@@ -116,3 +140,81 @@ cpdef utils.dpnp_descriptor dpnp_argmin(utils.dpnp_descriptor in_array1):
     c_dpctl.DPCTLEvent_Delete(event_ref)
 
     return result
+
+
+cpdef utils.dpnp_descriptor dpnp_where(utils.dpnp_descriptor cond_obj,
+                                       utils.dpnp_descriptor x_obj,
+                                       utils.dpnp_descriptor y_obj):
+    # Convert object type to C enum DPNPFuncType
+    cdef DPNPFuncType cond_c_type = dpnp_dtype_to_DPNPFuncType(cond_obj.dtype)
+    cdef DPNPFuncType x_c_type = dpnp_dtype_to_DPNPFuncType(x_obj.dtype)
+    cdef DPNPFuncType y_c_type = dpnp_dtype_to_DPNPFuncType(y_obj.dtype)
+
+    # get the FPTR data structure
+    cdef DPNPFuncData kernel_data = get_dpnp_function_ptr(DPNP_FN_WHERE_EXT, x_c_type, y_c_type)
+
+    # Create result array
+    cdef shape_type_c cond_shape = cond_obj.shape
+    cdef shape_type_c x_shape = x_obj.shape
+    cdef shape_type_c y_shape = y_obj.shape
+
+    cdef shape_type_c cond_strides = utils.strides_to_vector(cond_obj.strides, cond_shape)
+    cdef shape_type_c x_strides = utils.strides_to_vector(x_obj.strides, x_shape)
+    cdef shape_type_c y_strides = utils.strides_to_vector(y_obj.strides, y_shape)
+
+    cdef shape_type_c cond_x_shape = utils.get_common_shape(cond_shape, x_shape)
+    cdef shape_type_c cond_y_shape = utils.get_common_shape(cond_shape, y_shape)
+    cdef shape_type_c result_shape = utils.get_common_shape(cond_x_shape, cond_y_shape)
+    cdef utils.dpnp_descriptor result
+
+    result_usm_type, result_sycl_queue = utils_py.get_usm_allocations([cond_obj.get_array(),
+                                                                       x_obj.get_array(),
+                                                                       y_obj.get_array()])
+
+    # get FPTR function and return type
+    cdef where_func_ptr_t func = < where_func_ptr_t > kernel_data.ptr
+    cdef DPNPFuncType return_type = kernel_data.return_type
+
+    """ Create result array with type given by FPTR data """
+    result = utils.create_output_descriptor(result_shape,
+                                            return_type,
+                                            None,
+                                            device=None,
+                                            usm_type=result_usm_type,
+                                            sycl_queue=result_sycl_queue)
+
+    cdef shape_type_c result_strides = utils.strides_to_vector(result.strides, result_shape)
+
+    result_obj = result.get_array()
+
+    cdef c_dpctl.SyclQueue q = < c_dpctl.SyclQueue > result_obj.sycl_queue
+    cdef c_dpctl.DPCTLSyclQueueRef q_ref = q.get_queue_ref()
+
+    """ Call FPTR function """
+    cdef c_dpctl.DPCTLSyclEventRef event_ref = func(q_ref,
+                                                    result.get_data(),
+                                                    result.size,
+                                                    result.ndim,
+                                                    result_shape.data(),
+                                                    result_strides.data(),
+                                                    cond_obj.get_data(),
+                                                    cond_obj.size,
+                                                    cond_obj.ndim,
+                                                    cond_shape.data(),
+                                                    cond_strides.data(),
+                                                    x_obj.get_data(),
+                                                    x_obj.size,
+                                                    x_obj.ndim,
+                                                    x_shape.data(),
+                                                    x_strides.data(),
+                                                    y_obj.get_data(),
+                                                    y_obj.size,
+                                                    y_obj.ndim,
+                                                    y_shape.data(),
+                                                    y_strides.data(),
+                                                    NULL)  # dep_events_ref)
+
+    with nogil: c_dpctl.DPCTLEvent_WaitAndThrow(event_ref)
+    c_dpctl.DPCTLEvent_Delete(event_ref)
+
+    return result
diff --git a/dpnp/dpnp_iface_searching.py b/dpnp/dpnp_iface_searching.py
index cef5d686035..1c2291ea33c 100644
--- a/dpnp/dpnp_iface_searching.py
+++ b/dpnp/dpnp_iface_searching.py
@@ -2,7 +2,7 @@
 # distutils: language = c++
 # -*- coding: utf-8 -*-
 # *****************************************************************************
-# Copyright (c) 2016-2020, Intel Corporation
+# Copyright (c) 2016-2023, Intel Corporation
 # All rights reserved.
 #
 # Redistribution and use in source and binary forms, with or without
@@ -44,7 +44,10 @@
 from dpnp.dpnp_utils import *
 
 import dpnp
+from dpnp.dpnp_array import dpnp_array
+
 import numpy
+import dpctl.tensor as dpt
 
 
 __all__ = [
@@ -176,12 +179,68 @@ def searchsorted(a, v, side='left', sorter=None):
     return call_origin(numpy.where, a, v, side, sorter)
 
 
-def where(condition, x=None, y=None):
+def where(condition, x=None, y=None, /):
     """
-    Find indices where elements should be inserted to maintain order.
+    Return elements chosen from `x` or `y` depending on `condition`.
 
-    For full documentation refer to :obj:`numpy.searchsorted`.
+    When only `condition` is provided, this function is a shorthand for
+    :obj:`dpnp.nonzero(condition) <dpnp.nonzero>`.
+
+    For full documentation refer to :obj:`numpy.where`.
+
+    Returns
+    -------
+    y : dpnp.ndarray
+        An array with elements from `x` where `condition` is True, and elements
+        from `y` elsewhere.
+
+    Limitations
+    -----------
+    Parameters `condition`, `x` and `y` are supported as either scalar, :class:`dpnp.ndarray`
+    or :class:`dpctl.tensor.usm_ndarray`.
+    Otherwise the function will be executed sequentially on CPU.
+    Data type of `condition` parameter is limited by :obj:`dpnp.bool`.
+    Input array data types of `x` and `y` are limited by supported DPNP :ref:`Data types`.
+
+    See Also
+    --------
+    :obj:`nonzero` : The function that is called when `x` and `y` are omitted.
+
+    Examples
+    --------
+    >>> import dpnp as dp
+    >>> a = dp.arange(10)
+    >>> a
+    array([0, 1, 2, 3, 4, 5, 6, 7, 8, 9])
+    >>> dp.where(a < 5, a, 10*a)
+    array([ 0,  1,  2,  3,  4, 50, 60, 70, 80, 90])
 
     """
 
+    missing = (x is None, y is None).count(True)
+    if missing == 1:
+        raise ValueError("Must provide both 'x' and 'y' or neither.")
+    elif missing == 2:
+        # TODO: rework through dpnp.nonzero() once ready
+        # return dpnp.nonzero(condition)
+        if isinstance(condition, dpnp_array):
+            return dpt.nonzero(condition.get_array())
+
+        if isinstance(condition, dpt.usm_ndarray):
+            return dpt.nonzero(condition)
+    elif missing == 0:
+        # get USM type and queue to copy scalar from the host memory into a USM allocation
+        usm_type, queue = get_usm_allocations([condition, x, y])
+
+        c_desc = dpnp.get_dpnp_descriptor(condition, copy_when_strides=False, copy_when_nondefault_queue=False,
+                                          alloc_usm_type=usm_type, alloc_queue=queue)
+        x_desc = dpnp.get_dpnp_descriptor(x, copy_when_strides=False, copy_when_nondefault_queue=False,
+                                          alloc_usm_type=usm_type, alloc_queue=queue)
+        y_desc = dpnp.get_dpnp_descriptor(y, copy_when_strides=False, copy_when_nondefault_queue=False,
+                                          alloc_usm_type=usm_type, alloc_queue=queue)
+        if c_desc and x_desc and y_desc:
+            if c_desc.dtype != dpnp.bool:
+                raise TypeError("condition must be a boolean array")
+            return dpnp_where(c_desc, x_desc, y_desc).get_pyobj()
+
     return call_origin(numpy.where, condition, x, y)
diff --git a/tests/skipped_tests.tbl b/tests/skipped_tests.tbl
index 995ddd682c7..26dd6fc59cd 100644
--- a/tests/skipped_tests.tbl
+++ b/tests/skipped_tests.tbl
@@ -364,7 +364,7 @@ tests/third_party/cupy/creation_tests/test_from_data.py::TestArrayPreservationOf
 tests/third_party/cupy/creation_tests/test_from_data.py::TestArrayPreservationOfShape_param_7_{copy=True, ndmin=3, xp=dpnp}::test_cupy_array
 tests/third_party/cupy/creation_tests/test_from_data.py::TestArrayPreservationOfShape_param_8_{copy=False, ndmin=0, xp=numpy}::test_cupy_array
 tests/third_party/cupy/creation_tests/test_from_data.py::TestArrayPreservationOfShape_param_9_{copy=False, ndmin=0, xp=dpnp}::test_cupy_array
-tests/third_party/cupy/creation_tests/test_from_data.py::TestFromData::test_array_copy_is_copied 
+tests/third_party/cupy/creation_tests/test_from_data.py::TestFromData::test_array_copy_is_copied
 tests/third_party/cupy/creation_tests/test_from_data.py::TestFromData::test_array_copy_list_of_cupy_with_dtype
 tests/third_party/cupy/creation_tests/test_from_data.py::TestFromData::test_array_copy_list_of_cupy_with_dtype_char
 tests/third_party/cupy/creation_tests/test_from_data.py::TestFromData::test_array_copy_list_of_numpy_with_dtype
@@ -1068,11 +1068,6 @@ tests/third_party/cupy/sorting_tests/test_search.py::TestNonzeroZeroDimension_pa
 tests/third_party/cupy/sorting_tests/test_search.py::TestNonzeroZeroDimension_param_1_{array=array(1)}::test_nonzero
 tests/third_party/cupy/sorting_tests/test_search.py::TestSearch::test_argmax_zero_size
 tests/third_party/cupy/sorting_tests/test_search.py::TestSearch::test_argmin_zero_size
-tests/third_party/cupy/sorting_tests/test_search.py::TestWhereCond_param_0_{cond_shape=(2, 3, 4)}::test_where_cond
-tests/third_party/cupy/sorting_tests/test_search.py::TestWhereCond_param_1_{cond_shape=(4,)}::test_where_cond
-tests/third_party/cupy/sorting_tests/test_search.py::TestWhereCond_param_2_{cond_shape=(2, 3, 4)}::test_where_cond
-tests/third_party/cupy/sorting_tests/test_search.py::TestWhereCond_param_3_{cond_shape=(3, 4)}::test_where_cond
-tests/third_party/cupy/sorting_tests/test_search.py::TestWhereError::test_one_argument
 tests/third_party/cupy/sorting_tests/test_sort.py::TestArgpartition_param_0_{external=False}::test_argpartition_axis
 tests/third_party/cupy/sorting_tests/test_sort.py::TestArgpartition_param_0_{external=False}::test_argpartition_invalid_axis1
 tests/third_party/cupy/sorting_tests/test_sort.py::TestArgpartition_param_0_{external=False}::test_argpartition_invalid_axis2
diff --git a/tests/skipped_tests_gpu.tbl b/tests/skipped_tests_gpu.tbl
index e3e0f889b2d..500ce35506f 100644
--- a/tests/skipped_tests_gpu.tbl
+++ b/tests/skipped_tests_gpu.tbl
@@ -1269,15 +1269,6 @@ tests/third_party/cupy/sorting_tests/test_search.py::TestNonzeroZeroDimension_pa
 tests/third_party/cupy/sorting_tests/test_search.py::TestNonzeroZeroDimension_param_1_{array=array(1)}::test_nonzero
 tests/third_party/cupy/sorting_tests/test_search.py::TestSearch::test_argmax_zero_size
 tests/third_party/cupy/sorting_tests/test_search.py::TestSearch::test_argmin_zero_size
-tests/third_party/cupy/sorting_tests/test_search.py::TestWhereCond_param_0_{cond_shape=(2, 3, 4)}::test_where_cond
-tests/third_party/cupy/sorting_tests/test_search.py::TestWhereCond_param_1_{cond_shape=(4,)}::test_where_cond
-tests/third_party/cupy/sorting_tests/test_search.py::TestWhereCond_param_2_{cond_shape=(2, 3, 4)}::test_where_cond
-tests/third_party/cupy/sorting_tests/test_search.py::TestWhereCond_param_3_{cond_shape=(3, 4)}::test_where_cond
-tests/third_party/cupy/sorting_tests/test_search.py::TestWhereError::test_one_argument
-tests/third_party/cupy/sorting_tests/test_search.py::TestWhereTwoArrays_param_0_{cond_shape=(2, 3, 4), x_shape=(2, 3, 4), y_shape=(2, 3, 4)}::test_where_two_arrays
-tests/third_party/cupy/sorting_tests/test_search.py::TestWhereTwoArrays_param_1_{cond_shape=(4,), x_shape=(2, 3, 4), y_shape=(2, 3, 4)}::test_where_two_arrays
-tests/third_party/cupy/sorting_tests/test_search.py::TestWhereTwoArrays_param_2_{cond_shape=(2, 3, 4), x_shape=(2, 3, 4), y_shape=(3, 4)}::test_where_two_arrays
-tests/third_party/cupy/sorting_tests/test_search.py::TestWhereTwoArrays_param_3_{cond_shape=(3, 4), x_shape=(2, 3, 4), y_shape=(4,)}::test_where_two_arrays
 tests/third_party/cupy/sorting_tests/test_sort.py::TestArgpartition_param_0_{external=False}::test_argpartition_axis
 tests/third_party/cupy/sorting_tests/test_sort.py::TestArgpartition_param_0_{external=False}::test_argpartition_invalid_axis1
 tests/third_party/cupy/sorting_tests/test_sort.py::TestArgpartition_param_0_{external=False}::test_argpartition_invalid_axis2
diff --git a/tests/third_party/cupy/sorting_tests/test_search.py b/tests/third_party/cupy/sorting_tests/test_search.py
index 838f559ed8c..17751aed75c 100644
--- a/tests/third_party/cupy/sorting_tests/test_search.py
+++ b/tests/third_party/cupy/sorting_tests/test_search.py
@@ -262,17 +262,20 @@ def test_argminmax_dtype(self, in_dtype, result_dtype):
     {'cond_shape': (2, 3, 4), 'x_shape': (2, 3, 4), 'y_shape': (3, 4)},
     {'cond_shape': (3, 4), 'x_shape': (2, 3, 4), 'y_shape': (4,)},
 )
+@pytest.mark.usefixtures("allow_fall_back_on_numpy")
 @testing.gpu
 class TestWhereTwoArrays(unittest.TestCase):
 
     @testing.for_all_dtypes_combination(
         names=['cond_type', 'x_type', 'y_type'])
-    @testing.numpy_cupy_allclose()
+    @testing.numpy_cupy_allclose(type_check=False)
     def test_where_two_arrays(self, xp, cond_type, x_type, y_type):
         m = testing.shaped_random(self.cond_shape, xp, xp.bool_)
         # Almost all values of a matrix `shaped_random` makes are not zero.
         # To make a sparse matrix, we need multiply `m`.
         cond = testing.shaped_random(self.cond_shape, xp, cond_type) * m
+        if xp is cupy:
+            cond = cond.astype(cupy.bool)
         x = testing.shaped_random(self.x_shape, xp, x_type, seed=0)
         y = testing.shaped_random(self.y_shape, xp, y_type, seed=1)
         return xp.where(cond, x, y)

From a8d71352a73725bf9e9a1df2b34832d421d67476 Mon Sep 17 00:00:00 2001
From: Anton <100830759+antonwolfy@users.noreply.github.com>
Date: Mon, 6 Mar 2023 12:42:21 +0100
Subject: [PATCH 16/16] Reuse dpctl.tensor implementation for dpnp.nonzero()
 (#1330)

---
 dpnp/backend/include/dpnp_iface.hpp         |  1 -
 dpnp/backend/include/dpnp_iface_fptr.hpp    |  1 -
 dpnp/backend/kernels/dpnp_krnl_indexing.cpp | 17 +-----
 dpnp/dpnp_algo/dpnp_algo.pxd                |  1 -
 dpnp/dpnp_algo/dpnp_algo_indexing.pyx       | 65 +--------------------
 dpnp/dpnp_array.py                          |  5 +-
 dpnp/dpnp_iface_indexing.py                 | 26 +++++----
 dpnp/dpnp_iface_searching.py                | 11 +---
 tests/skipped_tests_gpu.tbl                 |  7 ---
 9 files changed, 23 insertions(+), 111 deletions(-)

diff --git a/dpnp/backend/include/dpnp_iface.hpp b/dpnp/backend/include/dpnp_iface.hpp
index a124b9e07a1..348dd8e7bff 100644
--- a/dpnp/backend/include/dpnp_iface.hpp
+++ b/dpnp/backend/include/dpnp_iface.hpp
@@ -57,7 +57,6 @@ typedef ssize_t shape_elem_type;
 
 #include <dpctl_sycl_interface.h>
 
-#include "dpnp_iface_fptr.hpp"
 #include "dpnp_iface_fft.hpp"
 #include "dpnp_iface_random.hpp"
 
diff --git a/dpnp/backend/include/dpnp_iface_fptr.hpp b/dpnp/backend/include/dpnp_iface_fptr.hpp
index 3a393708255..197623efe45 100644
--- a/dpnp/backend/include/dpnp_iface_fptr.hpp
+++ b/dpnp/backend/include/dpnp_iface_fptr.hpp
@@ -234,7 +234,6 @@ enum class DPNPFuncName : size_t
     DPNP_FN_NEGATIVE,                     /**< Used in numpy.negative() impl  */
     DPNP_FN_NEGATIVE_EXT,                 /**< Used in numpy.negative() impl, requires extra parameters */
     DPNP_FN_NONZERO,                      /**< Used in numpy.nonzero() impl  */
-    DPNP_FN_NONZERO_EXT,                  /**< Used in numpy.nonzero() impl, requires extra parameters */
     DPNP_FN_NOT_EQUAL_EXT,                /**< Used in numpy.not_equal() impl, requires extra parameters */
     DPNP_FN_ONES,                         /**< Used in numpy.ones() impl */
     DPNP_FN_ONES_LIKE,                    /**< Used in numpy.ones_like() impl */
diff --git a/dpnp/backend/kernels/dpnp_krnl_indexing.cpp b/dpnp/backend/kernels/dpnp_krnl_indexing.cpp
index 5c3fb76e23b..756899b6cc5 100644
--- a/dpnp/backend/kernels/dpnp_krnl_indexing.cpp
+++ b/dpnp/backend/kernels/dpnp_krnl_indexing.cpp
@@ -475,6 +475,7 @@ void dpnp_nonzero_c(const void* in_array1,
                                                             j,
                                                             dep_event_vec_ref);
     DPCTLEvent_WaitAndThrow(event_ref);
+    DPCTLEvent_Delete(event_ref);
 }
 
 template <typename _DataType>
@@ -485,16 +486,6 @@ void (*dpnp_nonzero_default_c)(const void*,
                                const size_t,
                                const size_t) = dpnp_nonzero_c<_DataType>;
 
-template <typename _DataType>
-DPCTLSyclEventRef (*dpnp_nonzero_ext_c)(DPCTLSyclQueueRef,
-                                        const void*,
-                                        void*,
-                                        const size_t,
-                                        const shape_elem_type*,
-                                        const size_t,
-                                        const size_t,
-                                        const DPCTLEventVectorRef) = dpnp_nonzero_c<_DataType>;
-
 template <typename _DataType>
 DPCTLSyclEventRef dpnp_place_c(DPCTLSyclQueueRef q_ref,
                                void* arr_in,
@@ -1021,12 +1012,6 @@ void func_map_init_indexing_func(func_map_t& fmap)
     fmap[DPNPFuncName::DPNP_FN_NONZERO][eft_FLT][eft_FLT] = {eft_FLT, (void*)dpnp_nonzero_default_c<float>};
     fmap[DPNPFuncName::DPNP_FN_NONZERO][eft_DBL][eft_DBL] = {eft_DBL, (void*)dpnp_nonzero_default_c<double>};
 
-    fmap[DPNPFuncName::DPNP_FN_NONZERO_EXT][eft_BLN][eft_BLN] = {eft_BLN, (void*)dpnp_nonzero_ext_c<bool>};
-    fmap[DPNPFuncName::DPNP_FN_NONZERO_EXT][eft_INT][eft_INT] = {eft_INT, (void*)dpnp_nonzero_ext_c<int32_t>};
-    fmap[DPNPFuncName::DPNP_FN_NONZERO_EXT][eft_LNG][eft_LNG] = {eft_LNG, (void*)dpnp_nonzero_ext_c<int64_t>};
-    fmap[DPNPFuncName::DPNP_FN_NONZERO_EXT][eft_FLT][eft_FLT] = {eft_FLT, (void*)dpnp_nonzero_ext_c<float>};
-    fmap[DPNPFuncName::DPNP_FN_NONZERO_EXT][eft_DBL][eft_DBL] = {eft_DBL, (void*)dpnp_nonzero_ext_c<double>};
-
     fmap[DPNPFuncName::DPNP_FN_PLACE][eft_INT][eft_INT] = {eft_INT, (void*)dpnp_place_default_c<int32_t>};
     fmap[DPNPFuncName::DPNP_FN_PLACE][eft_LNG][eft_LNG] = {eft_LNG, (void*)dpnp_place_default_c<int64_t>};
     fmap[DPNPFuncName::DPNP_FN_PLACE][eft_FLT][eft_FLT] = {eft_FLT, (void*)dpnp_place_default_c<float>};
diff --git a/dpnp/dpnp_algo/dpnp_algo.pxd b/dpnp/dpnp_algo/dpnp_algo.pxd
index 9edf6255ef5..da1efddd3cc 100644
--- a/dpnp/dpnp_algo/dpnp_algo.pxd
+++ b/dpnp/dpnp_algo/dpnp_algo.pxd
@@ -211,7 +211,6 @@ cdef extern from "dpnp_iface_fptr.hpp" namespace "DPNPFuncName":  # need this na
         DPNP_FN_NEGATIVE
         DPNP_FN_NEGATIVE_EXT
         DPNP_FN_NONZERO
-        DPNP_FN_NONZERO_EXT
         DPNP_FN_NOT_EQUAL_EXT
         DPNP_FN_ONES
         DPNP_FN_ONES_LIKE
diff --git a/dpnp/dpnp_algo/dpnp_algo_indexing.pyx b/dpnp/dpnp_algo/dpnp_algo_indexing.pyx
index 907d2044d87..4e07c03f24a 100644
--- a/dpnp/dpnp_algo/dpnp_algo_indexing.pyx
+++ b/dpnp/dpnp_algo/dpnp_algo_indexing.pyx
@@ -1,7 +1,7 @@
 # cython: language_level=3
 # -*- coding: utf-8 -*-
 # *****************************************************************************
-# Copyright (c) 2016-2020, Intel Corporation
+# Copyright (c) 2016-2023, Intel Corporation
 # All rights reserved.
 #
 # Redistribution and use in source and binary forms, with or without
@@ -40,7 +40,6 @@ __all__ += [
     "dpnp_diagonal",
     "dpnp_fill_diagonal",
     "dpnp_indices",
-    "dpnp_nonzero",
     "dpnp_place",
     "dpnp_put",
     "dpnp_put_along_axis",
@@ -104,14 +103,6 @@ ctypedef c_dpctl.DPCTLSyclEventRef(*custom_indexing_6in_func_ptr_t)(c_dpctl.DPCT
                                                                     const size_t,
                                                                     const size_t,
                                                                     const c_dpctl.DPCTLEventVectorRef)
-ctypedef c_dpctl.DPCTLSyclEventRef(*fptr_dpnp_nonzero_t)(c_dpctl.DPCTLSyclQueueRef,
-                                                         const void * ,
-                                                         void * ,
-                                                         const size_t,
-                                                         const shape_elem_type * ,
-                                                         const size_t ,
-                                                         const size_t,
-                                                         const c_dpctl.DPCTLEventVectorRef)
 
 
 cpdef utils.dpnp_descriptor dpnp_choose(utils.dpnp_descriptor x1, list choices1):
@@ -316,60 +307,6 @@ cpdef object dpnp_indices(dimensions):
     return dpnp_result
 
 
-cpdef tuple dpnp_nonzero(utils.dpnp_descriptor in_array1):
-    cdef shape_type_c shape_arr = in_array1.shape
-    res_count = in_array1.ndim
-
-    # have to go through array one extra time to count size of result arrays
-    res_size_obj = dpnp_count_nonzero(in_array1)
-    cdef size_t res_size = dpnp.convert_single_elem_array_to_scalar(res_size_obj.get_pyobj())
-
-    cdef DPNPFuncType param1_type = dpnp_dtype_to_DPNPFuncType(in_array1.dtype)
-
-    cdef DPNPFuncData kernel_data = get_dpnp_function_ptr(DPNP_FN_NONZERO_EXT, param1_type, param1_type)
-
-    cdef fptr_dpnp_nonzero_t func = <fptr_dpnp_nonzero_t > kernel_data.ptr
-
-    cdef c_dpctl.SyclQueue q
-    cdef c_dpctl.DPCTLSyclQueueRef q_ref
-    cdef c_dpctl.DPCTLSyclEventRef event_ref
-
-    array1_obj = in_array1.get_array()
-
-    res_list = []
-    cdef utils.dpnp_descriptor res_arr
-    cdef shape_type_c result_shape
-    for j in range(res_count):
-        result_shape = utils._object_to_tuple(res_size)
-        res_arr = utils_py.create_output_descriptor_py(result_shape,
-                                                       dpnp.int64,
-                                                       None,
-                                                       device=array1_obj.sycl_device,
-                                                       usm_type=array1_obj.usm_type,
-                                                       sycl_queue=array1_obj.sycl_queue)
-
-        q = <c_dpctl.SyclQueue> res_arr.get_array().sycl_queue
-        q_ref = q.get_queue_ref()
-
-        event_ref = func(q_ref,
-                         in_array1.get_data(),
-                         res_arr.get_data(),
-                         res_arr.size,
-                         shape_arr.data(),
-                         in_array1.ndim,
-                         j,
-                         NULL)  # dep_events_ref
-
-        with nogil: c_dpctl.DPCTLEvent_WaitAndThrow(event_ref)
-        c_dpctl.DPCTLEvent_Delete(event_ref)
-
-        res_list.append(res_arr.get_pyobj())
-
-    result = utils._object_to_tuple(res_list)
-
-    return result
-
-
 cpdef dpnp_place(dpnp_descriptor arr, object mask, dpnp_descriptor vals):
     result_sycl_device, result_usm_type, result_sycl_queue = utils.get_common_usm_allocation(arr, vals)
 
diff --git a/dpnp/dpnp_array.py b/dpnp/dpnp_array.py
index 6c743f4e1c9..d1ad1252d4e 100644
--- a/dpnp/dpnp_array.py
+++ b/dpnp/dpnp_array.py
@@ -760,7 +760,10 @@ def ndim(self):
         return self._array_obj.ndim
 
  # 'newbyteorder',
- # 'nonzero',
+
+    def nonzero(self):
+        return dpnp.nonzero(self)
+
  # 'partition',
 
     def prod(self, axis=None, dtype=None, out=None, keepdims=False, initial=None, where=True):
diff --git a/dpnp/dpnp_iface_indexing.py b/dpnp/dpnp_iface_indexing.py
index 6ff554d89d3..ad2eb9794f6 100644
--- a/dpnp/dpnp_iface_indexing.py
+++ b/dpnp/dpnp_iface_indexing.py
@@ -2,7 +2,7 @@
 # distutils: language = c++
 # -*- coding: utf-8 -*-
 # *****************************************************************************
-# Copyright (c) 2016-2020, Intel Corporation
+# Copyright (c) 2016-2023, Intel Corporation
 # All rights reserved.
 #
 # Redistribution and use in source and binary forms, with or without
@@ -39,14 +39,14 @@
 
 """
 
-
-import collections
-
 from dpnp.dpnp_algo import *
 from dpnp.dpnp_utils import *
 
 import dpnp
+from dpnp.dpnp_array import dpnp_array
+
 import numpy
+import dpctl.tensor as dpt
 
 
 __all__ = [
@@ -286,15 +286,21 @@ def indices(dimensions, dtype=int, sparse=False):
     return call_origin(numpy.indices, dimensions, dtype, sparse)
 
 
-def nonzero(x1):
+def nonzero(x, /):
     """
     Return the indices of the elements that are non-zero.
 
     For full documentation refer to :obj:`numpy.nonzero`.
 
+    Returns
+    -------
+    y : tuple[dpnp.ndarray]
+        Indices of elements that are non-zero.
+
     Limitations
     -----------
-    Input array is supported as :obj:`dpnp.ndarray`.
+    Parameter `x` is supported as either :class:`dpnp.ndarray`
+    or :class:`dpctl.tensor.usm_ndarray`.
     Otherwise the function will be executed sequentially on CPU.
     Input array data types are limited by supported DPNP :ref:`Data types`.
 
@@ -329,11 +335,11 @@ def nonzero(x1):
 
     """
 
-    x1_desc = dpnp.get_dpnp_descriptor(x1, copy_when_nondefault_queue=False)
-    if x1_desc:
-        return dpnp_nonzero(x1_desc)
+    if isinstance(x, dpnp_array) or isinstance(x, dpt.usm_ndarray):
+        dpt_array = x.get_array() if isinstance(x, dpnp_array) else x
+        return tuple(dpnp_array._create_from_usm_ndarray(y) for y in dpt.nonzero(dpt_array))
 
-    return call_origin(numpy.nonzero, x1)
+    return call_origin(numpy.nonzero, x)
 
 
 def place(x1, mask, vals):
diff --git a/dpnp/dpnp_iface_searching.py b/dpnp/dpnp_iface_searching.py
index 1c2291ea33c..a0b17f4845e 100644
--- a/dpnp/dpnp_iface_searching.py
+++ b/dpnp/dpnp_iface_searching.py
@@ -44,10 +44,7 @@
 from dpnp.dpnp_utils import *
 
 import dpnp
-from dpnp.dpnp_array import dpnp_array
-
 import numpy
-import dpctl.tensor as dpt
 
 
 __all__ = [
@@ -221,13 +218,7 @@ def where(condition, x=None, y=None, /):
     if missing == 1:
         raise ValueError("Must provide both 'x' and 'y' or neither.")
     elif missing == 2:
-        # TODO: rework through dpnp.nonzero() once ready
-        # return dpnp.nonzero(condition)
-        if isinstance(condition, dpnp_array):
-            return dpt.nonzero(condition.get_array())
-
-        if isinstance(condition, dpt.usm_ndarray):
-            return dpt.nonzero(condition)
+        return dpnp.nonzero(condition)
     elif missing == 0:
         # get USM type and queue to copy scalar from the host memory into a USM allocation
         usm_type, queue = get_usm_allocations([condition, x, y])
diff --git a/tests/skipped_tests_gpu.tbl b/tests/skipped_tests_gpu.tbl
index 500ce35506f..7e9b9e5505d 100644
--- a/tests/skipped_tests_gpu.tbl
+++ b/tests/skipped_tests_gpu.tbl
@@ -52,13 +52,6 @@ tests/test_sycl_queue.py::test_modf[level_zero:gpu:0]
 tests/test_sycl_queue.py::test_1in_1out[opencl:gpu:0-trapz-data19]
 tests/test_sycl_queue.py::test_1in_1out[opencl:cpu:0-trapz-data19]
 
-tests/test_indexing.py::test_nonzero[[[1, 0], [1, 0]]]
-tests/test_indexing.py::test_nonzero[[[1, 2], [3, 4]]]
-tests/test_indexing.py::test_nonzero[[[0, 1, 2], [3, 0, 5], [6, 7, 0]]]
-tests/test_indexing.py::test_nonzero[[[0, 1, 0, 3, 0], [5, 0, 7, 0, 9]]]
-tests/test_indexing.py::test_nonzero[[[[1, 2], [0, 4]], [[0, 2], [0, 1]], [[0, 0], [3, 1]]]]
-tests/test_indexing.py::test_nonzero[[[[[1, 2, 3], [3, 4, 5]], [[1, 2, 3], [2, 1, 0]]], [[[1, 3, 5], [3, 1, 0]], [[0, 1, 2], [1, 3, 4]]]]]
-
 tests/third_party/cupy/indexing_tests/test_indexing.py::TestIndexing::test_take_no_axis
 tests/third_party/cupy/indexing_tests/test_insert.py::TestPlace_param_3_{n_vals=1, shape=(7,)}::test_place
 tests/third_party/cupy/indexing_tests/test_insert.py::TestPlace_param_4_{n_vals=1, shape=(2, 3)}::test_place