From 938a0a6b7cfed87561372d5653461bd11e9a6e50 Mon Sep 17 00:00:00 2001 From: Anton <100830759+antonwolfy@users.noreply.github.com> Date: Mon, 16 Jan 2023 11:40:48 +0100 Subject: [PATCH 01/32] Complete support of python 3.10 in external CI (#1269) --- .github/workflows/conda-package.yml | 11 ++++------- 1 file changed, 4 insertions(+), 7 deletions(-) diff --git a/.github/workflows/conda-package.yml b/.github/workflows/conda-package.yml index 07bd44c8fd1..da29bf31dd0 100644 --- a/.github/workflows/conda-package.yml +++ b/.github/workflows/conda-package.yml @@ -29,11 +29,8 @@ jobs: strategy: matrix: - python: ['3.8', '3.9'] + python: ['3.8', '3.9', '3.10'] os: [ubuntu-20.04, windows-latest] - include: - - python: '3.10' - os: ubuntu-20.04 runs-on: ${{ matrix.os }} @@ -110,7 +107,7 @@ jobs: strategy: matrix: - python: ['3.8', '3.9'] + python: ['3.8', '3.9', '3.10'] os: [ubuntu-20.04, ubuntu-latest] experimental: [false] @@ -215,7 +212,7 @@ jobs: strategy: matrix: - python: ['3.8', '3.9'] + python: ['3.8', '3.9', '3.10'] experimental: [false] continue-on-error: ${{ matrix.experimental }} @@ -384,7 +381,7 @@ jobs: strategy: matrix: - python: ['3.8', '3.9'] + python: ['3.8', '3.9', '3.10'] os: [ubuntu-20.04, windows-latest] runs-on: ${{ matrix.os }} From 12e7bd5576b4468fab224125283cf901f8c36b96 Mon Sep 17 00:00:00 2001 From: Anton <100830759+antonwolfy@users.noreply.github.com> Date: Mon, 16 Jan 2023 21:20:28 +0100 Subject: [PATCH 02/32] Set minimum required versions & fix debug building (#1270) * Set minimum required versions & fix debug building * Fix typo --- 0.build.sh | 8 ++++++++ dpnp/backend/CMakeLists.txt | 20 +++++++++++++------- dpnp/backend/include/dpnp_iface.hpp | 4 ++-- dpnp/backend/kernels/dpnp_krnl_fft.cpp | 8 +++++++- dpnp/backend/kernels/dpnp_krnl_indexing.cpp | 3 ++- dpnp/backend/kernels/dpnp_krnl_random.cpp | 14 ++++---------- dpnp/backend/src/dpnp_utils.hpp | 10 +++++----- dpnp/backend/src/dpnpc_memory_adapter.hpp | 6 +----- 
dpnp/backend/src/queue_sycl.cpp | 7 +------ dpnp/backend/src/queue_sycl.hpp | 4 ++-- dpnp/dpnp_algo/dpnp_algo.pyx | 4 ++-- 11 files changed, 47 insertions(+), 41 deletions(-) diff --git a/0.build.sh b/0.build.sh index 380214e6802..59df4dc8571 100755 --- a/0.build.sh +++ b/0.build.sh @@ -4,6 +4,14 @@ THEDIR=$(dirname $(readlink -e ${BASH_SOURCE[0]})) # . ${THEDIR}/0.env.sh cd ${THEDIR} +# Assign $TMP env variable to a directory where the script locates. +# The env variable is used by compiler as a path to temporary folder, +# where it can store a temporary files generated during compilation and linkage phases. +# By default the compiler uses /tmp folder, but it is limited by the size and +# there might be not enough space to temporary keep all generated data. +export TMP=${THEDIR} + + export DPNP_DEBUG=1 python setup.py clean diff --git a/dpnp/backend/CMakeLists.txt b/dpnp/backend/CMakeLists.txt index 1714124cf85..f66aa4be1ae 100644 --- a/dpnp/backend/CMakeLists.txt +++ b/dpnp/backend/CMakeLists.txt @@ -1,5 +1,5 @@ # ***************************************************************************** -# Copyright (c) 2016-2022, Intel Corporation +# Copyright (c) 2016-2023, Intel Corporation # All rights reserved. 
# # Redistribution and use in source and binary forms, with or without @@ -111,7 +111,7 @@ elseif(WIN32) # set(CMAKE_RANLIB "llvm-ranlib") # set(CMAKE_CXX_FLAGS "/EHsc") - string(APPEND COMMON_COMPILER_FLAGS + string(APPEND COMMON_COMPILE_FLAGS "/EHsc " # "/Ox " # "/W3 " @@ -133,23 +133,29 @@ string(CONCAT DPNP_WARNING_FLAGS "-Wextra " "-Wshadow " "-Wall " - "-Wstring-prototypes " + "-Wstrict-prototypes " "-Wformat " "-Wformat-security " ) -string(APPEND COMMON_COMPILER_FLAGS +string(APPEND COMMON_COMPILE_FLAGS "${DPNP_WARNING_FLAGS}" ) # debug/release compile definitions if(DPNP_DEBUG_ENABLE) set(CMAKE_BUILD_TYPE "Debug") - string(APPEND COMMON_COMPILER_FLAGS + string(APPEND COMMON_COMPILE_FLAGS "-O0 " + "-ggdb3 " + ) + string(APPEND COMMON_LINK_FLAGS + "-O0 " + "-ggdb3 " + "-fsycl-link-huge-device-code " ) else() set(CMAKE_BUILD_TYPE "Release") - string(APPEND COMMON_COMPILER_FLAGS + string(APPEND COMMON_COMPILE_FLAGS "-O3 " ) endif() @@ -162,7 +168,7 @@ string(CONCAT DPNP_DEFS "-D_FORTIFY_SOURCE=2 " ) if(NOT WIN32) - string(APPEND COMMON_COMPILER_FLAGS + string(APPEND COMMON_COMPILE_FLAGS "-fno-delete-null-pointer-checks " "-fstack-protector-strong " "-fno-strict-overflow " diff --git a/dpnp/backend/include/dpnp_iface.hpp b/dpnp/backend/include/dpnp_iface.hpp index 42c05f0fd61..756c7082598 100644 --- a/dpnp/backend/include/dpnp_iface.hpp +++ b/dpnp/backend/include/dpnp_iface.hpp @@ -1,5 +1,5 @@ //***************************************************************************** -// Copyright (c) 2016-2020, Intel Corporation +// Copyright (c) 2016-2023, Intel Corporation // All rights reserved. // // Redistribution and use in source and binary forms, with or without @@ -95,7 +95,7 @@ INP_DLLEXPORT void dpnp_queue_initialize_c(QueueOptions selector = QueueOptions: * @ingroup BACKEND_API * @brief SYCL queue device status. * - * Return 1 if current @ref queue is related to cpu or host device. return 0 otherwise. 
+ * Return 1 if current @ref queue is related to cpu device. return 0 otherwise. */ INP_DLLEXPORT size_t dpnp_queue_is_cpu_c(); diff --git a/dpnp/backend/kernels/dpnp_krnl_fft.cpp b/dpnp/backend/kernels/dpnp_krnl_fft.cpp index 3d39f2f373c..b3f9716d73f 100644 --- a/dpnp/backend/kernels/dpnp_krnl_fft.cpp +++ b/dpnp/backend/kernels/dpnp_krnl_fft.cpp @@ -1,5 +1,5 @@ //***************************************************************************** -// Copyright (c) 2016-2022, Intel Corporation +// Copyright (c) 2016-2023, Intel Corporation // All rights reserved. // // Redistribution and use in source and binary forms, with or without @@ -182,7 +182,10 @@ static void dpnp_fft_fft_mathlib_cmplx_to_cmplx_c(DPCTLSyclQueueRef q_ref, size_t inverse, const size_t norm) { + // avoid warning unused variable (void)result_shape; + (void)input_size; + (void)result_size; if (!shape_size) { return; @@ -253,6 +256,9 @@ static DPCTLSyclEventRef dpnp_fft_fft_mathlib_real_to_cmplx_c(DPCTLSyclQueueRef const size_t norm, const size_t real) { + // avoid warning unused variable + (void)input_size; + DPCTLSyclEventRef event_ref = nullptr; if (!shape_size) { return event_ref; diff --git a/dpnp/backend/kernels/dpnp_krnl_indexing.cpp b/dpnp/backend/kernels/dpnp_krnl_indexing.cpp index 5cde013b69f..0b80ac678d3 100644 --- a/dpnp/backend/kernels/dpnp_krnl_indexing.cpp +++ b/dpnp/backend/kernels/dpnp_krnl_indexing.cpp @@ -1,5 +1,5 @@ //***************************************************************************** -// Copyright (c) 2016-2020, Intel Corporation +// Copyright (c) 2016-2023, Intel Corporation // All rights reserved. 
// // Redistribution and use in source and binary forms, with or without @@ -896,6 +896,7 @@ DPCTLSyclEventRef dpnp_take_c(DPCTLSyclQueueRef q_ref, const DPCTLEventVectorRef dep_event_vec_ref) { // avoid warning unused variable + (void)array1_size; (void)dep_event_vec_ref; DPCTLSyclEventRef event_ref = nullptr; diff --git a/dpnp/backend/kernels/dpnp_krnl_random.cpp b/dpnp/backend/kernels/dpnp_krnl_random.cpp index 4411e207003..568db448d96 100644 --- a/dpnp/backend/kernels/dpnp_krnl_random.cpp +++ b/dpnp/backend/kernels/dpnp_krnl_random.cpp @@ -1,5 +1,5 @@ //***************************************************************************** -// Copyright (c) 2016-2022, Intel Corporation +// Copyright (c) 2016-2023, Intel Corporation // All rights reserved. // // Redistribution and use in source and binary forms, with or without @@ -37,6 +37,9 @@ #include "queue_sycl.hpp" #include "dpnp_random_state.hpp" +static_assert(INTEL_MKL_VERSION >= __INTEL_MKL_2023_VERSION_REQUIRED, + "MKL does not meet minimum version requirement"); + namespace mkl_blas = oneapi::mkl::blas; namespace mkl_rng = oneapi::mkl::rng; namespace mkl_vm = oneapi::mkl::vm; @@ -990,11 +993,7 @@ DPCTLSyclEventRef dpnp_rng_multinomial_c(DPCTLSyclQueueRef q_ref, DPNPC_ptr_adapter<_DataType> result_ptr(q_ref, result, size, true, true); _DataType* result1 = result_ptr.get_ptr(); -#if (INTEL_MKL_VERSION < __INTEL_MKL_2023_SWITCHOVER) - std::vector p(p_data, p_data + p_size); -#else auto p = sycl::span{p_data, p_size}; -#endif mkl_rng::multinomial<_DataType> distribution(ntrial, p); // perform generation @@ -1082,13 +1081,8 @@ DPCTLSyclEventRef dpnp_rng_multivariate_normal_c(DPCTLSyclQueueRef q_ref, _DataType* result1 = static_cast<_DataType *>(result); -#if (INTEL_MKL_VERSION < __INTEL_MKL_2023_SWITCHOVER) - std::vector mean(mean_data, mean_data + mean_size); - std::vector cov(cov_data, cov_data + cov_size); -#else auto mean = sycl::span{mean_data, mean_size}; auto cov = sycl::span{cov_data, cov_size}; -#endif // 
`result` is a array for random numbers // `size` is a `result`'s len. diff --git a/dpnp/backend/src/dpnp_utils.hpp b/dpnp/backend/src/dpnp_utils.hpp index 33f4d750067..985d5a61494 100644 --- a/dpnp/backend/src/dpnp_utils.hpp +++ b/dpnp/backend/src/dpnp_utils.hpp @@ -1,5 +1,5 @@ //***************************************************************************** -// Copyright (c) 2016-2022, Intel Corporation +// Copyright (c) 2016-2023, Intel Corporation // All rights reserved. // // Redistribution and use in source and binary forms, with or without @@ -45,15 +45,15 @@ * Intel(R) oneAPI DPC++ 2022.2.1 compiler has version 20221020L on Linux and * 20221101L on Windows. */ -#ifndef __SYCL_COMPILER_2023_SWITCHOVER -#define __SYCL_COMPILER_2023_SWITCHOVER 20221102L +#ifndef __SYCL_COMPILER_VERSION_REQUIRED +#define __SYCL_COMPILER_VERSION_REQUIRED 20221102L #endif /** * Version of Intel MKL at which transition to OneMKL release 2023.0.0 occurs. */ -#ifndef __INTEL_MKL_2023_SWITCHOVER -#define __INTEL_MKL_2023_SWITCHOVER 20230000 +#ifndef __INTEL_MKL_2023_VERSION_REQUIRED +#define __INTEL_MKL_2023_VERSION_REQUIRED 20230000 #endif /** diff --git a/dpnp/backend/src/dpnpc_memory_adapter.hpp b/dpnp/backend/src/dpnpc_memory_adapter.hpp index dab09622a69..6c81f526778 100644 --- a/dpnp/backend/src/dpnpc_memory_adapter.hpp +++ b/dpnp/backend/src/dpnpc_memory_adapter.hpp @@ -1,5 +1,5 @@ //***************************************************************************** -// Copyright (c) 2016-2022, Intel Corporation +// Copyright (c) 2016-2023, Intel Corporation // All rights reserved. 
// // Redistribution and use in source and binary forms, with or without @@ -85,10 +85,6 @@ class DPNPC_ptr_adapter final std::cerr << "\n\t size_in_bytes=" << size_in_bytes; std::cerr << "\n\t pointer type=" << (long)src_ptr_type; std::cerr << "\n\t queue inorder=" << queue.is_in_order(); -#if (__SYCL_COMPILER_VERSION < __SYCL_COMPILER_2023_SWITCHOVER) - std::cerr << "\n\t queue is_host=" << queue.is_host(); - std::cerr << "\n\t queue device is_host=" << queue.get_device().is_host(); -#endif std::cerr << "\n\t queue device is_cpu=" << queue.get_device().is_cpu(); std::cerr << "\n\t queue device is_gpu=" << queue.get_device().is_gpu(); std::cerr << "\n\t queue device is_accelerator=" << queue.get_device().is_accelerator(); diff --git a/dpnp/backend/src/queue_sycl.cpp b/dpnp/backend/src/queue_sycl.cpp index 0810ed0aaba..55f78230d64 100644 --- a/dpnp/backend/src/queue_sycl.cpp +++ b/dpnp/backend/src/queue_sycl.cpp @@ -1,5 +1,5 @@ //***************************************************************************** -// Copyright (c) 2016-2020, Intel Corporation +// Copyright (c) 2016-2023, Intel Corporation // All rights reserved. // // Redistribution and use in source and binary forms, with or without @@ -215,11 +215,6 @@ bool backend_sycl::backend_sycl_is_cpu() if (qptr.get_device().is_cpu()) { return true; } -#if (__SYCL_COMPILER_VERSION < __SYCL_COMPILER_2023_SWITCHOVER) - else if (qptr.is_host() || qptr.get_device().is_host()) { - return true; - } -#endif return false; } diff --git a/dpnp/backend/src/queue_sycl.hpp b/dpnp/backend/src/queue_sycl.hpp index af03e1b6f12..63dc01c1dab 100644 --- a/dpnp/backend/src/queue_sycl.hpp +++ b/dpnp/backend/src/queue_sycl.hpp @@ -1,5 +1,5 @@ //***************************************************************************** -// Copyright (c) 2016-2020, Intel Corporation +// Copyright (c) 2016-2023, Intel Corporation // All rights reserved. 
// // Redistribution and use in source and binary forms, with or without @@ -113,7 +113,7 @@ class backend_sycl static void backend_sycl_queue_init(QueueOptions selector = QueueOptions::CPU_SELECTOR); /** - * Return True if current @ref queue is related to cpu or host device + * Return True if current @ref queue is related to cpu device */ static bool backend_sycl_is_cpu(); diff --git a/dpnp/dpnp_algo/dpnp_algo.pyx b/dpnp/dpnp_algo/dpnp_algo.pyx index 41f0c0c0102..a41be19b606 100644 --- a/dpnp/dpnp_algo/dpnp_algo.pyx +++ b/dpnp/dpnp_algo/dpnp_algo.pyx @@ -1,7 +1,7 @@ # cython: language_level=3 # -*- coding: utf-8 -*- # ***************************************************************************** -# Copyright (c) 2016-2022, Intel Corporation +# Copyright (c) 2016-2023, Intel Corporation # All rights reserved. # # Redistribution and use in source and binary forms, with or without @@ -221,7 +221,7 @@ cpdef dpnp_queue_initialize(): cpdef dpnp_queue_is_cpu(): - """Return 1 if current queue is CPU or HOST. Return 0 otherwise. + """Return 1 if current queue is CPU. Return 0 otherwise. 
""" return dpnp_queue_is_cpu_c() From 9209351ae4960be3dd381c278b2eca98af09e7de Mon Sep 17 00:00:00 2001 From: Anton <100830759+antonwolfy@users.noreply.github.com> Date: Mon, 23 Jan 2023 18:04:28 +0100 Subject: [PATCH 03/32] Add support of NumPy 1.24 (#1276) * Set minimum required versions & fix debug building * Add support of numpy 1.24 --- conda-recipe/meta.yaml | 2 +- dpnp/backend/include/dpnp_iface_fptr.hpp | 4 +-- dpnp/dparray.pyx | 10 +++--- dpnp/dpnp_algo/dpnp_algo.pyx | 2 +- dpnp/dpnp_algo/dpnp_algo_arraycreation.pyx | 4 +-- dpnp/dpnp_array.py | 6 ++-- dpnp/dpnp_iface_arraycreation.py | 6 ++-- dpnp/dpnp_iface_mathematical.py | 6 ++-- dpnp/dpnp_iface_statistics.py | 6 ++-- dpnp/dpnp_iface_types.py | 8 ++--- dpnp/dpnp_utils/dpnp_algo_utils.pyx | 4 +-- examples/example4.py | 8 ++--- tests/skipped_tests.tbl | 14 +------- tests/skipped_tests_gpu.tbl | 14 +------- tests/test_arraycreation.py | 24 ++++++------- tests/test_dparray.py | 14 ++++---- tests/test_logic.py | 8 ++--- tests/test_random_state.py | 36 +++++++++++-------- .../cupy/creation_tests/test_ranges.py | 2 +- .../cupy/indexing_tests/test_generate.py | 2 +- .../cupy/indexing_tests/test_insert.py | 2 +- .../cupy/math_tests/test_arithmetic.py | 5 +-- 22 files changed, 85 insertions(+), 102 deletions(-) diff --git a/conda-recipe/meta.yaml b/conda-recipe/meta.yaml index 6004b945bb6..3e411e354a9 100644 --- a/conda-recipe/meta.yaml +++ b/conda-recipe/meta.yaml @@ -8,7 +8,7 @@ requirements: host: - python - setuptools - - numpy >=1.19,<1.23a0 + - numpy >=1.19,<1.25a0 - cython - cmake >=3.19 - dpctl >=0.13 diff --git a/dpnp/backend/include/dpnp_iface_fptr.hpp b/dpnp/backend/include/dpnp_iface_fptr.hpp index 8e209d38317..126fdd24f3b 100644 --- a/dpnp/backend/include/dpnp_iface_fptr.hpp +++ b/dpnp/backend/include/dpnp_iface_fptr.hpp @@ -1,5 +1,5 @@ //***************************************************************************** -// Copyright (c) 2016-2022, Intel Corporation +// Copyright (c) 2016-2023, Intel 
Corporation // All rights reserved. // // Redistribution and use in source and binary forms, with or without @@ -391,7 +391,7 @@ enum class DPNPFuncType : size_t DPNP_FT_DOUBLE, /**< analog of numpy.float32 or double */ DPNP_FT_CMPLX64, /**< analog of numpy.complex64 or std::complex */ DPNP_FT_CMPLX128, /**< analog of numpy.complex128 or std::complex */ - DPNP_FT_BOOL /**< analog of numpy.bool or numpy.bool_ or bool */ + DPNP_FT_BOOL /**< analog of numpy.bool_ or bool */ }; /** diff --git a/dpnp/dparray.pyx b/dpnp/dparray.pyx index 859bf49d59a..dffbf6f65d1 100644 --- a/dpnp/dparray.pyx +++ b/dpnp/dparray.pyx @@ -1,7 +1,7 @@ # cython: language_level=3 # -*- coding: utf-8 -*- # ***************************************************************************** -# Copyright (c) 2016-2022, Intel Corporation +# Copyright (c) 2016-2023, Intel Corporation # All rights reserved. # # Redistribution and use in source and binary forms, with or without @@ -462,7 +462,7 @@ cdef class dparray: return ( < long * > self._dparray_data)[lin_idx] elif self.dtype == numpy.int32: return ( < int * > self._dparray_data)[lin_idx] - elif self.dtype == numpy.bool: + elif self.dtype == numpy.bool_: return ( < cpp_bool * > self._dparray_data)[lin_idx] elif self.dtype == numpy.complex128: return ( < double complex * > self._dparray_data)[lin_idx] @@ -489,7 +489,7 @@ cdef class dparray: ( < long * > self._dparray_data)[lin_idx] = value elif self.dtype == numpy.int32: ( < int * > self._dparray_data)[lin_idx] = value - elif self.dtype == numpy.bool: + elif self.dtype == numpy.bool_: ( < cpp_bool * > self._dparray_data)[lin_idx] = < cpp_bool > value elif self.dtype == numpy.complex64: ( < float complex * > self._dparray_data)[lin_idx] = value @@ -876,7 +876,7 @@ cdef class dparray: """ - if not numpy.issubsctype(self.dtype, numpy.complex): + if not numpy.issubsctype(self.dtype, numpy.complex_): return self else: return conjugate(self) @@ -889,7 +889,7 @@ cdef class dparray: """ - if not 
numpy.issubsctype(self.dtype, numpy.complex): + if not numpy.issubsctype(self.dtype, numpy.complex_): return self else: return conjugate(self) diff --git a/dpnp/dpnp_algo/dpnp_algo.pyx b/dpnp/dpnp_algo/dpnp_algo.pyx index a41be19b606..aaa7334e18a 100644 --- a/dpnp/dpnp_algo/dpnp_algo.pyx +++ b/dpnp/dpnp_algo/dpnp_algo.pyx @@ -276,7 +276,7 @@ cdef dpnp_DPNPFuncType_to_dtype(size_t type): elif type == DPNP_FT_CMPLX128: return numpy.complex128 elif type == DPNP_FT_BOOL: - return numpy.bool + return numpy.bool_ else: utils.checker_throw_type_error("dpnp_DPNPFuncType_to_dtype", type) diff --git a/dpnp/dpnp_algo/dpnp_algo_arraycreation.pyx b/dpnp/dpnp_algo/dpnp_algo_arraycreation.pyx index c1c24a27747..1ec69c55311 100644 --- a/dpnp/dpnp_algo/dpnp_algo_arraycreation.pyx +++ b/dpnp/dpnp_algo/dpnp_algo_arraycreation.pyx @@ -1,7 +1,7 @@ # cython: language_level=3 # -*- coding: utf-8 -*- # ***************************************************************************** -# Copyright (c) 2016-2022, Intel Corporation +# Copyright (c) 2016-2023, Intel Corporation # All rights reserved. # # Redistribution and use in source and binary forms, with or without @@ -434,7 +434,7 @@ cpdef utils.dpnp_descriptor dpnp_trace(utils.dpnp_descriptor arr, offset=0, axis return result -cpdef utils.dpnp_descriptor dpnp_tri(N, M=None, k=0, dtype=numpy.float): +cpdef utils.dpnp_descriptor dpnp_tri(N, M=None, k=0, dtype=dpnp.float): if M is None: M = N diff --git a/dpnp/dpnp_array.py b/dpnp/dpnp_array.py index 82c271fa7d9..57f057ae760 100644 --- a/dpnp/dpnp_array.py +++ b/dpnp/dpnp_array.py @@ -1,6 +1,6 @@ # -*- coding: utf-8 -*- # ***************************************************************************** -# Copyright (c) 2016-2022, Intel Corporation +# Copyright (c) 2016-2023, Intel Corporation # All rights reserved. 
# # Redistribution and use in source and binary forms, with or without @@ -493,7 +493,7 @@ def conj(self): """ - if not numpy.issubsctype(self.dtype, numpy.complex): + if not numpy.issubsctype(self.dtype, numpy.complex_): return self else: return dpnp.conjugate(self) @@ -506,7 +506,7 @@ def conjugate(self): """ - if not numpy.issubsctype(self.dtype, numpy.complex): + if not numpy.issubsctype(self.dtype, numpy.complex_): return self else: return dpnp.conjugate(self) diff --git a/dpnp/dpnp_iface_arraycreation.py b/dpnp/dpnp_iface_arraycreation.py index 5fb4d8c7a4d..03d8b818520 100644 --- a/dpnp/dpnp_iface_arraycreation.py +++ b/dpnp/dpnp_iface_arraycreation.py @@ -2,7 +2,7 @@ # distutils: language = c++ # -*- coding: utf-8 -*- # ***************************************************************************** -# Copyright (c) 2016-2022, Intel Corporation +# Copyright (c) 2016-2023, Intel Corporation # All rights reserved. # # Redistribution and use in source and binary forms, with or without @@ -1280,7 +1280,7 @@ def trace(x1, offset=0, axis1=0, axis2=1, dtype=None, out=None): return call_origin(numpy.trace, x1, offset, axis1, axis2, dtype, out) -def tri(N, M=None, k=0, dtype=numpy.float, **kwargs): +def tri(N, M=None, k=0, dtype=dpnp.float, **kwargs): """ An array with ones at and below the given diagonal and zeros elsewhere. 
@@ -1315,7 +1315,7 @@ def tri(N, M=None, k=0, dtype=numpy.float, **kwargs): elif not isinstance(k, int): pass else: - if dtype is numpy.float: + if dtype is dpnp.float: sycl_queue = dpnp.get_normalized_queue_device(sycl_queue=None, device=None) dtype = map_dtype_to_device(dpnp.float64, sycl_queue.sycl_device) return dpnp_tri(N, M, k, dtype).get_pyobj() diff --git a/dpnp/dpnp_iface_mathematical.py b/dpnp/dpnp_iface_mathematical.py index 26b81a67dd9..ce9f340e8e4 100644 --- a/dpnp/dpnp_iface_mathematical.py +++ b/dpnp/dpnp_iface_mathematical.py @@ -2,7 +2,7 @@ # distutils: language = c++ # -*- coding: utf-8 -*- # ***************************************************************************** -# Copyright (c) 2016-2022, Intel Corporation +# Copyright (c) 2016-2023, Intel Corporation # All rights reserved. # # Redistribution and use in source and binary forms, with or without @@ -1545,11 +1545,11 @@ def subtract(x1, x2, dtype=None, out=None, where=True, **kwargs): pass elif x1_desc and x1_desc.ndim == 0: pass - elif x1_desc and x1_desc.dtype == numpy.bool: + elif x1_desc and x1_desc.dtype == dpnp.bool: pass elif x2_desc and x2_desc.ndim == 0: pass - elif x2_desc and x2_desc.dtype == numpy.bool: + elif x2_desc and x2_desc.dtype == dpnp.bool: pass elif dtype is not None: pass diff --git a/dpnp/dpnp_iface_statistics.py b/dpnp/dpnp_iface_statistics.py index 27eaf4a115f..ab92f8cc625 100644 --- a/dpnp/dpnp_iface_statistics.py +++ b/dpnp/dpnp_iface_statistics.py @@ -2,7 +2,7 @@ # distutils: language = c++ # -*- coding: utf-8 -*- # ***************************************************************************** -# Copyright (c) 2016-2020, Intel Corporation +# Copyright (c) 2016-2023, Intel Corporation # All rights reserved. 
# # Redistribution and use in source and binary forms, with or without @@ -299,7 +299,7 @@ def cov(x1, y=None, rowvar=True, bias=False, ddof=None, fweights=None, aweights= return call_origin(numpy.cov, x1, y, rowvar, bias, ddof, fweights, aweights) -def histogram(a, bins=10, range=None, normed=None, weights=None, density=None): +def histogram(a, bins=10, range=None, density=None, weights=None): """ Compute the histogram of a dataset. For full documentation refer to :obj:`numpy.histogram`. @@ -323,7 +323,7 @@ def histogram(a, bins=10, range=None, normed=None, weights=None, density=None): 1.0 """ - return call_origin(numpy.histogram, a=a, bins=bins, range=range, normed=normed, weights=weights, density=density) + return call_origin(numpy.histogram, a=a, bins=bins, range=range, density=density, weights=weights) def max(x1, axis=None, out=None, keepdims=False, initial=None, where=True): diff --git a/dpnp/dpnp_iface_types.py b/dpnp/dpnp_iface_types.py index dfcf599bf3b..787dcaa473b 100644 --- a/dpnp/dpnp_iface_types.py +++ b/dpnp/dpnp_iface_types.py @@ -2,7 +2,7 @@ # distutils: language = c++ # -*- coding: utf-8 -*- # ***************************************************************************** -# Copyright (c) 2016-2020, Intel Corporation +# Copyright (c) 2016-2023, Intel Corporation # All rights reserved. 
# # Redistribution and use in source and binary forms, with or without @@ -59,7 +59,7 @@ "void" ] -bool = numpy.bool +bool = numpy.bool_ bool_ = numpy.bool_ complex128 = numpy.complex128 complex64 = numpy.complex64 @@ -67,11 +67,11 @@ float16 = numpy.float16 float32 = numpy.float32 float64 = numpy.float64 -float = numpy.float +float = numpy.float_ int32 = numpy.int32 int64 = numpy.int64 integer = numpy.integer -int = numpy.int +int = numpy.int_ longcomplex = numpy.longcomplex diff --git a/dpnp/dpnp_utils/dpnp_algo_utils.pyx b/dpnp/dpnp_utils/dpnp_algo_utils.pyx index c09bef8ec48..2e04dd96bd6 100644 --- a/dpnp/dpnp_utils/dpnp_algo_utils.pyx +++ b/dpnp/dpnp_utils/dpnp_algo_utils.pyx @@ -1,7 +1,7 @@ # cython: language_level=3 # -*- coding: utf-8 -*- # ***************************************************************************** -# Copyright (c) 2016-2022, Intel Corporation +# Copyright (c) 2016-2023, Intel Corporation # All rights reserved. # # Redistribution and use in source and binary forms, with or without @@ -399,7 +399,7 @@ cdef tuple get_shape_dtype(object input_obj): # shape and dtype does not match with siblings. if ((return_shape != elem_shape) or (return_dtype != elem_dtype)): - return (elem_shape, numpy.dtype(numpy.object)) + return (elem_shape, numpy.dtype(numpy.object_)) list_shape.push_back(len(input_obj)) list_shape.insert(list_shape.end(), return_shape.begin(), return_shape.end()) diff --git a/examples/example4.py b/examples/example4.py index 0790f84d10a..6705149d52b 100755 --- a/examples/example4.py +++ b/examples/example4.py @@ -1,7 +1,7 @@ # cython: language_level=3 # -*- coding: utf-8 -*- # ***************************************************************************** -# Copyright (c) 2016-2020, Intel Corporation +# Copyright (c) 2016-2023, Intel Corporation # All rights reserved. 
# # Redistribution and use in source and binary forms, with or without @@ -40,7 +40,7 @@ """ for function in [numpy.sqrt, numpy.fabs, numpy.reciprocal, numpy.square, numpy.cbrt, numpy.degrees, numpy.radians]: print() - for test_type in [numpy.float64, numpy.float32, numpy.int64, numpy.int32, numpy.bool]: + for test_type in [numpy.float64, numpy.float32, numpy.int64, numpy.int32, numpy.bool_]: data = numpy.array([1, 2, 3, 4], dtype=test_type) result = function(data) print(f"input:{data.dtype.name:10}: outout:{result.dtype.name:10}: name:{function.__name__}") @@ -50,8 +50,8 @@ """ for function in [numpy.equal, numpy.arctan2]: print() - for input1_type in [numpy.float64, numpy.float32, numpy.int64, numpy.int32, numpy.bool]: - for input2_type in [numpy.float64, numpy.float32, numpy.int64, numpy.int32, numpy.bool]: + for input1_type in [numpy.float64, numpy.float32, numpy.int64, numpy.int32, numpy.bool_]: + for input2_type in [numpy.float64, numpy.float32, numpy.int64, numpy.int32, numpy.bool_]: data1 = numpy.array([1, 2, 3, 4], dtype=input1_type) data2 = numpy.array([11, 21, 31, 41], dtype=input2_type) result = function(data1, data2) diff --git a/tests/skipped_tests.tbl b/tests/skipped_tests.tbl index b8b02e95bbf..df4a1423650 100644 --- a/tests/skipped_tests.tbl +++ b/tests/skipped_tests.tbl @@ -35,54 +35,42 @@ tests/third_party/intel/test_zero_copy_test1.py::test_dpnp_interaction_with_dpct tests/test_arraymanipulation.py::TestHstack::test_generator tests/test_arraymanipulation.py::TestVstack::test_generator + tests/test_dparray.py::test_astype[[]-float64-float64] tests/test_dparray.py::test_astype[[]-float64-float32] tests/test_dparray.py::test_astype[[]-float64-int64] tests/test_dparray.py::test_astype[[]-float64-int32] tests/test_dparray.py::test_astype[[]-float64-bool] -tests/test_dparray.py::test_astype[[]-float64-bool_] tests/test_dparray.py::test_astype[[]-float64-complex] tests/test_dparray.py::test_astype[[]-float32-float64] 
tests/test_dparray.py::test_astype[[]-float32-float32] tests/test_dparray.py::test_astype[[]-float32-int64] tests/test_dparray.py::test_astype[[]-float32-int32] tests/test_dparray.py::test_astype[[]-float32-bool] -tests/test_dparray.py::test_astype[[]-float32-bool_] tests/test_dparray.py::test_astype[[]-float32-complex] tests/test_dparray.py::test_astype[[]-int64-float64] tests/test_dparray.py::test_astype[[]-int64-float32] tests/test_dparray.py::test_astype[[]-int64-int64] tests/test_dparray.py::test_astype[[]-int64-int32] tests/test_dparray.py::test_astype[[]-int64-bool] -tests/test_dparray.py::test_astype[[]-int64-bool_] tests/test_dparray.py::test_astype[[]-int64-complex] tests/test_dparray.py::test_astype[[]-int32-float64] tests/test_dparray.py::test_astype[[]-int32-float32] tests/test_dparray.py::test_astype[[]-int32-int64] tests/test_dparray.py::test_astype[[]-int32-int32] tests/test_dparray.py::test_astype[[]-int32-bool] -tests/test_dparray.py::test_astype[[]-int32-bool_] tests/test_dparray.py::test_astype[[]-int32-complex] tests/test_dparray.py::test_astype[[]-bool-float64] tests/test_dparray.py::test_astype[[]-bool-float32] tests/test_dparray.py::test_astype[[]-bool-int64] tests/test_dparray.py::test_astype[[]-bool-int32] tests/test_dparray.py::test_astype[[]-bool-bool] -tests/test_dparray.py::test_astype[[]-bool-bool_] tests/test_dparray.py::test_astype[[]-bool-complex] -tests/test_dparray.py::test_astype[[]-bool_-float64] -tests/test_dparray.py::test_astype[[]-bool_-float32] -tests/test_dparray.py::test_astype[[]-bool_-int64] -tests/test_dparray.py::test_astype[[]-bool_-int32] -tests/test_dparray.py::test_astype[[]-bool_-bool] -tests/test_dparray.py::test_astype[[]-bool_-bool_] -tests/test_dparray.py::test_astype[[]-bool_-complex] tests/test_dparray.py::test_astype[[]-complex-float64] tests/test_dparray.py::test_astype[[]-complex-float32] tests/test_dparray.py::test_astype[[]-complex-int64] tests/test_dparray.py::test_astype[[]-complex-int32] 
tests/test_dparray.py::test_astype[[]-complex-bool] -tests/test_dparray.py::test_astype[[]-complex-bool_] tests/test_dparray.py::test_astype[[]-complex-complex] tests/test_linalg.py::test_cond[None-[[1, 0, -1], [0, 1, 0], [1, 0, 1]]] diff --git a/tests/skipped_tests_gpu.tbl b/tests/skipped_tests_gpu.tbl index 01a2bb21dc9..5426e386bbc 100644 --- a/tests/skipped_tests_gpu.tbl +++ b/tests/skipped_tests_gpu.tbl @@ -301,54 +301,42 @@ tests/third_party/cupy/sorting_tests/test_sort.py::TestPartition_param_3_{extern tests/third_party/intel/test_zero_copy_test1.py::test_dpnp_interaction_with_dpctl_memory tests/test_arraymanipulation.py::TestHstack::test_generator tests/test_arraymanipulation.py::TestVstack::test_generator + tests/test_dparray.py::test_astype[[]-float64-float64] tests/test_dparray.py::test_astype[[]-float64-float32] tests/test_dparray.py::test_astype[[]-float64-int64] tests/test_dparray.py::test_astype[[]-float64-int32] tests/test_dparray.py::test_astype[[]-float64-bool] -tests/test_dparray.py::test_astype[[]-float64-bool_] tests/test_dparray.py::test_astype[[]-float64-complex] tests/test_dparray.py::test_astype[[]-float32-float64] tests/test_dparray.py::test_astype[[]-float32-float32] tests/test_dparray.py::test_astype[[]-float32-int64] tests/test_dparray.py::test_astype[[]-float32-int32] tests/test_dparray.py::test_astype[[]-float32-bool] -tests/test_dparray.py::test_astype[[]-float32-bool_] tests/test_dparray.py::test_astype[[]-float32-complex] tests/test_dparray.py::test_astype[[]-int64-float64] tests/test_dparray.py::test_astype[[]-int64-float32] tests/test_dparray.py::test_astype[[]-int64-int64] tests/test_dparray.py::test_astype[[]-int64-int32] tests/test_dparray.py::test_astype[[]-int64-bool] -tests/test_dparray.py::test_astype[[]-int64-bool_] tests/test_dparray.py::test_astype[[]-int64-complex] tests/test_dparray.py::test_astype[[]-int32-float64] tests/test_dparray.py::test_astype[[]-int32-float32] tests/test_dparray.py::test_astype[[]-int32-int64] 
tests/test_dparray.py::test_astype[[]-int32-int32] tests/test_dparray.py::test_astype[[]-int32-bool] -tests/test_dparray.py::test_astype[[]-int32-bool_] tests/test_dparray.py::test_astype[[]-int32-complex] tests/test_dparray.py::test_astype[[]-bool-float64] tests/test_dparray.py::test_astype[[]-bool-float32] tests/test_dparray.py::test_astype[[]-bool-int64] tests/test_dparray.py::test_astype[[]-bool-int32] tests/test_dparray.py::test_astype[[]-bool-bool] -tests/test_dparray.py::test_astype[[]-bool-bool_] tests/test_dparray.py::test_astype[[]-bool-complex] -tests/test_dparray.py::test_astype[[]-bool_-float64] -tests/test_dparray.py::test_astype[[]-bool_-float32] -tests/test_dparray.py::test_astype[[]-bool_-int64] -tests/test_dparray.py::test_astype[[]-bool_-int32] -tests/test_dparray.py::test_astype[[]-bool_-bool] -tests/test_dparray.py::test_astype[[]-bool_-bool_] -tests/test_dparray.py::test_astype[[]-bool_-complex] tests/test_dparray.py::test_astype[[]-complex-float64] tests/test_dparray.py::test_astype[[]-complex-float32] tests/test_dparray.py::test_astype[[]-complex-int64] tests/test_dparray.py::test_astype[[]-complex-int32] tests/test_dparray.py::test_astype[[]-complex-bool] -tests/test_dparray.py::test_astype[[]-complex-bool_] tests/test_dparray.py::test_astype[[]-complex-complex] tests/test_linalg.py::test_cond[-1-[[1, 2, 3], [4, 5, 6], [7, 8, 9]]] diff --git a/tests/test_arraycreation.py b/tests/test_arraycreation.py index 5bb9795bbac..e0500848e9b 100644 --- a/tests/test_arraycreation.py +++ b/tests/test_arraycreation.py @@ -246,7 +246,7 @@ def test_geomspace(dtype, num, endpoint): ids=['0', '1', '4']) @pytest.mark.parametrize("dtype", [numpy.float64, numpy.float32, numpy.int64, numpy.int32, - numpy.bool, numpy.complex64, numpy.complex128, None], + numpy.bool_, numpy.complex64, numpy.complex128, None], ids=['float64', 'float32', 'int64', 'int32', 'bool', 'complex64', 'complex128', 'None']) def test_identity(n, dtype): @@ -344,8 +344,8 @@ def 
test_trace(array, offset, type, dtype): [-5, -4, -3, -2, -1, 0, 1, 2, 3, 4, 5], ids=['-5', '-4', '-3', '-2', '-1', '0', '1', '2', '3', '4', '5']) @pytest.mark.parametrize("dtype", - [numpy.float64, numpy.float32, float, numpy.int64, numpy.int32, numpy.int, numpy.float, int], - ids=['float64', 'float32', 'numpy.float', 'float', 'int64', 'int32', 'numpy.int', 'int']) + [numpy.float64, numpy.float32, float, numpy.int64, numpy.int32, numpy.int_, numpy.float_, int], + ids=['numpy.float64', 'numpy.float32', 'float', 'numpy.int64', 'numpy.int32', 'numpy.int', 'numpy.float', 'int']) def test_tri(N, M, k, dtype): func = lambda xp: xp.tri(N, M, k, dtype=dtype) @@ -428,7 +428,7 @@ def test_triu_size_null(k): '[0, 3, 5]']) @pytest.mark.parametrize("dtype", [numpy.float64, numpy.float32, numpy.int64, numpy.int32, - numpy.bool, numpy.complex64, numpy.complex128], + numpy.bool_, numpy.complex64, numpy.complex128], ids=['float64', 'float32', 'int64', 'int32', 'bool', 'complex64', 'complex128']) @pytest.mark.parametrize("n", @@ -464,7 +464,7 @@ def test_vander(array, dtype, n, increase): ids=['1.5', '2', '1.5+0.j']) @pytest.mark.parametrize("dtype", [None, numpy.complex128, numpy.complex64, numpy.float64, numpy.float32, - numpy.float16, numpy.int64, numpy.int32, numpy.bool], + numpy.float16, numpy.int64, numpy.int32, numpy.bool_], ids=['None', 'complex128', 'complex64', 'float64', 'float32', 'float16', 'int64', 'int32', 'bool']) @pytest.mark.parametrize("order", @@ -488,7 +488,7 @@ def test_full(shape, fill_value, dtype, order): ids=['1.5', '2', '1.5+0.j']) @pytest.mark.parametrize("dtype", [None, numpy.complex128, numpy.complex64, numpy.float64, numpy.float32, - numpy.float16, numpy.int64, numpy.int32, numpy.bool], + numpy.float16, numpy.int64, numpy.int32, numpy.bool_], ids=['None', 'complex128', 'complex64', 'float64', 'float32', 'float16', 'int64', 'int32', 'bool']) @pytest.mark.parametrize("order", @@ -545,7 +545,7 @@ def test_full_invalid_fill_value(fill_value): ids=['()', 
'0', '(0,)', '(2, 0, 3)', '(3, 2)']) @pytest.mark.parametrize("dtype", [None, numpy.complex128, numpy.complex64, numpy.float64, numpy.float32, - numpy.float16, numpy.int64, numpy.int32, numpy.bool], + numpy.float16, numpy.int64, numpy.int32, numpy.bool_], ids=['None', 'complex128', 'complex64', 'float64', 'float32', 'float16', 'int64', 'int32', 'bool']) @pytest.mark.parametrize("order", @@ -563,7 +563,7 @@ def test_zeros(shape, dtype, order): ids=['[]', '0', '[1, 2, 3]', '[[1, 2], [3, 4]]']) @pytest.mark.parametrize("dtype", [None, numpy.complex128, numpy.complex64, numpy.float64, numpy.float32, - numpy.float16, numpy.int64, numpy.int32, numpy.bool], + numpy.float16, numpy.int64, numpy.int32, numpy.bool_], ids=['None', 'complex128', 'complex64', 'float64', 'float32', 'float16', 'int64', 'int32', 'bool']) @pytest.mark.parametrize("order", @@ -584,7 +584,7 @@ def test_zeros_like(array, dtype, order): ids=['()', '0', '(0,)', '(2, 0, 3)', '(3, 2)']) @pytest.mark.parametrize("dtype", [None, numpy.complex128, numpy.complex64, numpy.float64, numpy.float32, - numpy.float16, numpy.int64, numpy.int32, numpy.bool], + numpy.float16, numpy.int64, numpy.int32, numpy.bool_], ids=['None', 'complex128', 'complex64', 'float64', 'float32', 'float16', 'int64', 'int32', 'bool']) @pytest.mark.parametrize("order", @@ -602,7 +602,7 @@ def test_empty(shape, dtype, order): ids=['[]', '0', '[1, 2, 3]', '[[1, 2], [3, 4]]']) @pytest.mark.parametrize("dtype", [None, numpy.complex128, numpy.complex64, numpy.float64, numpy.float32, - numpy.float16, numpy.int64, numpy.int32, numpy.bool], + numpy.float16, numpy.int64, numpy.int32, numpy.bool_], ids=['None', 'complex128', 'complex64', 'float64', 'float32', 'float16', 'int64', 'int32', 'bool']) @pytest.mark.parametrize("order", @@ -623,7 +623,7 @@ def test_empty_like(array, dtype, order): ids=['()', '0', '(0,)', '(2, 0, 3)', '(3, 2)']) @pytest.mark.parametrize("dtype", [None, numpy.complex128, numpy.complex64, numpy.float64, numpy.float32, - 
numpy.float16, numpy.int64, numpy.int32, numpy.bool], + numpy.float16, numpy.int64, numpy.int32, numpy.bool_], ids=['None', 'complex128', 'complex64', 'float64', 'float32', 'float16', 'int64', 'int32', 'bool']) @pytest.mark.parametrize("order", @@ -644,7 +644,7 @@ def test_ones(shape, dtype, order): ids=['[]', '0', '[1, 2, 3]', '[[1, 2], [3, 4]]']) @pytest.mark.parametrize("dtype", [None, numpy.complex128, numpy.complex64, numpy.float64, numpy.float32, - numpy.float16, numpy.int64, numpy.int32, numpy.bool], + numpy.float16, numpy.int64, numpy.int32, numpy.bool_], ids=['None', 'complex128', 'complex64', 'float64', 'float32', 'float16', 'int64', 'int32', 'bool']) @pytest.mark.parametrize("order", diff --git a/tests/test_dparray.py b/tests/test_dparray.py index 6ff1672b853..745884f6a07 100644 --- a/tests/test_dparray.py +++ b/tests/test_dparray.py @@ -5,11 +5,11 @@ @pytest.mark.parametrize("res_dtype", - [numpy.float64, numpy.float32, numpy.int64, numpy.int32, numpy.bool, numpy.bool_, numpy.complex], - ids=['float64', 'float32', 'int64', 'int32', 'bool', 'bool_', 'complex']) + [numpy.float64, numpy.float32, numpy.int64, numpy.int32, numpy.bool_, numpy.complex_], + ids=['float64', 'float32', 'int64', 'int32', 'bool', 'complex']) @pytest.mark.parametrize("arr_dtype", - [numpy.float64, numpy.float32, numpy.int64, numpy.int32, numpy.bool, numpy.bool_, numpy.complex], - ids=['float64', 'float32', 'int64', 'int32', 'bool', 'bool_', 'complex']) + [numpy.float64, numpy.float32, numpy.int64, numpy.int32, numpy.bool_, numpy.complex_], + ids=['float64', 'float32', 'int64', 'int32', 'bool', 'complex']) @pytest.mark.parametrize("arr", [[-2, -1, 0, 1, 2], [[-2, -1], [1, 2]], []], ids=['[-2, -1, 0, 1, 2]', '[[-2, -1], [1, 2]]', '[]']) @@ -22,8 +22,8 @@ def test_astype(arr, arr_dtype, res_dtype): @pytest.mark.parametrize("arr_dtype", - [numpy.float64, numpy.float32, numpy.int64, numpy.int32, numpy.bool, numpy.bool_, numpy.complex], - ids=['float64', 'float32', 'int64', 'int32', 
'bool', 'bool_', 'complex']) + [numpy.float64, numpy.float32, numpy.int64, numpy.int32, numpy.bool_, numpy.complex_], + ids=['float64', 'float32', 'int64', 'int32', 'bool', 'complex']) @pytest.mark.parametrize("arr", [[-2, -1, 0, 1, 2], [[-2, -1], [1, 2]], []], ids=['[-2, -1, 0, 1, 2]', '[[-2, -1], [1, 2]]', '[]']) @@ -51,7 +51,7 @@ def test_flags(shape, order): @pytest.mark.parametrize("dtype", - [numpy.complex64, numpy.float32, numpy.int64, numpy.int32, numpy.bool], + [numpy.complex64, numpy.float32, numpy.int64, numpy.int32, numpy.bool_], ids=['complex64', 'float32', 'int64', 'int32', 'bool']) @pytest.mark.parametrize("strides", [(1, 4) , (4, 1)], diff --git a/tests/test_logic.py b/tests/test_logic.py index b3280be0761..7fefe91826f 100644 --- a/tests/test_logic.py +++ b/tests/test_logic.py @@ -6,8 +6,8 @@ @pytest.mark.parametrize("type", - [numpy.float64, numpy.float32, numpy.int64, numpy.int32, numpy.bool, numpy.bool_], - ids=['float64', 'float32', 'int64', 'int32', 'bool', 'bool_']) + [numpy.float64, numpy.float32, numpy.int64, numpy.int32, numpy.bool_], + ids=['float64', 'float32', 'int64', 'int32', 'bool']) @pytest.mark.parametrize("shape", [(0,), (4,), (2, 3), (2, 2, 2)], ids=['(0,)', '(4,)', '(2,3)', '(2,2,2)']) @@ -63,8 +63,8 @@ def test_allclose(type): @pytest.mark.parametrize("type", - [numpy.float64, numpy.float32, numpy.int64, numpy.int32, numpy.bool, numpy.bool_], - ids=['float64', 'float32', 'int64', 'int32', 'bool', 'bool_']) + [numpy.float64, numpy.float32, numpy.int64, numpy.int32, numpy.bool_], + ids=['float64', 'float32', 'int64', 'int32', 'bool']) @pytest.mark.parametrize("shape", [(0,), (4,), (2, 3), (2, 2, 2)], ids=['(0,)', '(4,)', '(2,3)', '(2,2,2)']) diff --git a/tests/test_random_state.py b/tests/test_random_state.py index b93f52411c5..1a5d554e14d 100644 --- a/tests/test_random_state.py +++ b/tests/test_random_state.py @@ -34,8 +34,8 @@ def get_default_floating(): class TestNormal: @pytest.mark.parametrize("dtype", - [dpnp.float32, 
dpnp.float64, None], - ids=['float32', 'float64', 'None']) + [dpnp.float32, dpnp.float64, dpnp.float, None], + ids=['float32', 'float64', 'float', 'None']) @pytest.mark.parametrize("usm_type", ["host", "device", "shared"], ids=['host', 'device', 'shared']) @@ -173,9 +173,9 @@ def test_fallback(self, loc, scale): @pytest.mark.parametrize("dtype", - [dpnp.float16, dpnp.float, float, dpnp.integer, dpnp.int64, dpnp.int32, dpnp.int, int, + [dpnp.float16, float, dpnp.integer, dpnp.int64, dpnp.int32, dpnp.int, int, dpnp.longcomplex, dpnp.complex128, dpnp.complex64, dpnp.bool, dpnp.bool_], - ids=['dpnp.float16', 'dpnp.float', 'float', 'dpnp.integer', 'dpnp.int64', 'dpnp.int32', 'dpnp.int', 'int', + ids=['dpnp.float16', 'float', 'dpnp.integer', 'dpnp.int64', 'dpnp.int32', 'dpnp.int', 'int', 'dpnp.longcomplex', 'dpnp.complex128', 'dpnp.complex64', 'dpnp.bool', 'dpnp.bool_']) def test_invalid_dtype(self, dtype): # dtype must be float32 or float64 @@ -257,8 +257,8 @@ def test_wrong_dims(self): class TestRandInt: @pytest.mark.parametrize("dtype", - [int, dpnp.int32, dpnp.int], - ids=['int', 'dpnp.int32', 'dpnp.int']) + [int, dpnp.int32, dpnp.int, dpnp.integer], + ids=['int', 'dpnp.int32', 'dpnp.int', 'dpnp.integer']) @pytest.mark.parametrize("usm_type", ["host", "device", "shared"], ids=['host', 'device', 'shared']) @@ -267,6 +267,9 @@ def test_distr(self, dtype, usm_type): low = 1 high = 10 + if dtype in (dpnp.int, dpnp.integer) and dtype != dpnp.dtype('int32'): + pytest.skip("dtype isn't alias on dpnp.int32 on the target OS, so there will be a fallback") + sycl_queue = dpctl.SyclQueue() data = RandomState(seed, sycl_queue=sycl_queue).randint(low=low, high=high, @@ -421,16 +424,16 @@ def test_bounds_fallback(self, low, high): @pytest.mark.usefixtures("allow_fall_back_on_numpy") @pytest.mark.parametrize("dtype", - [dpnp.int64, dpnp.integer, dpnp.bool, dpnp.bool_, bool], - ids=['dpnp.int64', 'dpnp.integer', 'dpnp.bool', 'dpnp.bool_', 'bool']) + [dpnp.int64, dpnp.int, 
dpnp.integer, dpnp.bool, dpnp.bool_, bool], + ids=['dpnp.int64', 'dpnp.int', 'dpnp.integer', 'dpnp.bool', 'dpnp.bool_', 'bool']) def test_dtype_fallback(self, dtype): seed = 157 low = -3 if not dtype in {dpnp.bool_, bool} else 0 high = 37 if not dtype in {dpnp.bool_, bool} else 2 size = (3, 2, 5) - if dtype == dpnp.integer and dtype == dpnp.dtype('int32'): - pytest.skip("dpnp.integer is alias on dpnp.int32 on the target OS, so no fallback here") + if dtype in (dpnp.int, dpnp.integer) and dtype == dpnp.dtype('int32'): + pytest.skip("dtype is alias on dpnp.int32 on the target OS, so no fallback here") # dtype must be int or dpnp.int32, in other cases it will be a fallback to numpy actual = RandomState(seed).randint(low=low, high=high, size=size, dtype=dtype).asnumpy() @@ -714,8 +717,8 @@ class TestUniform: [[1.23, 10.54], [10.54, 1.23]], ids=['(low, high)=[1.23, 10.54]', '(low, high)=[10.54, 1.23]']) @pytest.mark.parametrize("dtype", - [dpnp.float32, dpnp.float64, dpnp.int32, None], - ids=['float32', 'float64', 'int32', 'None']) + [dpnp.float32, dpnp.float64, dpnp.float, dpnp.int32, None], + ids=['float32', 'float64', 'float', 'int32', 'None']) @pytest.mark.parametrize("usm_type", ["host", "device", "shared"], ids=['host', 'device', 'shared']) @@ -831,12 +834,15 @@ def test_fallback(self, low, high): @pytest.mark.parametrize("dtype", - [dpnp.float16, dpnp.float, float, dpnp.integer, dpnp.int64, dpnp.int, int, + [dpnp.float16, float, dpnp.integer, dpnp.int64, dpnp.int, int, dpnp.longcomplex, dpnp.complex128, dpnp.complex64, dpnp.bool, dpnp.bool_], - ids=['dpnp.float16', 'dpnp.float', 'float', 'dpnp.integer', 'dpnp.int64', 'dpnp.int', 'int', + ids=['dpnp.float16', 'float', 'dpnp.integer', 'dpnp.int64', 'dpnp.int', 'int', 'dpnp.longcomplex', 'dpnp.complex128', 'dpnp.complex64', 'dpnp.bool', 'dpnp.bool_']) def test_invalid_dtype(self, dtype): - # dtype must be float32 or float64 + if dtype in (dpnp.int, dpnp.integer) and dtype == dpnp.dtype('int32'): + pytest.skip("dtype 
is alias on dpnp.int32 on the target OS, so no error here") + + # dtype must be int32, float32 or float64 assert_raises(TypeError, RandomState().uniform, dtype=dtype) diff --git a/tests/third_party/cupy/creation_tests/test_ranges.py b/tests/third_party/cupy/creation_tests/test_ranges.py index 75960e492c1..4d5bc03f81b 100644 --- a/tests/third_party/cupy/creation_tests/test_ranges.py +++ b/tests/third_party/cupy/creation_tests/test_ranges.py @@ -54,7 +54,7 @@ def test_arange8(self, xp, dtype): def test_arange9(self): for xp in (numpy, cupy): - with pytest.raises(ValueError): + with pytest.raises((ValueError, TypeError)): xp.arange(10, dtype=xp.bool_) @testing.numpy_cupy_array_equal() diff --git a/tests/third_party/cupy/indexing_tests/test_generate.py b/tests/third_party/cupy/indexing_tests/test_generate.py index d10e503bcec..2bb0404ab59 100644 --- a/tests/third_party/cupy/indexing_tests/test_generate.py +++ b/tests/third_party/cupy/indexing_tests/test_generate.py @@ -28,7 +28,7 @@ def test_indices_list2(self, xp, dtype): def test_indices_list3(self): for xp in (numpy, cupy): - with pytest.raises(ValueError): + with pytest.raises((ValueError, TypeError)): xp.indices((1, 2, 3, 4), dtype=xp.bool_) diff --git a/tests/third_party/cupy/indexing_tests/test_insert.py b/tests/third_party/cupy/indexing_tests/test_insert.py index ed6a156e884..fdcc5357e19 100644 --- a/tests/third_party/cupy/indexing_tests/test_insert.py +++ b/tests/third_party/cupy/indexing_tests/test_insert.py @@ -42,7 +42,7 @@ class TestPlaceRaises(unittest.TestCase): def test_place_empty_value_error(self, dtype): for xp in (numpy, cupy): a = testing.shaped_arange(self.shape, xp, dtype) - mask = testing.shaped_arange(self.shape, xp, numpy.int) % 2 == 0 + mask = testing.shaped_arange(self.shape, xp, numpy.int_) % 2 == 0 vals = testing.shaped_random((0,), xp, dtype) with pytest.raises(ValueError): xp.place(a, mask, vals) diff --git a/tests/third_party/cupy/math_tests/test_arithmetic.py 
b/tests/third_party/cupy/math_tests/test_arithmetic.py index 28771b4979b..158f5cc1442 100644 --- a/tests/third_party/cupy/math_tests/test_arithmetic.py +++ b/tests/third_party/cupy/math_tests/test_arithmetic.py @@ -1,5 +1,6 @@ import itertools import unittest +import warnings import numpy import pytest @@ -130,8 +131,8 @@ def check_binary(self, xp): func = getattr(xp, self.name) with testing.NumpyError(divide='ignore'): - with numpy.warnings.catch_warnings(): - numpy.warnings.filterwarnings('ignore') + with warnings.catch_warnings(): + warnings.filterwarnings('ignore') if self.use_dtype: y = func(arg1, arg2, dtype=self.dtype) else: From 082fb92aacb1ab20e58cd061e7de4f9a4da2c193 Mon Sep 17 00:00:00 2001 From: Anton <100830759+antonwolfy@users.noreply.github.com> Date: Mon, 23 Jan 2023 19:49:09 +0100 Subject: [PATCH 04/32] Get rid of 'has_aspect_host' property in tests (#1274) * Set minimum required versions & fix debug building * Get rid of 'has_aspect_host' property in tests * Update tests/test_sycl_queue.py Co-authored-by: Oleksandr Pavlyk Co-authored-by: Oleksandr Pavlyk --- tests/test_sycl_queue.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/test_sycl_queue.py b/tests/test_sycl_queue.py index 26a71eef293..977f4561f5a 100644 --- a/tests/test_sycl_queue.py +++ b/tests/test_sycl_queue.py @@ -17,7 +17,7 @@ "cpu", ] -available_devices = [d for d in dpctl.get_devices() if not d.has_aspect_host] +available_devices = [d for d in dpctl.get_devices() if not getattr(d, 'has_aspect_host', False)] valid_devices = [] for device in available_devices: From 67e7f87cdbbe84eaf7ab5174094330bcab7f470e Mon Sep 17 00:00:00 2001 From: Anton <100830759+antonwolfy@users.noreply.github.com> Date: Tue, 24 Jan 2023 17:25:48 +0100 Subject: [PATCH 05/32] Add support of dpnp.less_equal() (#1275) * Set minimum required versions & fix debug building * Add support of dpnp.less_equal() * Test no broadcast together with input shapes --- 
.../include/dpnp_gen_2arg_2type_tbl.hpp | 91 ++++++ dpnp/backend/include/dpnp_iface.hpp | 23 ++ dpnp/backend/include/dpnp_iface_fptr.hpp | 1 + dpnp/backend/kernels/dpnp_krnl_logic.cpp | 264 +++++++++++++++++- dpnp/backend/src/dpnp_fptr.hpp | 37 ++- dpnp/dpnp_algo/dpnp_algo.pxd | 5 +- dpnp/dpnp_algo/dpnp_algo_logic.pyx | 19 +- dpnp/dpnp_iface_logic.py | 54 +++- dpnp/dpnp_utils/dpnp_algo_utils.pxd | 4 +- dpnp/dpnp_utils/dpnp_algo_utils.pyx | 10 +- tests/test_logic.py | 74 ++++- .../cupy/logic_tests/test_comparison.py | 7 +- tests_external/skipped_tests_numpy.tbl | 77 ----- 13 files changed, 539 insertions(+), 127 deletions(-) create mode 100644 dpnp/backend/include/dpnp_gen_2arg_2type_tbl.hpp diff --git a/dpnp/backend/include/dpnp_gen_2arg_2type_tbl.hpp b/dpnp/backend/include/dpnp_gen_2arg_2type_tbl.hpp new file mode 100644 index 00000000000..e76c92b47cd --- /dev/null +++ b/dpnp/backend/include/dpnp_gen_2arg_2type_tbl.hpp @@ -0,0 +1,91 @@ +//***************************************************************************** +// Copyright (c) 2023, Intel Corporation +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are met: +// - Redistributions of source code must retain the above copyright notice, +// this list of conditions and the following disclaimer. +// - Redistributions in binary form must reproduce the above copyright notice, +// this list of conditions and the following disclaimer in the documentation +// and/or other materials provided with the distribution. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +// ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE +// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF +// THE POSSIBILITY OF SUCH DAMAGE. +//***************************************************************************** + +/* + * This header file contains single argument element wise functions definitions + * + * Macro `MACRO_2ARG_2TYPES_LOGIC_OP` must be defined before usage + * + * Parameters: + * - public name of the function and kernel name + * - operation used to calculate the result + * + */ + +#ifndef MACRO_2ARG_2TYPES_LOGIC_OP +#error "MACRO_2ARG_2TYPES_LOGIC_OP is not defined" +#endif + +#ifdef _SECTION_DOCUMENTATION_GENERATION_ + +#define MACRO_2ARG_2TYPES_LOGIC_OP(__name__, __operation__) \ + /** @ingroup BACKEND_API */ \ + /** @brief Per element operation function __name__ */ \ + /** */ \ + /** Function "__name__" executes operator "__operation__" over corresponding elements of input arrays */ \ + /** */ \ + /** @param[in] q_ref Reference to SYCL queue. */ \ + /** @param[out] result_out Output array. */ \ + /** @param[in] result_size Output array size. */ \ + /** @param[in] result_ndim Number of output array dimensions. */ \ + /** @param[in] result_shape Output array shape. */ \ + /** @param[in] result_strides Output array strides. */ \ + /** @param[in] input1_in Input array 1. */ \ + /** @param[in] input1_size Input array 1 size. */ \ + /** @param[in] input1_ndim Number of input array 1 dimensions. */ \ + /** @param[in] input1_shape Input array 1 shape. */ \ + /** @param[in] input1_strides Input array 1 strides. 
*/ \ + /** @param[in] input2_in Input array 2. */ \ + /** @param[in] input2_size Input array 2 size. */ \ + /** @param[in] input2_ndim Number of input array 2 dimensions. */ \ + /** @param[in] input2_shape Input array 2 shape. */ \ + /** @param[in] input2_strides Input array 2 strides. */ \ + /** @param[in] where Where condition. */ \ + /** @param[in] dep_event_vec_ref Reference to vector of SYCL events. */ \ + template \ + DPCTLSyclEventRef __name__(DPCTLSyclQueueRef q_ref, \ + void* result_out, \ + const size_t result_size, \ + const size_t result_ndim, \ + const shape_elem_type* result_shape, \ + const shape_elem_type* result_strides, \ + const void* input1_in, \ + const size_t input1_size, \ + const size_t input1_ndim, \ + const shape_elem_type* input1_shape, \ + const shape_elem_type* input1_strides, \ + const void* input2_in, \ + const size_t input2_size, \ + const size_t input2_ndim, \ + const shape_elem_type* input2_shape, \ + const shape_elem_type* input2_strides, \ + const size_t* where, \ + const DPCTLEventVectorRef dep_event_vec_ref); + +#endif + +MACRO_2ARG_2TYPES_LOGIC_OP(dpnp_less_equal_c, input1_elem <= input2_elem) + +#undef MACRO_2ARG_2TYPES_LOGIC_OP diff --git a/dpnp/backend/include/dpnp_iface.hpp b/dpnp/backend/include/dpnp_iface.hpp index 756c7082598..713e3e82197 100644 --- a/dpnp/backend/include/dpnp_iface.hpp +++ b/dpnp/backend/include/dpnp_iface.hpp @@ -1806,6 +1806,29 @@ INP_DLLEXPORT void dpnp_invert_c(void* array1_in, void* result, size_t size); #include +#define MACRO_2ARG_2TYPES_LOGIC_OP(__name__, __operation__) \ + template \ + INP_DLLEXPORT DPCTLSyclEventRef __name__(DPCTLSyclQueueRef q_ref, \ + void* result_out, \ + const size_t result_size, \ + const size_t result_ndim, \ + const shape_elem_type* result_shape, \ + const shape_elem_type* result_strides, \ + const void* input1_in, \ + const size_t input1_size, \ + const size_t input1_ndim, \ + const shape_elem_type* input1_shape, \ + const shape_elem_type* input1_strides, \ + const 
void* input2_in, \ + const size_t input2_size, \ + const size_t input2_ndim, \ + const shape_elem_type* input2_shape, \ + const shape_elem_type* input2_strides, \ + const size_t* where, \ + const DPCTLEventVectorRef dep_event_vec_ref); + +#include + #define MACRO_2ARG_3TYPES_OP(__name__, __operation1__, __operation2__) \ template \ INP_DLLEXPORT DPCTLSyclEventRef __name__(DPCTLSyclQueueRef q_ref, \ diff --git a/dpnp/backend/include/dpnp_iface_fptr.hpp b/dpnp/backend/include/dpnp_iface_fptr.hpp index 126fdd24f3b..18e3629366d 100644 --- a/dpnp/backend/include/dpnp_iface_fptr.hpp +++ b/dpnp/backend/include/dpnp_iface_fptr.hpp @@ -193,6 +193,7 @@ enum class DPNPFuncName : size_t DPNP_FN_KRON_EXT, /**< Used in numpy.kron() impl, requires extra parameters */ DPNP_FN_LEFT_SHIFT, /**< Used in numpy.left_shift() impl */ DPNP_FN_LEFT_SHIFT_EXT, /**< Used in numpy.left_shift() impl, requires extra parameters */ + DPNP_FN_LESS_EQUAL_EXT, /**< Used in numpy.less_equal() impl, requires extra parameters */ DPNP_FN_LOG, /**< Used in numpy.log() impl */ DPNP_FN_LOG_EXT, /**< Used in numpy.log() impl, requires extra parameters */ DPNP_FN_LOG10, /**< Used in numpy.log10() impl */ diff --git a/dpnp/backend/kernels/dpnp_krnl_logic.cpp b/dpnp/backend/kernels/dpnp_krnl_logic.cpp index 10924691358..6be989a4ec8 100644 --- a/dpnp/backend/kernels/dpnp_krnl_logic.cpp +++ b/dpnp/backend/kernels/dpnp_krnl_logic.cpp @@ -1,5 +1,5 @@ //***************************************************************************** -// Copyright (c) 2016-2020, Intel Corporation +// Copyright (c) 2016-2023, Intel Corporation // All rights reserved. 
// // Redistribution and use in source and binary forms, with or without @@ -27,6 +27,7 @@ #include "dpnp_fptr.hpp" #include "dpnp_iface.hpp" +#include "dpnp_iterator.hpp" #include "dpnpc_memory_adapter.hpp" #include "queue_sycl.hpp" @@ -286,6 +287,265 @@ DPCTLSyclEventRef (*dpnp_any_ext_c)(DPCTLSyclQueueRef, const size_t, const DPCTLEventVectorRef) = dpnp_any_c<_DataType, _ResultType>; + +#define MACRO_2ARG_2TYPES_LOGIC_OP(__name__, __operation__) \ + template \ + class __name__##_kernel; \ + \ + template \ + class __name__##_broadcast_kernel; \ + \ + template \ + class __name__##_strides_kernel; \ + \ + template \ + DPCTLSyclEventRef __name__(DPCTLSyclQueueRef q_ref, \ + void* result_out, \ + const size_t result_size, \ + const size_t result_ndim, \ + const shape_elem_type* result_shape, \ + const shape_elem_type* result_strides, \ + const void* input1_in, \ + const size_t input1_size, \ + const size_t input1_ndim, \ + const shape_elem_type* input1_shape, \ + const shape_elem_type* input1_strides, \ + const void* input2_in, \ + const size_t input2_size, \ + const size_t input2_ndim, \ + const shape_elem_type* input2_shape, \ + const shape_elem_type* input2_strides, \ + const size_t* where, \ + const DPCTLEventVectorRef dep_event_vec_ref) \ + { \ + /* avoid warning unused variable*/ \ + (void)where; \ + (void)dep_event_vec_ref; \ + \ + DPCTLSyclEventRef event_ref = nullptr; \ + \ + if (!input1_size || !input2_size) \ + { \ + return event_ref; \ + } \ + \ + sycl::queue q = *(reinterpret_cast(q_ref)); \ + \ + _DataType_input1* input1_data = static_cast<_DataType_input1 *>(const_cast(input1_in)); \ + _DataType_input2* input2_data = static_cast<_DataType_input2 *>(const_cast(input2_in)); \ + bool* result = static_cast(result_out); \ + \ + bool use_broadcasting = !array_equal(input1_shape, input1_ndim, input2_shape, input2_ndim); \ + \ + shape_elem_type* input1_shape_offsets = new shape_elem_type[input1_ndim]; \ + \ + get_shape_offsets_inkernel(input1_shape, 
input1_ndim, input1_shape_offsets); \ + bool use_strides = !array_equal(input1_strides, input1_ndim, input1_shape_offsets, input1_ndim); \ + delete[] input1_shape_offsets; \ + \ + shape_elem_type* input2_shape_offsets = new shape_elem_type[input2_ndim]; \ + \ + get_shape_offsets_inkernel(input2_shape, input2_ndim, input2_shape_offsets); \ + use_strides = \ + use_strides || !array_equal(input2_strides, input2_ndim, input2_shape_offsets, input2_ndim); \ + delete[] input2_shape_offsets; \ + \ + sycl::event event; \ + sycl::range<1> gws(result_size); /* used only when use_broadcasting or use_strides is true */ \ + \ + if (use_broadcasting) \ + { \ + DPNPC_id<_DataType_input1>* input1_it; \ + const size_t input1_it_size_in_bytes = sizeof(DPNPC_id<_DataType_input1>); \ + input1_it = reinterpret_cast*>(dpnp_memory_alloc_c(q_ref, \ + input1_it_size_in_bytes)); \ + new (input1_it) \ + DPNPC_id<_DataType_input1>(q_ref, input1_data, input1_shape, input1_strides, input1_ndim); \ + \ + input1_it->broadcast_to_shape(result_shape, result_ndim); \ + \ + DPNPC_id<_DataType_input2>* input2_it; \ + const size_t input2_it_size_in_bytes = sizeof(DPNPC_id<_DataType_input2>); \ + input2_it = reinterpret_cast*>(dpnp_memory_alloc_c(q_ref, \ + input2_it_size_in_bytes)); \ + new (input2_it) \ + DPNPC_id<_DataType_input2>(q_ref, input2_data, input2_shape, input2_strides, input2_ndim); \ + \ + input2_it->broadcast_to_shape(result_shape, result_ndim); \ + \ + auto kernel_parallel_for_func = [=](sycl::id<1> global_id) { \ + const size_t i = global_id[0]; /* for (size_t i = 0; i < result_size; ++i) */ \ + { \ + const _DataType_input1 input1_elem = (*input1_it)[i]; \ + const _DataType_input2 input2_elem = (*input2_it)[i]; \ + result[i] = __operation__; \ + } \ + }; \ + auto kernel_func = [&](sycl::handler& cgh) { \ + cgh.parallel_for< \ + class __name__##_broadcast_kernel<_DataType_input1, _DataType_input2>>( \ + gws, kernel_parallel_for_func); \ + }; \ + \ + q.submit(kernel_func).wait(); \ + \ + 
input1_it->~DPNPC_id(); \ + input2_it->~DPNPC_id(); \ + \ + return event_ref; \ + } \ + else if (use_strides) \ + { \ + if ((result_ndim != input1_ndim) || (result_ndim != input2_ndim)) \ + { \ + throw std::runtime_error("Result ndim=" + std::to_string(result_ndim) + \ + " mismatches with either input1 ndim=" + std::to_string(input1_ndim) + \ + " or input2 ndim=" + std::to_string(input2_ndim)); \ + } \ + \ + /* memory transfer optimization, use USM-host for temporary speeds up tranfer to device */ \ + using usm_host_allocatorT = sycl::usm_allocator; \ + \ + size_t strides_size = 3 * result_ndim; \ + shape_elem_type *dev_strides_data = sycl::malloc_device(strides_size, q); \ + \ + /* create host temporary for packed strides managed by shared pointer */ \ + auto strides_host_packed = std::vector(strides_size, \ + usm_host_allocatorT(q)); \ + \ + /* packed vector is concatenation of result_strides, input1_strides and input2_strides */ \ + std::copy(result_strides, result_strides + result_ndim, strides_host_packed.begin()); \ + std::copy(input1_strides, input1_strides + result_ndim, strides_host_packed.begin() + result_ndim); \ + std::copy(input2_strides, input2_strides + result_ndim, strides_host_packed.begin() + 2 * result_ndim); \ + \ + auto copy_strides_ev = q.copy(strides_host_packed.data(), \ + dev_strides_data, \ + strides_host_packed.size()); \ + \ + auto kernel_parallel_for_func = [=](sycl::id<1> global_id) { \ + const size_t output_id = global_id[0]; /* for (size_t i = 0; i < result_size; ++i) */ \ + { \ + const shape_elem_type *result_strides_data = &dev_strides_data[0]; \ + const shape_elem_type *input1_strides_data = &dev_strides_data[1]; \ + const shape_elem_type *input2_strides_data = &dev_strides_data[2]; \ + \ + size_t input1_id = 0; \ + size_t input2_id = 0; \ + \ + for (size_t i = 0; i < result_ndim; ++i) \ + { \ + const size_t output_xyz_id = \ + get_xyz_id_by_id_inkernel(output_id, result_strides_data, result_ndim, i); \ + input1_id += 
output_xyz_id * input1_strides_data[i]; \ + input2_id += output_xyz_id * input2_strides_data[i]; \ + } \ + \ + const _DataType_input1 input1_elem = input1_data[input1_id]; \ + const _DataType_input2 input2_elem = input2_data[input2_id]; \ + result[output_id] = __operation__; \ + } \ + }; \ + auto kernel_func = [&](sycl::handler& cgh) { \ + cgh.depends_on(copy_strides_ev); \ + cgh.parallel_for< \ + class __name__##_strides_kernel<_DataType_input1, _DataType_input2>>( \ + gws, kernel_parallel_for_func); \ + }; \ + \ + q.submit(kernel_func).wait(); \ + \ + sycl::free(dev_strides_data, q); \ + return event_ref; \ + } \ + else \ + { \ + constexpr size_t lws = 64; \ + constexpr unsigned int vec_sz = 8; \ + constexpr sycl::access::address_space global_space = sycl::access::address_space::global_space; \ + \ + auto gws_range = sycl::range<1>(((result_size + lws * vec_sz - 1) / (lws * vec_sz)) * lws); \ + auto lws_range = sycl::range<1>(lws); \ + \ + auto kernel_parallel_for_func = [=](sycl::nd_item<1> nd_it) { \ + auto sg = nd_it.get_sub_group(); \ + size_t start = vec_sz * (nd_it.get_group(0) * nd_it.get_local_range(0) + \ + sg.get_group_id()[0] * sg.get_max_local_range()[0]); \ + size_t end = start + static_cast(vec_sz); \ + \ + if (end < result_size) { \ + sycl::vec<_DataType_input1, vec_sz> x1 = \ + sg.load(sycl::multi_ptr<_DataType_input1, global_space>(&input1_data[start])); \ + sycl::vec<_DataType_input2, vec_sz> x2 = \ + sg.load(sycl::multi_ptr<_DataType_input2, global_space>(&input2_data[start])); \ + sycl::vec res_vec; \ + \ + for (size_t k = 0; k < vec_sz; ++k) { \ + const _DataType_input1 input1_elem = x1[k]; \ + const _DataType_input2 input2_elem = x2[k]; \ + res_vec[k] = __operation__; \ + } \ + sg.store(sycl::multi_ptr(&result[start]), res_vec); \ + \ + } \ + else { \ + for (size_t k = start; k < result_size; ++k) { \ + const _DataType_input1 input1_elem = input1_data[k]; \ + const _DataType_input2 input2_elem = input2_data[k]; \ + result[k] = __operation__; 
\ + } \ + } \ + }; \ + \ + auto kernel_func = [&](sycl::handler& cgh) { \ + cgh.parallel_for>( \ + sycl::nd_range<1>(gws_range, lws_range), kernel_parallel_for_func); \ + }; \ + event = q.submit(kernel_func); \ + } \ + \ + event_ref = reinterpret_cast(&event); \ + return DPCTLEvent_Copy(event_ref); \ + } \ + \ + template \ + DPCTLSyclEventRef (*__name__##_ext)(DPCTLSyclQueueRef, \ + void*, \ + const size_t, \ + const size_t, \ + const shape_elem_type*, \ + const shape_elem_type*, \ + const void*, \ + const size_t, \ + const size_t, \ + const shape_elem_type*, \ + const shape_elem_type*, \ + const void*, \ + const size_t, \ + const size_t, \ + const shape_elem_type*, \ + const shape_elem_type*, \ + const size_t*, \ + const DPCTLEventVectorRef) = __name__<_DataType_input1, \ + _DataType_input2>; + +#include + +template +static void func_map_logic_2arg_2type_core(func_map_t& fmap) +{ + ((fmap[DPNPFuncName::DPNP_FN_LESS_EQUAL_EXT][FT1][FTs] = + {eft_BLN, (void*)dpnp_less_equal_c_ext, func_type_map_t::find_type>}), ...); +} + +template +static void func_map_logic_2arg_2type_helper(func_map_t& fmap) +{ + ((func_map_logic_2arg_2type_core(fmap)), ...); +} + void func_map_init_logic(func_map_t& fmap) { fmap[DPNPFuncName::DPNP_FN_ALL][eft_BLN][eft_BLN] = {eft_BLN, (void*)dpnp_all_default_c}; @@ -378,5 +638,7 @@ void func_map_init_logic(func_map_t& fmap) fmap[DPNPFuncName::DPNP_FN_ANY_EXT][eft_FLT][eft_FLT] = {eft_FLT, (void*)dpnp_any_ext_c}; fmap[DPNPFuncName::DPNP_FN_ANY_EXT][eft_DBL][eft_DBL] = {eft_DBL, (void*)dpnp_any_ext_c}; + func_map_logic_2arg_2type_helper(fmap); + return; } diff --git a/dpnp/backend/src/dpnp_fptr.hpp b/dpnp/backend/src/dpnp_fptr.hpp index 5b10bc71a8b..76116cafae7 100644 --- a/dpnp/backend/src/dpnp_fptr.hpp +++ b/dpnp/backend/src/dpnp_fptr.hpp @@ -1,5 +1,5 @@ //***************************************************************************** -// Copyright (c) 2016-2020, Intel Corporation +// Copyright (c) 2016-2023, Intel Corporation // All rights 
reserved. // // Redistribution and use in source and binary forms, with or without @@ -33,6 +33,7 @@ #define BACKEND_FPTR_H #include +#include #include @@ -64,6 +65,40 @@ const DPNPFuncType eft_C64 = DPNPFuncType::DPNP_FT_CMPLX64; const DPNPFuncType eft_C128 = DPNPFuncType::DPNP_FT_CMPLX128; const DPNPFuncType eft_BLN = DPNPFuncType::DPNP_FT_BOOL; +/** + * An internal structure to build a pair of Data type enum value with C++ type + */ +template +struct func_type_pair_t +{ + using type = T; + + static func_type_pair_t get_pair(std::integral_constant) { return {}; } +}; + +/** + * An internal structure to create a map of Data type enum value associated with C++ type + */ +template +struct func_type_map_factory_t : public Ps... +{ + using Ps::get_pair...; + + template + using find_type = typename decltype(get_pair(std::integral_constant{}))::type; +}; + +/** + * A map of the FPTR interface to link Data type enum value with accociated C++ type + */ +typedef func_type_map_factory_t, + func_type_pair_t, + func_type_pair_t, + func_type_pair_t, + func_type_pair_t, + func_type_pair_t>, + func_type_pair_t>> func_type_map_t; + /** * FPTR interface initialization functions */ diff --git a/dpnp/dpnp_algo/dpnp_algo.pxd b/dpnp/dpnp_algo/dpnp_algo.pxd index e0c82b6125c..0c30fa18b6f 100644 --- a/dpnp/dpnp_algo/dpnp_algo.pxd +++ b/dpnp/dpnp_algo/dpnp_algo.pxd @@ -1,7 +1,7 @@ # cython: language_level=3 # -*- coding: utf-8 -*- # ***************************************************************************** -# Copyright (c) 2016-2022, Intel Corporation +# Copyright (c) 2016-2023, Intel Corporation # All rights reserved. 
# # Redistribution and use in source and binary forms, with or without @@ -169,6 +169,7 @@ cdef extern from "dpnp_iface_fptr.hpp" namespace "DPNPFuncName": # need this na DPNP_FN_KRON_EXT DPNP_FN_LEFT_SHIFT DPNP_FN_LEFT_SHIFT_EXT + DPNP_FN_LESS_EQUAL_EXT DPNP_FN_LOG DPNP_FN_LOG_EXT DPNP_FN_LOG10 @@ -429,7 +430,7 @@ ctypedef c_dpctl.DPCTLSyclEventRef(*fptr_2in_1out_strides_t)(c_dpctl.DPCTLSyclQu const shape_elem_type * , const shape_elem_type * , const long * , - const c_dpctl.DPCTLEventVectorRef) + const c_dpctl.DPCTLEventVectorRef) except + ctypedef void(*fptr_blas_gemm_2in_1out_t)(void *, void * , void * , size_t, size_t, size_t) ctypedef c_dpctl.DPCTLSyclEventRef(*dpnp_reduction_c_t)(c_dpctl.DPCTLSyclQueueRef, void *, diff --git a/dpnp/dpnp_algo/dpnp_algo_logic.pyx b/dpnp/dpnp_algo/dpnp_algo_logic.pyx index e0b928ddf02..0aa8f949cfb 100644 --- a/dpnp/dpnp_algo/dpnp_algo_logic.pyx +++ b/dpnp/dpnp_algo/dpnp_algo_logic.pyx @@ -1,7 +1,7 @@ # cython: language_level=3 # -*- coding: utf-8 -*- # ***************************************************************************** -# Copyright (c) 2016-2020, Intel Corporation +# Copyright (c) 2016-2023, Intel Corporation # All rights reserved. 
# # Redistribution and use in source and binary forms, with or without @@ -286,18 +286,13 @@ cpdef utils.dpnp_descriptor dpnp_less(utils.dpnp_descriptor input1, utils.dpnp_d return result -cpdef utils.dpnp_descriptor dpnp_less_equal(utils.dpnp_descriptor input1, utils.dpnp_descriptor input2): - result_sycl_device, result_usm_type, result_sycl_queue = utils.get_common_usm_allocation(input1, input2) - cdef utils.dpnp_descriptor result = utils_py.create_output_descriptor_py(input1.shape, - dpnp.bool, - None, - device=result_sycl_device, - usm_type=result_usm_type, - sycl_queue=result_sycl_queue) - for i in range(result.size): - result.get_pyobj()[i] = dpnp.bool(input1.get_pyobj()[i] <= input2.get_pyobj()[i]) +cpdef utils.dpnp_descriptor dpnp_less_equal(utils.dpnp_descriptor x1_obj, + utils.dpnp_descriptor x2_obj, + object dtype=None, + utils.dpnp_descriptor out=None, + object where=True): + return call_fptr_2in_1out_strides(DPNP_FN_LESS_EQUAL_EXT, x1_obj, x2_obj, dtype, out, where, func_name="less_equal") - return result cpdef utils.dpnp_descriptor dpnp_logical_and(utils.dpnp_descriptor input1, utils.dpnp_descriptor input2): diff --git a/dpnp/dpnp_iface_logic.py b/dpnp/dpnp_iface_logic.py index 0f1e1b5fc0e..96fa795d4d6 100644 --- a/dpnp/dpnp_iface_logic.py +++ b/dpnp/dpnp_iface_logic.py @@ -2,7 +2,7 @@ # distutils: language = c++ # -*- coding: utf-8 -*- # ***************************************************************************** -# Copyright (c) 2016-2020, Intel Corporation +# Copyright (c) 2016-2023, Intel Corporation # All rights reserved. # # Redistribution and use in source and binary forms, with or without @@ -576,17 +576,32 @@ def less(x1, x2): return call_origin(numpy.less, x1, x2) -def less_equal(x1, x2): +def less_equal(x1, + x2, + /, + out=None, + *, + where=True, + dtype=None, + subok=True): """ - Return (x1 <= x2) element-wise. + Return the truth value of (x1 <= x2) element-wise. For full documentation refer to :obj:`numpy.less_equal`. 
+ Returns + ------- + out : dpnp.ndarray + Output array of bool type, element-wise comparison of `x1` and `x2`. + Limitations ----------- - At least either ``x1`` or ``x2`` should be as :obj:`dpnp.ndarray`. + Parameters `x1` and `x2` are supported as either :class:`dpnp.ndarray` or scalar, + but not both (at least either `x1` or `x2` should be as :class:`dpnp.ndarray`). + Parameters `out`, `where`, `dtype` and `subok` are supported with their default values. Otherwise the function will be executed sequentially on CPU. - Input array data types are limited by supported DPNP :ref:`Data types`. + Input array data types are limited by supported DPNP :ref:`Data types`, + excluding `dpnp.complex64` and `dpnp.complex128`. See Also -------- @@ -607,16 +622,25 @@ def less_equal(x1, x2): """ - # x1_desc = dpnp.get_dpnp_descriptor(x1) - # x2_desc = dpnp.get_dpnp_descriptor(x2) - # if x1_desc and x2_desc: - # if x1_desc.size < 2: - # pass - # elif x2_desc.size < 2: - # pass - # else: - # return dpnp_less_equal(x1_desc, x2_desc).get_pyobj() - + if out is not None: + pass + elif where is not True: + pass + elif dtype is not None: + pass + elif subok is not True: + pass + elif dpnp.isscalar(x1) and dpnp.isscalar(x2): + # at least either x1 or x2 has to be an array + pass + else: + # get a common queue to copy data from the host into a device if any input is scalar + queue = get_common_allocation_queue([x1, x2]) if dpnp.isscalar(x1) or dpnp.isscalar(x2) else None + + x1_desc = dpnp.get_dpnp_descriptor(x1, copy_when_strides=False, copy_when_nondefault_queue=False, alloc_queue=queue) + x2_desc = dpnp.get_dpnp_descriptor(x2, copy_when_strides=False, copy_when_nondefault_queue=False, alloc_queue=queue) + if x1_desc and x2_desc: + return dpnp_less_equal(x1_desc, x2_desc).get_pyobj() return call_origin(numpy.less_equal, x1, x2) diff --git a/dpnp/dpnp_utils/dpnp_algo_utils.pxd b/dpnp/dpnp_utils/dpnp_algo_utils.pxd index 0924dae2640..db7127319bb 100644 --- 
a/dpnp/dpnp_utils/dpnp_algo_utils.pxd +++ b/dpnp/dpnp_utils/dpnp_algo_utils.pxd @@ -1,7 +1,7 @@ # cython: language_level=3 # -*- coding: utf-8 -*- # ***************************************************************************** -# Copyright (c) 2016-2020, Intel Corporation +# Copyright (c) 2016-2023, Intel Corporation # All rights reserved. # # Redistribution and use in source and binary forms, with or without @@ -123,7 +123,7 @@ cdef class dpnp_descriptor: cdef void * get_data(self) -cdef shape_type_c get_common_shape(shape_type_c input1_shape, shape_type_c input2_shape) +cdef shape_type_c get_common_shape(shape_type_c input1_shape, shape_type_c input2_shape) except * """ Calculate common shape from input shapes """ diff --git a/dpnp/dpnp_utils/dpnp_algo_utils.pyx b/dpnp/dpnp_utils/dpnp_algo_utils.pyx index 2e04dd96bd6..6605770be62 100644 --- a/dpnp/dpnp_utils/dpnp_algo_utils.pyx +++ b/dpnp/dpnp_utils/dpnp_algo_utils.pyx @@ -429,7 +429,9 @@ cpdef find_common_type(object x1_obj, object x2_obj): return numpy.find_common_type(array_types, scalar_types) -cdef shape_type_c get_common_shape(shape_type_c input1_shape, shape_type_c input2_shape): +cdef shape_type_c get_common_shape(shape_type_c input1_shape, shape_type_c input2_shape) except *: + cdef shape_type_c input1_shape_orig = input1_shape + cdef shape_type_c input2_shape_orig = input2_shape cdef shape_type_c result_shape # ex (8, 1, 6, 1) and (7, 1, 5) -> (8, 1, 6, 1) and (1, 7, 1, 5) @@ -446,9 +448,9 @@ cdef shape_type_c get_common_shape(shape_type_c input1_shape, shape_type_c input elif input2_shape[it] == 1: result_shape.push_back(input1_shape[it]) else: - err_msg = f"{ERROR_PREFIX} in function get_common_shape()" - err_msg += f"operands could not be broadcast together with shapes {input1_shape} {input2_shape}" - ValueError(err_msg) + err_msg = f"{ERROR_PREFIX} in function get_common_shape(): " + err_msg += f"operands could not be broadcast together with shapes {input1_shape_orig} {input2_shape_orig}" + raise 
ValueError(err_msg) return result_shape diff --git a/tests/test_logic.py b/tests/test_logic.py index 7fefe91826f..d79ffaa744f 100644 --- a/tests/test_logic.py +++ b/tests/test_logic.py @@ -3,6 +3,10 @@ import dpnp import numpy +from numpy.testing import ( + assert_allclose, + assert_equal +) @pytest.mark.parametrize("type", @@ -31,11 +35,11 @@ def test_all(type, shape): np_res = numpy.all(a) dpnp_res = dpnp.all(ia) - numpy.testing.assert_allclose(dpnp_res, np_res) + assert_allclose(dpnp_res, np_res) np_res = a.all() dpnp_res = ia.all() - numpy.testing.assert_allclose(dpnp_res, np_res) + assert_allclose(dpnp_res, np_res) @pytest.mark.parametrize("type", @@ -51,7 +55,7 @@ def test_allclose(type): np_res = numpy.allclose(a, b) dpnp_res = dpnp.allclose(dpnp_a, dpnp_b) - numpy.testing.assert_allclose(dpnp_res, np_res) + assert_allclose(dpnp_res, np_res) a[0] = numpy.inf @@ -59,7 +63,7 @@ def test_allclose(type): np_res = numpy.allclose(a, b) dpnp_res = dpnp.allclose(dpnp_a, dpnp_b) - numpy.testing.assert_allclose(dpnp_res, np_res) + assert_allclose(dpnp_res, np_res) @pytest.mark.parametrize("type", @@ -88,11 +92,11 @@ def test_any(type, shape): np_res = numpy.any(a) dpnp_res = dpnp.any(ia) - numpy.testing.assert_allclose(dpnp_res, np_res) + assert_allclose(dpnp_res, np_res) np_res = a.any() dpnp_res = ia.any() - numpy.testing.assert_allclose(dpnp_res, np_res) + assert_allclose(dpnp_res, np_res) @pytest.mark.usefixtures("allow_fall_back_on_numpy") @@ -102,7 +106,7 @@ def test_greater(): for i in range(len(a) + 1): np_res = (a > i) dpnp_res = (ia > i) - numpy.testing.assert_equal(dpnp_res, np_res) + assert_equal(dpnp_res, np_res) @pytest.mark.usefixtures("allow_fall_back_on_numpy") @@ -112,7 +116,7 @@ def test_greater_equal(): for i in range(len(a) + 1): np_res = (a >= i) dpnp_res = (ia >= i) - numpy.testing.assert_equal(dpnp_res, np_res) + assert_equal(dpnp_res, np_res) @pytest.mark.usefixtures("allow_fall_back_on_numpy") @@ -122,17 +126,16 @@ def test_less(): for i in 
range(len(a) + 1): np_res = (a < i) dpnp_res = (ia < i) - numpy.testing.assert_equal(dpnp_res, np_res) + assert_equal(dpnp_res, np_res) -@pytest.mark.usefixtures("allow_fall_back_on_numpy") def test_less_equal(): a = numpy.array([1, 2, 3, 4, 5, 6, 7, 8]) ia = dpnp.array(a) for i in range(len(a) + 1): np_res = (a <= i) dpnp_res = (ia <= i) - numpy.testing.assert_equal(dpnp_res, np_res) + assert_equal(dpnp_res, np_res) @pytest.mark.usefixtures("allow_fall_back_on_numpy") @@ -142,4 +145,51 @@ def test_not_equal(): for i in range(len(a)): np_res = (a != i) dpnp_res = (ia != i) - numpy.testing.assert_equal(dpnp_res, np_res) + assert_equal(dpnp_res, np_res) + +@pytest.mark.parametrize("op", + ['less_equal'], + ids=['less_equal']) +@pytest.mark.parametrize("x1", + [[3, 4, 5, 6], [[1, 2, 3, 4], [5, 6, 7, 8]], [[1, 2, 5, 6], [3, 4, 7, 8], [1, 2, 7, 8]]], + ids=['[3, 4, 5, 6]', '[[1, 2, 3, 4], [5, 6, 7, 8]]', '[[1, 2, 5, 6], [3, 4, 7, 8], [1, 2, 7, 8]]']) +@pytest.mark.parametrize("x2", + [5, [1, 2, 5, 6]], + ids=['5', '[1, 2, 5, 6]']) +def test_elemwise_comparison(op, x1, x2): + create_func = lambda xp, a: xp.asarray(a) if not numpy.isscalar(a) else a + + np_x1, np_x2 = create_func(numpy, x1), create_func(numpy, x2) + dp_x1, dp_x2 = create_func(dpnp, np_x1), create_func(dpnp, np_x2) + + # x1 OP x2 + np_res = getattr(numpy, op)(np_x1, np_x2) + dpnp_res = getattr(dpnp, op)(dp_x1, dp_x2) + assert_equal(dpnp_res, np_res) + + # x2 OP x1 + np_res = getattr(numpy, op)(np_x2, np_x1) + dpnp_res = getattr(dpnp, op)(dp_x2, dp_x1) + assert_equal(dpnp_res, np_res) + + # x1[::-1] OP x2 + np_res = getattr(numpy, op)(np_x1[::-1], np_x2) + dpnp_res = getattr(dpnp, op)(dp_x1[::-1], dp_x2) + assert_equal(dpnp_res, np_res) + +@pytest.mark.parametrize("op", + ['less_equal'], + ids=['less_equal']) +@pytest.mark.parametrize("sh1", + [[10], [8, 4], [4, 1, 2]], + ids=['(10,)', '(8, 4)', '(4, 1, 2)']) +@pytest.mark.parametrize("sh2", + [[12], [4, 8], [1, 8, 6]], + ids=['(12,)', '(4, 8)', '(1, 8, 
6)']) +def test_comparison_no_broadcast_with_shapes(op, sh1, sh2): + x1, x2 = dpnp.random.randn(*sh1), dpnp.random.randn(*sh2) + + # x1 OP x2 + with pytest.raises(ValueError): + getattr(dpnp, op)(x1, x2) + getattr(numpy, op)(x1.asnumpy(), x2.asnumpy()) diff --git a/tests/third_party/cupy/logic_tests/test_comparison.py b/tests/third_party/cupy/logic_tests/test_comparison.py index 0be9eaeee61..4afcea568ff 100644 --- a/tests/third_party/cupy/logic_tests/test_comparison.py +++ b/tests/third_party/cupy/logic_tests/test_comparison.py @@ -8,10 +8,10 @@ from tests.third_party.cupy import testing -@pytest.mark.usefixtures("allow_fall_back_on_numpy") @testing.gpu class TestComparison(unittest.TestCase): + @pytest.mark.usefixtures("allow_fall_back_on_numpy") @testing.for_all_dtypes(no_complex=True) @testing.numpy_cupy_allclose(atol=1e-5) def check_binary(self, name, xp, dtype): @@ -19,21 +19,26 @@ def check_binary(self, name, xp, dtype): b = testing.shaped_reverse_arange((2, 3), xp, dtype) return getattr(xp, name)(a, b) + @pytest.mark.usefixtures("allow_fall_back_on_numpy") def test_greater(self): self.check_binary('greater') + @pytest.mark.usefixtures("allow_fall_back_on_numpy") def test_greater_equal(self): self.check_binary('greater_equal') + @pytest.mark.usefixtures("allow_fall_back_on_numpy") def test_less(self): self.check_binary('less') def test_less_equal(self): self.check_binary('less_equal') + @pytest.mark.usefixtures("allow_fall_back_on_numpy") def test_not_equal(self): self.check_binary('not_equal') + @pytest.mark.usefixtures("allow_fall_back_on_numpy") def test_equal(self): self.check_binary('equal') diff --git a/tests_external/skipped_tests_numpy.tbl b/tests_external/skipped_tests_numpy.tbl index 30b66da5e66..c2c0dc78ec5 100644 --- a/tests_external/skipped_tests_numpy.tbl +++ b/tests_external/skipped_tests_numpy.tbl @@ -318,83 +318,6 @@ tests/test_datetime.py::TestDateTime::test_timedelta_np_int_construction[Y] 
tests/test_datetime.py::TestDateTime::test_timedelta_object_array_conversion tests/test_datetime.py::TestDateTime::test_timedelta_scalar_construction tests/test_datetime.py::TestDateTime::test_timedelta_scalar_construction_units -tests/test_defchararray.py::TestBasic::test_from_object_array -tests/test_defchararray.py::TestBasic::test_from_object_array_unicode -tests/test_defchararray.py::TestBasic::test_from_string -tests/test_defchararray.py::TestBasic::test_from_string_array -tests/test_defchararray.py::TestBasic::test_from_unicode -tests/test_defchararray.py::TestBasic::test_from_unicode_array -tests/test_defchararray.py::TestBasic::test_unicode_upconvert -tests/test_defchararray.py::TestChar::test_it -tests/test_defchararray.py::TestComparisonsMixed1::test_equal -tests/test_defchararray.py::TestComparisonsMixed1::test_greater -tests/test_defchararray.py::TestComparisonsMixed1::test_greater_equal -tests/test_defchararray.py::TestComparisonsMixed1::test_less -tests/test_defchararray.py::TestComparisonsMixed1::test_less_equal -tests/test_defchararray.py::TestComparisonsMixed1::test_not_equal -tests/test_defchararray.py::TestComparisonsMixed2::test_equal -tests/test_defchararray.py::TestComparisonsMixed2::test_greater -tests/test_defchararray.py::TestComparisonsMixed2::test_greater_equal -tests/test_defchararray.py::TestComparisonsMixed2::test_less -tests/test_defchararray.py::TestComparisonsMixed2::test_less_equal -tests/test_defchararray.py::TestComparisonsMixed2::test_not_equal -tests/test_defchararray.py::TestComparisons::test_equal -tests/test_defchararray.py::TestComparisons::test_greater -tests/test_defchararray.py::TestComparisons::test_greater_equal -tests/test_defchararray.py::TestComparisons::test_less -tests/test_defchararray.py::TestComparisons::test_less_equal -tests/test_defchararray.py::TestComparisons::test_not_equal -tests/test_defchararray.py::test_empty_indexing -tests/test_defchararray.py::TestInformation::test_count 
-tests/test_defchararray.py::TestInformation::test_endswith -tests/test_defchararray.py::TestInformation::test_find -tests/test_defchararray.py::TestInformation::test_index -tests/test_defchararray.py::TestInformation::test_isalnum -tests/test_defchararray.py::TestInformation::test_isalpha -tests/test_defchararray.py::TestInformation::test_isdigit -tests/test_defchararray.py::TestInformation::test_islower -tests/test_defchararray.py::TestInformation::test_isspace -tests/test_defchararray.py::TestInformation::test_istitle -tests/test_defchararray.py::TestInformation::test_isupper -tests/test_defchararray.py::TestInformation::test_len -tests/test_defchararray.py::TestInformation::test_rfind -tests/test_defchararray.py::TestInformation::test_rindex -tests/test_defchararray.py::TestInformation::test_startswith -tests/test_defchararray.py::TestMethods::test_capitalize -tests/test_defchararray.py::TestMethods::test_center -tests/test_defchararray.py::TestMethods::test_decode -tests/test_defchararray.py::TestMethods::test_encode -tests/test_defchararray.py::TestMethods::test_expandtabs -tests/test_defchararray.py::TestMethods::test_isdecimal -tests/test_defchararray.py::TestMethods::test_isnumeric -tests/test_defchararray.py::TestMethods::test_join -tests/test_defchararray.py::TestMethods::test_ljust -tests/test_defchararray.py::TestMethods::test_lower -tests/test_defchararray.py::TestMethods::test_lstrip -tests/test_defchararray.py::TestMethods::test_partition -tests/test_defchararray.py::TestMethods::test_replace -tests/test_defchararray.py::TestMethods::test_rjust -tests/test_defchararray.py::TestMethods::test_rpartition -tests/test_defchararray.py::TestMethods::test_rsplit -tests/test_defchararray.py::TestMethods::test_rstrip -tests/test_defchararray.py::TestMethods::test_split -tests/test_defchararray.py::TestMethods::test_splitlines -tests/test_defchararray.py::TestMethods::test_strip -tests/test_defchararray.py::TestMethods::test_swapcase 
-tests/test_defchararray.py::TestMethods::test_title -tests/test_defchararray.py::TestMethods::test_upper -tests/test_defchararray.py::TestOperations::test_add -tests/test_defchararray.py::TestOperations::test_mod -tests/test_defchararray.py::TestOperations::test_mul -tests/test_defchararray.py::TestOperations::test_radd -tests/test_defchararray.py::TestOperations::test_rmod -tests/test_defchararray.py::TestOperations::test_rmul -tests/test_defchararray.py::TestOperations::test_slice -tests/test_defchararray.py::TestVecString::test_invalid_args_tuple -tests/test_defchararray.py::TestVecString::test_invalid_function_args -tests/test_defchararray.py::TestVecString::test_invalid_result_type -tests/test_defchararray.py::TestVecString::test_non_string_array -tests/test_defchararray.py::TestWhitespace::test1 tests/test_deprecations.py::TestAlen::test_alen tests/test_deprecations.py::TestArrayDataAttributeAssignmentDeprecation::test_data_attr_assignment tests/test_deprecations.py::TestBinaryReprInsufficientWidthParameterForRepresentation::test_insufficient_width_negative From 09a387ac6fde19a05d880b003e186a33260f2dbb Mon Sep 17 00:00:00 2001 From: Anton <100830759+antonwolfy@users.noreply.github.com> Date: Wed, 25 Jan 2023 11:23:08 +0100 Subject: [PATCH 06/32] Add support of comparison operations (#1278) --- .../include/dpnp_gen_2arg_2type_tbl.hpp | 5 + dpnp/backend/include/dpnp_iface_fptr.hpp | 5 + dpnp/backend/kernels/dpnp_krnl_logic.cpp | 10 + dpnp/dpnp_algo/dpnp_algo.pxd | 5 + dpnp/dpnp_algo/dpnp_algo_logic.pyx | 90 ++---- dpnp/dpnp_iface_logic.py | 260 +++++++++++++----- tests/test_linalg.py | 1 - tests/test_logic.py | 23 +- tests/test_random.py | 1 - tests/test_random_state.py | 1 - tests/test_sycl_queue.py | 5 +- .../cupy/logic_tests/test_comparison.py | 6 - 12 files changed, 259 insertions(+), 153 deletions(-) diff --git a/dpnp/backend/include/dpnp_gen_2arg_2type_tbl.hpp b/dpnp/backend/include/dpnp_gen_2arg_2type_tbl.hpp index e76c92b47cd..2fb4fe9d6fd 100644 --- 
a/dpnp/backend/include/dpnp_gen_2arg_2type_tbl.hpp +++ b/dpnp/backend/include/dpnp_gen_2arg_2type_tbl.hpp @@ -86,6 +86,11 @@ #endif +MACRO_2ARG_2TYPES_LOGIC_OP(dpnp_equal_c, input1_elem == input2_elem) +MACRO_2ARG_2TYPES_LOGIC_OP(dpnp_greater_c, input1_elem > input2_elem) +MACRO_2ARG_2TYPES_LOGIC_OP(dpnp_greater_equal_c, input1_elem >= input2_elem) +MACRO_2ARG_2TYPES_LOGIC_OP(dpnp_less_c, input1_elem < input2_elem) MACRO_2ARG_2TYPES_LOGIC_OP(dpnp_less_equal_c, input1_elem <= input2_elem) +MACRO_2ARG_2TYPES_LOGIC_OP(dpnp_not_equal_c, input1_elem != input2_elem) #undef MACRO_2ARG_2TYPES_LOGIC_OP diff --git a/dpnp/backend/include/dpnp_iface_fptr.hpp b/dpnp/backend/include/dpnp_iface_fptr.hpp index 18e3629366d..7a3564fa1d3 100644 --- a/dpnp/backend/include/dpnp_iface_fptr.hpp +++ b/dpnp/backend/include/dpnp_iface_fptr.hpp @@ -151,6 +151,7 @@ enum class DPNPFuncName : size_t DPNP_FN_EIG_EXT, /**< Used in numpy.linalg.eig() impl, requires extra parameters */ DPNP_FN_EIGVALS, /**< Used in numpy.linalg.eigvals() impl */ DPNP_FN_EIGVALS_EXT, /**< Used in numpy.linalg.eigvals() impl, requires extra parameters */ + DPNP_FN_EQUAL_EXT, /**< Used in numpy.equal() impl, requires extra parameters */ DPNP_FN_ERF, /**< Used in scipy.special.erf impl */ DPNP_FN_ERF_EXT, /**< Used in scipy.special.erf impl, requires extra parameters */ DPNP_FN_EYE, /**< Used in numpy.eye() impl */ @@ -179,6 +180,8 @@ enum class DPNPFuncName : size_t DPNP_FN_FMOD_EXT, /**< Used in numpy.fmod() impl, requires extra parameters */ DPNP_FN_FULL, /**< Used in numpy.full() impl */ DPNP_FN_FULL_LIKE, /**< Used in numpy.full_like() impl */ + DPNP_FN_GREATER_EXT, /**< Used in numpy.greater() impl, requires extra parameters */ + DPNP_FN_GREATER_EQUAL_EXT, /**< Used in numpy.greater_equal() impl, requires extra parameters */ DPNP_FN_HYPOT, /**< Used in numpy.hypot() impl */ DPNP_FN_HYPOT_EXT, /**< Used in numpy.hypot() impl, requires extra parameters */ DPNP_FN_IDENTITY, /**< Used in numpy.identity() impl */ @@ 
-193,6 +196,7 @@ enum class DPNPFuncName : size_t DPNP_FN_KRON_EXT, /**< Used in numpy.kron() impl, requires extra parameters */ DPNP_FN_LEFT_SHIFT, /**< Used in numpy.left_shift() impl */ DPNP_FN_LEFT_SHIFT_EXT, /**< Used in numpy.left_shift() impl, requires extra parameters */ + DPNP_FN_LESS_EXT, /**< Used in numpy.less() impl, requires extra parameters */ DPNP_FN_LESS_EQUAL_EXT, /**< Used in numpy.less_equal() impl, requires extra parameters */ DPNP_FN_LOG, /**< Used in numpy.log() impl */ DPNP_FN_LOG_EXT, /**< Used in numpy.log() impl, requires extra parameters */ @@ -228,6 +232,7 @@ enum class DPNPFuncName : size_t DPNP_FN_NEGATIVE_EXT, /**< Used in numpy.negative() impl, requires extra parameters */ DPNP_FN_NONZERO, /**< Used in numpy.nonzero() impl */ DPNP_FN_NONZERO_EXT, /**< Used in numpy.nonzero() impl, requires extra parameters */ + DPNP_FN_NOT_EQUAL_EXT, /**< Used in numpy.not_equal() impl, requires extra parameters */ DPNP_FN_ONES, /**< Used in numpy.ones() impl */ DPNP_FN_ONES_LIKE, /**< Used in numpy.ones_like() impl */ DPNP_FN_PARTITION, /**< Used in numpy.partition() impl */ diff --git a/dpnp/backend/kernels/dpnp_krnl_logic.cpp b/dpnp/backend/kernels/dpnp_krnl_logic.cpp index 6be989a4ec8..19a6dd3646e 100644 --- a/dpnp/backend/kernels/dpnp_krnl_logic.cpp +++ b/dpnp/backend/kernels/dpnp_krnl_logic.cpp @@ -536,8 +536,18 @@ DPCTLSyclEventRef (*dpnp_any_ext_c)(DPCTLSyclQueueRef, template static void func_map_logic_2arg_2type_core(func_map_t& fmap) { + ((fmap[DPNPFuncName::DPNP_FN_EQUAL_EXT][FT1][FTs] = + {eft_BLN, (void*)dpnp_equal_c_ext, func_type_map_t::find_type>}), ...); + ((fmap[DPNPFuncName::DPNP_FN_GREATER_EXT][FT1][FTs] = + {eft_BLN, (void*)dpnp_greater_c_ext, func_type_map_t::find_type>}), ...); + ((fmap[DPNPFuncName::DPNP_FN_GREATER_EQUAL_EXT][FT1][FTs] = + {eft_BLN, (void*)dpnp_greater_equal_c_ext, func_type_map_t::find_type>}), ...); + ((fmap[DPNPFuncName::DPNP_FN_LESS_EXT][FT1][FTs] = + {eft_BLN, (void*)dpnp_less_c_ext, 
func_type_map_t::find_type>}), ...); ((fmap[DPNPFuncName::DPNP_FN_LESS_EQUAL_EXT][FT1][FTs] = {eft_BLN, (void*)dpnp_less_equal_c_ext, func_type_map_t::find_type>}), ...); + ((fmap[DPNPFuncName::DPNP_FN_NOT_EQUAL_EXT][FT1][FTs] = + {eft_BLN, (void*)dpnp_not_equal_c_ext, func_type_map_t::find_type>}), ...); } template diff --git a/dpnp/dpnp_algo/dpnp_algo.pxd b/dpnp/dpnp_algo/dpnp_algo.pxd index 0c30fa18b6f..50387e1565a 100644 --- a/dpnp/dpnp_algo/dpnp_algo.pxd +++ b/dpnp/dpnp_algo/dpnp_algo.pxd @@ -127,6 +127,7 @@ cdef extern from "dpnp_iface_fptr.hpp" namespace "DPNPFuncName": # need this na DPNP_FN_EIG_EXT DPNP_FN_EIGVALS DPNP_FN_EIGVALS_EXT + DPNP_FN_EQUAL_EXT DPNP_FN_ERF DPNP_FN_ERF_EXT DPNP_FN_EYE @@ -155,6 +156,8 @@ cdef extern from "dpnp_iface_fptr.hpp" namespace "DPNPFuncName": # need this na DPNP_FN_FMOD_EXT DPNP_FN_FULL DPNP_FN_FULL_LIKE + DPNP_FN_GREATER_EXT + DPNP_FN_GREATER_EQUAL_EXT DPNP_FN_HYPOT DPNP_FN_HYPOT_EXT DPNP_FN_IDENTITY @@ -169,6 +172,7 @@ cdef extern from "dpnp_iface_fptr.hpp" namespace "DPNPFuncName": # need this na DPNP_FN_KRON_EXT DPNP_FN_LEFT_SHIFT DPNP_FN_LEFT_SHIFT_EXT + DPNP_FN_LESS_EXT DPNP_FN_LESS_EQUAL_EXT DPNP_FN_LOG DPNP_FN_LOG_EXT @@ -204,6 +208,7 @@ cdef extern from "dpnp_iface_fptr.hpp" namespace "DPNPFuncName": # need this na DPNP_FN_NEGATIVE_EXT DPNP_FN_NONZERO DPNP_FN_NONZERO_EXT + DPNP_FN_NOT_EQUAL_EXT DPNP_FN_ONES DPNP_FN_ONES_LIKE DPNP_FN_PARTITION diff --git a/dpnp/dpnp_algo/dpnp_algo_logic.pyx b/dpnp/dpnp_algo/dpnp_algo_logic.pyx index 0aa8f949cfb..ae0f711eb10 100644 --- a/dpnp/dpnp_algo/dpnp_algo_logic.pyx +++ b/dpnp/dpnp_algo/dpnp_algo_logic.pyx @@ -166,46 +166,28 @@ cpdef utils.dpnp_descriptor dpnp_any(utils.dpnp_descriptor array1): return result -cpdef utils.dpnp_descriptor dpnp_equal(utils.dpnp_descriptor input1, utils.dpnp_descriptor input2): - result_sycl_device, result_usm_type, result_sycl_queue = utils.get_common_usm_allocation(input1, input2) - cdef utils.dpnp_descriptor result = 
utils_py.create_output_descriptor_py(input1.shape, - dpnp.bool, - None, - device=result_sycl_device, - usm_type=result_usm_type, - sycl_queue=result_sycl_queue) - for i in range(result.size): - result.get_pyobj()[i] = dpnp.bool(input1.get_pyobj()[i] == input2.get_pyobj()[i]) +cpdef utils.dpnp_descriptor dpnp_equal(utils.dpnp_descriptor x1_obj, + utils.dpnp_descriptor x2_obj, + object dtype=None, + utils.dpnp_descriptor out=None, + object where=True): + return call_fptr_2in_1out_strides(DPNP_FN_EQUAL_EXT, x1_obj, x2_obj, dtype, out, where, func_name="equal") - return result +cpdef utils.dpnp_descriptor dpnp_greater(utils.dpnp_descriptor x1_obj, + utils.dpnp_descriptor x2_obj, + object dtype=None, + utils.dpnp_descriptor out=None, + object where=True): + return call_fptr_2in_1out_strides(DPNP_FN_GREATER_EXT, x1_obj, x2_obj, dtype, out, where, func_name="greater") -cpdef utils.dpnp_descriptor dpnp_greater(utils.dpnp_descriptor input1, utils.dpnp_descriptor input2): - result_sycl_device, result_usm_type, result_sycl_queue = utils.get_common_usm_allocation(input1, input2) - cdef utils.dpnp_descriptor result = utils_py.create_output_descriptor_py(input1.shape, - dpnp.bool, - None, - device=result_sycl_device, - usm_type=result_usm_type, - sycl_queue=result_sycl_queue) - for i in range(result.size): - result.get_pyobj()[i] = dpnp.bool(input1.get_pyobj()[i] > input2.get_pyobj()[i]) - - return result - - -cpdef utils.dpnp_descriptor dpnp_greater_equal(utils.dpnp_descriptor input1, utils.dpnp_descriptor input2): - result_sycl_device, result_usm_type, result_sycl_queue = utils.get_common_usm_allocation(input1, input2) - cdef utils.dpnp_descriptor result = utils_py.create_output_descriptor_py(input1.shape, - dpnp.bool, - None, - device=result_sycl_device, - usm_type=result_usm_type, - sycl_queue=result_sycl_queue) - for i in range(result.size): - result.get_pyobj()[i] = dpnp.bool(input1.get_pyobj()[i] >= input2.get_pyobj()[i]) - return result +cpdef utils.dpnp_descriptor 
dpnp_greater_equal(utils.dpnp_descriptor x1_obj, + utils.dpnp_descriptor x2_obj, + object dtype=None, + utils.dpnp_descriptor out=None, + object where=True): + return call_fptr_2in_1out_strides(DPNP_FN_GREATER_EQUAL_EXT, x1_obj, x2_obj, dtype, out, where, func_name="greater_equal") cpdef utils.dpnp_descriptor dpnp_isclose(utils.dpnp_descriptor input1, @@ -272,18 +254,12 @@ cpdef utils.dpnp_descriptor dpnp_isnan(utils.dpnp_descriptor input1): return result -cpdef utils.dpnp_descriptor dpnp_less(utils.dpnp_descriptor input1, utils.dpnp_descriptor input2): - result_sycl_device, result_usm_type, result_sycl_queue = utils.get_common_usm_allocation(input1, input2) - cdef utils.dpnp_descriptor result = utils_py.create_output_descriptor_py(input1.shape, - dpnp.bool, - None, - device=result_sycl_device, - usm_type=result_usm_type, - sycl_queue=result_sycl_queue) - for i in range(result.size): - result.get_pyobj()[i] = dpnp.bool(input1.get_pyobj()[i] < input2.get_pyobj()[i]) - - return result +cpdef utils.dpnp_descriptor dpnp_less(utils.dpnp_descriptor x1_obj, + utils.dpnp_descriptor x2_obj, + object dtype=None, + utils.dpnp_descriptor out=None, + object where=True): + return call_fptr_2in_1out_strides(DPNP_FN_LESS_EXT, x1_obj, x2_obj, dtype, out, where, func_name="less") cpdef utils.dpnp_descriptor dpnp_less_equal(utils.dpnp_descriptor x1_obj, @@ -355,15 +331,9 @@ cpdef utils.dpnp_descriptor dpnp_logical_xor(utils.dpnp_descriptor input1, utils return result -cpdef utils.dpnp_descriptor dpnp_not_equal(utils.dpnp_descriptor input1, utils.dpnp_descriptor input2): - result_sycl_device, result_usm_type, result_sycl_queue = utils.get_common_usm_allocation(input1, input2) - cdef utils.dpnp_descriptor result = utils_py.create_output_descriptor_py(input1.shape, - dpnp.bool, - None, - device=result_sycl_device, - usm_type=result_usm_type, - sycl_queue=result_sycl_queue) - for i in range(result.size): - result.get_pyobj()[i] = dpnp.bool(input1.get_pyobj()[i] != input2.get_pyobj()[i]) 
- - return result +cpdef utils.dpnp_descriptor dpnp_not_equal(utils.dpnp_descriptor x1_obj, + utils.dpnp_descriptor x2_obj, + object dtype=None, + utils.dpnp_descriptor out=None, + object where=True): + return call_fptr_2in_1out_strides(DPNP_FN_NOT_EQUAL_EXT, x1_obj, x2_obj, dtype, out, where, func_name="not_equal") diff --git a/dpnp/dpnp_iface_logic.py b/dpnp/dpnp_iface_logic.py index 96fa795d4d6..153bac1b24f 100644 --- a/dpnp/dpnp_iface_logic.py +++ b/dpnp/dpnp_iface_logic.py @@ -219,18 +219,32 @@ def any(x1, axis=None, out=None, keepdims=False): return call_origin(numpy.any, x1, axis, out, keepdims) -def equal(x1, x2): +def equal(x1, + x2, + /, + out=None, + *, + where=True, + dtype=None, + subok=True): """ - Return (x1 == x2) element-wise. + Return the truth value of (x1 == x2) element-wise. For full documentation refer to :obj:`numpy.equal`. + Returns + ------- + out : dpnp.ndarray + Output array of bool type, element-wise comparison of `x1` and `x2`. + Limitations ----------- - Parameter ``x1`` is supported as :obj:`dpnp.ndarray`. - Parameter ``x2`` is supported as either :obj:`dpnp.ndarray` or int. - Input array data types are limited by supported DPNP :ref:`Data types`. - Sizes, shapes and data types of input arrays ``x1`` and ``x2`` are supported to be equal. + Parameters `x1` and `x2` are supported as either :class:`dpnp.ndarray` or scalar, + but not both (at least either `x1` or `x2` should be as :class:`dpnp.ndarray`). + Parameters `out`, `where`, `dtype` and `subok` are supported with their default values. + Otherwise the function will be executed sequentially on CPU. + Input array data types are limited by supported DPNP :ref:`Data types`, + excluding `dpnp.complex64` and `dpnp.complex128`. 
See Also -------- @@ -250,33 +264,55 @@ def equal(x1, x2): [True, True, False] """ + + if out is not None: + pass + elif where is not True: + pass + elif dtype is not None: + pass + elif subok is not True: + pass + elif dpnp.isscalar(x1) and dpnp.isscalar(x2): + # at least either x1 or x2 has to be an array + pass + else: + # get a common queue to copy data from the host into a device if any input is scalar + queue = get_common_allocation_queue([x1, x2]) if dpnp.isscalar(x1) or dpnp.isscalar(x2) else None - # x1_desc = dpnp.get_dpnp_descriptor(x1) - # x2_desc = dpnp.get_dpnp_descriptor(x2) - # if x1_desc and x2_desc: - # if x1_desc.size != x2_desc.size: - # pass - # elif x1_desc.dtype != x2_desc.dtype: - # pass - # elif x1_desc.shape != x2_desc.shape: - # pass - # else: - # return dpnp_equal(x1_desc, x2_desc).get_pyobj() - + x1_desc = dpnp.get_dpnp_descriptor(x1, copy_when_strides=False, copy_when_nondefault_queue=False, alloc_queue=queue) + x2_desc = dpnp.get_dpnp_descriptor(x2, copy_when_strides=False, copy_when_nondefault_queue=False, alloc_queue=queue) + if x1_desc and x2_desc: + return dpnp_equal(x1_desc, x2_desc).get_pyobj() return call_origin(numpy.equal, x1, x2) -def greater(x1, x2): +def greater(x1, + x2, + /, + out=None, + *, + where=True, + dtype=None, + subok=True): """ - Return (x1 > x2) element-wise. + Return the truth value of (x1 > x2) element-wise. For full documentation refer to :obj:`numpy.greater`. + Returns + ------- + out : dpnp.ndarray + Output array of bool type, element-wise comparison of `x1` and `x2`. + Limitations ----------- - At least either ``x1`` or ``x2`` should be as :obj:`dpnp.ndarray`. + Parameters `x1` and `x2` are supported as either :class:`dpnp.ndarray` or scalar, + but not both (at least either `x1` or `x2` should be as :class:`dpnp.ndarray`). + Parameters `out`, `where`, `dtype` and `subok` are supported with their default values. Otherwise the function will be executed sequentially on CPU. 
- Input array data types are limited by supported DPNP :ref:`Data types`. + Input array data types are limited by supported DPNP :ref:`Data types`, + excluding `dpnp.complex64` and `dpnp.complex128`. See Also -------- @@ -297,30 +333,54 @@ def greater(x1, x2): """ - # x1_desc = dpnp.get_dpnp_descriptor(x1) - # x2_desc = dpnp.get_dpnp_descriptor(x2) - # if x1_desc and x2_desc: - # if x1_desc.size < 2: - # pass - # elif x2_desc.size < 2: - # pass - # else: - # return dpnp_greater(x1_desc, x2_desc).get_pyobj() + if out is not None: + pass + elif where is not True: + pass + elif dtype is not None: + pass + elif subok is not True: + pass + elif dpnp.isscalar(x1) and dpnp.isscalar(x2): + # at least either x1 or x2 has to be an array + pass + else: + # get a common queue to copy data from the host into a device if any input is scalar + queue = get_common_allocation_queue([x1, x2]) if dpnp.isscalar(x1) or dpnp.isscalar(x2) else None + x1_desc = dpnp.get_dpnp_descriptor(x1, copy_when_strides=False, copy_when_nondefault_queue=False, alloc_queue=queue) + x2_desc = dpnp.get_dpnp_descriptor(x2, copy_when_strides=False, copy_when_nondefault_queue=False, alloc_queue=queue) + if x1_desc and x2_desc: + return dpnp_greater(x1_desc, x2_desc).get_pyobj() return call_origin(numpy.greater, x1, x2) -def greater_equal(x1, x2): +def greater_equal(x1, + x2, + /, + out=None, + *, + where=True, + dtype=None, + subok=True): """ - Return (x1 >= x2) element-wise. + Return the truth value of (x1 >= x2) element-wise. For full documentation refer to :obj:`numpy.greater_equal`. + Returns + ------- + out : dpnp.ndarray + Output array of bool type, element-wise comparison of `x1` and `x2`. + Limitations ----------- - At least either ``x1`` or ``x2`` should be as :obj:`dpnp.ndarray`. + Parameters `x1` and `x2` are supported as either :class:`dpnp.ndarray` or scalar, + but not both (at least either `x1` or `x2` should be as :class:`dpnp.ndarray`). 
+ Parameters `out`, `where`, `dtype` and `subok` are supported with their default values. Otherwise the function will be executed sequentially on CPU. - Input array data types are limited by supported DPNP :ref:`Data types`. + Input array data types are limited by supported DPNP :ref:`Data types`, + excluding `dpnp.complex64` and `dpnp.complex128`. See Also -------- @@ -341,16 +401,25 @@ def greater_equal(x1, x2): """ - # x1_desc = dpnp.get_dpnp_descriptor(x1) - # x2_desc = dpnp.get_dpnp_descriptor(x2) - # if x1_desc and x2_desc: - # if x1_desc.size < 2: - # pass - # elif x2_desc.size < 2: - # pass - # else: - # return dpnp_greater_equal(x1_desc, x2_desc).get_pyobj() + if out is not None: + pass + elif where is not True: + pass + elif dtype is not None: + pass + elif subok is not True: + pass + elif dpnp.isscalar(x1) and dpnp.isscalar(x2): + # at least either x1 or x2 has to be an array + pass + else: + # get a common queue to copy data from the host into a device if any input is scalar + queue = get_common_allocation_queue([x1, x2]) if dpnp.isscalar(x1) or dpnp.isscalar(x2) else None + x1_desc = dpnp.get_dpnp_descriptor(x1, copy_when_strides=False, copy_when_nondefault_queue=False, alloc_queue=queue) + x2_desc = dpnp.get_dpnp_descriptor(x2, copy_when_strides=False, copy_when_nondefault_queue=False, alloc_queue=queue) + if x1_desc and x2_desc: + return dpnp_greater_equal(x1_desc, x2_desc).get_pyobj() return call_origin(numpy.greater_equal, x1, x2) @@ -532,17 +601,32 @@ def isnan(x1, out=None, **kwargs): return call_origin(numpy.isnan, x1, out, **kwargs) -def less(x1, x2): +def less(x1, + x2, + /, + out=None, + *, + where=True, + dtype=None, + subok=True): """ - Return (x1 < x2) element-wise. + Return the truth value of (x1 < x2) element-wise. For full documentation refer to :obj:`numpy.less`. + Returns + ------- + out : dpnp.ndarray + Output array of bool type, element-wise comparison of `x1` and `x2`. 
+ Limitations ----------- - At least either ``x1`` or ``x2`` should be as :obj:`dpnp.ndarray`. + Parameters `x1` and `x2` are supported as either :class:`dpnp.ndarray` or scalar, + but not both (at least either `x1` or `x2` should be as :class:`dpnp.ndarray`). + Parameters `out`, `where`, `dtype` and `subok` are supported with their default values. Otherwise the function will be executed sequentially on CPU. - Input array data types are limited by supported DPNP :ref:`Data types`. + Input array data types are limited by supported DPNP :ref:`Data types`, + excluding `dpnp.complex64` and `dpnp.complex128`. See Also -------- @@ -563,16 +647,25 @@ def less(x1, x2): """ - # x1_desc = dpnp.get_dpnp_descriptor(x1) - # x2_desc = dpnp.get_dpnp_descriptor(x2) - # if x1_desc and x2_desc: - # if x1_desc.size < 2: - # pass - # elif x2_desc.size < 2: - # pass - # else: - # return dpnp_less(x1_desc, x2_desc).get_pyobj() + if out is not None: + pass + elif where is not True: + pass + elif dtype is not None: + pass + elif subok is not True: + pass + elif dpnp.isscalar(x1) and dpnp.isscalar(x2): + # at least either x1 or x2 has to be an array + pass + else: + # get a common queue to copy data from the host into a device if any input is scalar + queue = get_common_allocation_queue([x1, x2]) if dpnp.isscalar(x1) or dpnp.isscalar(x2) else None + x1_desc = dpnp.get_dpnp_descriptor(x1, copy_when_strides=False, copy_when_nondefault_queue=False, alloc_queue=queue) + x2_desc = dpnp.get_dpnp_descriptor(x2, copy_when_strides=False, copy_when_nondefault_queue=False, alloc_queue=queue) + if x1_desc and x2_desc: + return dpnp_less(x1_desc, x2_desc).get_pyobj() return call_origin(numpy.less, x1, x2) @@ -813,18 +906,32 @@ def logical_xor(x1, x2, out=None, **kwargs): return call_origin(numpy.logical_xor, x1, x2, out, **kwargs) -def not_equal(x1, x2): +def not_equal(x1, + x2, + /, + out=None, + *, + where=True, + dtype=None, + subok=True): """ - Return (x1 != x2) element-wise. 
+ Return the truth value of (x1 != x2) element-wise. For full documentation refer to :obj:`numpy.not_equal`. + Returns + ------- + out : dpnp.ndarray + Output array of bool type, element-wise comparison of `x1` and `x2`. + Limitations ----------- - At least either ``x1`` or ``x2`` should be as :obj:`dpnp.ndarray`. - If either ``x1`` or ``x2`` is scalar then other one should be :obj:`dpnp.ndarray`. + Parameters `x1` and `x2` are supported as either :class:`dpnp.ndarray` or scalar, + but not both (at least either `x1` or `x2` should be as :class:`dpnp.ndarray`). + Parameters `out`, `where`, `dtype` and `subok` are supported with their default values. Otherwise the function will be executed sequentially on CPU. - Input array data types are limited by supported DPNP :ref:`Data types`. + Input array data types are limited by supported DPNP :ref:`Data types`, + excluding `dpnp.complex64` and `dpnp.complex128`. See Also -------- @@ -845,16 +952,23 @@ def not_equal(x1, x2): """ - # x1_desc = dpnp.get_dpnp_descriptor(x1) - # x2_desc = dpnp.get_dpnp_descriptor(x2) - # if x1_desc and x2_desc: - # if x1_desc.size < 2: - # pass - # elif x2_desc.size < 2: - # pass - # else: - # result = dpnp_not_equal(x1_desc, x2_desc).get_pyobj() - - # return result + if out is not None: + pass + elif where is not True: + pass + elif dtype is not None: + pass + elif subok is not True: + pass + elif dpnp.isscalar(x1) and dpnp.isscalar(x2): + # at least either x1 or x2 has to be an array + pass + else: + # get a common queue to copy data from the host into a device if any input is scalar + queue = get_common_allocation_queue([x1, x2]) if dpnp.isscalar(x1) or dpnp.isscalar(x2) else None + x1_desc = dpnp.get_dpnp_descriptor(x1, copy_when_strides=False, copy_when_nondefault_queue=False, alloc_queue=queue) + x2_desc = dpnp.get_dpnp_descriptor(x2, copy_when_strides=False, copy_when_nondefault_queue=False, alloc_queue=queue) + if x1_desc and x2_desc: + return dpnp_not_equal(x1_desc, 
x2_desc).get_pyobj() return call_origin(numpy.not_equal, x1, x2) diff --git a/tests/test_linalg.py b/tests/test_linalg.py index dd89a18adbd..ac8392d1538 100644 --- a/tests/test_linalg.py +++ b/tests/test_linalg.py @@ -278,7 +278,6 @@ def test_qr(type, shape, mode): numpy.testing.assert_allclose(dpnp_r, np_r, rtol=tol, atol=tol) -@pytest.mark.usefixtures("allow_fall_back_on_numpy") @pytest.mark.parametrize("type", [numpy.float64, numpy.float32, numpy.int64, numpy.int32], ids=['float64', 'float32', 'int64', 'int32']) diff --git a/tests/test_logic.py b/tests/test_logic.py index d79ffaa744f..062300bb8d3 100644 --- a/tests/test_logic.py +++ b/tests/test_logic.py @@ -99,7 +99,15 @@ def test_any(type, shape): assert_allclose(dpnp_res, np_res) -@pytest.mark.usefixtures("allow_fall_back_on_numpy") +def test_equal(): + a = numpy.array([1, 2, 3, 4, 5, 6, 7, 8]) + ia = dpnp.array(a) + for i in range(len(a)): + np_res = (a == i) + dpnp_res = (ia == i) + assert_equal(dpnp_res, np_res) + + def test_greater(): a = numpy.array([1, 2, 3, 4, 5, 6, 7, 8]) ia = dpnp.array(a) @@ -109,7 +117,6 @@ def test_greater(): assert_equal(dpnp_res, np_res) -@pytest.mark.usefixtures("allow_fall_back_on_numpy") def test_greater_equal(): a = numpy.array([1, 2, 3, 4, 5, 6, 7, 8]) ia = dpnp.array(a) @@ -119,7 +126,6 @@ def test_greater_equal(): assert_equal(dpnp_res, np_res) -@pytest.mark.usefixtures("allow_fall_back_on_numpy") def test_less(): a = numpy.array([1, 2, 3, 4, 5, 6, 7, 8]) ia = dpnp.array(a) @@ -138,7 +144,6 @@ def test_less_equal(): assert_equal(dpnp_res, np_res) -@pytest.mark.usefixtures("allow_fall_back_on_numpy") def test_not_equal(): a = numpy.array([1, 2, 3, 4, 5, 6, 7, 8]) ia = dpnp.array(a) @@ -147,9 +152,10 @@ def test_not_equal(): dpnp_res = (ia != i) assert_equal(dpnp_res, np_res) + @pytest.mark.parametrize("op", - ['less_equal'], - ids=['less_equal']) + ['equal', 'greater', 'greater_equal', 'less', 'less_equal', 'not_equal'], + ids=['equal', 'greater', 'greater_equal', 'less', 
'less_equal', 'not_equal']) @pytest.mark.parametrize("x1", [[3, 4, 5, 6], [[1, 2, 3, 4], [5, 6, 7, 8]], [[1, 2, 5, 6], [3, 4, 7, 8], [1, 2, 7, 8]]], ids=['[3, 4, 5, 6]', '[[1, 2, 3, 4], [5, 6, 7, 8]]', '[[1, 2, 5, 6], [3, 4, 7, 8], [1, 2, 7, 8]]']) @@ -177,9 +183,10 @@ def test_elemwise_comparison(op, x1, x2): dpnp_res = getattr(dpnp, op)(dp_x1[::-1], dp_x2) assert_equal(dpnp_res, np_res) + @pytest.mark.parametrize("op", - ['less_equal'], - ids=['less_equal']) + ['equal', 'greater', 'greater_equal', 'less', 'less_equal', 'not_equal'], + ids=['equal', 'greater', 'greater_equal', 'less', 'less_equal', 'not_equal']) @pytest.mark.parametrize("sh1", [[10], [8, 4], [4, 1, 2]], ids=['(10,)', '(8, 4)', '(4, 1, 2)']) diff --git a/tests/test_random.py b/tests/test_random.py index 54cb2fa3a4d..bc3501f4d20 100644 --- a/tests/test_random.py +++ b/tests/test_random.py @@ -75,7 +75,6 @@ def test_input_shape(func): assert shape == res.shape -@pytest.mark.usefixtures("allow_fall_back_on_numpy") @pytest.mark.parametrize("func", [dpnp.random.random, dpnp.random.random_sample, diff --git a/tests/test_random_state.py b/tests/test_random_state.py index 1a5d554e14d..0d1752c744e 100644 --- a/tests/test_random_state.py +++ b/tests/test_random_state.py @@ -322,7 +322,6 @@ def test_negative_bounds(self): assert_array_equal(actual, desired) - @pytest.mark.usefixtures("allow_fall_back_on_numpy") def test_negative_interval(self): rs = RandomState(3567) diff --git a/tests/test_sycl_queue.py b/tests/test_sycl_queue.py index 977f4561f5a..a184ec51c25 100644 --- a/tests/test_sycl_queue.py +++ b/tests/test_sycl_queue.py @@ -655,7 +655,6 @@ def test_qr(device): assert_sycl_queue_equal(dpnp_r_queue, expected_queue) -@pytest.mark.usefixtures("allow_fall_back_on_numpy") @pytest.mark.parametrize("device", valid_devices, ids=[device.filter_string for device in valid_devices]) @@ -663,7 +662,7 @@ def test_svd(device): tol = 1e-12 shape = (2,2) numpy_data = numpy.arange(shape[0] * shape[1]).reshape(shape) - 
dpnp_data = dpnp.arange(shape[0] * shape[1]).reshape(shape) + dpnp_data = dpnp.arange(shape[0] * shape[1], device=device).reshape(shape) np_u, np_s, np_vt = numpy.linalg.svd(numpy_data) dpnp_u, dpnp_s, dpnp_vt = dpnp.linalg.svd(dpnp_data) @@ -675,7 +674,7 @@ def test_svd(device): assert (dpnp_vt.shape == np_vt.shape) # check decomposition - dpnp_diag_s = dpnp.zeros(shape, dtype=dpnp_s.dtype) + dpnp_diag_s = dpnp.zeros(shape, dtype=dpnp_s.dtype, device=device) for i in range(dpnp_s.size): dpnp_diag_s[i, i] = dpnp_s[i] diff --git a/tests/third_party/cupy/logic_tests/test_comparison.py b/tests/third_party/cupy/logic_tests/test_comparison.py index 4afcea568ff..461f00319bc 100644 --- a/tests/third_party/cupy/logic_tests/test_comparison.py +++ b/tests/third_party/cupy/logic_tests/test_comparison.py @@ -11,7 +11,6 @@ @testing.gpu class TestComparison(unittest.TestCase): - @pytest.mark.usefixtures("allow_fall_back_on_numpy") @testing.for_all_dtypes(no_complex=True) @testing.numpy_cupy_allclose(atol=1e-5) def check_binary(self, name, xp, dtype): @@ -19,26 +18,21 @@ def check_binary(self, name, xp, dtype): b = testing.shaped_reverse_arange((2, 3), xp, dtype) return getattr(xp, name)(a, b) - @pytest.mark.usefixtures("allow_fall_back_on_numpy") def test_greater(self): self.check_binary('greater') - @pytest.mark.usefixtures("allow_fall_back_on_numpy") def test_greater_equal(self): self.check_binary('greater_equal') - @pytest.mark.usefixtures("allow_fall_back_on_numpy") def test_less(self): self.check_binary('less') def test_less_equal(self): self.check_binary('less_equal') - @pytest.mark.usefixtures("allow_fall_back_on_numpy") def test_not_equal(self): self.check_binary('not_equal') - @pytest.mark.usefixtures("allow_fall_back_on_numpy") def test_equal(self): self.check_binary('equal') From 20defc5bef8721d7e1ec7f21fc4733e34f8ae3a8 Mon Sep 17 00:00:00 2001 From: Natalia Polina Date: Tue, 31 Jan 2023 00:02:27 -0800 Subject: [PATCH 07/32] Use eye() function from dpctl.tensor. 
(#1271) * Use eye() function from dpctl.tensor. * Add missed order in test for eye() function. * Updated copyright year. Added parameter like for eye() function. * Removed input argumet additional kwards for eye() function. --- dpnp/backend/include/dpnp_iface_fptr.hpp | 1 - .../kernels/dpnp_krnl_arraycreation.cpp | 14 +----- dpnp/dpnp_algo/dpnp_algo_arraycreation.pyx | 34 ------------- dpnp/dpnp_container.py | 30 +++++++++++- dpnp/dpnp_iface_arraycreation.py | 48 ++++++++++++------- tests/skipped_tests.tbl | 1 - tests/test_arraycreation.py | 32 ++++++++----- tests/test_sycl_queue.py | 15 +++--- .../cupy/creation_tests/test_basic.py | 2 +- 9 files changed, 90 insertions(+), 87 deletions(-) diff --git a/dpnp/backend/include/dpnp_iface_fptr.hpp b/dpnp/backend/include/dpnp_iface_fptr.hpp index 7a3564fa1d3..31347ceaeb5 100644 --- a/dpnp/backend/include/dpnp_iface_fptr.hpp +++ b/dpnp/backend/include/dpnp_iface_fptr.hpp @@ -155,7 +155,6 @@ enum class DPNPFuncName : size_t DPNP_FN_ERF, /**< Used in scipy.special.erf impl */ DPNP_FN_ERF_EXT, /**< Used in scipy.special.erf impl, requires extra parameters */ DPNP_FN_EYE, /**< Used in numpy.eye() impl */ - DPNP_FN_EYE_EXT, /**< Used in numpy.eye() impl, requires extra parameters */ DPNP_FN_EXP, /**< Used in numpy.exp() impl */ DPNP_FN_EXP_EXT, /**< Used in numpy.exp() impl, requires extra parameters */ DPNP_FN_EXP2, /**< Used in numpy.exp2() impl */ diff --git a/dpnp/backend/kernels/dpnp_krnl_arraycreation.cpp b/dpnp/backend/kernels/dpnp_krnl_arraycreation.cpp index 71d93842feb..8727e37fafc 100644 --- a/dpnp/backend/kernels/dpnp_krnl_arraycreation.cpp +++ b/dpnp/backend/kernels/dpnp_krnl_arraycreation.cpp @@ -1,5 +1,5 @@ //***************************************************************************** -// Copyright (c) 2016-2022, Intel Corporation +// Copyright (c) 2016-2023, Intel Corporation // All rights reserved. 
// // Redistribution and use in source and binary forms, with or without @@ -292,13 +292,6 @@ void dpnp_eye_c(void* result1, int k, const shape_elem_type* res_shape) template void (*dpnp_eye_default_c)(void*, int, const shape_elem_type*) = dpnp_eye_c<_DataType>; -template -DPCTLSyclEventRef (*dpnp_eye_ext_c)(DPCTLSyclQueueRef, - void*, - int, - const shape_elem_type*, - const DPCTLEventVectorRef) = dpnp_eye_c<_DataType>; - template DPCTLSyclEventRef dpnp_full_c(DPCTLSyclQueueRef q_ref, void* array_in, @@ -1319,11 +1312,6 @@ void func_map_init_arraycreation(func_map_t& fmap) fmap[DPNPFuncName::DPNP_FN_EYE][eft_FLT][eft_FLT] = {eft_FLT, (void*)dpnp_eye_default_c}; fmap[DPNPFuncName::DPNP_FN_EYE][eft_DBL][eft_DBL] = {eft_DBL, (void*)dpnp_eye_default_c}; - fmap[DPNPFuncName::DPNP_FN_EYE_EXT][eft_INT][eft_INT] = {eft_INT, (void*)dpnp_eye_ext_c}; - fmap[DPNPFuncName::DPNP_FN_EYE_EXT][eft_LNG][eft_LNG] = {eft_LNG, (void*)dpnp_eye_ext_c}; - fmap[DPNPFuncName::DPNP_FN_EYE_EXT][eft_FLT][eft_FLT] = {eft_FLT, (void*)dpnp_eye_ext_c}; - fmap[DPNPFuncName::DPNP_FN_EYE_EXT][eft_DBL][eft_DBL] = {eft_DBL, (void*)dpnp_eye_ext_c}; - fmap[DPNPFuncName::DPNP_FN_FULL][eft_INT][eft_INT] = {eft_INT, (void*)dpnp_full_default_c}; fmap[DPNPFuncName::DPNP_FN_FULL][eft_LNG][eft_LNG] = {eft_LNG, (void*)dpnp_full_default_c}; fmap[DPNPFuncName::DPNP_FN_FULL][eft_FLT][eft_FLT] = {eft_FLT, (void*)dpnp_full_default_c}; diff --git a/dpnp/dpnp_algo/dpnp_algo_arraycreation.pyx b/dpnp/dpnp_algo/dpnp_algo_arraycreation.pyx index 1ec69c55311..6986bf0ec70 100644 --- a/dpnp/dpnp_algo/dpnp_algo_arraycreation.pyx +++ b/dpnp/dpnp_algo/dpnp_algo_arraycreation.pyx @@ -37,7 +37,6 @@ and the rest of the library __all__ += [ "dpnp_copy", "dpnp_diag", - "dpnp_eye", "dpnp_geomspace", "dpnp_identity", "dpnp_linspace", @@ -84,9 +83,6 @@ ctypedef c_dpctl.DPCTLSyclEventRef(*custom_indexing_1out_func_ptr_t)(c_dpctl.DPC const size_t , const int, const c_dpctl.DPCTLEventVectorRef) except + -ctypedef 
c_dpctl.DPCTLSyclEventRef(*fptr_dpnp_eye_t)(c_dpctl.DPCTLSyclQueueRef, - void *, int , const shape_elem_type * , - const c_dpctl.DPCTLEventVectorRef) ctypedef c_dpctl.DPCTLSyclEventRef(*fptr_dpnp_trace_t)(c_dpctl.DPCTLSyclQueueRef, const void *, void * , @@ -146,36 +142,6 @@ cpdef utils.dpnp_descriptor dpnp_diag(utils.dpnp_descriptor v, int k): return result -cpdef utils.dpnp_descriptor dpnp_eye(N, M=None, k=0, dtype=None): - if dtype is None: - dtype = dpnp.float64 - - if M is None: - M = N - - cdef DPNPFuncType param1_type = dpnp_dtype_to_DPNPFuncType(dtype) - - cdef DPNPFuncData kernel_data = get_dpnp_function_ptr(DPNP_FN_EYE_EXT, param1_type, param1_type) - - cdef utils.dpnp_descriptor result = utils.create_output_descriptor((N, M), kernel_data.return_type, None) - - result_sycl_queue = result.get_array().sycl_queue - - cdef c_dpctl.SyclQueue q = result_sycl_queue - cdef c_dpctl.DPCTLSyclQueueRef q_ref = q.get_queue_ref() - - cdef fptr_dpnp_eye_t func = kernel_data.ptr - - cdef shape_type_c result_shape = result.shape - - cdef c_dpctl.DPCTLSyclEventRef event_ref = func(q_ref, result.get_data(), k, result_shape.data(), NULL) - - with nogil: c_dpctl.DPCTLEvent_WaitAndThrow(event_ref) - c_dpctl.DPCTLEvent_Delete(event_ref) - - return result - - cpdef utils.dpnp_descriptor dpnp_geomspace(start, stop, num, endpoint, dtype, axis): cdef shape_type_c obj_shape = utils._object_to_tuple(num) cdef utils.dpnp_descriptor result = utils_py.create_output_descriptor_py(obj_shape, dtype, None) diff --git a/dpnp/dpnp_container.py b/dpnp/dpnp_container.py index 93ab716eb59..7065e497652 100644 --- a/dpnp/dpnp_container.py +++ b/dpnp/dpnp_container.py @@ -1,6 +1,6 @@ # -*- coding: utf-8 -*- # ***************************************************************************** -# Copyright (c) 2016-2022, Intel Corporation +# Copyright (c) 2016-2023, Intel Corporation # All rights reserved. 
# # Redistribution and use in source and binary forms, with or without @@ -45,6 +45,7 @@ "arange", "asarray", "empty", + "eye", "full", "ones" "zeros", @@ -150,6 +151,33 @@ def full(shape, return dpnp_array(array_obj.shape, buffer=array_obj, order=order) +def eye(N, + M=None, + /, + *, + k=0, + dtype=None, + order="C", + device=None, + usm_type="device", + sycl_queue=None): + """Validate input parameters before passing them into `dpctl.tensor` module""" + dpu.validate_usm_type(usm_type, allow_none=False) + sycl_queue_normalized = dpnp.get_normalized_queue_device(sycl_queue=sycl_queue, device=device) + if order is None: + order = 'C' + + """Creates `dpnp_array` with ones on the `k`th diagonal.""" + array_obj = dpt.eye(N, + M, + k=k, + dtype=dtype, + order=order, + usm_type=usm_type, + sycl_queue=sycl_queue_normalized) + return dpnp_array(array_obj.shape, buffer=array_obj, order=order) + + def ones(shape, *, dtype=None, diff --git a/dpnp/dpnp_iface_arraycreation.py b/dpnp/dpnp_iface_arraycreation.py index 03d8b818520..c0ed49316b8 100644 --- a/dpnp/dpnp_iface_arraycreation.py +++ b/dpnp/dpnp_iface_arraycreation.py @@ -572,31 +572,43 @@ def empty_like(x1, return call_origin(numpy.empty_like, x1, dtype, order, subok, shape) -def eye(N, M=None, k=0, dtype=None, order='C', **kwargs): +def eye(N, + M=None, + /, + *, + k=0, + dtype=None, + order="C", + like=None, + device=None, + usm_type="device", + sycl_queue=None): """ Return a 2-D array with ones on the diagonal and zeros elsewhere. For full documentation refer to :obj:`numpy.eye`. Limitations ----------- - Input array is supported as :obj:`dpnp.ndarray`. - Parameters ``order`` is supported only with default value. 
- """ - if (not use_origin_backend()): - if not isinstance(N, (int, dpnp.int, dpnp.int32, dpnp.int64)): - pass - elif M is not None and not isinstance(M, (int, dpnp.int, dpnp.int32, dpnp.int64)): - pass - elif not isinstance(k, (int, dpnp.int, dpnp.int32, dpnp.int64)): - pass - elif order != 'C': - pass - elif len(kwargs) != 0: - pass - else: - return dpnp_eye(N, M=M, k=k, dtype=dtype).get_pyobj() + Parameter ``order`` is supported only with values ``"C"`` and ``"F"``. + Parameter ``like`` is supported only with default value ``None``. + Otherwise the function will be executed sequentially on CPU. - return call_origin(numpy.eye, N, M=M, k=k, dtype=dtype, order=order, **kwargs) + """ + if order not in ('C', 'c', 'F', 'f', None): + pass + elif like is not None: + pass + else: + return dpnp_container.eye(N, + M, + k=k, + dtype=dtype, + order=order, + device=device, + usm_type=usm_type, + sycl_queue=sycl_queue) + + return call_origin(numpy.eye, N, M, k=k, dtype=dtype, order=order, like=None) def frombuffer(buffer, **kwargs): diff --git a/tests/skipped_tests.tbl b/tests/skipped_tests.tbl index df4a1423650..63c6cbd0d13 100644 --- a/tests/skipped_tests.tbl +++ b/tests/skipped_tests.tbl @@ -330,7 +330,6 @@ tests/third_party/cupy/creation_tests/test_basic.py::TestBasic::test_empty_like_ tests/third_party/cupy/creation_tests/test_basic.py::TestBasic::test_empty_like_K_strides tests/third_party/cupy/creation_tests/test_basic.py::TestBasic::test_empty_like_subok tests/third_party/cupy/creation_tests/test_basic.py::TestBasic::test_empty_zero_sized_array_strides -tests/third_party/cupy/creation_tests/test_basic.py::TestBasic::test_eye tests/third_party/cupy/creation_tests/test_basic.py::TestBasic::test_full_like_subok tests/third_party/cupy/creation_tests/test_basic.py::TestBasic::test_ones_like_subok tests/third_party/cupy/creation_tests/test_basic.py::TestBasic::test_zeros_like_subok diff --git a/tests/test_arraycreation.py b/tests/test_arraycreation.py index 
e0500848e9b..e41979efe05 100644 --- a/tests/test_arraycreation.py +++ b/tests/test_arraycreation.py @@ -101,21 +101,29 @@ def test_diag(v, k): @pytest.mark.parametrize("N", - [0, 1, 2, 3, 4], - ids=['0', '1', '2', '3', '4']) + [0, 1, 2, 3], + ids=['0', '1', '2', '3']) @pytest.mark.parametrize("M", - [None, 0, 1, 2, 3, 4], - ids=['None', '0', '1', '2', '3', '4']) + [None, 0, 1, 2, 3], + ids=['None', '0', '1', '2', '3']) @pytest.mark.parametrize("k", - [-5, -4, -3, -2, -1, 0, 1, 2, 3, 4, 5], - ids=['-5', '-4', '-3', '-2', '-1', '0', '1', '2', '3', '4', '5']) + [-4, -3, -2, -1, 0, 1, 2, 3, 4], + ids=['-4', '-3', '-2', '-1', '0', '1', '2', '3', '4']) @pytest.mark.parametrize("dtype", - [numpy.float64, numpy.float32, numpy.int64, numpy.int32], - ids=['float64', 'float32', 'int64', 'int32']) -def test_eye(N, M, k, dtype): - expected = numpy.eye(N, M=M, k=k, dtype=dtype) - result = dpnp.eye(N, M=M, k=k, dtype=dtype) - assert_array_equal(expected, result) + [None, numpy.complex128, numpy.complex64, numpy.float64, numpy.float32, + numpy.float16, numpy.int64, numpy.int32, numpy.bool], + ids=['None', 'complex128', 'complex64', 'float64', 'float32', + 'float16', 'int64', 'int32', 'bool']) +@pytest.mark.parametrize("order", + [None, "C", "F"], + ids=['None', 'C', 'F']) +def test_eye(N, M, k, dtype, order): + func = lambda xp: xp.eye(N, M, k=k, dtype=dtype, order=order) + if not is_dtype_supported(dtype, no_complex_check=True): + assert_raises(RuntimeError, func, dpnp) + return + + assert_array_equal(func(numpy), func(dpnp)) @pytest.mark.usefixtures("allow_fall_back_on_numpy") diff --git a/tests/test_sycl_queue.py b/tests/test_sycl_queue.py index a184ec51c25..1eae3df9393 100644 --- a/tests/test_sycl_queue.py +++ b/tests/test_sycl_queue.py @@ -76,27 +76,30 @@ def vvsort(val, vec, size, xp): "func, arg, kwargs", [ pytest.param("arange", - -25.7, + [-25.7], {'stop': 10**8, 'step': 15}), pytest.param("full", - (2,2), + [(2,2)], {'fill_value': 5}), + pytest.param("eye", + [4, 2], + 
{}), pytest.param("ones", - (2,2), + [(2,2)], {}), pytest.param("zeros", - (2,2), + [(2,2)], {}) ]) @pytest.mark.parametrize("device", valid_devices, ids=[device.filter_string for device in valid_devices]) def test_array_creation(func, arg, kwargs, device): - numpy_array = getattr(numpy, func)(arg, **kwargs) + numpy_array = getattr(numpy, func)(*arg, **kwargs) dpnp_kwargs = dict(kwargs) dpnp_kwargs['device'] = device - dpnp_array = getattr(dpnp, func)(arg, **dpnp_kwargs) + dpnp_array = getattr(dpnp, func)(*arg, **dpnp_kwargs) numpy.testing.assert_array_equal(numpy_array, dpnp_array) assert dpnp_array.sycl_device == device diff --git a/tests/third_party/cupy/creation_tests/test_basic.py b/tests/third_party/cupy/creation_tests/test_basic.py index 337718d3caf..1adcf98f969 100644 --- a/tests/third_party/cupy/creation_tests/test_basic.py +++ b/tests/third_party/cupy/creation_tests/test_basic.py @@ -164,7 +164,7 @@ def test_empty_zero_sized_array_strides(self, order): @testing.for_all_dtypes() @testing.numpy_cupy_array_equal() def test_eye(self, xp, dtype): - return xp.eye(5, 4, 1, dtype) + return xp.eye(5, 4, k=1, dtype=dtype) @testing.for_all_dtypes() @testing.numpy_cupy_array_equal() From 733b32ccf072d2e3a5d5ed03afa6fd142dd7255a Mon Sep 17 00:00:00 2001 From: Anton <100830759+antonwolfy@users.noreply.github.com> Date: Tue, 31 Jan 2023 13:30:21 +0100 Subject: [PATCH 08/32] Get rid of unsupported types in array creation tests (#1283) --- dpnp/dpnp_iface.py | 35 +++- dpnp/dpnp_iface_arraycreation.py | 12 +- dpnp/dpnp_iface_types.py | 6 +- dpnp/dpnp_utils/dpnp_algo_utils.pyx | 2 +- tests/helper.py | 39 ++++ tests/test_arraycreation.py | 301 ++++++---------------------- 6 files changed, 137 insertions(+), 258 deletions(-) create mode 100644 tests/helper.py diff --git a/dpnp/dpnp_iface.py b/dpnp/dpnp_iface.py index 4e791ad0eaf..4806b511aff 100644 --- a/dpnp/dpnp_iface.py +++ b/dpnp/dpnp_iface.py @@ -2,7 +2,7 @@ # distutils: language = c++ # -*- coding: utf-8 -*- # 
***************************************************************************** -# Copyright (c) 2016-2022, Intel Corporation +# Copyright (c) 2016-2023, Intel Corporation # All rights reserved. # # Redistribution and use in source and binary forms, with or without @@ -61,6 +61,7 @@ "asnumpy", "astype", "convert_single_elem_array_to_scalar", + "default_float_type", "dpnp_queue_initialize", "dpnp_queue_is_cpu", "get_dpnp_descriptor", @@ -69,7 +70,8 @@ ] from dpnp import ( - isscalar + isscalar, + float64 ) from dpnp.dpnp_iface_arraycreation import * @@ -191,6 +193,35 @@ def convert_single_elem_array_to_scalar(obj, keepdims=False): return obj +def default_float_type(device=None, sycl_queue=None): + """ + Return a floating type used by default in DPNP depending on device capabilities. + + Parameters + ---------- + device : {None, string, SyclDevice, SyclQueue}, optional + An array API concept of device where an array of default floating type might be created. + The `device` can be ``None`` (the default), an OneAPI filter selector string, + an instance of :class:`dpctl.SyclDevice` corresponding to a non-partitioned SYCL device, + an instance of :class:`dpctl.SyclQueue`, or a `Device` object returned by + :obj:`dpnp.dpnp_array.dpnp_array.device` property. + The value ``None`` is interpreted as to use a default device. + sycl_queue : {None, SyclQueue}, optional + A SYCL queue which might be used to create an array of default floating type. + The `sycl_queue` can be ``None`` (the default), which is interpreted as + to get the SYCL queue from `device` keyword if present or to use a default queue. + + Returns + ------- + dt : dtype + A default DPNP floating type. 
+ + """ + + _sycl_queue = get_normalized_queue_device(device=device, sycl_queue=sycl_queue) + return map_dtype_to_device(float64, _sycl_queue.sycl_device) + + def get_dpnp_descriptor(ext_obj, copy_when_strides=True, copy_when_nondefault_queue=True, diff --git a/dpnp/dpnp_iface_arraycreation.py b/dpnp/dpnp_iface_arraycreation.py index c0ed49316b8..01b9ac6b792 100644 --- a/dpnp/dpnp_iface_arraycreation.py +++ b/dpnp/dpnp_iface_arraycreation.py @@ -871,10 +871,8 @@ def identity(n, dtype=None, *, like=None): elif n < 0: pass else: - if dtype is None: - sycl_queue = dpnp.get_normalized_queue_device(sycl_queue=None, device=None) - dtype = map_dtype_to_device(dpnp.float64, sycl_queue.sycl_device) - return dpnp_identity(n, dtype).get_pyobj() + _dtype = dpnp.default_float_type() if dtype is None else dtype + return dpnp_identity(n, _dtype).get_pyobj() return call_origin(numpy.identity, n, dtype=dtype, like=like) @@ -1327,10 +1325,8 @@ def tri(N, M=None, k=0, dtype=dpnp.float, **kwargs): elif not isinstance(k, int): pass else: - if dtype is dpnp.float: - sycl_queue = dpnp.get_normalized_queue_device(sycl_queue=None, device=None) - dtype = map_dtype_to_device(dpnp.float64, sycl_queue.sycl_device) - return dpnp_tri(N, M, k, dtype).get_pyobj() + _dtype = dpnp.default_float_type() if dtype in (dpnp.float, None) else dtype + return dpnp_tri(N, M, k, _dtype).get_pyobj() return call_origin(numpy.tri, N, M, k, dtype, **kwargs) diff --git a/dpnp/dpnp_iface_types.py b/dpnp/dpnp_iface_types.py index 787dcaa473b..a39cfa47cd1 100644 --- a/dpnp/dpnp_iface_types.py +++ b/dpnp/dpnp_iface_types.py @@ -36,12 +36,12 @@ import numpy + __all__ = [ "bool", "bool_", "complex128", "complex64", - "default_float_type", "dtype", "float", "float16", @@ -75,10 +75,6 @@ longcomplex = numpy.longcomplex -def default_float_type(): - return float64 - - def isscalar(obj): """ Returns True if the type of `obj` is a scalar type. 
diff --git a/dpnp/dpnp_utils/dpnp_algo_utils.pyx b/dpnp/dpnp_utils/dpnp_algo_utils.pyx index 6605770be62..4913d585491 100644 --- a/dpnp/dpnp_utils/dpnp_algo_utils.pyx +++ b/dpnp/dpnp_utils/dpnp_algo_utils.pyx @@ -165,7 +165,7 @@ def call_origin(function, *args, **kwargs): exec_q = dpctl.utils.get_execution_queue(alloc_queues) if exec_q is None: - exec_q = sycl_queue + exec_q = dpnp.get_normalized_queue_device(sycl_queue=sycl_queue) # print(f"DPNP call_origin(): bakend called. \n\t function={function}, \n\t args_new={args_new}, \n\t kwargs_new={kwargs_new}, \n\t dpnp_inplace={dpnp_inplace}") # TODO need to put array memory into NumPy call result_origin = function(*args_new, **kwargs_new) diff --git a/tests/helper.py b/tests/helper.py new file mode 100644 index 00000000000..be550a995dc --- /dev/null +++ b/tests/helper.py @@ -0,0 +1,39 @@ +import dpctl +import dpnp + + +def get_all_dtypes(no_bool=False, + no_float16=True, + no_complex=False, + no_none=False, + device=None): + """ + Build a list of types supported by DPNP based on input flags and device capabilities. 
+ """ + + dev = dpctl.select_default_device() if device is None else device + + # add boolean type + dtypes = [dpnp.bool] if not no_bool else [] + + # add integer types + dtypes.extend([dpnp.int32, dpnp.int64]) + + # add floating types + if not no_float16 and dev.has_aspect_fp16: + dtypes.append(dpnp.float16) + + dtypes.append(dpnp.float32) + if dev.has_aspect_fp64: + dtypes.append(dpnp.float64) + + # add complex types + if not no_complex: + dtypes.append(dpnp.complex64) + if dev.has_aspect_fp64: + dtypes.append(dpnp.complex128) + + # add None value to validate a default dtype + if not no_none: + dtypes.append(None) + return dtypes diff --git a/tests/test_arraycreation.py b/tests/test_arraycreation.py index e41979efe05..d428b1ab726 100644 --- a/tests/test_arraycreation.py +++ b/tests/test_arraycreation.py @@ -1,4 +1,5 @@ import pytest +from .helper import get_all_dtypes import dpnp @@ -8,6 +9,7 @@ import numpy from numpy.testing import ( assert_allclose, + assert_almost_equal, assert_array_equal, assert_raises ) @@ -15,19 +17,6 @@ import tempfile -# TODO: discuss with DPCTL why no exception on complex128 -def is_dtype_supported(dtype, no_complex_check=False): - device = dpctl.SyclQueue().sycl_device - - if dtype is dpnp.float16 and not device.has_aspect_fp16: - return False - if dtype is dpnp.float64 and not device.has_aspect_fp64: - return False - if dtype is dpnp.complex128 and not device.has_aspect_fp64 and not no_complex_check: - return False - return True - - @pytest.mark.parametrize("start", [0, -5, 10, -2.5, 9.7], ids=['0', '-5', '10', '-2.5', '9.7']) @@ -37,11 +26,7 @@ def is_dtype_supported(dtype, no_complex_check=False): @pytest.mark.parametrize("step", [None, 1, 2.7, -1.6, 100], ids=['None', '1', '2.7', '-1.6', '100']) -@pytest.mark.parametrize("dtype", - [numpy.complex128, numpy.complex64, numpy.float64, numpy.float32, - numpy.float16, numpy.int64, numpy.int32], - ids=['complex128', 'complex64', 'float64', 'float32', - 'float16', 'int64', 'int32']) 
+@pytest.mark.parametrize("dtype", get_all_dtypes(no_bool=True, no_float16=False)) def test_arange(start, stop, step, dtype): rtol_mult = 2 if numpy.issubdtype(dtype, numpy.float16): @@ -50,26 +35,23 @@ def test_arange(start, stop, step, dtype): func = lambda xp: xp.arange(start, stop=stop, step=step, dtype=dtype) - if not is_dtype_supported(dtype): - if stop is None: - _stop, _start = start, 0 - else: - _stop, _start = stop, start - _step = 1 if step is None else step - - if _start == _stop: - pass - elif (_step < 0) ^ (_start < _stop): - # exception is raising when dpctl calls a kernel function, - # i.e. when resulting array is not empty - assert_raises(RuntimeError, func, dpnp) - return - exp_array = func(numpy) res_array = func(dpnp).asnumpy() - if numpy.issubdtype(dtype, numpy.floating) or numpy.issubdtype(dtype, numpy.complexfloating): - assert_allclose(exp_array, res_array, rtol=rtol_mult*numpy.finfo(dtype).eps) + if dtype is None: + _device = dpctl.SyclQueue().sycl_device + if not _device.has_aspect_fp64: + # numpy allocated array with dtype=float64 by default, + # while dpnp might use float32, if float64 isn't supported by device + _dtype = dpnp.float32 + rtol_mult *= 150 + else: + _dtype = dpnp.float64 + else: + _dtype = dtype + + if numpy.issubdtype(_dtype, numpy.floating) or numpy.issubdtype(_dtype, numpy.complexfloating): + assert_allclose(exp_array, res_array, rtol=rtol_mult*numpy.finfo(_dtype).eps) else: assert_array_equal(exp_array, res_array) @@ -109,43 +91,25 @@ def test_diag(v, k): @pytest.mark.parametrize("k", [-4, -3, -2, -1, 0, 1, 2, 3, 4], ids=['-4', '-3', '-2', '-1', '0', '1', '2', '3', '4']) -@pytest.mark.parametrize("dtype", - [None, numpy.complex128, numpy.complex64, numpy.float64, numpy.float32, - numpy.float16, numpy.int64, numpy.int32, numpy.bool], - ids=['None', 'complex128', 'complex64', 'float64', 'float32', - 'float16', 'int64', 'int32', 'bool']) +@pytest.mark.parametrize("dtype", get_all_dtypes(no_float16=False)) 
@pytest.mark.parametrize("order", [None, "C", "F"], ids=['None', 'C', 'F']) def test_eye(N, M, k, dtype, order): func = lambda xp: xp.eye(N, M, k=k, dtype=dtype, order=order) - if not is_dtype_supported(dtype, no_complex_check=True): - assert_raises(RuntimeError, func, dpnp) - return - assert_array_equal(func(numpy), func(dpnp)) @pytest.mark.usefixtures("allow_fall_back_on_numpy") -@pytest.mark.parametrize("dtype", - [numpy.float64, numpy.float32, numpy.int64, numpy.int32], - ids=['float64', 'float32', 'int64', 'int32']) +@pytest.mark.parametrize("dtype", get_all_dtypes(no_float16=False)) def test_frombuffer(dtype): - buffer = b'12345678' + buffer = b'12345678ABCDEF00' func = lambda xp: xp.frombuffer(buffer, dtype=dtype) - - if not is_dtype_supported(dtype): - # dtpcl intercepts RuntimeError about 'double' type and raise ValueError instead - assert_raises(ValueError, func, dpnp) - return - - assert_array_equal(func(dpnp), func(numpy)) + assert_allclose(func(dpnp), func(numpy)) @pytest.mark.usefixtures("allow_fall_back_on_numpy") -@pytest.mark.parametrize("dtype", - [numpy.float64, numpy.float32, numpy.int64, numpy.int32], - ids=['float64', 'float32', 'int64', 'int32']) +@pytest.mark.parametrize("dtype", get_all_dtypes(no_float16=False)) def test_fromfile(dtype): with tempfile.TemporaryFile() as fh: fh.write(b"\x00\x01\x02\x03\x04\x05\x06\x07\x08") @@ -153,76 +117,44 @@ def test_fromfile(dtype): func = lambda xp: xp.fromfile(fh, dtype=dtype) - if not is_dtype_supported(dtype): - fh.seek(0) - # dtpcl intercepts RuntimeError about 'double' type and raise ValueError instead - assert_raises(ValueError, func, dpnp) - return - fh.seek(0) np_res = func(numpy) fh.seek(0) dpnp_res = func(dpnp) - assert_array_equal(dpnp_res, np_res) + assert_almost_equal(dpnp_res, np_res) @pytest.mark.usefixtures("allow_fall_back_on_numpy") -@pytest.mark.parametrize("dtype", - [numpy.float64, numpy.float32, numpy.int64, numpy.int32], - ids=['float64', 'float32', 'int64', 'int32']) 
+@pytest.mark.parametrize("dtype", get_all_dtypes(no_bool=True, no_float16=False)) def test_fromfunction(dtype): def func(x, y): return x * y shape = (3, 3) call_func = lambda xp: xp.fromfunction(func, shape=shape, dtype=dtype) - - if not is_dtype_supported(dtype): - # dtpcl intercepts RuntimeError about 'double' type and raise ValueError instead - assert_raises(ValueError, call_func, dpnp) - return - assert_array_equal(call_func(dpnp), call_func(numpy)) @pytest.mark.usefixtures("allow_fall_back_on_numpy") -@pytest.mark.parametrize("dtype", - [numpy.float64, numpy.float32, numpy.int64, numpy.int32], - ids=['float64', 'float32', 'int64', 'int32']) +@pytest.mark.parametrize("dtype", get_all_dtypes(no_float16=False)) def test_fromiter(dtype): _iter = [1, 2, 3, 4] func = lambda xp: xp.fromiter(_iter, dtype=dtype) - - if not is_dtype_supported(dtype): - # dtpcl intercepts RuntimeError about 'double' type and raise ValueError instead - assert_raises(ValueError, func, dpnp) - return - assert_array_equal(func(dpnp), func(numpy)) @pytest.mark.usefixtures("allow_fall_back_on_numpy") -@pytest.mark.parametrize("dtype", - [numpy.float64, numpy.float32, numpy.int64, numpy.int32], - ids=['float64', 'float32', 'int64', 'int32']) +@pytest.mark.parametrize("dtype", get_all_dtypes(no_float16=False)) def test_fromstring(dtype): string = "1 2 3 4" func = lambda xp: xp.fromstring(string, dtype=dtype, sep=' ') - - if not is_dtype_supported(dtype): - # dtpcl intercepts RuntimeError about 'double' type and raise ValueError instead - assert_raises(ValueError, func, dpnp) - return - assert_array_equal(func(dpnp), func(numpy)) @pytest.mark.usefixtures("allow_fall_back_on_numpy") -@pytest.mark.parametrize("dtype", - [numpy.float64, numpy.float32, numpy.int64, numpy.int32], - ids=['float64', 'float32', 'int64', 'int32']) +@pytest.mark.parametrize("dtype", get_all_dtypes()) @pytest.mark.parametrize("num", [2, 4, 8, 3, 9, 27]) @pytest.mark.parametrize("endpoint", @@ -233,11 +165,6 @@ def 
test_geomspace(dtype, num, endpoint): func = lambda xp: xp.geomspace(start, stop, num, endpoint, dtype) - if not is_dtype_supported(dtype): - # dtpcl intercepts RuntimeError about 'double' type and raise ValueError instead - assert_raises(ValueError, func, dpnp) - return - np_res = func(numpy) dpnp_res = func(dpnp) @@ -252,25 +179,14 @@ def test_geomspace(dtype, num, endpoint): @pytest.mark.parametrize("n", [0, 1, 4], ids=['0', '1', '4']) -@pytest.mark.parametrize("dtype", - [numpy.float64, numpy.float32, numpy.int64, numpy.int32, - numpy.bool_, numpy.complex64, numpy.complex128, None], - ids=['float64', 'float32', 'int64', 'int32', - 'bool', 'complex64', 'complex128', 'None']) +@pytest.mark.parametrize("dtype", get_all_dtypes()) def test_identity(n, dtype): func = lambda xp: xp.identity(n, dtype=dtype) - - if n > 0 and not is_dtype_supported(dtype): - assert_raises(RuntimeError, func, dpnp) - return - assert_array_equal(func(numpy), func(dpnp)) @pytest.mark.usefixtures("allow_fall_back_on_numpy") -@pytest.mark.parametrize("dtype", - [numpy.float64, numpy.float32, numpy.int64, numpy.int32], - ids=['float64', 'float32', 'int64', 'int32']) +@pytest.mark.parametrize("dtype", get_all_dtypes(no_float16=False)) def test_loadtxt(dtype): func = lambda xp: xp.loadtxt(fh, dtype=dtype) @@ -278,12 +194,6 @@ def test_loadtxt(dtype): fh.write(b"1 2 3 4") fh.flush() - if not is_dtype_supported(dtype): - # dtpcl intercepts RuntimeError about 'double' type and raise ValueError instead - fh.seek(0) - assert_raises(ValueError, func, dpnp) - return - fh.seek(0) np_res = func(numpy) fh.seek(0) @@ -292,12 +202,8 @@ def test_loadtxt(dtype): assert_array_equal(dpnp_res, np_res) -@pytest.mark.parametrize("dtype", - [numpy.float64, numpy.float32, numpy.int64, numpy.int32, None], - ids=['float64', 'float32', 'int64', 'int32', 'None']) -@pytest.mark.parametrize("type", - [numpy.float64, numpy.float32, numpy.int64, numpy.int32], - ids=['float64', 'float32', 'int64', 'int32']) 
+@pytest.mark.parametrize("dtype", get_all_dtypes(no_bool=True, no_complex=True)) +@pytest.mark.parametrize("type", get_all_dtypes(no_bool=True, no_complex=True)) @pytest.mark.parametrize("offset", [0, 1], ids=['0', '1']) @@ -325,21 +231,9 @@ def test_trace(array, offset, type, dtype): create_array = lambda xp: xp.array(array, type) trace_func = lambda xp, x: xp.trace(x, offset=offset, dtype=dtype) - if not is_dtype_supported(type): - # dtpcl intercepts RuntimeError about 'double' type and raise ValueError instead - assert_raises(ValueError, create_array, dpnp) - return - a = create_array(numpy) ia = create_array(dpnp) - - if not is_dtype_supported(dtype): - assert_raises(RuntimeError, trace_func, dpnp, ia) - return - - expected = trace_func(numpy, a) - result = trace_func(dpnp, ia) - assert_array_equal(expected, result) + assert_array_equal(trace_func(dpnp, ia), trace_func(numpy, a)) @pytest.mark.parametrize("N", @@ -351,16 +245,9 @@ def test_trace(array, offset, type, dtype): @pytest.mark.parametrize("k", [-5, -4, -3, -2, -1, 0, 1, 2, 3, 4, 5], ids=['-5', '-4', '-3', '-2', '-1', '0', '1', '2', '3', '4', '5']) -@pytest.mark.parametrize("dtype", - [numpy.float64, numpy.float32, float, numpy.int64, numpy.int32, numpy.int_, numpy.float_, int], - ids=['numpy.float64', 'numpy.float32', 'float', 'numpy.int64', 'numpy.int32', 'numpy.int', 'numpy.float', 'int']) +@pytest.mark.parametrize("dtype", get_all_dtypes(no_bool=True, no_complex=True)) def test_tri(N, M, k, dtype): func = lambda xp: xp.tri(N, M, k, dtype=dtype) - - if M > 0 and N > 0 and not is_dtype_supported(dtype): - assert_raises(RuntimeError, func, dpnp) - return - assert_array_equal(func(dpnp), func(numpy)) @@ -434,11 +321,7 @@ def test_triu_size_null(k): ids=['[1, 2, 3, 4]', '[]', '[0, 3, 5]']) -@pytest.mark.parametrize("dtype", - [numpy.float64, numpy.float32, numpy.int64, numpy.int32, - numpy.bool_, numpy.complex64, numpy.complex128], - ids=['float64', 'float32', 'int64', 'int32', - 'bool', 'complex64', 
'complex128']) +@pytest.mark.parametrize("dtype", get_all_dtypes()) @pytest.mark.parametrize("n", [0, 1, 4, None], ids=['0', '1', '4', 'None']) @@ -449,18 +332,8 @@ def test_vander(array, dtype, n, increase): create_array = lambda xp: xp.array(array, dtype=dtype) vander_func = lambda xp, x: xp.vander(x, N=n, increasing=increase) - if array and not is_dtype_supported(dtype): - # dtpcl intercepts RuntimeError about 'double' type and raise ValueError instead - assert_raises(ValueError, create_array, dpnp) - return - a_np = numpy.array(array, dtype=dtype) a_dpnp = dpnp.array(array, dtype=dtype) - - if array and not is_dtype_supported(dtype): - assert_raises(RuntimeError, vander_func, dpnp, a_dpnp) - return - assert_array_equal(vander_func(numpy, a_np), vander_func(dpnp, a_dpnp)) @@ -470,21 +343,12 @@ def test_vander(array, dtype, n, increase): @pytest.mark.parametrize("fill_value", [1.5, 2, 1.5+0.j], ids=['1.5', '2', '1.5+0.j']) -@pytest.mark.parametrize("dtype", - [None, numpy.complex128, numpy.complex64, numpy.float64, numpy.float32, - numpy.float16, numpy.int64, numpy.int32, numpy.bool_], - ids=['None', 'complex128', 'complex64', 'float64', 'float32', - 'float16', 'int64', 'int32', 'bool']) +@pytest.mark.parametrize("dtype", get_all_dtypes(no_float16=False)) @pytest.mark.parametrize("order", [None, "C", "F"], ids=['None', 'C', 'F']) def test_full(shape, fill_value, dtype, order): func = lambda xp: xp.full(shape, fill_value, dtype=dtype, order=order) - - if shape != 0 and not 0 in shape and not is_dtype_supported(dtype, no_complex_check=True): - assert_raises(RuntimeError, func, dpnp) - return - assert_array_equal(func(numpy), func(dpnp)) @@ -494,23 +358,15 @@ def test_full(shape, fill_value, dtype, order): @pytest.mark.parametrize("fill_value", [1.5, 2, 1.5+0.j], ids=['1.5', '2', '1.5+0.j']) -@pytest.mark.parametrize("dtype", - [None, numpy.complex128, numpy.complex64, numpy.float64, numpy.float32, - numpy.float16, numpy.int64, numpy.int32, numpy.bool_], - 
ids=['None', 'complex128', 'complex64', 'float64', 'float32', - 'float16', 'int64', 'int32', 'bool']) +@pytest.mark.parametrize("dtype", get_all_dtypes(no_float16=False)) @pytest.mark.parametrize("order", [None, "C", "F"], ids=['None', 'C', 'F']) def test_full_like(array, fill_value, dtype, order): - a = numpy.array(array) - ia = dpnp.array(array) func = lambda xp, x: xp.full_like(x, fill_value, dtype=dtype, order=order) - if ia.size and not is_dtype_supported(dtype, no_complex_check=True): - assert_raises(RuntimeError, func, dpnp, ia) - return - + a = numpy.array(array) + ia = dpnp.array(array) assert_array_equal(func(numpy, a), func(dpnp, ia)) @@ -542,7 +398,9 @@ def test_full_strides(): assert_array_equal(dpnp.asnumpy(ia), a) -@pytest.mark.parametrize("fill_value", [[], (), dpnp.full(0, 0)], ids=['[]', '()', 'dpnp.full(0, 0)']) +@pytest.mark.parametrize("fill_value", + [[], (), dpnp.full(0, 0)], + ids=['[]', '()', 'dpnp.full(0, 0)']) def test_full_invalid_fill_value(fill_value): with pytest.raises(ValueError): dpnp.full(10, fill_value=fill_value) @@ -551,120 +409,79 @@ def test_full_invalid_fill_value(fill_value): @pytest.mark.parametrize("shape", [(), 0, (0,), (2, 0, 3), (3, 2)], ids=['()', '0', '(0,)', '(2, 0, 3)', '(3, 2)']) -@pytest.mark.parametrize("dtype", - [None, numpy.complex128, numpy.complex64, numpy.float64, numpy.float32, - numpy.float16, numpy.int64, numpy.int32, numpy.bool_], - ids=['None', 'complex128', 'complex64', 'float64', 'float32', - 'float16', 'int64', 'int32', 'bool']) +@pytest.mark.parametrize("dtype", get_all_dtypes(no_float16=False)) @pytest.mark.parametrize("order", [None, "C", "F"], ids=['None', 'C', 'F']) def test_zeros(shape, dtype, order): - expected = numpy.zeros(shape, dtype=dtype, order=order) - result = dpnp.zeros(shape, dtype=dtype, order=order) - - assert_array_equal(expected, result) + func = lambda xp: xp.zeros(shape, dtype=dtype, order=order) + assert_array_equal(func(numpy), func(dpnp)) @pytest.mark.parametrize("array", 
[[], 0, [1, 2, 3], [[1, 2], [3, 4]]], ids=['[]', '0', '[1, 2, 3]', '[[1, 2], [3, 4]]']) -@pytest.mark.parametrize("dtype", - [None, numpy.complex128, numpy.complex64, numpy.float64, numpy.float32, - numpy.float16, numpy.int64, numpy.int32, numpy.bool_], - ids=['None', 'complex128', 'complex64', 'float64', 'float32', - 'float16', 'int64', 'int32', 'bool']) +@pytest.mark.parametrize("dtype", get_all_dtypes(no_float16=False)) @pytest.mark.parametrize("order", [None, "C", "F"], ids=['None', 'C', 'F']) def test_zeros_like(array, dtype, order): + func = lambda xp, x: xp.zeros_like(x, dtype=dtype, order=order) + a = numpy.array(array) ia = dpnp.array(array) - - expected = numpy.zeros_like(a, dtype=dtype, order=order) - result = dpnp.zeros_like(ia, dtype=dtype, order=order) - - assert_array_equal(expected, result) + assert_array_equal(func(numpy, a), func(dpnp, ia)) @pytest.mark.parametrize("shape", [(), 0, (0,), (2, 0, 3), (3, 2)], ids=['()', '0', '(0,)', '(2, 0, 3)', '(3, 2)']) -@pytest.mark.parametrize("dtype", - [None, numpy.complex128, numpy.complex64, numpy.float64, numpy.float32, - numpy.float16, numpy.int64, numpy.int32, numpy.bool_], - ids=['None', 'complex128', 'complex64', 'float64', 'float32', - 'float16', 'int64', 'int32', 'bool']) +@pytest.mark.parametrize("dtype", get_all_dtypes(no_float16=False)) @pytest.mark.parametrize("order", [None, "C", "F"], ids=['None', 'C', 'F']) def test_empty(shape, dtype, order): - expected = numpy.empty(shape, dtype=dtype, order=order) - result = dpnp.empty(shape, dtype=dtype, order=order) - - assert expected.shape == result.shape + func = lambda xp: xp.empty(shape, dtype=dtype, order=order) + assert func(numpy).shape == func(dpnp).shape @pytest.mark.parametrize("array", [[], 0, [1, 2, 3], [[1, 2], [3, 4]]], ids=['[]', '0', '[1, 2, 3]', '[[1, 2], [3, 4]]']) -@pytest.mark.parametrize("dtype", - [None, numpy.complex128, numpy.complex64, numpy.float64, numpy.float32, - numpy.float16, numpy.int64, numpy.int32, numpy.bool_], - 
ids=['None', 'complex128', 'complex64', 'float64', 'float32', - 'float16', 'int64', 'int32', 'bool']) +@pytest.mark.parametrize("dtype", get_all_dtypes(no_float16=False)) @pytest.mark.parametrize("order", [None, "C", "F"], ids=['None', 'C', 'F']) def test_empty_like(array, dtype, order): + func = lambda xp, x: xp.empty_like(x, dtype=dtype, order=order) + a = numpy.array(array) ia = dpnp.array(array) - - expected = numpy.empty_like(a, dtype=dtype, order=order) - result = dpnp.empty_like(ia, dtype=dtype, order=order) - - assert expected.shape == result.shape + assert func(numpy, a).shape == func(dpnp, ia).shape @pytest.mark.parametrize("shape", [(), 0, (0,), (2, 0, 3), (3, 2)], ids=['()', '0', '(0,)', '(2, 0, 3)', '(3, 2)']) -@pytest.mark.parametrize("dtype", - [None, numpy.complex128, numpy.complex64, numpy.float64, numpy.float32, - numpy.float16, numpy.int64, numpy.int32, numpy.bool_], - ids=['None', 'complex128', 'complex64', 'float64', 'float32', - 'float16', 'int64', 'int32', 'bool']) +@pytest.mark.parametrize("dtype", get_all_dtypes(no_float16=False)) @pytest.mark.parametrize("order", [None, "C", "F"], ids=['None', 'C', 'F']) def test_ones(shape, dtype, order): func = lambda xp: xp.ones(shape, dtype=dtype, order=order) - - if shape != 0 and not 0 in shape and not is_dtype_supported(dtype, no_complex_check=True): - assert_raises(RuntimeError, func, dpnp) - return - assert_array_equal(func(numpy), func(dpnp)) @pytest.mark.parametrize("array", [[], 0, [1, 2, 3], [[1, 2], [3, 4]]], ids=['[]', '0', '[1, 2, 3]', '[[1, 2], [3, 4]]']) -@pytest.mark.parametrize("dtype", - [None, numpy.complex128, numpy.complex64, numpy.float64, numpy.float32, - numpy.float16, numpy.int64, numpy.int32, numpy.bool_], - ids=['None', 'complex128', 'complex64', 'float64', 'float32', - 'float16', 'int64', 'int32', 'bool']) +@pytest.mark.parametrize("dtype", get_all_dtypes(no_float16=False)) @pytest.mark.parametrize("order", [None, "C", "F"], ids=['None', 'C', 'F']) def test_ones_like(array, 
dtype, order): - a = numpy.array(array) - ia = dpnp.array(array) func = lambda xp, x: xp.ones_like(x, dtype=dtype, order=order) - if ia.size and not is_dtype_supported(dtype, no_complex_check=True): - assert_raises(RuntimeError, func, dpnp, ia) - return - + a = numpy.array(array) + ia = dpnp.array(array) assert_array_equal(func(numpy, a), func(dpnp, ia)) From 744a3f2485aa36da894dd454fc56c2a41ec9a9a3 Mon Sep 17 00:00:00 2001 From: Anton <100830759+antonwolfy@users.noreply.github.com> Date: Wed, 1 Feb 2023 13:26:35 +0100 Subject: [PATCH 09/32] Add support of logical comparison operations (#1280) --- .../include/dpnp_gen_1arg_1type_tbl.hpp | 69 ++++++- .../include/dpnp_gen_2arg_2type_tbl.hpp | 5 +- dpnp/backend/include/dpnp_iface_fptr.hpp | 4 + dpnp/backend/kernels/dpnp_krnl_logic.cpp | 183 ++++++++++++++++ dpnp/dpnp_algo/dpnp_algo.pxd | 4 + dpnp/dpnp_algo/dpnp_algo_logic.pyx | 76 +++---- dpnp/dpnp_iface_logic.py | 195 +++++++++++++----- tests/skipped_tests_gpu.tbl | 2 +- tests/test_logic.py | 70 +++++-- .../cupy/logic_tests/test_comparison.py | 1 - .../third_party/cupy/logic_tests/test_ops.py | 4 - 11 files changed, 479 insertions(+), 134 deletions(-) diff --git a/dpnp/backend/include/dpnp_gen_1arg_1type_tbl.hpp b/dpnp/backend/include/dpnp_gen_1arg_1type_tbl.hpp index f5ee23d755f..0f6cb5b31de 100644 --- a/dpnp/backend/include/dpnp_gen_1arg_1type_tbl.hpp +++ b/dpnp/backend/include/dpnp_gen_1arg_1type_tbl.hpp @@ -1,5 +1,5 @@ //***************************************************************************** -// Copyright (c) 2016-2020, Intel Corporation +// Copyright (c) 2016-2023, Intel Corporation // All rights reserved. // // Redistribution and use in source and binary forms, with or without @@ -23,6 +23,8 @@ // THE POSSIBILITY OF SUCH DAMAGE. 
//***************************************************************************** +#if defined(MACRO_1ARG_1TYPE_OP) + /* * This header file contains single argument element wise functions definitions * @@ -35,10 +37,6 @@ * */ -#ifndef MACRO_1ARG_1TYPE_OP -#error "MACRO_1ARG_1TYPE_OP is not defined" -#endif - #ifdef _SECTION_DOCUMENTATION_GENERATION_ #define MACRO_1ARG_1TYPE_OP(__name__, __operation1__, __operation2__) \ @@ -88,7 +86,7 @@ const shape_elem_type* input1_strides, \ const size_t* where); -#endif +#endif // _SECTION_DOCUMENTATION_GENERATION_ MACRO_1ARG_1TYPE_OP(dpnp_conjugate_c, std::conj(input_elem), q.submit(kernel_func)) MACRO_1ARG_1TYPE_OP(dpnp_copy_c, input_elem, q.submit(kernel_func)) @@ -107,3 +105,62 @@ MACRO_1ARG_1TYPE_OP(dpnp_square_c, oneapi::mkl::vm::sqr(q, input1_size, input1_data, result)) #undef MACRO_1ARG_1TYPE_OP + +#elif defined(MACRO_1ARG_1TYPE_LOGIC_OP) + +/* + * This header file contains single argument element wise functions definitions + * + * Macro `MACRO_1ARG_1TYPE_LOGIC_OP` must be defined before usage + * + * Parameters: + * - public name of the function and kernel name + * - operation used to calculate the result + * + */ + +#ifdef _SECTION_DOCUMENTATION_GENERATION_ + +#define MACRO_1ARG_1TYPE_LOGIC_OP(__name__, __operation__) \ + /** @ingroup BACKEND_API */ \ + /** @brief Per element operation function __name__ */ \ + /** */ \ + /** Function "__name__" executes operator "__operation__" over corresponding elements of input array */ \ + /** */ \ + /** @param[in] q_ref Reference to SYCL queue. */ \ + /** @param[out] result_out Output array. */ \ + /** @param[in] result_size Output array size. */ \ + /** @param[in] result_ndim Number of output array dimensions. */ \ + /** @param[in] result_shape Output array shape. */ \ + /** @param[in] result_strides Output array strides. */ \ + /** @param[in] input1_in Input array 1. */ \ + /** @param[in] input1_size Input array 1 size. 
*/ \ + /** @param[in] input1_ndim Number of input array 1 dimensions. */ \ + /** @param[in] input1_shape Input array 1 shape. */ \ + /** @param[in] input1_strides Input array 1 strides. */ \ + /** @param[in] where Where condition. */ \ + /** @param[in] dep_event_vec_ref Reference to vector of SYCL events. */ \ + template \ + DPCTLSyclEventRef __name__(DPCTLSyclQueueRef q_ref, \ + void* result_out, \ + const size_t result_size, \ + const size_t result_ndim, \ + const shape_elem_type* result_shape, \ + const shape_elem_type* result_strides, \ + const void* input1_in, \ + const size_t input1_size, \ + const size_t input1_ndim, \ + const shape_elem_type* input1_shape, \ + const shape_elem_type* input1_strides, \ + const size_t* where, \ + const DPCTLEventVectorRef dep_event_vec_ref); + +#endif // _SECTION_DOCUMENTATION_GENERATION_ + +MACRO_1ARG_1TYPE_LOGIC_OP(dpnp_logical_not_c, !input1_elem) + +#undef MACRO_1ARG_1TYPE_LOGIC_OP + +#else +#error "MACRO_1ARG_1TYPE_OP or MACRO_1ARG_1TYPE_LOGIC_OP is not defined" +#endif // MACRO_1ARG_1TYPE_OP || MACRO_1ARG_1TYPE_LOGIC_OP diff --git a/dpnp/backend/include/dpnp_gen_2arg_2type_tbl.hpp b/dpnp/backend/include/dpnp_gen_2arg_2type_tbl.hpp index 2fb4fe9d6fd..4b6c4290ef3 100644 --- a/dpnp/backend/include/dpnp_gen_2arg_2type_tbl.hpp +++ b/dpnp/backend/include/dpnp_gen_2arg_2type_tbl.hpp @@ -64,7 +64,7 @@ /** @param[in] input2_strides Input array 2 strides. */ \ /** @param[in] where Where condition. */ \ /** @param[in] dep_event_vec_ref Reference to vector of SYCL events. 
*/ \ - template \ + template \ DPCTLSyclEventRef __name__(DPCTLSyclQueueRef q_ref, \ void* result_out, \ const size_t result_size, \ @@ -91,6 +91,9 @@ MACRO_2ARG_2TYPES_LOGIC_OP(dpnp_greater_c, input1_elem > input2_elem) MACRO_2ARG_2TYPES_LOGIC_OP(dpnp_greater_equal_c, input1_elem >= input2_elem) MACRO_2ARG_2TYPES_LOGIC_OP(dpnp_less_c, input1_elem < input2_elem) MACRO_2ARG_2TYPES_LOGIC_OP(dpnp_less_equal_c, input1_elem <= input2_elem) +MACRO_2ARG_2TYPES_LOGIC_OP(dpnp_logical_and_c, input1_elem && input2_elem) +MACRO_2ARG_2TYPES_LOGIC_OP(dpnp_logical_or_c, input1_elem || input2_elem) +MACRO_2ARG_2TYPES_LOGIC_OP(dpnp_logical_xor_c, (!!input1_elem) != (!!input2_elem)) MACRO_2ARG_2TYPES_LOGIC_OP(dpnp_not_equal_c, input1_elem != input2_elem) #undef MACRO_2ARG_2TYPES_LOGIC_OP diff --git a/dpnp/backend/include/dpnp_iface_fptr.hpp b/dpnp/backend/include/dpnp_iface_fptr.hpp index 31347ceaeb5..f77a37aade8 100644 --- a/dpnp/backend/include/dpnp_iface_fptr.hpp +++ b/dpnp/backend/include/dpnp_iface_fptr.hpp @@ -205,6 +205,10 @@ enum class DPNPFuncName : size_t DPNP_FN_LOG2_EXT, /**< Used in numpy.log2() impl, requires extra parameters */ DPNP_FN_LOG1P, /**< Used in numpy.log1p() impl */ DPNP_FN_LOG1P_EXT, /**< Used in numpy.log1p() impl, requires extra parameters */ + DPNP_FN_LOGICAL_AND_EXT, /**< Used in numpy.logical_and() impl, requires extra parameters */ + DPNP_FN_LOGICAL_NOT_EXT, /**< Used in numpy.logical_not() impl, requires extra parameters */ + DPNP_FN_LOGICAL_OR_EXT, /**< Used in numpy.logical_or() impl, requires extra parameters */ + DPNP_FN_LOGICAL_XOR_EXT, /**< Used in numpy.logical_xor() impl, requires extra parameters */ DPNP_FN_MATMUL, /**< Used in numpy.matmul() impl */ DPNP_FN_MATMUL_EXT, /**< Used in numpy.matmul() impl, requires extra parameters */ DPNP_FN_MATRIX_RANK, /**< Used in numpy.linalg.matrix_rank() impl */ diff --git a/dpnp/backend/kernels/dpnp_krnl_logic.cpp b/dpnp/backend/kernels/dpnp_krnl_logic.cpp index 19a6dd3646e..be1bb1bab79 100644 --- 
a/dpnp/backend/kernels/dpnp_krnl_logic.cpp +++ b/dpnp/backend/kernels/dpnp_krnl_logic.cpp @@ -288,6 +288,182 @@ DPCTLSyclEventRef (*dpnp_any_ext_c)(DPCTLSyclQueueRef, const DPCTLEventVectorRef) = dpnp_any_c<_DataType, _ResultType>; +#define MACRO_1ARG_1TYPE_LOGIC_OP(__name__, __operation__) \ + template \ + class __name__##_kernel; \ + \ + template \ + class __name__##_broadcast_kernel; \ + \ + template \ + class __name__##_strides_kernel; \ + \ + template \ + DPCTLSyclEventRef __name__(DPCTLSyclQueueRef q_ref, \ + void* result_out, \ + const size_t result_size, \ + const size_t result_ndim, \ + const shape_elem_type* result_shape, \ + const shape_elem_type* result_strides, \ + const void* input1_in, \ + const size_t input1_size, \ + const size_t input1_ndim, \ + const shape_elem_type* input1_shape, \ + const shape_elem_type* input1_strides, \ + const size_t* where, \ + const DPCTLEventVectorRef dep_event_vec_ref) \ + { \ + /* avoid warning unused variable*/ \ + (result_shape); \ + (void)where; \ + (void)dep_event_vec_ref; \ + \ + DPCTLSyclEventRef event_ref = nullptr; \ + \ + if (!input1_size) \ + { \ + return event_ref; \ + } \ + \ + sycl::queue q = *(reinterpret_cast(q_ref)); \ + \ + _DataType_input1* input1_data = static_cast<_DataType_input1 *>(const_cast(input1_in)); \ + bool* result = static_cast(result_out); \ + \ + shape_elem_type* input1_shape_offsets = new shape_elem_type[input1_ndim]; \ + \ + get_shape_offsets_inkernel(input1_shape, input1_ndim, input1_shape_offsets); \ + bool use_strides = !array_equal(input1_strides, input1_ndim, input1_shape_offsets, input1_ndim); \ + delete[] input1_shape_offsets; \ + \ + if (use_strides) \ + { \ + if (result_ndim != input1_ndim) \ + { \ + throw std::runtime_error("Result ndim=" + std::to_string(result_ndim) + \ + " mismatches with input1 ndim=" + std::to_string(input1_ndim)); \ + } \ + \ + /* memory transfer optimization, use USM-host for temporary speeds up tranfer to device */ \ + using usm_host_allocatorT = 
sycl::usm_allocator; \ + \ + size_t strides_size = 2 * result_ndim; \ + shape_elem_type *dev_strides_data = sycl::malloc_device(strides_size, q); \ + \ + /* create host temporary for packed strides managed by shared pointer */ \ + auto strides_host_packed = std::vector(strides_size, \ + usm_host_allocatorT(q)); \ + \ + /* packed vector is concatenation of result_strides and input1_strides */ \ + std::copy(result_strides, result_strides + result_ndim, strides_host_packed.begin()); \ + std::copy(input1_strides, input1_strides + result_ndim, strides_host_packed.begin() + result_ndim); \ + \ + auto copy_strides_ev = q.copy(strides_host_packed.data(), \ + dev_strides_data, \ + strides_host_packed.size()); \ + \ + auto kernel_parallel_for_func = [=](sycl::id<1> global_id) { \ + const size_t output_id = global_id[0]; /* for (size_t i = 0; i < result_size; ++i) */ \ + { \ + const shape_elem_type *result_strides_data = &dev_strides_data[0]; \ + const shape_elem_type *input1_strides_data = &dev_strides_data[1]; \ + \ + size_t input1_id = 0; \ + \ + for (size_t i = 0; i < result_ndim; ++i) \ + { \ + const size_t output_xyz_id = \ + get_xyz_id_by_id_inkernel(output_id, result_strides_data, result_ndim, i); \ + input1_id += output_xyz_id * input1_strides_data[i]; \ + } \ + \ + const _DataType_input1 input1_elem = input1_data[input1_id]; \ + result[output_id] = __operation__; \ + } \ + }; \ + auto kernel_func = [&](sycl::handler& cgh) { \ + cgh.depends_on(copy_strides_ev); \ + cgh.parallel_for>( \ + sycl::range<1>(result_size), kernel_parallel_for_func); \ + }; \ + \ + q.submit(kernel_func).wait(); \ + \ + sycl::free(dev_strides_data, q); \ + return event_ref; \ + } \ + else \ + { \ + constexpr size_t lws = 64; \ + constexpr unsigned int vec_sz = 8; \ + constexpr sycl::access::address_space global_space = sycl::access::address_space::global_space; \ + \ + auto gws_range = sycl::range<1>(((result_size + lws * vec_sz - 1) / (lws * vec_sz)) * lws); \ + auto lws_range = 
sycl::range<1>(lws); \ + \ + auto kernel_parallel_for_func = [=](sycl::nd_item<1> nd_it) { \ + auto sg = nd_it.get_sub_group(); \ + size_t start = vec_sz * (nd_it.get_group(0) * nd_it.get_local_range(0) + \ + sg.get_group_id()[0] * sg.get_max_local_range()[0]); \ + size_t end = start + static_cast(vec_sz); \ + \ + if (end < result_size) { \ + sycl::vec<_DataType_input1, vec_sz> x1 = \ + sg.load(sycl::multi_ptr<_DataType_input1, global_space>(&input1_data[start])); \ + sycl::vec res_vec; \ + \ + for (size_t k = 0; k < vec_sz; ++k) { \ + const _DataType_input1 input1_elem = x1[k]; \ + res_vec[k] = __operation__; \ + } \ + sg.store(sycl::multi_ptr(&result[start]), res_vec); \ + \ + } \ + else { \ + for (size_t k = start; k < result_size; ++k) { \ + const _DataType_input1 input1_elem = input1_data[k]; \ + result[k] = __operation__; \ + } \ + } \ + }; \ + \ + auto kernel_func = [&](sycl::handler& cgh) { \ + cgh.parallel_for>( \ + sycl::nd_range<1>(gws_range, lws_range), kernel_parallel_for_func); \ + }; \ + sycl::event event = q.submit(kernel_func); \ + \ + event_ref = reinterpret_cast(&event); \ + return DPCTLEvent_Copy(event_ref); \ + } \ + return event_ref; \ + } \ + \ + template \ + DPCTLSyclEventRef (*__name__##_ext)(DPCTLSyclQueueRef, \ + void*, \ + const size_t, \ + const size_t, \ + const shape_elem_type*, \ + const shape_elem_type*, \ + const void*, \ + const size_t, \ + const size_t, \ + const shape_elem_type*, \ + const shape_elem_type*, \ + const size_t*, \ + const DPCTLEventVectorRef) = __name__<_DataType_input1>; + +#include + +template +static void func_map_logic_1arg_1type_helper(func_map_t& fmap) +{ + ((fmap[DPNPFuncName::DPNP_FN_LOGICAL_NOT_EXT][FTs][FTs] = + {eft_BLN, (void*)dpnp_logical_not_c_ext>}), ...); +} + + #define MACRO_2ARG_2TYPES_LOGIC_OP(__name__, __operation__) \ template \ @@ -546,6 +722,12 @@ static void func_map_logic_2arg_2type_core(func_map_t& fmap) {eft_BLN, (void*)dpnp_less_c_ext, func_type_map_t::find_type>}), ...); 
((fmap[DPNPFuncName::DPNP_FN_LESS_EQUAL_EXT][FT1][FTs] = {eft_BLN, (void*)dpnp_less_equal_c_ext, func_type_map_t::find_type>}), ...); + ((fmap[DPNPFuncName::DPNP_FN_LOGICAL_AND_EXT][FT1][FTs] = + {eft_BLN, (void*)dpnp_logical_and_c_ext, func_type_map_t::find_type>}), ...); + ((fmap[DPNPFuncName::DPNP_FN_LOGICAL_OR_EXT][FT1][FTs] = + {eft_BLN, (void*)dpnp_logical_or_c_ext, func_type_map_t::find_type>}), ...); + ((fmap[DPNPFuncName::DPNP_FN_LOGICAL_XOR_EXT][FT1][FTs] = + {eft_BLN, (void*)dpnp_logical_xor_c_ext, func_type_map_t::find_type>}), ...); ((fmap[DPNPFuncName::DPNP_FN_NOT_EQUAL_EXT][FT1][FTs] = {eft_BLN, (void*)dpnp_not_equal_c_ext, func_type_map_t::find_type>}), ...); } @@ -648,6 +830,7 @@ void func_map_init_logic(func_map_t& fmap) fmap[DPNPFuncName::DPNP_FN_ANY_EXT][eft_FLT][eft_FLT] = {eft_FLT, (void*)dpnp_any_ext_c}; fmap[DPNPFuncName::DPNP_FN_ANY_EXT][eft_DBL][eft_DBL] = {eft_DBL, (void*)dpnp_any_ext_c}; + func_map_logic_1arg_1type_helper(fmap); func_map_logic_2arg_2type_helper(fmap); return; diff --git a/dpnp/dpnp_algo/dpnp_algo.pxd b/dpnp/dpnp_algo/dpnp_algo.pxd index 50387e1565a..485e8adb1a6 100644 --- a/dpnp/dpnp_algo/dpnp_algo.pxd +++ b/dpnp/dpnp_algo/dpnp_algo.pxd @@ -182,6 +182,10 @@ cdef extern from "dpnp_iface_fptr.hpp" namespace "DPNPFuncName": # need this na DPNP_FN_LOG1P_EXT DPNP_FN_LOG2 DPNP_FN_LOG2_EXT + DPNP_FN_LOGICAL_AND_EXT + DPNP_FN_LOGICAL_NOT_EXT + DPNP_FN_LOGICAL_OR_EXT + DPNP_FN_LOGICAL_XOR_EXT DPNP_FN_MATMUL DPNP_FN_MATMUL_EXT DPNP_FN_MATRIX_RANK diff --git a/dpnp/dpnp_algo/dpnp_algo_logic.pyx b/dpnp/dpnp_algo/dpnp_algo_logic.pyx index ae0f711eb10..b6ac36db412 100644 --- a/dpnp/dpnp_algo/dpnp_algo_logic.pyx +++ b/dpnp/dpnp_algo/dpnp_algo_logic.pyx @@ -270,65 +270,35 @@ cpdef utils.dpnp_descriptor dpnp_less_equal(utils.dpnp_descriptor x1_obj, return call_fptr_2in_1out_strides(DPNP_FN_LESS_EQUAL_EXT, x1_obj, x2_obj, dtype, out, where, func_name="less_equal") +cpdef utils.dpnp_descriptor dpnp_logical_and(utils.dpnp_descriptor x1_obj, 
+ utils.dpnp_descriptor x2_obj, + object dtype=None, + utils.dpnp_descriptor out=None, + object where=True): + return call_fptr_2in_1out_strides(DPNP_FN_LOGICAL_AND_EXT, x1_obj, x2_obj, dtype, out, where, func_name="logical_and") -cpdef utils.dpnp_descriptor dpnp_logical_and(utils.dpnp_descriptor input1, utils.dpnp_descriptor input2): - result_sycl_device, result_usm_type, result_sycl_queue = utils.get_common_usm_allocation(input1, input2) - cdef utils.dpnp_descriptor result = utils_py.create_output_descriptor_py(input1.shape, - dpnp.bool, - None, - device=result_sycl_device, - usm_type=result_usm_type, - sycl_queue=result_sycl_queue) - - for i in range(result.size): - result.get_pyobj()[i] = numpy.logical_and(input1.get_pyobj()[i], input2.get_pyobj()[i]) - - return result - - -cpdef utils.dpnp_descriptor dpnp_logical_not(utils.dpnp_descriptor input1): - input1_obj = input1.get_array() - cdef utils.dpnp_descriptor result = utils_py.create_output_descriptor_py(input1.shape, - dpnp.bool, - None, - device=input1_obj.sycl_device, - usm_type=input1_obj.usm_type, - sycl_queue=input1_obj.sycl_queue) - - for i in range(result.size): - result.get_pyobj()[i] = numpy.logical_not(input1.get_pyobj()[i]) - - return result - - -cpdef utils.dpnp_descriptor dpnp_logical_or(utils.dpnp_descriptor input1, utils.dpnp_descriptor input2): - result_sycl_device, result_usm_type, result_sycl_queue = utils.get_common_usm_allocation(input1, input2) - cdef utils.dpnp_descriptor result = utils_py.create_output_descriptor_py(input1.shape, - dpnp.bool, - None, - device=result_sycl_device, - usm_type=result_usm_type, - sycl_queue=result_sycl_queue) - for i in range(result.size): - result.get_pyobj()[i] = numpy.logical_or(input1.get_pyobj()[i], input2.get_pyobj()[i]) - - return result +cpdef utils.dpnp_descriptor dpnp_logical_not(utils.dpnp_descriptor x_obj, + object dtype=None, + utils.dpnp_descriptor out=None, + object where=True): + return call_fptr_1in_1out_strides(DPNP_FN_LOGICAL_NOT_EXT, 
x_obj, dtype, out, where, func_name="logical_not") -cpdef utils.dpnp_descriptor dpnp_logical_xor(utils.dpnp_descriptor input1, utils.dpnp_descriptor input2): - result_sycl_device, result_usm_type, result_sycl_queue = utils.get_common_usm_allocation(input1, input2) - cdef utils.dpnp_descriptor result = utils_py.create_output_descriptor_py(input1.shape, - dpnp.bool, - None, - device=result_sycl_device, - usm_type=result_usm_type, - sycl_queue=result_sycl_queue) +cpdef utils.dpnp_descriptor dpnp_logical_or(utils.dpnp_descriptor x1_obj, + utils.dpnp_descriptor x2_obj, + object dtype=None, + utils.dpnp_descriptor out=None, + object where=True): + return call_fptr_2in_1out_strides(DPNP_FN_LOGICAL_OR_EXT, x1_obj, x2_obj, dtype, out, where, func_name="logical_or") - for i in range(result.size): - result.get_pyobj()[i] = numpy.logical_xor(input1.get_pyobj()[i], input2.get_pyobj()[i]) - return result +cpdef utils.dpnp_descriptor dpnp_logical_xor(utils.dpnp_descriptor x1_obj, + utils.dpnp_descriptor x2_obj, + object dtype=None, + utils.dpnp_descriptor out=None, + object where=True): + return call_fptr_2in_1out_strides(DPNP_FN_LOGICAL_XOR_EXT, x1_obj, x2_obj, dtype, out, where, func_name="logical_xor") cpdef utils.dpnp_descriptor dpnp_not_equal(utils.dpnp_descriptor x1_obj, diff --git a/dpnp/dpnp_iface_logic.py b/dpnp/dpnp_iface_logic.py index 153bac1b24f..de7537a4287 100644 --- a/dpnp/dpnp_iface_logic.py +++ b/dpnp/dpnp_iface_logic.py @@ -737,19 +737,32 @@ def less_equal(x1, return call_origin(numpy.less_equal, x1, x2) -def logical_and(x1, x2, out=None, **kwargs): +def logical_and(x1, + x2, + /, + out=None, + *, + where=True, + dtype=None, + subok=True): """ Compute the truth value of x1 AND x2 element-wise. For full documentation refer to :obj:`numpy.logical_and`. + Returns + ------- + out : dpnp.ndarray + Output array of bool type, element-wise logical comparison of `x1` and `x2`. + Limitations ----------- - Input arrays are supported as :obj:`dpnp.ndarray`. 
+ Parameters `x1` and `x2` are supported as either :class:`dpnp.ndarray` or scalar, + but not both (at least either `x1` or `x2` should be as :class:`dpnp.ndarray`). + Parameters `out`, `where`, `dtype` and `subok` are supported with their default values. Otherwise the function will be executed sequentially on CPU. - Input array data types are limited by supported DPNP :ref:`Data types`. - Parameter ``out`` is supported only with default value ``None``. - Parameter ``where`` is supported only with default value ``True``. + Input array data types are limited by supported DPNP :ref:`Data types`, + excluding `dpnp.complex64` and `dpnp.complex128`. See Also -------- @@ -769,30 +782,53 @@ def logical_and(x1, x2, out=None, **kwargs): """ - # x1_desc = dpnp.get_dpnp_descriptor(x1) - # x2_desc = dpnp.get_dpnp_descriptor(x2) - # if x1_desc and x2_desc and not kwargs: - # if out is not None: - # pass - # else: - # return dpnp_logical_and(x1_desc, x2_desc).get_pyobj() + if out is not None: + pass + elif where is not True: + pass + elif dtype is not None: + pass + elif subok is not True: + pass + elif dpnp.isscalar(x1) and dpnp.isscalar(x2): + # at least either x1 or x2 has to be an array + pass + else: + # get a common queue to copy data from the host into a device if any input is scalar + queue = get_common_allocation_queue([x1, x2]) if dpnp.isscalar(x1) or dpnp.isscalar(x2) else None - return call_origin(numpy.logical_and, x1, x2, out, **kwargs) + x1_desc = dpnp.get_dpnp_descriptor(x1, copy_when_strides=False, copy_when_nondefault_queue=False, alloc_queue=queue) + x2_desc = dpnp.get_dpnp_descriptor(x2, copy_when_strides=False, copy_when_nondefault_queue=False, alloc_queue=queue) + if x1_desc and x2_desc: + return dpnp_logical_and(x1_desc, x2_desc).get_pyobj() + return call_origin(numpy.logical_and, x1, x2) -def logical_not(x1, out=None, **kwargs): +def logical_not(x, + /, + out=None, + *, + where=True, + dtype=None, + subok=True): """ Compute the truth value of NOT x 
element-wise. For full documentation refer to :obj:`numpy.logical_not`. + Returns + ------- + out : dpnp.ndarray + Boolean result with the same shape as `x` of the NOT operation + on elements of `x`. + Limitations ----------- - Input array is supported as :obj:`dpnp.ndarray`. + Parameters `x` is only supported as :class:`dpnp.ndarray`. + Parameters `out`, `where`, `dtype` and `subok` are supported with their default values. Otherwise the function will be executed sequentially on CPU. - Input array data types are limited by supported DPNP :ref:`Data types`. - Parameter ``out`` is supported only with default value ``None``. - Parameter ``where`` is supported only with default value ``True``. + Input array data type is limited by supported DPNP :ref:`Data types`, + excluding `dpnp.complex64` and `dpnp.complex128`. See Also -------- @@ -810,29 +846,47 @@ def logical_not(x1, out=None, **kwargs): """ - # x1_desc = dpnp.get_dpnp_descriptor(x1) - # if x1_desc and not kwargs: - # if out is not None: - # pass - # else: - # return dpnp_logical_not(x1_desc).get_pyobj() - - return call_origin(numpy.logical_not, x1, out, **kwargs) + if out is not None: + pass + elif where is not True: + pass + elif dtype is not None: + pass + elif subok is not True: + pass + else: + x1_desc = dpnp.get_dpnp_descriptor(x, copy_when_strides=False, copy_when_nondefault_queue=False) + if x1_desc: + return dpnp_logical_not(x1_desc).get_pyobj() + return call_origin(numpy.logical_not, x) -def logical_or(x1, x2, out=None, **kwargs): +def logical_or(x1, + x2, + /, + out=None, + *, + where=True, + dtype=None, + subok=True): """ Compute the truth value of x1 OR x2 element-wise. For full documentation refer to :obj:`numpy.logical_or`. + Returns + ------- + out : dpnp.ndarray + Output array of bool type, element-wise logical comparison of `x1` and `x2`. + Limitations ----------- - Input arrays are supported as :obj:`dpnp.ndarray`. 
+ Parameters `x1` and `x2` are supported as either :class:`dpnp.ndarray` or scalar, + but not both (at least either `x1` or `x2` should be as :class:`dpnp.ndarray`). + Parameters `out`, `where`, `dtype` and `subok` are supported with their default values. Otherwise the function will be executed sequentially on CPU. - Input array data types are limited by supported DPNP :ref:`Data types`. - Parameter ``out`` is supported only with default value ``None``. - Parameter ``where`` is supported only with default value ``True``. + Input array data types are limited by supported DPNP :ref:`Data types`, + excluding `dpnp.complex64` and `dpnp.complex128`. See Also -------- @@ -852,30 +906,54 @@ def logical_or(x1, x2, out=None, **kwargs): """ - # x1_desc = dpnp.get_dpnp_descriptor(x1) - # x2_desc = dpnp.get_dpnp_descriptor(x2) - # if x1_desc and x2_desc and not kwargs: - # if out is not None: - # pass - # else: - # return dpnp_logical_or(x1_desc, x2_desc).get_pyobj() + if out is not None: + pass + elif where is not True: + pass + elif dtype is not None: + pass + elif subok is not True: + pass + elif dpnp.isscalar(x1) and dpnp.isscalar(x2): + # at least either x1 or x2 has to be an array + pass + else: + # get a common queue to copy data from the host into a device if any input is scalar + queue = get_common_allocation_queue([x1, x2]) if dpnp.isscalar(x1) or dpnp.isscalar(x2) else None - return call_origin(numpy.logical_or, x1, x2, out, **kwargs) + x1_desc = dpnp.get_dpnp_descriptor(x1, copy_when_strides=False, copy_when_nondefault_queue=False, alloc_queue=queue) + x2_desc = dpnp.get_dpnp_descriptor(x2, copy_when_strides=False, copy_when_nondefault_queue=False, alloc_queue=queue) + if x1_desc and x2_desc: + return dpnp_logical_or(x1_desc, x2_desc).get_pyobj() + return call_origin(numpy.logical_or, x1, x2) -def logical_xor(x1, x2, out=None, **kwargs): +def logical_xor(x1, + x2, + /, + out=None, + *, + where=True, + dtype=None, + subok=True): """ - Compute the truth value of x1 
XOR x2, element-wise. + Compute the truth value of x1 XOR x2 element-wise. For full documentation refer to :obj:`numpy.logical_xor`. + Returns + ------- + out : dpnp.ndarray + Output array of bool type, element-wise logical comparison of `x1` and `x2`. + Limitations ----------- - Input arrays are supported as :obj:`dpnp.ndarray`. + Parameters `x1` and `x2` are supported as either :class:`dpnp.ndarray` or scalar, + but not both (at least either `x1` or `x2` should be as :class:`dpnp.ndarray`). + Parameters `out`, `where`, `dtype` and `subok` are supported with their default values. Otherwise the function will be executed sequentially on CPU. - Input array data types are limited by supported DPNP :ref:`Data types`. - Parameter ``out`` is supported only with default value ``None``. - Parameter ``where`` is supported only with default value ``True``. + Input array data types are limited by supported DPNP :ref:`Data types`, + excluding `dpnp.complex64` and `dpnp.complex128`. See Also -------- @@ -895,15 +973,26 @@ def logical_xor(x1, x2, out=None, **kwargs): """ - # x1_desc = dpnp.get_dpnp_descriptor(x1) - # x2_desc = dpnp.get_dpnp_descriptor(x2) - # if x1_desc and x2_desc and not kwargs: - # if out is not None: - # pass - # else: - # return dpnp_logical_xor(x1_desc, x2_desc).get_pyobj() + if out is not None: + pass + elif where is not True: + pass + elif dtype is not None: + pass + elif subok is not True: + pass + elif dpnp.isscalar(x1) and dpnp.isscalar(x2): + # at least either x1 or x2 has to be an array + pass + else: + # get a common queue to copy data from the host into a device if any input is scalar + queue = get_common_allocation_queue([x1, x2]) if dpnp.isscalar(x1) or dpnp.isscalar(x2) else None - return call_origin(numpy.logical_xor, x1, x2, out, **kwargs) + x1_desc = dpnp.get_dpnp_descriptor(x1, copy_when_strides=False, copy_when_nondefault_queue=False, alloc_queue=queue) + x2_desc = dpnp.get_dpnp_descriptor(x2, copy_when_strides=False, 
copy_when_nondefault_queue=False, alloc_queue=queue) + if x1_desc and x2_desc: + return dpnp_logical_xor(x1_desc, x2_desc).get_pyobj() + return call_origin(numpy.logical_xor, x1, x2) def not_equal(x1, diff --git a/tests/skipped_tests_gpu.tbl b/tests/skipped_tests_gpu.tbl index 5426e386bbc..c64c7fa45f9 100644 --- a/tests/skipped_tests_gpu.tbl +++ b/tests/skipped_tests_gpu.tbl @@ -858,7 +858,7 @@ tests/third_party/cupy/logic_tests/test_comparison.py::TestArrayEqual::test_arra tests/third_party/cupy/logic_tests/test_comparison.py::TestArrayEqual::test_array_equal_diff_length tests/third_party/cupy/logic_tests/test_comparison.py::TestArrayEqual::test_array_equal_is_equal tests/third_party/cupy/logic_tests/test_comparison.py::TestArrayEqual::test_array_equal_not_equal -tests/third_party/cupy/logic_tests/test_comparison.py::TestComparisonOperator::test_binary_npscalar_array + tests/third_party/cupy/manipulation_tests/test_dims.py::TestBroadcast_param_0_{shapes=[(), ()]}::test_broadcast tests/third_party/cupy/manipulation_tests/test_dims.py::TestBroadcast_param_0_{shapes=[(), ()]}::test_broadcast_arrays tests/third_party/cupy/manipulation_tests/test_dims.py::TestBroadcast_param_10_{shapes=[(0, 1, 1, 0, 3), (5, 2, 0, 1, 0, 0, 3), (2, 1, 0, 0, 0, 3)]}::test_broadcast diff --git a/tests/test_logic.py b/tests/test_logic.py index 062300bb8d3..425106fd2ef 100644 --- a/tests/test_logic.py +++ b/tests/test_logic.py @@ -1,4 +1,5 @@ import pytest +from .helper import get_all_dtypes import dpnp @@ -9,9 +10,7 @@ ) -@pytest.mark.parametrize("type", - [numpy.float64, numpy.float32, numpy.int64, numpy.int32, numpy.bool_], - ids=['float64', 'float32', 'int64', 'int32', 'bool']) +@pytest.mark.parametrize("type", get_all_dtypes(no_complex=True)) @pytest.mark.parametrize("shape", [(0,), (4,), (2, 3), (2, 2, 2)], ids=['(0,)', '(4,)', '(2,3)', '(2,2,2)']) @@ -42,9 +41,7 @@ def test_all(type, shape): assert_allclose(dpnp_res, np_res) -@pytest.mark.parametrize("type", - [numpy.float64, 
numpy.float32, numpy.int64, numpy.int32], - ids=['float64', 'float32', 'int64', 'int32']) +@pytest.mark.parametrize("type", get_all_dtypes(no_bool=True, no_complex=True)) def test_allclose(type): a = numpy.random.rand(10) @@ -66,9 +63,7 @@ def test_allclose(type): assert_allclose(dpnp_res, np_res) -@pytest.mark.parametrize("type", - [numpy.float64, numpy.float32, numpy.int64, numpy.int32, numpy.bool_], - ids=['float64', 'float32', 'int64', 'int32', 'bool']) +@pytest.mark.parametrize("type", get_all_dtypes(no_complex=True)) @pytest.mark.parametrize("shape", [(0,), (4,), (2, 3), (2, 2, 2)], ids=['(0,)', '(4,)', '(2,3)', '(2,2,2)']) @@ -153,17 +148,60 @@ def test_not_equal(): assert_equal(dpnp_res, np_res) +@pytest.mark.parametrize("dtype", get_all_dtypes(no_complex=True)) @pytest.mark.parametrize("op", - ['equal', 'greater', 'greater_equal', 'less', 'less_equal', 'not_equal'], - ids=['equal', 'greater', 'greater_equal', 'less', 'less_equal', 'not_equal']) + ['logical_and', 'logical_or', 'logical_xor'], + ids=['logical_and', 'logical_or', 'logical_xor']) +def test_logic_comparison(op, dtype): + a = numpy.array([0, 0, 3, 2], dtype=dtype) + b = numpy.array([0, 4, 0, 2], dtype=dtype) + + # x1 OP x2 + np_res = getattr(numpy, op)(a, b) + dpnp_res = getattr(dpnp, op)(dpnp.array(a), dpnp.array(b)) + assert_equal(dpnp_res, np_res) + + # x2 OP x1 + np_res = getattr(numpy, op)(b, a) + dpnp_res = getattr(dpnp, op)(dpnp.array(b), dpnp.array(a)) + assert_equal(dpnp_res, np_res) + + # numpy.tile(x1, (10,)) OP numpy.tile(x2, (10,)) + a, b = numpy.tile(a, (10,)), numpy.tile(b, (10,)) + np_res = getattr(numpy, op)(a, b) + dpnp_res = getattr(dpnp, op)(dpnp.array(a), dpnp.array(b)) + assert_equal(dpnp_res, np_res) + + # numpy.tile(x2, (10, 2)) OP numpy.tile(x1, (10, 2)) + a, b = numpy.tile(a, (10, 1)), numpy.tile(b, (10, 1)) + np_res = getattr(numpy, op)(b, a) + dpnp_res = getattr(dpnp, op)(dpnp.array(b), dpnp.array(a)) + assert_equal(dpnp_res, np_res) + + 
+@pytest.mark.parametrize("dtype", get_all_dtypes(no_complex=True)) +def test_logical_not(dtype): + a = dpnp.array([0, 4, 0, 2], dtype=dtype) + + np_res = numpy.logical_not(a.asnumpy()) + dpnp_res = dpnp.logical_not(a) + assert_equal(dpnp_res, np_res) + + +@pytest.mark.parametrize("op", + ['equal', 'greater', 'greater_equal', 'less', 'less_equal', + 'logical_and', 'logical_or', 'logical_xor', 'not_equal'], + ids=['equal', 'greater', 'greater_equal', 'less', 'less_equal', + 'logical_and', 'logical_or', 'logical_xor', 'not_equal']) @pytest.mark.parametrize("x1", [[3, 4, 5, 6], [[1, 2, 3, 4], [5, 6, 7, 8]], [[1, 2, 5, 6], [3, 4, 7, 8], [1, 2, 7, 8]]], ids=['[3, 4, 5, 6]', '[[1, 2, 3, 4], [5, 6, 7, 8]]', '[[1, 2, 5, 6], [3, 4, 7, 8], [1, 2, 7, 8]]']) @pytest.mark.parametrize("x2", [5, [1, 2, 5, 6]], ids=['5', '[1, 2, 5, 6]']) -def test_elemwise_comparison(op, x1, x2): - create_func = lambda xp, a: xp.asarray(a) if not numpy.isscalar(a) else a +@pytest.mark.parametrize("dtype", get_all_dtypes(no_complex=True)) +def test_elemwise_comparison(op, x1, x2, dtype): + create_func = lambda xp, a: xp.asarray(a, dtype=dtype) if not numpy.isscalar(a) else numpy.dtype(dtype=dtype).type(a) np_x1, np_x2 = create_func(numpy, x1), create_func(numpy, x2) dp_x1, dp_x2 = create_func(dpnp, np_x1), create_func(dpnp, np_x2) @@ -185,8 +223,10 @@ def test_elemwise_comparison(op, x1, x2): @pytest.mark.parametrize("op", - ['equal', 'greater', 'greater_equal', 'less', 'less_equal', 'not_equal'], - ids=['equal', 'greater', 'greater_equal', 'less', 'less_equal', 'not_equal']) + ['equal', 'greater', 'greater_equal', 'less', 'less_equal', + 'logical_and', 'logical_or', 'logical_xor', 'not_equal'], + ids=['equal', 'greater', 'greater_equal', 'less', 'less_equal', + 'logical_and', 'logical_or', 'logical_xor', 'not_equal']) @pytest.mark.parametrize("sh1", [[10], [8, 4], [4, 1, 2]], ids=['(10,)', '(8, 4)', '(4, 1, 2)']) diff --git a/tests/third_party/cupy/logic_tests/test_comparison.py 
b/tests/third_party/cupy/logic_tests/test_comparison.py index 461f00319bc..67848359188 100644 --- a/tests/third_party/cupy/logic_tests/test_comparison.py +++ b/tests/third_party/cupy/logic_tests/test_comparison.py @@ -37,7 +37,6 @@ def test_equal(self): self.check_binary('equal') -@pytest.mark.usefixtures("allow_fall_back_on_numpy") @testing.gpu class TestComparisonOperator(unittest.TestCase): diff --git a/tests/third_party/cupy/logic_tests/test_ops.py b/tests/third_party/cupy/logic_tests/test_ops.py index 55b8617882b..cdbd035cd26 100644 --- a/tests/third_party/cupy/logic_tests/test_ops.py +++ b/tests/third_party/cupy/logic_tests/test_ops.py @@ -20,18 +20,14 @@ def check_binary(self, name, xp, dtype): b = testing.shaped_reverse_arange((2, 3), xp, dtype) return getattr(xp, name)(a, b) - @pytest.mark.usefixtures("allow_fall_back_on_numpy") def test_logical_and(self): self.check_binary('logical_and') - @pytest.mark.usefixtures("allow_fall_back_on_numpy") def test_logical_or(self): self.check_binary('logical_or') - @pytest.mark.usefixtures("allow_fall_back_on_numpy") def test_logical_xor(self): self.check_binary('logical_xor') - @pytest.mark.usefixtures("allow_fall_back_on_numpy") def test_logical_not(self): self.check_unary('logical_not') From 351c6a6a4cc3ec6197277fd9b91a34bc13f3dc83 Mon Sep 17 00:00:00 2001 From: Anton <100830759+antonwolfy@users.noreply.github.com> Date: Fri, 3 Feb 2023 01:08:40 +0100 Subject: [PATCH 10/32] Add device and sycl_queue keyword arguments to random calls (#1277) * Set minimum required versions & fix debug building * Add device and sycl_queue keyword arguments to random calls * Add device and sycl_queue to dpnp.random.seed() & use random values if seed is None * Update dpnp/random/dpnp_iface_random.py Co-authored-by: Oleksandr Pavlyk --------- Co-authored-by: Oleksandr Pavlyk --- dpnp/random/dpnp_iface_random.py | 432 ++++++++++++++++++++++++------- dpnp/random/dpnp_random_state.py | 11 +- tests/test_sycl_queue.py | 111 ++++++-- 3 files 
changed, 432 insertions(+), 122 deletions(-) diff --git a/dpnp/random/dpnp_iface_random.py b/dpnp/random/dpnp_iface_random.py index 677f2a7e94b..ade85bb2fe1 100644 --- a/dpnp/random/dpnp_iface_random.py +++ b/dpnp/random/dpnp_iface_random.py @@ -1,7 +1,7 @@ # cython: language_level=3 # -*- coding: utf-8 -*- # ***************************************************************************** -# Copyright (c) 2016-2022, Intel Corporation +# Copyright (c) 2016-2023, Intel Corporation # All rights reserved. # # Redistribution and use in source and binary forms, with or without @@ -98,11 +98,20 @@ ] -def _get_random_state(): - global _dpnp_random_state - if _dpnp_random_state is None: - _dpnp_random_state = RandomState() - return _dpnp_random_state +def _get_random_state(device=None, sycl_queue=None): + global _dpnp_random_states + + if not isinstance(_dpnp_random_states, dict): + _dpnp_random_states = dict() + sycl_queue = dpnp.get_normalized_queue_device(device=device, sycl_queue=sycl_queue) + if sycl_queue not in _dpnp_random_states: + rs = RandomState(device=device, sycl_queue=sycl_queue) + if sycl_queue == rs.get_sycl_queue(): + _dpnp_random_states[sycl_queue] = rs + else: + raise RuntimeError("Normalized SYCL queue {} mismatched with one returned by RandmoState {}" + .format(sycl_queue, rs.get_sycl_queue())) + return _dpnp_random_states[sycl_queue] def beta(a, b, size=None): @@ -774,20 +783,42 @@ def negative_binomial(n, p, size=None): return call_origin(numpy.random.negative_binomial, n, p, size) -def normal(loc=0.0, scale=1.0, size=None, usm_type='device'): +def normal(loc=0.0, + scale=1.0, + size=None, + device=None, + usm_type="device", + sycl_queue=None): """ - Normal distribution. - Draw random samples from a normal (Gaussian) distribution. For full documentation refer to :obj:`numpy.random.normal`. + Parameters + ---------- + device : {None, string, SyclDevice, SyclQueue}, optional + An array API concept of device where the output array is created. 
+ The `device` can be ``None`` (the default), an OneAPI filter selector string, + an instance of :class:`dpctl.SyclDevice` corresponding to a non-partitioned SYCL device, + an instance of :class:`dpctl.SyclQueue`, or a `Device` object returned by + :obj:`dpnp.dpnp_array.dpnp_array.device` property. + usm_type : {"device", "shared", "host"}, optional + The type of SYCL USM allocation for the output array. + sycl_queue : {None, SyclQueue}, optional + A SYCL queue to use for output array allocation and copying. + + Returns + ------- + out : dpnp.ndarray + Drawn samples from the parameterized normal distribution. + Output array data type is the same as input `dtype`. If `dtype` is ``None`` (the default), + :obj:`dpnp.float64` type will be used if device supports it, or :obj:`dpnp.float32` otherwise. + Limitations ----------- - Parameters ``loc`` and ``scale`` are supported as scalar. + Parameters `loc` and `scale` are supported as scalar. Otherwise, :obj:`numpy.random.normal(loc, scale, size)` samples are drawn. - Output array data type is :obj:`dpnp.float64` if device supports it - or :obj:`dpnp.float32` otherwise. + Parameter `dtype` is supported only as :obj:`dpnp.float32`, :obj:`dpnp.float64` or ``None``. Examples -------- @@ -796,11 +827,9 @@ def normal(loc=0.0, scale=1.0, size=None, usm_type='device'): >>> s = dpnp.random.normal(mu, sigma, 1000) """ - return _get_random_state().normal(loc=loc, - scale=scale, - size=size, - dtype=None, - usm_type=usm_type) + + rs = _get_random_state(device=device, sycl_queue=sycl_queue) + return rs.normal(loc=loc, scale=scale, size=size, dtype=None, usm_type=usm_type) def noncentral_chisquare(df, nonc, size=None): @@ -986,7 +1015,11 @@ def power(a, size=None): return call_origin(numpy.random.power, a, size) -def rand(d0, *dn, usm_type="device"): +def rand(d0, + *dn, + device=None, + usm_type="device", + sycl_queue=None): """ Random values in a given shape. 
@@ -995,10 +1028,24 @@ def rand(d0, *dn, usm_type="device"): For full documentation refer to :obj:`numpy.random.rand`. - Limitations - ----------- - Output array data type is :obj:`dpnp.float64` if device supports it - or :obj:`dpnp.float32` otherwise. + Parameters + ---------- + device : {None, string, SyclDevice, SyclQueue}, optional + An array API concept of device where the output array is created. + The `device` can be ``None`` (the default), an OneAPI filter selector string, + an instance of :class:`dpctl.SyclDevice` corresponding to a non-partitioned SYCL device, + an instance of :class:`dpctl.SyclQueue`, or a `Device` object returned by + :obj:`dpnp.dpnp_array.dpnp_array.device` property. + usm_type : {"device", "shared", "host"}, optional + The type of SYCL USM allocation for the output array. + sycl_queue : {None, SyclQueue}, optional + A SYCL queue to use for output array allocation and copying. + + Returns + ------- + out : dpnp.ndarray + Random values in a given shape. + Output array data type is :obj:`dpnp.float64` if device supports it, or :obj:`dpnp.float32` otherwise. Examples -------- @@ -1012,20 +1059,48 @@ def rand(d0, *dn, usm_type="device"): """ - return _get_random_state().rand(d0, *dn, usm_type=usm_type) + rs = _get_random_state(device=device, sycl_queue=sycl_queue) + return rs.rand(d0, *dn, usm_type=usm_type) -def randint(low, high=None, size=None, dtype=int, usm_type="device"): +def randint(low, + high=None, + size=None, + dtype=int, + device=None, + usm_type="device", + sycl_queue=None): """ Return random integers from `low` (inclusive) to `high` (exclusive). For full documentation refer to :obj:`numpy.random.randint`. + Parameters + ---------- + device : {None, string, SyclDevice, SyclQueue}, optional + An array API concept of device where the output array is created. 
+ The `device` can be ``None`` (the default), an OneAPI filter selector string, + an instance of :class:`dpctl.SyclDevice` corresponding to a non-partitioned SYCL device, + an instance of :class:`dpctl.SyclQueue`, or a `Device` object returned by + :obj:`dpnp.dpnp_array.dpnp_array.device` property. + usm_type : {"device", "shared", "host"}, optional + The type of SYCL USM allocation for the output array. + sycl_queue : {None, SyclQueue}, optional + A SYCL queue to use for output array allocation and copying. + + Returns + ------- + out : dpnp.ndarray + `size`-shaped array of random integers from the appropriate distribution, + or a single such random int if `size` is not provided. + Output array data type is the same as input `dtype`. + Limitations ----------- - Parameters ``low`` and ``high`` are supported only as scalar. - Parameter ``dtype`` is supported only as `int`. - Otherwise, :obj:`numpy.random.randint(low, high, size, dtype)` samples are drawn. + Parameters `low` and `high` are supported only as a scalar. + Parameter `dtype` is supported only as :obj:`dpnp.int32` or ``int``, + but ``int`` value is considered to be exactly equivalent to :obj:`dpnp.int32`. + Otherwise, :obj:`numpy.random.RandomState.randint(low, high, size, dtype)` samples are drawn. Examples -------- @@ -1041,23 +1116,39 @@ def randint(low, high=None, size=None, dtype=int, usm_type="device"): """ - return _get_random_state().randint(low=low, - high=high, - size=size, - dtype=dtype, - usm_type=usm_type) + rs = _get_random_state(device=device, sycl_queue=sycl_queue) + return rs.randint(low=low, high=high, size=size, dtype=dtype, usm_type=usm_type) -def randn(d0, *dn, usm_type="device"): +def randn(d0, + *dn, + device=None, + usm_type="device", + sycl_queue=None): """ Return a sample (or samples) from the "standard normal" distribution. For full documentation refer to :obj:`numpy.random.randn`. 
- Limitations - ----------- - Output array data type is :obj:`dpnp.float64` if device supports it - or :obj:`dpnp.float32` otherwise. + Parameters + ---------- + device : {None, string, SyclDevice, SyclQueue}, optional + An array API concept of device where the output array is created. + The `device` can be ``None`` (the default), an OneAPI filter selector string, + an instance of :class:`dpctl.SyclDevice` corresponding to a non-partitioned SYCL device, + an instance of :class:`dpctl.SyclQueue`, or a `Device` object returned by + :obj:`dpnp.dpnp_array.dpnp_array.device` property. + usm_type : {"device", "shared", "host"}, optional + The type of SYCL USM allocation for the output array. + sycl_queue : {None, SyclQueue}, optional + A SYCL queue to use for output array allocation and copying. + + Returns + ------- + out : dpnp.ndarray + A ``(d0, d1, ..., dn)``-shaped array of floating-point samples from + the standard normal distribution, or a single such float if no parameters were supplied. + Output array data type is :obj:`dpnp.float64` if device supports it, or :obj:`dpnp.float32` otherwise. Examples -------- @@ -1075,20 +1166,38 @@ def randn(d0, *dn, usm_type="device"): """ - return _get_random_state().randn(d0, *dn, usm_type=usm_type) + rs = _get_random_state(device=device, sycl_queue=sycl_queue) + return rs.randn(d0, *dn, usm_type=usm_type) -def random(size=None, usm_type="device"): +def random(size=None, + device=None, + usm_type="device", + sycl_queue=None): """ Return random floats in the half-open interval [0.0, 1.0). Alias for random_sample. For full documentation refer to :obj:`numpy.random.random`. - Limitations - ----------- - Output array data type is :obj:`dpnp.float64` if device supports it - or :obj:`dpnp.float32` otherwise. + Parameters + ---------- + device : {None, string, SyclDevice, SyclQueue}, optional + An array API concept of device where the output array is created. 
+ The `device` can be ``None`` (the default), an OneAPI filter selector string, + an instance of :class:`dpctl.SyclDevice` corresponding to a non-partitioned SYCL device, + an instance of :class:`dpctl.SyclQueue`, or a `Device` object returned by + :obj:`dpnp.dpnp_array.dpnp_array.device` property. + usm_type : {"device", "shared", "host"}, optional + The type of SYCL USM allocation for the output array. + sycl_queue : {None, SyclQueue}, optional + A SYCL queue to use for output array allocation and copying. + + Returns + ------- + out : dpnp.ndarray + Array of random floats of shape `size` (if ``size=None``, zero dimension array with a single float is returned). + Output array data type is :obj:`dpnp.float64` if device supports it, or :obj:`dpnp.float32` otherwise. Examples -------- @@ -1102,20 +1211,43 @@ def random(size=None, usm_type="device"): """ - return random_sample(size=size, usm_type=usm_type) + return random_sample(size=size, device=device, usm_type=usm_type, sycl_queue=sycl_queue) -def random_integers(low, high=None, size=None, usm_type="device"): +def random_integers(low, + high=None, + size=None, + device=None, + usm_type="device", + sycl_queue=None): """ Random integers between `low` and `high`, inclusive. For full documentation refer to :obj:`numpy.random.random_integers`. + Parameters + ---------- + device : {None, string, SyclDevice, SyclQueue}, optional + An array API concept of device where the output array is created. + The `device` can be ``None`` (the default), an OneAPI filter selector string, + an instance of :class:`dpctl.SyclDevice` corresponding to a non-partitioned SYCL device, + an instance of :class:`dpctl.SyclQueue`, or a `Device` object returned by + :obj:`dpnp.dpnp_array.dpnp_array.device` property. + usm_type : {"device", "shared", "host"}, optional + The type of SYCL USM allocation for the output array. + sycl_queue : {None, SyclQueue}, optional + A SYCL queue to use for output array allocation and copying. 
+ + Returns + ------- + out : dpnp.ndarray + `size`-shaped array of random integers from the appropriate distribution, + or a single such random int if `size` is not provided. + Limitations ----------- - Parameters ``low`` and ``high`` are supported as scalar. - Otherwise, :obj:`numpy.random.random_integers(low, high, size)` samples - are drawn. + Parameters `low` and `high` are supported as scalar. + Otherwise, :obj:`numpy.random.random_integers(low, high, size)` samples are drawn. See Also -------- @@ -1134,12 +1266,15 @@ def random_integers(low, high=None, size=None, usm_type="device"): elif not dpnp.isscalar(high): pass else: - return randint(low, int(high) + 1, size=size, usm_type=usm_type) + return randint(low, int(high) + 1, size=size, device=device, usm_type=usm_type, sycl_queue=sycl_queue) return call_origin(numpy.random.random_integers, low, high, size) -def random_sample(size=None, usm_type="device"): +def random_sample(size=None, + device=None, + usm_type="device", + sycl_queue=None): """ Return random floats in the half-open interval [0.0, 1.0). @@ -1147,10 +1282,24 @@ def random_sample(size=None, usm_type="device"): For full documentation refer to :obj:`numpy.random.random_sample`. - Limitations - ----------- - Output array data type is :obj:`dpnp.float64` if device supports it - or :obj:`dpnp.float32` otherwise. + Parameters + ---------- + device : {None, string, SyclDevice, SyclQueue}, optional + An array API concept of device where the output array is created. + The `device` can be ``None`` (the default), an OneAPI filter selector string, + an instance of :class:`dpctl.SyclDevice` corresponding to a non-partitioned SYCL device, + an instance of :class:`dpctl.SyclQueue`, or a `Device` object returned by + :obj:`dpnp.dpnp_array.dpnp_array.device` property. + usm_type : {"device", "shared", "host"}, optional + The type of SYCL USM allocation for the output array. 
+ sycl_queue : {None, SyclQueue}, optional + A SYCL queue to use for output array allocation and copying. + + Returns + ------- + out : dpnp.ndarray + Array of random floats of shape `size` (if ``size=None``, zero dimension array with a single float is returned). + Output array data type is :obj:`dpnp.float64` if device supports it, or :obj:`dpnp.float32` otherwise. Examples -------- @@ -1164,21 +1313,38 @@ def random_sample(size=None, usm_type="device"): """ - return _get_random_state().random_sample(size=size, - usm_type=usm_type) + rs = _get_random_state(device=device, sycl_queue=sycl_queue) + return rs.random_sample(size=size, usm_type=usm_type) -def ranf(size=None, usm_type="device"): +def ranf(size=None, + device=None, + usm_type="device", + sycl_queue=None): """ Return random floats in the half-open interval [0.0, 1.0). This is an alias of random_sample. For full documentation refer to :obj:`numpy.random.ranf`. - Limitations - ----------- - Output array data type is :obj:`dpnp.float64` if device supports it - or :obj:`dpnp.float32` otherwise. + Parameters + ---------- + device : {None, string, SyclDevice, SyclQueue}, optional + An array API concept of device where the output array is created. + The `device` can be ``None`` (the default), an OneAPI filter selector string, + an instance of :class:`dpctl.SyclDevice` corresponding to a non-partitioned SYCL device, + an instance of :class:`dpctl.SyclQueue`, or a `Device` object returned by + :obj:`dpnp.dpnp_array.dpnp_array.device` property. + usm_type : {"device", "shared", "host"}, optional + The type of SYCL USM allocation for the output array. + sycl_queue : {None, SyclQueue}, optional + A SYCL queue to use for output array allocation and copying. + + Returns + ------- + out : dpnp.ndarray + Array of random floats of shape `size` (if ``size=None``, zero dimension array with a single float is returned). + Output array data type is :obj:`dpnp.float64` if device supports it, or :obj:`dpnp.float32` otherwise. 
Examples -------- @@ -1193,7 +1359,7 @@ def ranf(size=None, usm_type="device"): """ - return random_sample(size=size, usm_type=usm_type) + return random_sample(size=size, device=device, usm_type=usm_type, sycl_queue=sycl_queue) def rayleigh(scale=1.0, size=None): @@ -1230,17 +1396,34 @@ def rayleigh(scale=1.0, size=None): return call_origin(numpy.random.rayleigh, scale, size) -def sample(size=None, usm_type="device"): +def sample(size=None, + device=None, + usm_type="device", + sycl_queue=None): """ Return random floats in the half-open interval [0.0, 1.0). This is an alias of random_sample. For full documentation refer to :obj:`numpy.random.sample`. - Limitations - ----------- - Output array data type is :obj:`dpnp.float64` if device supports it - or :obj:`dpnp.float32` otherwise. + Parameters + ---------- + device : {None, string, SyclDevice, SyclQueue}, optional + An array API concept of device where the output array is created. + The `device` can be ``None`` (the default), an OneAPI filter selector string, + an instance of :class:`dpctl.SyclDevice` corresponding to a non-partitioned SYCL device, + an instance of :class:`dpctl.SyclQueue`, or a `Device` object returned by + :obj:`dpnp.dpnp_array.dpnp_array.device` property. + usm_type : {"device", "shared", "host"}, optional + The type of SYCL USM allocation for the output array. + sycl_queue : {None, SyclQueue}, optional + A SYCL queue to use for output array allocation and copying. + + Returns + ------- + out : dpnp.ndarray + Array of random floats of shape `size` (if ``size=None``, zero dimension array with a single float is returned). + Output array data type is :obj:`dpnp.float64` if device supports it, or :obj:`dpnp.float32` otherwise. 
Examples -------- @@ -1255,7 +1438,7 @@ """ - return random_sample(size=size, usm_type=usm_type) + return random_sample(size=size, device=device, usm_type=usm_type, sycl_queue=sycl_queue) def shuffle(x1): @@ -1283,18 +1466,35 @@ def shuffle(x1): return -def seed(seed=None): +def seed(seed=None, + device=None, + sycl_queue=None): """ - Reseed a legacy mt19937 random number generator engine. + Reseed a legacy MT19937 random number generator engine. + + Parameters + ---------- + device : {None, string, SyclDevice, SyclQueue}, optional + An array API concept of device where an array with generated numbers will be created. + The `device` can be ``None`` (the default), an OneAPI filter selector string, + an instance of :class:`dpctl.SyclDevice` corresponding to a non-partitioned SYCL device, + an instance of :class:`dpctl.SyclQueue`, or a `Device` object returned by + :obj:`dpnp.dpnp_array.dpnp_array.device` property. + sycl_queue : {None, SyclQueue}, optional + A SYCL queue to use for an array with generated numbers. Limitations ----------- - Parameter ``seed`` is supported as a scalar. - Otherwise, the function will use :obj:`numpy.random.seed` on the backend - and will be executed on fallback backend. + Parameter `seed` is supported as either a scalar or an array of maximum three integer scalars. 
""" + # update a mt19937 random number for both RandomState and legacy functionality + global _dpnp_random_states + + sycl_queue = dpnp.get_normalized_queue_device(device=device, sycl_queue=sycl_queue) + _dpnp_random_states[sycl_queue] = RandomState(seed=seed, sycl_queue=sycl_queue) + if not use_origin_backend(seed): # TODO: # array_like of ints for `seed` @@ -1307,10 +1507,6 @@ def seed(seed=None): else: # TODO: # migrate to a single approach with RandomState class - - # update a mt19937 random number for both RandomState and legacy functionality - global _dpnp_random_state - _dpnp_random_state = RandomState(seed) dpnp_rng_srand(seed) # always reseed numpy engine also @@ -1405,17 +1601,34 @@ def standard_gamma(shape, size=None): return call_origin(numpy.random.standard_gamma, shape, size) -def standard_normal(size=None, usm_type="device"): - """Standard normal distribution. - +def standard_normal(size=None, + device=None, + usm_type="device", + sycl_queue=None): + """ Draw samples from a standard Normal distribution (mean=0, stdev=1). For full documentation refer to :obj:`numpy.random.standard_normal`. - Limitations - ----------- - Output array data type is :obj:`dpnp.float64` if device supports it - or :obj:`dpnp.float32` otherwise. + Parameters + ---------- + device : {None, string, SyclDevice, SyclQueue}, optional + An array API concept of device where the output array is created. + The `device` can be ``None`` (the default), an OneAPI filter selector string, + an instance of :class:`dpctl.SyclDevice` corresponding to a non-partitioned SYCL device, + an instance of :class:`dpctl.SyclQueue`, or a `Device` object returned by + :obj:`dpnp.dpnp_array.dpnp_array.device` property. + usm_type : {"device", "shared", "host"}, optional + The type of SYCL USM allocation for the output array. + sycl_queue : {None, SyclQueue}, optional + A SYCL queue to use for output array allocation and copying. 
+ + Returns + ------- + out : dpnp.ndarray + A floating-point array of shape `size` of drawn samples, or a + single sample if `size` was not specified. + Output array data type is :obj:`dpnp.float64` if device supports it, or :obj:`dpnp.float32` otherwise. Examples -------- @@ -1423,7 +1636,9 @@ def standard_normal(size=None, usm_type="device"): >>> s = dpnp.random.standard_normal(1000) """ - return _get_random_state().standard_normal(size=size, usm_type=usm_type) + + rs = _get_random_state(device=device, sycl_queue=sycl_queue) + return rs.standard_normal(size=size, usm_type=usm_type) def standard_t(df, size=None): @@ -1506,18 +1721,45 @@ def triangular(left, mode, right, size=None): return call_origin(numpy.random.triangular, left, mode, right, size) -def uniform(low=0.0, high=1.0, size=None, usm_type='device'): +def uniform(low=0.0, + high=1.0, + size=None, + device=None, + usm_type="device", + sycl_queue=None): """ Draw samples from a uniform distribution. + Samples are uniformly distributed over the half-open interval [low, high) (includes low, but excludes high). + In other words, any value within the given interval is equally likely to be drawn by uniform. + For full documentation refer to :obj:`numpy.random.uniform`. + Parameters + ---------- + device : {None, string, SyclDevice, SyclQueue}, optional + An array API concept of device where the output array is created. + The `device` can be ``None`` (the default), an OneAPI filter selector string, + an instance of :class:`dpctl.SyclDevice` corresponding to a non-partitioned SYCL device, + an instance of :class:`dpctl.SyclQueue`, or a `Device` object returned by + :obj:`dpnp.dpnp_array.dpnp_array.device` property. + usm_type : {"device", "shared", "host"}, optional + The type of SYCL USM allocation for the output array. + sycl_queue : {None, SyclQueue}, optional + A SYCL queue to use for output array allocation and copying. 
+ + Returns + ------- + out : dpnp.ndarray + Drawn samples from the parameterized uniform distribution. + Output array data type is the same as input `dtype`. If `dtype` is ``None`` (the default), + :obj:`dpnp.float64` type will be used if device supports it, or :obj:`dpnp.float32` otherwise. + Limitations ----------- - Parameters ``low`` and ``high`` are supported as scalar. - Otherwise, :obj:`numpy.random.uniform(low, high, size)` samples are drawn. - Output array data type is :obj:`dpnp.float64` if device supports it - or :obj:`dpnp.float32` otherwise. + Parameters `low` and `high` are supported as a scalar. Otherwise, + :obj:`numpy.random.uniform(low, high, size)` samples are drawn. + Parameter `dtype` is supported only as :obj:`dpnp.int32`, :obj:`dpnp.float32`, :obj:`dpnp.float64` or ``None``. Examples -------- @@ -1530,11 +1772,9 @@ def uniform(low=0.0, high=1.0, size=None, usm_type='device'): :obj:`dpnp.random.random` : Floats uniformly distributed over ``[0, 1)``. """ - return _get_random_state().uniform(low=low, - high=high, - size=size, - dtype=None, - usm_type=usm_type) + + rs = _get_random_state(device=device, sycl_queue=sycl_queue) + return rs.uniform(low=low, high=high, size=size, dtype=None, usm_type=usm_type) def vonmises(mu, kappa, size=None): @@ -1679,4 +1919,4 @@ def zipf(a, size=None): return call_origin(numpy.random.zipf, a, size) -_dpnp_random_state = None +_dpnp_random_states = {} diff --git a/dpnp/random/dpnp_random_state.py b/dpnp/random/dpnp_random_state.py index 1d4648c31c4..412d9dec0f8 100644 --- a/dpnp/random/dpnp_random_state.py +++ b/dpnp/random/dpnp_random_state.py @@ -1,7 +1,7 @@ # cython: language_level=3 # -*- coding: utf-8 -*- # ***************************************************************************** -# Copyright (c) 2016-2022, Intel Corporation +# Copyright (c) 2016-2023, Intel Corporation # All rights reserved. 
# # Redistribution and use in source and binary forms, with or without @@ -76,7 +76,12 @@ class RandomState: """ def __init__(self, seed=None, device=None, sycl_queue=None): - self._seed = 1 if seed is None else seed + if seed is None: + # ask NumPy to generate an array of three random integers as default seed value + self._seed = numpy.random.randint(low=0, high=numpy.iinfo(numpy.int32).max + 1, size=3) + else: + self._seed = seed + self._sycl_queue = dpnp.get_normalized_queue_device(device=device, sycl_queue=sycl_queue) self._sycl_device = self._sycl_queue.sycl_device @@ -290,7 +295,7 @@ def rand(self, *args, usm_type="device"): def randint(self, low, high=None, size=None, dtype=int, usm_type="device"): """ - Draw random integers from low (inclusive) to high (exclusive). + Draw random integers from `low` (inclusive) to `high` (exclusive). Return random integers from the “discrete uniform” distribution of the specified type in the “half-open” interval [low, high). diff --git a/tests/test_sycl_queue.py b/tests/test_sycl_queue.py index 1eae3df9393..413596e2cc7 100644 --- a/tests/test_sycl_queue.py +++ b/tests/test_sycl_queue.py @@ -338,36 +338,101 @@ def test_broadcasting(func, data1, data2, device): assert_sycl_queue_equal(result_queue, expected_queue) +@pytest.mark.parametrize( + "func, kwargs", + [ + pytest.param("normal", + {'loc': 1.0, 'scale': 3.4, 'size': (5, 12)}), + pytest.param("rand", + {'d0': 20}), + pytest.param("randint", + {'low': 2, 'high': 15, 'size': (4, 8, 16), 'dtype': dpnp.int32}), + pytest.param("randn", + {'d0': 20}), + pytest.param("random", + {'size': (35, 45)}), + pytest.param("random_integers", + {'low': -17, 'high': 3, 'size': (12, 16)}), + pytest.param("random_sample", + {'size': (7, 7)}), + pytest.param("ranf", + {'size': (10, 7, 12)}), + pytest.param("sample", + {'size': (7, 9)}), + pytest.param("standard_normal", + {'size': (4, 4, 8)}), + pytest.param("uniform", + {'low': 1.0, 'high': 2.0, 'size': (4, 2, 5)}) + ]) 
+@pytest.mark.parametrize("device", + valid_devices, + ids=[device.filter_string for device in valid_devices]) @pytest.mark.parametrize("usm_type", ["host", "device", "shared"]) -@pytest.mark.parametrize("size", - [None, (), 3, (2, 1), (4, 2, 5)], - ids=['None', '()', '3', '(2,1)', '(4,2,5)']) -def test_uniform(usm_type, size): - low = 1.0 - high = 2.0 - res = dpnp.random.uniform(low, high, size=size, usm_type=usm_type) +def test_random(func, kwargs, device, usm_type): + kwargs = {**kwargs, 'device': device, 'usm_type': usm_type} + + # test with default SYCL queue per a device + res_array = getattr(dpnp.random, func)(**kwargs) + assert device == res_array.sycl_device + assert usm_type == res_array.usm_type + + sycl_queue = dpctl.SyclQueue(device, property="in_order") + kwargs['device'] = None + kwargs['sycl_queue'] = sycl_queue - assert usm_type == res.usm_type + # test with in-order SYCL queue per a device and passed as argument + res_array = getattr(dpnp.random, func)(**kwargs) + assert usm_type == res_array.usm_type + assert_sycl_queue_equal(res_array.sycl_queue, sycl_queue) +@pytest.mark.parametrize( + "func, args, kwargs", + [ + pytest.param("normal", + [], + {'loc': 1.0, 'scale': 3.4, 'size': (5, 12)}), + pytest.param("rand", + [15, 30, 5], + {}), + pytest.param("randint", + [], + {'low': 2, 'high': 15, 'size': (4, 8, 16), 'dtype': dpnp.int32}), + pytest.param("randn", + [20, 5, 40], + {}), + pytest.param("random_sample", + [], + {'size': (7, 7)}), + pytest.param("standard_normal", + [], + {'size': (4, 4, 8)}), + pytest.param("uniform", + [], + {'low': 1.0, 'high': 2.0, 'size': (4, 2, 5)}) + ]) +@pytest.mark.parametrize("device", + valid_devices, + ids=[device.filter_string for device in valid_devices]) @pytest.mark.parametrize("usm_type", ["host", "device", "shared"]) -@pytest.mark.parametrize("seed", - [None, (), 123, (12, 58), (147, 56, 896), [1, 654, 78]], - ids=['None', '()', '123', '(12,58)', '(147,56,896)', '[1,654,78]']) -def test_rs_uniform(usm_type, 
seed): - seed = 123 - sycl_queue = dpctl.SyclQueue() - low = 1.0 - high = 2.0 - rs = dpnp.random.RandomState(seed, sycl_queue=sycl_queue) - res = rs.uniform(low, high, usm_type=usm_type) - - assert usm_type == res.usm_type - - res_sycl_queue = res.get_array().sycl_queue - assert_sycl_queue_equal(res_sycl_queue, sycl_queue) +def test_random_state(func, args, kwargs, device, usm_type): + kwargs = {**kwargs, 'usm_type': usm_type} + + # test with default SYCL queue per a device + rs = dpnp.random.RandomState(seed=1234567, device=device) + res_array = getattr(rs, func)(*args, **kwargs) + assert device == res_array.sycl_device + assert usm_type == res_array.usm_type + + sycl_queue = dpctl.SyclQueue(device=device, property="in_order") + + # test with in-order SYCL queue per a device and passed as argument + rs = dpnp.random.RandomState((147, 56, 896), sycl_queue=sycl_queue) + res_array = getattr(rs, func)(*args, **kwargs) + assert usm_type == res_array.usm_type + assert_sycl_queue_equal(res_array.sycl_queue, sycl_queue) @pytest.mark.usefixtures("allow_fall_back_on_numpy") From 32284703d806484cfae57dc9e735b5d67021d791 Mon Sep 17 00:00:00 2001 From: Vahid Tavanashad Date: Mon, 23 Jan 2023 09:16:55 -0600 Subject: [PATCH 11/32] add __repr__ --- dpnp/dpnp_array.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/dpnp/dpnp_array.py b/dpnp/dpnp_array.py index 57f057ae760..6ae848388a9 100644 --- a/dpnp/dpnp_array.py +++ b/dpnp/dpnp_array.py @@ -294,6 +294,8 @@ def __str__(self): return str(self.asnumpy()) + def __repr__(self): + return self._array_obj.__repr__() def __sub__(self, other): return dpnp.subtract(self, other) From 333b4fcbe977b3567bc646bccf01dfa163d12c53 Mon Sep 17 00:00:00 2001 From: Vahid Tavanashad Date: Mon, 23 Jan 2023 18:07:45 -0600 Subject: [PATCH 12/32] add __str__ --- dpnp/dpnp_array.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dpnp/dpnp_array.py b/dpnp/dpnp_array.py index 6ae848388a9..417664967cd 100644 --- a/dpnp/dpnp_array.py +++ 
b/dpnp/dpnp_array.py @@ -292,7 +292,7 @@ def __str__(self): """ - return str(self.asnumpy()) + return self._array_obj.__str__() def __repr__(self): return self._array_obj.__repr__() From 67d5cb1a41d4813f9eb16563de6c213a14438453 Mon Sep 17 00:00:00 2001 From: Vahid Tavanashad Date: Wed, 1 Feb 2023 13:13:16 -0600 Subject: [PATCH 13/32] reviewer's comments --- dpnp/dpnp_array.py | 8 ++-- tests/test_dparray.py | 107 ++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 111 insertions(+), 4 deletions(-) diff --git a/dpnp/dpnp_array.py b/dpnp/dpnp_array.py index 417664967cd..9198b34d4d8 100644 --- a/dpnp/dpnp_array.py +++ b/dpnp/dpnp_array.py @@ -247,7 +247,10 @@ def __radd__(self, other): # '__rdivmod__', # '__reduce__', # '__reduce_ex__', - # '__repr__', + + def __repr__(self): + return dpt.usm_ndarray_repr(self._array_obj, prefix="array") + # '__rfloordiv__', # '__rlshift__', @@ -294,9 +297,6 @@ def __str__(self): return self._array_obj.__str__() - def __repr__(self): - return self._array_obj.__repr__() - def __sub__(self, other): return dpnp.subtract(self, other) diff --git a/tests/test_dparray.py b/tests/test_dparray.py index 745884f6a07..4757fc855b4 100644 --- a/tests/test_dparray.py +++ b/tests/test_dparray.py @@ -68,3 +68,110 @@ def test_flags_strides(dtype, order, strides): assert usm_array.flags == dpnp_array.flags assert numpy_array.flags.c_contiguous == dpnp_array.flags.c_contiguous assert numpy_array.flags.f_contiguous == dpnp_array.flags.f_contiguous + +def test_print_dpnp_int(): + result = repr(dpnp.array([1, 0, 2, -3, -1, 2, 21, -9], dtype='i4')) + expected = "array([ 1, 0, 2, -3, -1, 2, 21, -9], dtype=int32)" + assert(result==expected) + + result = str(dpnp.array([1, 0, 2, -3, -1, 2, 21, -9], dtype='i4')) + expected = "[ 1 0 2 -3 -1 2 21 -9]" + assert(result==expected) +# int32 + result = repr(dpnp.array([1, -1, 21], dtype=dpnp.int32)) + expected = "array([ 1, -1, 21], dtype=int32)" + assert(result==expected) + + result = str(dpnp.array([1, -1, 
21], dtype=dpnp.int32)) + expected = "[ 1 -1 21]" + assert(result==expected) +# uint8 + result = repr(dpnp.array([1, 0, 3], dtype=numpy.uint8)) + expected = "array([1, 0, 3], dtype=uint8)" + assert(result==expected) + + result = str(dpnp.array([1, 0, 3], dtype=numpy.uint8)) + expected = "[1 0 3]" + assert(result==expected) + +def test_print_dpnp_float(): + result = repr(dpnp.array([1, -1, 21], dtype=float)) + expected = "array([ 1., -1., 21.])" + assert(result==expected) + + result = str(dpnp.array([1, -1, 21], dtype=float)) + expected = "[ 1. -1. 21.]" + assert(result==expected) +# float32 + result = repr(dpnp.array([1, -1, 21], dtype=dpnp.float32)) + expected = "array([ 1., -1., 21.], dtype=float32)" + assert(result==expected) + + result = str(dpnp.array([1, -1, 21], dtype=dpnp.float32)) + expected = "[ 1. -1. 21.]" + assert(result==expected) + +def test_print_dpnp_complex(): + result = repr(dpnp.array([1, -1, 21], dtype=complex)) + expected = "array([ 1.+0.j, -1.+0.j, 21.+0.j])" + assert(result==expected) + + result = str(dpnp.array([1, -1, 21], dtype=complex)) + expected = "[ 1.+0.j -1.+0.j 21.+0.j]" + assert(result==expected) + +def test_print_dpnp_boolean(): + result = repr(dpnp.array([1, 0, 3], dtype=bool)) + expected = "array([ True, False, True])" + assert(result==expected) + + result = str(dpnp.array([1, 0, 3], dtype=bool)) + expected = "[ True False True]" + assert(result==expected) + +def test_print_dpnp_special_character(): +# NaN + result = repr(dpnp.array([1., 0., dpnp.nan, 3.])) + expected = "array([ 1., 0., nan, 3.])" + assert(result==expected) + + result = str(dpnp.array([1., 0., dpnp.nan, 3.])) + expected = "[ 1. 0. nan 3.]" + assert(result==expected) +# inf + result = repr(dpnp.array([1., 0., numpy.inf, 3.])) + expected = "array([ 1., 0., inf, 3.])" + assert(result==expected) + + result = str(dpnp.array([1., 0., numpy.inf, 3.])) + expected = "[ 1. 0. 
inf 3.]" + assert(result==expected) + +def test_print_dpnp_nd(): +# 1D + result = repr(dpnp.arange(10000, dtype='float32')) + expected = "array([0.000e+00, 1.000e+00, 2.000e+00, ..., 9.997e+03, 9.998e+03,\n 9.999e+03], dtype=float32)" + assert(result==expected) + + result = str(dpnp.arange(10000, dtype='float32')) + expected = "[0.000e+00 1.000e+00 2.000e+00 ... 9.997e+03 9.998e+03 9.999e+03]" + assert(result==expected) + +# 2D + result = repr(dpnp.array([[1, 2], [3, 4]], dtype=float)) + expected = "array([[1., 2.],\n [3., 4.]])" + assert(result==expected) + + result = str(dpnp.array([[1, 2], [3, 4]])) + expected = "[[1 2]\n [3 4]]" + assert(result==expected) + +# 0 shape + result = repr(dpnp.empty( shape=(0, 0) )) + expected = "array([])" + assert(result==expected) + + result = str(dpnp.empty( shape=(0, 0) )) + expected = "[]" + assert(result==expected) + \ No newline at end of file From cb3421c1c2d3cfe6950e9a15b07dc0a3ac74ad2a Mon Sep 17 00:00:00 2001 From: Natalia Polina Date: Tue, 7 Feb 2023 13:11:55 -0800 Subject: [PATCH 14/32] Fixed gh-1272 (#1287) --- dpnp/dpnp_iface_arraycreation.py | 19 ++++++++++--------- tests/test_arraycreation.py | 23 +++++++++++++++++++++++ 2 files changed, 33 insertions(+), 9 deletions(-) diff --git a/dpnp/dpnp_iface_arraycreation.py b/dpnp/dpnp_iface_arraycreation.py index 01b9ac6b792..1740b1d6001 100644 --- a/dpnp/dpnp_iface_arraycreation.py +++ b/dpnp/dpnp_iface_arraycreation.py @@ -48,6 +48,7 @@ from dpnp.dpnp_utils import * import dpnp.dpnp_container as dpnp_container +import dpctl.tensor as dpt __all__ = [ @@ -530,7 +531,7 @@ def empty_like(x1, Limitations ----------- - Parameters ``x1`` is supported only as :class:`dpnp.dpnp_array`. + Parameter ``x1`` is supported as :class:`dpnp.dpnp_array` or :class:`dpctl.tensor.usm_ndarray` Parameter ``order`` is supported with values ``"C"`` or ``"F"``. Parameter ``subok`` is supported only with default value ``False``. Otherwise the function will be executed sequentially on CPU. 
@@ -552,7 +553,7 @@ def empty_like(x1, """ - if not isinstance(x1, dpnp.ndarray): + if not isinstance(x1, (dpnp.ndarray, dpt.usm_ndarray)): pass elif order not in ('C', 'c', 'F', 'f', None): pass @@ -762,7 +763,7 @@ def full_like(x1, Limitations ----------- - Parameters ``x1`` is supported only as :class:`dpnp.dpnp_array`. + Parameter ``x1`` is supported as :class:`dpnp.dpnp_array` or :class:`dpctl.tensor.usm_ndarray` Parameter ``order`` is supported only with values ``"C"`` and ``"F"``. Parameter ``subok`` is supported only with default value ``False``. Otherwise the function will be executed sequentially on CPU. @@ -783,7 +784,7 @@ def full_like(x1, [1.0, 1.0, 1.0, 1.0, 1.0, 1.0] """ - if not isinstance(x1, dpnp.ndarray): + if not isinstance(x1, (dpnp.ndarray, dpt.usm_ndarray)): pass elif order not in ('C', 'c', 'F', 'f', None): pass @@ -1189,7 +1190,7 @@ def ones_like(x1, Limitations ----------- - Parameters ``x1`` is supported only as :class:`dpnp.dpnp_array`. + Parameter ``x1`` is supported as :class:`dpnp.dpnp_array` or :class:`dpctl.tensor.usm_ndarray` Parameter ``order`` is supported with values ``"C"`` or ``"F"``. Parameter ``subok`` is supported only with default value ``False``. Otherwise the function will be executed sequentially on CPU. @@ -1211,7 +1212,7 @@ def ones_like(x1, [1.0, 1.0, 1.0, 1.0, 1.0, 1.0] """ - if not isinstance(x1, dpnp.ndarray): + if not isinstance(x1, (dpnp.ndarray, dpt.usm_ndarray)): pass elif order not in ('C', 'c', 'F', 'f', None): pass @@ -1502,7 +1503,7 @@ def zeros_like(x1, Limitations ----------- - Parameters ``x1`` is supported only as :class:`dpnp.dpnp_array`. + Parameter ``x1`` is supported as :class:`dpnp.dpnp_array` or :class:`dpctl.tensor.usm_ndarray` Parameter ``order`` is supported with values ``"C"`` or ``"F"``. Parameter ``subok`` is supported only with default value ``False``. Otherwise the function will be executed sequentially on CPU. 
@@ -1523,8 +1524,8 @@ def zeros_like(x1, >>> [i for i in np.zeros_like(x)] [0.0, 0.0, 0.0, 0.0, 0.0, 0.0] -""" - if not isinstance(x1, dpnp.ndarray): + """ + if not isinstance(x1, (dpnp.ndarray, dpt.usm_ndarray)): pass elif order not in ('C', 'c', 'F', 'f', None): pass diff --git a/tests/test_arraycreation.py b/tests/test_arraycreation.py index d428b1ab726..833ea6109c3 100644 --- a/tests/test_arraycreation.py +++ b/tests/test_arraycreation.py @@ -485,3 +485,26 @@ def test_ones_like(array, dtype, order): a = numpy.array(array) ia = dpnp.array(array) assert_array_equal(func(numpy, a), func(dpnp, ia)) + + +@pytest.mark.parametrize( + "func, args", + [ + pytest.param("full_like", + ['x0', '4']), + pytest.param("zeros_like", + ['x0']), + pytest.param("ones_like", + ['x0']), + pytest.param("empty_like", + ['x0']), + ]) +def test_dpctl_tensor_input(func, args): + x0 = dpt.reshape(dpt.arange(9), (3,3)) + new_args = [eval(val, {'x0' : x0}) for val in args] + X = getattr(dpt, func)(*new_args) + Y = getattr(dpnp, func)(*new_args) + if func is 'empty_like': + assert X.shape == Y.shape + else: + assert_array_equal(X, Y) From 9308f64a5e91e50215516f3b956780005894e4a0 Mon Sep 17 00:00:00 2001 From: Anton <100830759+antonwolfy@users.noreply.github.com> Date: Wed, 8 Feb 2023 10:14:58 +0100 Subject: [PATCH 15/32] Support high=None in dpnp.randint() (#1284) --- dpnp/random/dpnp_random_state.py | 2 +- tests/third_party/cupy/random_tests/test_sample.py | 2 -- 2 files changed, 1 insertion(+), 3 deletions(-) diff --git a/dpnp/random/dpnp_random_state.py b/dpnp/random/dpnp_random_state.py index 412d9dec0f8..c224553b0cf 100644 --- a/dpnp/random/dpnp_random_state.py +++ b/dpnp/random/dpnp_random_state.py @@ -337,7 +337,7 @@ def randint(self, low, high=None, size=None, dtype=int, usm_type="device"): if not use_origin_backend(low): if not dpnp.isscalar(low): pass - elif not dpnp.isscalar(high): + elif not (high is None or dpnp.isscalar(high)): pass else: _dtype = dpnp.int32 if dtype is int 
else dpnp.dtype(dtype) diff --git a/tests/third_party/cupy/random_tests/test_sample.py b/tests/third_party/cupy/random_tests/test_sample.py index 3f8a0169ac1..f3b844cdc6a 100644 --- a/tests/third_party/cupy/random_tests/test_sample.py +++ b/tests/third_party/cupy/random_tests/test_sample.py @@ -33,7 +33,6 @@ def test_lo_hi_nonrandom(self): a = random.randint(-1.1, -0.9, size=(2, 2)) numpy.testing.assert_array_equal(a, cupy.full((2, 2), -1)) - @pytest.mark.usefixtures("allow_fall_back_on_numpy") def test_zero_sizes(self): a = random.randint(10, size=(0,)) numpy.testing.assert_array_equal(a, cupy.array(())) @@ -112,7 +111,6 @@ def test_goodness_of_fit_2(self): self.assertTrue(hypothesis.chi_square_test(counts, expected)) -@pytest.mark.usefixtures("allow_fall_back_on_numpy") @testing.gpu class TestRandintDtype(unittest.TestCase): From 30d3ada54c9140910825ec4a47c16b0e13d4be28 Mon Sep 17 00:00:00 2001 From: Oleksandr Pavlyk Date: Wed, 8 Feb 2023 16:20:53 -0600 Subject: [PATCH 16/32] linter changes applied --- tests/test_dparray.py | 1 - 1 file changed, 1 deletion(-) diff --git a/tests/test_dparray.py b/tests/test_dparray.py index 4757fc855b4..87c23f7b878 100644 --- a/tests/test_dparray.py +++ b/tests/test_dparray.py @@ -174,4 +174,3 @@ def test_print_dpnp_nd(): result = str(dpnp.empty( shape=(0, 0) )) expected = "[]" assert(result==expected) - \ No newline at end of file From 06533ebc7a7fd7d47da1c8605f82b14e16aecc7d Mon Sep 17 00:00:00 2001 From: Anton <100830759+antonwolfy@users.noreply.github.com> Date: Thu, 9 Feb 2023 07:56:26 +0100 Subject: [PATCH 17/32] Add operation __index__ and __complex__ (#1285) * Add operation __index__ and __complex__ * Add tests --- dpnp/dpnp_array.py | 10 +++++++-- tests/test_dparray.py | 52 ++++++++++++++++++++++++++++++++----------- 2 files changed, 47 insertions(+), 15 deletions(-) diff --git a/dpnp/dpnp_array.py b/dpnp/dpnp_array.py index 57f057ae760..a60464583ca 100644 --- a/dpnp/dpnp_array.py +++ b/dpnp/dpnp_array.py @@ -140,7 
+140,10 @@ def __bool__(self): return self._array_obj.__bool__() # '__class__', - # '__complex__', + + def __complex__(self): + return self._array_obj.__complex__() + # '__contains__', # '__copy__', # '__deepcopy__', @@ -187,7 +190,10 @@ def __gt__(self, other): # '__imatmul__', # '__imod__', # '__imul__', - # '__index__', + + def __index__(self): + return self._array_obj.__index__() + # '__init__', # '__init_subclass__', diff --git a/tests/test_dparray.py b/tests/test_dparray.py index 745884f6a07..50eaa2e46eb 100644 --- a/tests/test_dparray.py +++ b/tests/test_dparray.py @@ -1,15 +1,17 @@ -import dpnp -import numpy import pytest +from .helper import get_all_dtypes + +import dpnp import dpctl.tensor as dpt +import numpy +from numpy.testing import ( + assert_array_equal +) + -@pytest.mark.parametrize("res_dtype", - [numpy.float64, numpy.float32, numpy.int64, numpy.int32, numpy.bool_, numpy.complex_], - ids=['float64', 'float32', 'int64', 'int32', 'bool', 'complex']) -@pytest.mark.parametrize("arr_dtype", - [numpy.float64, numpy.float32, numpy.int64, numpy.int32, numpy.bool_, numpy.complex_], - ids=['float64', 'float32', 'int64', 'int32', 'bool', 'complex']) +@pytest.mark.parametrize("res_dtype", get_all_dtypes()) +@pytest.mark.parametrize("arr_dtype", get_all_dtypes()) @pytest.mark.parametrize("arr", [[-2, -1, 0, 1, 2], [[-2, -1], [1, 2]], []], ids=['[-2, -1, 0, 1, 2]', '[[-2, -1], [1, 2]]', '[]']) @@ -18,12 +20,10 @@ def test_astype(arr, arr_dtype, res_dtype): dpnp_array = dpnp.array(numpy_array) expected = numpy_array.astype(res_dtype) result = dpnp_array.astype(res_dtype) - numpy.testing.assert_array_equal(expected, result) + assert_array_equal(expected, result) -@pytest.mark.parametrize("arr_dtype", - [numpy.float64, numpy.float32, numpy.int64, numpy.int32, numpy.bool_, numpy.complex_], - ids=['float64', 'float32', 'int64', 'int32', 'bool', 'complex']) +@pytest.mark.parametrize("arr_dtype", get_all_dtypes()) @pytest.mark.parametrize("arr", [[-2, -1, 0, 1, 2], 
[[-2, -1], [1, 2]], []], ids=['[-2, -1, 0, 1, 2]', '[[-2, -1], [1, 2]]', '[]']) @@ -32,7 +32,7 @@ def test_flatten(arr, arr_dtype): dpnp_array = dpnp.array(arr, dtype=arr_dtype) expected = numpy_array.flatten() result = dpnp_array.flatten() - numpy.testing.assert_array_equal(expected, result) + assert_array_equal(expected, result) @pytest.mark.parametrize("shape", @@ -68,3 +68,29 @@ def test_flags_strides(dtype, order, strides): assert usm_array.flags == dpnp_array.flags assert numpy_array.flags.c_contiguous == dpnp_array.flags.c_contiguous assert numpy_array.flags.f_contiguous == dpnp_array.flags.f_contiguous + + +@pytest.mark.parametrize("func", [bool, float, int, complex]) +@pytest.mark.parametrize("shape", [tuple(), (1,), (1, 1), (1, 1, 1)]) +@pytest.mark.parametrize("dtype", get_all_dtypes(no_float16=False, no_complex=True)) +def test_scalar_type_casting(func, shape, dtype): + numpy_array = numpy.full(shape, 5, dtype=dtype) + dpnp_array = dpnp.full(shape, 5, dtype=dtype) + assert func(numpy_array) == func(dpnp_array) + + +@pytest.mark.parametrize("method", ["__bool__", "__float__", "__int__", "__complex__"]) +@pytest.mark.parametrize("shape", [tuple(), (1,), (1, 1), (1, 1, 1)]) +@pytest.mark.parametrize("dtype", get_all_dtypes(no_float16=False, no_complex=True, no_none=True)) +def test_scalar_type_casting_by_method(method, shape, dtype): + numpy_array = numpy.full(shape, 4.7, dtype=dtype) + dpnp_array = dpnp.full(shape, 4.7, dtype=dtype) + assert getattr(numpy_array, method)() == getattr(dpnp_array, method)() + + +@pytest.mark.parametrize("shape", [(1,), (1, 1), (1, 1, 1)]) +@pytest.mark.parametrize("index_dtype", [dpnp.int32, dpnp.int64]) +def test_array_as_index(shape, index_dtype): + ind_arr = dpnp.ones(shape, dtype=index_dtype) + a = numpy.arange(ind_arr.size + 1) + assert a[tuple(ind_arr)] == a[1] From e327dfb8f21ccaeedc759a9b77360f52c834f7c7 Mon Sep 17 00:00:00 2001 From: Anton <100830759+antonwolfy@users.noreply.github.com> Date: Thu, 9 Feb 2023 
15:04:42 +0100 Subject: [PATCH 18/32] Update tests with proper call of dpctl.SyclQueue() (#1290) --- tests/test_sycl_queue.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/test_sycl_queue.py b/tests/test_sycl_queue.py index 413596e2cc7..bc42f70b370 100644 --- a/tests/test_sycl_queue.py +++ b/tests/test_sycl_queue.py @@ -426,7 +426,7 @@ def test_random_state(func, args, kwargs, device, usm_type): assert device == res_array.sycl_device assert usm_type == res_array.usm_type - sycl_queue = dpctl.SyclQueue(device=device, property="in_order") + sycl_queue = dpctl.SyclQueue(device, property="in_order") # test with in-order SYCL queue per a device and passed as argument rs = dpnp.random.RandomState((147, 56, 896), sycl_queue=sycl_queue) From d1acfafc0fdd82286dff329fe9433f50e15fcb73 Mon Sep 17 00:00:00 2001 From: Anton <100830759+antonwolfy@users.noreply.github.com> Date: Thu, 9 Feb 2023 16:54:10 +0100 Subject: [PATCH 19/32] Update minimum required versions of dependent components. 
(#1289) --- conda-recipe/meta.yaml | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/conda-recipe/meta.yaml b/conda-recipe/meta.yaml index 3e411e354a9..3f8db36ecc9 100644 --- a/conda-recipe/meta.yaml +++ b/conda-recipe/meta.yaml @@ -11,17 +11,17 @@ requirements: - numpy >=1.19,<1.25a0 - cython - cmake >=3.19 - - dpctl >=0.13 - - mkl-devel-dpcpp {{ environ.get('MKL_VER', '>=2021.1.1') }} + - dpctl >=0.14 + - mkl-devel-dpcpp {{ environ.get('MKL_VER', '>=2023.0.0') }} - onedpl-devel - tbb-devel - wheel build: - {{ compiler('cxx') }} - - {{ compiler('dpcpp') }} >=2022.1 # [not osx] + - {{ compiler('dpcpp') }} >=2023.0 # [not osx] run: - python - - dpctl >=0.13 + - dpctl >=0.14 - {{ pin_compatible('dpcpp-cpp-rt', min_pin='x.x', max_pin='x') }} - {{ pin_compatible('mkl-dpcpp', min_pin='x.x', max_pin='x') }} - {{ pin_compatible('numpy', min_pin='x.x', max_pin='x') }} From 863f5876a47bf0d8948b0bec904597750509dc56 Mon Sep 17 00:00:00 2001 From: Anton <100830759+antonwolfy@users.noreply.github.com> Date: Sat, 11 Feb 2023 01:41:30 +0100 Subject: [PATCH 20/32] dpnp.add() doesn't work properly with a scalar (#1288) * dpnp.add() doesn't work properly with a scalar * get rid of dpctl.SyclQueue() call in tests with unsupported device keyword * Add a fix for crash on CPU device * USM type in operations with a scalar * Porting fix for crash to logic kernel --- .../include/dpnp_gen_2arg_3type_tbl.hpp | 93 ++++- dpnp/backend/include/dpnp_iface.hpp | 3 +- dpnp/backend/include/dpnp_iface_fptr.hpp | 4 +- dpnp/backend/kernels/dpnp_krnl_elemwise.cpp | 361 ++++++++---------- dpnp/backend/kernels/dpnp_krnl_logic.cpp | 16 +- dpnp/backend/src/dpnp_fptr.hpp | 49 +++ dpnp/dpnp_iface.py | 5 +- dpnp/dpnp_iface_logic.py | 90 +++-- dpnp/dpnp_iface_mathematical.py | 94 +++-- dpnp/dpnp_utils/dpnp_algo_utils.pyx | 49 ++- tests/skipped_tests.tbl | 1 - tests/skipped_tests_gpu.tbl | 1 - tests/test_indexing.py | 55 +-- tests/test_mathematical.py | 148 +++---- tests/test_strides.py | 
67 ++-- tests/test_usm_type.py | 42 +- .../cupy/math_tests/test_arithmetic.py | 2 +- 17 files changed, 605 insertions(+), 475 deletions(-) diff --git a/dpnp/backend/include/dpnp_gen_2arg_3type_tbl.hpp b/dpnp/backend/include/dpnp_gen_2arg_3type_tbl.hpp index 5d4ae22f796..33f5e0d19a4 100644 --- a/dpnp/backend/include/dpnp_gen_2arg_3type_tbl.hpp +++ b/dpnp/backend/include/dpnp_gen_2arg_3type_tbl.hpp @@ -1,5 +1,5 @@ //***************************************************************************** -// Copyright (c) 2016-2020, Intel Corporation +// Copyright (c) 2016-2023, Intel Corporation // All rights reserved. // // Redistribution and use in source and binary forms, with or without @@ -31,7 +31,10 @@ * Parameters: * - public name of the function and kernel name * - operation used to calculate the result + * - vector operation over SYCL group used to calculate the result + * - list of types vector operation accepts * - mkl operation used to calculate the result + * - list of types mkl operation accepts * */ @@ -41,11 +44,12 @@ #ifdef _SECTION_DOCUMENTATION_GENERATION_ -#define MACRO_2ARG_3TYPES_OP(__name__, __operation1__, __operation2__) \ +#define MACRO_2ARG_3TYPES_OP( \ + __name__, __operation__, __vec_operation__, __vec_types__, __mkl_operation__, __mkl_types__) \ /** @ingroup BACKEND_API */ \ /** @brief Per element operation function __name__ */ \ /** */ \ - /** Function "__name__" executes operator "__operation1__" over corresponding elements of input arrays */ \ + /** Function "__name__" executes operator "__operation__" over corresponding elements of input arrays */ \ /** */ \ /** @param[in] q_ref Reference to SYCL queue. */ \ /** @param[out] result_out Output array. 
*/ \ @@ -105,23 +109,84 @@ #endif -MACRO_2ARG_3TYPES_OP(dpnp_add_c, input1_elem + input2_elem, oneapi::mkl::vm::add) -MACRO_2ARG_3TYPES_OP(dpnp_arctan2_c, sycl::atan2((double)input1_elem, (double)input2_elem), oneapi::mkl::vm::atan2) +MACRO_2ARG_3TYPES_OP(dpnp_add_c, + input1_elem + input2_elem, + sycl::add_sat(x1, x2), + MACRO_UNPACK_TYPES(int, long), + oneapi::mkl::vm::add, + MACRO_UNPACK_TYPES(float, double, std::complex, std::complex)) + +MACRO_2ARG_3TYPES_OP(dpnp_arctan2_c, + sycl::atan2((double)input1_elem, (double)input2_elem), + nullptr, + std::false_type, + oneapi::mkl::vm::atan2, + MACRO_UNPACK_TYPES(float, double)) + MACRO_2ARG_3TYPES_OP(dpnp_copysign_c, sycl::copysign((double)input1_elem, (double)input2_elem), - oneapi::mkl::vm::copysign) -MACRO_2ARG_3TYPES_OP(dpnp_divide_c, input1_elem / input2_elem, oneapi::mkl::vm::div) -MACRO_2ARG_3TYPES_OP(dpnp_fmod_c, sycl::fmod((double)input1_elem, (double)input2_elem), oneapi::mkl::vm::fmod) -MACRO_2ARG_3TYPES_OP(dpnp_hypot_c, sycl::hypot((double)input1_elem, (double)input2_elem), oneapi::mkl::vm::hypot) -MACRO_2ARG_3TYPES_OP(dpnp_maximum_c, sycl::max(input1_elem, input2_elem), oneapi::mkl::vm::fmax) -MACRO_2ARG_3TYPES_OP(dpnp_minimum_c, sycl::min(input1_elem, input2_elem), oneapi::mkl::vm::fmin) + nullptr, + std::false_type, + oneapi::mkl::vm::copysign, + MACRO_UNPACK_TYPES(float, double)) + +MACRO_2ARG_3TYPES_OP(dpnp_divide_c, + input1_elem / input2_elem, + nullptr, + std::false_type, + oneapi::mkl::vm::div, + MACRO_UNPACK_TYPES(float, double)) + +MACRO_2ARG_3TYPES_OP(dpnp_fmod_c, + sycl::fmod((double)input1_elem, (double)input2_elem), + nullptr, + std::false_type, + oneapi::mkl::vm::fmod, + MACRO_UNPACK_TYPES(float, double)) + +MACRO_2ARG_3TYPES_OP(dpnp_hypot_c, + sycl::hypot((double)input1_elem, (double)input2_elem), + nullptr, + std::false_type, + oneapi::mkl::vm::hypot, + MACRO_UNPACK_TYPES(float, double)) + +MACRO_2ARG_3TYPES_OP(dpnp_maximum_c, + sycl::max(input1_elem, input2_elem), + nullptr, + 
std::false_type, + oneapi::mkl::vm::fmax, + MACRO_UNPACK_TYPES(float, double)) + +MACRO_2ARG_3TYPES_OP(dpnp_minimum_c, + sycl::min(input1_elem, input2_elem), + nullptr, + std::false_type, + oneapi::mkl::vm::fmin, + MACRO_UNPACK_TYPES(float, double)) // "multiply" needs to be standalone kernel (not autogenerated) due to complex algorithm. This is not an element wise. // pytest "tests/third_party/cupy/creation_tests/test_ranges.py::TestMgrid::test_mgrid3" // requires multiplication shape1[10] with shape2[10,1] and result expected as shape[10,10] -MACRO_2ARG_3TYPES_OP(dpnp_multiply_c, input1_elem* input2_elem, oneapi::mkl::vm::mul) +MACRO_2ARG_3TYPES_OP(dpnp_multiply_c, + input1_elem* input2_elem, + nullptr, + std::false_type, + oneapi::mkl::vm::mul, + MACRO_UNPACK_TYPES(float, double, std::complex, std::complex)) + +MACRO_2ARG_3TYPES_OP(dpnp_power_c, + sycl::pow((double)input1_elem, (double)input2_elem), + nullptr, + std::false_type, + oneapi::mkl::vm::pow, + MACRO_UNPACK_TYPES(float, double)) -MACRO_2ARG_3TYPES_OP(dpnp_power_c, sycl::pow((double)input1_elem, (double)input2_elem), oneapi::mkl::vm::pow) -MACRO_2ARG_3TYPES_OP(dpnp_subtract_c, input1_elem - input2_elem, oneapi::mkl::vm::sub) +MACRO_2ARG_3TYPES_OP(dpnp_subtract_c, + input1_elem - input2_elem, + nullptr, + std::false_type, + oneapi::mkl::vm::sub, + MACRO_UNPACK_TYPES(float, double)) #undef MACRO_2ARG_3TYPES_OP diff --git a/dpnp/backend/include/dpnp_iface.hpp b/dpnp/backend/include/dpnp_iface.hpp index 713e3e82197..7a80b40a3d2 100644 --- a/dpnp/backend/include/dpnp_iface.hpp +++ b/dpnp/backend/include/dpnp_iface.hpp @@ -1829,7 +1829,8 @@ INP_DLLEXPORT void dpnp_invert_c(void* array1_in, void* result, size_t size); #include -#define MACRO_2ARG_3TYPES_OP(__name__, __operation1__, __operation2__) \ +#define MACRO_2ARG_3TYPES_OP( \ + __name__, __operation__, __vec_operation__, __vec_types__, __mkl_operation__, __mkl_types__) \ template \ INP_DLLEXPORT DPCTLSyclEventRef __name__(DPCTLSyclQueueRef q_ref, \ void* 
result_out, \ diff --git a/dpnp/backend/include/dpnp_iface_fptr.hpp b/dpnp/backend/include/dpnp_iface_fptr.hpp index f77a37aade8..61c1c9838ad 100644 --- a/dpnp/backend/include/dpnp_iface_fptr.hpp +++ b/dpnp/backend/include/dpnp_iface_fptr.hpp @@ -394,13 +394,13 @@ enum class DPNPFuncName : size_t enum class DPNPFuncType : size_t { DPNP_FT_NONE, /**< Very first element of the enumeration */ + DPNP_FT_BOOL, /**< analog of numpy.bool_ or bool */ DPNP_FT_INT, /**< analog of numpy.int32 or int */ DPNP_FT_LONG, /**< analog of numpy.int64 or long */ DPNP_FT_FLOAT, /**< analog of numpy.float32 or float */ DPNP_FT_DOUBLE, /**< analog of numpy.float32 or double */ DPNP_FT_CMPLX64, /**< analog of numpy.complex64 or std::complex */ - DPNP_FT_CMPLX128, /**< analog of numpy.complex128 or std::complex */ - DPNP_FT_BOOL /**< analog of numpy.bool_ or bool */ + DPNP_FT_CMPLX128 /**< analog of numpy.complex128 or std::complex */ }; /** diff --git a/dpnp/backend/kernels/dpnp_krnl_elemwise.cpp b/dpnp/backend/kernels/dpnp_krnl_elemwise.cpp index eafa50d4cee..32097d321a7 100644 --- a/dpnp/backend/kernels/dpnp_krnl_elemwise.cpp +++ b/dpnp/backend/kernels/dpnp_krnl_elemwise.cpp @@ -1,5 +1,5 @@ //***************************************************************************** -// Copyright (c) 2016-2022, Intel Corporation +// Copyright (c) 2016-2023, Intel Corporation // All rights reserved. 
// // Redistribution and use in source and binary forms, with or without @@ -825,7 +825,9 @@ static void func_map_init_elemwise_1arg_1type(func_map_t& fmap) return; } -#define MACRO_2ARG_3TYPES_OP(__name__, __operation1__, __operation2__) \ + +#define MACRO_2ARG_3TYPES_OP( \ + __name__, __operation__, __vec_operation__, __vec_types__, __mkl_operation__, __mkl_types__) \ template \ @@ -834,6 +836,11 @@ static void func_map_init_elemwise_1arg_1type(func_map_t& fmap) template \ + class __name__##_sg_kernel; \ + \ + template \ class __name__##_broadcast_kernel; \ \ template (q_ref)); \ \ - DPNPC_ptr_adapter<_DataType_input1> input1_ptr(q_ref, input1_in, input1_size); \ - DPNPC_ptr_adapter input1_shape_ptr(q_ref, input1_shape, input1_ndim, true); \ - DPNPC_ptr_adapter input1_strides_ptr(q_ref, input1_strides, input1_ndim, true); \ - DPNPC_ptr_adapter<_DataType_input2> input2_ptr(q_ref, input2_in, input2_size); \ - DPNPC_ptr_adapter input2_shape_ptr(q_ref, input2_shape, input2_ndim, true); \ - DPNPC_ptr_adapter input2_strides_ptr(q_ref, input2_strides, input2_ndim, true); \ - \ - DPNPC_ptr_adapter<_DataType_output> result_ptr(q_ref, result_out, result_size, false, true); \ - DPNPC_ptr_adapter result_shape_ptr(q_ref, result_shape, result_ndim); \ - DPNPC_ptr_adapter result_strides_ptr(q_ref, result_strides, result_ndim); \ - \ - _DataType_input1* input1_data = input1_ptr.get_ptr(); \ - shape_elem_type* input1_shape_data = input1_shape_ptr.get_ptr(); \ - shape_elem_type* input1_strides_data = input1_strides_ptr.get_ptr(); \ + _DataType_input1* input1_data = static_cast<_DataType_input1 *>(const_cast(input1_in)); \ + _DataType_input2* input2_data = static_cast<_DataType_input2 *>(const_cast(input2_in)); \ + _DataType_output* result = static_cast<_DataType_output *>(result_out); \ \ - _DataType_input2* input2_data = input2_ptr.get_ptr(); \ - shape_elem_type* input2_shape_data = input2_shape_ptr.get_ptr(); \ - shape_elem_type* input2_strides_data = 
input2_strides_ptr.get_ptr(); \ + bool use_broadcasting = !array_equal(input1_shape, input1_ndim, input2_shape, input2_ndim); \ \ - _DataType_output* result = result_ptr.get_ptr(); \ - shape_elem_type* result_shape_data = result_shape_ptr.get_ptr(); \ - shape_elem_type* result_strides_data = result_strides_ptr.get_ptr(); \ + shape_elem_type* input1_shape_offsets = new shape_elem_type[input1_ndim]; \ \ - bool use_broadcasting = !array_equal(input1_shape_data, input1_ndim, input2_shape_data, input2_ndim); \ + get_shape_offsets_inkernel(input1_shape, input1_ndim, input1_shape_offsets); \ + bool use_strides = !array_equal(input1_strides, input1_ndim, input1_shape_offsets, input1_ndim); \ + delete[] input1_shape_offsets; \ \ - const size_t input1_shape_size_in_bytes = input1_ndim * sizeof(shape_elem_type); \ - shape_elem_type* input1_shape_offsets = \ - reinterpret_cast(sycl::malloc_shared(input1_shape_size_in_bytes, q)); \ - get_shape_offsets_inkernel(input1_shape_data, input1_ndim, input1_shape_offsets); \ - bool use_strides = !array_equal(input1_strides_data, input1_ndim, input1_shape_offsets, input1_ndim); \ - sycl::free(input1_shape_offsets, q); \ + shape_elem_type* input2_shape_offsets = new shape_elem_type[input2_ndim]; \ \ - const size_t input2_shape_size_in_bytes = input2_ndim * sizeof(shape_elem_type); \ - shape_elem_type* input2_shape_offsets = \ - reinterpret_cast(sycl::malloc_shared(input2_shape_size_in_bytes, q)); \ - get_shape_offsets_inkernel(input2_shape_data, input2_ndim, input2_shape_offsets); \ + get_shape_offsets_inkernel(input2_shape, input2_ndim, input2_shape_offsets); \ use_strides = \ - use_strides || !array_equal(input2_strides_data, input2_ndim, input2_shape_offsets, input2_ndim); \ - sycl::free(input2_shape_offsets, q); \ + use_strides || !array_equal(input2_strides, input2_ndim, input2_shape_offsets, input2_ndim); \ + delete[] input2_shape_offsets; \ \ sycl::event event; \ sycl::range<1> gws(result_size); \ @@ -924,25 +910,25 @@ static void 
func_map_init_elemwise_1arg_1type(func_map_t& fmap) input1_it = reinterpret_cast*>(dpnp_memory_alloc_c(q_ref, \ input1_it_size_in_bytes)); \ new (input1_it) \ - DPNPC_id<_DataType_input1>(q_ref, input1_data, input1_shape_data, input1_strides_data, input1_ndim); \ + DPNPC_id<_DataType_input1>(q_ref, input1_data, input1_shape, input1_strides, input1_ndim); \ \ - input1_it->broadcast_to_shape(result_shape_data, result_ndim); \ + input1_it->broadcast_to_shape(result_shape, result_ndim); \ \ DPNPC_id<_DataType_input2>* input2_it; \ const size_t input2_it_size_in_bytes = sizeof(DPNPC_id<_DataType_input2>); \ input2_it = reinterpret_cast*>(dpnp_memory_alloc_c(q_ref, \ input2_it_size_in_bytes)); \ new (input2_it) \ - DPNPC_id<_DataType_input2>(q_ref, input2_data, input2_shape_data, input2_strides_data, input2_ndim); \ + DPNPC_id<_DataType_input2>(q_ref, input2_data, input2_shape, input2_strides, input2_ndim); \ \ - input2_it->broadcast_to_shape(result_shape_data, result_ndim); \ + input2_it->broadcast_to_shape(result_shape, result_ndim); \ \ auto kernel_parallel_for_func = [=](sycl::id<1> global_id) { \ - const size_t i = global_id[0]; /*for (size_t i = 0; i < result_size; ++i)*/ \ + const size_t i = global_id[0]; /* for (size_t i = 0; i < result_size; ++i) */ \ { \ const _DataType_output input1_elem = (*input1_it)[i]; \ const _DataType_output input2_elem = (*input2_it)[i]; \ - result[i] = __operation1__; \ + result[i] = __operation__; \ } \ }; \ auto kernel_func = [&](sycl::handler& cgh) { \ @@ -951,8 +937,7 @@ static void func_map_init_elemwise_1arg_1type(func_map_t& fmap) gws, kernel_parallel_for_func); \ }; \ \ - event = q.submit(kernel_func); \ - event.wait(); \ + q.submit(kernel_func).wait(); \ \ input1_it->~DPNPC_id(); \ input2_it->~DPNPC_id(); \ @@ -961,11 +946,42 @@ static void func_map_init_elemwise_1arg_1type(func_map_t& fmap) } \ else if (use_strides) \ { \ + if ((result_ndim != input1_ndim) || (result_ndim != input2_ndim)) \ + { \ + throw 
std::runtime_error("Result ndim=" + std::to_string(result_ndim) + \ + " mismatches with either input1 ndim=" + std::to_string(input1_ndim) + \ + " or input2 ndim=" + std::to_string(input2_ndim)); \ + } \ + \ + /* memory transfer optimization, use USM-host for temporary speeds up tranfer to device */ \ + using usm_host_allocatorT = sycl::usm_allocator; \ + \ + size_t strides_size = 3 * result_ndim; \ + shape_elem_type *dev_strides_data = sycl::malloc_device(strides_size, q); \ + \ + /* create host temporary for packed strides managed by shared pointer */ \ + auto strides_host_packed = std::vector(strides_size, \ + usm_host_allocatorT(q)); \ + \ + /* packed vector is concatenation of result_strides, input1_strides and input2_strides */ \ + std::copy(result_strides, result_strides + result_ndim, strides_host_packed.begin()); \ + std::copy(input1_strides, input1_strides + result_ndim, strides_host_packed.begin() + result_ndim); \ + std::copy(input2_strides, input2_strides + result_ndim, strides_host_packed.begin() + 2 * result_ndim); \ + \ + auto copy_strides_ev = q.copy(strides_host_packed.data(), \ + dev_strides_data, \ + strides_host_packed.size()); \ + \ auto kernel_parallel_for_func = [=](sycl::id<1> global_id) { \ - const size_t output_id = global_id[0]; /*for (size_t i = 0; i < result_size; ++i)*/ \ + const size_t output_id = global_id[0]; /* for (size_t i = 0; i < result_size; ++i) */ \ { \ + const shape_elem_type *result_strides_data = &dev_strides_data[0]; \ + const shape_elem_type *input1_strides_data = &dev_strides_data[1]; \ + const shape_elem_type *input2_strides_data = &dev_strides_data[2]; \ + \ size_t input1_id = 0; \ size_t input2_id = 0; \ + \ for (size_t i = 0; i < result_ndim; ++i) \ { \ const size_t output_xyz_id = \ @@ -976,34 +992,93 @@ static void func_map_init_elemwise_1arg_1type(func_map_t& fmap) \ const _DataType_output input1_elem = input1_data[input1_id]; \ const _DataType_output input2_elem = input2_data[input2_id]; \ - result[output_id] 
= __operation1__; \ + result[output_id] = __operation__; \ } \ }; \ auto kernel_func = [&](sycl::handler& cgh) { \ + cgh.depends_on(copy_strides_ev); \ cgh.parallel_for< \ class __name__##_strides_kernel<_DataType_output, _DataType_input1, _DataType_input2>>( \ gws, kernel_parallel_for_func); \ }; \ \ - event = q.submit(kernel_func); \ + q.submit(kernel_func).wait(); \ + \ + sycl::free(dev_strides_data, q); \ + return event_ref; \ } \ else \ { \ - if constexpr ((std::is_same<_DataType_input1, double>::value || \ - std::is_same<_DataType_input1, float>::value) && \ - std::is_same<_DataType_input2, _DataType_input1>::value) \ + if constexpr (both_types_are_same<_DataType_input1, _DataType_input2, __mkl_types__>) \ { \ - event = __operation2__(q, result_size, input1_data, input2_data, result); \ + event = __mkl_operation__(q, result_size, input1_data, input2_data, result); \ } \ - else \ + else if constexpr (none_of_both_types<_DataType_input1, _DataType_input2, \ + std::complex, std::complex>) \ { \ - auto kernel_parallel_for_func = [=](sycl::id<1> global_id) { \ - const size_t i = global_id[0]; /*for (size_t i = 0; i < result_size; ++i)*/ \ + constexpr size_t lws = 64; \ + constexpr unsigned int vec_sz = 8; \ + constexpr sycl::access::address_space global_space = sycl::access::address_space::global_space; \ + \ + auto gws_range = sycl::range<1>(((result_size + lws * vec_sz - 1) / (lws * vec_sz)) * lws); \ + auto lws_range = sycl::range<1>(lws); \ + \ + auto kernel_parallel_for_func = [=](sycl::nd_item<1> nd_it) { \ + auto sg = nd_it.get_sub_group(); \ + const auto max_sg_size = sg.get_max_local_range()[0]; \ + const size_t start = vec_sz * (nd_it.get_group(0) * nd_it.get_local_range(0) + \ + sg.get_group_id()[0] * max_sg_size); \ + \ + if (start + static_cast(vec_sz) * max_sg_size < result_size) \ + { \ + sycl::vec<_DataType_input1, vec_sz> x1 = \ + sg.load(sycl::multi_ptr<_DataType_input1, global_space>(&input1_data[start])); \ + sycl::vec<_DataType_input2, vec_sz> 
x2 = \ + sg.load(sycl::multi_ptr<_DataType_input2, global_space>(&input2_data[start])); \ + sycl::vec<_DataType_output, vec_sz> res_vec; \ + if constexpr (both_types_are_same<_DataType_input1, _DataType_input2, __vec_types__>) \ + { \ + res_vec = __vec_operation__; \ + } \ + else \ + { \ + for (size_t k = 0; k < vec_sz; ++k) \ + { \ + const _DataType_output input1_elem = x1[k]; \ + const _DataType_output input2_elem = x2[k]; \ + res_vec[k] = __operation__; \ + } \ + } \ + sg.store(sycl::multi_ptr<_DataType_output, global_space>(&result[start]), res_vec); \ + \ + } \ + else \ { \ - const _DataType_output input1_elem = input1_data[i]; \ - const _DataType_output input2_elem = input2_data[i]; \ - result[i] = __operation1__; \ + for (size_t k = start; k < result_size; ++k) \ + { \ + const _DataType_output input1_elem = input1_data[k]; \ + const _DataType_output input2_elem = input2_data[k]; \ + result[k] = __operation__; \ + } \ } \ + }; \ + \ + auto kernel_func = [&](sycl::handler& cgh) { \ + sycl::stream out(65536, 128, cgh);\ + cgh.parallel_for>(\ + sycl::nd_range<1>(gws_range, lws_range), kernel_parallel_for_func); \ + }; \ + event = q.submit(kernel_func); \ + } \ + else /* either input1 or input2 has complex type */ \ + { \ + auto kernel_parallel_for_func = [=](sycl::id<1> global_id) { \ + const size_t i = global_id[0]; /* for (size_t i = 0; i < result_size; ++i) */ \ + \ + const _DataType_output input1_elem = input1_data[i]; \ + const _DataType_output input2_elem = input2_data[i]; \ + result[i] = __operation__; \ + \ }; \ auto kernel_func = [&](sycl::handler& cgh) { \ cgh.parallel_for>( \ @@ -1013,18 +1088,7 @@ static void func_map_init_elemwise_1arg_1type(func_map_t& fmap) } \ } \ \ - input1_ptr.depends_on(event); \ - input1_shape_ptr.depends_on(event); \ - input1_strides_ptr.depends_on(event); \ - input2_ptr.depends_on(event); \ - input2_shape_ptr.depends_on(event); \ - input2_strides_ptr.depends_on(event); \ - result_ptr.depends_on(event); \ - 
result_shape_ptr.depends_on(event); \ - result_strides_ptr.depends_on(event); \ - \ event_ref = reinterpret_cast(&event); \ - \ return DPCTLEvent_Copy(event_ref); \ } \ \ @@ -1114,6 +1178,29 @@ static void func_map_init_elemwise_1arg_1type(func_map_t& fmap) #include +template +static void func_map_elemwise_2arg_3type_core(func_map_t& fmap) +{ + ((fmap[DPNPFuncName::DPNP_FN_ADD_EXT][FT1][FTs] = + {populate_func_types(), + (void*)dpnp_add_c_ext()>, + func_type_map_t::find_type, + func_type_map_t::find_type>}), + ...); + ((fmap[DPNPFuncName::DPNP_FN_MULTIPLY_EXT][FT1][FTs] = + {populate_func_types(), + (void*)dpnp_multiply_c_ext()>, + func_type_map_t::find_type, + func_type_map_t::find_type>}), + ...); +} + +template +static void func_map_elemwise_2arg_3type_helper(func_map_t& fmap) +{ + ((func_map_elemwise_2arg_3type_core(fmap)), ...); +} + static void func_map_init_elemwise_2arg_3type(func_map_t& fmap) { fmap[DPNPFuncName::DPNP_FN_ADD][eft_INT][eft_INT] = {eft_INT, @@ -1149,39 +1236,6 @@ static void func_map_init_elemwise_2arg_3type(func_map_t& fmap) fmap[DPNPFuncName::DPNP_FN_ADD][eft_DBL][eft_DBL] = {eft_DBL, (void*)dpnp_add_c_default}; - fmap[DPNPFuncName::DPNP_FN_ADD_EXT][eft_INT][eft_INT] = {eft_INT, - (void*)dpnp_add_c_ext}; - fmap[DPNPFuncName::DPNP_FN_ADD_EXT][eft_INT][eft_LNG] = {eft_LNG, - (void*)dpnp_add_c_ext}; - fmap[DPNPFuncName::DPNP_FN_ADD_EXT][eft_INT][eft_FLT] = {eft_DBL, - (void*)dpnp_add_c_ext}; - fmap[DPNPFuncName::DPNP_FN_ADD_EXT][eft_INT][eft_DBL] = {eft_DBL, - (void*)dpnp_add_c_ext}; - fmap[DPNPFuncName::DPNP_FN_ADD_EXT][eft_LNG][eft_INT] = {eft_LNG, - (void*)dpnp_add_c_ext}; - fmap[DPNPFuncName::DPNP_FN_ADD_EXT][eft_LNG][eft_LNG] = {eft_LNG, - (void*)dpnp_add_c_ext}; - fmap[DPNPFuncName::DPNP_FN_ADD_EXT][eft_LNG][eft_FLT] = {eft_DBL, - (void*)dpnp_add_c_ext}; - fmap[DPNPFuncName::DPNP_FN_ADD_EXT][eft_LNG][eft_DBL] = {eft_DBL, - (void*)dpnp_add_c_ext}; - fmap[DPNPFuncName::DPNP_FN_ADD_EXT][eft_FLT][eft_INT] = {eft_DBL, - 
(void*)dpnp_add_c_ext}; - fmap[DPNPFuncName::DPNP_FN_ADD_EXT][eft_FLT][eft_LNG] = {eft_DBL, - (void*)dpnp_add_c_ext}; - fmap[DPNPFuncName::DPNP_FN_ADD_EXT][eft_FLT][eft_FLT] = {eft_FLT, - (void*)dpnp_add_c_ext}; - fmap[DPNPFuncName::DPNP_FN_ADD_EXT][eft_FLT][eft_DBL] = {eft_DBL, - (void*)dpnp_add_c_ext}; - fmap[DPNPFuncName::DPNP_FN_ADD_EXT][eft_DBL][eft_INT] = {eft_DBL, - (void*)dpnp_add_c_ext}; - fmap[DPNPFuncName::DPNP_FN_ADD_EXT][eft_DBL][eft_LNG] = {eft_DBL, - (void*)dpnp_add_c_ext}; - fmap[DPNPFuncName::DPNP_FN_ADD_EXT][eft_DBL][eft_FLT] = {eft_DBL, - (void*)dpnp_add_c_ext}; - fmap[DPNPFuncName::DPNP_FN_ADD_EXT][eft_DBL][eft_DBL] = {eft_DBL, - (void*)dpnp_add_c_ext}; - fmap[DPNPFuncName::DPNP_FN_ARCTAN2][eft_INT][eft_INT] = {eft_DBL, (void*)dpnp_arctan2_c_default}; fmap[DPNPFuncName::DPNP_FN_ARCTAN2][eft_INT][eft_LNG] = {eft_DBL, @@ -1725,111 +1779,6 @@ static void func_map_init_elemwise_2arg_3type(func_map_t& fmap) fmap[DPNPFuncName::DPNP_FN_MULTIPLY][eft_C128][eft_C128] = { eft_C128, (void*)dpnp_multiply_c_default, std::complex, std::complex>}; - fmap[DPNPFuncName::DPNP_FN_MULTIPLY_EXT][eft_BLN][eft_BLN] = { - eft_BLN, (void*)dpnp_multiply_c_ext}; - fmap[DPNPFuncName::DPNP_FN_MULTIPLY_EXT][eft_BLN][eft_INT] = { - eft_INT, (void*)dpnp_multiply_c_ext}; - fmap[DPNPFuncName::DPNP_FN_MULTIPLY_EXT][eft_BLN][eft_LNG] = { - eft_LNG, (void*)dpnp_multiply_c_ext}; - fmap[DPNPFuncName::DPNP_FN_MULTIPLY_EXT][eft_BLN][eft_FLT] = { - eft_FLT, (void*)dpnp_multiply_c_ext}; - fmap[DPNPFuncName::DPNP_FN_MULTIPLY_EXT][eft_BLN][eft_DBL] = { - eft_DBL, (void*)dpnp_multiply_c_ext}; - fmap[DPNPFuncName::DPNP_FN_MULTIPLY_EXT][eft_BLN][eft_C64] = { - eft_C64, (void*)dpnp_multiply_c_ext, bool, std::complex>}; - fmap[DPNPFuncName::DPNP_FN_MULTIPLY_EXT][eft_BLN][eft_C128] = { - eft_C128, (void*)dpnp_multiply_c_ext, bool, std::complex>}; - - fmap[DPNPFuncName::DPNP_FN_MULTIPLY_EXT][eft_INT][eft_BLN] = { - eft_INT, (void*)dpnp_multiply_c_ext}; - 
fmap[DPNPFuncName::DPNP_FN_MULTIPLY_EXT][eft_INT][eft_INT] = { - eft_INT, (void*)dpnp_multiply_c_ext}; - fmap[DPNPFuncName::DPNP_FN_MULTIPLY_EXT][eft_INT][eft_LNG] = { - eft_LNG, (void*)dpnp_multiply_c_ext}; - fmap[DPNPFuncName::DPNP_FN_MULTIPLY_EXT][eft_INT][eft_FLT] = { - eft_FLT, (void*)dpnp_multiply_c_ext}; - fmap[DPNPFuncName::DPNP_FN_MULTIPLY_EXT][eft_INT][eft_DBL] = { - eft_DBL, (void*)dpnp_multiply_c_ext}; - fmap[DPNPFuncName::DPNP_FN_MULTIPLY_EXT][eft_INT][eft_C64] = { - eft_C64, (void*)dpnp_multiply_c_ext, int32_t, std::complex>}; - fmap[DPNPFuncName::DPNP_FN_MULTIPLY_EXT][eft_INT][eft_C128] = { - eft_C128, (void*)dpnp_multiply_c_ext, int32_t, std::complex>}; - - fmap[DPNPFuncName::DPNP_FN_MULTIPLY_EXT][eft_LNG][eft_BLN] = { - eft_LNG, (void*)dpnp_multiply_c_ext}; - fmap[DPNPFuncName::DPNP_FN_MULTIPLY_EXT][eft_LNG][eft_INT] = { - eft_LNG, (void*)dpnp_multiply_c_ext}; - fmap[DPNPFuncName::DPNP_FN_MULTIPLY_EXT][eft_LNG][eft_LNG] = { - eft_LNG, (void*)dpnp_multiply_c_ext}; - fmap[DPNPFuncName::DPNP_FN_MULTIPLY_EXT][eft_LNG][eft_FLT] = { - eft_FLT, (void*)dpnp_multiply_c_ext}; - fmap[DPNPFuncName::DPNP_FN_MULTIPLY_EXT][eft_LNG][eft_DBL] = { - eft_DBL, (void*)dpnp_multiply_c_ext}; - fmap[DPNPFuncName::DPNP_FN_MULTIPLY_EXT][eft_LNG][eft_C64] = { - eft_C64, (void*)dpnp_multiply_c_ext, int64_t, std::complex>}; - fmap[DPNPFuncName::DPNP_FN_MULTIPLY_EXT][eft_LNG][eft_C128] = { - eft_C128, (void*)dpnp_multiply_c_ext, int64_t, std::complex>}; - - fmap[DPNPFuncName::DPNP_FN_MULTIPLY_EXT][eft_FLT][eft_BLN] = { - eft_FLT, (void*)dpnp_multiply_c_ext}; - fmap[DPNPFuncName::DPNP_FN_MULTIPLY_EXT][eft_FLT][eft_INT] = { - eft_FLT, (void*)dpnp_multiply_c_ext}; - fmap[DPNPFuncName::DPNP_FN_MULTIPLY_EXT][eft_FLT][eft_LNG] = { - eft_FLT, (void*)dpnp_multiply_c_ext}; - fmap[DPNPFuncName::DPNP_FN_MULTIPLY_EXT][eft_FLT][eft_FLT] = { - eft_FLT, (void*)dpnp_multiply_c_ext}; - fmap[DPNPFuncName::DPNP_FN_MULTIPLY_EXT][eft_FLT][eft_DBL] = { - eft_DBL, (void*)dpnp_multiply_c_ext}; - 
fmap[DPNPFuncName::DPNP_FN_MULTIPLY_EXT][eft_FLT][eft_C64] = { - eft_C64, (void*)dpnp_multiply_c_ext, float, std::complex>}; - fmap[DPNPFuncName::DPNP_FN_MULTIPLY_EXT][eft_FLT][eft_C128] = { - eft_C128, (void*)dpnp_multiply_c_ext, float, std::complex>}; - - fmap[DPNPFuncName::DPNP_FN_MULTIPLY_EXT][eft_DBL][eft_BLN] = { - eft_DBL, (void*)dpnp_multiply_c_ext}; - fmap[DPNPFuncName::DPNP_FN_MULTIPLY_EXT][eft_DBL][eft_INT] = { - eft_DBL, (void*)dpnp_multiply_c_ext}; - fmap[DPNPFuncName::DPNP_FN_MULTIPLY_EXT][eft_DBL][eft_LNG] = { - eft_DBL, (void*)dpnp_multiply_c_ext}; - fmap[DPNPFuncName::DPNP_FN_MULTIPLY_EXT][eft_DBL][eft_FLT] = { - eft_DBL, (void*)dpnp_multiply_c_ext}; - fmap[DPNPFuncName::DPNP_FN_MULTIPLY_EXT][eft_DBL][eft_DBL] = { - eft_DBL, (void*)dpnp_multiply_c_ext}; - fmap[DPNPFuncName::DPNP_FN_MULTIPLY_EXT][eft_DBL][eft_C64] = { - eft_C64, (void*)dpnp_multiply_c_ext, double, std::complex>}; - fmap[DPNPFuncName::DPNP_FN_MULTIPLY_EXT][eft_DBL][eft_C128] = { - eft_C128, (void*)dpnp_multiply_c_ext, double, std::complex>}; - - fmap[DPNPFuncName::DPNP_FN_MULTIPLY_EXT][eft_C64][eft_BLN] = { - eft_C64, (void*)dpnp_multiply_c_ext, std::complex, bool>}; - fmap[DPNPFuncName::DPNP_FN_MULTIPLY_EXT][eft_C64][eft_INT] = { - eft_C64, (void*)dpnp_multiply_c_ext, std::complex, int32_t>}; - fmap[DPNPFuncName::DPNP_FN_MULTIPLY_EXT][eft_C64][eft_LNG] = { - eft_C64, (void*)dpnp_multiply_c_ext, std::complex, int64_t>}; - fmap[DPNPFuncName::DPNP_FN_MULTIPLY_EXT][eft_C64][eft_FLT] = { - eft_C64, (void*)dpnp_multiply_c_ext, std::complex, float>}; - fmap[DPNPFuncName::DPNP_FN_MULTIPLY_EXT][eft_C64][eft_DBL] = { - eft_C64, (void*)dpnp_multiply_c_ext, std::complex, double>}; - fmap[DPNPFuncName::DPNP_FN_MULTIPLY_EXT][eft_C64][eft_C64] = { - eft_C64, (void*)dpnp_multiply_c_ext, std::complex, std::complex>}; - fmap[DPNPFuncName::DPNP_FN_MULTIPLY_EXT][eft_C64][eft_C128] = { - eft_C128, (void*)dpnp_multiply_c_ext, std::complex, std::complex>}; - - 
fmap[DPNPFuncName::DPNP_FN_MULTIPLY_EXT][eft_C128][eft_BLN] = { - eft_C128, (void*)dpnp_multiply_c_ext, std::complex, bool>}; - fmap[DPNPFuncName::DPNP_FN_MULTIPLY_EXT][eft_C128][eft_INT] = { - eft_C128, (void*)dpnp_multiply_c_ext, std::complex, int32_t>}; - fmap[DPNPFuncName::DPNP_FN_MULTIPLY_EXT][eft_C128][eft_LNG] = { - eft_C128, (void*)dpnp_multiply_c_ext, std::complex, int64_t>}; - fmap[DPNPFuncName::DPNP_FN_MULTIPLY_EXT][eft_C128][eft_FLT] = { - eft_C128, (void*)dpnp_multiply_c_ext, std::complex, float>}; - fmap[DPNPFuncName::DPNP_FN_MULTIPLY_EXT][eft_C128][eft_DBL] = { - eft_C128, (void*)dpnp_multiply_c_ext, std::complex, double>}; - fmap[DPNPFuncName::DPNP_FN_MULTIPLY_EXT][eft_C128][eft_C64] = { - eft_C128, (void*)dpnp_multiply_c_ext, std::complex, std::complex>}; - fmap[DPNPFuncName::DPNP_FN_MULTIPLY_EXT][eft_C128][eft_C128] = { - eft_C128, (void*)dpnp_multiply_c_ext, std::complex, std::complex>}; - fmap[DPNPFuncName::DPNP_FN_POWER][eft_INT][eft_INT] = {eft_INT, (void*)dpnp_power_c_default}; fmap[DPNPFuncName::DPNP_FN_POWER][eft_INT][eft_LNG] = {eft_LNG, @@ -1962,6 +1911,8 @@ static void func_map_init_elemwise_2arg_3type(func_map_t& fmap) fmap[DPNPFuncName::DPNP_FN_SUBTRACT_EXT][eft_DBL][eft_DBL] = { eft_DBL, (void*)dpnp_subtract_c_ext}; + func_map_elemwise_2arg_3type_helper(fmap); + return; } diff --git a/dpnp/backend/kernels/dpnp_krnl_logic.cpp b/dpnp/backend/kernels/dpnp_krnl_logic.cpp index be1bb1bab79..157347aa90c 100644 --- a/dpnp/backend/kernels/dpnp_krnl_logic.cpp +++ b/dpnp/backend/kernels/dpnp_krnl_logic.cpp @@ -403,11 +403,11 @@ DPCTLSyclEventRef (*dpnp_any_ext_c)(DPCTLSyclQueueRef, \ auto kernel_parallel_for_func = [=](sycl::nd_item<1> nd_it) { \ auto sg = nd_it.get_sub_group(); \ - size_t start = vec_sz * (nd_it.get_group(0) * nd_it.get_local_range(0) + \ - sg.get_group_id()[0] * sg.get_max_local_range()[0]); \ - size_t end = start + static_cast(vec_sz); \ + const auto max_sg_size = sg.get_max_local_range()[0]; \ + const size_t start = vec_sz 
* (nd_it.get_group(0) * nd_it.get_local_range(0) + \ + sg.get_group_id()[0] * max_sg_size); \ \ - if (end < result_size) { \ + if (start + static_cast(vec_sz) * max_sg_size < result_size) { \ sycl::vec<_DataType_input1, vec_sz> x1 = \ sg.load(sycl::multi_ptr<_DataType_input1, global_space>(&input1_data[start])); \ sycl::vec res_vec; \ @@ -647,11 +647,11 @@ static void func_map_logic_1arg_1type_helper(func_map_t& fmap) \ auto kernel_parallel_for_func = [=](sycl::nd_item<1> nd_it) { \ auto sg = nd_it.get_sub_group(); \ - size_t start = vec_sz * (nd_it.get_group(0) * nd_it.get_local_range(0) + \ - sg.get_group_id()[0] * sg.get_max_local_range()[0]); \ - size_t end = start + static_cast(vec_sz); \ + const auto max_sg_size = sg.get_max_local_range()[0]; \ + const size_t start = vec_sz * (nd_it.get_group(0) * nd_it.get_local_range(0) + \ + sg.get_group_id()[0] * max_sg_size); \ \ - if (end < result_size) { \ + if (start + static_cast(vec_sz) * max_sg_size < result_size) { \ sycl::vec<_DataType_input1, vec_sz> x1 = \ sg.load(sycl::multi_ptr<_DataType_input1, global_space>(&input1_data[start])); \ sycl::vec<_DataType_input2, vec_sz> x2 = \ diff --git a/dpnp/backend/src/dpnp_fptr.hpp b/dpnp/backend/src/dpnp_fptr.hpp index 76116cafae7..4cb66485831 100644 --- a/dpnp/backend/src/dpnp_fptr.hpp +++ b/dpnp/backend/src/dpnp_fptr.hpp @@ -99,6 +99,55 @@ typedef func_type_map_factory_t, func_type_pair_t>, func_type_pair_t>> func_type_map_t; +/** + * Return an enum value of result type populated from input types. + */ +template +static constexpr DPNPFuncType populate_func_types() +{ + if constexpr (FT1 == DPNPFuncType::DPNP_FT_NONE) + { + throw std::runtime_error("Templated enum value of FT1 is None"); + } + else if constexpr (FT2 == DPNPFuncType::DPNP_FT_NONE) + { + throw std::runtime_error("Templated enum value of FT2 is None"); + } + return (FT1 < FT2) ? FT2 : FT1; +} + +/** + * Removes parentheses for a passed list of types separated by comma. 
+ * It's intended to be used in operations macro.
+ */
+#define MACRO_UNPACK_TYPES(...) __VA_ARGS__
+
+/**
+ * Implements std::is_same<> with variadic number of types to compare with
+ * and when type T has to match only one of types Ts.
+ */
+template
+struct is_any : std::disjunction...> {};
+
+/**
+ * Implements std::is_same<> with variadic number of types to compare with
+ * and when type T has to match every type from Ts sequence.
+ */
+template
+struct are_same : std::conjunction...> {};
+
+/**
+ * A template constant to check if both types T1 and T2 match every type from Ts sequence.
+ */
+template
+constexpr auto both_types_are_same = std::conjunction_v, are_same>;
+
+/**
+ * A template constant to check if both types T1 and T2 don't match any type from Ts sequence.
+ */
+template
+constexpr auto none_of_both_types = !std::disjunction_v, is_any>;
+
 /**
  * FPTR interface initialization functions
  */
diff --git a/dpnp/dpnp_iface.py b/dpnp/dpnp_iface.py
index 4806b511aff..1c60d1c999e 100644
--- a/dpnp/dpnp_iface.py
+++ b/dpnp/dpnp_iface.py
@@ -225,6 +225,7 @@ def default_float_type(device=None, sycl_queue=None):
 def get_dpnp_descriptor(ext_obj,
                        copy_when_strides=True,
                        copy_when_nondefault_queue=True,
+                       alloc_usm_type=None,
                        alloc_queue=None):
     """
     Return True:
@@ -245,9 +246,9 @@ def get_dpnp_descriptor(ext_obj,
         return False
 
     # If input object is a scalar, it means it was allocated on host memory.
-    # We need to copy it to device memory according to compute follows data paradigm.
+    # We need to copy it to USM memory according to compute follows data paradigm.
if isscalar(ext_obj): - ext_obj = array(ext_obj, sycl_queue=alloc_queue) + ext_obj = array(ext_obj, usm_type=alloc_usm_type, sycl_queue=alloc_queue) # while dpnp functions have no implementation with strides support # we need to create a non-strided copy diff --git a/dpnp/dpnp_iface_logic.py b/dpnp/dpnp_iface_logic.py index de7537a4287..e94b0f6c1ef 100644 --- a/dpnp/dpnp_iface_logic.py +++ b/dpnp/dpnp_iface_logic.py @@ -277,11 +277,13 @@ def equal(x1, # at least either x1 or x2 has to be an array pass else: - # get a common queue to copy data from the host into a device if any input is scalar - queue = get_common_allocation_queue([x1, x2]) if dpnp.isscalar(x1) or dpnp.isscalar(x2) else None + # get USM type and queue to copy scalar from the host memory into a USM allocation + usm_type, queue = get_usm_allocations([x1, x2]) if dpnp.isscalar(x1) or dpnp.isscalar(x2) else (None, None) - x1_desc = dpnp.get_dpnp_descriptor(x1, copy_when_strides=False, copy_when_nondefault_queue=False, alloc_queue=queue) - x2_desc = dpnp.get_dpnp_descriptor(x2, copy_when_strides=False, copy_when_nondefault_queue=False, alloc_queue=queue) + x1_desc = dpnp.get_dpnp_descriptor(x1, copy_when_strides=False, copy_when_nondefault_queue=False, + alloc_usm_type=usm_type, alloc_queue=queue) + x2_desc = dpnp.get_dpnp_descriptor(x2, copy_when_strides=False, copy_when_nondefault_queue=False, + alloc_usm_type=usm_type, alloc_queue=queue) if x1_desc and x2_desc: return dpnp_equal(x1_desc, x2_desc).get_pyobj() return call_origin(numpy.equal, x1, x2) @@ -345,11 +347,13 @@ def greater(x1, # at least either x1 or x2 has to be an array pass else: - # get a common queue to copy data from the host into a device if any input is scalar - queue = get_common_allocation_queue([x1, x2]) if dpnp.isscalar(x1) or dpnp.isscalar(x2) else None + # get USM type and queue to copy scalar from the host memory into a USM allocation + usm_type, queue = get_usm_allocations([x1, x2]) if dpnp.isscalar(x1) or dpnp.isscalar(x2) 
else (None, None) - x1_desc = dpnp.get_dpnp_descriptor(x1, copy_when_strides=False, copy_when_nondefault_queue=False, alloc_queue=queue) - x2_desc = dpnp.get_dpnp_descriptor(x2, copy_when_strides=False, copy_when_nondefault_queue=False, alloc_queue=queue) + x1_desc = dpnp.get_dpnp_descriptor(x1, copy_when_strides=False, copy_when_nondefault_queue=False, + alloc_usm_type=usm_type, alloc_queue=queue) + x2_desc = dpnp.get_dpnp_descriptor(x2, copy_when_strides=False, copy_when_nondefault_queue=False, + alloc_usm_type=usm_type, alloc_queue=queue) if x1_desc and x2_desc: return dpnp_greater(x1_desc, x2_desc).get_pyobj() return call_origin(numpy.greater, x1, x2) @@ -413,11 +417,13 @@ def greater_equal(x1, # at least either x1 or x2 has to be an array pass else: - # get a common queue to copy data from the host into a device if any input is scalar - queue = get_common_allocation_queue([x1, x2]) if dpnp.isscalar(x1) or dpnp.isscalar(x2) else None + # get USM type and queue to copy scalar from the host memory into a USM allocation + usm_type, queue = get_usm_allocations([x1, x2]) if dpnp.isscalar(x1) or dpnp.isscalar(x2) else (None, None) - x1_desc = dpnp.get_dpnp_descriptor(x1, copy_when_strides=False, copy_when_nondefault_queue=False, alloc_queue=queue) - x2_desc = dpnp.get_dpnp_descriptor(x2, copy_when_strides=False, copy_when_nondefault_queue=False, alloc_queue=queue) + x1_desc = dpnp.get_dpnp_descriptor(x1, copy_when_strides=False, copy_when_nondefault_queue=False, + alloc_usm_type=usm_type, alloc_queue=queue) + x2_desc = dpnp.get_dpnp_descriptor(x2, copy_when_strides=False, copy_when_nondefault_queue=False, + alloc_usm_type=usm_type, alloc_queue=queue) if x1_desc and x2_desc: return dpnp_greater_equal(x1_desc, x2_desc).get_pyobj() return call_origin(numpy.greater_equal, x1, x2) @@ -659,11 +665,13 @@ def less(x1, # at least either x1 or x2 has to be an array pass else: - # get a common queue to copy data from the host into a device if any input is scalar - queue = 
get_common_allocation_queue([x1, x2]) if dpnp.isscalar(x1) or dpnp.isscalar(x2) else None + # get USM type and queue to copy scalar from the host memory into a USM allocation + usm_type, queue = get_usm_allocations([x1, x2]) if dpnp.isscalar(x1) or dpnp.isscalar(x2) else (None, None) - x1_desc = dpnp.get_dpnp_descriptor(x1, copy_when_strides=False, copy_when_nondefault_queue=False, alloc_queue=queue) - x2_desc = dpnp.get_dpnp_descriptor(x2, copy_when_strides=False, copy_when_nondefault_queue=False, alloc_queue=queue) + x1_desc = dpnp.get_dpnp_descriptor(x1, copy_when_strides=False, copy_when_nondefault_queue=False, + alloc_usm_type=usm_type, alloc_queue=queue) + x2_desc = dpnp.get_dpnp_descriptor(x2, copy_when_strides=False, copy_when_nondefault_queue=False, + alloc_usm_type=usm_type, alloc_queue=queue) if x1_desc and x2_desc: return dpnp_less(x1_desc, x2_desc).get_pyobj() return call_origin(numpy.less, x1, x2) @@ -727,11 +735,13 @@ def less_equal(x1, # at least either x1 or x2 has to be an array pass else: - # get a common queue to copy data from the host into a device if any input is scalar - queue = get_common_allocation_queue([x1, x2]) if dpnp.isscalar(x1) or dpnp.isscalar(x2) else None + # get USM type and queue to copy scalar from the host memory into a USM allocation + usm_type, queue = get_usm_allocations([x1, x2]) if dpnp.isscalar(x1) or dpnp.isscalar(x2) else (None, None) - x1_desc = dpnp.get_dpnp_descriptor(x1, copy_when_strides=False, copy_when_nondefault_queue=False, alloc_queue=queue) - x2_desc = dpnp.get_dpnp_descriptor(x2, copy_when_strides=False, copy_when_nondefault_queue=False, alloc_queue=queue) + x1_desc = dpnp.get_dpnp_descriptor(x1, copy_when_strides=False, copy_when_nondefault_queue=False, + alloc_usm_type=usm_type, alloc_queue=queue) + x2_desc = dpnp.get_dpnp_descriptor(x2, copy_when_strides=False, copy_when_nondefault_queue=False, + alloc_usm_type=usm_type, alloc_queue=queue) if x1_desc and x2_desc: return dpnp_less_equal(x1_desc, 
x2_desc).get_pyobj() return call_origin(numpy.less_equal, x1, x2) @@ -794,11 +804,13 @@ def logical_and(x1, # at least either x1 or x2 has to be an array pass else: - # get a common queue to copy data from the host into a device if any input is scalar - queue = get_common_allocation_queue([x1, x2]) if dpnp.isscalar(x1) or dpnp.isscalar(x2) else None + # get USM type and queue to copy scalar from the host memory into a USM allocation + usm_type, queue = get_usm_allocations([x1, x2]) if dpnp.isscalar(x1) or dpnp.isscalar(x2) else (None, None) - x1_desc = dpnp.get_dpnp_descriptor(x1, copy_when_strides=False, copy_when_nondefault_queue=False, alloc_queue=queue) - x2_desc = dpnp.get_dpnp_descriptor(x2, copy_when_strides=False, copy_when_nondefault_queue=False, alloc_queue=queue) + x1_desc = dpnp.get_dpnp_descriptor(x1, copy_when_strides=False, copy_when_nondefault_queue=False, + alloc_usm_type=usm_type, alloc_queue=queue) + x2_desc = dpnp.get_dpnp_descriptor(x2, copy_when_strides=False, copy_when_nondefault_queue=False, + alloc_usm_type=usm_type, alloc_queue=queue) if x1_desc and x2_desc: return dpnp_logical_and(x1_desc, x2_desc).get_pyobj() return call_origin(numpy.logical_and, x1, x2) @@ -918,11 +930,13 @@ def logical_or(x1, # at least either x1 or x2 has to be an array pass else: - # get a common queue to copy data from the host into a device if any input is scalar - queue = get_common_allocation_queue([x1, x2]) if dpnp.isscalar(x1) or dpnp.isscalar(x2) else None + # get USM type and queue to copy scalar from the host memory into a USM allocation + usm_type, queue = get_usm_allocations([x1, x2]) if dpnp.isscalar(x1) or dpnp.isscalar(x2) else (None, None) - x1_desc = dpnp.get_dpnp_descriptor(x1, copy_when_strides=False, copy_when_nondefault_queue=False, alloc_queue=queue) - x2_desc = dpnp.get_dpnp_descriptor(x2, copy_when_strides=False, copy_when_nondefault_queue=False, alloc_queue=queue) + x1_desc = dpnp.get_dpnp_descriptor(x1, copy_when_strides=False, 
copy_when_nondefault_queue=False, + alloc_usm_type=usm_type, alloc_queue=queue) + x2_desc = dpnp.get_dpnp_descriptor(x2, copy_when_strides=False, copy_when_nondefault_queue=False, + alloc_usm_type=usm_type, alloc_queue=queue) if x1_desc and x2_desc: return dpnp_logical_or(x1_desc, x2_desc).get_pyobj() return call_origin(numpy.logical_or, x1, x2) @@ -985,11 +999,13 @@ def logical_xor(x1, # at least either x1 or x2 has to be an array pass else: - # get a common queue to copy data from the host into a device if any input is scalar - queue = get_common_allocation_queue([x1, x2]) if dpnp.isscalar(x1) or dpnp.isscalar(x2) else None + # get USM type and queue to copy scalar from the host memory into a USM allocation + usm_type, queue = get_usm_allocations([x1, x2]) if dpnp.isscalar(x1) or dpnp.isscalar(x2) else (None, None) - x1_desc = dpnp.get_dpnp_descriptor(x1, copy_when_strides=False, copy_when_nondefault_queue=False, alloc_queue=queue) - x2_desc = dpnp.get_dpnp_descriptor(x2, copy_when_strides=False, copy_when_nondefault_queue=False, alloc_queue=queue) + x1_desc = dpnp.get_dpnp_descriptor(x1, copy_when_strides=False, copy_when_nondefault_queue=False, + alloc_usm_type=usm_type, alloc_queue=queue) + x2_desc = dpnp.get_dpnp_descriptor(x2, copy_when_strides=False, copy_when_nondefault_queue=False, + alloc_usm_type=usm_type, alloc_queue=queue) if x1_desc and x2_desc: return dpnp_logical_xor(x1_desc, x2_desc).get_pyobj() return call_origin(numpy.logical_xor, x1, x2) @@ -1053,11 +1069,13 @@ def not_equal(x1, # at least either x1 or x2 has to be an array pass else: - # get a common queue to copy data from the host into a device if any input is scalar - queue = get_common_allocation_queue([x1, x2]) if dpnp.isscalar(x1) or dpnp.isscalar(x2) else None + # get USM type and queue to copy scalar from the host memory into a USM allocation + usm_type, queue = get_usm_allocations([x1, x2]) if dpnp.isscalar(x1) or dpnp.isscalar(x2) else (None, None) - x1_desc = 
dpnp.get_dpnp_descriptor(x1, copy_when_strides=False, copy_when_nondefault_queue=False, alloc_queue=queue) - x2_desc = dpnp.get_dpnp_descriptor(x2, copy_when_strides=False, copy_when_nondefault_queue=False, alloc_queue=queue) + x1_desc = dpnp.get_dpnp_descriptor(x1, copy_when_strides=False, copy_when_nondefault_queue=False, + alloc_usm_type=usm_type, alloc_queue=queue) + x2_desc = dpnp.get_dpnp_descriptor(x2, copy_when_strides=False, copy_when_nondefault_queue=False, + alloc_usm_type=usm_type, alloc_queue=queue) if x1_desc and x2_desc: return dpnp_not_equal(x1_desc, x2_desc).get_pyobj() return call_origin(numpy.not_equal, x1, x2) diff --git a/dpnp/dpnp_iface_mathematical.py b/dpnp/dpnp_iface_mathematical.py index ce9f340e8e4..e254e916b84 100644 --- a/dpnp/dpnp_iface_mathematical.py +++ b/dpnp/dpnp_iface_mathematical.py @@ -154,56 +154,66 @@ def absolute(x1, **kwargs): return call_origin(numpy.absolute, x1, **kwargs) -def add(x1, x2, dtype=None, out=None, where=True, **kwargs): +def add(x1, + x2, + /, + out=None, + *, + where=True, + dtype=None, + subok=True, + **kwargs): """ Add arguments element-wise. For full documentation refer to :obj:`numpy.add`. + Returns + ------- + y : dpnp.ndarray + The sum of `x1` and `x2`, element-wise. + Limitations ----------- - Parameters ``x1`` and ``x2`` are supported as either :obj:`dpnp.ndarray` or scalar. - Parameters ``dtype``, ``out`` and ``where`` are supported with their default values. + Parameters `x1` and `x2` are supported as either :class:`dpnp.ndarray` or scalar, + but not both (at least either `x1` or `x2` should be as :class:`dpnp.ndarray`). + Parameters `out`, `where`, `dtype` and `subok` are supported with their default values. Keyword arguments ``kwargs`` are currently unsupported. - Otherwise the functions will be executed sequentially on CPU. + Otherwise the function will be executed sequentially on CPU. Input array data types are limited by supported DPNP :ref:`Data types`. 
Examples -------- - >>> import dpnp as np - >>> a = np.array([1, 2, 3]) - >>> b = np.array([1, 2, 3]) - >>> result = np.add(a, b) - >>> [x for x in result] + >>> import dpnp as dp + >>> a = dp.array([1, 2, 3]) + >>> b = dp.array([1, 2, 3]) + >>> result = dp.add(a, b) + >>> print(result) [2, 4, 6] """ - x1_is_scalar = dpnp.isscalar(x1) - x2_is_scalar = dpnp.isscalar(x2) - x1_desc = dpnp.get_dpnp_descriptor(x1, copy_when_strides=False, copy_when_nondefault_queue=False) - x2_desc = dpnp.get_dpnp_descriptor(x2, copy_when_strides=False, copy_when_nondefault_queue=False) + if out is not None: + pass + elif where is not True: + pass + elif dtype is not None: + pass + elif subok is not True: + pass + elif dpnp.isscalar(x1) and dpnp.isscalar(x2): + # at least either x1 or x2 has to be an array + pass + else: + # get USM type and queue to copy scalar from the host memory into a USM allocation + usm_type, queue = get_usm_allocations([x1, x2]) if dpnp.isscalar(x1) or dpnp.isscalar(x2) else (None, None) - if x1_desc and x2_desc and not kwargs: - if not x1_desc and not x1_is_scalar: - pass - elif not x2_desc and not x2_is_scalar: - pass - elif x1_is_scalar and x2_is_scalar: - pass - elif x1_desc and x1_desc.ndim == 0: - pass - elif x2_desc and x2_desc.ndim == 0: - pass - elif dtype is not None: - pass - elif out is not None: - pass - elif not where: - pass - else: - out_desc = dpnp.get_dpnp_descriptor(out, copy_when_nondefault_queue=False) if out is not None else None - return dpnp_add(x1_desc, x2_desc, dtype, out_desc, where).get_pyobj() + x1_desc = dpnp.get_dpnp_descriptor(x1, copy_when_strides=False, copy_when_nondefault_queue=False, + alloc_usm_type=usm_type, alloc_queue=queue) + x2_desc = dpnp.get_dpnp_descriptor(x2, copy_when_strides=False, copy_when_nondefault_queue=False, + alloc_usm_type=usm_type, alloc_queue=queue) + if x1_desc and x2_desc: + return dpnp_add(x1_desc, x2_desc, dtype=dtype, out=out, where=where).get_pyobj() return call_origin(numpy.add, x1, x2, 
dtype=dtype, out=out, where=where, **kwargs) @@ -1093,11 +1103,11 @@ def multiply(x1, ------- y : {dpnp.ndarray, scalar} The product of `x1` and `x2`, element-wise. - The result is a scalar if both x1 and x2 are scalars. Limitations ----------- - Parameters `x1` and `x2` are supported as either :class:`dpnp.ndarray` or scalar. + Parameters `x1` and `x2` are supported as either :class:`dpnp.ndarray` or scalar, + but not both (at least either `x1` or `x2` should be as :class:`dpnp.ndarray`). Parameters `out`, `where`, `dtype` and `subok` are supported with their default values. Keyword arguments ``kwargs`` are currently unsupported. Otherwise the functions will be executed sequentially on CPU. @@ -1122,14 +1132,16 @@ def multiply(x1, elif subok is not True: pass elif dpnp.isscalar(x1) and dpnp.isscalar(x2): - # keep the result in host memory, if both inputs are scalars - return x1 * x2 + # at least either x1 or x2 has to be an array + pass else: - # get a common queue to copy data from the host into a device if any input is scalar - queue = get_common_allocation_queue([x1, x2]) if dpnp.isscalar(x1) or dpnp.isscalar(x2) else None + # get USM type and queue to copy scalar from the host memory into a USM allocation + usm_type, queue = get_usm_allocations([x1, x2]) if dpnp.isscalar(x1) or dpnp.isscalar(x2) else (None, None) - x1_desc = dpnp.get_dpnp_descriptor(x1, copy_when_strides=False, copy_when_nondefault_queue=False, alloc_queue=queue) - x2_desc = dpnp.get_dpnp_descriptor(x2, copy_when_strides=False, copy_when_nondefault_queue=False, alloc_queue=queue) + x1_desc = dpnp.get_dpnp_descriptor(x1, copy_when_strides=False, copy_when_nondefault_queue=False, + alloc_usm_type=usm_type, alloc_queue=queue) + x2_desc = dpnp.get_dpnp_descriptor(x2, copy_when_strides=False, copy_when_nondefault_queue=False, + alloc_usm_type=usm_type, alloc_queue=queue) if x1_desc and x2_desc: return dpnp_multiply(x1_desc, x2_desc, dtype=dtype, out=out, where=where).get_pyobj() diff --git 
a/dpnp/dpnp_utils/dpnp_algo_utils.pyx b/dpnp/dpnp_utils/dpnp_algo_utils.pyx index 4913d585491..abdc4107f64 100644 --- a/dpnp/dpnp_utils/dpnp_algo_utils.pyx +++ b/dpnp/dpnp_utils/dpnp_algo_utils.pyx @@ -34,7 +34,7 @@ This module contains differnt helpers and utilities import numpy import dpctl -import dpctl.tensor as dpt +import dpctl.utils as dpu import dpnp.config as config import dpnp.dpnp_container as dpnp_container @@ -70,7 +70,7 @@ __all__ = [ "dpnp_descriptor", "get_axis_indeces", "get_axis_offsets", - "get_common_allocation_queue", + "get_usm_allocations", "_get_linear_index", "map_dtype_to_device", "normalize_axis", @@ -163,7 +163,7 @@ def call_origin(function, *args, **kwargs): kwargx = convert_item(kwarg) kwargs_new[key] = kwargx - exec_q = dpctl.utils.get_execution_queue(alloc_queues) + exec_q = dpu.get_execution_queue(alloc_queues) if exec_q is None: exec_q = dpnp.get_normalized_queue_device(sycl_queue=sycl_queue) # print(f"DPNP call_origin(): bakend called. \n\t function={function}, \n\t args_new={args_new}, \n\t kwargs_new={kwargs_new}, \n\t dpnp_inplace={dpnp_inplace}") @@ -220,30 +220,49 @@ def unwrap_array(x1): return x1 -def get_common_allocation_queue(objects): - """ - Given a list of objects returns the queue which can be used for a memory allocation - to follow compute follows data paradigm, or returns `None` if the default queue can be used. - An exception will be raised, if the paradigm is broked for the given list of objects. 
- """ - if not isinstance(objects, (list, tuple)): - raise TypeError("Expected a list or a tuple, got {}".format(type(objects))) - - if len(objects) == 0: +def _get_coerced_usm_type(objects): + types_in_use = [obj.usm_type for obj in objects if hasattr(obj, "usm_type")] + if len(types_in_use) == 0: return None + elif len(types_in_use) == 1: + return types_in_use[0] + + common_usm_type = dpu.get_coerced_usm_type(types_in_use) + if common_usm_type is None: + raise ValueError("Input arrays must have coerced USM types") + return common_usm_type + +def _get_common_allocation_queue(objects): queues_in_use = [obj.sycl_queue for obj in objects if hasattr(obj, "sycl_queue")] if len(queues_in_use) == 0: return None elif len(queues_in_use) == 1: return queues_in_use[0] - common_queue = dpt.get_execution_queue(queues_in_use) + common_queue = dpu.get_execution_queue(queues_in_use) if common_queue is None: raise ValueError("Input arrays must be allocated on the same SYCL queue") return common_queue +def get_usm_allocations(objects): + """ + Given a list of objects returns a tuple of USM type and SYCL queue + which can be used for a memory allocation and to follow compute follows data paradigm, + or returns `(None, None)` if the default USM type and SYCL queue can be used. + An exception will be raised, if the paradigm is broked for the given list of objects. 
+ + """ + + if not isinstance(objects, (list, tuple)): + raise TypeError("Expected a list or a tuple, got {}".format(type(objects))) + + if len(objects) == 0: + return (None, None) + return (_get_coerced_usm_type(objects), _get_common_allocation_queue(objects)) + + def map_dtype_to_device(dtype, device): """ Map an input ``dtype`` with type ``device`` may use @@ -631,7 +650,7 @@ cdef tuple get_common_usm_allocation(dpnp_descriptor x1, dpnp_descriptor x2): "could not recognize common USM type for inputs of USM types {} and {}" "".format(array1_obj.usm_type, array2_obj.usm_type)) - common_sycl_queue = dpctl.utils.get_execution_queue((array1_obj.sycl_queue, array2_obj.sycl_queue)) + common_sycl_queue = dpu.get_execution_queue((array1_obj.sycl_queue, array2_obj.sycl_queue)) # TODO: refactor, remove when CFD is implemented in all array constructors if common_sycl_queue is None and array1_obj.sycl_context == array2_obj.sycl_context: common_sycl_queue = array1_obj.sycl_queue diff --git a/tests/skipped_tests.tbl b/tests/skipped_tests.tbl index 63c6cbd0d13..53bdec8af0a 100644 --- a/tests/skipped_tests.tbl +++ b/tests/skipped_tests.tbl @@ -769,7 +769,6 @@ tests/third_party/cupy/math_tests/test_arithmetic.py::TestArithmeticRaisesWithNu tests/third_party/cupy/math_tests/test_arithmetic.py::TestArithmeticRaisesWithNumpyInput_param_10_{name='remainder', nargs=2}::test_raises_with_numpy_input tests/third_party/cupy/math_tests/test_arithmetic.py::TestArithmeticRaisesWithNumpyInput_param_11_{name='mod', nargs=2}::test_raises_with_numpy_input tests/third_party/cupy/math_tests/test_arithmetic.py::TestArithmeticRaisesWithNumpyInput_param_1_{name='angle', nargs=1}::test_raises_with_numpy_input -tests/third_party/cupy/math_tests/test_arithmetic.py::TestArithmeticRaisesWithNumpyInput_param_2_{name='add', nargs=2}::test_raises_with_numpy_input tests/third_party/cupy/math_tests/test_arithmetic.py::TestArithmeticRaisesWithNumpyInput_param_4_{name='divide', 
nargs=2}::test_raises_with_numpy_input tests/third_party/cupy/math_tests/test_arithmetic.py::TestArithmeticRaisesWithNumpyInput_param_5_{name='power', nargs=2}::test_raises_with_numpy_input tests/third_party/cupy/math_tests/test_arithmetic.py::TestArithmeticRaisesWithNumpyInput_param_6_{name='subtract', nargs=2}::test_raises_with_numpy_input diff --git a/tests/skipped_tests_gpu.tbl b/tests/skipped_tests_gpu.tbl index c64c7fa45f9..af2dbd783a4 100644 --- a/tests/skipped_tests_gpu.tbl +++ b/tests/skipped_tests_gpu.tbl @@ -990,7 +990,6 @@ tests/third_party/cupy/math_tests/test_arithmetic.py::TestArithmeticRaisesWithNu tests/third_party/cupy/math_tests/test_arithmetic.py::TestArithmeticRaisesWithNumpyInput_param_10_{name='remainder', nargs=2}::test_raises_with_numpy_input tests/third_party/cupy/math_tests/test_arithmetic.py::TestArithmeticRaisesWithNumpyInput_param_11_{name='mod', nargs=2}::test_raises_with_numpy_input tests/third_party/cupy/math_tests/test_arithmetic.py::TestArithmeticRaisesWithNumpyInput_param_1_{name='angle', nargs=1}::test_raises_with_numpy_input -tests/third_party/cupy/math_tests/test_arithmetic.py::TestArithmeticRaisesWithNumpyInput_param_2_{name='add', nargs=2}::test_raises_with_numpy_input tests/third_party/cupy/math_tests/test_arithmetic.py::TestArithmeticRaisesWithNumpyInput_param_4_{name='divide', nargs=2}::test_raises_with_numpy_input tests/third_party/cupy/math_tests/test_arithmetic.py::TestArithmeticRaisesWithNumpyInput_param_5_{name='power', nargs=2}::test_raises_with_numpy_input tests/third_party/cupy/math_tests/test_arithmetic.py::TestArithmeticRaisesWithNumpyInput_param_6_{name='subtract', nargs=2}::test_raises_with_numpy_input diff --git a/tests/test_indexing.py b/tests/test_indexing.py index 091cf1345c4..1a40777afac 100644 --- a/tests/test_indexing.py +++ b/tests/test_indexing.py @@ -3,6 +3,9 @@ import dpnp import numpy +from numpy.testing import ( + assert_array_equal +) @pytest.mark.usefixtures("allow_fall_back_on_numpy") @@ -16,7 
+19,7 @@ def test_choose(): expected = numpy.choose([0, 0, 0, 0], [a, b, c]) result = dpnp.choose([0, 0, 0, 0], [ia, ib, ic]) - numpy.testing.assert_array_equal(expected, result) + assert_array_equal(expected, result) @pytest.mark.parametrize("offset", @@ -47,7 +50,7 @@ def test_diagonal(array, offset): ia = dpnp.array(a) expected = numpy.diagonal(a, offset) result = dpnp.diagonal(ia, offset) - numpy.testing.assert_array_equal(expected, result) + assert_array_equal(expected, result) @pytest.mark.parametrize("val", @@ -71,7 +74,7 @@ def test_fill_diagonal(array, val): ia = dpnp.array(a) expected = numpy.fill_diagonal(a, val) result = dpnp.fill_diagonal(ia, val) - numpy.testing.assert_array_equal(expected, result) + assert_array_equal(expected, result) @pytest.mark.parametrize("dimension", @@ -81,7 +84,7 @@ def test_fill_diagonal(array, val): def test_indices(dimension): expected = numpy.indices(dimension) result = dpnp.indices(dimension) - numpy.testing.assert_array_equal(expected, result) + assert_array_equal(expected, result) @pytest.mark.parametrize("array", @@ -107,7 +110,7 @@ def test_nonzero(array): ia = dpnp.array(array) expected = numpy.nonzero(a) result = dpnp.nonzero(ia) - numpy.testing.assert_array_equal(expected, result) + assert_array_equal(expected, result) @pytest.mark.usefixtures("allow_fall_back_on_numpy") @@ -137,7 +140,7 @@ def test_place1(arr, mask, vals): im = dpnp.array(m) numpy.place(a, m, vals) dpnp.place(ia, im, vals) - numpy.testing.assert_array_equal(a, ia) + assert_array_equal(a, ia) @pytest.mark.usefixtures("allow_fall_back_on_numpy") @@ -161,7 +164,7 @@ def test_place2(arr, mask, vals): im = dpnp.array(m) numpy.place(a, m, vals) dpnp.place(ia, im, vals) - numpy.testing.assert_array_equal(a, ia) + assert_array_equal(a, ia) @pytest.mark.usefixtures("allow_fall_back_on_numpy") @@ -186,7 +189,7 @@ def test_place3(arr, mask, vals): im = dpnp.array(m) numpy.place(a, m, vals) dpnp.place(ia, im, vals) - numpy.testing.assert_array_equal(a, ia) + 
assert_array_equal(a, ia) @pytest.mark.parametrize("v", @@ -211,7 +214,7 @@ def test_put(array, ind, v): ia = dpnp.array(a) numpy.put(a, ind, v) dpnp.put(ia, ind, v) - numpy.testing.assert_array_equal(a, ia) + assert_array_equal(a, ia) @pytest.mark.parametrize("v", @@ -236,7 +239,7 @@ def test_put2(array, ind, v): ia = dpnp.array(a) numpy.put(a, ind, v) dpnp.put(ia, ind, v) - numpy.testing.assert_array_equal(a, ia) + assert_array_equal(a, ia) def test_put3(): @@ -244,7 +247,7 @@ def test_put3(): ia = dpnp.array(a) dpnp.put(ia, [0, 2], [-44, -55]) numpy.put(a, [0, 2], [-44, -55]) - numpy.testing.assert_array_equal(a, ia) + assert_array_equal(a, ia) @pytest.mark.usefixtures("allow_fall_back_on_numpy") @@ -256,7 +259,7 @@ def test_put_along_axis_val_int(): for axis in range(2): numpy.put_along_axis(a, ind_r, 777, axis) dpnp.put_along_axis(ai, ind_r_i, 777, axis) - numpy.testing.assert_array_equal(a, ai) + assert_array_equal(a, ai) @pytest.mark.usefixtures("allow_fall_back_on_numpy") @@ -268,7 +271,7 @@ def test_put_along_axis1(): for axis in range(3): numpy.put_along_axis(a, ind_r, 777, axis) dpnp.put_along_axis(ai, ind_r_i, 777, axis) - numpy.testing.assert_array_equal(a, ai) + assert_array_equal(a, ai) @pytest.mark.usefixtures("allow_fall_back_on_numpy") @@ -280,7 +283,7 @@ def test_put_along_axis2(): for axis in range(3): numpy.put_along_axis(a, ind_r, [100, 200, 300, 400], axis) dpnp.put_along_axis(ai, ind_r_i, [100, 200, 300, 400], axis) - numpy.testing.assert_array_equal(a, ai) + assert_array_equal(a, ai) @pytest.mark.parametrize("vals", @@ -309,7 +312,7 @@ def test_putmask1(arr, mask, vals): iv = dpnp.array(v) numpy.putmask(a, m, v) dpnp.putmask(ia, im, iv) - numpy.testing.assert_array_equal(a, ia) + assert_array_equal(a, ia) @pytest.mark.parametrize("vals", @@ -334,7 +337,7 @@ def test_putmask2(arr, mask, vals): iv = dpnp.array(v) numpy.putmask(a, m, v) dpnp.putmask(ia, im, iv) - numpy.testing.assert_array_equal(a, ia) + assert_array_equal(a, ia) 
@pytest.mark.parametrize("vals", @@ -360,7 +363,7 @@ def test_putmask3(arr, mask, vals): iv = dpnp.array(v) numpy.putmask(a, m, v) dpnp.putmask(ia, im, iv) - numpy.testing.assert_array_equal(a, ia) + assert_array_equal(a, ia) def test_select(): @@ -378,7 +381,7 @@ def test_select(): ichoicelist = [ichoice_val1, ichoice_val2] expected = numpy.select(condlist, choicelist) result = dpnp.select(icondlist, ichoicelist) - numpy.testing.assert_array_equal(expected, result) + assert_array_equal(expected, result) @pytest.mark.parametrize("array_type", @@ -415,10 +418,9 @@ def test_take(array, indices, array_type, indices_type): iind = dpnp.array(ind) expected = numpy.take(a, ind) result = dpnp.take(ia, iind) - numpy.testing.assert_array_equal(expected, result) + assert_array_equal(expected, result) -@pytest.mark.usefixtures("allow_fall_back_on_numpy") def test_take_along_axis(): a = numpy.arange(16).reshape(4, 4) ai = dpnp.array(a) @@ -427,10 +429,9 @@ def test_take_along_axis(): for axis in range(2): expected = numpy.take_along_axis(a, ind_r, axis) result = dpnp.take_along_axis(ai, ind_r_i, axis) - numpy.testing.assert_array_equal(expected, result) + assert_array_equal(expected, result) -@pytest.mark.usefixtures("allow_fall_back_on_numpy") def test_take_along_axis1(): a = numpy.arange(64).reshape(4, 4, 4) ai = dpnp.array(a) @@ -439,7 +440,7 @@ def test_take_along_axis1(): for axis in range(3): expected = numpy.take_along_axis(a, ind_r, axis) result = dpnp.take_along_axis(ai, ind_r_i, axis) - numpy.testing.assert_array_equal(expected, result) + assert_array_equal(expected, result) @pytest.mark.parametrize("m", @@ -454,7 +455,7 @@ def test_take_along_axis1(): def test_tril_indices(n, k, m): result = dpnp.tril_indices(n, k, m) expected = numpy.tril_indices(n, k, m) - numpy.testing.assert_array_equal(expected, result) + assert_array_equal(expected, result) @pytest.mark.parametrize("k", @@ -472,7 +473,7 @@ def test_tril_indices_from(array, k): ia = dpnp.array(a) result = 
dpnp.tril_indices_from(ia, k) expected = numpy.tril_indices_from(a, k) - numpy.testing.assert_array_equal(expected, result) + assert_array_equal(expected, result) @pytest.mark.parametrize("m", @@ -487,7 +488,7 @@ def test_tril_indices_from(array, k): def test_triu_indices(n, k, m): result = dpnp.triu_indices(n, k, m) expected = numpy.triu_indices(n, k, m) - numpy.testing.assert_array_equal(expected, result) + assert_array_equal(expected, result) @pytest.mark.parametrize("k", @@ -505,4 +506,4 @@ def test_triu_indices_from(array, k): ia = dpnp.array(a) result = dpnp.triu_indices_from(ia, k) expected = numpy.triu_indices_from(a, k) - numpy.testing.assert_array_equal(expected, result) + assert_array_equal(expected, result) diff --git a/tests/test_mathematical.py b/tests/test_mathematical.py index 21071bec41e..70e0bd73dc5 100644 --- a/tests/test_mathematical.py +++ b/tests/test_mathematical.py @@ -1,8 +1,15 @@ import pytest +from .helper import get_all_dtypes import dpnp import numpy +from numpy.testing import ( + assert_allclose, + assert_array_almost_equal, + assert_array_equal, + assert_raises +) @pytest.mark.usefixtures("allow_fall_back_on_numpy") @@ -10,27 +17,27 @@ class TestConvolve: def test_object(self): d = [1.] * 100 k = [1.] 
* 3 - numpy.testing.assert_array_almost_equal(dpnp.convolve(d, k)[2:-2], dpnp.full(98, 3)) + assert_array_almost_equal(dpnp.convolve(d, k)[2:-2], dpnp.full(98, 3)) def test_no_overwrite(self): d = dpnp.ones(100) k = dpnp.ones(3) dpnp.convolve(d, k) - numpy.testing.assert_array_equal(d, dpnp.ones(100)) - numpy.testing.assert_array_equal(k, dpnp.ones(3)) + assert_array_equal(d, dpnp.ones(100)) + assert_array_equal(k, dpnp.ones(3)) def test_mode(self): d = dpnp.ones(100) k = dpnp.ones(3) default_mode = dpnp.convolve(d, k, mode='full') full_mode = dpnp.convolve(d, k, mode='f') - numpy.testing.assert_array_equal(full_mode, default_mode) + assert_array_equal(full_mode, default_mode) # integer mode - with numpy.testing.assert_raises(ValueError): + with assert_raises(ValueError): dpnp.convolve(d, k, mode=-1) - numpy.testing.assert_array_equal(dpnp.convolve(d, k, mode=2), full_mode) + assert_array_equal(dpnp.convolve(d, k, mode=2), full_mode) # illegal arguments - with numpy.testing.assert_raises(TypeError): + with assert_raises(TypeError): dpnp.convolve(d, k, mode=None) @@ -53,33 +60,34 @@ def test_diff(array): dpnp_a = dpnp.array(array) expected = numpy.diff(np_a) result = dpnp.diff(dpnp_a) - numpy.testing.assert_allclose(expected, result) + assert_allclose(expected, result) -@pytest.mark.parametrize("dtype1", - [numpy.bool_, numpy.float64, numpy.float32, numpy.int64, numpy.int32, numpy.complex64, numpy.complex128], - ids=['numpy.bool_', 'numpy.float64', 'numpy.float32', 'numpy.int64', 'numpy.int32', 'numpy.complex64', 'numpy.complex128']) -@pytest.mark.parametrize("dtype2", - [numpy.bool_, numpy.float64, numpy.float32, numpy.int64, numpy.int32, numpy.complex64, numpy.complex128], - ids=['numpy.bool_', 'numpy.float64', 'numpy.float32', 'numpy.int64', 'numpy.int32', 'numpy.complex64', 'numpy.complex128']) +@pytest.mark.parametrize("dtype1", get_all_dtypes()) +@pytest.mark.parametrize("dtype2", get_all_dtypes()) +@pytest.mark.parametrize("func", + ['add', 'multiply']) 
@pytest.mark.parametrize("data", [[[1, 2], [3, 4]]], ids=['[[1, 2], [3, 4]]']) -def test_multiply_dtype(dtype1, dtype2, data): +def test_op_multiple_dtypes(dtype1, func, dtype2, data): np_a = numpy.array(data, dtype=dtype1) dpnp_a = dpnp.array(data, dtype=dtype1) np_b = numpy.array(data, dtype=dtype2) dpnp_b = dpnp.array(data, dtype=dtype2) - result = dpnp.multiply(dpnp_a, dpnp_b) - expected = numpy.multiply(np_a, np_b) - numpy.testing.assert_array_equal(result, expected) + result = getattr(dpnp, func)(dpnp_a, dpnp_b) + expected = getattr(numpy, func)(np_a, np_b) + assert_array_equal(result, expected) @pytest.mark.parametrize("rhs", [[[1, 2, 3], [4, 5, 6]], [2.0, 1.5, 1.0], 3, 0.3]) -@pytest.mark.parametrize("lhs", [[[6, 5, 4], [3, 2, 1]], [1.3, 2.6, 3.9], 5, 0.5]) -@pytest.mark.parametrize("dtype", [numpy.int32, numpy.int64, numpy.float32, numpy.float64]) +@pytest.mark.parametrize("lhs", [[[6, 5, 4], [3, 2, 1]], [1.3, 2.6, 3.9]]) +# TODO: achieve the same level of dtype support for all mathematical operations, like +# @pytest.mark.parametrize("dtype", get_all_dtypes()) +# and to get rid of fallbacks on numpy allowed by below fixture +# @pytest.mark.usefixtures("allow_fall_back_on_numpy") class TestMathematical: @staticmethod @@ -98,56 +106,68 @@ def _test_mathematical(self, name, dtype, lhs, rhs): b = self.array_or_scalar(numpy, rhs, dtype=dtype) expected = getattr(numpy, name)(a, b) - numpy.testing.assert_allclose(result, expected, atol=1e-4) + assert_allclose(result, expected, atol=1e-4) - @pytest.mark.usefixtures("allow_fall_back_on_numpy") + @pytest.mark.parametrize("dtype", get_all_dtypes()) def test_add(self, dtype, lhs, rhs): self._test_mathematical('add', dtype, lhs, rhs) @pytest.mark.usefixtures("allow_fall_back_on_numpy") + @pytest.mark.parametrize("dtype", get_all_dtypes(no_bool=True, no_complex=True)) def test_arctan2(self, dtype, lhs, rhs): self._test_mathematical('arctan2', dtype, lhs, rhs) @pytest.mark.usefixtures("allow_fall_back_on_numpy") + 
@pytest.mark.parametrize("dtype", get_all_dtypes(no_bool=True, no_complex=True)) def test_copysign(self, dtype, lhs, rhs): self._test_mathematical('copysign', dtype, lhs, rhs) @pytest.mark.usefixtures("allow_fall_back_on_numpy") + @pytest.mark.parametrize("dtype", get_all_dtypes(no_bool=True, no_complex=True)) def test_divide(self, dtype, lhs, rhs): self._test_mathematical('divide', dtype, lhs, rhs) @pytest.mark.usefixtures("allow_fall_back_on_numpy") + @pytest.mark.parametrize("dtype", get_all_dtypes(no_bool=True, no_complex=True)) def test_fmod(self, dtype, lhs, rhs): self._test_mathematical('fmod', dtype, lhs, rhs) @pytest.mark.usefixtures("allow_fall_back_on_numpy") + @pytest.mark.parametrize("dtype", get_all_dtypes(no_bool=True, no_complex=True)) def test_floor_divide(self, dtype, lhs, rhs): self._test_mathematical('floor_divide', dtype, lhs, rhs) @pytest.mark.usefixtures("allow_fall_back_on_numpy") + @pytest.mark.parametrize("dtype", get_all_dtypes(no_bool=True, no_complex=True)) def test_hypot(self, dtype, lhs, rhs): self._test_mathematical('hypot', dtype, lhs, rhs) @pytest.mark.usefixtures("allow_fall_back_on_numpy") + @pytest.mark.parametrize("dtype", get_all_dtypes(no_bool=True, no_complex=True)) def test_maximum(self, dtype, lhs, rhs): self._test_mathematical('maximum', dtype, lhs, rhs) @pytest.mark.usefixtures("allow_fall_back_on_numpy") + @pytest.mark.parametrize("dtype", get_all_dtypes(no_bool=True, no_complex=True)) def test_minimum(self, dtype, lhs, rhs): self._test_mathematical('minimum', dtype, lhs, rhs) + @pytest.mark.parametrize("dtype", get_all_dtypes()) def test_multiply(self, dtype, lhs, rhs): self._test_mathematical('multiply', dtype, lhs, rhs) @pytest.mark.usefixtures("allow_fall_back_on_numpy") + @pytest.mark.parametrize("dtype", get_all_dtypes(no_bool=True, no_complex=True)) def test_remainder(self, dtype, lhs, rhs): self._test_mathematical('remainder', dtype, lhs, rhs) @pytest.mark.usefixtures("allow_fall_back_on_numpy") + 
@pytest.mark.parametrize("dtype", get_all_dtypes(no_bool=True, no_complex=True)) def test_power(self, dtype, lhs, rhs): self._test_mathematical('power', dtype, lhs, rhs) @pytest.mark.usefixtures("allow_fall_back_on_numpy") + @pytest.mark.parametrize("dtype", get_all_dtypes(no_bool=True, no_complex=True)) def test_subtract(self, dtype, lhs, rhs): self._test_mathematical('subtract', dtype, lhs, rhs) @@ -155,9 +175,9 @@ def test_subtract(self, dtype, lhs, rhs): @pytest.mark.parametrize("val_type", [bool, int, float], ids=['bool', 'int', 'float']) -@pytest.mark.parametrize("data_type", - [numpy.bool_, numpy.float64, numpy.float32, numpy.int64, numpy.int32], - ids=['numpy.bool_', 'numpy.float64', 'numpy.float32', 'numpy.int64', 'numpy.int32']) +@pytest.mark.parametrize("data_type", get_all_dtypes()) +@pytest.mark.parametrize("func", + ['add', 'multiply']) @pytest.mark.parametrize("val", [0, 1, 5], ids=['0', '1', '5']) @@ -172,18 +192,18 @@ def test_subtract(self, dtype, lhs, rhs): '[[1, 2], [3, 4]]', '[[[1, 2], [3, 4]], [[1, 2], [2, 1]], [[1, 3], [3, 1]]]', '[[[[1, 2], [3, 4]], [[1, 2], [2, 1]]], [[[1, 3], [3, 1]], [[0, 1], [1, 3]]]]']) -def test_multiply_scalar(array, val, data_type, val_type): +def test_op_with_scalar(array, val, func, data_type, val_type): np_a = numpy.array(array, dtype=data_type) dpnp_a = dpnp.array(array, dtype=data_type) val_ = val_type(val) - result = dpnp.multiply(dpnp_a, val_) - expected = numpy.multiply(np_a, val_) - numpy.testing.assert_array_equal(result, expected) + result = getattr(dpnp, func)(dpnp_a, val_) + expected = getattr(numpy, func)(np_a, val_) + assert_array_equal(result, expected) - result = dpnp.multiply(val_, dpnp_a) - expected = numpy.multiply(val_, np_a) - numpy.testing.assert_array_equal(result, expected) + result = getattr(dpnp, func)(val_, dpnp_a) + expected = getattr(numpy, func)(val_, np_a) + assert_array_equal(result, expected) @pytest.mark.parametrize("shape", @@ -196,9 +216,9 @@ def test_multiply_scalar2(shape, 
dtype): np_a = numpy.ones(shape, dtype=dtype) dpnp_a = dpnp.ones(shape, dtype=dtype) - result = 0.5 * dpnp_a - expected = 0.5 * np_a - numpy.testing.assert_array_equal(result, expected) + result = 0.5 * dpnp_a * 1.7 + expected = 0.5 * np_a * 1.7 + assert_allclose(result, expected) @pytest.mark.usefixtures("allow_fall_back_on_numpy") @@ -211,7 +231,7 @@ def test_nancumprod(array): result = dpnp.nancumprod(dpnp_a) expected = numpy.nancumprod(np_a) - numpy.testing.assert_array_equal(expected, result) + assert_array_equal(expected, result) @pytest.mark.usefixtures("allow_fall_back_on_numpy") @@ -224,31 +244,25 @@ def test_nancumsum(array): result = dpnp.nancumsum(dpnp_a) expected = numpy.nancumsum(np_a) - numpy.testing.assert_array_equal(expected, result) + assert_array_equal(expected, result) @pytest.mark.parametrize("data", [[[1., -1.], [0.1, -0.1]], [-2, -1, 0, 1, 2]], ids=['[[1., -1.], [0.1, -0.1]]', '[-2, -1, 0, 1, 2]']) -@pytest.mark.parametrize("dtype", - [numpy.float64, numpy.float32, numpy.int64, numpy.int32], - ids=['numpy.float64', 'numpy.float32', 'numpy.int64', 'numpy.int32']) +@pytest.mark.parametrize("dtype", get_all_dtypes(no_bool=True, no_complex=True)) def test_negative(data, dtype): np_a = numpy.array(data, dtype=dtype) dpnp_a = dpnp.array(data, dtype=dtype) result = dpnp.negative(dpnp_a) expected = numpy.negative(np_a) - numpy.testing.assert_array_equal(result, expected) + assert_array_equal(result, expected) @pytest.mark.usefixtures("allow_fall_back_on_numpy") -@pytest.mark.parametrize("val_type", - [numpy.float64, numpy.float32, numpy.int64, numpy.int32], - ids=['numpy.float64', 'numpy.float32', 'numpy.int64', 'numpy.int32']) -@pytest.mark.parametrize("data_type", - [numpy.float64, numpy.float32, numpy.int64, numpy.int32], - ids=['numpy.float64', 'numpy.float32', 'numpy.int64', 'numpy.int32']) +@pytest.mark.parametrize("val_type", get_all_dtypes(no_bool=True, no_complex=True, no_none=True)) +@pytest.mark.parametrize("data_type", 
get_all_dtypes(no_bool=True, no_complex=True)) @pytest.mark.parametrize("val", [0, 1, 5], ids=['0', '1', '5']) @@ -269,12 +283,11 @@ def test_power(array, val, data_type, val_type): val_ = val_type(val) result = dpnp.power(dpnp_a, val_) expected = numpy.power(np_a, val_) - numpy.testing.assert_array_equal(expected, result) + assert_array_equal(expected, result) class TestEdiff1d: - @pytest.mark.parametrize("data_type", - [numpy.float64, numpy.float32, numpy.int64, numpy.int32]) + @pytest.mark.parametrize("data_type", get_all_dtypes(no_bool=True, no_complex=True)) @pytest.mark.parametrize("array", [[1, 2, 4, 7, 0], [], [1], @@ -285,7 +298,7 @@ def test_ediff1d_int(self, array, data_type): result = dpnp.ediff1d(dpnp_a) expected = numpy.ediff1d(np_a) - numpy.testing.assert_array_equal(expected, result) + assert_array_equal(expected, result) @pytest.mark.usefixtures("allow_fall_back_on_numpy") @@ -297,13 +310,12 @@ def test_ediff1d_args(self): result = dpnp.ediff1d(np_a, to_end=to_end, to_begin=to_begin) expected = numpy.ediff1d(np_a, to_end=to_end, to_begin=to_begin) - numpy.testing.assert_array_equal(expected, result) + assert_array_equal(expected, result) @pytest.mark.usefixtures("allow_fall_back_on_numpy") class TestTrapz: - @pytest.mark.parametrize("data_type", - [numpy.float64, numpy.float32, numpy.int64, numpy.int32]) + @pytest.mark.parametrize("data_type", get_all_dtypes(no_bool=True, no_complex=True)) @pytest.mark.parametrize("array", [[1, 2, 3], [[1, 2, 3], [4, 5, 6]], [1, 4, 6, 9, 10, 12], @@ -315,12 +327,10 @@ def test_trapz_default(self, array, data_type): result = dpnp.trapz(dpnp_a) expected = numpy.trapz(np_a) - numpy.testing.assert_array_equal(expected, result) + assert_array_equal(expected, result) - @pytest.mark.parametrize("data_type_y", - [numpy.float64, numpy.float32, numpy.int64, numpy.int32]) - @pytest.mark.parametrize("data_type_x", - [numpy.float64, numpy.float32, numpy.int64, numpy.int32]) + @pytest.mark.parametrize("data_type_y", 
get_all_dtypes(no_bool=True, no_complex=True)) + @pytest.mark.parametrize("data_type_x", get_all_dtypes(no_bool=True, no_complex=True)) @pytest.mark.parametrize("y_array", [[1, 2, 4, 5], [1., 2.5, 6., 7.]]) @pytest.mark.parametrize("x_array", [[2, 5, 6, 9]]) @@ -333,7 +343,7 @@ def test_trapz_with_x_params(self, y_array, x_array, data_type_y, data_type_x): result = dpnp.trapz(dpnp_y, dpnp_x) expected = numpy.trapz(np_y, np_x) - numpy.testing.assert_array_equal(expected, result) + assert_array_equal(expected, result) @pytest.mark.parametrize("array", [[1, 2, 3], [4, 5, 6]]) def test_trapz_with_x_param_2ndim(self, array): @@ -342,7 +352,7 @@ def test_trapz_with_x_param_2ndim(self, array): result = dpnp.trapz(dpnp_a, dpnp_a) expected = numpy.trapz(np_a, np_a) - numpy.testing.assert_array_equal(expected, result) + assert_array_equal(expected, result) @pytest.mark.parametrize("y_array", [[1, 2, 4, 5], [1., 2.5, 6., 7., ]]) @@ -353,7 +363,7 @@ def test_trapz_with_dx_params(self, y_array, dx): result = dpnp.trapz(dpnp_y, dx=dx) expected = numpy.trapz(np_y, dx=dx) - numpy.testing.assert_array_equal(expected, result) + assert_array_equal(expected, result) @pytest.mark.usefixtures("allow_fall_back_on_numpy") @@ -388,7 +398,7 @@ def test_cross_3x3(self, x1, x2, axisa, axisb, axisc, axis): result = dpnp.cross(dpnp_x1, dpnp_x2, axisa, axisb, axisc, axis) expected = numpy.cross(np_x1, np_x2, axisa, axisb, axisc, axis) - numpy.testing.assert_array_equal(expected, result) + assert_array_equal(expected, result) @pytest.mark.usefixtures("allow_fall_back_on_numpy") @@ -403,7 +413,7 @@ def test_gradient_y1(self, array): result = dpnp.gradient(dpnp_y) expected = numpy.gradient(np_y) - numpy.testing.assert_array_equal(expected, result) + assert_array_equal(expected, result) @pytest.mark.parametrize("array", [[2, 3, 6, 8, 4, 9], [3., 4., 7.5, 9.], @@ -415,7 +425,7 @@ def test_gradient_y1_dx(self, array, dx): result = dpnp.gradient(dpnp_y, dx) expected = numpy.gradient(np_y, dx) - 
numpy.testing.assert_array_equal(expected, result) + assert_array_equal(expected, result) class TestCeil: @@ -433,7 +443,7 @@ def test_ceil(self): np_array = numpy.array(array_data, dtype=numpy.float64) expected = numpy.ceil(np_array, out=out) - numpy.testing.assert_array_equal(expected, result) + assert_array_equal(expected, result) @pytest.mark.parametrize("dtype", [numpy.float32, numpy.int64, numpy.int32], @@ -473,7 +483,7 @@ def test_floor(self): np_array = numpy.array(array_data, dtype=numpy.float64) expected = numpy.floor(np_array, out=out) - numpy.testing.assert_array_equal(expected, result) + assert_array_equal(expected, result) @pytest.mark.parametrize("dtype", [numpy.float32, numpy.int64, numpy.int32], @@ -513,7 +523,7 @@ def test_trunc(self): np_array = numpy.array(array_data, dtype=numpy.float64) expected = numpy.trunc(np_array, out=out) - numpy.testing.assert_array_equal(expected, result) + assert_array_equal(expected, result) @pytest.mark.parametrize("dtype", [numpy.float32, numpy.int64, numpy.int32], @@ -556,7 +566,7 @@ def test_power(self): np_array2 = numpy.array(array2_data, dtype=numpy.float64) expected = numpy.power(np_array1, np_array2, out=out) - numpy.testing.assert_array_equal(expected, result) + assert_array_equal(expected, result) @pytest.mark.parametrize("dtype", [numpy.float32, numpy.int64, numpy.int32], diff --git a/tests/test_strides.py b/tests/test_strides.py index 7ec1d6b3f03..3c0d86a44a5 100644 --- a/tests/test_strides.py +++ b/tests/test_strides.py @@ -1,8 +1,13 @@ import math import pytest +from .helper import get_all_dtypes import dpnp + import numpy +from numpy.testing import ( + assert_allclose +) def _getattr(ex, str_): @@ -15,12 +20,10 @@ def _getattr(ex, str_): @pytest.mark.parametrize("func_name", ['abs', ]) -@pytest.mark.parametrize("type", - [numpy.float64, numpy.float32, numpy.int64, numpy.int32], - ids=['float64', 'float32', 'int64', 'int32']) -def test_strides(func_name, type): +@pytest.mark.parametrize("dtype", 
get_all_dtypes(no_bool=True, no_complex=True)) +def test_strides(func_name, dtype): shape = (4, 4) - a = numpy.arange(shape[0] * shape[1], dtype=type).reshape(shape) + a = numpy.arange(shape[0] * shape[1], dtype=dtype).reshape(shape) a_strides = a[0::2, 0::2] dpa = dpnp.array(a) dpa_strides = dpa[0::2, 0::2] @@ -31,7 +34,7 @@ def test_strides(func_name, type): numpy_func = _getattr(numpy, func_name) expected = numpy_func(a_strides) - numpy.testing.assert_allclose(expected, result) + assert_allclose(expected, result) @pytest.mark.parametrize("func_name", @@ -39,9 +42,7 @@ def test_strides(func_name, type): "cosh", "conjugate", "degrees", "ediff1d", "exp", "exp2", "expm1", "fabs", "floor", "log", "log10", "log1p", "log2", "negative", "radians", "sign", "sin", "sinh", "sqrt", "square", "tanh", "trunc"]) -@pytest.mark.parametrize("dtype", - [numpy.float64, numpy.float32, numpy.int64, numpy.int32], - ids=["float64", "float32", "int64", "int32"]) +@pytest.mark.parametrize("dtype", get_all_dtypes(no_bool=True, no_complex=True)) @pytest.mark.parametrize("shape", [(10,)], ids=["(10,)"]) @@ -58,12 +59,10 @@ def test_strides_1arg(func_name, dtype, shape): numpy_func = _getattr(numpy, func_name) expected = numpy_func(b) - numpy.testing.assert_allclose(result, expected) + assert_allclose(result, expected) -@pytest.mark.parametrize("dtype", - [numpy.float64, numpy.float32, numpy.int64, numpy.int32], - ids=["float64", "float32", "int64", "int32"]) +@pytest.mark.parametrize("dtype", get_all_dtypes(no_bool=True, no_complex=True)) @pytest.mark.parametrize("shape", [(10,)], ids=["(10,)"]) @@ -80,12 +79,10 @@ def test_strides_erf(dtype, shape): for idx, val in enumerate(b): expected[idx] = math.erf(val) - numpy.testing.assert_allclose(result, expected) + assert_allclose(result, expected) -@pytest.mark.parametrize("dtype", - [numpy.float64, numpy.float32, numpy.int64, numpy.int32], - ids=["float64", "float32", "int64", "int32"]) +@pytest.mark.parametrize("dtype", 
get_all_dtypes(no_bool=True, no_complex=True)) @pytest.mark.parametrize("shape", [(10,)], ids=["(10,)"]) @@ -101,12 +98,10 @@ def test_strides_reciprocal(dtype, shape): result = dpnp.reciprocal(dpb) expected = numpy.reciprocal(b) - numpy.testing.assert_allclose(result, expected, rtol=1e-06) + assert_allclose(result, expected, rtol=1e-06) -@pytest.mark.parametrize("dtype", - [numpy.float64, numpy.float32, numpy.int64, numpy.int32], - ids=["float64", "float32", "int64", "int32"]) +@pytest.mark.parametrize("dtype", get_all_dtypes(no_bool=True, no_complex=True)) @pytest.mark.parametrize("shape", [(10,)], ids=["(10,)"]) @@ -120,14 +115,12 @@ def test_strides_tan(dtype, shape): result = dpnp.tan(dpb) expected = numpy.tan(b) - numpy.testing.assert_allclose(result, expected, rtol=1e-06) + assert_allclose(result, expected, rtol=1e-06) @pytest.mark.parametrize("func_name", ["add", "arctan2", "hypot", "maximum", "minimum", "multiply", "power", "subtract"]) -@pytest.mark.parametrize("dtype", - [numpy.float64, numpy.float32, numpy.int64, numpy.int32], - ids=["float64", "float32", "int64", "int32"]) +@pytest.mark.parametrize("dtype", get_all_dtypes(no_bool=True, no_complex=True)) @pytest.mark.parametrize("shape", [(3, 3)], ids=["(3, 3)"]) @@ -144,7 +137,7 @@ def test_strides_2args(func_name, dtype, shape): numpy_func = _getattr(numpy, func_name) expected = numpy_func(a, b) - numpy.testing.assert_allclose(result, expected) + assert_allclose(result, expected) @pytest.mark.parametrize("func_name", @@ -168,12 +161,10 @@ def test_strides_bitwise(func_name, dtype, shape): numpy_func = _getattr(numpy, func_name) expected = numpy_func(a, b) - numpy.testing.assert_allclose(result, expected) + assert_allclose(result, expected) -@pytest.mark.parametrize("dtype", - [numpy.float64, numpy.float32, numpy.int64, numpy.int32], - ids=["float64", "float32", "int64", "int32"]) +@pytest.mark.parametrize("dtype", get_all_dtypes(no_bool=True, no_complex=True)) @pytest.mark.parametrize("shape", [(3, 
3)], ids=["(3, 3)"]) @@ -187,13 +178,10 @@ def test_strides_copysign(dtype, shape): result = dpnp.copysign(dpa, dpb) expected = numpy.copysign(a, b) - numpy.testing.assert_allclose(result, expected) + assert_allclose(result, expected) -@pytest.mark.usefixtures("allow_fall_back_on_numpy") -@pytest.mark.parametrize("dtype", - [numpy.float64, numpy.float32, numpy.int64, numpy.int32], - ids=["float64", "float32", "int64", "int32"]) +@pytest.mark.parametrize("dtype", get_all_dtypes(no_bool=True, no_complex=True)) @pytest.mark.parametrize("shape", [(3, 3)], ids=["(3, 3)"]) @@ -207,13 +195,10 @@ def test_strides_fmod(dtype, shape): result = dpnp.fmod(dpa, dpb) expected = numpy.fmod(a, b) - numpy.testing.assert_allclose(result, expected) + assert_allclose(result, expected) -@pytest.mark.usefixtures("allow_fall_back_on_numpy") -@pytest.mark.parametrize("dtype", - [numpy.float64, numpy.float32, numpy.int64, numpy.int32], - ids=["float64", "float32", "int64", "int32"]) +@pytest.mark.parametrize("dtype", get_all_dtypes(no_bool=True, no_complex=True)) @pytest.mark.parametrize("shape", [(3, 3)], ids=["(3, 3)"]) @@ -227,4 +212,4 @@ def test_strides_true_devide(dtype, shape): result = dpnp.fmod(dpa, dpb) expected = numpy.fmod(a, b) - numpy.testing.assert_allclose(result, expected) + assert_allclose(result, expected) diff --git a/tests/test_usm_type.py b/tests/test_usm_type.py index 094fe419c26..15b853b3bfa 100644 --- a/tests/test_usm_type.py +++ b/tests/test_usm_type.py @@ -11,16 +11,17 @@ ] -@pytest.mark.parametrize("usm_type", list_of_usm_types, ids=list_of_usm_types) -def test_coerced_usm_types_sum(usm_type): - x = dp.arange(10, usm_type = "device") - y = dp.arange(10, usm_type = usm_type) +@pytest.mark.parametrize("usm_type_x", list_of_usm_types, ids=list_of_usm_types) +@pytest.mark.parametrize("usm_type_y", list_of_usm_types, ids=list_of_usm_types) +def test_coerced_usm_types_sum(usm_type_x, usm_type_y): + x = dp.arange(1000, usm_type = usm_type_x) + y = dp.arange(1000, 
usm_type = usm_type_y) - z = x + y - - assert z.usm_type == x.usm_type - assert z.usm_type == "device" - assert y.usm_type == usm_type + z = 1.3 + x + y + 2 + + assert x.usm_type == usm_type_x + assert y.usm_type == usm_type_y + assert z.usm_type == du.get_coerced_usm_type([usm_type_x, usm_type_y]) @pytest.mark.parametrize("usm_type_x", list_of_usm_types, ids=list_of_usm_types) @@ -29,8 +30,8 @@ def test_coerced_usm_types_mul(usm_type_x, usm_type_y): x = dp.arange(10, usm_type = usm_type_x) y = dp.arange(10, usm_type = usm_type_y) - z = x * y - + z = 3 * x * y * 1.5 + assert x.usm_type == usm_type_x assert y.usm_type == usm_type_y assert z.usm_type == du.get_coerced_usm_type([usm_type_x, usm_type_y]) @@ -61,3 +62,22 @@ def test_array_creation(func, args, usm_type_x, usm_type_y): assert x.usm_type == usm_type_x assert y.usm_type == usm_type_y + +@pytest.mark.parametrize("op", + ['equal', 'greater', 'greater_equal', 'less', 'less_equal', + 'logical_and', 'logical_or', 'logical_xor', 'not_equal'], + ids=['equal', 'greater', 'greater_equal', 'less', 'less_equal', + 'logical_and', 'logical_or', 'logical_xor', 'not_equal']) +@pytest.mark.parametrize("usm_type_x", list_of_usm_types, ids=list_of_usm_types) +@pytest.mark.parametrize("usm_type_y", list_of_usm_types, ids=list_of_usm_types) +def test_coerced_usm_types_logic_op(op, usm_type_x, usm_type_y): + x = dp.arange(100, usm_type = usm_type_x) + y = dp.arange(100, usm_type = usm_type_y)[::-1] + + z = getattr(dp, op)(x, y) + zx = getattr(dp, op)(x, 50) + zy = getattr(dp, op)(30, y) + + assert x.usm_type == zx.usm_type == usm_type_x + assert y.usm_type == zy.usm_type == usm_type_y + assert z.usm_type == du.get_coerced_usm_type([usm_type_x, usm_type_y]) diff --git a/tests/third_party/cupy/math_tests/test_arithmetic.py b/tests/third_party/cupy/math_tests/test_arithmetic.py index 158f5cc1442..a53a8494707 100644 --- a/tests/third_party/cupy/math_tests/test_arithmetic.py +++ b/tests/third_party/cupy/math_tests/test_arithmetic.py 
@@ -146,7 +146,7 @@ def check_binary(self, xp): y = y.astype(numpy.complex64) # NumPy returns an output array of another type than DPNP when input ones have diffrent types. - if self.name == 'multiply' and xp is cupy: + if self.name in ('add', 'multiply') and xp is cupy: if xp.isscalar(arg1) and xp.isscalar(arg2): # If both are scalars, the result will be a scalar, so needs to convert into numpy-scalar. y = numpy.asarray(y) From 03450eb880af8c26610c774373cf635c1d4e521c Mon Sep 17 00:00:00 2001 From: Vladislav Perevezentsev Date: Tue, 14 Feb 2023 15:37:12 +0100 Subject: [PATCH 21/32] Add dlpack support with tests and docstrings --- dpnp/dpnp_array.py | 10 ++++++++-- dpnp/dpnp_iface.py | 25 +++++++++++++++++++++++++ tests/test_dparray.py | 19 +++++++++++++++++++ 3 files changed, 52 insertions(+), 2 deletions(-) diff --git a/dpnp/dpnp_array.py b/dpnp/dpnp_array.py index 473e08c83fc..7bdb5a7560f 100644 --- a/dpnp/dpnp_array.py +++ b/dpnp/dpnp_array.py @@ -140,7 +140,7 @@ def __bool__(self): return self._array_obj.__bool__() # '__class__', - + def __complex__(self): return self._array_obj.__complex__() @@ -153,6 +153,12 @@ def __complex__(self): # '__divmod__', # '__doc__', + def __dlpack__(self, stream=None): + return self._array_obj.__dlpack__(stream=stream) + + def __dlpack_device__(self): + return self._array_obj.__dlpack_device__() + def __eq__(self, other): return dpnp.equal(self, other) @@ -190,7 +196,7 @@ def __gt__(self, other): # '__imatmul__', # '__imod__', # '__imul__', - + def __index__(self): return self._array_obj.__index__() diff --git a/dpnp/dpnp_iface.py b/dpnp/dpnp_iface.py index 1c60d1c999e..019f2324f3e 100644 --- a/dpnp/dpnp_iface.py +++ b/dpnp/dpnp_iface.py @@ -64,6 +64,7 @@ "default_float_type", "dpnp_queue_initialize", "dpnp_queue_is_cpu", + "from_dlpack", "get_dpnp_descriptor", "get_include", "get_normalized_queue_device" @@ -222,6 +223,30 @@ def default_float_type(device=None, sycl_queue=None): return map_dtype_to_device(float64, 
_sycl_queue.sycl_device) +def from_dlpack(obj): + """ + Create a dpnp array from a Python object implementing the ``__dlpack__`` + protocol. + + See https://dmlc.github.io/dlpack/latest/ for more details. + + Parameters + ---------- + obj : A Python object representing an array that implements the ``__dlpack__`` + and ``__dlpack_device__`` methods. + + Returns + ------- + array : dpnp_array + + """ + + usm_ary = dpt.from_dlpack(obj) + dpnp_ary = dpnp_array.__new__(dpnp_array) + dpnp_ary._array_obj = usm_ary + return dpnp_ary + + def get_dpnp_descriptor(ext_obj, copy_when_strides=True, copy_when_nondefault_queue=True, diff --git a/tests/test_dparray.py b/tests/test_dparray.py index 62a0120f8a3..c49db4da1ae 100644 --- a/tests/test_dparray.py +++ b/tests/test_dparray.py @@ -23,6 +23,25 @@ def test_astype(arr, arr_dtype, res_dtype): assert_array_equal(expected, result) +@pytest.mark.parametrize("arr_dtype", get_all_dtypes()) +@pytest.mark.parametrize("shape", [tuple(), (2,), (3, 0, 1), (2, 2, 2)]) +def test_from_dlpack(arr_dtype,shape): + X = dpnp.empty(shape=shape,dtype=arr_dtype) + Y = dpnp.from_dlpack(X) + assert_array_equal(X, Y) + assert X.__dlpack_device__() == Y.__dlpack_device__() + assert X.shape == Y.shape + assert X.dtype == Y.dtype or ( + str(X.dtype) == "bool" and str(Y.dtype) == "uint8" + ) + assert X.sycl_device == Y.sycl_device + assert X.usm_type == Y.usm_type + if Y.ndim: + V = Y[::-1] + W = dpnp.from_dlpack(V) + assert V.strides == W.strides + + @pytest.mark.parametrize("arr_dtype", get_all_dtypes()) @pytest.mark.parametrize("arr", [[-2, -1, 0, 1, 2], [[-2, -1], [1, 2]], []], From 75695ce81b3f09a5c0e4dad0a8902f9872bd908f Mon Sep 17 00:00:00 2001 From: Vladislav Perevezentsev Date: Tue, 14 Feb 2023 16:27:28 +0100 Subject: [PATCH 22/32] Add a test for dlpack with dpt --- tests/test_dparray.py | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/tests/test_dparray.py b/tests/test_dparray.py index c49db4da1ae..8c40631c930 100644 --- 
a/tests/test_dparray.py +++ b/tests/test_dparray.py @@ -41,6 +41,20 @@ def test_from_dlpack(arr_dtype,shape): W = dpnp.from_dlpack(V) assert V.strides == W.strides +@pytest.mark.parametrize("arr_dtype", get_all_dtypes()) +def test_from_dlpack_with_dpt(arr_dtype): + X = dpt.empty((64,),dtype=arr_dtype) + Y = dpnp.from_dlpack(X) + assert_array_equal(X, Y) + assert isinstance(Y, dpnp.dpnp_array.dpnp_array) + assert X.__dlpack_device__() == Y.__dlpack_device__() + assert X.shape == Y.shape + assert X.dtype == Y.dtype or ( + str(X.dtype) == "bool" and str(Y.dtype) == "uint8" + ) + assert X.sycl_device == Y.sycl_device + assert X.usm_type == Y.usm_type + @pytest.mark.parametrize("arr_dtype", get_all_dtypes()) @pytest.mark.parametrize("arr", From 4c158daa0738043c7addf4a57855c0685124c9a7 Mon Sep 17 00:00:00 2001 From: Vladislav Perevezentsev Date: Wed, 15 Feb 2023 19:44:47 +0100 Subject: [PATCH 23/32] Fix remarks, add _create_from_usm_ndarray func and move tests to test_sycl_queue --- dpnp/dpnp_array.py | 10 +++++++++ dpnp/dpnp_iface.py | 13 ++++++------ tests/helper.py | 2 +- tests/test_dparray.py | 33 ----------------------------- tests/test_sycl_queue.py | 45 ++++++++++++++++++++++++++++++++++++++-- 5 files changed, 61 insertions(+), 42 deletions(-) diff --git a/dpnp/dpnp_array.py b/dpnp/dpnp_array.py index 7bdb5a7560f..8e284ca590b 100644 --- a/dpnp/dpnp_array.py +++ b/dpnp/dpnp_array.py @@ -319,6 +319,16 @@ def __truediv__(self, other): # '__xor__', + @staticmethod + def _create_from_usm_ndarray(usm_ary : dpt.usm_ndarray): + if not isinstance(usm_ary, dpt.usm_ndarray): + raise TypeError( + f"Expected dpctl.tensor.usm_ndarray, got {type(usm_ary)}" + ) + res = dpnp_array.__new__(dpnp_array) + res._array_obj = usm_ary + return res + def all(self, axis=None, out=None, keepdims=False): """ Returns True if all elements evaluate to True. 
diff --git a/dpnp/dpnp_iface.py b/dpnp/dpnp_iface.py index 019f2324f3e..b7cdef8cc61 100644 --- a/dpnp/dpnp_iface.py +++ b/dpnp/dpnp_iface.py @@ -223,7 +223,7 @@ def default_float_type(device=None, sycl_queue=None): return map_dtype_to_device(float64, _sycl_queue.sycl_device) -def from_dlpack(obj): +def from_dlpack(obj, /): """ Create a dpnp array from a Python object implementing the ``__dlpack__`` protocol. @@ -232,19 +232,20 @@ def from_dlpack(obj): Parameters ---------- - obj : A Python object representing an array that implements the ``__dlpack__`` + obj : object + A Python object representing an array that implements the ``__dlpack__`` and ``__dlpack_device__`` methods. Returns ------- - array : dpnp_array + out : dpnp_array + Returns a new dpnp array containing the data from another array + (obj) with the ``__dlpack__`` method on the same device as object. """ usm_ary = dpt.from_dlpack(obj) - dpnp_ary = dpnp_array.__new__(dpnp_array) - dpnp_ary._array_obj = usm_ary - return dpnp_ary + return dpnp_array._create_from_usm_ndarray(usm_ary) def get_dpnp_descriptor(ext_obj, diff --git a/tests/helper.py b/tests/helper.py index be550a995dc..17c62cecd28 100644 --- a/tests/helper.py +++ b/tests/helper.py @@ -32,7 +32,7 @@ def get_all_dtypes(no_bool=False, dtypes.append(dpnp.complex64) if dev.has_aspect_fp64: dtypes.append(dpnp.complex128) - + # add None value to validate a default dtype if not no_none: dtypes.append(None) diff --git a/tests/test_dparray.py b/tests/test_dparray.py index 8c40631c930..62a0120f8a3 100644 --- a/tests/test_dparray.py +++ b/tests/test_dparray.py @@ -23,39 +23,6 @@ def test_astype(arr, arr_dtype, res_dtype): assert_array_equal(expected, result) -@pytest.mark.parametrize("arr_dtype", get_all_dtypes()) -@pytest.mark.parametrize("shape", [tuple(), (2,), (3, 0, 1), (2, 2, 2)]) -def test_from_dlpack(arr_dtype,shape): - X = dpnp.empty(shape=shape,dtype=arr_dtype) - Y = dpnp.from_dlpack(X) - assert_array_equal(X, Y) - assert X.__dlpack_device__() == 
Y.__dlpack_device__() - assert X.shape == Y.shape - assert X.dtype == Y.dtype or ( - str(X.dtype) == "bool" and str(Y.dtype) == "uint8" - ) - assert X.sycl_device == Y.sycl_device - assert X.usm_type == Y.usm_type - if Y.ndim: - V = Y[::-1] - W = dpnp.from_dlpack(V) - assert V.strides == W.strides - -@pytest.mark.parametrize("arr_dtype", get_all_dtypes()) -def test_from_dlpack_with_dpt(arr_dtype): - X = dpt.empty((64,),dtype=arr_dtype) - Y = dpnp.from_dlpack(X) - assert_array_equal(X, Y) - assert isinstance(Y, dpnp.dpnp_array.dpnp_array) - assert X.__dlpack_device__() == Y.__dlpack_device__() - assert X.shape == Y.shape - assert X.dtype == Y.dtype or ( - str(X.dtype) == "bool" and str(Y.dtype) == "uint8" - ) - assert X.sycl_device == Y.sycl_device - assert X.usm_type == Y.usm_type - - @pytest.mark.parametrize("arr_dtype", get_all_dtypes()) @pytest.mark.parametrize("arr", [[-2, -1, 0, 1, 2], [[-2, -1], [1, 2]], []], diff --git a/tests/test_sycl_queue.py b/tests/test_sycl_queue.py index bc42f70b370..1a1c2a85f2e 100644 --- a/tests/test_sycl_queue.py +++ b/tests/test_sycl_queue.py @@ -1,9 +1,14 @@ import pytest +from .helper import get_all_dtypes import dpnp import dpctl import numpy +from numpy.testing import ( + assert_array_equal +) + list_of_backend_str = [ "host", @@ -155,7 +160,7 @@ def test_array_creation_like(func, kwargs, device_x, device_y): dpnp_kwargs = dict(kwargs) dpnp_kwargs['device'] = device_y - + y = getattr(dpnp, func)(x, **dpnp_kwargs) numpy.testing.assert_array_equal(y_orig, y) assert_sycl_queue_equal(y.sycl_queue, x.to_device(device_y).sycl_queue) @@ -637,7 +642,7 @@ def test_eig(device): dpnp_val_queue = dpnp_val.get_array().sycl_queue dpnp_vec_queue = dpnp_vec.get_array().sycl_queue - # compare queue and device + # compare queue and device assert_sycl_queue_equal(dpnp_val_queue, expected_queue) assert_sycl_queue_equal(dpnp_vec_queue, expected_queue) @@ -806,3 +811,39 @@ def test_array_copy(device, func, device_param, queue_param): result = 
dpnp.array(dpnp_data, **kwargs) assert_sycl_queue_equal(result.sycl_queue, dpnp_data.sycl_queue) + + +@pytest.mark.parametrize("device", + valid_devices, + ids=[device.filter_string for device in valid_devices]) +#TODO need to delete no_bool=True when use dlpack > 0.7 version +@pytest.mark.parametrize("arr_dtype", get_all_dtypes(no_float16=True, no_bool=True)) +@pytest.mark.parametrize("shape", [tuple(), (2,), (3, 0, 1), (2, 2, 2)]) +def test_from_dlpack(arr_dtype, shape, device): + X = dpnp.empty(shape=shape, dtype=arr_dtype, device=device) + Y = dpnp.from_dlpack(X) + assert_array_equal(X, Y) + assert X.__dlpack_device__() == Y.__dlpack_device__() + assert X.sycl_device == Y.sycl_device + assert X.sycl_context == Y.sycl_context + assert X.usm_type == Y.usm_type + if Y.ndim: + V = Y[::-1] + W = dpnp.from_dlpack(V) + assert V.strides == W.strides + + +@pytest.mark.parametrize("device", + valid_devices, + ids=[device.filter_string for device in valid_devices]) +#TODO need to delete no_bool=True when use dlpack > 0.7 version +@pytest.mark.parametrize("arr_dtype", get_all_dtypes(no_float16=True, no_bool=True)) +def test_from_dlpack_with_dpt(arr_dtype, device): + X = dpctl.tensor.empty((64,), dtype=arr_dtype, device=device) + Y = dpnp.from_dlpack(X) + assert_array_equal(X, Y) + assert isinstance(Y, dpnp.dpnp_array.dpnp_array) + assert X.__dlpack_device__() == Y.__dlpack_device__() + assert X.sycl_device == Y.sycl_device + assert X.sycl_context == Y.sycl_context + assert X.usm_type == Y.usm_type From 439f2b503b66cee71f2deff83e9c823a810158b8 Mon Sep 17 00:00:00 2001 From: Natalia Polina Date: Wed, 15 Feb 2023 22:13:55 -0800 Subject: [PATCH 24/32] Use tril() and triu() function from dpctl.tensor (#1286) * Use tril() function from dpctl.tensor * Use triu() function from dpctl.tensor * Changed tests for tril() and triu() functions. * Skip tests for tril() and triu() functions with usm_type. 
--- dpnp/backend/include/dpnp_iface_fptr.hpp | 2 - .../kernels/dpnp_krnl_arraycreation.cpp | 32 ------- dpnp/dpnp_algo/dpnp_algo_arraycreation.pyx | 90 ------------------- dpnp/dpnp_container.py | 14 +++ dpnp/dpnp_iface_arraycreation.py | 57 +++++++++--- tests/test_arraycreation.py | 49 +++++----- tests/test_sycl_queue.py | 10 +++ tests/test_usm_type.py | 10 +++ .../cupy/creation_tests/test_matrix.py | 11 ++- 9 files changed, 107 insertions(+), 168 deletions(-) diff --git a/dpnp/backend/include/dpnp_iface_fptr.hpp b/dpnp/backend/include/dpnp_iface_fptr.hpp index 61c1c9838ad..70a2d860910 100644 --- a/dpnp/backend/include/dpnp_iface_fptr.hpp +++ b/dpnp/backend/include/dpnp_iface_fptr.hpp @@ -370,9 +370,7 @@ enum class DPNPFuncName : size_t DPNP_FN_TRI, /**< Used in numpy.tri() impl */ DPNP_FN_TRI_EXT, /**< Used in numpy.tri() impl, requires extra parameters */ DPNP_FN_TRIL, /**< Used in numpy.tril() impl */ - DPNP_FN_TRIL_EXT, /**< Used in numpy.tril() impl, requires extra parameters */ DPNP_FN_TRIU, /**< Used in numpy.triu() impl */ - DPNP_FN_TRIU_EXT, /**< Used in numpy.triu() impl, requires extra parameters */ DPNP_FN_TRUNC, /**< Used in numpy.trunc() impl */ DPNP_FN_TRUNC_EXT, /**< Used in numpy.trunc() impl, requires extra parameters */ DPNP_FN_VANDER, /**< Used in numpy.vander() impl */ diff --git a/dpnp/backend/kernels/dpnp_krnl_arraycreation.cpp b/dpnp/backend/kernels/dpnp_krnl_arraycreation.cpp index 8727e37fafc..a29fcca0975 100644 --- a/dpnp/backend/kernels/dpnp_krnl_arraycreation.cpp +++ b/dpnp/backend/kernels/dpnp_krnl_arraycreation.cpp @@ -1055,17 +1055,6 @@ void (*dpnp_tril_default_c)(void*, const size_t, const size_t) = dpnp_tril_c<_DataType>; -template -DPCTLSyclEventRef (*dpnp_tril_ext_c)(DPCTLSyclQueueRef, - void*, - void*, - const int, - shape_elem_type*, - shape_elem_type*, - const size_t, - const size_t, - const DPCTLEventVectorRef) = dpnp_tril_c<_DataType>; - template DPCTLSyclEventRef dpnp_triu_c(DPCTLSyclQueueRef q_ref, void* array_in, @@ 
-1218,17 +1207,6 @@ void (*dpnp_triu_default_c)(void*, const size_t, const size_t) = dpnp_triu_c<_DataType>; -template -DPCTLSyclEventRef (*dpnp_triu_ext_c)(DPCTLSyclQueueRef, - void*, - void*, - const int, - shape_elem_type*, - shape_elem_type*, - const size_t, - const size_t, - const DPCTLEventVectorRef) = dpnp_triu_c<_DataType>; - template DPCTLSyclEventRef dpnp_zeros_c(DPCTLSyclQueueRef q_ref, void* result, @@ -1439,21 +1417,11 @@ void func_map_init_arraycreation(func_map_t& fmap) fmap[DPNPFuncName::DPNP_FN_TRIL][eft_FLT][eft_FLT] = {eft_FLT, (void*)dpnp_tril_default_c}; fmap[DPNPFuncName::DPNP_FN_TRIL][eft_DBL][eft_DBL] = {eft_DBL, (void*)dpnp_tril_default_c}; - fmap[DPNPFuncName::DPNP_FN_TRIL_EXT][eft_INT][eft_INT] = {eft_INT, (void*)dpnp_tril_ext_c}; - fmap[DPNPFuncName::DPNP_FN_TRIL_EXT][eft_LNG][eft_LNG] = {eft_LNG, (void*)dpnp_tril_ext_c}; - fmap[DPNPFuncName::DPNP_FN_TRIL_EXT][eft_FLT][eft_FLT] = {eft_FLT, (void*)dpnp_tril_ext_c}; - fmap[DPNPFuncName::DPNP_FN_TRIL_EXT][eft_DBL][eft_DBL] = {eft_DBL, (void*)dpnp_tril_ext_c}; - fmap[DPNPFuncName::DPNP_FN_TRIU][eft_INT][eft_INT] = {eft_INT, (void*)dpnp_triu_default_c}; fmap[DPNPFuncName::DPNP_FN_TRIU][eft_LNG][eft_LNG] = {eft_LNG, (void*)dpnp_triu_default_c}; fmap[DPNPFuncName::DPNP_FN_TRIU][eft_FLT][eft_FLT] = {eft_FLT, (void*)dpnp_triu_default_c}; fmap[DPNPFuncName::DPNP_FN_TRIU][eft_DBL][eft_DBL] = {eft_DBL, (void*)dpnp_triu_default_c}; - fmap[DPNPFuncName::DPNP_FN_TRIU_EXT][eft_INT][eft_INT] = {eft_INT, (void*)dpnp_triu_ext_c}; - fmap[DPNPFuncName::DPNP_FN_TRIU_EXT][eft_LNG][eft_LNG] = {eft_LNG, (void*)dpnp_triu_ext_c}; - fmap[DPNPFuncName::DPNP_FN_TRIU_EXT][eft_FLT][eft_FLT] = {eft_FLT, (void*)dpnp_triu_ext_c}; - fmap[DPNPFuncName::DPNP_FN_TRIU_EXT][eft_DBL][eft_DBL] = {eft_DBL, (void*)dpnp_triu_ext_c}; - fmap[DPNPFuncName::DPNP_FN_ZEROS][eft_INT][eft_INT] = {eft_INT, (void*)dpnp_zeros_default_c}; fmap[DPNPFuncName::DPNP_FN_ZEROS][eft_LNG][eft_LNG] = {eft_LNG, (void*)dpnp_zeros_default_c}; 
fmap[DPNPFuncName::DPNP_FN_ZEROS][eft_FLT][eft_FLT] = {eft_FLT, (void*)dpnp_zeros_default_c}; diff --git a/dpnp/dpnp_algo/dpnp_algo_arraycreation.pyx b/dpnp/dpnp_algo/dpnp_algo_arraycreation.pyx index 6986bf0ec70..cb44a08db59 100644 --- a/dpnp/dpnp_algo/dpnp_algo_arraycreation.pyx +++ b/dpnp/dpnp_algo/dpnp_algo_arraycreation.pyx @@ -45,8 +45,6 @@ __all__ += [ "dpnp_ptp", "dpnp_trace", "dpnp_tri", - "dpnp_tril", - "dpnp_triu", "dpnp_vander", ] @@ -426,94 +424,6 @@ cpdef utils.dpnp_descriptor dpnp_tri(N, M=None, k=0, dtype=dpnp.float): return result -cpdef utils.dpnp_descriptor dpnp_tril(utils.dpnp_descriptor m, int k): - cdef shape_type_c input_shape = m.shape - cdef shape_type_c result_shape - - if m.ndim == 1: - result_shape = (m.shape[0], m.shape[0]) - else: - result_shape = m.shape - - cdef DPNPFuncType param1_type = dpnp_dtype_to_DPNPFuncType(m.dtype) - cdef DPNPFuncData kernel_data = get_dpnp_function_ptr(DPNP_FN_TRIL_EXT, param1_type, param1_type) - - m_obj = m.get_array() - - # ceate result array with type given by FPTR data - cdef utils.dpnp_descriptor result = utils.create_output_descriptor(result_shape, - kernel_data.return_type, - None, - device=m_obj.sycl_device, - usm_type=m_obj.usm_type, - sycl_queue=m_obj.sycl_queue) - - result_sycl_queue = result.get_array().sycl_queue - - cdef c_dpctl.SyclQueue q = result_sycl_queue - cdef c_dpctl.DPCTLSyclQueueRef q_ref = q.get_queue_ref() - - cdef custom_1in_1out_func_ptr_t func = kernel_data.ptr - cdef c_dpctl.DPCTLSyclEventRef event_ref = func(q_ref, - m.get_data(), - result.get_data(), - k, - input_shape.data(), - result_shape.data(), - m.ndim, - result.ndim, - NULL) # dep_events_ref - - with nogil: c_dpctl.DPCTLEvent_WaitAndThrow(event_ref) - c_dpctl.DPCTLEvent_Delete(event_ref) - - return result - - -cpdef utils.dpnp_descriptor dpnp_triu(utils.dpnp_descriptor m, int k): - cdef shape_type_c input_shape = m.shape - cdef shape_type_c result_shape - - if m.ndim == 1: - result_shape = (m.shape[0], m.shape[0]) - 
else: - result_shape = m.shape - - cdef DPNPFuncType param1_type = dpnp_dtype_to_DPNPFuncType(m.dtype) - cdef DPNPFuncData kernel_data = get_dpnp_function_ptr(DPNP_FN_TRIU_EXT, param1_type, param1_type) - - m_obj = m.get_array() - - # ceate result array with type given by FPTR data - cdef utils.dpnp_descriptor result = utils.create_output_descriptor(result_shape, - kernel_data.return_type, - None, - device=m_obj.sycl_device, - usm_type=m_obj.usm_type, - sycl_queue=m_obj.sycl_queue) - - result_sycl_queue = result.get_array().sycl_queue - - cdef c_dpctl.SyclQueue q = result_sycl_queue - cdef c_dpctl.DPCTLSyclQueueRef q_ref = q.get_queue_ref() - - cdef custom_1in_1out_func_ptr_t func = kernel_data.ptr - cdef c_dpctl.DPCTLSyclEventRef event_ref = func(q_ref, - m.get_data(), - result.get_data(), - k, - input_shape.data(), - result_shape.data(), - m.ndim, - result.ndim, - NULL) # dep_events_ref - - with nogil: c_dpctl.DPCTLEvent_WaitAndThrow(event_ref) - c_dpctl.DPCTLEvent_Delete(event_ref) - - return result - - cpdef utils.dpnp_descriptor dpnp_vander(utils.dpnp_descriptor x1, int N, int increasing): cdef DPNPFuncType param1_type = dpnp_dtype_to_DPNPFuncType(x1.dtype) cdef DPNPFuncData kernel_data = get_dpnp_function_ptr(DPNP_FN_VANDER_EXT, param1_type, DPNP_FT_NONE) diff --git a/dpnp/dpnp_container.py b/dpnp/dpnp_container.py index 7065e497652..75e20f8a0cb 100644 --- a/dpnp/dpnp_container.py +++ b/dpnp/dpnp_container.py @@ -48,6 +48,8 @@ "eye", "full", "ones" + "tril", + "triu", "zeros", ] @@ -200,6 +202,18 @@ def ones(shape, return dpnp_array(array_obj.shape, buffer=array_obj, order=order) +def tril(x1, /, *, k=0): + """"Creates `dpnp_array` as lower triangular part of an input array.""" + array_obj = dpt.tril(x1.get_array() if isinstance(x1, dpnp_array) else x1, k) + return dpnp_array(array_obj.shape, buffer=array_obj, order="K") + + +def triu(x1, /, *, k=0): + """"Creates `dpnp_array` as upper triangular part of an input array.""" + array_obj = 
dpt.triu(x1.get_array() if isinstance(x1, dpnp_array) else x1, k) + return dpnp_array(array_obj.shape, buffer=array_obj, order="K") + + def zeros(shape, *, dtype=None, diff --git a/dpnp/dpnp_iface_arraycreation.py b/dpnp/dpnp_iface_arraycreation.py index 1740b1d6001..5b062a346b9 100644 --- a/dpnp/dpnp_iface_arraycreation.py +++ b/dpnp/dpnp_iface_arraycreation.py @@ -42,6 +42,7 @@ import numpy import dpnp +import operator import dpnp.config as config from dpnp.dpnp_algo import * @@ -1332,7 +1333,7 @@ def tri(N, M=None, k=0, dtype=dpnp.float, **kwargs): return call_origin(numpy.tri, N, M, k, dtype, **kwargs) -def tril(x1, k=0): +def tril(x1, /, *, k=0): """ Lower triangle of an array. @@ -1340,6 +1341,12 @@ def tril(x1, k=0): For full documentation refer to :obj:`numpy.tril`. + Limitations + ----------- + Parameter `x1` is supported as :class:`dpnp.dpnp_array` or :class:`dpctl.tensor.usm_ndarray` with two or more dimensions. + Parameter `k` is supported only of integer data type. + Otherwise the function will be executed sequentially on CPU. + Examples -------- >>> import dpnp as np @@ -1351,17 +1358,25 @@ def tril(x1, k=0): """ - x1_desc = dpnp.get_dpnp_descriptor(x1, copy_when_nondefault_queue=False) - if x1_desc: - if not isinstance(k, int): - pass - else: - return dpnp_tril(x1_desc, k).get_pyobj() + _k = None + try: + _k = operator.index(k) + except TypeError: + pass + + if not isinstance(x1, (dpnp.ndarray, dpt.usm_ndarray)): + pass + elif x1.ndim < 2: + pass + elif _k is None: + pass + else: + return dpnp_container.tril(x1, k=_k) return call_origin(numpy.tril, x1, k) -def triu(x1, k=0): +def triu(x1, /, *, k=0): """ Upper triangle of an array. @@ -1370,6 +1385,12 @@ def triu(x1, k=0): For full documentation refer to :obj:`numpy.triu`. + Limitations + ----------- + Parameter `x1` is supported as :class:`dpnp.dpnp_array` or :class:`dpctl.tensor.usm_ndarray` with two or more dimensions. + Parameter `k` is supported only of integer data type. 
+ Otherwise the function will be executed sequentially on CPU. + Examples -------- >>> import dpnp as np @@ -1381,12 +1402,20 @@ def triu(x1, k=0): """ - x1_desc = dpnp.get_dpnp_descriptor(x1, copy_when_nondefault_queue=False) - if x1_desc: - if not isinstance(k, int): - pass - else: - return dpnp_triu(x1_desc, k).get_pyobj() + _k = None + try: + _k = operator.index(k) + except TypeError: + pass + + if not isinstance(x1, (dpnp.ndarray, dpt.usm_ndarray)): + pass + elif x1.ndim < 2: + pass + elif _k is None: + pass + else: + return dpnp_container.triu(x1, k=_k) return call_origin(numpy.triu, x1, k) diff --git a/tests/test_arraycreation.py b/tests/test_arraycreation.py index 833ea6109c3..fe371dbece6 100644 --- a/tests/test_arraycreation.py +++ b/tests/test_arraycreation.py @@ -15,6 +15,7 @@ ) import tempfile +import operator @pytest.mark.parametrize("start", @@ -258,48 +259,48 @@ def test_tri_default_dtype(): @pytest.mark.parametrize("k", - [-6, -5, -4, -3, -2, -1, 0, 1, 2, 3, 4, 5, 6], - ids=['-6', '-5', '-4', '-3', '-2', '-1', '0', '1', '2', '3', '4', '5', '6']) + [-3, -2, -1, 0, 1, 2, 3, 4, 5, + numpy.array(1), dpnp.array(2), dpt.asarray(3)], + ids=['-3', '-2', '-1', '0', '1', '2', '3', '4', '5', + 'np.array(1)', 'dpnp.array(2)', 'dpt.asarray(3)']) @pytest.mark.parametrize("m", - [[0, 1, 2, 3, 4], - [1, 1, 1, 1, 1], - [[0, 0], [0, 0]], + [[[0, 0], [0, 0]], [[1, 2], [1, 2]], [[1, 2], [3, 4]], [[0, 1, 2], [3, 4, 5], [6, 7, 8]], [[0, 1, 2, 3, 4], [5, 6, 7, 8, 9]]], - ids=['[0, 1, 2, 3, 4]', - '[1, 1, 1, 1, 1]', - '[[0, 0], [0, 0]]', + ids=['[[0, 0], [0, 0]]', '[[1, 2], [1, 2]]', '[[1, 2], [3, 4]]', '[[0, 1, 2], [3, 4, 5], [6, 7, 8]]', '[[0, 1, 2, 3, 4], [5, 6, 7, 8, 9]]']) -def test_tril(m, k): - a = numpy.array(m) +@pytest.mark.parametrize("dtype", get_all_dtypes(no_float16=False)) +def test_tril(m, k, dtype): + a = numpy.array(m, dtype=dtype) ia = dpnp.array(a) - expected = numpy.tril(a, k) - result = dpnp.tril(ia, k) + expected = numpy.tril(a, k=operator.index(k)) 
+ result = dpnp.tril(ia, k=k) assert_array_equal(expected, result) @pytest.mark.parametrize("k", - [-4, -3, -2, -1, 0, 1, 2, 3, 4], - ids=['-4', '-3', '-2', '-1', '0', '1', '2', '3', '4']) + [-3, -2, -1, 0, 1, 2, 3, 4, 5, + numpy.array(1), dpnp.array(2), dpt.asarray(3)], + ids=['-3', '-2', '-1', '0', '1', '2', '3', '4', '5', + 'np.array(1)', 'dpnp.array(2)', 'dpt.asarray(3)']) @pytest.mark.parametrize("m", - [[0, 1, 2, 3, 4], - [[1, 2], [3, 4]], + [[[1, 2], [3, 4]], [[0, 1, 2], [3, 4, 5], [6, 7, 8]], [[0, 1, 2, 3, 4], [5, 6, 7, 8, 9]]], - ids=['[0, 1, 2, 3, 4]', - '[[1, 2], [3, 4]]', + ids=['[[1, 2], [3, 4]]', '[[0, 1, 2], [3, 4, 5], [6, 7, 8]]', '[[0, 1, 2, 3, 4], [5, 6, 7, 8, 9]]']) -def test_triu(m, k): - a = numpy.array(m) +@pytest.mark.parametrize("dtype", get_all_dtypes(no_float16=False)) +def test_triu(m, k, dtype): + a = numpy.array(m, dtype=dtype) ia = dpnp.array(a) - expected = numpy.triu(a, k) - result = dpnp.triu(ia, k) + expected = numpy.triu(a, k=operator.index(k)) + result = dpnp.triu(ia, k=k) assert_array_equal(expected, result) @@ -309,8 +310,8 @@ def test_triu(m, k): def test_triu_size_null(k): a = numpy.ones(shape=(1, 2, 0)) ia = dpnp.array(a) - expected = numpy.triu(a, k) - result = dpnp.triu(ia, k) + expected = numpy.triu(a, k=k) + result = dpnp.triu(ia, k=k) assert_array_equal(expected, result) diff --git a/tests/test_sycl_queue.py b/tests/test_sycl_queue.py index bc42f70b370..77967a46af0 100644 --- a/tests/test_sycl_queue.py +++ b/tests/test_sycl_queue.py @@ -161,6 +161,16 @@ def test_array_creation_like(func, kwargs, device_x, device_y): assert_sycl_queue_equal(y.sycl_queue, x.to_device(device_y).sycl_queue) +@pytest.mark.parametrize("func", ["tril", "triu"], ids=["tril", "triu"]) +@pytest.mark.parametrize("device", + valid_devices, + ids=[device.filter_string for device in valid_devices]) +def test_tril_triu(func, device): + x0 = dpnp.ones((3,3), device=device) + x = getattr(dpnp, func)(x0) + assert_sycl_queue_equal(x.sycl_queue, 
x0.sycl_queue) + + @pytest.mark.usefixtures("allow_fall_back_on_numpy") @pytest.mark.parametrize( "func,data", diff --git a/tests/test_usm_type.py b/tests/test_usm_type.py index 15b853b3bfa..91831648813 100644 --- a/tests/test_usm_type.py +++ b/tests/test_usm_type.py @@ -63,6 +63,16 @@ def test_array_creation(func, args, usm_type_x, usm_type_y): assert x.usm_type == usm_type_x assert y.usm_type == usm_type_y + +@pytest.mark.skip() +@pytest.mark.parametrize("func", ["tril", "triu"], ids=["tril", "triu"]) +@pytest.mark.parametrize("usm_type", list_of_usm_types, ids=list_of_usm_types) +def test_tril_triu(func, usm_type): + x0 = dp.ones((3,3), usm_type=usm_type) + x = getattr(dp, func)(x0) + assert x.usm_type == usm_type + + @pytest.mark.parametrize("op", ['equal', 'greater', 'greater_equal', 'less', 'less_equal', 'logical_and', 'logical_or', 'logical_xor', 'not_equal'], diff --git a/tests/third_party/cupy/creation_tests/test_matrix.py b/tests/third_party/cupy/creation_tests/test_matrix.py index a5471f213eb..fe144cbc58c 100644 --- a/tests/third_party/cupy/creation_tests/test_matrix.py +++ b/tests/third_party/cupy/creation_tests/test_matrix.py @@ -140,6 +140,7 @@ def test_tri_posi(self, xp, dtype): {'shape': (2, 3, 4)}, ) @testing.gpu +@pytest.mark.usefixtures("allow_fall_back_on_numpy") class TestTriLowerAndUpper(unittest.TestCase): @testing.for_all_dtypes(no_complex=True) @@ -148,7 +149,6 @@ def test_tril(self, xp, dtype): m = testing.shaped_arange(self.shape, xp, dtype) return xp.tril(m) - @pytest.mark.usefixtures("allow_fall_back_on_numpy") @testing.numpy_cupy_array_equal() def test_tril_array_like(self, xp): return xp.tril([[1, 2], [3, 4]]) @@ -157,13 +157,13 @@ def test_tril_array_like(self, xp): @testing.numpy_cupy_array_equal() def test_tril_nega(self, xp, dtype): m = testing.shaped_arange(self.shape, xp, dtype) - return xp.tril(m, -1) + return xp.tril(m, k=-1) @testing.for_all_dtypes(no_complex=True) @testing.numpy_cupy_array_equal() def test_tril_posi(self, 
xp, dtype): m = testing.shaped_arange(self.shape, xp, dtype) - return xp.tril(m, 1) + return xp.tril(m, k=1) @testing.for_all_dtypes(no_complex=True) @testing.numpy_cupy_array_equal() @@ -171,7 +171,6 @@ def test_triu(self, xp, dtype): m = testing.shaped_arange(self.shape, xp, dtype) return xp.triu(m) - @pytest.mark.usefixtures("allow_fall_back_on_numpy") @testing.numpy_cupy_array_equal() def test_triu_array_like(self, xp): return xp.triu([[1, 2], [3, 4]]) @@ -180,10 +179,10 @@ def test_triu_array_like(self, xp): @testing.numpy_cupy_array_equal() def test_triu_nega(self, xp, dtype): m = testing.shaped_arange(self.shape, xp, dtype) - return xp.triu(m, -1) + return xp.triu(m, k=-1) @testing.for_all_dtypes(no_complex=True) @testing.numpy_cupy_array_equal() def test_triu_posi(self, xp, dtype): m = testing.shaped_arange(self.shape, xp, dtype) - return xp.triu(m, 1) + return xp.triu(m, k=1) From a516f1cbd123df7458aa430bd01c7722589660dc Mon Sep 17 00:00:00 2001 From: Anton <100830759+antonwolfy@users.noreply.github.com> Date: Thu, 16 Feb 2023 17:55:22 +0100 Subject: [PATCH 25/32] dpnp.subtract() doesn't work properly with a scalar (#1292) * dpnp.add() doesn't work properly with a scalar * dpnp.subtract() doesn't work properly with a scalar * USM type in operations with a scalar * Rollback excluded 'floor_divide' tests from skip scope * Explicit vector operations instead of saturation functions * Use std::int32_t and std::int64_t types * Tune tail's loop of kernel for the vector op --- .../include/dpnp_gen_2arg_3type_tbl.hpp | 14 +- dpnp/backend/kernels/dpnp_krnl_elemwise.cpp | 142 +++++++----------- dpnp/dpnp_array.py | 4 +- dpnp/dpnp_iface_mathematical.py | 85 ++++++----- tests/skipped_tests.tbl | 8 +- tests/skipped_tests_gpu.tbl | 14 +- tests/test_arraycreation.py | 8 +- tests/test_mathematical.py | 89 ++++++++--- tests/test_usm_type.py | 13 ++ .../cupy/creation_tests/test_from_data.py | 1 + .../cupy/math_tests/test_arithmetic.py | 48 +++--- 11 files changed, 225 
insertions(+), 201 deletions(-) diff --git a/dpnp/backend/include/dpnp_gen_2arg_3type_tbl.hpp b/dpnp/backend/include/dpnp_gen_2arg_3type_tbl.hpp index 33f5e0d19a4..9a3c69aee8e 100644 --- a/dpnp/backend/include/dpnp_gen_2arg_3type_tbl.hpp +++ b/dpnp/backend/include/dpnp_gen_2arg_3type_tbl.hpp @@ -111,8 +111,8 @@ MACRO_2ARG_3TYPES_OP(dpnp_add_c, input1_elem + input2_elem, - sycl::add_sat(x1, x2), - MACRO_UNPACK_TYPES(int, long), + x1 + x2, + MACRO_UNPACK_TYPES(bool, std::int32_t, std::int64_t), oneapi::mkl::vm::add, MACRO_UNPACK_TYPES(float, double, std::complex, std::complex)) @@ -170,8 +170,8 @@ MACRO_2ARG_3TYPES_OP(dpnp_minimum_c, // requires multiplication shape1[10] with shape2[10,1] and result expected as shape[10,10] MACRO_2ARG_3TYPES_OP(dpnp_multiply_c, input1_elem* input2_elem, - nullptr, - std::false_type, + x1 * x2, + MACRO_UNPACK_TYPES(bool, std::int32_t, std::int64_t), oneapi::mkl::vm::mul, MACRO_UNPACK_TYPES(float, double, std::complex, std::complex)) @@ -184,9 +184,9 @@ MACRO_2ARG_3TYPES_OP(dpnp_power_c, MACRO_2ARG_3TYPES_OP(dpnp_subtract_c, input1_elem - input2_elem, - nullptr, - std::false_type, + x1 - x2, + MACRO_UNPACK_TYPES(bool, std::int32_t, std::int64_t), oneapi::mkl::vm::sub, - MACRO_UNPACK_TYPES(float, double)) + MACRO_UNPACK_TYPES(float, double, std::complex, std::complex)) #undef MACRO_2ARG_3TYPES_OP diff --git a/dpnp/backend/kernels/dpnp_krnl_elemwise.cpp b/dpnp/backend/kernels/dpnp_krnl_elemwise.cpp index 32097d321a7..057e0805db6 100644 --- a/dpnp/backend/kernels/dpnp_krnl_elemwise.cpp +++ b/dpnp/backend/kernels/dpnp_krnl_elemwise.cpp @@ -881,9 +881,9 @@ static void func_map_init_elemwise_1arg_1type(func_map_t& fmap) \ sycl::queue q = *(reinterpret_cast(q_ref)); \ \ - _DataType_input1* input1_data = static_cast<_DataType_input1 *>(const_cast(input1_in)); \ - _DataType_input2* input2_data = static_cast<_DataType_input2 *>(const_cast(input2_in)); \ - _DataType_output* result = static_cast<_DataType_output *>(result_out); \ + 
_DataType_input1* input1_data = static_cast<_DataType_input1*>(const_cast(input1_in)); \ + _DataType_input2* input2_data = static_cast<_DataType_input2*>(const_cast(input2_in)); \ + _DataType_output* result = static_cast<_DataType_output*>(result_out); \ \ bool use_broadcasting = !array_equal(input1_shape, input1_ndim, input2_shape, input2_ndim); \ \ @@ -896,8 +896,7 @@ static void func_map_init_elemwise_1arg_1type(func_map_t& fmap) shape_elem_type* input2_shape_offsets = new shape_elem_type[input2_ndim]; \ \ get_shape_offsets_inkernel(input2_shape, input2_ndim, input2_shape_offsets); \ - use_strides = \ - use_strides || !array_equal(input2_strides, input2_ndim, input2_shape_offsets, input2_ndim); \ + use_strides = use_strides || !array_equal(input2_strides, input2_ndim, input2_shape_offsets, input2_ndim); \ delete[] input2_shape_offsets; \ \ sycl::event event; \ @@ -907,19 +906,17 @@ static void func_map_init_elemwise_1arg_1type(func_map_t& fmap) { \ DPNPC_id<_DataType_input1>* input1_it; \ const size_t input1_it_size_in_bytes = sizeof(DPNPC_id<_DataType_input1>); \ - input1_it = reinterpret_cast*>(dpnp_memory_alloc_c(q_ref, \ - input1_it_size_in_bytes)); \ - new (input1_it) \ - DPNPC_id<_DataType_input1>(q_ref, input1_data, input1_shape, input1_strides, input1_ndim); \ + input1_it = \ + reinterpret_cast*>(dpnp_memory_alloc_c(q_ref, input1_it_size_in_bytes)); \ + new (input1_it) DPNPC_id<_DataType_input1>(q_ref, input1_data, input1_shape, input1_strides, input1_ndim); \ \ input1_it->broadcast_to_shape(result_shape, result_ndim); \ \ DPNPC_id<_DataType_input2>* input2_it; \ const size_t input2_it_size_in_bytes = sizeof(DPNPC_id<_DataType_input2>); \ - input2_it = reinterpret_cast*>(dpnp_memory_alloc_c(q_ref, \ - input2_it_size_in_bytes)); \ - new (input2_it) \ - DPNPC_id<_DataType_input2>(q_ref, input2_data, input2_shape, input2_strides, input2_ndim); \ + input2_it = \ + reinterpret_cast*>(dpnp_memory_alloc_c(q_ref, input2_it_size_in_bytes)); \ + new (input2_it) 
DPNPC_id<_DataType_input2>(q_ref, input2_data, input2_shape, input2_strides, input2_ndim); \ \ input2_it->broadcast_to_shape(result_shape, result_ndim); \ \ @@ -957,27 +954,26 @@ static void func_map_init_elemwise_1arg_1type(func_map_t& fmap) using usm_host_allocatorT = sycl::usm_allocator; \ \ size_t strides_size = 3 * result_ndim; \ - shape_elem_type *dev_strides_data = sycl::malloc_device(strides_size, q); \ + shape_elem_type* dev_strides_data = sycl::malloc_device(strides_size, q); \ \ /* create host temporary for packed strides managed by shared pointer */ \ - auto strides_host_packed = std::vector(strides_size, \ - usm_host_allocatorT(q)); \ + auto strides_host_packed = \ + std::vector(strides_size, usm_host_allocatorT(q)); \ \ /* packed vector is concatenation of result_strides, input1_strides and input2_strides */ \ std::copy(result_strides, result_strides + result_ndim, strides_host_packed.begin()); \ std::copy(input1_strides, input1_strides + result_ndim, strides_host_packed.begin() + result_ndim); \ std::copy(input2_strides, input2_strides + result_ndim, strides_host_packed.begin() + 2 * result_ndim); \ \ - auto copy_strides_ev = q.copy(strides_host_packed.data(), \ - dev_strides_data, \ - strides_host_packed.size()); \ + auto copy_strides_ev = \ + q.copy(strides_host_packed.data(), dev_strides_data, strides_host_packed.size()); \ \ auto kernel_parallel_for_func = [=](sycl::id<1> global_id) { \ const size_t output_id = global_id[0]; /* for (size_t i = 0; i < result_size; ++i) */ \ { \ - const shape_elem_type *result_strides_data = &dev_strides_data[0]; \ - const shape_elem_type *input1_strides_data = &dev_strides_data[1]; \ - const shape_elem_type *input2_strides_data = &dev_strides_data[2]; \ + const shape_elem_type* result_strides_data = &dev_strides_data[0]; \ + const shape_elem_type* input1_strides_data = &dev_strides_data[1]; \ + const shape_elem_type* input2_strides_data = &dev_strides_data[2]; \ \ size_t input1_id = 0; \ size_t input2_id = 0; \ @@ 
-1013,8 +1009,10 @@ static void func_map_init_elemwise_1arg_1type(func_map_t& fmap) { \ event = __mkl_operation__(q, result_size, input1_data, input2_data, result); \ } \ - else if constexpr (none_of_both_types<_DataType_input1, _DataType_input2, \ - std::complex, std::complex>) \ + else if constexpr (none_of_both_types<_DataType_input1, \ + _DataType_input2, \ + std::complex, \ + std::complex>) \ { \ constexpr size_t lws = 64; \ constexpr unsigned int vec_sz = 8; \ @@ -1026,8 +1024,8 @@ static void func_map_init_elemwise_1arg_1type(func_map_t& fmap) auto kernel_parallel_for_func = [=](sycl::nd_item<1> nd_it) { \ auto sg = nd_it.get_sub_group(); \ const auto max_sg_size = sg.get_max_local_range()[0]; \ - const size_t start = vec_sz * (nd_it.get_group(0) * nd_it.get_local_range(0) + \ - sg.get_group_id()[0] * max_sg_size); \ + const size_t start = \ + vec_sz * (nd_it.get_group(0) * nd_it.get_local_range(0) + sg.get_group_id()[0] * max_sg_size); \ \ if (start + static_cast(vec_sz) * max_sg_size < result_size) \ { \ @@ -1036,6 +1034,7 @@ static void func_map_init_elemwise_1arg_1type(func_map_t& fmap) sycl::vec<_DataType_input2, vec_sz> x2 = \ sg.load(sycl::multi_ptr<_DataType_input2, global_space>(&input2_data[start])); \ sycl::vec<_DataType_output, vec_sz> res_vec; \ + \ if constexpr (both_types_are_same<_DataType_input1, _DataType_input2, __vec_types__>) \ { \ res_vec = __vec_operation__; \ @@ -1050,11 +1049,10 @@ static void func_map_init_elemwise_1arg_1type(func_map_t& fmap) } \ } \ sg.store(sycl::multi_ptr<_DataType_output, global_space>(&result[start]), res_vec); \ - \ } \ else \ { \ - for (size_t k = start; k < result_size; ++k) \ + for (size_t k = start + sg.get_local_id()[0]; k < result_size; k += max_sg_size) \ { \ const _DataType_output input1_elem = input1_data[k]; \ const _DataType_output input2_elem = input2_data[k]; \ @@ -1064,8 +1062,8 @@ static void func_map_init_elemwise_1arg_1type(func_map_t& fmap) }; \ \ auto kernel_func = [&](sycl::handler& cgh) { 
\ - sycl::stream out(65536, 128, cgh);\ - cgh.parallel_for>(\ + cgh.parallel_for< \ + class __name__##_sg_kernel<_DataType_output, _DataType_input1, _DataType_input2>>( \ sycl::nd_range<1>(gws_range, lws_range), kernel_parallel_for_func); \ }; \ event = q.submit(kernel_func); \ @@ -1078,7 +1076,6 @@ static void func_map_init_elemwise_1arg_1type(func_map_t& fmap) const _DataType_output input1_elem = input1_data[i]; \ const _DataType_output input2_elem = input2_data[i]; \ result[i] = __operation__; \ - \ }; \ auto kernel_func = [&](sycl::handler& cgh) { \ cgh.parallel_for>( \ @@ -1112,26 +1109,25 @@ static void func_map_init_elemwise_1arg_1type(func_map_t& fmap) { \ DPCTLSyclQueueRef q_ref = reinterpret_cast(&DPNP_QUEUE); \ DPCTLEventVectorRef dep_event_vec_ref = nullptr; \ - DPCTLSyclEventRef event_ref = __name__<_DataType_output, _DataType_input1, _DataType_input2>( \ - q_ref, \ - result_out, \ - result_size, \ - result_ndim, \ - result_shape, \ - result_strides, \ - input1_in, \ - input1_size, \ - input1_ndim, \ - input1_shape, \ - input1_strides, \ - input2_in, \ - input2_size, \ - input2_ndim, \ - input2_shape, \ - input2_strides, \ - where, \ - dep_event_vec_ref \ - ); \ + DPCTLSyclEventRef event_ref = \ + __name__<_DataType_output, _DataType_input1, _DataType_input2>(q_ref, \ + result_out, \ + result_size, \ + result_ndim, \ + result_shape, \ + result_strides, \ + input1_in, \ + input1_size, \ + input1_ndim, \ + input1_shape, \ + input1_strides, \ + input2_in, \ + input2_size, \ + input2_ndim, \ + input2_shape, \ + input2_strides, \ + where, \ + dep_event_vec_ref); \ DPCTLEvent_WaitAndThrow(event_ref); \ DPCTLEvent_Delete(event_ref); \ } \ @@ -1172,9 +1168,8 @@ static void func_map_init_elemwise_1arg_1type(func_map_t& fmap) const shape_elem_type*, \ const shape_elem_type*, \ const size_t*, \ - const DPCTLEventVectorRef) = __name__<_DataType_output, \ - _DataType_input1, \ - _DataType_input2>; + const DPCTLEventVectorRef) = \ + __name__<_DataType_output, 
_DataType_input1, _DataType_input2>; #include @@ -1193,6 +1188,12 @@ static void func_map_elemwise_2arg_3type_core(func_map_t& fmap) func_type_map_t::find_type, func_type_map_t::find_type>}), ...); + ((fmap[DPNPFuncName::DPNP_FN_SUBTRACT_EXT][FT1][FTs] = + {populate_func_types(), + (void*)dpnp_subtract_c_ext()>, + func_type_map_t::find_type, + func_type_map_t::find_type>}), + ...); } template @@ -1878,39 +1879,6 @@ static void func_map_init_elemwise_2arg_3type(func_map_t& fmap) fmap[DPNPFuncName::DPNP_FN_SUBTRACT][eft_DBL][eft_DBL] = { eft_DBL, (void*)dpnp_subtract_c_default}; - fmap[DPNPFuncName::DPNP_FN_SUBTRACT_EXT][eft_INT][eft_INT] = { - eft_INT, (void*)dpnp_subtract_c_ext}; - fmap[DPNPFuncName::DPNP_FN_SUBTRACT_EXT][eft_INT][eft_LNG] = { - eft_LNG, (void*)dpnp_subtract_c_ext}; - fmap[DPNPFuncName::DPNP_FN_SUBTRACT_EXT][eft_INT][eft_FLT] = { - eft_DBL, (void*)dpnp_subtract_c_ext}; - fmap[DPNPFuncName::DPNP_FN_SUBTRACT_EXT][eft_INT][eft_DBL] = { - eft_DBL, (void*)dpnp_subtract_c_ext}; - fmap[DPNPFuncName::DPNP_FN_SUBTRACT_EXT][eft_LNG][eft_INT] = { - eft_LNG, (void*)dpnp_subtract_c_ext}; - fmap[DPNPFuncName::DPNP_FN_SUBTRACT_EXT][eft_LNG][eft_LNG] = { - eft_LNG, (void*)dpnp_subtract_c_ext}; - fmap[DPNPFuncName::DPNP_FN_SUBTRACT_EXT][eft_LNG][eft_FLT] = { - eft_DBL, (void*)dpnp_subtract_c_ext}; - fmap[DPNPFuncName::DPNP_FN_SUBTRACT_EXT][eft_LNG][eft_DBL] = { - eft_DBL, (void*)dpnp_subtract_c_ext}; - fmap[DPNPFuncName::DPNP_FN_SUBTRACT_EXT][eft_FLT][eft_INT] = { - eft_DBL, (void*)dpnp_subtract_c_ext}; - fmap[DPNPFuncName::DPNP_FN_SUBTRACT_EXT][eft_FLT][eft_LNG] = { - eft_DBL, (void*)dpnp_subtract_c_ext}; - fmap[DPNPFuncName::DPNP_FN_SUBTRACT_EXT][eft_FLT][eft_FLT] = { - eft_FLT, (void*)dpnp_subtract_c_ext}; - fmap[DPNPFuncName::DPNP_FN_SUBTRACT_EXT][eft_FLT][eft_DBL] = { - eft_DBL, (void*)dpnp_subtract_c_ext}; - fmap[DPNPFuncName::DPNP_FN_SUBTRACT_EXT][eft_DBL][eft_INT] = { - eft_DBL, (void*)dpnp_subtract_c_ext}; - 
fmap[DPNPFuncName::DPNP_FN_SUBTRACT_EXT][eft_DBL][eft_LNG] = { - eft_DBL, (void*)dpnp_subtract_c_ext}; - fmap[DPNPFuncName::DPNP_FN_SUBTRACT_EXT][eft_DBL][eft_FLT] = { - eft_DBL, (void*)dpnp_subtract_c_ext}; - fmap[DPNPFuncName::DPNP_FN_SUBTRACT_EXT][eft_DBL][eft_DBL] = { - eft_DBL, (void*)dpnp_subtract_c_ext}; - func_map_elemwise_2arg_3type_helper(fmap); return; diff --git a/dpnp/dpnp_array.py b/dpnp/dpnp_array.py index 473e08c83fc..420267ff2e3 100644 --- a/dpnp/dpnp_array.py +++ b/dpnp/dpnp_array.py @@ -273,7 +273,9 @@ def __rmul__(self, other): # '__rpow__', # '__rrshift__', # '__rshift__', - # '__rsub__', + + def __rsub__(self, other): + return dpnp.subtract(other, self) def __rtruediv__(self, other): return dpnp.true_divide(other, self) diff --git a/dpnp/dpnp_iface_mathematical.py b/dpnp/dpnp_iface_mathematical.py index e254e916b84..64886de23c0 100644 --- a/dpnp/dpnp_iface_mathematical.py +++ b/dpnp/dpnp_iface_mathematical.py @@ -215,7 +215,7 @@ def add(x1, if x1_desc and x2_desc: return dpnp_add(x1_desc, x2_desc, dtype=dtype, out=out, where=where).get_pyobj() - return call_origin(numpy.add, x1, x2, dtype=dtype, out=out, where=where, **kwargs) + return call_origin(numpy.add, x1, x2, out=out, where=where, dtype=dtype, subok=subok, **kwargs) def around(x1, decimals=0, out=None): @@ -1145,7 +1145,7 @@ def multiply(x1, if x1_desc and x2_desc: return dpnp_multiply(x1_desc, x2_desc, dtype=dtype, out=out, where=where).get_pyobj() - return call_origin(numpy.multiply, x1, x2, dtype=dtype, out=out, where=where, **kwargs) + return call_origin(numpy.multiply, x1, x2, out=out, where=where, dtype=dtype, subok=subok, **kwargs) def nancumprod(x1, **kwargs): @@ -1520,60 +1520,69 @@ def sign(x1, **kwargs): return call_origin(numpy.sign, x1, **kwargs) -def subtract(x1, x2, dtype=None, out=None, where=True, **kwargs): +def subtract(x1, + x2, + /, + out=None, + *, + where=True, + dtype=None, + subok=True, + **kwargs): """ Subtract arguments, element-wise. 
For full documentation refer to :obj:`numpy.subtract`. + Returns + ------- + y : dpnp.ndarray + The difference of `x1` and `x2`, element-wise. + Limitations ----------- - Parameters ``x1`` and ``x2`` are supported as either :obj:`dpnp.ndarray` or scalar. - Parameters ``dtype``, ``out`` and ``where`` are supported with their default values. + Parameters `x1` and `x2` are supported as either :class:`dpnp.ndarray` or scalar, + but not both (at least either `x1` or `x2` should be as :class:`dpnp.ndarray`). + Parameters `out`, `where`, `dtype` and `subok` are supported with their default values. Keyword arguments ``kwargs`` are currently unsupported. - Otherwise the functions will be executed sequentially on CPU. + Otherwise the function will be executed sequentially on CPU. Input array data types are limited by supported DPNP :ref:`Data types`. Example ------- - >>> import dpnp as np - >>> result = np.subtract(np.array([4, 3]), np.array([2, 7])) - >>> [x for x in result] + >>> import dpnp as dp + >>> result = dp.subtract(dp.array([4, 3]), dp.array([2, 7])) + >>> print(result) [2, -4] """ - x1_is_scalar = dpnp.isscalar(x1) - x2_is_scalar = dpnp.isscalar(x2) - x1_desc = dpnp.get_dpnp_descriptor(x1, copy_when_strides=False, copy_when_nondefault_queue=False) - x2_desc = dpnp.get_dpnp_descriptor(x2, copy_when_strides=False, copy_when_nondefault_queue=False) + if out is not None: + pass + elif where is not True: + pass + elif dtype is not None: + pass + elif subok is not True: + pass + elif dpnp.isscalar(x1) and dpnp.isscalar(x2): + # at least either x1 or x2 has to be an array + pass + else: + # get USM type and queue to copy scalar from the host memory into a USM allocation + usm_type, queue = get_usm_allocations([x1, x2]) if dpnp.isscalar(x1) or dpnp.isscalar(x2) else (None, None) - if x1_desc and x2_desc and not kwargs: - if not x1_desc and not x1_is_scalar: - pass - elif not x2_desc and not x2_is_scalar: - pass - elif x1_is_scalar and x2_is_scalar: - pass - elif x1_desc 
and x1_desc.ndim == 0: - pass - elif x1_desc and x1_desc.dtype == dpnp.bool: - pass - elif x2_desc and x2_desc.ndim == 0: - pass - elif x2_desc and x2_desc.dtype == dpnp.bool: - pass - elif dtype is not None: - pass - elif out is not None: - pass - elif not where: - pass - else: - out_desc = dpnp.get_dpnp_descriptor(out, copy_when_nondefault_queue=False) if out is not None else None - return dpnp_subtract(x1_desc, x2_desc, dtype=dtype, out=out_desc, where=where).get_pyobj() + x1_desc = dpnp.get_dpnp_descriptor(x1, copy_when_strides=False, copy_when_nondefault_queue=False, + alloc_usm_type=usm_type, alloc_queue=queue) + x2_desc = dpnp.get_dpnp_descriptor(x2, copy_when_strides=False, copy_when_nondefault_queue=False, + alloc_usm_type=usm_type, alloc_queue=queue) + if x1_desc and x2_desc: + if x1_desc.dtype == x2_desc.dtype == dpnp.bool: + raise TypeError("DPNP boolean subtract, the `-` operator, is not supported, " + "use the bitwise_xor, the `^` operator, or the logical_xor function instead.") + return dpnp_subtract(x1_desc, x2_desc, dtype=dtype, out=out, where=where).get_pyobj() - return call_origin(numpy.subtract, x1, x2, dtype=dtype, out=out, where=where, **kwargs) + return call_origin(numpy.subtract, x1, x2, out=out, where=where, dtype=dtype, subok=subok, **kwargs) def sum(x1, axis=None, dtype=None, out=None, keepdims=False, initial=None, where=True): diff --git a/tests/skipped_tests.tbl b/tests/skipped_tests.tbl index 53bdec8af0a..25d1fd1bc0f 100644 --- a/tests/skipped_tests.tbl +++ b/tests/skipped_tests.tbl @@ -389,7 +389,7 @@ tests/third_party/cupy/creation_tests/test_from_data.py::TestFromData::test_asar tests/third_party/cupy/creation_tests/test_from_data.py::TestFromData::test_ascontiguousarray_on_noncontiguous_array tests/third_party/cupy/creation_tests/test_from_data.py::TestFromData::test_asfortranarray_cuda_array_zero_dim tests/third_party/cupy/creation_tests/test_from_data.py::TestFromData::test_asfortranarray_cuda_array_zero_dim_dtype 
-tests/third_party/cupy/creation_tests/test_from_data.py::TestFromData::test_fromfile + tests/third_party/cupy/creation_tests/test_ranges.py::TestMeshgrid_param_0_{copy=False, indexing='xy', sparse=False}::test_meshgrid0 tests/third_party/cupy/creation_tests/test_ranges.py::TestMeshgrid_param_0_{copy=False, indexing='xy', sparse=False}::test_meshgrid1 tests/third_party/cupy/creation_tests/test_ranges.py::TestMeshgrid_param_0_{copy=False, indexing='xy', sparse=False}::test_meshgrid2 @@ -765,17 +765,15 @@ tests/third_party/cupy/math_tests/test_arithmetic.py::TestArithmeticBinary2_para tests/third_party/cupy/math_tests/test_arithmetic.py::TestArithmeticBinary2_param_547_{arg1=array([[1, 2, 3], [4, 5, 6]]), arg2=array([[0, 1, 2], [3, 4, 5]]), dtype=float64, name='remainder', use_dtype=False}::test_binary tests/third_party/cupy/math_tests/test_arithmetic.py::TestArithmeticBinary2_param_549_{arg1=array([[1, 2, 3], [4, 5, 6]]), arg2=array([[0, 1, 2], [3, 4, 5]]), dtype=float64, name='mod', use_dtype=False}::test_binary tests/third_party/cupy/math_tests/test_arithmetic.py::TestArithmeticModf::test_modf -tests/third_party/cupy/math_tests/test_arithmetic.py::TestArithmeticRaisesWithNumpyInput_param_0_{name='reciprocal', nargs=1}::test_raises_with_numpy_input + tests/third_party/cupy/math_tests/test_arithmetic.py::TestArithmeticRaisesWithNumpyInput_param_10_{name='remainder', nargs=2}::test_raises_with_numpy_input tests/third_party/cupy/math_tests/test_arithmetic.py::TestArithmeticRaisesWithNumpyInput_param_11_{name='mod', nargs=2}::test_raises_with_numpy_input tests/third_party/cupy/math_tests/test_arithmetic.py::TestArithmeticRaisesWithNumpyInput_param_1_{name='angle', nargs=1}::test_raises_with_numpy_input tests/third_party/cupy/math_tests/test_arithmetic.py::TestArithmeticRaisesWithNumpyInput_param_4_{name='divide', nargs=2}::test_raises_with_numpy_input tests/third_party/cupy/math_tests/test_arithmetic.py::TestArithmeticRaisesWithNumpyInput_param_5_{name='power', 
nargs=2}::test_raises_with_numpy_input -tests/third_party/cupy/math_tests/test_arithmetic.py::TestArithmeticRaisesWithNumpyInput_param_6_{name='subtract', nargs=2}::test_raises_with_numpy_input tests/third_party/cupy/math_tests/test_arithmetic.py::TestArithmeticRaisesWithNumpyInput_param_7_{name='true_divide', nargs=2}::test_raises_with_numpy_input tests/third_party/cupy/math_tests/test_arithmetic.py::TestArithmeticRaisesWithNumpyInput_param_8_{name='floor_divide', nargs=2}::test_raises_with_numpy_input -tests/third_party/cupy/math_tests/test_arithmetic.py::TestArithmeticRaisesWithNumpyInput_param_9_{name='fmod', nargs=2}::test_raises_with_numpy_input -tests/third_party/cupy/math_tests/test_arithmetic.py::TestBoolSubtract_param_3_{shape=(), xp=dpnp}::test_bool_subtract + tests/third_party/cupy/math_tests/test_explog.py::TestExplog::test_logaddexp tests/third_party/cupy/math_tests/test_explog.py::TestExplog::test_logaddexp2 tests/third_party/cupy/math_tests/test_floating.py::TestFloating::test_copysign_float diff --git a/tests/skipped_tests_gpu.tbl b/tests/skipped_tests_gpu.tbl index af2dbd783a4..34d1795cc98 100644 --- a/tests/skipped_tests_gpu.tbl +++ b/tests/skipped_tests_gpu.tbl @@ -18,7 +18,6 @@ tests/test_sycl_queue.py::test_1in_1out[opencl:gpu:0-conjugate-data2] tests/test_sycl_queue.py::test_1in_1out[opencl:gpu:0-copy-data3] tests/test_sycl_queue.py::test_1in_1out[opencl:gpu:0-cumprod-data4] tests/test_sycl_queue.py::test_1in_1out[opencl:gpu:0-cumsum-data5] -tests/test_sycl_queue.py::test_1in_1out[opencl:gpu:0-diff-data6] tests/test_sycl_queue.py::test_1in_1out[opencl:gpu:0-ediff1d-data7] tests/test_sycl_queue.py::test_1in_1out[opencl:gpu:0-fabs-data8] tests/test_sycl_queue.py::test_1in_1out[opencl:gpu:0-floor-data9] @@ -29,11 +28,9 @@ tests/test_sycl_queue.py::test_1in_1out[level_zero:gpu:0-conjugate-data2] tests/test_sycl_queue.py::test_1in_1out[level_zero:gpu:0-copy-data3] tests/test_sycl_queue.py::test_1in_1out[level_zero:gpu:0-cumprod-data4] 
tests/test_sycl_queue.py::test_1in_1out[level_zero:gpu:0-cumsum-data5] -tests/test_sycl_queue.py::test_1in_1out[level_zero:gpu:0-diff-data6] tests/test_sycl_queue.py::test_1in_1out[level_zero:gpu:0-ediff1d-data7] tests/test_sycl_queue.py::test_1in_1out[level_zero:gpu:0-fabs-data8] tests/test_sycl_queue.py::test_1in_1out[level_zero:gpu:0-floor-data9] -tests/test_sycl_queue.py::test_1in_1out[level_zero:gpu:0-gradient-data10] tests/test_sycl_queue.py::test_1in_1out[level_zero:gpu:0-nancumprod-data11] tests/test_sycl_queue.py::test_1in_1out[level_zero:gpu:0-nancumsum-data12] tests/test_sycl_queue.py::test_1in_1out[level_zero:gpu:0-nanprod-data13] @@ -91,6 +88,7 @@ tests/third_party/cupy/indexing_tests/test_insert.py::TestDiagIndicesInvalidValu tests/third_party/cupy/indexing_tests/test_insert.py::TestDiagIndicesFrom_param_0_{shape=(3, 3)}::test_diag_indices_from tests/third_party/cupy/indexing_tests/test_insert.py::TestDiagIndicesFrom_param_1_{shape=(0, 0)}::test_diag_indices_from tests/third_party/cupy/indexing_tests/test_insert.py::TestDiagIndicesFrom_param_2_{shape=(2, 2, 2)}::test_diag_indices_from + tests/third_party/cupy/math_tests/test_arithmetic.py::TestArithmeticBinary2_param_295_{arg1=array([[1., 2., 3.], [4., 5., 6.]], dtype=float32), arg2=array([[0, 1, 2], [3, 4, 5]]), dtype=float64, name='floor_divide', use_dtype=False}::test_binary tests/third_party/cupy/math_tests/test_arithmetic.py::TestArithmeticBinary2_param_303_{arg1=array([[1., 2., 3.], [4., 5., 6.]], dtype=float32), arg2=array([[0, 1, 2], [3, 4, 5]], dtype=int64), dtype=float64, name='floor_divide', use_dtype=False}::test_binary tests/third_party/cupy/math_tests/test_arithmetic.py::TestArithmeticBinary2_param_375_{arg1=array([[1., 2., 3.], [4., 5., 6.]]), arg2=array([[0, 1, 2], [3, 4, 5]]), dtype=float64, name='floor_divide', use_dtype=False}::test_binary @@ -103,6 +101,7 @@ tests/third_party/cupy/math_tests/test_arithmetic.py::TestArithmeticBinary2_para 
tests/third_party/cupy/math_tests/test_arithmetic.py::TestArithmeticBinary2_param_527_{arg1=array([[1, 2, 3], [4, 5, 6]], dtype=int64), arg2=array([[0., 1., 2.], [3., 4., 5.]]), dtype=float64, name='floor_divide', use_dtype=False}::test_binary tests/third_party/cupy/math_tests/test_arithmetic.py::TestArithmeticBinary2_param_535_{arg1=array([[1, 2, 3], [4, 5, 6]], dtype=int64), arg2=array([[0, 1, 2], [3, 4, 5]]), dtype=float64, name='floor_divide', use_dtype=False}::test_binary tests/third_party/cupy/math_tests/test_arithmetic.py::TestArithmeticBinary2_param_543_{arg1=array([[1, 2, 3], [4, 5, 6]], dtype=int64), arg2=array([[0, 1, 2], [3, 4, 5]], dtype=int64), dtype=float64, name='floor_divide', use_dtype=False}::test_binary + tests/third_party/cupy/math_tests/test_sumprod.py::TestSumprod::test_external_prod_all tests/third_party/cupy/math_tests/test_sumprod.py::TestSumprod::test_external_prod_axis tests/third_party/cupy/math_tests/test_sumprod.py::TestSumprod::test_external_sum_all @@ -565,7 +564,6 @@ tests/third_party/cupy/creation_tests/test_from_data.py::TestFromData::test_asar tests/third_party/cupy/creation_tests/test_from_data.py::TestFromData::test_ascontiguousarray_on_noncontiguous_array tests/third_party/cupy/creation_tests/test_from_data.py::TestFromData::test_asfortranarray_cuda_array_zero_dim tests/third_party/cupy/creation_tests/test_from_data.py::TestFromData::test_asfortranarray_cuda_array_zero_dim_dtype -tests/third_party/cupy/creation_tests/test_from_data.py::TestFromData::test_fromfile tests/third_party/cupy/creation_tests/test_ranges.py::TestMeshgrid_param_0_{copy=False, indexing='xy', sparse=False}::test_meshgrid0 tests/third_party/cupy/creation_tests/test_ranges.py::TestMeshgrid_param_0_{copy=False, indexing='xy', sparse=False}::test_meshgrid1 @@ -969,6 +967,7 @@ tests/third_party/cupy/manipulation_tests/test_tiling.py::TestTile_param_4_{reps tests/third_party/cupy/manipulation_tests/test_tiling.py::TestTile_param_5_{reps=(2, 3, 4, 
5)}::test_array_tile tests/third_party/cupy/manipulation_tests/test_transpose.py::TestTranspose::test_moveaxis_invalid5_2 tests/third_party/cupy/manipulation_tests/test_transpose.py::TestTranspose::test_moveaxis_invalid5_3 + tests/third_party/cupy/math_tests/test_arithmetic.py::TestArithmeticBinary2_param_279_{arg1=array([[1., 2., 3.], [4., 5., 6.]], dtype=float32), arg2=array([[0., 1., 2.], [3., 4., 5.]], dtype=float32), dtype=float64, name='floor_divide', use_dtype=False}::test_binary tests/third_party/cupy/math_tests/test_arithmetic.py::TestArithmeticBinary2_param_287_{arg1=array([[1., 2., 3.], [4., 5., 6.]], dtype=float32), arg2=array([[0., 1., 2.], [3., 4., 5.]]), dtype=float64, name='floor_divide', use_dtype=False}::test_binary tests/third_party/cupy/math_tests/test_arithmetic.py::TestArithmeticBinary2_param_295_{arg1=array([[1., 2., 3.], [4., 5., 6.]], dtype=float32), arg2=array([[0, 1, 2], [3, 4, 5]], dtype=int32), dtype=float64, name='floor_divide', use_dtype=False}::test_binary @@ -985,18 +984,15 @@ tests/third_party/cupy/math_tests/test_arithmetic.py::TestArithmeticBinary2_para tests/third_party/cupy/math_tests/test_arithmetic.py::TestArithmeticBinary2_param_527_{arg1=array([[1, 2, 3], [4, 5, 6]]), arg2=array([[0., 1., 2.], [3., 4., 5.]]), dtype=float64, name='floor_divide', use_dtype=False}::test_binary tests/third_party/cupy/math_tests/test_arithmetic.py::TestArithmeticBinary2_param_535_{arg1=array([[1, 2, 3], [4, 5, 6]]), arg2=array([[0, 1, 2], [3, 4, 5]], dtype=int32), dtype=float64, name='floor_divide', use_dtype=False}::test_binary tests/third_party/cupy/math_tests/test_arithmetic.py::TestArithmeticBinary2_param_543_{arg1=array([[1, 2, 3], [4, 5, 6]]), arg2=array([[0, 1, 2], [3, 4, 5]]), dtype=float64, name='floor_divide', use_dtype=False}::test_binary -tests/third_party/cupy/math_tests/test_arithmetic.py::TestArithmeticModf::test_modf 
-tests/third_party/cupy/math_tests/test_arithmetic.py::TestArithmeticRaisesWithNumpyInput_param_0_{name='reciprocal', nargs=1}::test_raises_with_numpy_input + tests/third_party/cupy/math_tests/test_arithmetic.py::TestArithmeticRaisesWithNumpyInput_param_10_{name='remainder', nargs=2}::test_raises_with_numpy_input tests/third_party/cupy/math_tests/test_arithmetic.py::TestArithmeticRaisesWithNumpyInput_param_11_{name='mod', nargs=2}::test_raises_with_numpy_input tests/third_party/cupy/math_tests/test_arithmetic.py::TestArithmeticRaisesWithNumpyInput_param_1_{name='angle', nargs=1}::test_raises_with_numpy_input tests/third_party/cupy/math_tests/test_arithmetic.py::TestArithmeticRaisesWithNumpyInput_param_4_{name='divide', nargs=2}::test_raises_with_numpy_input tests/third_party/cupy/math_tests/test_arithmetic.py::TestArithmeticRaisesWithNumpyInput_param_5_{name='power', nargs=2}::test_raises_with_numpy_input -tests/third_party/cupy/math_tests/test_arithmetic.py::TestArithmeticRaisesWithNumpyInput_param_6_{name='subtract', nargs=2}::test_raises_with_numpy_input tests/third_party/cupy/math_tests/test_arithmetic.py::TestArithmeticRaisesWithNumpyInput_param_7_{name='true_divide', nargs=2}::test_raises_with_numpy_input tests/third_party/cupy/math_tests/test_arithmetic.py::TestArithmeticRaisesWithNumpyInput_param_8_{name='floor_divide', nargs=2}::test_raises_with_numpy_input -tests/third_party/cupy/math_tests/test_arithmetic.py::TestArithmeticRaisesWithNumpyInput_param_9_{name='fmod', nargs=2}::test_raises_with_numpy_input -tests/third_party/cupy/math_tests/test_arithmetic.py::TestBoolSubtract_param_3_{shape=(), xp=dpnp}::test_bool_subtract + tests/third_party/cupy/math_tests/test_explog.py::TestExplog::test_logaddexp tests/third_party/cupy/math_tests/test_explog.py::TestExplog::test_logaddexp2 tests/third_party/cupy/math_tests/test_floating.py::TestFloating::test_copysign_float diff --git a/tests/test_arraycreation.py b/tests/test_arraycreation.py index 
fe371dbece6..63435bca11f 100644 --- a/tests/test_arraycreation.py +++ b/tests/test_arraycreation.py @@ -110,7 +110,7 @@ def test_frombuffer(dtype): @pytest.mark.usefixtures("allow_fall_back_on_numpy") -@pytest.mark.parametrize("dtype", get_all_dtypes(no_float16=False)) +@pytest.mark.parametrize("dtype", get_all_dtypes()) def test_fromfile(dtype): with tempfile.TemporaryFile() as fh: fh.write(b"\x00\x01\x02\x03\x04\x05\x06\x07\x08") @@ -274,11 +274,12 @@ def test_tri_default_dtype(): '[[1, 2], [3, 4]]', '[[0, 1, 2], [3, 4, 5], [6, 7, 8]]', '[[0, 1, 2, 3, 4], [5, 6, 7, 8, 9]]']) +@pytest.mark.usefixtures("allow_fall_back_on_numpy") @pytest.mark.parametrize("dtype", get_all_dtypes(no_float16=False)) def test_tril(m, k, dtype): a = numpy.array(m, dtype=dtype) ia = dpnp.array(a) - expected = numpy.tril(a, k=operator.index(k)) + expected = numpy.tril(a, k=k) result = dpnp.tril(ia, k=k) assert_array_equal(expected, result) @@ -295,11 +296,12 @@ def test_tril(m, k, dtype): ids=['[[1, 2], [3, 4]]', '[[0, 1, 2], [3, 4, 5], [6, 7, 8]]', '[[0, 1, 2, 3, 4], [5, 6, 7, 8, 9]]']) +@pytest.mark.usefixtures("allow_fall_back_on_numpy") @pytest.mark.parametrize("dtype", get_all_dtypes(no_float16=False)) def test_triu(m, k, dtype): a = numpy.array(m, dtype=dtype) ia = dpnp.array(a) - expected = numpy.triu(a, k=operator.index(k)) + expected = numpy.triu(a, k=k) result = dpnp.triu(ia, k=k) assert_array_equal(expected, result) diff --git a/tests/test_mathematical.py b/tests/test_mathematical.py index 70e0bd73dc5..6f7ee58c038 100644 --- a/tests/test_mathematical.py +++ b/tests/test_mathematical.py @@ -66,7 +66,7 @@ def test_diff(array): @pytest.mark.parametrize("dtype1", get_all_dtypes()) @pytest.mark.parametrize("dtype2", get_all_dtypes()) @pytest.mark.parametrize("func", - ['add', 'multiply']) + ['add', 'multiply', 'subtract']) @pytest.mark.parametrize("data", [[[1, 2], [3, 4]]], ids=['[[1, 2], [3, 4]]']) @@ -77,9 +77,14 @@ def test_op_multiple_dtypes(dtype1, func, dtype2, data): np_b = 
numpy.array(data, dtype=dtype2) dpnp_b = dpnp.array(data, dtype=dtype2) - result = getattr(dpnp, func)(dpnp_a, dpnp_b) - expected = getattr(numpy, func)(np_a, np_b) - assert_array_equal(result, expected) + if func == 'subtract' and (dtype1 == dtype2 == dpnp.bool): + with pytest.raises(TypeError): + result = getattr(dpnp, func)(dpnp_a, dpnp_b) + expected = getattr(numpy, func)(np_a, np_b) + else: + result = getattr(dpnp, func)(dpnp_a, dpnp_b) + expected = getattr(numpy, func)(np_a, np_b) + assert_array_equal(result, expected) @pytest.mark.parametrize("rhs", [[[1, 2, 3], [4, 5, 6]], [2.0, 1.5, 1.0], 3, 0.3]) @@ -98,15 +103,20 @@ def array_or_scalar(xp, data, dtype=None): return xp.array(data, dtype=dtype) def _test_mathematical(self, name, dtype, lhs, rhs): - a = self.array_or_scalar(dpnp, lhs, dtype=dtype) - b = self.array_or_scalar(dpnp, rhs, dtype=dtype) - result = getattr(dpnp, name)(a, b) + a_dpnp = self.array_or_scalar(dpnp, lhs, dtype=dtype) + b_dpnp = self.array_or_scalar(dpnp, rhs, dtype=dtype) - a = self.array_or_scalar(numpy, lhs, dtype=dtype) - b = self.array_or_scalar(numpy, rhs, dtype=dtype) - expected = getattr(numpy, name)(a, b) + a_np = self.array_or_scalar(numpy, lhs, dtype=dtype) + b_np = self.array_or_scalar(numpy, rhs, dtype=dtype) - assert_allclose(result, expected, atol=1e-4) + if name == 'subtract' and not numpy.isscalar(rhs) and dtype == dpnp.bool: + with pytest.raises(TypeError): + result = getattr(dpnp, name)(a_dpnp, b_dpnp) + expected = getattr(numpy, name)(a_np, b_np) + else: + result = getattr(dpnp, name)(a_dpnp, b_dpnp) + expected = getattr(numpy, name)(a_np, b_np) + assert_allclose(result, expected, atol=1e-4) @pytest.mark.parametrize("dtype", get_all_dtypes()) def test_add(self, dtype, lhs, rhs): @@ -166,8 +176,7 @@ def test_remainder(self, dtype, lhs, rhs): def test_power(self, dtype, lhs, rhs): self._test_mathematical('power', dtype, lhs, rhs) - @pytest.mark.usefixtures("allow_fall_back_on_numpy") - @pytest.mark.parametrize("dtype", 
get_all_dtypes(no_bool=True, no_complex=True)) + @pytest.mark.parametrize("dtype", get_all_dtypes()) def test_subtract(self, dtype, lhs, rhs): self._test_mathematical('subtract', dtype, lhs, rhs) @@ -177,7 +186,7 @@ def test_subtract(self, dtype, lhs, rhs): ids=['bool', 'int', 'float']) @pytest.mark.parametrize("data_type", get_all_dtypes()) @pytest.mark.parametrize("func", - ['add', 'multiply']) + ['add', 'multiply', 'subtract']) @pytest.mark.parametrize("val", [0, 1, 5], ids=['0', '1', '5']) @@ -197,22 +206,28 @@ def test_op_with_scalar(array, val, func, data_type, val_type): dpnp_a = dpnp.array(array, dtype=data_type) val_ = val_type(val) - result = getattr(dpnp, func)(dpnp_a, val_) - expected = getattr(numpy, func)(np_a, val_) - assert_array_equal(result, expected) + if func == 'subtract' and val_type == bool and data_type == dpnp.bool: + with pytest.raises(TypeError): + result = getattr(dpnp, func)(dpnp_a, val_) + expected = getattr(numpy, func)(np_a, val_) - result = getattr(dpnp, func)(val_, dpnp_a) - expected = getattr(numpy, func)(val_, np_a) - assert_array_equal(result, expected) + result = getattr(dpnp, func)(val_, dpnp_a) + expected = getattr(numpy, func)(val_, np_a) + else: + result = getattr(dpnp, func)(dpnp_a, val_) + expected = getattr(numpy, func)(np_a, val_) + assert_array_equal(result, expected) + + result = getattr(dpnp, func)(val_, dpnp_a) + expected = getattr(numpy, func)(val_, np_a) + assert_array_equal(result, expected) @pytest.mark.parametrize("shape", [(), (3, 2)], ids=['()', '(3, 2)']) -@pytest.mark.parametrize("dtype", - [numpy.float32, numpy.float64], - ids=['numpy.float32', 'numpy.float64']) -def test_multiply_scalar2(shape, dtype): +@pytest.mark.parametrize("dtype", get_all_dtypes()) +def test_multiply_scalar(shape, dtype): np_a = numpy.ones(shape, dtype=dtype) dpnp_a = dpnp.ones(shape, dtype=dtype) @@ -221,6 +236,32 @@ def test_multiply_scalar2(shape, dtype): assert_allclose(result, expected) +@pytest.mark.parametrize("shape", + [(), 
(3, 2)], + ids=['()', '(3, 2)']) +@pytest.mark.parametrize("dtype", get_all_dtypes()) +def test_add_scalar(shape, dtype): + np_a = numpy.ones(shape, dtype=dtype) + dpnp_a = dpnp.ones(shape, dtype=dtype) + + result = 0.5 + dpnp_a + 1.7 + expected = 0.5 + np_a + 1.7 + assert_allclose(result, expected) + + +@pytest.mark.parametrize("shape", + [(), (3, 2)], + ids=['()', '(3, 2)']) +@pytest.mark.parametrize("dtype", get_all_dtypes()) +def test_subtract_scalar(shape, dtype): + np_a = numpy.ones(shape, dtype=dtype) + dpnp_a = dpnp.ones(shape, dtype=dtype) + + result = 0.5 - dpnp_a - 1.7 + expected = 0.5 - np_a - 1.7 + assert_allclose(result, expected) + + @pytest.mark.usefixtures("allow_fall_back_on_numpy") @pytest.mark.parametrize("array", [[1, 2, 3, 4, 5], [1, 2, numpy.nan, 4, 5], diff --git a/tests/test_usm_type.py b/tests/test_usm_type.py index 91831648813..46531cb78aa 100644 --- a/tests/test_usm_type.py +++ b/tests/test_usm_type.py @@ -37,6 +37,19 @@ def test_coerced_usm_types_mul(usm_type_x, usm_type_y): assert z.usm_type == du.get_coerced_usm_type([usm_type_x, usm_type_y]) +@pytest.mark.parametrize("usm_type_x", list_of_usm_types, ids=list_of_usm_types) +@pytest.mark.parametrize("usm_type_y", list_of_usm_types, ids=list_of_usm_types) +def test_coerced_usm_types_subtract(usm_type_x, usm_type_y): + x = dp.arange(50, usm_type = usm_type_x) + y = dp.arange(50, usm_type = usm_type_y) + + z = 20 - x - y - 7.4 + + assert x.usm_type == usm_type_x + assert y.usm_type == usm_type_y + assert z.usm_type == du.get_coerced_usm_type([usm_type_x, usm_type_y]) + + @pytest.mark.parametrize( "func, args", [ diff --git a/tests/third_party/cupy/creation_tests/test_from_data.py b/tests/third_party/cupy/creation_tests/test_from_data.py index e07d927b1cf..ce71ef311a5 100644 --- a/tests/third_party/cupy/creation_tests/test_from_data.py +++ b/tests/third_party/cupy/creation_tests/test_from_data.py @@ -454,6 +454,7 @@ def test_asfortranarray_cuda_array_zero_dim_dtype( a = xp.ones((), 
dtype=dtype_a) return xp.asfortranarray(a, dtype=dtype_b) + @pytest.mark.usefixtures("allow_fall_back_on_numpy") @testing.numpy_cupy_array_equal() def test_fromfile(self, xp): with tempfile.TemporaryFile() as fh: diff --git a/tests/third_party/cupy/math_tests/test_arithmetic.py b/tests/third_party/cupy/math_tests/test_arithmetic.py index a53a8494707..21068ece874 100644 --- a/tests/third_party/cupy/math_tests/test_arithmetic.py +++ b/tests/third_party/cupy/math_tests/test_arithmetic.py @@ -146,31 +146,27 @@ def check_binary(self, xp): y = y.astype(numpy.complex64) # NumPy returns an output array of another type than DPNP when input ones have diffrent types. - if self.name in ('add', 'multiply') and xp is cupy: - if xp.isscalar(arg1) and xp.isscalar(arg2): - # If both are scalars, the result will be a scalar, so needs to convert into numpy-scalar. - y = numpy.asarray(y) - elif dtype1 != dtype2: - is_array_arg1 = not xp.isscalar(arg1) - is_array_arg2 = not xp.isscalar(arg2) - - is_int_float = lambda _x, _y: numpy.issubdtype(_x, numpy.integer) and numpy.issubdtype(_y, numpy.floating) - is_same_type = lambda _x, _y, _type: numpy.issubdtype(_x, _type) and numpy.issubdtype(_y, _type) - - if is_array_arg1 and is_array_arg2: - # If both inputs are arrays where one is of floating type and another - integer, - # NumPy will return an output array of always "float64" type, - # while DPNP will return the array of a wider type from the input arrays. - if is_int_float(dtype1, dtype2) or is_int_float(dtype2, dtype1): - y = y.astype(numpy.float64) - elif is_same_type(dtype1, dtype2, numpy.floating) or is_same_type(dtype1, dtype2, numpy.integer): - # If one input is an array and another - scalar, - # NumPy will return an output array of the same type as the inpupt array has, - # while DPNP will return the array of a wider type from the inputs (considering both array and scalar). 
- if is_array_arg1 and not is_array_arg2: - y = y.astype(dtype1) - elif is_array_arg2 and not is_array_arg1: - y = y.astype(dtype2) + if self.name in ('add', 'multiply', 'subtract') and xp is cupy and dtype1 != dtype2 and not self.use_dtype: + is_array_arg1 = not xp.isscalar(arg1) + is_array_arg2 = not xp.isscalar(arg2) + + is_int_float = lambda _x, _y: numpy.issubdtype(_x, numpy.integer) and numpy.issubdtype(_y, numpy.floating) + is_same_type = lambda _x, _y, _type: numpy.issubdtype(_x, _type) and numpy.issubdtype(_y, _type) + + if is_array_arg1 and is_array_arg2: + # If both inputs are arrays where one is of floating type and another - integer, + # NumPy will return an output array of always "float64" type, + # while DPNP will return the array of a wider type from the input arrays. + if is_int_float(dtype1, dtype2) or is_int_float(dtype2, dtype1): + y = y.astype(numpy.float64) + elif is_same_type(dtype1, dtype2, numpy.floating) or is_same_type(dtype1, dtype2, numpy.integer): + # If one input is an array and another - scalar, + # NumPy will return an output array of the same type as the inpupt array has, + # while DPNP will return the array of a wider type from the inputs (considering both array and scalar). + if is_array_arg1 and not is_array_arg2: + y = y.astype(dtype1) + elif is_array_arg2 and not is_array_arg1: + y = y.astype(dtype2) # NumPy returns different values (nan/inf) on division by zero # depending on the architecture. 
@@ -188,7 +184,6 @@ def check_binary(self, xp): @testing.gpu @testing.parameterize(*( testing.product({ - # TODO(unno): boolean subtract causes DeprecationWarning in numpy>=1.13 'arg1': [testing.shaped_arange((2, 3), numpy, dtype=d) for d in all_types ] + [0, 0.0, 2, 2.0], @@ -283,7 +278,6 @@ def test_modf(self, xp, dtype): 'xp': [numpy, cupy], 'shape': [(3, 2), (), (3, 0, 2)] })) -@pytest.mark.usefixtures("allow_fall_back_on_numpy") @testing.gpu class TestBoolSubtract(unittest.TestCase): From 19715dedcaa04ece03c2ee5183fe8ce24ffe484d Mon Sep 17 00:00:00 2001 From: Anton <100830759+antonwolfy@users.noreply.github.com> Date: Thu, 16 Feb 2023 22:42:38 +0100 Subject: [PATCH 26/32] dpnp.divide() doesn't work properly with a scalar (#1295) * dpnp.add() doesn't work properly with a scalar * dpnp.subtract() doesn't work properly with a scalar * dpnp.divide() doesn't work properly with a scalar * dpnp.divide() doesn't work properly with a scalar * Use std::int32_t and std::int64_t types * Disable floating-point optimizations that assume arguments and results are not NaNs or +-Inf * Fix issue with divide on Iris Xe --- dpnp/backend/CMakeLists.txt | 1 + .../include/dpnp_gen_2arg_3type_tbl.hpp | 8 +- dpnp/backend/include/dpnp_iface_fptr.hpp | 22 +++- dpnp/backend/kernels/dpnp_krnl_elemwise.cpp | 122 ++++++++++++------ dpnp/backend/src/dpnp_fptr.hpp | 33 +++++ dpnp/dpnp_algo/dpnp_algo.pxd | 2 + dpnp/dpnp_algo/dpnp_algo.pyx | 18 ++- dpnp/dpnp_iface_mathematical.py | 71 +++++----- tests/conftest.py | 21 ++- tests/skipped_tests.tbl | 2 - tests/skipped_tests_gpu.tbl | 2 - tests/test_linalg.py | 71 +++++----- tests/test_mathematical.py | 27 +++- tests/test_strides.py | 1 + tests/test_usm_type.py | 13 ++ .../cupy/math_tests/test_arithmetic.py | 38 +++--- .../cupy/statistics_tests/test_meanvar.py | 5 +- utils/command_build_clib.py | 4 +- 18 files changed, 312 insertions(+), 149 deletions(-) diff --git a/dpnp/backend/CMakeLists.txt b/dpnp/backend/CMakeLists.txt index 
f66aa4be1ae..52e9cb21985 100644 --- a/dpnp/backend/CMakeLists.txt +++ b/dpnp/backend/CMakeLists.txt @@ -93,6 +93,7 @@ string(CONCAT COMMON_COMPILE_FLAGS "-fsycl " "-fsycl-device-code-split=per_kernel " "-fno-approx-func " + "-fno-finite-math-only " ) string(CONCAT COMMON_LINK_FLAGS "-fsycl " diff --git a/dpnp/backend/include/dpnp_gen_2arg_3type_tbl.hpp b/dpnp/backend/include/dpnp_gen_2arg_3type_tbl.hpp index 9a3c69aee8e..e345c6eefea 100644 --- a/dpnp/backend/include/dpnp_gen_2arg_3type_tbl.hpp +++ b/dpnp/backend/include/dpnp_gen_2arg_3type_tbl.hpp @@ -132,10 +132,10 @@ MACRO_2ARG_3TYPES_OP(dpnp_copysign_c, MACRO_2ARG_3TYPES_OP(dpnp_divide_c, input1_elem / input2_elem, - nullptr, - std::false_type, + x1 / x2, + MACRO_UNPACK_TYPES(bool, std::int32_t, std::int64_t), oneapi::mkl::vm::div, - MACRO_UNPACK_TYPES(float, double)) + MACRO_UNPACK_TYPES(float, double, std::complex, std::complex)) MACRO_2ARG_3TYPES_OP(dpnp_fmod_c, sycl::fmod((double)input1_elem, (double)input2_elem), @@ -169,7 +169,7 @@ MACRO_2ARG_3TYPES_OP(dpnp_minimum_c, // pytest "tests/third_party/cupy/creation_tests/test_ranges.py::TestMgrid::test_mgrid3" // requires multiplication shape1[10] with shape2[10,1] and result expected as shape[10,10] MACRO_2ARG_3TYPES_OP(dpnp_multiply_c, - input1_elem* input2_elem, + input1_elem * input2_elem, x1 * x2, MACRO_UNPACK_TYPES(bool, std::int32_t, std::int64_t), oneapi::mkl::vm::mul, diff --git a/dpnp/backend/include/dpnp_iface_fptr.hpp b/dpnp/backend/include/dpnp_iface_fptr.hpp index 70a2d860910..fb154fcabfa 100644 --- a/dpnp/backend/include/dpnp_iface_fptr.hpp +++ b/dpnp/backend/include/dpnp_iface_fptr.hpp @@ -417,8 +417,26 @@ size_t operator-(DPNPFuncType lhs, DPNPFuncType rhs); */ typedef struct DPNPFuncData { - DPNPFuncType return_type; /**< return type identifier which expected by the @ref ptr function */ - void* ptr; /**< C++ backend function pointer */ + DPNPFuncData(const DPNPFuncType gen_type, void* gen_ptr, const DPNPFuncType type_no_fp64, void* 
ptr_no_fp64) + : return_type(gen_type) + , ptr(gen_ptr) + , return_type_no_fp64(type_no_fp64) + , ptr_no_fp64(ptr_no_fp64) + { + } + DPNPFuncData(const DPNPFuncType gen_type, void* gen_ptr) + : DPNPFuncData(gen_type, gen_ptr, DPNPFuncType::DPNP_FT_NONE, nullptr) + { + } + DPNPFuncData() + : DPNPFuncData(DPNPFuncType::DPNP_FT_NONE, nullptr) + { + } + + DPNPFuncType return_type; /**< return type identifier which expected by the @ref ptr function */ + void* ptr; /**< C++ backend function pointer */ + DPNPFuncType return_type_no_fp64; /**< alternative return type identifier when no fp64 support by device */ + void* ptr_no_fp64; /**< alternative C++ backend function pointer when no fp64 support by device */ } DPNPFuncData_t; /** diff --git a/dpnp/backend/kernels/dpnp_krnl_elemwise.cpp b/dpnp/backend/kernels/dpnp_krnl_elemwise.cpp index 057e0805db6..5133473d393 100644 --- a/dpnp/backend/kernels/dpnp_krnl_elemwise.cpp +++ b/dpnp/backend/kernels/dpnp_krnl_elemwise.cpp @@ -1029,18 +1029,42 @@ static void func_map_init_elemwise_1arg_1type(func_map_t& fmap) \ if (start + static_cast(vec_sz) * max_sg_size < result_size) \ { \ - sycl::vec<_DataType_input1, vec_sz> x1 = \ - sg.load(sycl::multi_ptr<_DataType_input1, global_space>(&input1_data[start])); \ - sycl::vec<_DataType_input2, vec_sz> x2 = \ - sg.load(sycl::multi_ptr<_DataType_input2, global_space>(&input2_data[start])); \ + using input1_ptrT = sycl::multi_ptr<_DataType_input1, global_space>; \ + using input2_ptrT = sycl::multi_ptr<_DataType_input2, global_space>; \ + using result_ptrT = sycl::multi_ptr<_DataType_output, global_space>; \ + \ sycl::vec<_DataType_output, vec_sz> res_vec; \ \ - if constexpr (both_types_are_same<_DataType_input1, _DataType_input2, __vec_types__>) \ + if constexpr (both_types_are_any_of<_DataType_input1, _DataType_input2, __vec_types__>) \ { \ - res_vec = __vec_operation__; \ + if constexpr (both_types_are_same<_DataType_input1, _DataType_input2, _DataType_output>) \ + { \ + 
sycl::vec<_DataType_input1, vec_sz> x1 = \ + sg.load(input1_ptrT(&input1_data[start])); \ + sycl::vec<_DataType_input2, vec_sz> x2 = \ + sg.load(input2_ptrT(&input2_data[start])); \ + \ + res_vec = __vec_operation__; \ + } \ + else /* input types don't match result type, so explicit casting is required */ \ + { \ + sycl::vec<_DataType_output, vec_sz> x1 = \ + dpnp_vec_cast<_DataType_output, _DataType_input1, vec_sz>( \ + sg.load(input1_ptrT(&input1_data[start]))); \ + sycl::vec<_DataType_output, vec_sz> x2 = \ + dpnp_vec_cast<_DataType_output, _DataType_input2, vec_sz>( \ + sg.load(input2_ptrT(&input2_data[start]))); \ + \ + res_vec = __vec_operation__; \ + } \ } \ else \ { \ + sycl::vec<_DataType_input1, vec_sz> x1 = \ + sg.load(input1_ptrT(&input1_data[start])); \ + sycl::vec<_DataType_input2, vec_sz> x2 = \ + sg.load(input2_ptrT(&input2_data[start])); \ + \ for (size_t k = 0; k < vec_sz; ++k) \ { \ const _DataType_output input1_elem = x1[k]; \ @@ -1048,7 +1072,7 @@ static void func_map_init_elemwise_1arg_1type(func_map_t& fmap) res_vec[k] = __operation__; \ } \ } \ - sg.store(sycl::multi_ptr<_DataType_output, global_space>(&result[start]), res_vec); \ + sg.store(result_ptrT(&result[start]), res_vec); \ } \ else \ { \ @@ -1173,6 +1197,47 @@ static void func_map_init_elemwise_1arg_1type(func_map_t& fmap) #include +template +static constexpr DPNPFuncType get_divide_res_type() +{ + constexpr auto widest_type = populate_func_types(); + constexpr auto shortes_type = (widest_type == FT1) ? 
FT2 : FT1; + + if constexpr (widest_type == DPNPFuncType::DPNP_FT_CMPLX128 || widest_type == DPNPFuncType::DPNP_FT_DOUBLE) + { + return widest_type; + } + else if constexpr (widest_type == DPNPFuncType::DPNP_FT_CMPLX64) + { + if constexpr (shortes_type == DPNPFuncType::DPNP_FT_DOUBLE) + { + return DPNPFuncType::DPNP_FT_CMPLX128; + } + else if constexpr (has_fp64::value && + (shortes_type == DPNPFuncType::DPNP_FT_INT || shortes_type == DPNPFuncType::DPNP_FT_LONG)) + { + return DPNPFuncType::DPNP_FT_CMPLX128; + } + } + else if constexpr (widest_type == DPNPFuncType::DPNP_FT_FLOAT) + { + if constexpr (has_fp64::value && + (shortes_type == DPNPFuncType::DPNP_FT_INT || shortes_type == DPNPFuncType::DPNP_FT_LONG)) + { + return DPNPFuncType::DPNP_FT_DOUBLE; + } + } + else if constexpr (has_fp64::value) + { + return DPNPFuncType::DPNP_FT_DOUBLE; + } + else + { + return DPNPFuncType::DPNP_FT_FLOAT; + } + return widest_type; +} + template static void func_map_elemwise_2arg_3type_core(func_map_t& fmap) { @@ -1194,6 +1259,16 @@ static void func_map_elemwise_2arg_3type_core(func_map_t& fmap) func_type_map_t::find_type, func_type_map_t::find_type>}), ...); + ((fmap[DPNPFuncName::DPNP_FN_DIVIDE_EXT][FT1][FTs] = + {get_divide_res_type(), + (void*)dpnp_divide_c_ext()>, + func_type_map_t::find_type, + func_type_map_t::find_type>, + get_divide_res_type(), + (void*)dpnp_divide_c_ext()>, + func_type_map_t::find_type, + func_type_map_t::find_type>}), + ...); } template @@ -1402,39 +1477,6 @@ static void func_map_init_elemwise_2arg_3type(func_map_t& fmap) fmap[DPNPFuncName::DPNP_FN_DIVIDE][eft_DBL][eft_DBL] = {eft_DBL, (void*)dpnp_divide_c_default}; - fmap[DPNPFuncName::DPNP_FN_DIVIDE_EXT][eft_INT][eft_INT] = {eft_DBL, - (void*)dpnp_divide_c_ext}; - fmap[DPNPFuncName::DPNP_FN_DIVIDE_EXT][eft_INT][eft_LNG] = {eft_DBL, - (void*)dpnp_divide_c_ext}; - fmap[DPNPFuncName::DPNP_FN_DIVIDE_EXT][eft_INT][eft_FLT] = {eft_DBL, - (void*)dpnp_divide_c_ext}; - 
fmap[DPNPFuncName::DPNP_FN_DIVIDE_EXT][eft_INT][eft_DBL] = {eft_DBL, - (void*)dpnp_divide_c_ext}; - fmap[DPNPFuncName::DPNP_FN_DIVIDE_EXT][eft_LNG][eft_INT] = {eft_DBL, - (void*)dpnp_divide_c_ext}; - fmap[DPNPFuncName::DPNP_FN_DIVIDE_EXT][eft_LNG][eft_LNG] = {eft_DBL, - (void*)dpnp_divide_c_ext}; - fmap[DPNPFuncName::DPNP_FN_DIVIDE_EXT][eft_LNG][eft_FLT] = {eft_DBL, - (void*)dpnp_divide_c_ext}; - fmap[DPNPFuncName::DPNP_FN_DIVIDE_EXT][eft_LNG][eft_DBL] = {eft_DBL, - (void*)dpnp_divide_c_ext}; - fmap[DPNPFuncName::DPNP_FN_DIVIDE_EXT][eft_FLT][eft_INT] = {eft_DBL, - (void*)dpnp_divide_c_ext}; - fmap[DPNPFuncName::DPNP_FN_DIVIDE_EXT][eft_FLT][eft_LNG] = {eft_DBL, - (void*)dpnp_divide_c_ext}; - fmap[DPNPFuncName::DPNP_FN_DIVIDE_EXT][eft_FLT][eft_FLT] = {eft_FLT, - (void*)dpnp_divide_c_ext}; - fmap[DPNPFuncName::DPNP_FN_DIVIDE_EXT][eft_FLT][eft_DBL] = {eft_DBL, - (void*)dpnp_divide_c_ext}; - fmap[DPNPFuncName::DPNP_FN_DIVIDE_EXT][eft_DBL][eft_INT] = {eft_DBL, - (void*)dpnp_divide_c_ext}; - fmap[DPNPFuncName::DPNP_FN_DIVIDE_EXT][eft_DBL][eft_LNG] = {eft_DBL, - (void*)dpnp_divide_c_ext}; - fmap[DPNPFuncName::DPNP_FN_DIVIDE_EXT][eft_DBL][eft_FLT] = {eft_DBL, - (void*)dpnp_divide_c_ext}; - fmap[DPNPFuncName::DPNP_FN_DIVIDE_EXT][eft_DBL][eft_DBL] = {eft_DBL, - (void*)dpnp_divide_c_ext}; - fmap[DPNPFuncName::DPNP_FN_FMOD][eft_INT][eft_INT] = {eft_INT, (void*)dpnp_fmod_c_default}; fmap[DPNPFuncName::DPNP_FN_FMOD][eft_INT][eft_LNG] = {eft_LNG, diff --git a/dpnp/backend/src/dpnp_fptr.hpp b/dpnp/backend/src/dpnp_fptr.hpp index 4cb66485831..742e6dff378 100644 --- a/dpnp/backend/src/dpnp_fptr.hpp +++ b/dpnp/backend/src/dpnp_fptr.hpp @@ -35,6 +35,8 @@ #include #include +#include + #include /** @@ -116,6 +118,31 @@ static constexpr DPNPFuncType populate_func_types() return (FT1 < FT2) ? FT2 : FT1; } +/** + * @brief A helper function to cast SYCL vector between types. 
+ */ +template +static auto dpnp_vec_cast_impl(const Vec& v, std::index_sequence) +{ + return Op{v[I]...}; +} + +/** + * @brief A casting function for SYCL vector. + * + * @tparam dstT A result type upon casting. + * @tparam srcT An incoming type of the vector. + * @tparam N A number of elements with the vector. + * @tparam Indices A sequence of integers + * @param s An incoming SYCL vector to cast. + * @return SYCL vector casted to desctination type. + */ +template > +static auto dpnp_vec_cast(const sycl::vec& s) +{ + return dpnp_vec_cast_impl, sycl::vec>(s, Indices{}); +} + /** * Removes parentheses for a passed list of types separated by comma. * It's intended to be used in operations macro. @@ -142,6 +169,12 @@ struct are_same : std::conjunction...> {}; template constexpr auto both_types_are_same = std::conjunction_v, are_same>; +/** + * A template constat to check if both types T1 and T2 match any type from Ts. + */ +template +constexpr auto both_types_are_any_of = std::conjunction_v, is_any>; + /** * A template constat to check if both types T1 and T2 don't match any type from Ts sequence. 
*/ diff --git a/dpnp/dpnp_algo/dpnp_algo.pxd b/dpnp/dpnp_algo/dpnp_algo.pxd index 485e8adb1a6..65e07a9c704 100644 --- a/dpnp/dpnp_algo/dpnp_algo.pxd +++ b/dpnp/dpnp_algo/dpnp_algo.pxd @@ -374,6 +374,8 @@ cdef extern from "dpnp_iface_fptr.hpp": struct DPNPFuncData: DPNPFuncType return_type void * ptr + DPNPFuncType return_type_no_fp64 + void *ptr_no_fp64 DPNPFuncData get_dpnp_function_ptr(DPNPFuncName name, DPNPFuncType first_type, DPNPFuncType second_type) except + diff --git a/dpnp/dpnp_algo/dpnp_algo.pyx b/dpnp/dpnp_algo/dpnp_algo.pyx index aaa7334e18a..f12707ccc76 100644 --- a/dpnp/dpnp_algo/dpnp_algo.pyx +++ b/dpnp/dpnp_algo/dpnp_algo.pyx @@ -481,8 +481,6 @@ cdef utils.dpnp_descriptor call_fptr_2in_1out_strides(DPNPFuncName fptr_name, # get the FPTR data structure cdef DPNPFuncData kernel_data = get_dpnp_function_ptr(fptr_name, x1_c_type, x2_c_type) - result_type = dpnp_DPNPFuncType_to_dtype( < size_t > kernel_data.return_type) - # Create result array cdef shape_type_c x1_shape = x1_obj.shape @@ -495,15 +493,26 @@ cdef utils.dpnp_descriptor call_fptr_2in_1out_strides(DPNPFuncName fptr_name, result_sycl_device, result_usm_type, result_sycl_queue = utils.get_common_usm_allocation(x1_obj, x2_obj) + # get FPTR function and return type + cdef fptr_2in_1out_strides_t func = NULL + cdef DPNPFuncType return_type = DPNP_FT_NONE + if fptr_name != DPNP_FN_DIVIDE_EXT or result_sycl_device.has_aspect_fp64: + return_type = kernel_data.return_type + func = < fptr_2in_1out_strides_t > kernel_data.ptr + else: + return_type = kernel_data.return_type_no_fp64 + func = < fptr_2in_1out_strides_t > kernel_data.ptr_no_fp64 + if out is None: """ Create result array with type given by FPTR data """ result = utils.create_output_descriptor(result_shape, - kernel_data.return_type, + return_type, None, device=result_sycl_device, usm_type=result_usm_type, sycl_queue=result_sycl_queue) else: + result_type = dpnp_DPNPFuncType_to_dtype(< size_t > return_type) if out.dtype != result_type: 
utils.checker_throw_value_error(func_name, 'out.dtype', out.dtype, result_type) if out.shape != result_shape: @@ -517,11 +526,10 @@ cdef utils.dpnp_descriptor call_fptr_2in_1out_strides(DPNPFuncName fptr_name, result_obj = result.get_array() - cdef c_dpctl.SyclQueue q = result_obj.sycl_queue + cdef c_dpctl.SyclQueue q = < c_dpctl.SyclQueue > result_obj.sycl_queue cdef c_dpctl.DPCTLSyclQueueRef q_ref = q.get_queue_ref() """ Call FPTR function """ - cdef fptr_2in_1out_strides_t func = kernel_data.ptr cdef c_dpctl.DPCTLSyclEventRef event_ref = func(q_ref, result.get_data(), result.size, diff --git a/dpnp/dpnp_iface_mathematical.py b/dpnp/dpnp_iface_mathematical.py index 64886de23c0..feff53288cf 100644 --- a/dpnp/dpnp_iface_mathematical.py +++ b/dpnp/dpnp_iface_mathematical.py @@ -544,55 +544,66 @@ def diff(x1, n=1, axis=-1, prepend=numpy._NoValue, append=numpy._NoValue): return call_origin(numpy.diff, x1, n=n, axis=axis, prepend=prepend, append=append) -def divide(x1, x2, dtype=None, out=None, where=True, **kwargs): +def divide(x1, + x2, + /, + out=None, + *, + where=True, + dtype=None, + subok=True, + **kwargs): """ Divide arguments element-wise. For full documentation refer to :obj:`numpy.divide`. + Returns + ------- + y : dpnp.ndarray + The quotient ``x1/x2``, element-wise. + Limitations ----------- - Parameters ``x1`` and ``x2`` are supported as either :obj:`dpnp.ndarray` or scalar. - Parameters ``dtype``, ``out`` and ``where`` are supported with their default values. + Parameters `x1` and `x2` are supported as either :class:`dpnp.ndarray` or scalar, + but not both (at least either `x1` or `x2` should be as :class:`dpnp.ndarray`). + Parameters `out`, `where`, `dtype` and `subok` are supported with their default values. Keyword arguments ``kwargs`` are currently unsupported. - Otherwise the functions will be executed sequentially on CPU. + Otherwise the function will be executed sequentially on CPU. 
Input array data types are limited by supported DPNP :ref:`Data types`. Examples -------- - >>> import dpnp as np - >>> result = np.divide(np.array([1, -2, 6, -9]), np.array([-2, -2, -2, -2])) - >>> [x for x in result] + >>> import dpnp as dp + >>> result = dp.divide(dp.array([1, -2, 6, -9]), dp.array([-2, -2, -2, -2])) + >>> print(result) [-0.5, 1.0, -3.0, 4.5] """ - x1_is_scalar = dpnp.isscalar(x1) - x2_is_scalar = dpnp.isscalar(x2) - x1_desc = dpnp.get_dpnp_descriptor(x1, copy_when_strides=False, copy_when_nondefault_queue=False) - x2_desc = dpnp.get_dpnp_descriptor(x2, copy_when_strides=False, copy_when_nondefault_queue=False) + if out is not None: + pass + elif where is not True: + pass + elif dtype is not None: + pass + elif subok is not True: + pass + elif dpnp.isscalar(x1) and dpnp.isscalar(x2): + # at least either x1 or x2 has to be an array + pass + else: + # get USM type and queue to copy scalar from the host memory into a USM allocation + usm_type, queue = get_usm_allocations([x1, x2]) if dpnp.isscalar(x1) or dpnp.isscalar(x2) else (None, None) - if x1_desc and x2_desc and not kwargs: - if not x1_desc and not x1_is_scalar: - pass - elif not x2_desc and not x2_is_scalar: - pass - elif x1_is_scalar and x2_is_scalar: - pass - elif x1_desc and x1_desc.ndim == 0: - pass - elif x2_desc and x2_desc.ndim == 0: - pass - elif dtype is not None: - pass - elif out is not None: - pass - elif not where: - pass - else: + x1_desc = dpnp.get_dpnp_descriptor(x1, copy_when_strides=False, copy_when_nondefault_queue=False, + alloc_usm_type=usm_type, alloc_queue=queue) + x2_desc = dpnp.get_dpnp_descriptor(x2, copy_when_strides=False, copy_when_nondefault_queue=False, + alloc_usm_type=usm_type, alloc_queue=queue) + if x1_desc and x2_desc: return dpnp_divide(x1_desc, x2_desc, dtype=dtype, out=out, where=where).get_pyobj() - return call_origin(numpy.divide, x1, x2, dtype=dtype, out=out, where=where, **kwargs) + return call_origin(numpy.divide, x1, x2, out=out, where=where, 
dtype=dtype, subok=subok, **kwargs) def ediff1d(x1, to_end=None, to_begin=None): diff --git a/tests/conftest.py b/tests/conftest.py index 78d3180bac0..22276f125f2 100755 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -1,6 +1,6 @@ # -*- coding: utf-8 -*- # ***************************************************************************** -# Copyright (c) 2016-2020, Intel Corporation +# Copyright (c) 2016-2023, Intel Corporation # All rights reserved. # # Redistribution and use in source and binary forms, with or without @@ -77,3 +77,22 @@ def pytest_collection_modifyitems(config, items): @pytest.fixture def allow_fall_back_on_numpy(monkeypatch): monkeypatch.setattr(dpnp.config, '__DPNP_RAISE_EXCEPION_ON_NUMPY_FALLBACK__', 0) + +@pytest.fixture +def suppress_divide_numpy_warnings(): + # divide: treatment for division by zero (infinite result obtained from finite numbers) + old_settings = numpy.seterr(divide='ignore') + yield + numpy.seterr(**old_settings) # reset to default + +@pytest.fixture +def suppress_invalid_numpy_warnings(): + # invalid: treatment for invalid floating-point operation + # (result is not an expressible number, typically indicates that a NaN was produced) + old_settings = numpy.seterr(invalid='ignore') + yield + numpy.seterr(**old_settings) # reset to default + +@pytest.fixture +def suppress_divide_invalid_numpy_warnings(suppress_divide_numpy_warnings, suppress_invalid_numpy_warnings): + yield diff --git a/tests/skipped_tests.tbl b/tests/skipped_tests.tbl index 25d1fd1bc0f..2f0334077a0 100644 --- a/tests/skipped_tests.tbl +++ b/tests/skipped_tests.tbl @@ -769,9 +769,7 @@ tests/third_party/cupy/math_tests/test_arithmetic.py::TestArithmeticModf::test_m tests/third_party/cupy/math_tests/test_arithmetic.py::TestArithmeticRaisesWithNumpyInput_param_10_{name='remainder', nargs=2}::test_raises_with_numpy_input tests/third_party/cupy/math_tests/test_arithmetic.py::TestArithmeticRaisesWithNumpyInput_param_11_{name='mod', 
nargs=2}::test_raises_with_numpy_input tests/third_party/cupy/math_tests/test_arithmetic.py::TestArithmeticRaisesWithNumpyInput_param_1_{name='angle', nargs=1}::test_raises_with_numpy_input -tests/third_party/cupy/math_tests/test_arithmetic.py::TestArithmeticRaisesWithNumpyInput_param_4_{name='divide', nargs=2}::test_raises_with_numpy_input tests/third_party/cupy/math_tests/test_arithmetic.py::TestArithmeticRaisesWithNumpyInput_param_5_{name='power', nargs=2}::test_raises_with_numpy_input -tests/third_party/cupy/math_tests/test_arithmetic.py::TestArithmeticRaisesWithNumpyInput_param_7_{name='true_divide', nargs=2}::test_raises_with_numpy_input tests/third_party/cupy/math_tests/test_arithmetic.py::TestArithmeticRaisesWithNumpyInput_param_8_{name='floor_divide', nargs=2}::test_raises_with_numpy_input tests/third_party/cupy/math_tests/test_explog.py::TestExplog::test_logaddexp diff --git a/tests/skipped_tests_gpu.tbl b/tests/skipped_tests_gpu.tbl index 34d1795cc98..e6598904e16 100644 --- a/tests/skipped_tests_gpu.tbl +++ b/tests/skipped_tests_gpu.tbl @@ -988,9 +988,7 @@ tests/third_party/cupy/math_tests/test_arithmetic.py::TestArithmeticBinary2_para tests/third_party/cupy/math_tests/test_arithmetic.py::TestArithmeticRaisesWithNumpyInput_param_10_{name='remainder', nargs=2}::test_raises_with_numpy_input tests/third_party/cupy/math_tests/test_arithmetic.py::TestArithmeticRaisesWithNumpyInput_param_11_{name='mod', nargs=2}::test_raises_with_numpy_input tests/third_party/cupy/math_tests/test_arithmetic.py::TestArithmeticRaisesWithNumpyInput_param_1_{name='angle', nargs=1}::test_raises_with_numpy_input -tests/third_party/cupy/math_tests/test_arithmetic.py::TestArithmeticRaisesWithNumpyInput_param_4_{name='divide', nargs=2}::test_raises_with_numpy_input tests/third_party/cupy/math_tests/test_arithmetic.py::TestArithmeticRaisesWithNumpyInput_param_5_{name='power', nargs=2}::test_raises_with_numpy_input 
-tests/third_party/cupy/math_tests/test_arithmetic.py::TestArithmeticRaisesWithNumpyInput_param_7_{name='true_divide', nargs=2}::test_raises_with_numpy_input tests/third_party/cupy/math_tests/test_arithmetic.py::TestArithmeticRaisesWithNumpyInput_param_8_{name='floor_divide', nargs=2}::test_raises_with_numpy_input tests/third_party/cupy/math_tests/test_explog.py::TestExplog::test_logaddexp diff --git a/tests/test_linalg.py b/tests/test_linalg.py index ac8392d1538..d9784a41558 100644 --- a/tests/test_linalg.py +++ b/tests/test_linalg.py @@ -1,9 +1,15 @@ import pytest +from .helper import get_all_dtypes import dpnp as inp import dpctl + import numpy +from numpy.testing import ( + assert_allclose, + assert_array_equal +) def vvsort(val, vec, size, xp): @@ -49,7 +55,7 @@ def test_cholesky(array): ia = inp.array(a) result = inp.linalg.cholesky(ia) expected = numpy.linalg.cholesky(a) - numpy.testing.assert_array_equal(expected, result) + assert_array_equal(expected, result) @pytest.mark.parametrize("arr", @@ -63,7 +69,7 @@ def test_cond(arr, p): ia = inp.array(a) result = inp.linalg.cond(ia, p) expected = numpy.linalg.cond(a, p) - numpy.testing.assert_array_equal(expected, result) + assert_array_equal(expected, result) @pytest.mark.parametrize("array", @@ -82,13 +88,11 @@ def test_det(array): ia = inp.array(a) result = inp.linalg.det(ia) expected = numpy.linalg.det(a) - numpy.testing.assert_allclose(expected, result) + assert_allclose(expected, result) @pytest.mark.usefixtures("allow_fall_back_on_numpy") -@pytest.mark.parametrize("type", - [numpy.float64, numpy.float32, numpy.int64, numpy.int32], - ids=['float64', 'float32', 'int64', 'int32']) +@pytest.mark.parametrize("type", get_all_dtypes(no_bool=True, no_complex=True)) @pytest.mark.parametrize("size", [2, 4, 8, 16, 300]) def test_eig_arange(type, size): @@ -115,21 +119,19 @@ def test_eig_arange(type, size): if np_vec[0, i] * dpnp_vec[0, i] < 0: np_vec[:, i] = -np_vec[:, i] - 
numpy.testing.assert_array_equal(symm_orig, symm) - numpy.testing.assert_array_equal(dpnp_symm_orig, dpnp_symm) + assert_array_equal(symm_orig, symm) + assert_array_equal(dpnp_symm_orig, dpnp_symm) assert (dpnp_val.dtype == np_val.dtype) assert (dpnp_vec.dtype == np_vec.dtype) assert (dpnp_val.shape == np_val.shape) assert (dpnp_vec.shape == np_vec.shape) - numpy.testing.assert_allclose(dpnp_val, np_val, rtol=1e-05, atol=1e-05) - numpy.testing.assert_allclose(dpnp_vec, np_vec, rtol=1e-05, atol=1e-05) + assert_allclose(dpnp_val, np_val, rtol=1e-05, atol=1e-05) + assert_allclose(dpnp_vec, np_vec, rtol=1e-05, atol=1e-05) -@pytest.mark.parametrize("type", - [numpy.float64, numpy.float32, numpy.int64, numpy.int32], - ids=['float64', 'float32', 'int64', 'int32']) +@pytest.mark.parametrize("type", get_all_dtypes(no_bool=True, no_complex=True)) def test_eigvals(type): if dpctl.get_current_device_type() != dpctl.device_type.gpu: pytest.skip("eigvals function doesn\'t work on CPU: https://github.com/IntelPython/dpnp/issues/1005") @@ -144,12 +146,10 @@ def test_eigvals(type): ia = inp.array(a) result = inp.linalg.eigvals(ia) expected = numpy.linalg.eigvals(a) - numpy.testing.assert_allclose(expected, result, atol=0.5) + assert_allclose(expected, result, atol=0.5) -@pytest.mark.parametrize("type", - [numpy.float64, numpy.float32, numpy.int64, numpy.int32], - ids=['float64', 'float32', 'int64', 'int32']) +@pytest.mark.parametrize("type", get_all_dtypes(no_bool=True, no_complex=True)) @pytest.mark.parametrize("array", [[[1., 2.], [3., 4.]], [[0, 1, 2], [3, 2, -1], [4, -2, 3]]], ids=['[[1., 2.], [3., 4.]]', '[[0, 1, 2], [3, 2, -1], [4, -2, 3]]']) @@ -158,12 +158,10 @@ def test_inv(type, array): ia = inp.array(a) result = inp.linalg.inv(ia) expected = numpy.linalg.inv(a) - numpy.testing.assert_allclose(expected, result) + assert_allclose(expected, result) -@pytest.mark.parametrize("type", - [numpy.float64, numpy.float32, numpy.int64, numpy.int32], - ids=['float64', 'float32', 
'int64', 'int32']) +@pytest.mark.parametrize("type", get_all_dtypes(no_bool=True, no_complex=True, no_none=True)) @pytest.mark.parametrize("array", [[0, 0], [0, 1], [1, 2], [[0, 0], [0, 0]], [[1, 2], [1, 2]], [[1, 2], [3, 4]]], ids=['[0, 0]', '[0, 1]', '[1, 2]', '[[0, 0], [0, 0]]', '[[1, 2], [1, 2]]', '[[1, 2], [3, 4]]']) @@ -177,10 +175,11 @@ def test_matrix_rank(type, tol, array): result = inp.linalg.matrix_rank(ia, tol=tol) expected = numpy.linalg.matrix_rank(a, tol=tol) - numpy.testing.assert_allclose(expected, result) + assert_allclose(expected, result) @pytest.mark.usefixtures("allow_fall_back_on_numpy") +@pytest.mark.usefixtures("suppress_divide_numpy_warnings") @pytest.mark.parametrize("array", [[7], [1, 2], [1, 0]], ids=['[7]', '[1, 2]', '[1, 0]']) @@ -195,7 +194,7 @@ def test_norm1(array, ord, axis): ia = inp.array(a) result = inp.linalg.norm(ia, ord=ord, axis=axis) expected = numpy.linalg.norm(a, ord=ord, axis=axis) - numpy.testing.assert_allclose(expected, result) + assert_allclose(expected, result) @pytest.mark.usefixtures("allow_fall_back_on_numpy") @@ -213,7 +212,7 @@ def test_norm2(array, ord, axis): ia = inp.array(a) result = inp.linalg.norm(ia, ord=ord, axis=axis) expected = numpy.linalg.norm(a, ord=ord, axis=axis) - numpy.testing.assert_array_equal(expected, result) + assert_array_equal(expected, result) @pytest.mark.usefixtures("allow_fall_back_on_numpy") @@ -231,13 +230,11 @@ def test_norm3(array, ord, axis): ia = inp.array(a) result = inp.linalg.norm(ia, ord=ord, axis=axis) expected = numpy.linalg.norm(a, ord=ord, axis=axis) - numpy.testing.assert_array_equal(expected, result) + assert_array_equal(expected, result) @pytest.mark.usefixtures("allow_fall_back_on_numpy") -@pytest.mark.parametrize("type", - [numpy.float64, numpy.float32, numpy.int64, numpy.int32], - ids=['float64', 'float32', 'int64', 'int32']) +@pytest.mark.parametrize("type", get_all_dtypes(no_bool=True, no_complex=True)) @pytest.mark.parametrize("shape", [(2, 2), (3, 4), (5, 3), 
(16, 16)], ids=['(2,2)', '(3,4)', '(5,3)', '(16,16)']) @@ -262,7 +259,7 @@ def test_qr(type, shape, mode): tol = 1e-11 # check decomposition - numpy.testing.assert_allclose(ia, numpy.dot(inp.asnumpy(dpnp_q), inp.asnumpy(dpnp_r)), rtol=tol, atol=tol) + assert_allclose(ia, numpy.dot(inp.asnumpy(dpnp_q), inp.asnumpy(dpnp_r)), rtol=tol, atol=tol) # NP change sign for comparison ncols = min(a.shape[0], a.shape[1]) @@ -273,14 +270,12 @@ def test_qr(type, shape, mode): np_r[i, :] = -np_r[i, :] if numpy.any(numpy.abs(np_r[i, :]) > tol): - numpy.testing.assert_allclose(inp.asnumpy(dpnp_q)[:, i], np_q[:, i], rtol=tol, atol=tol) + assert_allclose(inp.asnumpy(dpnp_q)[:, i], np_q[:, i], rtol=tol, atol=tol) - numpy.testing.assert_allclose(dpnp_r, np_r, rtol=tol, atol=tol) + assert_allclose(dpnp_r, np_r, rtol=tol, atol=tol) -@pytest.mark.parametrize("type", - [numpy.float64, numpy.float32, numpy.int64, numpy.int32], - ids=['float64', 'float32', 'int64', 'int32']) +@pytest.mark.parametrize("type", get_all_dtypes(no_bool=True, no_complex=True)) @pytest.mark.parametrize("shape", [(2, 2), (3, 4), (5, 3), (16, 16)], ids=['(2,2)', '(3,4)', '(5,3)', '(16,16)']) @@ -309,10 +304,10 @@ def test_svd(type, shape): dpnp_diag_s[i, i] = dpnp_s[i] # check decomposition - numpy.testing.assert_allclose(ia, inp.dot(dpnp_u, inp.dot(dpnp_diag_s, dpnp_vt)), rtol=tol, atol=tol) + assert_allclose(ia, inp.dot(dpnp_u, inp.dot(dpnp_diag_s, dpnp_vt)), rtol=tol, atol=tol) # compare singular values - # numpy.testing.assert_allclose(dpnp_s, np_s, rtol=tol, atol=tol) + # assert_allclose(dpnp_s, np_s, rtol=tol, atol=tol) # change sign of vectors for i in range(min(shape[0], shape[1])): @@ -322,5 +317,5 @@ def test_svd(type, shape): # compare vectors for non-zero values for i in range(numpy.count_nonzero(np_s > tol)): - numpy.testing.assert_allclose(inp.asnumpy(dpnp_u)[:, i], np_u[:, i], rtol=tol, atol=tol) - numpy.testing.assert_allclose(inp.asnumpy(dpnp_vt)[i, :], np_vt[i, :], rtol=tol, atol=tol) + 
assert_allclose(inp.asnumpy(dpnp_u)[:, i], np_u[:, i], rtol=tol, atol=tol) + assert_allclose(inp.asnumpy(dpnp_vt)[i, :], np_vt[i, :], rtol=tol, atol=tol) diff --git a/tests/test_mathematical.py b/tests/test_mathematical.py index 6f7ee58c038..78f62890833 100644 --- a/tests/test_mathematical.py +++ b/tests/test_mathematical.py @@ -66,7 +66,7 @@ def test_diff(array): @pytest.mark.parametrize("dtype1", get_all_dtypes()) @pytest.mark.parametrize("dtype2", get_all_dtypes()) @pytest.mark.parametrize("func", - ['add', 'multiply', 'subtract']) + ['add', 'multiply', 'subtract', 'divide']) @pytest.mark.parametrize("data", [[[1, 2], [3, 4]]], ids=['[[1, 2], [3, 4]]']) @@ -132,8 +132,7 @@ def test_arctan2(self, dtype, lhs, rhs): def test_copysign(self, dtype, lhs, rhs): self._test_mathematical('copysign', dtype, lhs, rhs) - @pytest.mark.usefixtures("allow_fall_back_on_numpy") - @pytest.mark.parametrize("dtype", get_all_dtypes(no_bool=True, no_complex=True)) + @pytest.mark.parametrize("dtype", get_all_dtypes()) def test_divide(self, dtype, lhs, rhs): self._test_mathematical('divide', dtype, lhs, rhs) @@ -181,12 +180,13 @@ def test_subtract(self, dtype, lhs, rhs): self._test_mathematical('subtract', dtype, lhs, rhs) +@pytest.mark.usefixtures("suppress_divide_invalid_numpy_warnings") @pytest.mark.parametrize("val_type", [bool, int, float], ids=['bool', 'int', 'float']) @pytest.mark.parametrize("data_type", get_all_dtypes()) @pytest.mark.parametrize("func", - ['add', 'multiply', 'subtract']) + ['add', 'multiply', 'subtract', 'divide']) @pytest.mark.parametrize("val", [0, 1, 5], ids=['0', '1', '5']) @@ -216,11 +216,11 @@ def test_op_with_scalar(array, val, func, data_type, val_type): else: result = getattr(dpnp, func)(dpnp_a, val_) expected = getattr(numpy, func)(np_a, val_) - assert_array_equal(result, expected) + assert_allclose(result, expected) result = getattr(dpnp, func)(val_, dpnp_a) expected = getattr(numpy, func)(val_, np_a) - assert_array_equal(result, expected) + 
assert_allclose(result, expected) @pytest.mark.parametrize("shape", @@ -262,6 +262,19 @@ def test_subtract_scalar(shape, dtype): assert_allclose(result, expected) +@pytest.mark.parametrize("shape", + [(), (3, 2)], + ids=['()', '(3, 2)']) +@pytest.mark.parametrize("dtype", get_all_dtypes()) +def test_divide_scalar(shape, dtype): + np_a = numpy.ones(shape, dtype=dtype) + dpnp_a = dpnp.ones(shape, dtype=dtype) + + result = 0.5 / dpnp_a / 1.7 + expected = 0.5 / np_a / 1.7 + assert_allclose(result, expected) + + @pytest.mark.usefixtures("allow_fall_back_on_numpy") @pytest.mark.parametrize("array", [[1, 2, 3, 4, 5], [1, 2, numpy.nan, 4, 5], @@ -442,7 +455,6 @@ def test_cross_3x3(self, x1, x2, axisa, axisb, axisc, axis): assert_array_equal(expected, result) -@pytest.mark.usefixtures("allow_fall_back_on_numpy") class TestGradient: @pytest.mark.parametrize("array", [[2, 3, 6, 8, 4, 9], @@ -456,6 +468,7 @@ def test_gradient_y1(self, array): expected = numpy.gradient(np_y) assert_array_equal(expected, result) + @pytest.mark.usefixtures("allow_fall_back_on_numpy") @pytest.mark.parametrize("array", [[2, 3, 6, 8, 4, 9], [3., 4., 7.5, 9.], [2, 6, 8, 10]]) diff --git a/tests/test_strides.py b/tests/test_strides.py index 3c0d86a44a5..02e8c868975 100644 --- a/tests/test_strides.py +++ b/tests/test_strides.py @@ -37,6 +37,7 @@ def test_strides(func_name, dtype): assert_allclose(expected, result) +@pytest.mark.usefixtures("suppress_divide_invalid_numpy_warnings") @pytest.mark.parametrize("func_name", ["arccos", "arccosh", "arcsin", "arcsinh", "arctan", "arctanh", "cbrt", "ceil", "copy", "cos", "cosh", "conjugate", "degrees", "ediff1d", "exp", "exp2", "expm1", "fabs", "floor", "log", diff --git a/tests/test_usm_type.py b/tests/test_usm_type.py index 46531cb78aa..1a33a1d655d 100644 --- a/tests/test_usm_type.py +++ b/tests/test_usm_type.py @@ -50,6 +50,19 @@ def test_coerced_usm_types_subtract(usm_type_x, usm_type_y): assert z.usm_type == du.get_coerced_usm_type([usm_type_x, usm_type_y]) 
+@pytest.mark.parametrize("usm_type_x", list_of_usm_types, ids=list_of_usm_types) +@pytest.mark.parametrize("usm_type_y", list_of_usm_types, ids=list_of_usm_types) +def test_coerced_usm_types_divide(usm_type_x, usm_type_y): + x = dp.arange(120, usm_type = usm_type_x) + y = dp.arange(120, usm_type = usm_type_y) + + z = 2 / x / y / 1.5 + + assert x.usm_type == usm_type_x + assert y.usm_type == usm_type_y + assert z.usm_type == du.get_coerced_usm_type([usm_type_x, usm_type_y]) + + @pytest.mark.parametrize( "func, args", [ diff --git a/tests/third_party/cupy/math_tests/test_arithmetic.py b/tests/third_party/cupy/math_tests/test_arithmetic.py index 21068ece874..027722d8bef 100644 --- a/tests/third_party/cupy/math_tests/test_arithmetic.py +++ b/tests/third_party/cupy/math_tests/test_arithmetic.py @@ -146,27 +146,35 @@ def check_binary(self, xp): y = y.astype(numpy.complex64) # NumPy returns an output array of another type than DPNP when input ones have diffrent types. - if self.name in ('add', 'multiply', 'subtract') and xp is cupy and dtype1 != dtype2 and not self.use_dtype: + if xp is cupy and dtype1 != dtype2 and not self.use_dtype: is_array_arg1 = not xp.isscalar(arg1) is_array_arg2 = not xp.isscalar(arg2) is_int_float = lambda _x, _y: numpy.issubdtype(_x, numpy.integer) and numpy.issubdtype(_y, numpy.floating) is_same_type = lambda _x, _y, _type: numpy.issubdtype(_x, _type) and numpy.issubdtype(_y, _type) - if is_array_arg1 and is_array_arg2: - # If both inputs are arrays where one is of floating type and another - integer, - # NumPy will return an output array of always "float64" type, - # while DPNP will return the array of a wider type from the input arrays. 
- if is_int_float(dtype1, dtype2) or is_int_float(dtype2, dtype1): - y = y.astype(numpy.float64) - elif is_same_type(dtype1, dtype2, numpy.floating) or is_same_type(dtype1, dtype2, numpy.integer): - # If one input is an array and another - scalar, - # NumPy will return an output array of the same type as the inpupt array has, - # while DPNP will return the array of a wider type from the inputs (considering both array and scalar). - if is_array_arg1 and not is_array_arg2: - y = y.astype(dtype1) - elif is_array_arg2 and not is_array_arg1: - y = y.astype(dtype2) + if self.name in ('add', 'multiply', 'subtract'): + if is_array_arg1 and is_array_arg2: + # If both inputs are arrays where one is of floating type and another - integer, + # NumPy will return an output array of always "float64" type, + # while DPNP will return the array of a wider type from the input arrays. + if is_int_float(dtype1, dtype2) or is_int_float(dtype2, dtype1): + y = y.astype(numpy.float64) + elif is_same_type(dtype1, dtype2, numpy.floating) or is_same_type(dtype1, dtype2, numpy.integer): + # If one input is an array and another - scalar, + # NumPy will return an output array of the same type as the inpupt array has, + # while DPNP will return the array of a wider type from the inputs (considering both array and scalar). + if is_array_arg1 and not is_array_arg2: + y = y.astype(dtype1) + elif is_array_arg2 and not is_array_arg1: + y = y.astype(dtype2) + elif self.name in ('divide', 'true_divide'): + # If one input is an array of float32 and another - an integer or floating scalar, + # NumPy will return an output array of float32, while DPNP will return the array of float64, + # since NumPy would use the same float64 type when instead of scalar here is array of integer of floating type. 
+ if not (is_array_arg1 and is_array_arg2): + if (is_array_arg1 and arg1.dtype == numpy.float32) ^ (is_array_arg2 and arg2.dtype == numpy.float32): + y = y.astype(numpy.float32) # NumPy returns different values (nan/inf) on division by zero # depending on the architecture. diff --git a/tests/third_party/cupy/statistics_tests/test_meanvar.py b/tests/third_party/cupy/statistics_tests/test_meanvar.py index aea22d02c51..60d3413b0da 100644 --- a/tests/third_party/cupy/statistics_tests/test_meanvar.py +++ b/tests/third_party/cupy/statistics_tests/test_meanvar.py @@ -89,7 +89,6 @@ def test_median_axis_sequence(self, xp, dtype): return xp.median(a, self.axis, keepdims=self.keepdims) -@pytest.mark.usefixtures("allow_fall_back_on_numpy") @testing.gpu class TestAverage(unittest.TestCase): @@ -101,12 +100,14 @@ def test_average_all(self, xp, dtype): a = testing.shaped_arange((2, 3), xp, dtype) return xp.average(a) + @pytest.mark.usefixtures("allow_fall_back_on_numpy") @testing.for_all_dtypes() @testing.numpy_cupy_allclose() def test_average_axis(self, xp, dtype): a = testing.shaped_arange((2, 3, 4), xp, dtype) return xp.average(a, axis=1) + @pytest.mark.usefixtures("allow_fall_back_on_numpy") @testing.for_all_dtypes() @testing.numpy_cupy_allclose() def test_average_weights(self, xp, dtype): @@ -114,6 +115,7 @@ def test_average_weights(self, xp, dtype): w = testing.shaped_arange((2, 3), xp, dtype) return xp.average(a, weights=w) + @pytest.mark.usefixtures("allow_fall_back_on_numpy") @testing.for_all_dtypes() @testing.numpy_cupy_allclose() def test_average_axis_weights(self, xp, dtype): @@ -132,6 +134,7 @@ def check_returned(self, a, axis, weights): testing.assert_allclose(average_cpu, average_gpu) testing.assert_allclose(sum_weights_cpu, sum_weights_gpu) + @pytest.mark.usefixtures("allow_fall_back_on_numpy") @testing.for_all_dtypes() def test_returned(self, dtype): a = testing.shaped_arange((2, 3), numpy, dtype) diff --git a/utils/command_build_clib.py 
b/utils/command_build_clib.py index 95887cc65aa..d16bab3aec4 100644 --- a/utils/command_build_clib.py +++ b/utils/command_build_clib.py @@ -1,6 +1,6 @@ # -*- coding: utf-8 -*- # ***************************************************************************** -# Copyright (c) 2016-2022, Intel Corporation +# Copyright (c) 2016-2023, Intel Corporation # All rights reserved. # # Redistribution and use in source and binary forms, with or without @@ -63,7 +63,7 @@ # default variables (for Linux) _project_compiler = "icpx" _project_linker = "icpx" -_project_cmplr_flag_sycl_devel = ["-fsycl-device-code-split=per_kernel", "-fno-approx-func"] +_project_cmplr_flag_sycl_devel = ["-fsycl-device-code-split=per_kernel", "-fno-approx-func", "-fno-finite-math-only"] _project_cmplr_flag_sycl = ["-fsycl"] _project_cmplr_flag_stdcpp_static = [] # This brakes TBB ["-static-libstdc++", "-static-libgcc"] _project_cmplr_flag_compatibility = ["-Wl,--enable-new-dtags"] From d22214ca587f335a8ba9fa2c971472963cec5291 Mon Sep 17 00:00:00 2001 From: Anton <100830759+antonwolfy@users.noreply.github.com> Date: Fri, 17 Feb 2023 11:06:12 +0100 Subject: [PATCH 27/32] Update example3 building from debug build script (#1298) --- 0.build.sh | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/0.build.sh b/0.build.sh index 59df4dc8571..b1a2a29ec0a 100755 --- a/0.build.sh +++ b/0.build.sh @@ -25,7 +25,8 @@ CC=icpx python setup.py build_ext --inplace echo echo =========example3============== -icpx -fsycl -g -fPIC dpnp/backend/examples/example3.cpp -Idpnp -Idpnp/backend/include -Ldpnp -Wl,-rpath='$ORIGIN'/dpnp -ldpnp_backend_c -o example3 +DPCTL_INCLUDES=$(python -m dpctl --includes) +icpx -fsycl -g -O0 -ggdb3 -fPIC dpnp/backend/examples/example3.cpp $DPCTL_INCLUDES -Idpnp -Idpnp/backend/include -Ldpnp -Wl,-rpath='$ORIGIN'/dpnp -ldpnp_backend_c -o example3 # LD_DEBUG=libs,bindings,symbols ./example3 ./example3 @@ -47,7 +48,7 @@ icpx -fsycl -g -fPIC dpnp/backend/examples/example3.cpp -Idpnp
-Idpnp/backend/in # strings /usr/share/miniconda/envs/dpnp*/lib/libstdc++.so | grep GLIBCXX | sort -n -# echo +echo echo =========example1============== # LD_DEBUG=libs,bindings,symbols python examples/example1.py # LD_DEBUG=libs python examples/example1.py From 7bddfbec24a7326081372e28a26d6872017c117f Mon Sep 17 00:00:00 2001 From: Anton <100830759+antonwolfy@users.noreply.github.com> Date: Fri, 17 Feb 2023 14:25:28 +0100 Subject: [PATCH 28/32] Remove temporary solution accepting CFD with equal SYCL context instead of queue (#1303) --- dpnp/dpnp_utils/dpnp_algo_utils.pyx | 3 --- tests/test_sycl_queue.py | 16 +++++++++++++++- 2 files changed, 15 insertions(+), 4 deletions(-) diff --git a/dpnp/dpnp_utils/dpnp_algo_utils.pyx b/dpnp/dpnp_utils/dpnp_algo_utils.pyx index abdc4107f64..672aa19e4dc 100644 --- a/dpnp/dpnp_utils/dpnp_algo_utils.pyx +++ b/dpnp/dpnp_utils/dpnp_algo_utils.pyx @@ -651,9 +651,6 @@ cdef tuple get_common_usm_allocation(dpnp_descriptor x1, dpnp_descriptor x2): "".format(array1_obj.usm_type, array2_obj.usm_type)) common_sycl_queue = dpu.get_execution_queue((array1_obj.sycl_queue, array2_obj.sycl_queue)) - # TODO: refactor, remove when CFD is implemented in all array constructors - if common_sycl_queue is None and array1_obj.sycl_context == array2_obj.sycl_context: - common_sycl_queue = array1_obj.sycl_queue if common_sycl_queue is None: raise ValueError( "could not recognize common SYCL queue for inputs in SYCL queues {} and {}" diff --git a/tests/test_sycl_queue.py b/tests/test_sycl_queue.py index 657918f3b05..1bffa18111b 100644 --- a/tests/test_sycl_queue.py +++ b/tests/test_sycl_queue.py @@ -6,7 +6,8 @@ import numpy from numpy.testing import ( - assert_array_equal + assert_array_equal, + assert_raises ) @@ -353,6 +354,19 @@ def test_broadcasting(func, data1, data2, device): assert_sycl_queue_equal(result_queue, expected_queue) +@pytest.mark.parametrize("func", ["add", "copysign", "divide", "floor_divide", "fmod", + "maximum", "minimum", 
"multiply", "outer", "power", + "remainder", "subtract"]) +@pytest.mark.parametrize("device", + valid_devices, + ids=[device.filter_string for device in valid_devices]) +def test_2in_1out_diff_queue_but_equal_context(func, device): + x1 = dpnp.arange(10) + x2 = dpnp.arange(10, sycl_queue=dpctl.SyclQueue(device))[::-1] + with assert_raises(ValueError): + getattr(dpnp, func)(x1, x2) + + @pytest.mark.parametrize( "func, kwargs", [ From d7219e2235bc1062798d14c721718c3d4b928df8 Mon Sep 17 00:00:00 2001 From: Anton <100830759+antonwolfy@users.noreply.github.com> Date: Sat, 18 Feb 2023 15:16:29 +0100 Subject: [PATCH 29/32] Intel LLVM is to use conda's gcc toolchain, sysroot and target libraries (#1306) --- conda-recipe/build.sh | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/conda-recipe/build.sh b/conda-recipe/build.sh index d873320f80f..164ad09d578 100644 --- a/conda-recipe/build.sh +++ b/conda-recipe/build.sh @@ -29,6 +29,11 @@ fi export CFLAGS="-Wl,-rpath,\$ORIGIN/../dpctl,-rpath,\$ORIGIN $CFLAGS" export LDFLAGS="-Wl,-rpath,\$ORIGIN/../dpctl,-rpath,\$ORIGIN $LDFLAGS" +# Intel LLVM must cooperate with compiler and sysroot from conda +echo "--gcc-toolchain=${BUILD_PREFIX} --sysroot=${BUILD_PREFIX}/${HOST}/sysroot -target ${HOST}" > icpx_for_conda.cfg +export ICPXCFG="$(pwd)/icpx_for_conda.cfg" +export ICXCFG="$(pwd)/icpx_for_conda.cfg" + $PYTHON setup.py build_clib $PYTHON setup.py build_ext install From 4012c9843f4b72f0744e19d97017ddc156268312 Mon Sep 17 00:00:00 2001 From: Anton <100830759+antonwolfy@users.noreply.github.com> Date: Tue, 21 Feb 2023 12:17:04 +0100 Subject: [PATCH 30/32] Tests are crashing if no default device (#1311) --- dpnp/backend/src/queue_sycl.hpp | 7 +++++++ dpnp/dpnp_algo/dpnp_algo.pxd | 2 +- 2 files changed, 8 insertions(+), 1 deletion(-) diff --git a/dpnp/backend/src/queue_sycl.hpp b/dpnp/backend/src/queue_sycl.hpp index 63dc01c1dab..8683fdd5737 100644 --- a/dpnp/backend/src/queue_sycl.hpp +++ b/dpnp/backend/src/queue_sycl.hpp @@ -137,6 
+137,13 @@ class backend_sycl #else // temporal solution. Started from Sept-2020 DPCTLSyclQueueRef DPCtrl_queue = DPCTLQueueMgr_GetCurrentQueue(); + if (DPCtrl_queue == nullptr) + { + std::string reason = (DPCTLQueueMgr_GetQueueStackSize() == static_cast(-1)) + ? ": the queue stack is empty, probably no device is available." + : "."; + throw std::runtime_error("Failed to create a copy of SYCL queue with default device" + reason); + } return *(reinterpret_cast(DPCtrl_queue)); #endif } diff --git a/dpnp/dpnp_algo/dpnp_algo.pxd b/dpnp/dpnp_algo/dpnp_algo.pxd index 65e07a9c704..9bf161b0aaf 100644 --- a/dpnp/dpnp_algo/dpnp_algo.pxd +++ b/dpnp/dpnp_algo/dpnp_algo.pxd @@ -391,7 +391,7 @@ cdef extern from "constants.hpp": cdef extern from "dpnp_iface.hpp": void dpnp_queue_initialize_c(QueueOptions selector) - size_t dpnp_queue_is_cpu_c() + size_t dpnp_queue_is_cpu_c() except + char * dpnp_memory_alloc_c(size_t size_in_bytes) except + void dpnp_memory_free_c(void * ptr) From 307fcebe9cfd17bdd29ffcf74173f0bb49da0165 Mon Sep 17 00:00:00 2001 From: Anton <100830759+antonwolfy@users.noreply.github.com> Date: Thu, 23 Feb 2023 12:29:10 +0100 Subject: [PATCH 31/32] Setting version to 0.11.1 (#1308) --- doc/conf.py | 2 +- dpnp/backend/CMakeLists.txt | 2 +- dpnp/backend/doc/Doxyfile | 2 +- dpnp/version.py | 4 ++-- 4 files changed, 5 insertions(+), 5 deletions(-) diff --git a/doc/conf.py b/doc/conf.py index 46505fa8f6d..999b2504bd6 100644 --- a/doc/conf.py +++ b/doc/conf.py @@ -33,7 +33,7 @@ # The short X.Y version version = '0.11' # The full version, including alpha/beta/rc tags -release = '0.11.0' +release = '0.11.1' # -- General configuration --------------------------------------------------- diff --git a/dpnp/backend/CMakeLists.txt b/dpnp/backend/CMakeLists.txt index 52e9cb21985..baee709b11e 100644 --- a/dpnp/backend/CMakeLists.txt +++ b/dpnp/backend/CMakeLists.txt @@ -27,7 +27,7 @@ cmake_minimum_required(VERSION 3.10 FATAL_ERROR) -# set(DPNP_VERSION 0.11.0) +# set(DPNP_VERSION 
0.11.1) # set(DPNP_API_VERSION 0.11) # set directory where the custom finders live diff --git a/dpnp/backend/doc/Doxyfile b/dpnp/backend/doc/Doxyfile index 6c83bb0e846..3d6c971a799 100644 --- a/dpnp/backend/doc/Doxyfile +++ b/dpnp/backend/doc/Doxyfile @@ -38,7 +38,7 @@ PROJECT_NAME = "DPNP C++ backend kernel library" # could be handy for archiving the generated documentation or if some version # control system is used. -PROJECT_NUMBER = 0.11.0 +PROJECT_NUMBER = 0.11.1 # Using the PROJECT_BRIEF tag one can provide an optional one line description # for a project that appears at the top of each page and should give viewer a diff --git a/dpnp/version.py b/dpnp/version.py index 160e8ec963a..f09ea3c76a7 100644 --- a/dpnp/version.py +++ b/dpnp/version.py @@ -1,7 +1,7 @@ #!/usr/bin/env python # -*- coding: utf-8 -*- # ***************************************************************************** -# Copyright (c) 2016-2022, Intel Corporation +# Copyright (c) 2016-2023, Intel Corporation # All rights reserved. 
# # Redistribution and use in source and binary forms, with or without @@ -29,6 +29,6 @@ DPNP version module """ -__version__: str = '0.11.0' +__version__: str = '0.11.1' version: str = __version__ From 384302c773a4a268a6cfb1d486cfe37e16fd7065 Mon Sep 17 00:00:00 2001 From: Anton <100830759+antonwolfy@users.noreply.github.com> Date: Fri, 24 Feb 2023 15:32:14 +0100 Subject: [PATCH 32/32] Update versions of external github actions (#1316) --- .github/workflows/build-sphinx.yml | 6 +++--- .github/workflows/conda-package.yml | 16 ++++++++-------- .github/workflows/pre-commit.yml | 4 ++-- 3 files changed, 13 insertions(+), 13 deletions(-) diff --git a/.github/workflows/build-sphinx.yml b/.github/workflows/build-sphinx.yml index 73b3352dd58..178e1835fe2 100644 --- a/.github/workflows/build-sphinx.yml +++ b/.github/workflows/build-sphinx.yml @@ -54,7 +54,7 @@ jobs: sudo apt-get install -y nvidia-cuda-toolkit clinfo - name: Checkout repo - uses: actions/checkout@v3.1.0 + uses: actions/checkout@v3.3.0 # https://github.com/marketplace/actions/setup-miniconda - name: Setup miniconda @@ -94,7 +94,7 @@ jobs: # https://github.com/marketplace/actions/doxygen-action - name: Build backend docs - uses: mattnotmitt/doxygen-action@v1.9.4 + uses: mattnotmitt/doxygen-action@v1.9.5 with: working-directory: 'dpnp/backend/doc' @@ -106,7 +106,7 @@ jobs: if: | !github.event.pull_request.head.repo.fork && (github.ref == 'refs/heads/master' || (startsWith(github.ref, 'refs/heads/release') == true) || github.event_name == 'push' && contains(github.ref, 'refs/tags/')) - uses: peaceiris/actions-gh-pages@v3.9.0 + uses: peaceiris/actions-gh-pages@v3.9.2 with: github_token: ${{ secrets.GITHUB_TOKEN }} publish_dir: doc/_build/html/ diff --git a/.github/workflows/conda-package.yml b/.github/workflows/conda-package.yml index da29bf31dd0..64a14a85be5 100644 --- a/.github/workflows/conda-package.yml +++ b/.github/workflows/conda-package.yml @@ -47,7 +47,7 @@ jobs: access_token: ${{ github.token }} - 
name: Checkout DPNP repo - uses: actions/checkout@v3.1.0 + uses: actions/checkout@v3.3.0 with: fetch-depth: 0 @@ -74,7 +74,7 @@ jobs: run: conda install conda-build - name: Cache conda packages - uses: actions/cache@v3.0.11 + uses: actions/cache@v3.2.6 env: CACHE_NUMBER: 1 # Increase to reset cache with: @@ -89,7 +89,7 @@ jobs: run: conda build --no-test --python ${{ matrix.python }} ${{ env.CHANNELS }} conda-recipe - name: Upload artifact - uses: actions/upload-artifact@v3.1.1 + uses: actions/upload-artifact@v3.1.2 with: name: ${{ env.PACKAGE_NAME }} ${{ runner.os }} Python ${{ matrix.python }} path: ${{ env.CONDA_BLD }}${{ env.PACKAGE_NAME }}-*.tar.bz2 @@ -124,7 +124,7 @@ jobs: steps: - name: Download artifact - uses: actions/download-artifact@v3.0.1 + uses: actions/download-artifact@v3.0.2 with: name: ${{ env.PACKAGE_NAME }} ${{ runner.os }} Python ${{ matrix.python }} path: ${{ env.pkg-path-in-channel }} @@ -167,7 +167,7 @@ jobs: TEST_CHANNELS: '-c ${{ env.channel-path }} ${{ env.CHANNELS }}' - name: Cache conda packages - uses: actions/cache@v3.0.11 + uses: actions/cache@v3.2.6 env: CACHE_NUMBER: 1 # Increase to reset cache with: @@ -228,7 +228,7 @@ jobs: steps: - name: Download artifact - uses: actions/download-artifact@v3.0.1 + uses: actions/download-artifact@v3.0.2 with: name: ${{ env.PACKAGE_NAME }} ${{ runner.os }} Python ${{ matrix.python }} path: ${{ env.pkg-path-in-channel }} @@ -294,7 +294,7 @@ jobs: run: more lockfile - name: Cache conda packages - uses: actions/cache@v3.0.11 + uses: actions/cache@v3.2.6 env: CACHE_NUMBER: 1 # Increase to reset cache with: @@ -398,7 +398,7 @@ jobs: steps: - name: Download artifact - uses: actions/download-artifact@v3.0.1 + uses: actions/download-artifact@v3.0.2 with: name: ${{ env.PACKAGE_NAME }} ${{ runner.os }} Python ${{ matrix.python }} diff --git a/.github/workflows/pre-commit.yml b/.github/workflows/pre-commit.yml index 56ce09e3476..770aea0dc65 100644 --- a/.github/workflows/pre-commit.yml +++ 
b/.github/workflows/pre-commit.yml @@ -9,8 +9,8 @@ jobs: pre-commit: runs-on: ubuntu-latest steps: - - uses: actions/checkout@v3.1.0 - - uses: actions/setup-python@v4.3.0 + - uses: actions/checkout@v3.3.0 + - uses: actions/setup-python@v4.5.0 with: python-version: '3.10' - uses: pre-commit/action@v3.0.0