diff --git a/dpnp/backend/include/dpnp_gen_2arg_1type_tbl.hpp b/dpnp/backend/include/dpnp_gen_2arg_1type_tbl.hpp index 19589c4b0ee..0330faeee37 100644 --- a/dpnp/backend/include/dpnp_gen_2arg_1type_tbl.hpp +++ b/dpnp/backend/include/dpnp_gen_2arg_1type_tbl.hpp @@ -1,5 +1,5 @@ //***************************************************************************** -// Copyright (c) 2016-2020, Intel Corporation +// Copyright (c) 2016-2023, Intel Corporation // All rights reserved. // // Redistribution and use in source and binary forms, with or without @@ -104,7 +104,7 @@ #endif -MACRO_2ARG_1TYPE_OP(dpnp_bitwise_and_c, input1_elem& input2_elem) +MACRO_2ARG_1TYPE_OP(dpnp_bitwise_and_c, input1_elem & input2_elem) MACRO_2ARG_1TYPE_OP(dpnp_bitwise_or_c, input1_elem | input2_elem) MACRO_2ARG_1TYPE_OP(dpnp_bitwise_xor_c, input1_elem ^ input2_elem) MACRO_2ARG_1TYPE_OP(dpnp_left_shift_c, input1_elem << input2_elem) diff --git a/dpnp/backend/kernels/dpnp_krnl_bitwise.cpp b/dpnp/backend/kernels/dpnp_krnl_bitwise.cpp index c082bd636bf..f3d8a4a95cc 100644 --- a/dpnp/backend/kernels/dpnp_krnl_bitwise.cpp +++ b/dpnp/backend/kernels/dpnp_krnl_bitwise.cpp @@ -27,6 +27,7 @@ #include "dpnp_fptr.hpp" #include "dpnp_iface.hpp" +#include "dpnp_iterator.hpp" #include "dpnp_utils.hpp" #include "dpnpc_memory_adapter.hpp" #include "queue_sycl.hpp" @@ -49,27 +50,66 @@ DPCTLSyclEventRef dpnp_invert_c(DPCTLSyclQueueRef q_ref, sycl::queue q = *(reinterpret_cast(q_ref)); sycl::event event; - DPNPC_ptr_adapter<_DataType> input1_ptr(q_ref, array1_in, size); - _DataType* array1 = input1_ptr.get_ptr(); - _DataType* result = reinterpret_cast<_DataType*>(result1); + _DataType* input_data = static_cast<_DataType*>(array1_in); + _DataType* result = static_cast<_DataType*>(result1); - sycl::range<1> gws(size); - auto kernel_parallel_for_func = [=](sycl::id<1> global_id) { - size_t i = global_id[0]; /*for (size_t i = 0; i < size; ++i)*/ + constexpr size_t lws = 64; + constexpr unsigned int vec_sz = 8; + + auto gws_range = sycl::range<1>(((size + lws * vec_sz - 1) / (lws * vec_sz)) * lws); + auto lws_range = sycl::range<1>(lws); + + auto kernel_parallel_for_func = [=](sycl::nd_item<1> nd_it) { + auto sg = nd_it.get_sub_group(); + const auto max_sg_size = sg.get_max_local_range()[0]; + const size_t start = + vec_sz * (nd_it.get_group(0) * nd_it.get_local_range(0) + sg.get_group_id()[0] * max_sg_size); + + if (start + static_cast(vec_sz) * max_sg_size < size) { - _DataType input_elem1 = array1[i]; - result[i] = ~input_elem1; + using multi_ptrT = sycl::multi_ptr<_DataType, sycl::access::address_space::global_space>; + + sycl::vec<_DataType, vec_sz> x = sg.load(multi_ptrT(&input_data[start])); + sycl::vec<_DataType, vec_sz> res_vec; + + if constexpr (std::is_same_v<_DataType, bool>) + { +#pragma unroll + for (size_t k = 0; k < vec_sz; ++k) + { + res_vec[k] = !(x[k]); + } + } + else + { + res_vec = ~x; + } + + sg.store(multi_ptrT(&result[start]), res_vec); + } + else + { + for (size_t k = start + sg.get_local_id()[0]; k < size; k += max_sg_size) + { + if constexpr (std::is_same_v<_DataType, bool>) + { + result[k] = !(input_data[k]); + } + else + { + result[k] = ~(input_data[k]); + } + } } }; auto kernel_func = [&](sycl::handler& cgh) { - cgh.parallel_for>(gws, kernel_parallel_for_func); + cgh.parallel_for>(sycl::nd_range<1>(gws_range, lws_range), + kernel_parallel_for_func); }; - event = q.submit(kernel_func); event_ref = reinterpret_cast(&event); - return DPCTLEvent_Copy(event_ref); } @@ -84,6 +124,7 @@ void dpnp_invert_c(void* array1_in, void* 
result1, size_t size) size, dep_event_vec_ref); DPCTLEvent_WaitAndThrow(event_ref); + DPCTLEvent_Delete(event_ref); } template @@ -98,9 +139,11 @@ DPCTLSyclEventRef (*dpnp_invert_ext_c)(DPCTLSyclQueueRef, static void func_map_init_bitwise_1arg_1type(func_map_t& fmap) { + fmap[DPNPFuncName::DPNP_FN_INVERT][eft_BLN][eft_BLN] = {eft_BLN, (void*)dpnp_invert_default_c}; fmap[DPNPFuncName::DPNP_FN_INVERT][eft_INT][eft_INT] = {eft_INT, (void*)dpnp_invert_default_c}; fmap[DPNPFuncName::DPNP_FN_INVERT][eft_LNG][eft_LNG] = {eft_LNG, (void*)dpnp_invert_default_c}; + fmap[DPNPFuncName::DPNP_FN_INVERT_EXT][eft_BLN][eft_BLN] = {eft_BLN, (void*)dpnp_invert_ext_c}; fmap[DPNPFuncName::DPNP_FN_INVERT_EXT][eft_INT][eft_INT] = {eft_INT, (void*)dpnp_invert_ext_c}; fmap[DPNPFuncName::DPNP_FN_INVERT_EXT][eft_LNG][eft_LNG] = {eft_LNG, (void*)dpnp_invert_ext_c}; @@ -114,6 +157,9 @@ static void func_map_init_bitwise_1arg_1type(func_map_t& fmap) template \ class __name__##_strides_kernel; \ \ + template \ + class __name__##_broadcast_kernel; \ + \ template \ DPCTLSyclEventRef __name__(DPCTLSyclQueueRef q_ref, \ void* result_out, \ @@ -152,6 +198,8 @@ static void func_map_init_bitwise_1arg_1type(func_map_t& fmap) _DataType* input2_data = static_cast<_DataType*>(const_cast(input2_in)); \ _DataType* result = static_cast<_DataType*>(result_out); \ \ + bool use_broadcasting = !array_equal(input1_shape, input1_ndim, input2_shape, input2_ndim); \ + \ shape_elem_type* input1_shape_offsets = new shape_elem_type[input1_ndim]; \ \ get_shape_offsets_inkernel(input1_shape, input1_ndim, input1_shape_offsets); \ @@ -167,7 +215,42 @@ static void func_map_init_bitwise_1arg_1type(func_map_t& fmap) sycl::event event; \ sycl::range<1> gws(result_size); \ \ - if (use_strides) \ + if (use_broadcasting) \ + { \ + DPNPC_id<_DataType>* input1_it; \ + const size_t input1_it_size_in_bytes = sizeof(DPNPC_id<_DataType>); \ + input1_it = reinterpret_cast*>(dpnp_memory_alloc_c(q_ref, input1_it_size_in_bytes)); \ + new (input1_it) DPNPC_id<_DataType>(q_ref, input1_data, input1_shape, input1_strides, input1_ndim); \ + \ + input1_it->broadcast_to_shape(result_shape, result_ndim); \ + \ + DPNPC_id<_DataType>* input2_it; \ + const size_t input2_it_size_in_bytes = sizeof(DPNPC_id<_DataType>); \ + input2_it = reinterpret_cast*>(dpnp_memory_alloc_c(q_ref, input2_it_size_in_bytes)); \ + new (input2_it) DPNPC_id<_DataType>(q_ref, input2_data, input2_shape, input2_strides, input2_ndim); \ + \ + input2_it->broadcast_to_shape(result_shape, result_ndim); \ + \ + auto kernel_parallel_for_func = [=](sycl::id<1> global_id) { \ + const size_t i = global_id[0]; /* for (size_t i = 0; i < result_size; ++i) */ \ + { \ + const _DataType input1_elem = (*input1_it)[i]; \ + const _DataType input2_elem = (*input2_it)[i]; \ + result[i] = __operation__; \ + } \ + }; \ + auto kernel_func = [&](sycl::handler& cgh) { \ + cgh.parallel_for>(gws, kernel_parallel_for_func); \ + }; \ + \ + q.submit(kernel_func).wait(); \ + \ + input1_it->~DPNPC_id(); \ + input2_it->~DPNPC_id(); \ + \ + return event_ref; \ + } \ + else if (use_strides) \ { \ if ((result_ndim != input1_ndim) || (result_ndim != input2_ndim)) \ { \ @@ -332,18 +415,21 @@ static void func_map_init_bitwise_2arg_1type(func_map_t& fmap) fmap[DPNPFuncName::DPNP_FN_BITWISE_AND][eft_INT][eft_INT] = {eft_INT, (void*)dpnp_bitwise_and_c_default}; fmap[DPNPFuncName::DPNP_FN_BITWISE_AND][eft_LNG][eft_LNG] = {eft_LNG, (void*)dpnp_bitwise_and_c_default}; + fmap[DPNPFuncName::DPNP_FN_BITWISE_AND_EXT][eft_BLN][eft_BLN] = {eft_BLN, 
(void*)dpnp_bitwise_and_c_ext}; fmap[DPNPFuncName::DPNP_FN_BITWISE_AND_EXT][eft_INT][eft_INT] = {eft_INT, (void*)dpnp_bitwise_and_c_ext}; fmap[DPNPFuncName::DPNP_FN_BITWISE_AND_EXT][eft_LNG][eft_LNG] = {eft_LNG, (void*)dpnp_bitwise_and_c_ext}; fmap[DPNPFuncName::DPNP_FN_BITWISE_OR][eft_INT][eft_INT] = {eft_INT, (void*)dpnp_bitwise_or_c_default}; fmap[DPNPFuncName::DPNP_FN_BITWISE_OR][eft_LNG][eft_LNG] = {eft_LNG, (void*)dpnp_bitwise_or_c_default}; + fmap[DPNPFuncName::DPNP_FN_BITWISE_OR_EXT][eft_BLN][eft_BLN] = {eft_BLN, (void*)dpnp_bitwise_or_c_ext}; fmap[DPNPFuncName::DPNP_FN_BITWISE_OR_EXT][eft_INT][eft_INT] = {eft_INT, (void*)dpnp_bitwise_or_c_ext}; fmap[DPNPFuncName::DPNP_FN_BITWISE_OR_EXT][eft_LNG][eft_LNG] = {eft_LNG, (void*)dpnp_bitwise_or_c_ext}; fmap[DPNPFuncName::DPNP_FN_BITWISE_XOR][eft_INT][eft_INT] = {eft_INT, (void*)dpnp_bitwise_xor_c_default}; fmap[DPNPFuncName::DPNP_FN_BITWISE_XOR][eft_LNG][eft_LNG] = {eft_LNG, (void*)dpnp_bitwise_xor_c_default}; + fmap[DPNPFuncName::DPNP_FN_BITWISE_XOR_EXT][eft_BLN][eft_BLN] = {eft_BLN, (void*)dpnp_bitwise_xor_c_ext}; fmap[DPNPFuncName::DPNP_FN_BITWISE_XOR_EXT][eft_INT][eft_INT] = {eft_INT, (void*)dpnp_bitwise_xor_c_ext}; fmap[DPNPFuncName::DPNP_FN_BITWISE_XOR_EXT][eft_LNG][eft_LNG] = {eft_LNG, (void*)dpnp_bitwise_xor_c_ext}; diff --git a/dpnp/dpnp_algo/dpnp_algo_bitwise.pyx b/dpnp/dpnp_algo/dpnp_algo_bitwise.pyx index 482f00c2c71..a8af53b709d 100644 --- a/dpnp/dpnp_algo/dpnp_algo_bitwise.pyx +++ b/dpnp/dpnp_algo/dpnp_algo_bitwise.pyx @@ -1,7 +1,7 @@ # cython: language_level=3 # -*- coding: utf-8 -*- # ***************************************************************************** -# Copyright (c) 2016-2020, Intel Corporation +# Copyright (c) 2016-2023, Intel Corporation # All rights reserved. # # Redistribution and use in source and binary forms, with or without @@ -68,8 +68,8 @@ cpdef utils.dpnp_descriptor dpnp_bitwise_xor(utils.dpnp_descriptor x1_obj, return call_fptr_2in_1out_strides(DPNP_FN_BITWISE_XOR_EXT, x1_obj, x2_obj, dtype=dtype, out=out, where=where) -cpdef utils.dpnp_descriptor dpnp_invert(utils.dpnp_descriptor arr): - return call_fptr_1in_1out(DPNP_FN_INVERT_EXT, arr, arr.shape) +cpdef utils.dpnp_descriptor dpnp_invert(utils.dpnp_descriptor arr, utils.dpnp_descriptor out=None): + return call_fptr_1in_1out(DPNP_FN_INVERT_EXT, arr, arr.shape, out=out, func_name="invert") cpdef utils.dpnp_descriptor dpnp_left_shift(utils.dpnp_descriptor x1_obj, diff --git a/dpnp/dpnp_array.py b/dpnp/dpnp_array.py index d1ad1252d4e..f2ccf56ef76 100644 --- a/dpnp/dpnp_array.py +++ b/dpnp/dpnp_array.py @@ -125,7 +125,9 @@ def __abs__(self): def __add__(self, other): return dpnp.add(self, other) - # '__and__', + def __and__(self, other): + return dpnp.bitwise_and(self, other) + # '__array__', # '__array_finalize__', # '__array_function__', @@ -193,9 +195,17 @@ def __gt__(self, other): # '__hash__', # '__iadd__', - # '__iand__', + + def __iand__(self, other): + dpnp.bitwise_and(self, other, out=self) + return self + # '__ifloordiv__', - # '__ilshift__', + + def __ilshift__(self, other): + dpnp.left_shift(self, other, out=self) + return self + # '__imatmul__', # '__imod__', # '__imul__', @@ -209,18 +219,28 @@ def __index__(self): def __int__(self): return self._array_obj.__int__() - # '__invert__', - # '__ior__', + def __invert__(self): + return dpnp.invert(self) + + def __ior__(self, other): + dpnp.bitwise_or(self, other, out=self) + return self def __ipow__(self, other): dpnp.power(self, other, out=self) return self - # '__irshift__', + def 
__irshift__(self, other): + dpnp.right_shift(self, other, out=self) + return self + # '__isub__', # '__iter__', # '__itruediv__', - # '__ixor__', + + def __ixor__(self, other): + dpnp.bitwise_xor(self, other, out=self) + return self def __le__(self, other): return dpnp.less_equal(self, other) @@ -232,7 +252,8 @@ def __len__(self): return self._array_obj.__len__() - # '__lshift__', + def __lshift__(self, other): + return dpnp.left_shift(self, other) def __lt__(self, other): return dpnp.less(self, other) @@ -253,7 +274,10 @@ def __neg__(self): return dpnp.negative(self) # '__new__', - # '__or__', + + def __or__(self, other): + return dpnp.bitwise_or(self, other) + # '__pos__', def __pow__(self, other): @@ -262,7 +286,9 @@ def __pow__(self, other): def __radd__(self, other): return dpnp.add(other, self) - # '__rand__', + def __rand__(self, other): + return dpnp.bitwise_and(other, self) + # '__rdivmod__', # '__reduce__', # '__reduce_ex__', @@ -271,7 +297,9 @@ def __repr__(self): return dpt.usm_ndarray_repr(self._array_obj, prefix="array") # '__rfloordiv__', - # '__rlshift__', + + def __rlshift__(self, other): + return dpnp.left_shift(other, self) def __rmatmul__(self, other): return dpnp.matmul(other, self) @@ -282,13 +310,17 @@ def __rmod__(self, other): def __rmul__(self, other): return dpnp.multiply(other, self) - # '__ror__', - + def __ror__(self, other): + return dpnp.bitwise_or(other, self) + def __rpow__(self, other): return dpnp.power(other, self) - # '__rrshift__', - # '__rshift__', + def __rrshift__(self, other): + return dpnp.right_shift(other, self) + + def __rshift__(self, other): + return dpnp.right_shift(self, other) def __rsub__(self, other): return dpnp.subtract(other, self) @@ -296,7 +328,9 @@ def __rsub__(self, other): def __rtruediv__(self, other): return dpnp.true_divide(other, self) - # '__rxor__', + def __rxor__(self, other): + return dpnp.bitwise_xor(other, self) + # '__setattr__', def __setitem__(self, key, val): @@ -334,7 +368,8 @@ def __sub__(self, other): def __truediv__(self, other): return dpnp.true_divide(self, other) - # '__xor__', + def __xor__(self, other): + return dpnp.bitwise_xor(self, other) @staticmethod def _create_from_usm_ndarray(usm_ary : dpt.usm_ndarray): diff --git a/dpnp/dpnp_iface.py b/dpnp/dpnp_iface.py index b7cdef8cc61..9bf456060dd 100644 --- a/dpnp/dpnp_iface.py +++ b/dpnp/dpnp_iface.py @@ -251,6 +251,7 @@ def from_dlpack(obj, /): def get_dpnp_descriptor(ext_obj, copy_when_strides=True, copy_when_nondefault_queue=True, + alloc_dtype=None, alloc_usm_type=None, alloc_queue=None): """ @@ -274,7 +275,7 @@ def get_dpnp_descriptor(ext_obj, # If input object is a scalar, it means it was allocated on host memory. # We need to copy it to USM memory according to compute follows data paradigm. if isscalar(ext_obj): - ext_obj = array(ext_obj, usm_type=alloc_usm_type, sycl_queue=alloc_queue) + ext_obj = array(ext_obj, dtype=alloc_dtype, usm_type=alloc_usm_type, sycl_queue=alloc_queue) # while dpnp functions have no implementation with strides support # we need to create a non-strided copy diff --git a/dpnp/dpnp_iface_bitwise.py b/dpnp/dpnp_iface_bitwise.py index 51a28b0464e..36f37f4282e 100644 --- a/dpnp/dpnp_iface_bitwise.py +++ b/dpnp/dpnp_iface_bitwise.py @@ -2,7 +2,7 @@ # distutils: language = c++ # -*- coding: utf-8 -*- # ***************************************************************************** -# Copyright (c) 2016-2020, Intel Corporation +# Copyright (c) 2016-2023, Intel Corporation # All rights reserved. 
# # Redistribution and use in source and binary forms, with or without @@ -40,19 +40,20 @@ """ -import numpy - - from dpnp.dpnp_algo import * from dpnp.dpnp_utils import * import dpnp +import numpy +import dpctl.tensor as dpt + + __all__ = [ 'bitwise_and', + 'bitwise_not', 'bitwise_or', 'bitwise_xor', 'invert', - 'bitwise_not', 'left_shift', 'right_shift', ] @@ -61,37 +62,34 @@ def _check_nd_call(origin_func, dpnp_func, x1, x2, dtype=None, out=None, where=True, **kwargs): """Choose function to call based on input and call chosen fucntion.""" - x1_is_scalar = dpnp.isscalar(x1) - x2_is_scalar = dpnp.isscalar(x2) - x1_desc = dpnp.get_dpnp_descriptor(x1, copy_when_nondefault_queue=False) - x2_desc = dpnp.get_dpnp_descriptor(x2, copy_when_nondefault_queue=False) - x1_desc = dpnp.get_dpnp_descriptor(x1, copy_when_strides=False, copy_when_nondefault_queue=False) - x2_desc = dpnp.get_dpnp_descriptor(x2, copy_when_strides=False, copy_when_nondefault_queue=False) - - if x1_desc and x2_desc and not kwargs: - if not x1_desc and not x1_is_scalar: - pass - elif not x2_desc and not x2_is_scalar: - pass - elif x1_is_scalar and x2_is_scalar: - pass - elif x1_desc and x1_desc.ndim == 0: - pass - elif x2_desc and x2_desc.ndim == 0: - pass - elif x1_desc and x2_desc and x1_desc.size != x2_desc.size: - pass - elif x1_desc and x2_desc and x1_desc.shape != x2_desc.shape: - pass - elif dtype is not None: - pass - elif out is not None: - pass - elif not where: - pass + if where is not True: + pass + elif dtype is not None: + pass + elif dpnp.isscalar(x1) and dpnp.isscalar(x2): + # at least either x1 or x2 has to be an array + pass + else: + # get USM type and queue to copy scalar from the host memory into a USM allocation + if dpnp.isscalar(x1) or dpnp.isscalar(x2): + usm_type, queue = get_usm_allocations([x1, x2]) if dpnp.isscalar(x1) or dpnp.isscalar(x2) else (None, None) + dtype = x1.dtype if not dpnp.isscalar(x1) else x2.dtype else: - out_desc = dpnp.get_dpnp_descriptor(out, copy_when_nondefault_queue=False) if out is not None else None - return dpnp_func(x1_desc, x2_desc, dtype, out_desc, where).get_pyobj() + dtype, usm_type, queue = (None, None, None) + + x1_desc = dpnp.get_dpnp_descriptor(x1, copy_when_strides=False, copy_when_nondefault_queue=False, + alloc_dtype=dtype, alloc_usm_type=usm_type, alloc_queue=queue) + x2_desc = dpnp.get_dpnp_descriptor(x2, copy_when_strides=False, copy_when_nondefault_queue=False, + alloc_dtype=dtype, alloc_usm_type=usm_type, alloc_queue=queue) + if x1_desc and x2_desc: + if out is not None: + if not isinstance(out, (dpnp.ndarray, dpt.usm_ndarray)): + raise TypeError("return array must be of supported array type") + out_desc = dpnp.get_dpnp_descriptor(out, copy_when_nondefault_queue=False) + else: + out_desc = None + + return dpnp_func(x1_desc, x2_desc, dtype=dtype, out=out_desc, where=where).get_pyobj() return call_origin(origin_func, x1, x2, dtype=dtype, out=out, where=where, **kwargs) @@ -102,14 +100,20 @@ def bitwise_and(x1, x2, dtype=None, out=None, where=True, **kwargs): For full documentation refer to :obj:`numpy.bitwise_and`. + Returns + ------- + y : dpnp.ndarray + An array containing the element-wise results. + Limitations ----------- - Parameters ``x1`` and ``x2`` are supported as either :obj:`dpnp.ndarray` or scalar. - Parameters ``dtype``, ``out`` and ``where`` are supported with their default values. - Sizes, shapes and data types of input arrays are supported to be equal. - Keyword arguments ``kwargs`` are currently unsupported. 
+ Parameters `x1` and `x2` are supported as either scalar, :class:`dpnp.ndarray` + or :class:`dpctl.tensor.usm_ndarray`, but both `x1` and `x2` can not be scalars at the same time. + Parameters `dtype` and `where` are supported with their default values. + Keyword arguments `kwargs` are currently unsupported. Otherwise the function will be executed sequentially on CPU. - Input data is supported as integer only. + Data type of input arrays `x` and `y` are limited by :obj:`dpnp.bool`, :obj:`dpnp.int32` + and :obj:`dpnp.int64`. See Also -------- @@ -136,14 +140,20 @@ def bitwise_or(x1, x2, dtype=None, out=None, where=True, **kwargs): For full documentation refer to :obj:`numpy.bitwise_or`. + Returns + ------- + y : dpnp.ndarray + An array containing the element-wise results. + Limitations ----------- - Parameters ``x1`` and ``x2`` are supported as either :obj:`dpnp.ndarray` or scalar. - Parameters ``dtype``, ``out`` and ``where`` are supported with their default values. - Sizes, shapes and data types of input arrays are supported to be equal. - Keyword arguments ``kwargs`` are currently unsupported. + Parameters `x1` and `x2` are supported as either scalar, :class:`dpnp.ndarray` + or :class:`dpctl.tensor.usm_ndarray`, but both `x1` and `x2` can not be scalars at the same time. + Parameters `dtype` and `where` are supported with their default values. + Keyword arguments `kwargs` are currently unsupported. Otherwise the function will be executed sequentially on CPU. - Input data is supported as integer only. + Data type of input arrays `x` and `y` are limited by :obj:`dpnp.bool`, :obj:`dpnp.int32` + and :obj:`dpnp.int64`. See Also -------- @@ -170,14 +180,20 @@ def bitwise_xor(x1, x2, dtype=None, out=None, where=True, **kwargs): For full documentation refer to :obj:`numpy.bitwise_xor`. + Returns + ------- + y : dpnp.ndarray + An array containing the element-wise results. + Limitations ----------- - Parameters ``x1`` and ``x2`` are supported as either :obj:`dpnp.ndarray` or scalar. - Parameters ``dtype``, ``out`` and ``where`` are supported with their default values. - Sizes, shapes and data types of input arrays are supported to be equal. - Keyword arguments ``kwargs`` are currently unsupported. + Parameters `x1` and `x2` are supported as either scalar, :class:`dpnp.ndarray` + or :class:`dpctl.tensor.usm_ndarray`, but both `x1` and `x2` can not be scalars at the same time. + Parameters `dtype` and `where` are supported with their default values. + Keyword arguments `kwargs` are currently unsupported. Otherwise the function will be executed sequentially on CPU. - Input data is supported as integer only. + Data type of input arrays `x` and `y` are limited by :obj:`dpnp.bool`, :obj:`dpnp.int32` + and :obj:`dpnp.int64`. See Also -------- @@ -198,18 +214,33 @@ def bitwise_xor(x1, x2, dtype=None, out=None, where=True, **kwargs): return _check_nd_call(numpy.bitwise_xor, dpnp_bitwise_xor, x1, x2, dtype=dtype, out=out, where=where, **kwargs) -def invert(x, **kwargs): +def invert(x, + /, + out=None, + *, + where=True, + dtype=None, + subok=True, + **kwargs): """ Compute bit-wise inversion, or bit-wise NOT, element-wise. For full documentation refer to :obj:`numpy.invert`. + Returns + ------- + y : dpnp.ndarray + An array containing the element-wise results. + Limitations ----------- - Parameters ``x`` is supported as :obj:`dpnp.ndarray`. - Keyword arguments ``kwargs`` are currently unsupported. + Parameter `x` is supported as either :class:`dpnp.ndarray` + or :class:`dpctl.tensor.usm_ndarray`. 
+ Parameters `where`, `dtype` and `subok` are supported with their default values. + Keyword arguments `kwargs` are currently unsupported. Otherwise the function will be executed sequentially on CPU. - Input array ``x`` is supported as integer :obj:`dpnp.ndarray` only. + Data type of input array `x` is limited by :obj:`dpnp.bool`, :obj:`dpnp.int32` + and :obj:`dpnp.int64`. See Also -------- @@ -220,19 +251,34 @@ def invert(x, **kwargs): Examples -------- - >>> import dpnp as np - >>> x = np.array([13]) - >>> out = np.invert(x) + >>> import dpnp as dp + >>> x = dp.array([13]) + >>> out = dp.invert(x) >>> out[0] -14 """ - x1_desc = dpnp.get_dpnp_descriptor(x, copy_when_nondefault_queue=False) - if x1_desc and not kwargs: - return dpnp_invert(x1_desc).get_pyobj() - - return call_origin(numpy.invert, x, **kwargs) + if kwargs: + pass + elif where is not True: + pass + elif dtype is not None: + pass + elif subok is not True: + pass + else: + x1_desc = dpnp.get_dpnp_descriptor(x, copy_when_nondefault_queue=False) + if x1_desc: + if out is not None: + if not isinstance(out, (dpnp.ndarray, dpt.usm_ndarray)): + raise TypeError("return array must be of supported array type") + out_desc = dpnp.get_dpnp_descriptor(out, copy_when_nondefault_queue=False) + else: + out_desc = None + return dpnp_invert(x1_desc, out_desc).get_pyobj() + + return call_origin(numpy.invert, x, out=out, where=where, dtype=dtype, subok=subok, **kwargs) bitwise_not = invert # bitwise_not is an alias for invert @@ -244,12 +290,17 @@ def left_shift(x1, x2, dtype=None, out=None, where=True, **kwargs): For full documentation refer to :obj:`numpy.left_shift`. + Returns + ------- + y : dpnp.ndarray + An array containing the element-wise results. + Limitations ----------- - Parameters ``x1`` and ``x2`` are supported as either :obj:`dpnp.ndarray` or scalar. - Parameters ``dtype``, ``out`` and ``where`` are supported with their default values. - Sizes, shapes and data types of input arrays are supported to be equal. - Keyword arguments ``kwargs`` are currently unsupported. + Parameters `x1` and `x2` are supported as either scalar, :class:`dpnp.ndarray` + or :class:`dpctl.tensor.usm_ndarray`, but both `x1` and `x2` can not be scalars at the same time. + Parameters `dtype` and `where` are supported with their default values. + Keyword arguments `kwargs` are currently unsupported. Otherwise the function will be executed sequentially on CPU. Input data is supported as integer only. @@ -276,12 +327,17 @@ def right_shift(x1, x2, dtype=None, out=None, where=True, **kwargs): For full documentation refer to :obj:`numpy.right_shift`. + Returns + ------- + y : dpnp.ndarray + An array containing the element-wise results. + Limitations ----------- - Parameters ``x1`` and ``x2`` are supported as either :obj:`dpnp.ndarray` or scalar. - Parameters ``dtype``, ``out`` and ``where`` are supported with their default values. - Sizes, shapes and data types of input arrays are supported to be equal. - Keyword arguments ``kwargs`` are currently unsupported. + Parameters `x1` and `x2` are supported as either scalar, :class:`dpnp.ndarray` + or :class:`dpctl.tensor.usm_ndarray`, but both `x1` and `x2` can not be scalars at the same time. + Parameters `dtype` and `where` are supported with their default values. + Keyword arguments `kwargs` are currently unsupported. Otherwise the function will be executed sequentially on CPU. Input data is supported as integer only. 
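
For orientation only, a minimal usage sketch of the extended bitwise interface (illustrative, not part of the patch). It assumes the behaviour stated in the Limitations sections above: one operand may be a Python scalar, `out` may be a dpnp.ndarray or dpctl.tensor.usm_ndarray, and invert accepts boolean input.

import dpnp as dp

a = dp.array([0, 1, 2, 3], dtype=dp.int32)

# one operand may be a Python scalar; it is copied into USM memory with the dtype of `a`
b = dp.bitwise_and(a, 1)

# results can be written into a preallocated output array
out = dp.empty_like(a)
dp.bitwise_or(a, 2, out=out)

# invert (bitwise_not) accepts boolean arrays in addition to int32/int64
mask = dp.invert(dp.array([True, False]))
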
diff --git a/dpnp/dpnp_iface_logic.py b/dpnp/dpnp_iface_logic.py index 716b2ff8a0f..e36c44d3f98 100644 --- a/dpnp/dpnp_iface_logic.py +++ b/dpnp/dpnp_iface_logic.py @@ -300,7 +300,8 @@ def equal(x1, alloc_usm_type=usm_type, alloc_queue=queue) if x1_desc and x2_desc: return dpnp_equal(x1_desc, x2_desc).get_pyobj() - return call_origin(numpy.equal, x1, x2) + + return call_origin(numpy.equal, x1, x2, out=out, where=where, dtype=dtype, subok=subok) def greater(x1, @@ -370,7 +371,8 @@ def greater(x1, alloc_usm_type=usm_type, alloc_queue=queue) if x1_desc and x2_desc: return dpnp_greater(x1_desc, x2_desc).get_pyobj() - return call_origin(numpy.greater, x1, x2) + + return call_origin(numpy.greater, x1, x2, out=out, where=where, dtype=dtype, subok=subok) def greater_equal(x1, @@ -440,7 +442,8 @@ def greater_equal(x1, alloc_usm_type=usm_type, alloc_queue=queue) if x1_desc and x2_desc: return dpnp_greater_equal(x1_desc, x2_desc).get_pyobj() - return call_origin(numpy.greater_equal, x1, x2) + + return call_origin(numpy.greater_equal, x1, x2, out=out, where=where, dtype=dtype, subok=subok) def isclose(x1, x2, rtol=1e-05, atol=1e-08, equal_nan=False): @@ -685,7 +688,8 @@ def less(x1, alloc_usm_type=usm_type, alloc_queue=queue) if x1_desc and x2_desc: return dpnp_less(x1_desc, x2_desc).get_pyobj() - return call_origin(numpy.less, x1, x2) + + return call_origin(numpy.less, x1, x2, out=out, where=where, dtype=dtype, subok=subok) def less_equal(x1, @@ -755,7 +759,8 @@ def less_equal(x1, alloc_usm_type=usm_type, alloc_queue=queue) if x1_desc and x2_desc: return dpnp_less_equal(x1_desc, x2_desc).get_pyobj() - return call_origin(numpy.less_equal, x1, x2) + + return call_origin(numpy.less_equal, x1, x2, out=out, where=where, dtype=dtype, subok=subok) def logical_and(x1, @@ -824,7 +829,8 @@ def logical_and(x1, alloc_usm_type=usm_type, alloc_queue=queue) if x1_desc and x2_desc: return dpnp_logical_and(x1_desc, x2_desc).get_pyobj() - return call_origin(numpy.logical_and, x1, x2) + + return call_origin(numpy.logical_and, x1, x2, out=out, where=where, dtype=dtype, subok=subok) def logical_not(x, @@ -881,7 +887,8 @@ def logical_not(x, x1_desc = dpnp.get_dpnp_descriptor(x, copy_when_strides=False, copy_when_nondefault_queue=False) if x1_desc: return dpnp_logical_not(x1_desc).get_pyobj() - return call_origin(numpy.logical_not, x) + + return call_origin(numpy.logical_not, x, out=out, where=where, dtype=dtype, subok=subok) def logical_or(x1, @@ -950,7 +957,8 @@ def logical_or(x1, alloc_usm_type=usm_type, alloc_queue=queue) if x1_desc and x2_desc: return dpnp_logical_or(x1_desc, x2_desc).get_pyobj() - return call_origin(numpy.logical_or, x1, x2) + + return call_origin(numpy.logical_or, x1, x2, out=out, where=where, dtype=dtype, subok=subok) def logical_xor(x1, @@ -1019,7 +1027,8 @@ def logical_xor(x1, alloc_usm_type=usm_type, alloc_queue=queue) if x1_desc and x2_desc: return dpnp_logical_xor(x1_desc, x2_desc).get_pyobj() - return call_origin(numpy.logical_xor, x1, x2) + + return call_origin(numpy.logical_xor, x1, x2, out=out, where=where, dtype=dtype, subok=subok) def not_equal(x1, @@ -1089,4 +1098,5 @@ def not_equal(x1, alloc_usm_type=usm_type, alloc_queue=queue) if x1_desc and x2_desc: return dpnp_not_equal(x1_desc, x2_desc).get_pyobj() - return call_origin(numpy.not_equal, x1, x2) + + return call_origin(numpy.not_equal, x1, x2, out=out, where=where, dtype=dtype, subok=subok) diff --git a/dpnp/dpnp_iface_mathematical.py b/dpnp/dpnp_iface_mathematical.py index 03d2a352775..08de8b2ba5a 100644 --- 
a/dpnp/dpnp_iface_mathematical.py +++ b/dpnp/dpnp_iface_mathematical.py @@ -44,6 +44,7 @@ from dpnp.dpnp_utils import * import dpnp + import numpy import dpctl.tensor as dpt @@ -1413,15 +1414,14 @@ def power(x1, alloc_usm_type=usm_type, alloc_queue=queue) x2_desc = dpnp.get_dpnp_descriptor(x2, copy_when_strides=False, copy_when_nondefault_queue=False, alloc_usm_type=usm_type, alloc_queue=queue) - - if out is not None: - if not isinstance(out, (dpnp.ndarray, dpt.usm_ndarray)): - raise TypeError("return array must be of supported array type") - out_desc = dpnp.get_dpnp_descriptor(out, copy_when_nondefault_queue=False) - else: - out_desc = None - if x1_desc and x2_desc: + if out is not None: + if not isinstance(out, (dpnp.ndarray, dpt.usm_ndarray)): + raise TypeError("return array must be of supported array type") + out_desc = dpnp.get_dpnp_descriptor(out, copy_when_nondefault_queue=False) + else: + out_desc = None + return dpnp_power(x1_desc, x2_desc, dtype=dtype, out=out_desc, where=where).get_pyobj() return call_origin(numpy.power, x1, x2, out=out, where=where, dtype=dtype, subok=subok, **kwargs) diff --git a/tests/helper.py b/tests/helper.py index 8432443d488..1e97615fb3d 100644 --- a/tests/helper.py +++ b/tests/helper.py @@ -27,7 +27,9 @@ def get_float_dtypes(no_float16=True, dev = dpctl.select_default_device() if device is None else device # add floating types - dtypes = [dpnp.float16] if not no_float16 else [] + dtypes = [] + if not no_float16 and dev.has_aspect_fp16: + dtypes.append(dpnp.float16) dtypes.append(dpnp.float32) if dev.has_aspect_fp64: @@ -64,11 +66,11 @@ def get_all_dtypes(no_bool=False, dtypes.extend([dpnp.int32, dpnp.int64]) # add floating types - dtypes.extend(get_float_dtypes(dev)) + dtypes.extend(get_float_dtypes(no_float16=no_float16, device=dev)) # add complex types if not no_complex: - dtypes.extend(get_complex_dtypes(dev)) + dtypes.extend(get_complex_dtypes(device=dev)) # add None value to validate a default dtype if not no_none: diff --git a/tests/skipped_tests.tbl b/tests/skipped_tests.tbl index bda10cfd497..ecc5bd5e999 100644 --- a/tests/skipped_tests.tbl +++ b/tests/skipped_tests.tbl @@ -192,12 +192,7 @@ tests/test_random.py::TestPermutationsTestShuffle::test_shuffle1[lambda x: dpnp. 
tests/test_random.py::TestPermutationsTestShuffle::test_shuffle1[lambda x: dpnp.asarray(x).astype(dpnp.int8)] tests/test_sort.py::test_partition[[[1, 0], [3, 0]]-float32-1] -tests/third_party/cupy/binary_tests/test_elementwise.py::TestElementwise::test_bitwise_and -tests/third_party/cupy/binary_tests/test_elementwise.py::TestElementwise::test_bitwise_or -tests/third_party/cupy/binary_tests/test_elementwise.py::TestElementwise::test_bitwise_xor -tests/third_party/cupy/binary_tests/test_elementwise.py::TestElementwise::test_invert -tests/third_party/cupy/binary_tests/test_elementwise.py::TestElementwise::test_left_shift -tests/third_party/cupy/binary_tests/test_elementwise.py::TestElementwise::test_right_shift + tests/third_party/cupy/core_tests/test_ndarray_complex_ops.py::TestAngle::test_angle tests/third_party/cupy/core_tests/test_ndarray_complex_ops.py::TestRealImag::test_imag tests/third_party/cupy/core_tests/test_ndarray_complex_ops.py::TestRealImag::test_imag_inplace diff --git a/tests/skipped_tests_gpu.tbl b/tests/skipped_tests_gpu.tbl index 3e002675933..d3864a05b7e 100644 --- a/tests/skipped_tests_gpu.tbl +++ b/tests/skipped_tests_gpu.tbl @@ -359,12 +359,7 @@ tests/test_random.py::TestPermutationsTestShuffle::test_shuffle1[lambda x: (dpnp tests/test_random.py::TestPermutationsTestShuffle::test_shuffle1[lambda x: dpnp.asarray([(i, i) for i in x], [("a", object), ("b", dpnp.int32)])]] tests/test_random.py::TestPermutationsTestShuffle::test_shuffle1[lambda x: dpnp.asarray(x).astype(dpnp.int8)] tests/test_sort.py::test_partition[[[1, 0], [3, 0]]-float32-1] -tests/third_party/cupy/binary_tests/test_elementwise.py::TestElementwise::test_bitwise_and -tests/third_party/cupy/binary_tests/test_elementwise.py::TestElementwise::test_bitwise_or -tests/third_party/cupy/binary_tests/test_elementwise.py::TestElementwise::test_bitwise_xor -tests/third_party/cupy/binary_tests/test_elementwise.py::TestElementwise::test_invert -tests/third_party/cupy/binary_tests/test_elementwise.py::TestElementwise::test_left_shift -tests/third_party/cupy/binary_tests/test_elementwise.py::TestElementwise::test_right_shift + tests/third_party/cupy/core_tests/test_ndarray_complex_ops.py::TestAngle::test_angle tests/third_party/cupy/core_tests/test_ndarray_complex_ops.py::TestRealImag::test_imag tests/third_party/cupy/core_tests/test_ndarray_complex_ops.py::TestRealImag::test_imag_inplace diff --git a/tests/test_bitwise.py b/tests/test_bitwise.py index 645ae4556c1..34f7f971c86 100644 --- a/tests/test_bitwise.py +++ b/tests/test_bitwise.py @@ -3,60 +3,97 @@ import dpnp as inp import numpy +from numpy.testing import ( + assert_array_equal +) @pytest.mark.parametrize("lhs", [[[-7, -6, -5, -4, -3, -2, -1], [0, 1, 2, 3, 4, 5, 6]], [-3, -2, -1, 0, 1, 2, 3], 0]) @pytest.mark.parametrize("rhs", [[[0, 1, 2, 3, 4, 5, 6], [7, 8, 9, 10, 11, 12, 13]], [0, 1, 2, 3, 4, 5, 6], 3]) -@pytest.mark.parametrize("dtype", [numpy.int32, numpy.int64]) +@pytest.mark.parametrize("dtype", [inp.bool, inp.int32, inp.int64]) class TestBitwise: @staticmethod def array_or_scalar(xp, data, dtype=None): if numpy.isscalar(data): + if dtype == inp.bool: + return numpy.dtype(dtype).type(data) return data return xp.array(data, dtype=dtype) def _test_unary_int(self, name, data, dtype): - a = self.array_or_scalar(inp, data, dtype=dtype) - result = getattr(inp, name)(a) + dp_a = self.array_or_scalar(inp, data, dtype=dtype) + result = getattr(inp, name)(dp_a) - a = self.array_or_scalar(numpy, data, dtype=dtype) - expected = getattr(numpy, name)(a) + np_a = 
self.array_or_scalar(numpy, data, dtype=dtype) + expected = getattr(numpy, name)(np_a) - numpy.testing.assert_array_equal(result, expected) + assert_array_equal(result, expected) + return (dp_a, np_a) def _test_binary_int(self, name, lhs, rhs, dtype): - a = self.array_or_scalar(inp, lhs, dtype=dtype) - b = self.array_or_scalar(inp, rhs, dtype=dtype) - result = getattr(inp, name)(a, b) + if name in ('left_shift', 'right_shift') and dtype == inp.bool: + pytest.skip("A shift operation isn't implemented for bool type") + elif numpy.isscalar(lhs) and numpy.isscalar(rhs): + pytest.skip("Both inputs can't be scalars") - a = self.array_or_scalar(numpy, lhs, dtype=dtype) - b = self.array_or_scalar(numpy, rhs, dtype=dtype) - expected = getattr(numpy, name)(a, b) + dp_a = self.array_or_scalar(inp, lhs, dtype=dtype) + dp_b = self.array_or_scalar(inp, rhs, dtype=dtype) + result = getattr(inp, name)(dp_a, dp_b) - numpy.testing.assert_array_equal(result, expected) + np_a = self.array_or_scalar(numpy, lhs, dtype=dtype) + np_b = self.array_or_scalar(numpy, rhs, dtype=dtype) + expected = getattr(numpy, name)(np_a, np_b) + + assert_array_equal(result, expected) + return (dp_a, dp_b, np_a, np_b) - @pytest.mark.usefixtures("allow_fall_back_on_numpy") def test_bitwise_and(self, lhs, rhs, dtype): - self._test_binary_int('bitwise_and', lhs, rhs, dtype) + dp_a, dp_b, np_a, np_b = self._test_binary_int('bitwise_and', lhs, rhs, dtype) + assert_array_equal(dp_a & dp_b, np_a & np_b) + + if not (inp.isscalar(dp_a) or inp.isscalar(dp_b)) and dp_a.shape == dp_b.shape: + dp_a &= dp_b + np_a &= np_b + assert_array_equal(dp_a, np_a) - @pytest.mark.usefixtures("allow_fall_back_on_numpy") def test_bitwise_or(self, lhs, rhs, dtype): - self._test_binary_int('bitwise_or', lhs, rhs, dtype) + dp_a, dp_b, np_a, np_b = self._test_binary_int('bitwise_or', lhs, rhs, dtype) + assert_array_equal(dp_a | dp_b, np_a | np_b) + + if not (inp.isscalar(dp_a) or inp.isscalar(dp_b)) and dp_a.shape == dp_b.shape: + dp_a |= dp_b + np_a |= np_b + assert_array_equal(dp_a, np_a) - @pytest.mark.usefixtures("allow_fall_back_on_numpy") def test_bitwise_xor(self, lhs, rhs, dtype): - self._test_binary_int('bitwise_xor', lhs, rhs, dtype) + dp_a, dp_b, np_a, np_b = self._test_binary_int('bitwise_xor', lhs, rhs, dtype) + assert_array_equal(dp_a ^ dp_b, np_a ^ np_b) + + if not (inp.isscalar(dp_a) or inp.isscalar(dp_b)) and dp_a.shape == dp_b.shape: + dp_a ^= dp_b + np_a ^= np_b + assert_array_equal(dp_a, np_a) - @pytest.mark.usefixtures("allow_fall_back_on_numpy") def test_invert(self, lhs, rhs, dtype): - self._test_unary_int('invert', lhs, dtype) + dp_a, np_a = self._test_unary_int('invert', lhs, dtype) + assert_array_equal(~dp_a, ~np_a) - @pytest.mark.usefixtures("allow_fall_back_on_numpy") def test_left_shift(self, lhs, rhs, dtype): - self._test_binary_int('left_shift', lhs, rhs, dtype) + dp_a, dp_b, np_a, np_b = self._test_binary_int('left_shift', lhs, rhs, dtype) + assert_array_equal(dp_a << dp_b, np_a << np_b) + + if not (inp.isscalar(dp_a) or inp.isscalar(dp_b)) and dp_a.shape == dp_b.shape: + dp_a <<= dp_b + np_a <<= np_b + assert_array_equal(dp_a, np_a) - @pytest.mark.usefixtures("allow_fall_back_on_numpy") def test_right_shift(self, lhs, rhs, dtype): - self._test_binary_int('right_shift', lhs, rhs, dtype) + dp_a, dp_b, np_a, np_b = self._test_binary_int('right_shift', lhs, rhs, dtype) + assert_array_equal(dp_a >> dp_b, np_a >> np_b) + + if not (inp.isscalar(dp_a) or inp.isscalar(dp_b)) and dp_a.shape == dp_b.shape: + dp_a >>= dp_b + np_a >>= np_b + 
            assert_array_equal(dp_a, np_a)
diff --git a/tests/test_usm_type.py b/tests/test_usm_type.py
index b0efa89968b..817bdee66a5 100644
--- a/tests/test_usm_type.py
+++ b/tests/test_usm_type.py
@@ -146,6 +146,24 @@ def test_coerced_usm_types_logic_op(op, usm_type_x, usm_type_y):
     assert z.usm_type == du.get_coerced_usm_type([usm_type_x, usm_type_y])
 
 
+@pytest.mark.parametrize("op",
+                         ['bitwise_and', 'bitwise_or', 'bitwise_xor', 'left_shift', 'right_shift'],
+                         ids=['bitwise_and', 'bitwise_or', 'bitwise_xor', 'left_shift', 'right_shift'])
+@pytest.mark.parametrize("usm_type_x", list_of_usm_types, ids=list_of_usm_types)
+@pytest.mark.parametrize("usm_type_y", list_of_usm_types, ids=list_of_usm_types)
+def test_coerced_usm_types_bitwise_op(op, usm_type_x, usm_type_y):
+    x = dp.arange(25, usm_type = usm_type_x)
+    y = dp.arange(25, usm_type = usm_type_y)[::-1]
+
+    z = getattr(dp, op)(x, y)
+    zx = getattr(dp, op)(x, 7)
+    zy = getattr(dp, op)(12, y)
+
+    assert x.usm_type == zx.usm_type == usm_type_x
+    assert y.usm_type == zy.usm_type == usm_type_y
+    assert z.usm_type == du.get_coerced_usm_type([usm_type_x, usm_type_y])
+
+
 @pytest.mark.parametrize("usm_type_x", list_of_usm_types, ids=list_of_usm_types)
 @pytest.mark.parametrize("usm_type_y", list_of_usm_types, ids=list_of_usm_types)
 def test_meshgrid(usm_type_x, usm_type_y):
diff --git a/tests/third_party/cupy/binary_tests/test_elementwise.py b/tests/third_party/cupy/binary_tests/test_elementwise.py
index b2212e043f2..a01cbb082a3 100644
--- a/tests/third_party/cupy/binary_tests/test_elementwise.py
+++ b/tests/third_party/cupy/binary_tests/test_elementwise.py
@@ -1,18 +1,19 @@
 import unittest
 
+import numpy
 from tests.third_party.cupy import testing
 
 
 @testing.gpu
 class TestElementwise(unittest.TestCase):
 
-    @testing.for_int_dtypes()
+    @testing.for_dtypes((numpy.bool_, numpy.int32, numpy.int64))
     @testing.numpy_cupy_array_equal()
     def check_unary_int(self, name, xp, dtype):
         a = xp.array([-3, -2, -1, 0, 1, 2, 3], dtype=dtype)
         return getattr(xp, name)(a)
 
-    @testing.for_int_dtypes()
+    @testing.for_dtypes((numpy.int32, numpy.int64))
     @testing.numpy_cupy_array_equal()
     def check_binary_int(self, name, xp, dtype):
         a = xp.array([-3, -2, -1, 0, 1, 2, 3], dtype=dtype)
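
A short, hypothetical round-trip through the operator methods added to dpnp_array above; the values are illustrative and mirror what the new tests exercise.

import dpnp as dp

x = dp.array([1, 2, 4, 8], dtype=dp.int64)
y = dp.array([3, 3, 3, 3], dtype=dp.int64)

z = x & y        # __and__     -> dpnp.bitwise_and
z |= x           # __ior__     -> in-place dpnp.bitwise_or
w = x << 1       # __lshift__  -> dpnp.left_shift
w >>= 1          # __irshift__ -> in-place dpnp.right_shift
v = 5 ^ x        # __rxor__    -> dpnp.bitwise_xor with a scalar on the left
t = ~dp.array([True, False])   # __invert__ -> dpnp.invert, now covering bool
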