Skip to content

Commit

Permalink
Merge master into gold/2021 (#1335)
Browse files Browse the repository at this point in the history
* Add dpnp.broadcast_to() function (#1333)

* Add support of bool type in bitwise operations (#1334)

* Add support of bool type in bitwise operations

* Update dpnp/dpnp_algo/dpnp_algo_bitwise.pyx
  • Loading branch information
antonwolfy authored Mar 7, 2023
1 parent b675679 commit 759c6e7
Show file tree
Hide file tree
Showing 17 changed files with 602 additions and 215 deletions.
4 changes: 2 additions & 2 deletions dpnp/backend/include/dpnp_gen_2arg_1type_tbl.hpp
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
//*****************************************************************************
// Copyright (c) 2016-2020, Intel Corporation
// Copyright (c) 2016-2023, Intel Corporation
// All rights reserved.
//
// Redistribution and use in source and binary forms, with or without
Expand Down Expand Up @@ -104,7 +104,7 @@

#endif

MACRO_2ARG_1TYPE_OP(dpnp_bitwise_and_c, input1_elem& input2_elem)
MACRO_2ARG_1TYPE_OP(dpnp_bitwise_and_c, input1_elem & input2_elem)
MACRO_2ARG_1TYPE_OP(dpnp_bitwise_or_c, input1_elem | input2_elem)
MACRO_2ARG_1TYPE_OP(dpnp_bitwise_xor_c, input1_elem ^ input2_elem)
MACRO_2ARG_1TYPE_OP(dpnp_left_shift_c, input1_elem << input2_elem)
Expand Down
110 changes: 98 additions & 12 deletions dpnp/backend/kernels/dpnp_krnl_bitwise.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,7 @@

#include "dpnp_fptr.hpp"
#include "dpnp_iface.hpp"
#include "dpnp_iterator.hpp"
#include "dpnp_utils.hpp"
#include "dpnpc_memory_adapter.hpp"
#include "queue_sycl.hpp"
Expand All @@ -49,27 +50,66 @@ DPCTLSyclEventRef dpnp_invert_c(DPCTLSyclQueueRef q_ref,
sycl::queue q = *(reinterpret_cast<sycl::queue*>(q_ref));
sycl::event event;

DPNPC_ptr_adapter<_DataType> input1_ptr(q_ref, array1_in, size);
_DataType* array1 = input1_ptr.get_ptr();
_DataType* result = reinterpret_cast<_DataType*>(result1);
_DataType* input_data = static_cast<_DataType*>(array1_in);
_DataType* result = static_cast<_DataType*>(result1);

sycl::range<1> gws(size);
auto kernel_parallel_for_func = [=](sycl::id<1> global_id) {
size_t i = global_id[0]; /*for (size_t i = 0; i < size; ++i)*/
constexpr size_t lws = 64;
constexpr unsigned int vec_sz = 8;

auto gws_range = sycl::range<1>(((size + lws * vec_sz - 1) / (lws * vec_sz)) * lws);
auto lws_range = sycl::range<1>(lws);

auto kernel_parallel_for_func = [=](sycl::nd_item<1> nd_it) {
auto sg = nd_it.get_sub_group();
const auto max_sg_size = sg.get_max_local_range()[0];
const size_t start =
vec_sz * (nd_it.get_group(0) * nd_it.get_local_range(0) + sg.get_group_id()[0] * max_sg_size);

if (start + static_cast<size_t>(vec_sz) * max_sg_size < size)
{
_DataType input_elem1 = array1[i];
result[i] = ~input_elem1;
using multi_ptrT = sycl::multi_ptr<_DataType, sycl::access::address_space::global_space>;

sycl::vec<_DataType, vec_sz> x = sg.load<vec_sz>(multi_ptrT(&input_data[start]));
sycl::vec<_DataType, vec_sz> res_vec;

if constexpr (std::is_same_v<_DataType, bool>)
{
#pragma unroll
for (size_t k = 0; k < vec_sz; ++k)
{
res_vec[k] = !(x[k]);
}
}
else
{
res_vec = ~x;
}

sg.store<vec_sz>(multi_ptrT(&result[start]), res_vec);
}
else
{
for (size_t k = start + sg.get_local_id()[0]; k < size; k += max_sg_size)
{
if constexpr (std::is_same_v<_DataType, bool>)
{
result[k] = !(input_data[k]);
}
else
{
result[k] = ~(input_data[k]);
}
}
}
};

auto kernel_func = [&](sycl::handler& cgh) {
cgh.parallel_for<class dpnp_invert_c_kernel<_DataType>>(gws, kernel_parallel_for_func);
cgh.parallel_for<class dpnp_invert_c_kernel<_DataType>>(sycl::nd_range<1>(gws_range, lws_range),
kernel_parallel_for_func);
};

event = q.submit(kernel_func);

event_ref = reinterpret_cast<DPCTLSyclEventRef>(&event);

return DPCTLEvent_Copy(event_ref);
}

Expand All @@ -84,6 +124,7 @@ void dpnp_invert_c(void* array1_in, void* result1, size_t size)
size,
dep_event_vec_ref);
DPCTLEvent_WaitAndThrow(event_ref);
DPCTLEvent_Delete(event_ref);
}

template <typename _DataType>
Expand All @@ -98,9 +139,11 @@ DPCTLSyclEventRef (*dpnp_invert_ext_c)(DPCTLSyclQueueRef,

static void func_map_init_bitwise_1arg_1type(func_map_t& fmap)
{
fmap[DPNPFuncName::DPNP_FN_INVERT][eft_BLN][eft_BLN] = {eft_BLN, (void*)dpnp_invert_default_c<bool>};
fmap[DPNPFuncName::DPNP_FN_INVERT][eft_INT][eft_INT] = {eft_INT, (void*)dpnp_invert_default_c<int32_t>};
fmap[DPNPFuncName::DPNP_FN_INVERT][eft_LNG][eft_LNG] = {eft_LNG, (void*)dpnp_invert_default_c<int64_t>};

fmap[DPNPFuncName::DPNP_FN_INVERT_EXT][eft_BLN][eft_BLN] = {eft_BLN, (void*)dpnp_invert_ext_c<bool>};
fmap[DPNPFuncName::DPNP_FN_INVERT_EXT][eft_INT][eft_INT] = {eft_INT, (void*)dpnp_invert_ext_c<int32_t>};
fmap[DPNPFuncName::DPNP_FN_INVERT_EXT][eft_LNG][eft_LNG] = {eft_LNG, (void*)dpnp_invert_ext_c<int64_t>};

Expand All @@ -114,6 +157,9 @@ static void func_map_init_bitwise_1arg_1type(func_map_t& fmap)
template <typename _KernelNameSpecialization> \
class __name__##_strides_kernel; \
\
template <typename _KernelNameSpecialization> \
class __name__##_broadcast_kernel; \
\
template <typename _DataType> \
DPCTLSyclEventRef __name__(DPCTLSyclQueueRef q_ref, \
void* result_out, \
Expand Down Expand Up @@ -152,6 +198,8 @@ static void func_map_init_bitwise_1arg_1type(func_map_t& fmap)
_DataType* input2_data = static_cast<_DataType*>(const_cast<void*>(input2_in)); \
_DataType* result = static_cast<_DataType*>(result_out); \
\
bool use_broadcasting = !array_equal(input1_shape, input1_ndim, input2_shape, input2_ndim); \
\
shape_elem_type* input1_shape_offsets = new shape_elem_type[input1_ndim]; \
\
get_shape_offsets_inkernel(input1_shape, input1_ndim, input1_shape_offsets); \
Expand All @@ -167,7 +215,42 @@ static void func_map_init_bitwise_1arg_1type(func_map_t& fmap)
sycl::event event; \
sycl::range<1> gws(result_size); \
\
if (use_strides) \
if (use_broadcasting) \
{ \
DPNPC_id<_DataType>* input1_it; \
const size_t input1_it_size_in_bytes = sizeof(DPNPC_id<_DataType>); \
input1_it = reinterpret_cast<DPNPC_id<_DataType>*>(dpnp_memory_alloc_c(q_ref, input1_it_size_in_bytes)); \
new (input1_it) DPNPC_id<_DataType>(q_ref, input1_data, input1_shape, input1_strides, input1_ndim); \
\
input1_it->broadcast_to_shape(result_shape, result_ndim); \
\
DPNPC_id<_DataType>* input2_it; \
const size_t input2_it_size_in_bytes = sizeof(DPNPC_id<_DataType>); \
input2_it = reinterpret_cast<DPNPC_id<_DataType>*>(dpnp_memory_alloc_c(q_ref, input2_it_size_in_bytes)); \
new (input2_it) DPNPC_id<_DataType>(q_ref, input2_data, input2_shape, input2_strides, input2_ndim); \
\
input2_it->broadcast_to_shape(result_shape, result_ndim); \
\
auto kernel_parallel_for_func = [=](sycl::id<1> global_id) { \
const size_t i = global_id[0]; /* for (size_t i = 0; i < result_size; ++i) */ \
{ \
const _DataType input1_elem = (*input1_it)[i]; \
const _DataType input2_elem = (*input2_it)[i]; \
result[i] = __operation__; \
} \
}; \
auto kernel_func = [&](sycl::handler& cgh) { \
cgh.parallel_for<class __name__##_broadcast_kernel<_DataType>>(gws, kernel_parallel_for_func); \
}; \
\
q.submit(kernel_func).wait(); \
\
input1_it->~DPNPC_id(); \
input2_it->~DPNPC_id(); \
\
return event_ref; \
} \
else if (use_strides) \
{ \
if ((result_ndim != input1_ndim) || (result_ndim != input2_ndim)) \
{ \
Expand Down Expand Up @@ -332,18 +415,21 @@ static void func_map_init_bitwise_2arg_1type(func_map_t& fmap)
fmap[DPNPFuncName::DPNP_FN_BITWISE_AND][eft_INT][eft_INT] = {eft_INT, (void*)dpnp_bitwise_and_c_default<int32_t>};
fmap[DPNPFuncName::DPNP_FN_BITWISE_AND][eft_LNG][eft_LNG] = {eft_LNG, (void*)dpnp_bitwise_and_c_default<int64_t>};

fmap[DPNPFuncName::DPNP_FN_BITWISE_AND_EXT][eft_BLN][eft_BLN] = {eft_BLN, (void*)dpnp_bitwise_and_c_ext<bool>};
fmap[DPNPFuncName::DPNP_FN_BITWISE_AND_EXT][eft_INT][eft_INT] = {eft_INT, (void*)dpnp_bitwise_and_c_ext<int32_t>};
fmap[DPNPFuncName::DPNP_FN_BITWISE_AND_EXT][eft_LNG][eft_LNG] = {eft_LNG, (void*)dpnp_bitwise_and_c_ext<int64_t>};

fmap[DPNPFuncName::DPNP_FN_BITWISE_OR][eft_INT][eft_INT] = {eft_INT, (void*)dpnp_bitwise_or_c_default<int32_t>};
fmap[DPNPFuncName::DPNP_FN_BITWISE_OR][eft_LNG][eft_LNG] = {eft_LNG, (void*)dpnp_bitwise_or_c_default<int64_t>};

fmap[DPNPFuncName::DPNP_FN_BITWISE_OR_EXT][eft_BLN][eft_BLN] = {eft_BLN, (void*)dpnp_bitwise_or_c_ext<bool>};
fmap[DPNPFuncName::DPNP_FN_BITWISE_OR_EXT][eft_INT][eft_INT] = {eft_INT, (void*)dpnp_bitwise_or_c_ext<int32_t>};
fmap[DPNPFuncName::DPNP_FN_BITWISE_OR_EXT][eft_LNG][eft_LNG] = {eft_LNG, (void*)dpnp_bitwise_or_c_ext<int64_t>};

fmap[DPNPFuncName::DPNP_FN_BITWISE_XOR][eft_INT][eft_INT] = {eft_INT, (void*)dpnp_bitwise_xor_c_default<int32_t>};
fmap[DPNPFuncName::DPNP_FN_BITWISE_XOR][eft_LNG][eft_LNG] = {eft_LNG, (void*)dpnp_bitwise_xor_c_default<int64_t>};

fmap[DPNPFuncName::DPNP_FN_BITWISE_XOR_EXT][eft_BLN][eft_BLN] = {eft_BLN, (void*)dpnp_bitwise_xor_c_ext<bool>};
fmap[DPNPFuncName::DPNP_FN_BITWISE_XOR_EXT][eft_INT][eft_INT] = {eft_INT, (void*)dpnp_bitwise_xor_c_ext<int32_t>};
fmap[DPNPFuncName::DPNP_FN_BITWISE_XOR_EXT][eft_LNG][eft_LNG] = {eft_LNG, (void*)dpnp_bitwise_xor_c_ext<int64_t>};

Expand Down
6 changes: 3 additions & 3 deletions dpnp/dpnp_algo/dpnp_algo_bitwise.pyx
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
# cython: language_level=3
# -*- coding: utf-8 -*-
# *****************************************************************************
# Copyright (c) 2016-2020, Intel Corporation
# Copyright (c) 2016-2023, Intel Corporation
# All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
Expand Down Expand Up @@ -68,8 +68,8 @@ cpdef utils.dpnp_descriptor dpnp_bitwise_xor(utils.dpnp_descriptor x1_obj,
return call_fptr_2in_1out_strides(DPNP_FN_BITWISE_XOR_EXT, x1_obj, x2_obj, dtype=dtype, out=out, where=where)


cpdef utils.dpnp_descriptor dpnp_invert(utils.dpnp_descriptor arr):
return call_fptr_1in_1out(DPNP_FN_INVERT_EXT, arr, arr.shape)
cpdef utils.dpnp_descriptor dpnp_invert(utils.dpnp_descriptor arr, utils.dpnp_descriptor out=None):
return call_fptr_1in_1out(DPNP_FN_INVERT_EXT, arr, arr.shape, out=out, func_name="invert")


cpdef utils.dpnp_descriptor dpnp_left_shift(utils.dpnp_descriptor x1_obj,
Expand Down
69 changes: 52 additions & 17 deletions dpnp/dpnp_array.py
Original file line number Diff line number Diff line change
Expand Up @@ -125,7 +125,9 @@ def __abs__(self):
def __add__(self, other):
return dpnp.add(self, other)

# '__and__',
def __and__(self, other):
return dpnp.bitwise_and(self, other)

# '__array__',
# '__array_finalize__',
# '__array_function__',
Expand Down Expand Up @@ -193,9 +195,17 @@ def __gt__(self, other):

# '__hash__',
# '__iadd__',
# '__iand__',

def __iand__(self, other):
dpnp.bitwise_and(self, other, out=self)
return self

# '__ifloordiv__',
# '__ilshift__',

def __ilshift__(self, other):
dpnp.left_shift(self, other, out=self)
return self

# '__imatmul__',
# '__imod__',
# '__imul__',
Expand All @@ -209,18 +219,28 @@ def __index__(self):
def __int__(self):
return self._array_obj.__int__()

# '__invert__',
# '__ior__',
def __invert__(self):
return dpnp.invert(self)

def __ior__(self, other):
dpnp.bitwise_or(self, other, out=self)
return self

def __ipow__(self, other):
dpnp.power(self, other, out=self)
return self

# '__irshift__',
def __irshift__(self, other):
dpnp.right_shift(self, other, out=self)
return self

# '__isub__',
# '__iter__',
# '__itruediv__',
# '__ixor__',

def __ixor__(self, other):
dpnp.bitwise_xor(self, other, out=self)
return self

def __le__(self, other):
return dpnp.less_equal(self, other)
Expand All @@ -232,7 +252,8 @@ def __len__(self):

return self._array_obj.__len__()

# '__lshift__',
def __lshift__(self, other):
return dpnp.left_shift(self, other)

def __lt__(self, other):
return dpnp.less(self, other)
Expand All @@ -253,7 +274,10 @@ def __neg__(self):
return dpnp.negative(self)

# '__new__',
# '__or__',

def __or__(self, other):
return dpnp.bitwise_or(self, other)

# '__pos__',

def __pow__(self, other):
Expand All @@ -262,7 +286,9 @@ def __pow__(self, other):
def __radd__(self, other):
return dpnp.add(other, self)

# '__rand__',
def __rand__(self, other):
return dpnp.bitwise_and(other, self)

# '__rdivmod__',
# '__reduce__',
# '__reduce_ex__',
Expand All @@ -271,7 +297,9 @@ def __repr__(self):
return dpt.usm_ndarray_repr(self._array_obj, prefix="array")

# '__rfloordiv__',
# '__rlshift__',

def __rlshift__(self, other):
return dpnp.left_shift(other, self)

def __rmatmul__(self, other):
return dpnp.matmul(other, self)
Expand All @@ -282,21 +310,27 @@ def __rmod__(self, other):
def __rmul__(self, other):
return dpnp.multiply(other, self)

# '__ror__',

def __ror__(self, other):
return dpnp.bitwise_or(other, self)

def __rpow__(self, other):
return dpnp.power(other, self)

# '__rrshift__',
# '__rshift__',
def __rrshift__(self, other):
return dpnp.right_shift(other, self)

def __rshift__(self, other):
return dpnp.right_shift(self, other)

def __rsub__(self, other):
return dpnp.subtract(other, self)

def __rtruediv__(self, other):
return dpnp.true_divide(other, self)

# '__rxor__',
def __rxor__(self, other):
return dpnp.bitwise_xor(other, self)

# '__setattr__',

def __setitem__(self, key, val):
Expand Down Expand Up @@ -334,7 +368,8 @@ def __sub__(self, other):
def __truediv__(self, other):
return dpnp.true_divide(self, other)

# '__xor__',
def __xor__(self, other):
return dpnp.bitwise_xor(self, other)

@staticmethod
def _create_from_usm_ndarray(usm_ary : dpt.usm_ndarray):
Expand Down
3 changes: 2 additions & 1 deletion dpnp/dpnp_iface.py
Original file line number Diff line number Diff line change
Expand Up @@ -251,6 +251,7 @@ def from_dlpack(obj, /):
def get_dpnp_descriptor(ext_obj,
copy_when_strides=True,
copy_when_nondefault_queue=True,
alloc_dtype=None,
alloc_usm_type=None,
alloc_queue=None):
"""
Expand All @@ -274,7 +275,7 @@ def get_dpnp_descriptor(ext_obj,
# If input object is a scalar, it means it was allocated on host memory.
# We need to copy it to USM memory according to compute follows data paradigm.
if isscalar(ext_obj):
ext_obj = array(ext_obj, usm_type=alloc_usm_type, sycl_queue=alloc_queue)
ext_obj = array(ext_obj, dtype=alloc_dtype, usm_type=alloc_usm_type, sycl_queue=alloc_queue)

# while dpnp functions have no implementation with strides support
# we need to create a non-strided copy
Expand Down
Loading

0 comments on commit 759c6e7

Please sign in to comment.