Skip to content

Implemented dpctl.tensor.eye constructor and tests #896

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 14 commits into from
Sep 13, 2022
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions dpctl/tensor/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,7 @@
asarray,
empty,
empty_like,
eye,
full,
full_like,
linspace,
Expand Down Expand Up @@ -62,6 +63,7 @@
"zeros",
"ones",
"full",
"eye",
"linspace",
"empty_like",
"zeros_like",
Expand Down
81 changes: 81 additions & 0 deletions dpctl/tensor/_ctors.py
Original file line number Diff line number Diff line change
Expand Up @@ -1035,3 +1035,84 @@ def linspace(
)
hev.wait()
return res


def eye(
n_rows,
n_cols=None,
/,
*,
k=0,
dtype=None,
order="C",
device=None,
usm_type="device",
sycl_queue=None,
):
"""
eye(n_rows, n_cols = None, /, *, k = 0, dtype = None, \
device = None, usm_type="device", sycl_queue=None) -> usm_ndarray

Creates `usm_ndarray` with ones on the `k`th diagonal.

Args:
n_rows: number of rows in the output array.
n_cols (optional): number of columns in the output array. If None,
n_cols = n_rows. Default: `None`.
k: index of the diagonal, with 0 as the main diagonal.
A positive value of k is a superdiagonal, a negative value
is a subdiagonal.
Raises `TypeError` if k is not an integer.
Default: `0`.
dtype (optional): data type of the array. Can be typestring,
a `numpy.dtype` object, `numpy` char string, or a numpy
scalar type. Default: None
order ("C" or F"): memory layout for the array. Default: "C"
device (optional): array API concept of device where the output array
is created. `device` can be `None`, a oneAPI filter selector string,
an instance of :class:`dpctl.SyclDevice` corresponding to a
non-partitioned SYCL device, an instance of
:class:`dpctl.SyclQueue`, or a `Device` object returnedby
`dpctl.tensor.usm_array.device`. Default: `None`.
usm_type ("device"|"shared"|"host", optional): The type of SYCL USM
allocation for the output array. Default: `"device"`.
sycl_queue (:class:`dpctl.SyclQueue`, optional): The SYCL queue to use
for output array allocation and copying. `sycl_queue` and `device`
are exclusive keywords, i.e. use one or another. If both are
specified, a `TypeError` is raised unless both imply the same
underlying SYCL queue to be used. If both are `None`, the
`dpctl.SyclQueue()` is used for allocation and copying.
Default: `None`.
"""
if not isinstance(order, str) or len(order) == 0 or order[0] not in "CcFf":
raise ValueError(
"Unrecognized order keyword value, expecting 'F' or 'C'."
)
else:
order = order[0].upper()
n_rows = operator.index(n_rows)
n_cols = n_rows if n_cols is None else operator.index(n_cols)
k = operator.index(k)
if k >= n_cols or -k >= n_rows:
return dpt.zeros(
(n_rows, n_cols),
dtype=dtype,
order=order,
device=device,
usm_type=usm_type,
sycl_queue=sycl_queue,
)
dpctl.utils.validate_usm_type(usm_type, allow_none=False)
sycl_queue = normalize_queue_device(sycl_queue=sycl_queue, device=device)
dtype = _get_dtype(dtype, sycl_queue)
res = dpt.usm_ndarray(
(n_rows, n_cols),
dtype=dtype,
buffer=usm_type,
order=order,
buffer_ctor_kwargs={"queue": sycl_queue},
)
if n_rows != 0 and n_cols != 0:
hev, _ = ti._eye(k, dst=res, sycl_queue=sycl_queue)
hev.wait()
return res
152 changes: 152 additions & 0 deletions dpctl/tensor/libtensor/source/tensor_py.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -45,6 +45,7 @@ template <typename srcT, typename dstT, int nd> class copy_cast_spec_kernel;
template <typename Ty> class copy_for_reshape_generic_kernel;
template <typename Ty> class linear_sequence_step_kernel;
template <typename Ty, typename wTy> class linear_sequence_affine_kernel;
template <typename Ty> class eye_kernel;

static dpctl::tensor::detail::usm_ndarray_types array_types;

Expand Down Expand Up @@ -1742,6 +1743,144 @@ usm_ndarray_full(py::object py_value,
}
}

/* ================ Eye ================== */

typedef sycl::event (*eye_fn_ptr_t)(sycl::queue,
size_t nelems, // num_elements
py::ssize_t start,
py::ssize_t end,
py::ssize_t step,
char *, // dst_data_ptr
const std::vector<sycl::event> &);

static eye_fn_ptr_t eye_dispatch_vector[_ns::num_types];

template <typename Ty> class EyeFunctor
{
private:
Ty *p = nullptr;
py::ssize_t start_v;
py::ssize_t end_v;
py::ssize_t step_v;

public:
EyeFunctor(char *dst_p,
const py::ssize_t v0,
const py::ssize_t v1,
const py::ssize_t dv)
: p(reinterpret_cast<Ty *>(dst_p)), start_v(v0), end_v(v1), step_v(dv)
{
}

void operator()(sycl::id<1> wiid) const
{
Ty set_v = 0;
py::ssize_t i = static_cast<py::ssize_t>(wiid.get(0));
if (i >= start_v and i <= end_v) {
if ((i - start_v) % step_v == 0) {
set_v = 1;
}
}
p[i] = set_v;
}
};

template <typename Ty>
sycl::event eye_impl(sycl::queue exec_q,
size_t nelems,
const py::ssize_t start,
const py::ssize_t end,
const py::ssize_t step,
char *array_data,
const std::vector<sycl::event> &depends)
{
sycl::event eye_event = exec_q.submit([&](sycl::handler &cgh) {
cgh.depends_on(depends);
cgh.parallel_for<eye_kernel<Ty>>(
sycl::range<1>{nelems},
EyeFunctor<Ty>(array_data, start, end, step));
});

return eye_event;
}

template <typename fnT, typename Ty> struct EyeFactory
{
fnT get()
{
fnT f = eye_impl<Ty>;
return f;
}
};

std::pair<sycl::event, sycl::event>
eye(py::ssize_t k,
dpctl::tensor::usm_ndarray dst,
sycl::queue exec_q,
const std::vector<sycl::event> &depends = {})
{
// dst must be 2D

if (dst.get_ndim() != 2) {
throw py::value_error(
"usm_ndarray_eye: Expecting 2D array to populate");
}

sycl::queue dst_q = dst.get_queue();
if (!dpctl::utils::queues_are_compatible(exec_q, {dst_q})) {
throw py::value_error("Execution queue is not compatible with the "
"allocation queue");
}

int dst_typenum = dst.get_typenum();
int dst_typeid = array_types.typenum_to_lookup_id(dst_typenum);

const py::ssize_t nelem = dst.get_size();
const py::ssize_t rows = dst.get_shape(0);
const py::ssize_t cols = dst.get_shape(1);
if (rows == 0 || cols == 0) {
// nothing to do
return std::make_pair(sycl::event{}, sycl::event{});
}

bool is_dst_c_contig = ((dst.get_flags() & USM_ARRAY_C_CONTIGUOUS) != 0);
bool is_dst_f_contig = ((dst.get_flags() & USM_ARRAY_F_CONTIGUOUS) != 0);
if (!is_dst_c_contig && !is_dst_f_contig) {
throw py::value_error("USM array is not contiguous");
}

py::ssize_t start;
if (is_dst_c_contig) {
start = (k < 0) ? -k * cols : k;
}
else {
start = (k < 0) ? -k : k * rows;
}

const py::ssize_t *strides = dst.get_strides_raw();
py::ssize_t step;
if (strides == nullptr) {
step = (is_dst_c_contig) ? cols + 1 : rows + 1;
}
else {
step = strides[0] + strides[1];
}

const py::ssize_t length = std::min({rows, cols, rows + k, cols - k});
const py::ssize_t end = start + step * (length - 1);

char *dst_data = dst.get_data();
sycl::event eye_event;

auto fn = eye_dispatch_vector[dst_typeid];

eye_event = fn(exec_q, static_cast<size_t>(nelem), start, end, step,
dst_data, depends);

return std::make_pair(keep_args_alive(exec_q, {dst}, {eye_event}),
eye_event);
}

// populate dispatch tables
void init_copy_and_cast_dispatch_tables(void)
{
Expand Down Expand Up @@ -1796,6 +1935,9 @@ void init_copy_for_reshape_dispatch_vector(void)
dvb3;
dvb3.populate_dispatch_vector(full_contig_dispatch_vector);

DispatchVectorBuilder<eye_fn_ptr_t, EyeFactory, num_types> dvb4;
dvb4.populate_dispatch_vector(eye_dispatch_vector);

return;
}

Expand Down Expand Up @@ -1901,6 +2043,16 @@ PYBIND11_MODULE(_tensor_impl, m)
py::arg("fill_value"), py::arg("dst"), py::arg("sycl_queue"),
py::arg("depends") = py::list());

m.def("_eye", &eye,
"Fills input 2D contiguous usm_ndarray `dst` with "
"zeros outside of the diagonal "
"specified by "
"the diagonal index `k` "
"which is filled with ones."
"Returns a tuple of events: (ht_event, comp_event)",
py::arg("k"), py::arg("dst"), py::arg("sycl_queue"),
py::arg("depends") = py::list());

m.def("default_device_fp_type", [](sycl::queue q) -> std::string {
return get_default_device_fp_type(q.get_device());
});
Expand Down
20 changes: 20 additions & 0 deletions dpctl/tests/test_usm_ndarray_ctor.py
Original file line number Diff line number Diff line change
Expand Up @@ -1256,6 +1256,24 @@ def test_full_like(dt, usm_kind):
assert np.array_equal(dpt.asnumpy(Y), np.ones(X.shape, dtype=X.dtype))


@pytest.mark.parametrize("dtype", _all_dtypes)
@pytest.mark.parametrize("usm_kind", ["shared", "device", "host"])
def test_eye(dtype, usm_kind):
try:
q = dpctl.SyclQueue()
except dpctl.SyclQueueCreationError:
pytest.skip("Queue could not be created")

if dtype in ["f8", "c16"] and q.sycl_device.has_aspect_fp64 is False:
pytest.skip(
"Device does not support double precision floating point type"
)
X = dpt.eye(4, 5, k=1, dtype=dtype, usm_type=usm_kind, sycl_queue=q)
Xnp = np.eye(4, 5, k=1, dtype=dtype)
assert X.dtype == Xnp.dtype
assert np.array_equal(Xnp, dpt.asnumpy(X))


def test_common_arg_validation():
order = "I"
# invalid order must raise ValueError
Expand All @@ -1267,6 +1285,8 @@ def test_common_arg_validation():
dpt.ones(10, order=order)
with pytest.raises(ValueError):
dpt.full(10, 1, order=order)
with pytest.raises(ValueError):
dpt.eye(10, order=order)
X = dpt.empty(10)
with pytest.raises(ValueError):
dpt.empty_like(X, order=order)
Expand Down