diff --git a/dpctl/tensor/__init__.py b/dpctl/tensor/__init__.py index 5fc9329077..6c9ac88fc2 100644 --- a/dpctl/tensor/__init__.py +++ b/dpctl/tensor/__init__.py @@ -27,6 +27,7 @@ asarray, empty, empty_like, + eye, full, full_like, linspace, @@ -62,6 +63,7 @@ "zeros", "ones", "full", + "eye", "linspace", "empty_like", "zeros_like", diff --git a/dpctl/tensor/_ctors.py b/dpctl/tensor/_ctors.py index 0f660c8cee..e3bb8fe3ec 100644 --- a/dpctl/tensor/_ctors.py +++ b/dpctl/tensor/_ctors.py @@ -1035,3 +1035,84 @@ def linspace( ) hev.wait() return res + + +def eye( + n_rows, + n_cols=None, + /, + *, + k=0, + dtype=None, + order="C", + device=None, + usm_type="device", + sycl_queue=None, +): + """ + eye(n_rows, n_cols = None, /, *, k = 0, dtype = None, \ + device = None, usm_type="device", sycl_queue=None) -> usm_ndarray + + Creates `usm_ndarray` with ones on the `k`th diagonal. + + Args: + n_rows: number of rows in the output array. + n_cols (optional): number of columns in the output array. If None, + n_cols = n_rows. Default: `None`. + k: index of the diagonal, with 0 as the main diagonal. + A positive value of k is a superdiagonal, a negative value + is a subdiagonal. + Raises `TypeError` if k is not an integer. + Default: `0`. + dtype (optional): data type of the array. Can be typestring, + a `numpy.dtype` object, `numpy` char string, or a numpy + scalar type. Default: None + order ("C" or F"): memory layout for the array. Default: "C" + device (optional): array API concept of device where the output array + is created. `device` can be `None`, a oneAPI filter selector string, + an instance of :class:`dpctl.SyclDevice` corresponding to a + non-partitioned SYCL device, an instance of + :class:`dpctl.SyclQueue`, or a `Device` object returnedby + `dpctl.tensor.usm_array.device`. Default: `None`. + usm_type ("device"|"shared"|"host", optional): The type of SYCL USM + allocation for the output array. Default: `"device"`. + sycl_queue (:class:`dpctl.SyclQueue`, optional): The SYCL queue to use + for output array allocation and copying. `sycl_queue` and `device` + are exclusive keywords, i.e. use one or another. If both are + specified, a `TypeError` is raised unless both imply the same + underlying SYCL queue to be used. If both are `None`, the + `dpctl.SyclQueue()` is used for allocation and copying. + Default: `None`. + """ + if not isinstance(order, str) or len(order) == 0 or order[0] not in "CcFf": + raise ValueError( + "Unrecognized order keyword value, expecting 'F' or 'C'." + ) + else: + order = order[0].upper() + n_rows = operator.index(n_rows) + n_cols = n_rows if n_cols is None else operator.index(n_cols) + k = operator.index(k) + if k >= n_cols or -k >= n_rows: + return dpt.zeros( + (n_rows, n_cols), + dtype=dtype, + order=order, + device=device, + usm_type=usm_type, + sycl_queue=sycl_queue, + ) + dpctl.utils.validate_usm_type(usm_type, allow_none=False) + sycl_queue = normalize_queue_device(sycl_queue=sycl_queue, device=device) + dtype = _get_dtype(dtype, sycl_queue) + res = dpt.usm_ndarray( + (n_rows, n_cols), + dtype=dtype, + buffer=usm_type, + order=order, + buffer_ctor_kwargs={"queue": sycl_queue}, + ) + if n_rows != 0 and n_cols != 0: + hev, _ = ti._eye(k, dst=res, sycl_queue=sycl_queue) + hev.wait() + return res diff --git a/dpctl/tensor/libtensor/source/tensor_py.cpp b/dpctl/tensor/libtensor/source/tensor_py.cpp index 2f0c4e2770..b0c526b54e 100644 --- a/dpctl/tensor/libtensor/source/tensor_py.cpp +++ b/dpctl/tensor/libtensor/source/tensor_py.cpp @@ -45,6 +45,7 @@ template class copy_cast_spec_kernel; template class copy_for_reshape_generic_kernel; template class linear_sequence_step_kernel; template class linear_sequence_affine_kernel; +template class eye_kernel; static dpctl::tensor::detail::usm_ndarray_types array_types; @@ -1742,6 +1743,144 @@ usm_ndarray_full(py::object py_value, } } +/* ================ Eye ================== */ + +typedef sycl::event (*eye_fn_ptr_t)(sycl::queue, + size_t nelems, // num_elements + py::ssize_t start, + py::ssize_t end, + py::ssize_t step, + char *, // dst_data_ptr + const std::vector &); + +static eye_fn_ptr_t eye_dispatch_vector[_ns::num_types]; + +template class EyeFunctor +{ +private: + Ty *p = nullptr; + py::ssize_t start_v; + py::ssize_t end_v; + py::ssize_t step_v; + +public: + EyeFunctor(char *dst_p, + const py::ssize_t v0, + const py::ssize_t v1, + const py::ssize_t dv) + : p(reinterpret_cast(dst_p)), start_v(v0), end_v(v1), step_v(dv) + { + } + + void operator()(sycl::id<1> wiid) const + { + Ty set_v = 0; + py::ssize_t i = static_cast(wiid.get(0)); + if (i >= start_v and i <= end_v) { + if ((i - start_v) % step_v == 0) { + set_v = 1; + } + } + p[i] = set_v; + } +}; + +template +sycl::event eye_impl(sycl::queue exec_q, + size_t nelems, + const py::ssize_t start, + const py::ssize_t end, + const py::ssize_t step, + char *array_data, + const std::vector &depends) +{ + sycl::event eye_event = exec_q.submit([&](sycl::handler &cgh) { + cgh.depends_on(depends); + cgh.parallel_for>( + sycl::range<1>{nelems}, + EyeFunctor(array_data, start, end, step)); + }); + + return eye_event; +} + +template struct EyeFactory +{ + fnT get() + { + fnT f = eye_impl; + return f; + } +}; + +std::pair +eye(py::ssize_t k, + dpctl::tensor::usm_ndarray dst, + sycl::queue exec_q, + const std::vector &depends = {}) +{ + // dst must be 2D + + if (dst.get_ndim() != 2) { + throw py::value_error( + "usm_ndarray_eye: Expecting 2D array to populate"); + } + + sycl::queue dst_q = dst.get_queue(); + if (!dpctl::utils::queues_are_compatible(exec_q, {dst_q})) { + throw py::value_error("Execution queue is not compatible with the " + "allocation queue"); + } + + int dst_typenum = dst.get_typenum(); + int dst_typeid = array_types.typenum_to_lookup_id(dst_typenum); + + const py::ssize_t nelem = dst.get_size(); + const py::ssize_t rows = dst.get_shape(0); + const py::ssize_t cols = dst.get_shape(1); + if (rows == 0 || cols == 0) { + // nothing to do + return std::make_pair(sycl::event{}, sycl::event{}); + } + + bool is_dst_c_contig = ((dst.get_flags() & USM_ARRAY_C_CONTIGUOUS) != 0); + bool is_dst_f_contig = ((dst.get_flags() & USM_ARRAY_F_CONTIGUOUS) != 0); + if (!is_dst_c_contig && !is_dst_f_contig) { + throw py::value_error("USM array is not contiguous"); + } + + py::ssize_t start; + if (is_dst_c_contig) { + start = (k < 0) ? -k * cols : k; + } + else { + start = (k < 0) ? -k : k * rows; + } + + const py::ssize_t *strides = dst.get_strides_raw(); + py::ssize_t step; + if (strides == nullptr) { + step = (is_dst_c_contig) ? cols + 1 : rows + 1; + } + else { + step = strides[0] + strides[1]; + } + + const py::ssize_t length = std::min({rows, cols, rows + k, cols - k}); + const py::ssize_t end = start + step * (length - 1); + + char *dst_data = dst.get_data(); + sycl::event eye_event; + + auto fn = eye_dispatch_vector[dst_typeid]; + + eye_event = fn(exec_q, static_cast(nelem), start, end, step, + dst_data, depends); + + return std::make_pair(keep_args_alive(exec_q, {dst}, {eye_event}), + eye_event); +} + // populate dispatch tables void init_copy_and_cast_dispatch_tables(void) { @@ -1796,6 +1935,9 @@ void init_copy_for_reshape_dispatch_vector(void) dvb3; dvb3.populate_dispatch_vector(full_contig_dispatch_vector); + DispatchVectorBuilder dvb4; + dvb4.populate_dispatch_vector(eye_dispatch_vector); + return; } @@ -1901,6 +2043,16 @@ PYBIND11_MODULE(_tensor_impl, m) py::arg("fill_value"), py::arg("dst"), py::arg("sycl_queue"), py::arg("depends") = py::list()); + m.def("_eye", &eye, + "Fills input 2D contiguous usm_ndarray `dst` with " + "zeros outside of the diagonal " + "specified by " + "the diagonal index `k` " + "which is filled with ones." + "Returns a tuple of events: (ht_event, comp_event)", + py::arg("k"), py::arg("dst"), py::arg("sycl_queue"), + py::arg("depends") = py::list()); + m.def("default_device_fp_type", [](sycl::queue q) -> std::string { return get_default_device_fp_type(q.get_device()); }); diff --git a/dpctl/tests/test_usm_ndarray_ctor.py b/dpctl/tests/test_usm_ndarray_ctor.py index 26ea45bc76..c77933d56a 100644 --- a/dpctl/tests/test_usm_ndarray_ctor.py +++ b/dpctl/tests/test_usm_ndarray_ctor.py @@ -1256,6 +1256,24 @@ def test_full_like(dt, usm_kind): assert np.array_equal(dpt.asnumpy(Y), np.ones(X.shape, dtype=X.dtype)) +@pytest.mark.parametrize("dtype", _all_dtypes) +@pytest.mark.parametrize("usm_kind", ["shared", "device", "host"]) +def test_eye(dtype, usm_kind): + try: + q = dpctl.SyclQueue() + except dpctl.SyclQueueCreationError: + pytest.skip("Queue could not be created") + + if dtype in ["f8", "c16"] and q.sycl_device.has_aspect_fp64 is False: + pytest.skip( + "Device does not support double precision floating point type" + ) + X = dpt.eye(4, 5, k=1, dtype=dtype, usm_type=usm_kind, sycl_queue=q) + Xnp = np.eye(4, 5, k=1, dtype=dtype) + assert X.dtype == Xnp.dtype + assert np.array_equal(Xnp, dpt.asnumpy(X)) + + def test_common_arg_validation(): order = "I" # invalid order must raise ValueError @@ -1267,6 +1285,8 @@ def test_common_arg_validation(): dpt.ones(10, order=order) with pytest.raises(ValueError): dpt.full(10, 1, order=order) + with pytest.raises(ValueError): + dpt.eye(10, order=order) X = dpt.empty(10) with pytest.raises(ValueError): dpt.empty_like(X, order=order)