Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Avoid sycl::queue copying in libtensor #1645

Merged
merged 1 commit on Apr 18, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
10 changes: 5 additions & 5 deletions dpctl/tensor/libtensor/include/kernels/reductions.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -2383,7 +2383,7 @@ struct CustomSearchReduction
};

typedef sycl::event (*search_strided_impl_fn_ptr)(
- sycl::queue,
+ sycl::queue &,
size_t,
size_t,
const char *,
Expand Down Expand Up @@ -2507,7 +2507,7 @@ template <typename argTy,
typename ReductionOpT,
typename IndexOpT>
sycl::event search_over_group_temps_strided_impl(
- sycl::queue exec_q,
+ sycl::queue &exec_q,
size_t iter_nelems, // number of reductions (num. of rows in a matrix
// when reducing over rows)
size_t reduction_nelems, // size of each reduction (length of rows, i.e.
Expand Down Expand Up @@ -2804,7 +2804,7 @@ sycl::event search_over_group_temps_strided_impl(
}

typedef sycl::event (*search_contig_impl_fn_ptr)(
- sycl::queue,
+ sycl::queue &,
size_t,
size_t,
const char *,
Expand All @@ -2819,7 +2819,7 @@ template <typename argTy,
typename ReductionOpT,
typename IndexOpT>
sycl::event search_axis1_over_group_temps_contig_impl(
- sycl::queue exec_q,
+ sycl::queue &exec_q,
size_t iter_nelems, // number of reductions (num. of rows in a matrix
// when reducing over rows)
size_t reduction_nelems, // size of each reduction (length of rows, i.e.
Expand Down Expand Up @@ -3098,7 +3098,7 @@ template <typename argTy,
typename ReductionOpT,
typename IndexOpT>
sycl::event search_axis0_over_group_temps_contig_impl(
- sycl::queue exec_q,
+ sycl::queue &exec_q,
size_t iter_nelems, // number of reductions (num. of rows in a matrix
// when reducing over rows)
size_t reduction_nelems, // size of each reduction (length of rows, i.e.
Expand Down
12 changes: 6 additions & 6 deletions dpctl/tensor/libtensor/source/device_support_queries.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -82,7 +82,7 @@ sycl::device _extract_device(const py::object &arg)

PyObject *source = arg.ptr();
if (api.PySyclQueue_Check_(source)) {
- sycl::queue q = py::cast<sycl::queue>(arg);
+ const sycl::queue &q = py::cast<sycl::queue>(arg);
return q.get_device();
}
else if (api.PySyclDevice_Check_(source)) {
Expand All @@ -98,31 +98,31 @@ sycl::device _extract_device(const py::object &arg)

std::string default_device_fp_type(const py::object &arg)
{
- sycl::device d = _extract_device(arg);
+ const sycl::device &d = _extract_device(arg);
return _default_device_fp_type(d);
}

std::string default_device_int_type(const py::object &arg)
{
- sycl::device d = _extract_device(arg);
+ const sycl::device &d = _extract_device(arg);
return _default_device_int_type(d);
}

std::string default_device_bool_type(const py::object &arg)
{
- sycl::device d = _extract_device(arg);
+ const sycl::device &d = _extract_device(arg);
return _default_device_bool_type(d);
}

std::string default_device_complex_type(const py::object &arg)
{
- sycl::device d = _extract_device(arg);
+ const sycl::device &d = _extract_device(arg);
return _default_device_complex_type(d);
}

std::string default_device_index_type(const py::object &arg)
{
- sycl::device d = _extract_device(arg);
+ const sycl::device &d = _extract_device(arg);
return _default_device_index_type(d);
}

Expand Down
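(The "changes: … <file path>" header for this third file is missing from the capture; the hunks below belong to a different file than device_support_queries.cpp.)
Original file line number Diff line number Diff line change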
Expand Up @@ -63,7 +63,7 @@ template <typename output_typesT,
std::pair<sycl::event, sycl::event>
py_unary_ufunc(const dpctl::tensor::usm_ndarray &src,
const dpctl::tensor::usm_ndarray &dst,
- sycl::queue q,
+ sycl::queue &q,
const std::vector<sycl::event> &depends,
//
const output_typesT &output_type_vec,
Expand Down Expand Up @@ -301,7 +301,7 @@ std::pair<sycl::event, sycl::event> py_binary_ufunc(
const dpctl::tensor::usm_ndarray &src1,
const dpctl::tensor::usm_ndarray &src2,
const dpctl::tensor::usm_ndarray &dst, // dst = op(src1, src2), elementwise
- sycl::queue exec_q,
+ sycl::queue &exec_q,
const std::vector<sycl::event> depends,
//
const output_typesT &output_type_table,
Expand Down Expand Up @@ -622,7 +622,7 @@ template <typename output_typesT,
std::pair<sycl::event, sycl::event>
py_binary_inplace_ufunc(const dpctl::tensor::usm_ndarray &lhs,
const dpctl::tensor::usm_ndarray &rhs,
- sycl::queue exec_q,
+ sycl::queue &exec_q,
const std::vector<sycl::event> depends,
//
const output_typesT &output_type_table,
Expand Down