From 8b2e9c1d807d39357410467ba1cee58d56abb911 Mon Sep 17 00:00:00 2001 From: Nikita Grigorian Date: Thu, 9 Mar 2023 13:44:50 -0800 Subject: [PATCH 1/4] Cleaned up dangling host_tasks in copy and cast --- .../source/copy_and_cast_usm_to_usm.cpp | 20 ++++++++++++------- 1 file changed, 13 insertions(+), 7 deletions(-) diff --git a/dpctl/tensor/libtensor/source/copy_and_cast_usm_to_usm.cpp b/dpctl/tensor/libtensor/source/copy_and_cast_usm_to_usm.cpp index c629f585c3..b7d3056561 100644 --- a/dpctl/tensor/libtensor/source/copy_and_cast_usm_to_usm.cpp +++ b/dpctl/tensor/libtensor/source/copy_and_cast_usm_to_usm.cpp @@ -70,6 +70,7 @@ using dpctl::utils::keep_args_alive; sycl::event _populate_packed_shape_strides_for_copycast_kernel( sycl::queue exec_q, + std::vector &host_task_events, py::ssize_t *device_shape_strides, // to be populated const std::vector &common_shape, const std::vector &src_strides, @@ -102,13 +103,14 @@ sycl::event _populate_packed_shape_strides_for_copycast_kernel( shp_host_shape_strides->data(), device_shape_strides, shp_host_shape_strides->size()); - exec_q.submit([&](sycl::handler &cgh) { + auto shared_ptr_cleanup_ev = exec_q.submit([&](sycl::handler &cgh) { cgh.depends_on(copy_shape_ev); cgh.host_task([shp_host_shape_strides]() { // increment shared pointer ref-count to keep it alive // till copy operation completes; }); }); + host_task_events.push_back(shared_ptr_cleanup_ev); return copy_shape_ev; } @@ -306,10 +308,13 @@ copy_usm_ndarray_into_usm_ndarray(dpctl::tensor::usm_ndarray src, throw std::runtime_error("Unabled to allocate device memory"); } + std::vector host_task_events; + host_task_events.reserve(2); + sycl::event copy_shape_ev = _populate_packed_shape_strides_for_copycast_kernel( - exec_q, shape_strides, simplified_shape, simplified_src_strides, - simplified_dst_strides); + exec_q, host_task_events, shape_strides, simplified_shape, + simplified_src_strides, simplified_dst_strides); sycl::event copy_and_cast_generic_ev = copy_and_cast_fn( exec_q, src_nelems, nd, shape_strides, src_data, src_offset, dst_data, @@ -317,15 +322,16 @@ copy_usm_ndarray_into_usm_ndarray(dpctl::tensor::usm_ndarray src, // async free of shape_strides temporary auto ctx = exec_q.get_context(); - exec_q.submit([&](sycl::handler &cgh) { + auto temporaries_cleanup_ev = exec_q.submit([&](sycl::handler &cgh) { cgh.depends_on(copy_and_cast_generic_ev); cgh.host_task( [ctx, shape_strides]() { sycl::free(shape_strides, ctx); }); }); - return std::make_pair( - keep_args_alive(exec_q, {src, dst}, {copy_and_cast_generic_ev}), - copy_and_cast_generic_ev); + host_task_events.push_back(temporaries_cleanup_ev); + + return std::make_pair(keep_args_alive(exec_q, {src, dst}, host_task_events), + temporaries_cleanup_ev); } void init_copy_and_cast_usm_to_usm_dispatch_tables(void) From e178d0e0768dc0234e9b47e1fbb916dc7c5302ce Mon Sep 17 00:00:00 2001 From: Nikita Grigorian Date: Thu, 9 Mar 2023 13:44:55 -0800 Subject: [PATCH 2/4] Cleaned up dangling host_tasks in reshape --- .../tensor/libtensor/source/copy_for_reshape.cpp | 16 +++++++++++----- 1 file changed, 11 insertions(+), 5 deletions(-) diff --git a/dpctl/tensor/libtensor/source/copy_for_reshape.cpp b/dpctl/tensor/libtensor/source/copy_for_reshape.cpp index 3a07ac2bd3..fd43f847d7 100644 --- a/dpctl/tensor/libtensor/source/copy_for_reshape.cpp +++ b/dpctl/tensor/libtensor/source/copy_for_reshape.cpp @@ -202,11 +202,14 @@ copy_usm_ndarray_for_reshape(dpctl::tensor::usm_ndarray src, dst_nd); } + std::vector host_task_events; + host_task_events.reserve(2); + // copy packed shapes and strides from host to devices sycl::event packed_shape_strides_copy_ev = exec_q.copy( packed_host_shapes_strides_shp->data(), packed_shapes_strides, packed_host_shapes_strides_shp->size()); - exec_q.submit([&](sycl::handler &cgh) { + auto shared_ptr_cleanup_ev = exec_q.submit([&](sycl::handler &cgh) { cgh.depends_on(packed_shape_strides_copy_ev); cgh.host_task([packed_host_shapes_strides_shp] { // Capturing shared pointer ensures that the underlying vector is @@ -215,6 +218,8 @@ copy_usm_ndarray_for_reshape(dpctl::tensor::usm_ndarray src, }); }); + host_task_events.push_back(shared_ptr_cleanup_ev); + char *src_data = src.get_data(); char *dst_data = dst.get_data(); @@ -226,7 +231,7 @@ copy_usm_ndarray_for_reshape(dpctl::tensor::usm_ndarray src, fn(exec_q, shift, src_nelems, src_nd, dst_nd, packed_shapes_strides, src_data, dst_data, all_deps); - exec_q.submit([&](sycl::handler &cgh) { + auto temporaries_cleanup_ev = exec_q.submit([&](sycl::handler &cgh) { cgh.depends_on(copy_for_reshape_event); auto ctx = exec_q.get_context(); cgh.host_task([packed_shapes_strides, ctx]() { @@ -234,9 +239,10 @@ copy_usm_ndarray_for_reshape(dpctl::tensor::usm_ndarray src, }); }); - return std::make_pair( - keep_args_alive(exec_q, {src, dst}, {copy_for_reshape_event}), - copy_for_reshape_event); + host_task_events.push_back(temporaries_cleanup_ev); + + return std::make_pair(keep_args_alive(exec_q, {src, dst}, host_task_events), + temporaries_cleanup_ev); } void init_copy_for_reshape_dispatch_vectors(void) From ac9bcd4131840ed6022870528c9eda19a94edd0d Mon Sep 17 00:00:00 2001 From: Nikita Grigorian Date: Thu, 9 Mar 2023 13:48:03 -0800 Subject: [PATCH 3/4] Name tweaks in integer indexing implementation --- .../source/integer_advanced_indexing.cpp | 20 +++++++++---------- 1 file changed, 9 insertions(+), 11 deletions(-) diff --git a/dpctl/tensor/libtensor/source/integer_advanced_indexing.cpp b/dpctl/tensor/libtensor/source/integer_advanced_indexing.cpp index ed0f749add..cb148c7df3 100644 --- a/dpctl/tensor/libtensor/source/integer_advanced_indexing.cpp +++ b/dpctl/tensor/libtensor/source/integer_advanced_indexing.cpp @@ -180,17 +180,15 @@ _populate_kernel_params(sycl::queue exec_q, host_along_sh_st_shp->data(), device_along_sh_st, host_along_sh_st_shp->size()); - sycl::event shared_ptr_cleanup_host_task = - exec_q.submit([&](sycl::handler &cgh) { - cgh.depends_on({device_along_sh_st_copy_ev, - device_orthog_sh_st_copy_ev, - device_ind_offsets_copy_ev, - device_ind_sh_st_copy_ev, device_ind_ptrs_copy_ev}); - cgh.host_task([host_ind_offsets_shp, host_ind_sh_st_shp, - host_ind_ptrs_shp, host_orthog_sh_st_shp, - host_along_sh_st_shp]() {}); - }); - host_task_events.push_back(shared_ptr_cleanup_host_task); + sycl::event shared_ptr_cleanup_ev = exec_q.submit([&](sycl::handler &cgh) { + cgh.depends_on({device_along_sh_st_copy_ev, device_orthog_sh_st_copy_ev, + device_ind_offsets_copy_ev, device_ind_sh_st_copy_ev, + device_ind_ptrs_copy_ev}); + cgh.host_task([host_ind_offsets_shp, host_ind_sh_st_shp, + host_ind_ptrs_shp, host_orthog_sh_st_shp, + host_along_sh_st_shp]() {}); + }); + host_task_events.push_back(shared_ptr_cleanup_ev); std::vector sh_st_pack_deps{ device_ind_ptrs_copy_ev, device_ind_sh_st_copy_ev, From fd9286c230bb1bbb84ddae9a2c5a043844d39a55 Mon Sep 17 00:00:00 2001 From: Nikita Grigorian Date: Thu, 9 Mar 2023 13:48:44 -0800 Subject: [PATCH 4/4] Cleaned up host_tasks in triu/l implementation --- dpctl/tensor/libtensor/source/triul_ctor.cpp | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/dpctl/tensor/libtensor/source/triul_ctor.cpp b/dpctl/tensor/libtensor/source/triul_ctor.cpp index 3967914425..81ad3bdd2e 100644 --- a/dpctl/tensor/libtensor/source/triul_ctor.cpp +++ b/dpctl/tensor/libtensor/source/triul_ctor.cpp @@ -250,7 +250,7 @@ usm_ndarray_triul(sycl::queue exec_q, dev_shape_and_strides, k, depends, {copy_shape_and_strides}); } - exec_q.submit([&](sycl::handler &cgh) { + auto temporaries_cleanup_ev = exec_q.submit([&](sycl::handler &cgh) { cgh.depends_on({tri_ev}); auto ctx = exec_q.get_context(); cgh.host_task( @@ -261,8 +261,9 @@ usm_ndarray_triul(sycl::queue exec_q, }); }); - return std::make_pair(keep_args_alive(exec_q, {src, dst}, {tri_ev}), - tri_ev); + return std::make_pair( + keep_args_alive(exec_q, {src, dst}, {temporaries_cleanup_ev}), + temporaries_cleanup_ev); } void init_triul_ctor_dispatch_vectors(void)