Skip to content

Commit

Permalink
Merge pull request #4478 from masterleinad/fix_impl_shared_alloc
Browse files (browse the repository at this point in the history)
Fix impl_shared_alloc for GPU+threads
  • Loading branch information
crtrott authored Oct 28, 2021
2 parents 78f0bd5 + 43ada2b commit 8b17f8e
Show file tree
Hide file tree
Showing 4 changed files with 24 additions and 17 deletions.
12 changes: 7 additions & 5 deletions core/src/Cuda/Kokkos_CudaSpace.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -492,9 +492,10 @@ SharedAllocationRecord<Kokkos::CudaSpace, void>::~SharedAllocationRecord() {
const char *label = nullptr;
if (Kokkos::Profiling::profileLibraryLoaded()) {
SharedAllocationHeader header;
Kokkos::Cuda exec;
Kokkos::Impl::DeepCopy<Kokkos::CudaSpace, HostSpace>(
&header, RecordBase::m_alloc_ptr, sizeof(SharedAllocationHeader));
Kokkos::fence(
exec, &header, RecordBase::m_alloc_ptr, sizeof(SharedAllocationHeader));
exec.fence(
"SharedAllocationRecord<Kokkos::CudaSpace, "
"void>::~SharedAllocationRecord(): fence after copying header from "
"HostSpace");
Expand Down Expand Up @@ -552,9 +553,10 @@ SharedAllocationRecord<Kokkos::CudaSpace, void>::SharedAllocationRecord(
this->base_t::_fill_host_accessible_header_info(header, arg_label);

// Copy to device memory
Kokkos::Impl::DeepCopy<CudaSpace, HostSpace>(RecordBase::m_alloc_ptr, &header,
sizeof(SharedAllocationHeader));
Kokkos::fence(
Kokkos::Cuda exec;
Kokkos::Impl::DeepCopy<CudaSpace, HostSpace>(
exec, RecordBase::m_alloc_ptr, &header, sizeof(SharedAllocationHeader));
exec.fence(
"SharedAllocationRecord<Kokkos::CudaSpace, "
"void>::SharedAllocationRecord(): fence after copying header from "
"HostSpace");
Expand Down
10 changes: 6 additions & 4 deletions core/src/HIP/Kokkos_HIP_Space.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -265,9 +265,10 @@ SharedAllocationRecord<Kokkos::Experimental::HIPSpace,
const char* label = nullptr;
if (Kokkos::Profiling::profileLibraryLoaded()) {
SharedAllocationHeader header;
Kokkos::Experimental::HIP exec;
Kokkos::Impl::DeepCopy<Kokkos::Experimental::HIPSpace, HostSpace>(
&header, RecordBase::m_alloc_ptr, sizeof(SharedAllocationHeader));
Kokkos::fence(
exec, &header, RecordBase::m_alloc_ptr, sizeof(SharedAllocationHeader));
exec.fence(
"SharedAllocationRecord<Kokkos::Experimental::HIPSpace, "
"void>::~SharedAllocationRecord(): fence after copying header from "
"HostSpace");
Expand Down Expand Up @@ -307,9 +308,10 @@ SharedAllocationRecord<Kokkos::Experimental::HIPSpace, void>::
this->base_t::_fill_host_accessible_header_info(header, arg_label);

// Copy to device memory
Kokkos::Experimental::HIP exec;
Kokkos::Impl::DeepCopy<Kokkos::Experimental::HIPSpace, HostSpace>(
RecordBase::m_alloc_ptr, &header, sizeof(SharedAllocationHeader));
Kokkos::fence(
exec, RecordBase::m_alloc_ptr, &header, sizeof(SharedAllocationHeader));
exec.fence(
"SharedAllocationRecord<Kokkos::Experimental::HIPSpace, "
"void>::SharedAllocationRecord(): fence after copying header from "
"HostSpace");
Expand Down
12 changes: 7 additions & 5 deletions core/src/SYCL/Kokkos_SYCL_Space.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -252,9 +252,10 @@ SharedAllocationRecord<Kokkos::Experimental::SYCLDeviceUSMSpace, void>::
this->base_t::_fill_host_accessible_header_info(header, label);

// Copy to device memory
Kokkos::Experimental::SYCL exec;
Kokkos::Impl::DeepCopy<Kokkos::Experimental::SYCLDeviceUSMSpace, HostSpace>(
RecordBase::m_alloc_ptr, &header, sizeof(SharedAllocationHeader));
Kokkos::fence(
exec, RecordBase::m_alloc_ptr, &header, sizeof(SharedAllocationHeader));
exec.fence(
"SharedAllocationRecord<Kokkos::Experimental::SYCLDeviceUSMSpace, "
"void>::SharedAllocationRecord(): fence after copying header from "
"HostSpace");
Expand Down Expand Up @@ -316,10 +317,11 @@ SharedAllocationRecord<Kokkos::Experimental::SYCLDeviceUSMSpace,
const char* label = nullptr;
if (Kokkos::Profiling::profileLibraryLoaded()) {
SharedAllocationHeader header;
Kokkos::Experimental::SYCL exec;
Kokkos::Impl::DeepCopy<Kokkos::Experimental::SYCLDeviceUSMSpace,
Kokkos::HostSpace>(&header, RecordBase::m_alloc_ptr,
sizeof(SharedAllocationHeader));
Kokkos::fence(
Kokkos::HostSpace>(
exec, &header, RecordBase::m_alloc_ptr, sizeof(SharedAllocationHeader));
exec.fence(
"SharedAllocationRecord<Kokkos::Experimental::SYCLDeviceUSMSpace, "
"void>::~SharedAllocationRecord(): fence after copying header from "
"HostSpace");
Expand Down
7 changes: 4 additions & 3 deletions core/src/impl/Kokkos_SharedAlloc_timpl.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -262,9 +262,10 @@ auto HostInaccessibleSharedAllocationRecordCommon<MemorySpace>::get_record(
alloc_ptr ? SharedAllocationHeader::get_header(alloc_ptr) : nullptr;

if (alloc_ptr) {
Kokkos::Impl::DeepCopy<HostSpace, MemorySpace>(
&head, head_cuda, sizeof(SharedAllocationHeader));
Kokkos::fence(
typename MemorySpace::execution_space exec_space;
Kokkos::Impl::DeepCopy<HostSpace, MemorySpace, decltype(exec_space)>(
exec_space, &head, head_cuda, sizeof(SharedAllocationHeader));
exec_space.fence(
"HostInaccessibleSharedAllocationRecordCommon::get_record(): fence "
"after copying header to HostSpace");
}
Expand Down

0 comments on commit 8b17f8e

Please sign in to comment.