From ced79a96a0af5d6c99afef66e2f46601e69e4f4f Mon Sep 17 00:00:00 2001 From: Jan Ciesko Date: Thu, 7 Sep 2023 15:22:55 -0600 Subject: [PATCH] Fix missing fence and incorrect ptr calculation in mpi_block_type{put,get} --- src/core/Kokkos_RemoteSpaces_ViewMapping.hpp | 7 +--- .../mpispace/Kokkos_MPISpace_BlockOps.hpp | 32 ++++++++++--------- .../mpispace/Kokkos_MPISpace_DataHandle.hpp | 23 ++++--------- unit_tests/Test_Reduction.cpp | 2 +- 4 files changed, 26 insertions(+), 38 deletions(-) diff --git a/src/core/Kokkos_RemoteSpaces_ViewMapping.hpp b/src/core/Kokkos_RemoteSpaces_ViewMapping.hpp index ec2b0ada..910892f7 100644 --- a/src/core/Kokkos_RemoteSpaces_ViewMapping.hpp +++ b/src/core/Kokkos_RemoteSpaces_ViewMapping.hpp @@ -1057,15 +1057,10 @@ class ViewMapping { Kokkos::Impl::ViewCtorProp const &arg_prop, typename Traits::array_layout const &arg_layout) : m_offset_remote_dim(0), -#ifdef KRS_ENABLE_MPISPACE m_handle( ((Kokkos::Impl::ViewCtorProp const &)arg_prop) .value) -#else - m_handle( - ((Kokkos::Impl::ViewCtorProp const &)arg_prop) - .value) -#endif + { typedef typename Traits::value_type value_type; typedef std::integral_constant< diff --git a/src/impl/mpispace/Kokkos_MPISpace_BlockOps.hpp b/src/impl/mpispace/Kokkos_MPISpace_BlockOps.hpp index 203269aa..69f395f7 100644 --- a/src/impl/mpispace/Kokkos_MPISpace_BlockOps.hpp +++ b/src/impl/mpispace/Kokkos_MPISpace_BlockOps.hpp @@ -33,9 +33,10 @@ namespace Impl { int _typesize; \ MPI_Request request; \ MPI_Type_size(mpi_type, &_typesize); \ - MPI_Rput(ptr, nelems, mpi_type, pe, \ - sizeof(SharedAllocationHeader) + offset * _typesize, nelems, \ - mpi_type, win, &request); \ + const void *src_adr = ptr + offset; \ + size_t win_offset = sizeof(SharedAllocationHeader) + offset * _typesize; \ + MPI_Rput(src_adr, nelems, mpi_type, pe, win_offset, nelems, mpi_type, win, \ + &request); \ MPI_Wait(&request, MPI_STATUS_IGNORE); \ } @@ -54,18 +55,19 @@ KOKKOS_REMOTESPACES_PUT(double, MPI_DOUBLE) #undef KOKKOS_REMOTESPACES_PUT -#define KOKKOS_REMOTESPACES_GET(type, mpi_type) \ - static KOKKOS_INLINE_FUNCTION void mpi_block_type_get( \ - type *ptr, const size_t offset, const size_t nelems, const int pe, \ - const MPI_Win &win) { \ - assert(win != MPI_WIN_NULL); \ - int _typesize; \ - MPI_Request request; \ - MPI_Type_size(mpi_type, &_typesize); \ - MPI_Rget(ptr, nelems, mpi_type, pe, \ - sizeof(SharedAllocationHeader) + offset * _typesize, nelems, \ - mpi_type, win, &request); \ - MPI_Wait(&request, MPI_STATUS_IGNORE); \ +#define KOKKOS_REMOTESPACES_GET(type, mpi_type) \ + static KOKKOS_INLINE_FUNCTION void mpi_block_type_get( \ + type *ptr, const size_t offset, const size_t nelems, const int pe, \ + const MPI_Win &win) { \ + assert(win != MPI_WIN_NULL); \ + int _typesize; \ + MPI_Request request; \ + MPI_Type_size(mpi_type, &_typesize); \ + void *dst_adr = ptr + offset; \ + size_t win_offset = sizeof(SharedAllocationHeader) + offset * _typesize; \ + MPI_Rget(dst_adr, nelems, mpi_type, pe, win_offset, nelems, mpi_type, win, \ + &request); \ + MPI_Wait(&request, MPI_STATUS_IGNORE); \ } KOKKOS_REMOTESPACES_GET(char, MPI_SIGNED_CHAR) diff --git a/src/impl/mpispace/Kokkos_MPISpace_DataHandle.hpp b/src/impl/mpispace/Kokkos_MPISpace_DataHandle.hpp index 53949c24..a54f99d5 100644 --- a/src/impl/mpispace/Kokkos_MPISpace_DataHandle.hpp +++ b/src/impl/mpispace/Kokkos_MPISpace_DataHandle.hpp @@ -47,45 +47,36 @@ struct MPIDataHandle { } KOKKOS_INLINE_FUNCTION - MPIDataHandle *operator+(size_t &offset) { - ptr += offset; - loc.offset += offset; + MPIDataHandle operator+(size_t &offset) { + return MPIDataHandle(ptr += offset, loc.offset += offset); } }; template struct BlockDataHandle { T *ptr; - MPIAccessLocation remote_loc; + MPIAccessLocation loc; size_t pe; size_t elems; - KOKKOS_INLINE_FUNCTION - BlockDataHandle() : elems(0) {} - KOKKOS_INLINE_FUNCTION BlockDataHandle(T *ptr_, MPI_Win win_, size_t offset_, size_t elems_, size_t pe_) - : ptr(ptr_), remote_loc(win_, offset_), elems(elems_), pe(pe_) {} + : ptr(ptr_), loc(win_, offset_), elems(elems_), pe(pe_) {} KOKKOS_INLINE_FUNCTION BlockDataHandle(BlockDataHandle const &arg) - : ptr(arg.ptr), - remote_loc(arg.remote_loc), - elems(arg.elems), - pe(arg.pe) {} + : ptr(arg.ptr), loc(arg.loc), elems(arg.elems), pe(arg.pe) {} KOKKOS_INLINE_FUNCTION void get() { - MPIBlockDataElement element(ptr, remote_loc.win, pe, - remote_loc.offset, elems); + MPIBlockDataElement element(ptr, loc.win, pe, loc.offset, elems); element.get(); } KOKKOS_INLINE_FUNCTION void put() { - MPIBlockDataElement element(ptr, remote_loc.win, pe, - remote_loc.offset, elems); + MPIBlockDataElement element(ptr, loc.win, pe, loc.offset, elems); element.put(); } }; diff --git a/unit_tests/Test_Reduction.cpp b/unit_tests/Test_Reduction.cpp index 4d24909e..23465170 100644 --- a/unit_tests/Test_Reduction.cpp +++ b/unit_tests/Test_Reduction.cpp @@ -117,7 +117,7 @@ void test_scalar_reduce_partitioned_1D(int dim1) { v(my_rank, i) = static_cast(start + i); }); - Kokkos::fence(); + RemoteSpace_t().fence(); Data_t gsum = 0; Kokkos::parallel_reduce(