From 242325c9b348584ef341c2a936608a281e039169 Mon Sep 17 00:00:00 2001 From: Jan Ciesko Date: Thu, 31 Aug 2023 14:26:55 -0600 Subject: [PATCH] Modernize the use of AllocationRecord and consolidate code --- .../access_overhead/access_overhead.cpp | 2 + .../access_overhead_noThis.cpp | 6 + .../access_overhead/access_overhead_teams.cpp | 4 + examples/heat3d/mpi/heat3d.cpp | 2 +- examples/matvec/multi-node/matvec.cpp | 11 +- examples/matvec/single-node/matvec.cpp | 13 +- src/core/Kokkos_RemoteSpaces_DeepCopy.hpp | 52 +++--- src/core/Kokkos_RemoteSpaces_Error.hpp | 104 +++++++++++ src/core/Kokkos_RemoteSpaces_ViewMapping.hpp | 46 ++++- src/impl/mpispace/Kokkos_MPISpace.cpp | 28 --- src/impl/mpispace/Kokkos_MPISpace.hpp | 6 +- .../Kokkos_MPISpace_AllocationRecord.hpp | 2 - src/impl/nvshmemspace/Kokkos_NVSHMEMSpace.cpp | 52 ------ src/impl/nvshmemspace/Kokkos_NVSHMEMSpace.hpp | 12 +- .../rocshmemspace/Kokkos_ROCSHMEMSpace.cpp | 29 --- .../rocshmemspace/Kokkos_ROCSHMEMSpace.hpp | 7 +- src/impl/shmemspace/Kokkos_SHMEMSpace.cpp | 124 +++++++++---- src/impl/shmemspace/Kokkos_SHMEMSpace.hpp | 32 +++- .../Kokkos_SHMEMSpace_AllocationRecord.cpp | 173 +++++------------- .../Kokkos_SHMEMSpace_AllocationRecord.hpp | 86 ++++----- .../Kokkos_SHMEMSpace_DataHandle.hpp | 3 - unit_tests/Test_DeepCopy.cpp | 5 + 22 files changed, 415 insertions(+), 384 deletions(-) create mode 100644 src/core/Kokkos_RemoteSpaces_Error.hpp diff --git a/examples/benchmarks/access_overhead/access_overhead.cpp b/examples/benchmarks/access_overhead/access_overhead.cpp index 2c506db8..d75248e9 100644 --- a/examples/benchmarks/access_overhead/access_overhead.cpp +++ b/examples/benchmarks/access_overhead/access_overhead.cpp @@ -117,7 +117,9 @@ struct Access T, dT; // Halo data using buffer_t = - Kokkos::View; + Kokkos::View; buffer_t T_left, T_right, T_up, T_down, T_front, T_back; buffer_t T_left_out, T_right_out, T_up_out, T_down_out, T_front_out, T_back_out; diff --git a/examples/matvec/multi-node/matvec.cpp b/examples/matvec/multi-node/matvec.cpp index 8ac3f29b..93301d35 100644 --- a/examples/matvec/multi-node/matvec.cpp +++ b/examples/matvec/multi-node/matvec.cpp @@ -38,8 +38,8 @@ using VectorHost_r_t = using VectorHost_t = Kokkos::View; using MatrixHost_t = Kokkos::View; -using Vector_t = Kokkos::View; -using Matrix_t = Kokkos::View; +using Vector_t = Kokkos::View; +using Matrix_t = Kokkos::View; int main(int argc, char *argv[]) { int mpi_thread_level_available; @@ -95,8 +95,11 @@ int main(int argc, char *argv[]) { Kokkos::deep_copy(b_h, 0.0); Kokkos::deep_copy(x_h, 1.0); - auto A = Kokkos::create_mirror_view_and_copy(Kokkos::CudaSpace(), A_h); - auto b = Kokkos::create_mirror_view_and_copy(Kokkos::CudaSpace(), b_h); + using DeviceMemorySpace = + typename Kokkos::DefaultExecutionSpace::memory_space; + + auto A = Kokkos::create_mirror_view_and_copy(DeviceMemorySpace{}, A_h); + auto b = Kokkos::create_mirror_view_and_copy(DeviceMemorySpace{}, b_h); // Copy host device data into global vector Kokkos::deep_copy(x, x_h); diff --git a/examples/matvec/single-node/matvec.cpp b/examples/matvec/single-node/matvec.cpp index 1547697d..5742ce95 100644 --- a/examples/matvec/single-node/matvec.cpp +++ b/examples/matvec/single-node/matvec.cpp @@ -30,8 +30,8 @@ using VALUE_T = double; using VectorHost_t = Kokkos::View; using MatrixHost_t = Kokkos::View; -using Vector_t = Kokkos::View; -using Matrix_t = Kokkos::View; +using Vector_t = Kokkos::View; +using Matrix_t = Kokkos::View; int main(int argc, char *argv[]) { // Vars @@ -56,9 +56,12 @@ int main(int argc, char *argv[]) { Kokkos::deep_copy(b_h, 0.0); Kokkos::deep_copy(x_h, 1.0); - auto A = Kokkos::create_mirror_view_and_copy(Kokkos::CudaSpace(), A_h); - auto b = Kokkos::create_mirror_view_and_copy(Kokkos::CudaSpace(), b_h); - auto x = Kokkos::create_mirror_view_and_copy(Kokkos::CudaSpace(), x_h); + using DeviceMemorySpace = + typename Kokkos::DefaultExecutionSpace::memory_space; + + auto A = Kokkos::create_mirror_view_and_copy(DeviceMemorySpace{}, A_h); + auto b = Kokkos::create_mirror_view_and_copy(DeviceMemorySpace{}, b_h); + auto x = Kokkos::create_mirror_view_and_copy(DeviceMemorySpace{}, x_h); Kokkos::Timer timer; Kokkos::parallel_for( diff --git a/src/core/Kokkos_RemoteSpaces_DeepCopy.hpp b/src/core/Kokkos_RemoteSpaces_DeepCopy.hpp index b1e36169..ce91c37e 100644 --- a/src/core/Kokkos_RemoteSpaces_DeepCopy.hpp +++ b/src/core/Kokkos_RemoteSpaces_DeepCopy.hpp @@ -309,7 +309,7 @@ void view_copy_(const DstType& dst, const SrcType& src) { } // Figure out iteration order in case we need it - int64_t strides[DstType::Rank + 1]; + int64_t strides[DstType::rank + 1]; dst.stride(strides); Kokkos::Iterate iterate; if (Kokkos::is_layouttiled::value) { @@ -329,7 +329,7 @@ void view_copy_(const DstType& dst, const SrcType& src) { Kokkos::PartitionedLayoutStride>::value || std::is_same::value) { - if (strides[0] > strides[DstType::Rank - 1]) + if (strides[0] > strides[DstType::rank - 1]) iterate = Kokkos::Iterate::Right; else iterate = Kokkos::Iterate::Left; @@ -348,40 +348,40 @@ void view_copy_(const DstType& dst, const SrcType& src) { if (DstExecCanAccessSrc) { if (iterate == Kokkos::Iterate::Right) Kokkos::Impl::ViewCopy_( + dst_execution_space, DstType::rank, int64_t>( dst, src); else Kokkos::Impl::ViewCopy_( + dst_execution_space, DstType::rank, int64_t>( dst, src); } else { if (iterate == Kokkos::Iterate::Right) Kokkos::Impl::ViewCopy_( + src_execution_space, DstType::rank, int64_t>( dst, src); else Kokkos::Impl::ViewCopy_( + src_execution_space, DstType::rank, int64_t>( dst, src); } } else { if (DstExecCanAccessSrc) { if (iterate == Kokkos::Iterate::Right) Kokkos::Impl::ViewCopy_(dst, + dst_execution_space, DstType::rank, int>(dst, src); else Kokkos::Impl::ViewCopy_(dst, + dst_execution_space, DstType::rank, int>(dst, src); } else { if (iterate == Kokkos::Iterate::Right) Kokkos::Impl::ViewCopy_(dst, + src_execution_space, DstType::rank, int>(dst, src); else Kokkos::Impl::ViewCopy_(dst, + src_execution_space, DstType::rank, int>(dst, src); } } @@ -517,19 +517,19 @@ inline void deep_copy( "match: "); message += dst.label(); message += "("; - for (int r = 0; r < dst_type::Rank - 1; r++) { + for (int r = 0; r < dst_type::rank - 1; r++) { message += std::to_string(dst.extent(r)); message += ","; } - message += std::to_string(dst.extent(dst_type::Rank - 1)); + message += std::to_string(dst.extent(dst_type::rank - 1)); message += ") "; message += src.label(); message += "("; - for (int r = 0; r < src_type::Rank - 1; r++) { + for (int r = 0; r < src_type::rank - 1; r++) { message += std::to_string(src.extent(r)); message += ","; } - message += std::to_string(src.extent(src_type::Rank - 1)); + message += std::to_string(src.extent(src_type::rank - 1)); message += ") "; Kokkos::Impl::throw_runtime_exception(message); @@ -604,19 +604,19 @@ inline void deep_copy( "Deprecation Error: Kokkos::deep_copy extents of views don't match: "); message += dst.label(); message += "("; - for (int r = 0; r < dst_type::Rank - 1; r++) { + for (int r = 0; r < dst_type::rank - 1; r++) { message += std::to_string(dst.extent(r)); message += ","; } - message += std::to_string(dst.extent(dst_type::Rank - 1)); + message += std::to_string(dst.extent(dst_type::rank - 1)); message += ") "; message += src.label(); message += "("; - for (int r = 0; r < src_type::Rank - 1; r++) { + for (int r = 0; r < src_type::rank - 1; r++) { message += std::to_string(src.extent(r)); message += ","; } - message += std::to_string(src.extent(src_type::Rank - 1)); + message += std::to_string(src.extent(src_type::rank - 1)); message += ") "; Kokkos::Impl::throw_runtime_exception(message); @@ -746,19 +746,19 @@ inline void deep_copy( "match: "); message += dst.label(); message += "("; - for (int r = 0; r < dst_type::Rank - 1; r++) { + for (int r = 0; r < dst_type::rank - 1; r++) { message += std::to_string(dst.extent(r)); message += ","; } - message += std::to_string(dst.extent(dst_type::Rank - 1)); + message += std::to_string(dst.extent(dst_type::rank - 1)); message += ") "; message += src.label(); message += "("; - for (int r = 0; r < src_type::Rank - 1; r++) { + for (int r = 0; r < src_type::rank - 1; r++) { message += std::to_string(src.extent(r)); message += ","; } - message += std::to_string(src.extent(src_type::Rank - 1)); + message += std::to_string(src.extent(src_type::rank - 1)); message += ") "; Kokkos::Impl::throw_runtime_exception(message); @@ -816,19 +816,19 @@ inline void deep_copy( "Deprecation Error: Kokkos::deep_copy extents of views don't match: "); message += dst.label(); message += "("; - for (int r = 0; r < dst_type::Rank - 1; r++) { + for (int r = 0; r < dst_type::rank - 1; r++) { message += std::to_string(dst.extent(r)); message += ","; } - message += std::to_string(dst.extent(dst_type::Rank - 1)); + message += std::to_string(dst.extent(dst_type::rank - 1)); message += ") "; message += src.label(); message += "("; - for (int r = 0; r < src_type::Rank - 1; r++) { + for (int r = 0; r < src_type::rank - 1; r++) { message += std::to_string(src.extent(r)); message += ","; } - message += std::to_string(src.extent(src_type::Rank - 1)); + message += std::to_string(src.extent(src_type::rank - 1)); message += ") "; Kokkos::Impl::throw_runtime_exception(message); diff --git a/src/core/Kokkos_RemoteSpaces_Error.hpp b/src/core/Kokkos_RemoteSpaces_Error.hpp new file mode 100644 index 00000000..6adb30cf --- /dev/null +++ b/src/core/Kokkos_RemoteSpaces_Error.hpp @@ -0,0 +1,104 @@ +//@HEADER +// ************************************************************************ +// +// Kokkos v. 4.0 +// Copyright (2022) National Technology & Engineering +// Solutions of Sandia, LLC (NTESS). +// +// Under the terms of Contract DE-NA0003525 with NTESS, +// the U.S. Government retains certain rights in this software. +// +// Part of Kokkos, under the Apache License v2.0 with LLVM Exceptions. +// See https://kokkos.org/LICENSE for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +// Contact: Jan Ciesko (jciesko@sandia.gov) +// +//@HEADER + +namespace Kokkos { +namespace Impl { +namespace Experimental { + +class RemoteSpacesMemoryAllocationFailure : public std::bad_alloc { + public: + enum class FailureMode { + OutOfMemoryError, + AllocationNotAligned, + InvalidAllocationSize, + Unknown + }; + enum class AllocationMechanism { + SHMEMMALLOCDEFAULT, + SHMEMMALLOC, + NVSHMEMMALLOC, + ROCSHMEMMALLOC, + MPIWINALLOC + }; + + private: + size_t m_attempted_size; + size_t m_attempted_alignment; + FailureMode m_failure_mode; + AllocationMechanism m_mechanism; + + public: + RemoteSpacesMemoryAllocationFailure( + size_t arg_attempted_size, size_t arg_attempted_alignment, + FailureMode arg_failure_mode = FailureMode::OutOfMemoryError, + AllocationMechanism arg_mechanism = + AllocationMechanism::SHMEMMALLOCDEFAULT) noexcept + : m_attempted_size(arg_attempted_size), + m_attempted_alignment(arg_attempted_alignment), + m_failure_mode(arg_failure_mode), + m_mechanism(arg_mechanism) {} + + RemoteSpacesMemoryAllocationFailure() noexcept = delete; + + RemoteSpacesMemoryAllocationFailure( + RemoteSpacesMemoryAllocationFailure const &) noexcept = default; + RemoteSpacesMemoryAllocationFailure( + RemoteSpacesMemoryAllocationFailure &&) noexcept = default; + + RemoteSpacesMemoryAllocationFailure &operator =( + RemoteSpacesMemoryAllocationFailure const &) noexcept = default; + RemoteSpacesMemoryAllocationFailure &operator =( + RemoteSpacesMemoryAllocationFailure &&) noexcept = default; + + ~RemoteSpacesMemoryAllocationFailure() noexcept override = default; + + [[nodiscard]] const char *what() const noexcept override { + if (m_failure_mode == FailureMode::OutOfMemoryError) { + return "Memory allocation error: out of memory"; + } else if (m_failure_mode == FailureMode::AllocationNotAligned) { + return "Memory allocation error: allocation result was under-aligned"; + } + + return nullptr; // unreachable + } + + [[nodiscard]] size_t attempted_size() const noexcept { + return m_attempted_size; + } + + [[nodiscard]] size_t attempted_alignment() const noexcept { + return m_attempted_alignment; + } + + [[nodiscard]] AllocationMechanism allocation_mechanism() const noexcept { + return m_mechanism; + } + + [[nodiscard]] FailureMode failure_mode() const noexcept { + return m_failure_mode; + } + + void print_error_message(std::ostream &o) const; + [[nodiscard]] std::string get_error_message() const; + + virtual void append_additional_error_information(std::ostream &) const {} +}; + +} // namespace Experimental +} // namespace Impl +} // namespace Kokkos \ No newline at end of file diff --git a/src/core/Kokkos_RemoteSpaces_ViewMapping.hpp b/src/core/Kokkos_RemoteSpaces_ViewMapping.hpp index c365119a..a8ca5fe7 100644 --- a/src/core/Kokkos_RemoteSpaces_ViewMapping.hpp +++ b/src/core/Kokkos_RemoteSpaces_ViewMapping.hpp @@ -25,13 +25,57 @@ /** \brief View mapping for non-specialized data type and standard layout */ namespace Kokkos { -namespace Impl { +namespace Experimental { + +KOKKOS_INLINE_FUNCTION +size_t get_indexing_block_size(size_t size) { + int num_pes; + size_t block; + num_pes = Kokkos::Experimental::get_num_pes(); + block = (size + static_cast(num_pes) - 1) / num_pes; + return block; +} + +template +KOKKOS_INLINE_FUNCTION Kokkos::pair getRange(T size, int pe) { + T start, end; + T block = get_indexing_block_size(size); + start = static_cast(pe) * block; + end = (static_cast(pe) + 1) * block; + + T num_pes = Kokkos::Experimental::get_num_pes(); + if (size < num_pes) { + T diff = (num_pes * block) - size; + if (pe > num_pes - 1 - diff) end--; + } else { + if (pe == num_pes - 1) { + size_t diff = size - (num_pes - 1) * block; + end = start + diff; + } + } + return Kokkos::pair(start, end); +} + +template +KOKKOS_INLINE_FUNCTION Kokkos::pair get_range(T size, int pe) { + return getRange(size, pe); +} + +template +KOKKOS_INLINE_FUNCTION Kokkos::pair get_local_range(T size) { + auto pe = Kokkos::Experimental::get_my_pe(); + return getRange(size, pe); +} + +} // namespace Experimental /* * ViewMapping class used by View copy-ctr and subview() to specialize new * (sub-) view type */ +namespace Impl { + template class ViewMapping< typename std::enable_if<( diff --git a/src/impl/mpispace/Kokkos_MPISpace.cpp b/src/impl/mpispace/Kokkos_MPISpace.cpp index 5df80fcd..21db61d2 100644 --- a/src/impl/mpispace/Kokkos_MPISpace.cpp +++ b/src/impl/mpispace/Kokkos_MPISpace.cpp @@ -127,34 +127,6 @@ size_t get_my_pe() { return rank; } -KOKKOS_FUNCTION -size_t get_indexing_block_size(size_t size) { - size_t num_pes, block; - num_pes = get_num_pes(); - block = (size + num_pes - 1) / num_pes; - return block; -} - -std::pair getRange(size_t size, size_t pe) { - size_t start, end; - size_t block = get_indexing_block_size(size); - start = pe * block; - end = (pe + 1) * block; - - size_t num_pes = get_num_pes(); - - if (size < num_pes) { - size_t diff = (num_pes * block) - size; - if (pe > num_pes - 1 - diff) end--; - } else { - if (pe == num_pes - 1) { - size_t diff = size - (num_pes - 1) * block; - end = start + diff; - } - end--; - } - return std::make_pair(start, end); -} } // namespace Experimental namespace Impl { diff --git a/src/impl/mpispace/Kokkos_MPISpace.hpp b/src/impl/mpispace/Kokkos_MPISpace.hpp index 7f90dcc2..200584d1 100644 --- a/src/impl/mpispace/Kokkos_MPISpace.hpp +++ b/src/impl/mpispace/Kokkos_MPISpace.hpp @@ -98,8 +98,6 @@ class MPISpace { size_t get_num_pes(); size_t get_my_pe(); -size_t get_indexing_block_size(size_t size); -std::pair getRange(size_t size, size_t pe); } // namespace Experimental } // namespace Kokkos @@ -148,15 +146,17 @@ struct MemorySpaceAccess { } // namespace Impl } // namespace Kokkos +#include #include #include -#include #include #include #include #include +#include #include #include +#include #include #endif // #define KOKKOS_MPISPACE_HPP diff --git a/src/impl/mpispace/Kokkos_MPISpace_AllocationRecord.hpp b/src/impl/mpispace/Kokkos_MPISpace_AllocationRecord.hpp index 70b5970b..b07ec481 100644 --- a/src/impl/mpispace/Kokkos_MPISpace_AllocationRecord.hpp +++ b/src/impl/mpispace/Kokkos_MPISpace_AllocationRecord.hpp @@ -20,8 +20,6 @@ #include -/*--------------------------------------------------------------------------*/ - namespace Kokkos { namespace Impl { diff --git a/src/impl/nvshmemspace/Kokkos_NVSHMEMSpace.cpp b/src/impl/nvshmemspace/Kokkos_NVSHMEMSpace.cpp index 75b57c56..cbb7d301 100644 --- a/src/impl/nvshmemspace/Kokkos_NVSHMEMSpace.cpp +++ b/src/impl/nvshmemspace/Kokkos_NVSHMEMSpace.cpp @@ -68,58 +68,6 @@ int get_num_pes() { return nvshmem_n_pes(); } KOKKOS_FUNCTION int get_my_pe() { return nvshmem_my_pe(); } -KOKKOS_FUNCTION -size_t get_indexing_block_size(size_t size) { - int num_pes; - size_t block; - num_pes = get_num_pes(); - block = (size + static_cast(num_pes) - 1) / num_pes; - return block; -} - -template -KOKKOS_FUNCTION Kokkos::pair getRange(T size, int pe) { - T start, end; - T block = get_indexing_block_size(size); - start = static_cast(pe) * block; - end = (static_cast(pe) + 1) * block; - - T num_pes = get_num_pes(); - if (size < num_pes) { - T diff = (num_pes * block) - size; - if (pe > num_pes - 1 - diff) end--; - } else { - if (pe == num_pes - 1) { - size_t diff = size - (num_pes - 1) * block; - end = start + diff; - } - } - return Kokkos::pair(start, end); -} - -template -KOKKOS_FUNCTION Kokkos::pair get_range(T size, int pe) { - return getRange(size, pe); -} - -template -KOKKOS_FUNCTION Kokkos::pair get_local_range(T size) { - auto pe = get_my_pe(); - return getRange(size, pe); -} - -template KOKKOS_FUNCTION Kokkos::pair get_range( - size_t size, int p); -template KOKKOS_FUNCTION Kokkos::pair get_local_range( - size_t size); -template KOKKOS_FUNCTION Kokkos::pair getRange( - size_t size, int pe); - -template KOKKOS_FUNCTION Kokkos::pair get_range(int size, - int pe); -template KOKKOS_FUNCTION Kokkos::pair get_local_range(int size); -template KOKKOS_FUNCTION Kokkos::pair getRange(int size, int pe); - } // namespace Experimental namespace Impl { diff --git a/src/impl/nvshmemspace/Kokkos_NVSHMEMSpace.hpp b/src/impl/nvshmemspace/Kokkos_NVSHMEMSpace.hpp index 648843e6..2ff3ebb4 100644 --- a/src/impl/nvshmemspace/Kokkos_NVSHMEMSpace.hpp +++ b/src/impl/nvshmemspace/Kokkos_NVSHMEMSpace.hpp @@ -88,17 +88,6 @@ KOKKOS_FUNCTION int get_num_pes(); KOKKOS_FUNCTION int get_my_pe(); -KOKKOS_FUNCTION -size_t get_indexing_block_size(size_t size); - -template -KOKKOS_FUNCTION Kokkos::pair getRange(T size, int pe); - -template -KOKKOS_FUNCTION Kokkos::pair get_range(T size, int pe); - -template -KOKKOS_FUNCTION Kokkos::pair get_local_range(T size); } // namespace Experimental } // namespace Kokkos @@ -150,6 +139,7 @@ struct MemorySpaceAccess #include #include #include diff --git a/src/impl/rocshmemspace/Kokkos_ROCSHMEMSpace.cpp b/src/impl/rocshmemspace/Kokkos_ROCSHMEMSpace.cpp index cf540b9c..e4774715 100644 --- a/src/impl/rocshmemspace/Kokkos_ROCSHMEMSpace.cpp +++ b/src/impl/rocshmemspace/Kokkos_ROCSHMEMSpace.cpp @@ -68,35 +68,6 @@ size_t get_num_pes() { return roc_shmem_n_pes(); } KOKKOS_FUNCTION size_t get_my_pe() { return roc_shmem_my_pe(); } -KOKKOS_FUNCTION -size_t get_indexing_block_size(size_t size) { - size_t num_pes, block; - num_pes = get_num_pes(); - block = (size + num_pes - 1) / num_pes; - return block; -} - -std::pair getRange(size_t size, size_t pe) { - size_t start, end; - size_t block = get_indexing_block_size(size); - start = pe * block; - end = (pe + 1) * block; - - size_t num_pes = get_num_pes(); - - if (size < num_pes) { - size_t diff = (num_pes * block) - size; - if (pe > num_pes - 1 - diff) end--; - } else { - if (pe == num_pes - 1) { - size_t diff = size - (num_pes - 1) * block; - end = start + diff; - } - end--; - } - return std::make_pair(start, end); -} - } // namespace Experimental namespace Impl { diff --git a/src/impl/rocshmemspace/Kokkos_ROCSHMEMSpace.hpp b/src/impl/rocshmemspace/Kokkos_ROCSHMEMSpace.hpp index 7020add2..94a2e7e4 100644 --- a/src/impl/rocshmemspace/Kokkos_ROCSHMEMSpace.hpp +++ b/src/impl/rocshmemspace/Kokkos_ROCSHMEMSpace.hpp @@ -86,9 +86,6 @@ KOKKOS_FUNCTION size_t get_num_pes(); KOKKOS_FUNCTION size_t get_my_pe(); -KOKKOS_FUNCTION -size_t get_indexing_block_size(size_t size); -std::pair getRange(size_t size, size_t pe); } // namespace Experimental } // namespace Kokkos @@ -140,15 +137,17 @@ struct MemorySpaceAccess #include #include -#include #include #include #include #include +#include #include #include +#include #include #endif // #define KOKKOS_ROCSHMEMSPACE_HPP diff --git a/src/impl/shmemspace/Kokkos_SHMEMSpace.cpp b/src/impl/shmemspace/Kokkos_SHMEMSpace.cpp index 6d9b80c6..b58962b3 100644 --- a/src/impl/shmemspace/Kokkos_SHMEMSpace.cpp +++ b/src/impl/shmemspace/Kokkos_SHMEMSpace.cpp @@ -18,6 +18,7 @@ #include #include +#include #include //---------------------------------------------------------------------------- //---------------------------------------------------------------------------- @@ -35,6 +36,22 @@ void SHMEMSpace::impl_set_allocation_mode(const int allocation_mode_) { void SHMEMSpace::impl_set_extent(const int64_t extent_) { extent = extent_; } void *SHMEMSpace::allocate(const size_t arg_alloc_size) const { + return allocate("[unlabeled]", arg_alloc_size); +} + +void *SHMEMSpace::allocate(const char *arg_label, const size_t arg_alloc_size, + const size_t + + arg_logical_size) const { + return impl_allocate(arg_label, arg_alloc_size, arg_logical_size); +} + +void *SHMEMSpace::impl_allocate( + const char *arg_label, const size_t arg_alloc_size, + const size_t arg_logical_size, + const Kokkos::Tools::SpaceHandle arg_handle) const { + const size_t reported_size = + (arg_logical_size > 0) ? arg_logical_size : arg_alloc_size; static_assert(sizeof(void *) == sizeof(uintptr_t), "Error sizeof(void*) != sizeof(uintptr_t)"); @@ -42,59 +59,100 @@ void *SHMEMSpace::allocate(const size_t arg_alloc_size) const { Kokkos::Impl::is_integral_power_of_two(Kokkos::Impl::MEMORY_ALIGNMENT), "Memory alignment must be power of two"); - void *ptr = 0; + constexpr uintptr_t alignment = Kokkos::Impl::MEMORY_ALIGNMENT; + constexpr uintptr_t alignment_mask = alignment - 1; + + void *ptr = nullptr; + if (arg_alloc_size) { + // Over-allocate to and round up to guarantee proper alignment. + size_t size_padded = arg_alloc_size + sizeof(void *) + alignment; + if (allocation_mode == Kokkos::Experimental::Symmetric) { int num_pes = shmem_n_pes(); int my_id = shmem_my_pe(); - ptr = shmem_malloc(arg_alloc_size); + ptr = shmem_malloc(size_padded); } else { Kokkos::abort("SHMEMSpace only supports symmetric allocation policy."); } + + if (ptr) { + auto address = reinterpret_cast(ptr); + + // offset enough to record the alloc_ptr + address += sizeof(void *); + uintptr_t rem = address % alignment; + uintptr_t offset = rem ? (alignment - rem) : 0u; + address += offset; + ptr = reinterpret_cast(address); + // record the alloc'd pointer + address -= sizeof(void *); + *reinterpret_cast(address) = ptr; + } } - return ptr; -} -void SHMEMSpace::deallocate(void *const arg_alloc_ptr, const size_t) const { - shmem_free(arg_alloc_ptr); + using MemAllocFailure = + Kokkos::Impl::Experimental::RemoteSpacesMemoryAllocationFailure; + using MemAllocFailureMode = Kokkos::Impl::Experimental:: + RemoteSpacesMemoryAllocationFailure::FailureMode; + + if ((ptr == nullptr) || (reinterpret_cast(ptr) == ~uintptr_t(0)) || + (reinterpret_cast(ptr) & alignment_mask)) { + MemAllocFailureMode failure_mode = + MemAllocFailureMode::AllocationNotAligned; + if (ptr == nullptr) { + failure_mode = MemAllocFailureMode::OutOfMemoryError; + } + + MemAllocFailure::AllocationMechanism alloc_mec = + MemAllocFailure::AllocationMechanism::SHMEMMALLOC; + throw MemAllocFailure(arg_alloc_size, alignment, failure_mode, alloc_mec); + } + + if (Kokkos::Profiling::profileLibraryLoaded()) { + Kokkos::Profiling::allocateData(arg_handle, arg_label, ptr, reported_size); + } + return ptr; } -void SHMEMSpace::fence() { - Kokkos::fence(); - shmem_barrier_all(); +void SHMEMSpace::deallocate(void *const arg_alloc_ptr, + const size_t arg_alloc_size) const { + deallocate("[unlabeled]", arg_alloc_ptr, arg_alloc_size); } -size_t get_num_pes() { return shmem_n_pes(); } -size_t get_my_pe() { return shmem_my_pe(); } +void SHMEMSpace::deallocate(const char *arg_label, void *const arg_alloc_ptr, + const size_t arg_alloc_size, + const size_t -size_t get_indexing_block_size(size_t size) { - size_t num_pes, block; - num_pes = get_num_pes(); - block = (size + num_pes - 1) / num_pes; - return block; + arg_logical_size) const { + impl_deallocate(arg_label, arg_alloc_ptr, arg_alloc_size, arg_logical_size); } -std::pair getRange(size_t size, size_t pe) { - size_t start, end; - size_t block = get_indexing_block_size(size); - start = pe * block; - end = (pe + 1) * block; - - size_t num_pes = get_num_pes(); - - if (size < num_pes) { - size_t diff = (num_pes * block) - size; - if (pe > num_pes - 1 - diff) end--; - } else { - if (pe == num_pes - 1) { - size_t diff = size - (num_pes - 1) * block; - end = start + diff; +void SHMEMSpace::impl_deallocate( + const char *arg_label, void *const arg_alloc_ptr, + const size_t arg_alloc_size, const size_t arg_logical_size, + const Kokkos::Tools::SpaceHandle arg_handle) const { + if (arg_alloc_ptr) { + Kokkos::fence("HostSpace::impl_deallocate before free"); + fence(); + size_t reported_size = + (arg_logical_size > 0) ? arg_logical_size : arg_alloc_size; + if (Kokkos::Profiling::profileLibraryLoaded()) { + Kokkos::Profiling::deallocateData(arg_handle, arg_label, arg_alloc_ptr, + reported_size); } - end--; + shmem_free(arg_alloc_ptr); } - return std::make_pair(start, end); } +void SHMEMSpace::fence() const { + Kokkos::fence(); + shmem_barrier_all(); +} + +size_t get_num_pes() { return shmem_n_pes(); } +size_t get_my_pe() { return shmem_my_pe(); } + } // namespace Experimental namespace Impl { diff --git a/src/impl/shmemspace/Kokkos_SHMEMSpace.hpp b/src/impl/shmemspace/Kokkos_SHMEMSpace.hpp index 8bcca659..28a939bf 100644 --- a/src/impl/shmemspace/Kokkos_SHMEMSpace.hpp +++ b/src/impl/shmemspace/Kokkos_SHMEMSpace.hpp @@ -66,19 +66,36 @@ class SHMEMSpace { explicit SHMEMSpace(const MPI_Comm &); + /**\brief Allocate untracked memory in the space */ void *allocate(const size_t arg_alloc_size) const; + void *allocate(const char *arg_label, const size_t arg_alloc_size, + const size_t arg_logical_size = 0) const; + /**\brief Deallocate untracked memory in the space */ void deallocate(void *const arg_alloc_ptr, const size_t arg_alloc_size) const; + void deallocate(const char *arg_label, void *const arg_alloc_ptr, + const size_t arg_alloc_size, + const size_t arg_logical_size = 0) const; - void *allocate(const int *gids, const int &arg_local_alloc_size) const; - - void deallocate(const int *gids, void *const arg_alloc_ptr, - const size_t arg_alloc_size) const; + private: + template + friend class Kokkos::Experimental::LogicalMemorySpace; + + void *impl_allocate(const char *arg_label, const size_t arg_alloc_size, + const size_t arg_logical_size = 0, + const Kokkos::Tools::SpaceHandle = + Kokkos::Tools::make_space_handle(name())) const; + void impl_deallocate(const char *arg_label, void *const arg_alloc_ptr, + const size_t arg_alloc_size, + const size_t arg_logical_size = 0, + const Kokkos::Tools::SpaceHandle = + Kokkos::Tools::make_space_handle(name())) const; + public: /**\brief Return Name of the MemorySpace */ static constexpr const char *name() { return m_name; } - void fence(); + void fence() const; int allocation_mode; int64_t extent; @@ -94,8 +111,6 @@ class SHMEMSpace { size_t get_num_pes(); size_t get_my_pe(); -size_t get_indexing_block_size(size_t size); -std::pair getRange(size_t size, size_t pe); } // namespace Experimental } // namespace Kokkos @@ -146,13 +161,14 @@ struct MemorySpaceAccess { #include #include -#include #include #include #include #include +#include #include #include +#include #include #endif // #define KOKKOS_SHMEMSPACE_HPP diff --git a/src/impl/shmemspace/Kokkos_SHMEMSpace_AllocationRecord.cpp b/src/impl/shmemspace/Kokkos_SHMEMSpace_AllocationRecord.cpp index bda96ea2..014a1473 100644 --- a/src/impl/shmemspace/Kokkos_SHMEMSpace_AllocationRecord.cpp +++ b/src/impl/shmemspace/Kokkos_SHMEMSpace_AllocationRecord.cpp @@ -22,34 +22,40 @@ namespace Kokkos { namespace Impl { -template -SharedAllocationRecord:: - SharedAllocationRecord( - const ExecutionSpace &execution_space, - const Kokkos::Experimental::SHMEMSpace &arg_space, - const std::string &arg_label, const size_t arg_alloc_size, - const SharedAllocationRecord::function_type arg_dealloc) - // Pass through allocated [ SharedAllocationHeader , user_memory ] - // Pass through deallocation function - : SharedAllocationRecord( - execution_space, - reinterpret_cast(arg_space.allocate( - sizeof(SharedAllocationHeader) + arg_alloc_size)), - sizeof(SharedAllocationHeader) + arg_alloc_size, arg_dealloc, - arg_label), - m_space(arg_space) { -#if defined(KOKKOS_ENABLE_PROFILING) - if (Kokkos::Profiling::profileLibraryLoaded()) { - Kokkos::Profiling::allocateData( - Kokkos::Profiling::SpaceHandle(arg_space.name()), arg_label, data(), - arg_alloc_size); - } +#ifdef KOKKOS_ENABLE_DEBUG +SharedAllocationRecord SharedAllocationRecord< + Kokkos::Experimental::SHMEMSpace, void>::s_root_record; #endif - // Fill in the Header information - RecordBase::m_alloc_ptr->m_record = - static_cast *>(this); - strncpy(RecordBase::m_alloc_ptr->m_label, arg_label.c_str(), - SharedAllocationHeader::maximum_label_length); + +SharedAllocationRecord::~SharedAllocationRecord() { + m_space.deallocate(m_label.c_str(), + SharedAllocationRecord::m_alloc_ptr, + SharedAllocationRecord::m_alloc_size, + (SharedAllocationRecord::m_alloc_size - + sizeof(SharedAllocationHeader))); +} + +SharedAllocationHeader *_do_allocation( + Kokkos::Experimental::SHMEMSpace const &space, std::string const &label, + size_t alloc_size) { + try { + return reinterpret_cast( + space.allocate(alloc_size)); + } catch (Experimental::RawMemoryAllocationFailure const &failure) { + if (failure.failure_mode() == Experimental::RawMemoryAllocationFailure:: + FailureMode::AllocationNotAligned) { + // TODO: delete the misaligned memory + } + + std::cerr << "Kokkos failed to allocate memory for label \"" << label + << "\". Allocation using MemorySpace named \"" << space.name() + << " failed with the following error: "; + failure.print_error_message(std::cerr); + std::cerr.flush(); + Kokkos::Impl::throw_runtime_exception("Memory allocation failure"); + } + return nullptr; // unreachable } SharedAllocationRecord:: @@ -59,120 +65,33 @@ SharedAllocationRecord:: const SharedAllocationRecord::function_type arg_dealloc) // Pass through allocated [ SharedAllocationHeader , user_memory ] // Pass through deallocation function - : SharedAllocationRecord( + : base_t( #ifdef KOKKOS_ENABLE_DEBUG &SharedAllocationRecord::s_root_record, #endif - reinterpret_cast(arg_space.allocate( - sizeof(SharedAllocationHeader) + arg_alloc_size)), + Impl::checked_allocation_with_header(arg_space, arg_label, + arg_alloc_size), sizeof(SharedAllocationHeader) + arg_alloc_size, arg_dealloc, arg_label), m_space(arg_space) { -#if defined(KOKKOS_ENABLE_PROFILING) - if (Kokkos::Profiling::profileLibraryLoaded()) { - Kokkos::Profiling::allocateData( - Kokkos::Profiling::SpaceHandle(arg_space.name()), arg_label, data(), - arg_alloc_size); - } -#endif - // Fill in the Header information - RecordBase::m_alloc_ptr->m_record = - static_cast *>(this); - strncpy(RecordBase::m_alloc_ptr->m_label, arg_label.c_str(), - SharedAllocationHeader::maximum_label_length); + this->base_t::_fill_host_accessible_header_info(*RecordBase::m_alloc_ptr, + arg_label); } -SharedAllocationRecord::~SharedAllocationRecord() { -#if defined(KOKKOS_ENABLE_PROFILING) - if (Kokkos::Profiling::profileLibraryLoaded()) { - SharedAllocationHeader header; - Kokkos::Profiling::deallocateData( - Kokkos::Profiling::SpaceHandle( - Kokkos::Experimental::SHMEMSpace::name()), - header.m_label, data(), size()); - } -#endif - - m_space.deallocate(SharedAllocationRecord::m_alloc_ptr, - SharedAllocationRecord::m_alloc_size); -} - -SharedAllocationRecord SharedAllocationRecord< - Kokkos::Experimental::SHMEMSpace, void>::s_root_record; - -void SharedAllocationRecord::deallocate( - SharedAllocationRecord *arg_rec) { - delete static_cast(arg_rec); -} - -void *SharedAllocationRecord:: - allocate_tracked(const Kokkos::Experimental::SHMEMSpace &arg_space, - const std::string &arg_alloc_label, - const size_t arg_alloc_size) { - if (!arg_alloc_size) return (void *)0; - - SharedAllocationRecord *const r = - allocate(arg_space, arg_alloc_label, arg_alloc_size); - RecordBase::increment(r); - return r->data(); -} - -void SharedAllocationRecord::deallocate_tracked(void *const - arg_alloc_ptr) { - if (arg_alloc_ptr != 0) { - SharedAllocationRecord *const r = get_record(arg_alloc_ptr); - RecordBase::decrement(r); - } -} - -void *SharedAllocationRecord:: - reallocate_tracked(void *const arg_alloc_ptr, const size_t arg_alloc_size) { - SharedAllocationRecord *const r_old = get_record(arg_alloc_ptr); - SharedAllocationRecord *const r_new = - allocate(r_old->m_space, r_old->get_label(), arg_alloc_size); - - Kokkos::Impl::DeepCopy( - r_new->data(), r_old->data(), r_new->size()); - - RecordBase::increment(r_new); - RecordBase::decrement(r_old); - - return r_new->data(); -} +} // namespace Impl +} // namespace Kokkos -SharedAllocationRecord - *SharedAllocationRecord::get_record( - void *alloc_ptr) { - typedef SharedAllocationHeader Header; - typedef SharedAllocationRecord - RecordHost; +#define KOKKOS_IMPL_PUBLIC_INCLUDE - // Copy the header from the allocation - SharedAllocationHeader const *const head = - alloc_ptr ? Header::get_header(alloc_ptr) : (SharedAllocationHeader *)0; - RecordHost *const record = - head ? static_cast(head->m_record) : (RecordHost *)0; +#include - if (!alloc_ptr || record->m_alloc_ptr != head) { - Kokkos::Impl::throw_runtime_exception(std::string( - "Kokkos::Impl::SharedAllocationRecord< " - "Kokkos::Experimental::SHMEMSpace , void >::get_record ERROR")); - } +namespace Kokkos { +namespace Impl { - return record; -} +template class SharedAllocationRecordCommon; -// Iterate records to print orphaned memory ... -void SharedAllocationRecord:: - print_records(std::ostream &s, const Kokkos::Experimental::SHMEMSpace &, - bool detail) { - SharedAllocationRecord::print_host_accessible_records( - s, "SHMEMSpace", &s_root_record, detail); -} +#undef KOKKOS_IMPL_PUBLIC_INCLUDE } // namespace Impl } // namespace Kokkos diff --git a/src/impl/shmemspace/Kokkos_SHMEMSpace_AllocationRecord.hpp b/src/impl/shmemspace/Kokkos_SHMEMSpace_AllocationRecord.hpp index 42a1d2a5..7c310f0a 100644 --- a/src/impl/shmemspace/Kokkos_SHMEMSpace_AllocationRecord.hpp +++ b/src/impl/shmemspace/Kokkos_SHMEMSpace_AllocationRecord.hpp @@ -21,79 +21,71 @@ #include -/*--------------------------------------------------------------------------*/ - namespace Kokkos { namespace Impl { template <> class SharedAllocationRecord - : public SharedAllocationRecord { + : public SharedAllocationRecordCommon { private: friend Kokkos::Experimental::SHMEMSpace; + friend class SharedAllocationRecordCommon; - typedef SharedAllocationRecord RecordBase; - - SharedAllocationRecord(const SharedAllocationRecord &) = delete; - SharedAllocationRecord &operator=(const SharedAllocationRecord &) = delete; + using base_t = SharedAllocationRecordCommon; + using RecordBase = SharedAllocationRecord; - static void deallocate(RecordBase *); + SharedAllocationRecord(const SharedAllocationRecord&) = delete; + SharedAllocationRecord& operator=(const SharedAllocationRecord&) = delete; - /**\brief Root record for tracked allocations from this SHMEMSpace instance - */ +#ifdef KOKKOS_ENABLE_DEBUG + /**\brief Root record for tracked allocations from this HostSpace instance */ static RecordBase s_root_record; +#endif const Kokkos::Experimental::SHMEMSpace m_space; protected: ~SharedAllocationRecord(); - SharedAllocationRecord() = default; + // This constructor does not forward to the one without exec_space arg + // in order to work around https://github.com/kokkos/kokkos/issues/5258 + // This constructor is templated so I can't just put it into the cpp file + // like the other constructor. template SharedAllocationRecord( - const ExecutionSpace &execution_space, - const Kokkos::Experimental::SHMEMSpace &arg_space, - const std::string &arg_label, const size_t arg_alloc_size, - const RecordBase::function_type arg_dealloc = &deallocate); + const ExecutionSpace& /* exec_space*/, + const Kokkos::Experimental::SHMEMSpace& arg_space, + const std::string& arg_label, const size_t arg_alloc_size, + const RecordBase::function_type arg_dealloc = &deallocate) + : base_t( +#ifdef KOKKOS_ENABLE_DEBUG + &SharedAllocationRecord::s_root_record, +#endif + Impl::checked_allocation_with_header(arg_space, arg_label, + arg_alloc_size), + sizeof(SharedAllocationHeader) + arg_alloc_size, arg_dealloc, + arg_label), + m_space(arg_space) { + this->base_t::_fill_host_accessible_header_info(*RecordBase::m_alloc_ptr, + arg_label); + } SharedAllocationRecord( - const Kokkos::Experimental::SHMEMSpace &arg_space, - const std::string &arg_label, const size_t arg_alloc_size, + const Kokkos::Experimental::SHMEMSpace& arg_space, + const std::string& arg_label, const size_t arg_alloc_size, const RecordBase::function_type arg_dealloc = &deallocate); public: - inline std::string get_label() const { - return std::string(RecordBase::head()->m_label); + KOKKOS_INLINE_FUNCTION static SharedAllocationRecord* allocate( + const Kokkos::Experimental::SHMEMSpace& arg_space, + const std::string& arg_label, const size_t arg_alloc_size) { + KOKKOS_IF_ON_HOST((return new SharedAllocationRecord(arg_space, arg_label, + arg_alloc_size);)) + KOKKOS_IF_ON_DEVICE(((void)arg_space; (void)arg_label; (void)arg_alloc_size; + return nullptr;)) } - - KOKKOS_INLINE_FUNCTION static SharedAllocationRecord *allocate( - const Kokkos::Experimental::SHMEMSpace &arg_space, - const std::string &arg_label, const size_t arg_alloc_size) { -#if defined(KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_HOST) - return new SharedAllocationRecord(arg_space, arg_label, arg_alloc_size); -#else - return (SharedAllocationRecord *)0; -#endif - } - - /**\brief Allocate tracked memory in the space */ - static void *allocate_tracked( - const Kokkos::Experimental::SHMEMSpace &arg_space, - const std::string &arg_label, const size_t arg_alloc_size); - - /**\brief Reallocate tracked memory in the space */ - static void *reallocate_tracked(void *const arg_alloc_ptr, - const size_t arg_alloc_size); - - /**\brief Deallocate tracked memory in the space */ - static void deallocate_tracked(void *const arg_alloc_ptr); - - static SharedAllocationRecord *get_record(void *arg_alloc_ptr); - - static void print_records(std::ostream &, - const Kokkos::Experimental::SHMEMSpace &, - bool detail = false); }; } // namespace Impl diff --git a/src/impl/shmemspace/Kokkos_SHMEMSpace_DataHandle.hpp b/src/impl/shmemspace/Kokkos_SHMEMSpace_DataHandle.hpp index e1400702..1949aeb3 100644 --- a/src/impl/shmemspace/Kokkos_SHMEMSpace_DataHandle.hpp +++ b/src/impl/shmemspace/Kokkos_SHMEMSpace_DataHandle.hpp @@ -35,9 +35,6 @@ struct SHMEMDataHandle { KOKKOS_INLINE_FUNCTION SHMEMDataHandle(SHMEMDataHandle const &arg) : ptr(arg.ptr) {} - template - KOKKOS_INLINE_FUNCTION SHMEMDataHandle(SrcTraits const &arg) : ptr(arg.ptr) {} - template KOKKOS_INLINE_FUNCTION SHMEMDataElement operator()( const int &pe, const iType &i) const { diff --git a/unit_tests/Test_DeepCopy.cpp b/unit_tests/Test_DeepCopy.cpp index 52682da1..5a9c0de0 100644 --- a/unit_tests/Test_DeepCopy.cpp +++ b/unit_tests/Test_DeepCopy.cpp @@ -128,6 +128,8 @@ void test_deepcopy( Kokkos::parallel_for( "Team", 1, KOKKOS_LAMBDA(const int i) { assert(v_R(my_rank, 0) == (Data_t)0x123); }); + + Kokkos::fence(); } template @@ -153,6 +155,8 @@ void test_deepcopy( Kokkos::parallel_for( "Team", i1, KOKKOS_LAMBDA(const int i) { assert(v_R(my_rank, i) == (Data_t)0x123); }); + + Kokkos::fence(); } template @@ -183,6 +187,7 @@ void test_deepcopy( for (int j = 0; j < i2; ++j) assert(v_R(my_rank, i, j) == (Data_t)0x123); }); + Kokkos::fence(); } TEST(TEST_CATEGORY, test_deepcopy) {