Skip to content

Commit

Permalink
Modernize the use of AllocationRecord and consolidate code
Browse files Browse the repository at this point in the history
  • Loading branch information
janciesko committed Aug 31, 2023
1 parent d846eba commit 242325c
Show file tree
Hide file tree
Showing 22 changed files with 415 additions and 384 deletions.
2 changes: 2 additions & 0 deletions examples/benchmarks/access_overhead/access_overhead.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -117,7 +117,9 @@ struct Access<ViewType_t, typename std::enable_if_t<!std::is_same<
Kokkos::parallel_for("access_overhead-init", policy_init_t({0}, {N}),
*this);
Kokkos::fence();
#ifdef KRS_ENABLE_NVSHMEMSPACE
nvshmem_barrier_all(); // Not sure why this impacts perf
#endif

time_a = timer.seconds();
for (int i = 0; i < iters; i++) {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -93,7 +93,9 @@ void run_1(Args_t& args) {
KOKKOS_LAMBDA(const size_t i) { v(i) = 0.0; });

Kokkos::fence();
#ifdef KRS_ENABLE_NVSHMEMSPACE
nvshmem_barrier_all(); // Not sure why this impacts perf
#endif

time_a = timer.seconds();
for (int i = 0; i < iters; i++) {
Expand Down Expand Up @@ -137,7 +139,9 @@ void run_2(Args_t& args) {
KOKKOS_LAMBDA(const size_t i) { v(i) = 0.0; });

Kokkos::fence();
#ifdef KRS_ENABLE_NVSHMEMSPACE
nvshmem_barrier_all(); // Not sure why this impacts perf
#endif

time_a = timer.seconds();
for (int i = 0; i < iters; i++) {
Expand Down Expand Up @@ -182,7 +186,9 @@ void run_3(Args_t& args) {
KOKKOS_LAMBDA(const size_t i) { v(i) = 0.0; });

Kokkos::fence();
#ifdef KRS_ENABLE_NVSHMEMSPACE
nvshmem_barrier_all(); // Not sure why this impacts perf
#endif

time_a = timer.seconds();
for (int i = 0; i < iters; i++) {
Expand Down
4 changes: 4 additions & 0 deletions examples/benchmarks/access_overhead/access_overhead_teams.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -148,7 +148,9 @@ struct Access<ViewType_t, typename std::enable_if_t<!std::is_same<
Kokkos::parallel_for("access_overhead-init", policy_init_t({0}, {N}),
*this);
Kokkos::fence();
#ifdef KRS_ENABLE_NVSHMEMSPACE
nvshmem_barrier_all(); // Not sure why this impacts perf
#endif

auto policy = policy_update_t(ls, ts, 1);

Expand Down Expand Up @@ -227,7 +229,9 @@ struct Access<ViewType_t, typename std::enable_if_t<std::is_same<
*this);

Kokkos::fence();
#ifdef KRS_ENABLE_NVSHMEMSPACE
nvshmem_barrier_all(); // Not sure why this impacts perf
#endif

auto policy = policy_update_t(ls, ts, 1);
for (int i = 0; i < iters; i++) {
Expand Down
2 changes: 1 addition & 1 deletion examples/heat3d/mpi/heat3d.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -126,7 +126,7 @@ struct System {
Kokkos::View<double***> T, dT;
// Halo data
using buffer_t =
Kokkos::View<double**, Kokkos::LayoutLeft, Kokkos::CudaSpace>;
Kokkos::View<double**, Kokkos::LayoutLeft, Kokkos::DefaultExecutionSpace>;
buffer_t T_left, T_right, T_up, T_down, T_front, T_back;
buffer_t T_left_out, T_right_out, T_up_out, T_down_out, T_front_out,
T_back_out;
Expand Down
11 changes: 7 additions & 4 deletions examples/matvec/multi-node/matvec.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -38,8 +38,8 @@ using VectorHost_r_t =

using VectorHost_t = Kokkos::View<VALUE_T *, Kokkos::HostSpace>;
using MatrixHost_t = Kokkos::View<VALUE_T **, Kokkos::HostSpace>;
using Vector_t = Kokkos::View<VALUE_T *, Kokkos::CudaSpace>;
using Matrix_t = Kokkos::View<VALUE_T **, Kokkos::CudaSpace>;
using Vector_t = Kokkos::View<VALUE_T *>;
using Matrix_t = Kokkos::View<VALUE_T **>;

int main(int argc, char *argv[]) {
int mpi_thread_level_available;
Expand Down Expand Up @@ -95,8 +95,11 @@ int main(int argc, char *argv[]) {
Kokkos::deep_copy(b_h, 0.0);
Kokkos::deep_copy(x_h, 1.0);

auto A = Kokkos::create_mirror_view_and_copy(Kokkos::CudaSpace(), A_h);
auto b = Kokkos::create_mirror_view_and_copy(Kokkos::CudaSpace(), b_h);
using DeviceMemorySpace =
typename Kokkos::DefaultExecutionSpace::memory_space;

auto A = Kokkos::create_mirror_view_and_copy(DeviceMemorySpace{}, A_h);
auto b = Kokkos::create_mirror_view_and_copy(DeviceMemorySpace{}, b_h);

// Copy host device data into global vector
Kokkos::deep_copy(x, x_h);
Expand Down
13 changes: 8 additions & 5 deletions examples/matvec/single-node/matvec.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -30,8 +30,8 @@ using VALUE_T = double;

using VectorHost_t = Kokkos::View<VALUE_T *, Kokkos::HostSpace>;
using MatrixHost_t = Kokkos::View<VALUE_T **, Kokkos::HostSpace>;
using Vector_t = Kokkos::View<VALUE_T *, Kokkos::CudaSpace>;
using Matrix_t = Kokkos::View<VALUE_T **, Kokkos::CudaSpace>;
using Vector_t = Kokkos::View<VALUE_T *>;
using Matrix_t = Kokkos::View<VALUE_T **>;

int main(int argc, char *argv[]) {
// Vars
Expand All @@ -56,9 +56,12 @@ int main(int argc, char *argv[]) {
Kokkos::deep_copy(b_h, 0.0);
Kokkos::deep_copy(x_h, 1.0);

auto A = Kokkos::create_mirror_view_and_copy(Kokkos::CudaSpace(), A_h);
auto b = Kokkos::create_mirror_view_and_copy(Kokkos::CudaSpace(), b_h);
auto x = Kokkos::create_mirror_view_and_copy(Kokkos::CudaSpace(), x_h);
using DeviceMemorySpace =
typename Kokkos::DefaultExecutionSpace::memory_space;

auto A = Kokkos::create_mirror_view_and_copy(DeviceMemorySpace{}, A_h);
auto b = Kokkos::create_mirror_view_and_copy(DeviceMemorySpace{}, b_h);
auto x = Kokkos::create_mirror_view_and_copy(DeviceMemorySpace{}, x_h);

Kokkos::Timer timer;
Kokkos::parallel_for(
Expand Down
52 changes: 26 additions & 26 deletions src/core/Kokkos_RemoteSpaces_DeepCopy.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -309,7 +309,7 @@ void view_copy_(const DstType& dst, const SrcType& src) {
}

// Figure out iteration order in case we need it
int64_t strides[DstType::Rank + 1];
int64_t strides[DstType::rank + 1];
dst.stride(strides);
Kokkos::Iterate iterate;
if (Kokkos::is_layouttiled<typename DstType::array_layout>::value) {
Expand All @@ -329,7 +329,7 @@ void view_copy_(const DstType& dst, const SrcType& src) {
Kokkos::PartitionedLayoutStride>::value ||
std::is_same<typename DstType::array_layout,
Kokkos::LayoutStride>::value) {
if (strides[0] > strides[DstType::Rank - 1])
if (strides[0] > strides[DstType::rank - 1])
iterate = Kokkos::Iterate::Right;
else
iterate = Kokkos::Iterate::Left;
Expand All @@ -348,40 +348,40 @@ void view_copy_(const DstType& dst, const SrcType& src) {
if (DstExecCanAccessSrc) {
if (iterate == Kokkos::Iterate::Right)
Kokkos::Impl::ViewCopy_<DstType, SrcType, Kokkos::LayoutRight,
dst_execution_space, DstType::Rank, int64_t>(
dst_execution_space, DstType::rank, int64_t>(
dst, src);
else
Kokkos::Impl::ViewCopy_<DstType, SrcType, Kokkos::LayoutLeft,
dst_execution_space, DstType::Rank, int64_t>(
dst_execution_space, DstType::rank, int64_t>(
dst, src);
} else {
if (iterate == Kokkos::Iterate::Right)
Kokkos::Impl::ViewCopy_<DstType, SrcType, Kokkos::LayoutRight,
src_execution_space, DstType::Rank, int64_t>(
src_execution_space, DstType::rank, int64_t>(
dst, src);
else
Kokkos::Impl::ViewCopy_<DstType, SrcType, Kokkos::LayoutLeft,
src_execution_space, DstType::Rank, int64_t>(
src_execution_space, DstType::rank, int64_t>(
dst, src);
}
} else {
if (DstExecCanAccessSrc) {
if (iterate == Kokkos::Iterate::Right)
Kokkos::Impl::ViewCopy_<DstType, SrcType, Kokkos::LayoutRight,
dst_execution_space, DstType::Rank, int>(dst,
dst_execution_space, DstType::rank, int>(dst,
src);
else
Kokkos::Impl::ViewCopy_<DstType, SrcType, Kokkos::LayoutLeft,
dst_execution_space, DstType::Rank, int>(dst,
dst_execution_space, DstType::rank, int>(dst,
src);
} else {
if (iterate == Kokkos::Iterate::Right)
Kokkos::Impl::ViewCopy_<DstType, SrcType, Kokkos::LayoutRight,
src_execution_space, DstType::Rank, int>(dst,
src_execution_space, DstType::rank, int>(dst,
src);
else
Kokkos::Impl::ViewCopy_<DstType, SrcType, Kokkos::LayoutLeft,
src_execution_space, DstType::Rank, int>(dst,
src_execution_space, DstType::rank, int>(dst,
src);
}
}
Expand Down Expand Up @@ -517,19 +517,19 @@ inline void deep_copy(
"match: ");
message += dst.label();
message += "(";
for (int r = 0; r < dst_type::Rank - 1; r++) {
for (int r = 0; r < dst_type::rank - 1; r++) {
message += std::to_string(dst.extent(r));
message += ",";
}
message += std::to_string(dst.extent(dst_type::Rank - 1));
message += std::to_string(dst.extent(dst_type::rank - 1));
message += ") ";
message += src.label();
message += "(";
for (int r = 0; r < src_type::Rank - 1; r++) {
for (int r = 0; r < src_type::rank - 1; r++) {
message += std::to_string(src.extent(r));
message += ",";
}
message += std::to_string(src.extent(src_type::Rank - 1));
message += std::to_string(src.extent(src_type::rank - 1));
message += ") ";

Kokkos::Impl::throw_runtime_exception(message);
Expand Down Expand Up @@ -604,19 +604,19 @@ inline void deep_copy(
"Deprecation Error: Kokkos::deep_copy extents of views don't match: ");
message += dst.label();
message += "(";
for (int r = 0; r < dst_type::Rank - 1; r++) {
for (int r = 0; r < dst_type::rank - 1; r++) {
message += std::to_string(dst.extent(r));
message += ",";
}
message += std::to_string(dst.extent(dst_type::Rank - 1));
message += std::to_string(dst.extent(dst_type::rank - 1));
message += ") ";
message += src.label();
message += "(";
for (int r = 0; r < src_type::Rank - 1; r++) {
for (int r = 0; r < src_type::rank - 1; r++) {
message += std::to_string(src.extent(r));
message += ",";
}
message += std::to_string(src.extent(src_type::Rank - 1));
message += std::to_string(src.extent(src_type::rank - 1));
message += ") ";

Kokkos::Impl::throw_runtime_exception(message);
Expand Down Expand Up @@ -746,19 +746,19 @@ inline void deep_copy(
"match: ");
message += dst.label();
message += "(";
for (int r = 0; r < dst_type::Rank - 1; r++) {
for (int r = 0; r < dst_type::rank - 1; r++) {
message += std::to_string(dst.extent(r));
message += ",";
}
message += std::to_string(dst.extent(dst_type::Rank - 1));
message += std::to_string(dst.extent(dst_type::rank - 1));
message += ") ";
message += src.label();
message += "(";
for (int r = 0; r < src_type::Rank - 1; r++) {
for (int r = 0; r < src_type::rank - 1; r++) {
message += std::to_string(src.extent(r));
message += ",";
}
message += std::to_string(src.extent(src_type::Rank - 1));
message += std::to_string(src.extent(src_type::rank - 1));
message += ") ";

Kokkos::Impl::throw_runtime_exception(message);
Expand Down Expand Up @@ -816,19 +816,19 @@ inline void deep_copy(
"Deprecation Error: Kokkos::deep_copy extents of views don't match: ");
message += dst.label();
message += "(";
for (int r = 0; r < dst_type::Rank - 1; r++) {
for (int r = 0; r < dst_type::rank - 1; r++) {
message += std::to_string(dst.extent(r));
message += ",";
}
message += std::to_string(dst.extent(dst_type::Rank - 1));
message += std::to_string(dst.extent(dst_type::rank - 1));
message += ") ";
message += src.label();
message += "(";
for (int r = 0; r < src_type::Rank - 1; r++) {
for (int r = 0; r < src_type::rank - 1; r++) {
message += std::to_string(src.extent(r));
message += ",";
}
message += std::to_string(src.extent(src_type::Rank - 1));
message += std::to_string(src.extent(src_type::rank - 1));
message += ") ";

Kokkos::Impl::throw_runtime_exception(message);
Expand Down
104 changes: 104 additions & 0 deletions src/core/Kokkos_RemoteSpaces_Error.hpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,104 @@
//@HEADER
// ************************************************************************
//
// Kokkos v. 4.0
// Copyright (2022) National Technology & Engineering
// Solutions of Sandia, LLC (NTESS).
//
// Under the terms of Contract DE-NA0003525 with NTESS,
// the U.S. Government retains certain rights in this software.
//
// Part of Kokkos, under the Apache License v2.0 with LLVM Exceptions.
// See https://kokkos.org/LICENSE for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
// Contact: Jan Ciesko (jciesko@sandia.gov)
//
//@HEADER

namespace Kokkos {
namespace Impl {
namespace Experimental {

// Exception describing a failed remote-space memory allocation.
//
// Derives from std::bad_alloc so that handlers catching std::bad_alloc (or
// std::exception) also catch it. In addition to the textual description
// returned by what(), the object records the size and alignment that were
// requested, a coarse failure category, and the allocation mechanism that was
// in use.
class RemoteSpacesMemoryAllocationFailure : public std::bad_alloc {
 public:
  // Coarse category of the failure; selects the message returned by what().
  enum class FailureMode {
    OutOfMemoryError,
    AllocationNotAligned,
    InvalidAllocationSize,
    Unknown
  };

  // Identifies the allocation routine that failed.
  // NOTE(review): the enumerator names suggest SHMEM / NVSHMEM / ROCSHMEM /
  // MPI window allocation back ends -- confirm against the call sites.
  enum class AllocationMechanism {
    SHMEMMALLOCDEFAULT,
    SHMEMMALLOC,
    NVSHMEMMALLOC,
    ROCSHMEMMALLOC,
    MPIWINALLOC
  };

 private:
  size_t m_attempted_size;
  size_t m_attempted_alignment;
  FailureMode m_failure_mode;
  AllocationMechanism m_mechanism;

 public:
  // Records the parameters of the failed allocation.
  //
  // arg_attempted_size       size that was requested
  // arg_attempted_alignment  alignment that was requested
  // arg_failure_mode         failure category (default: OutOfMemoryError)
  // arg_mechanism            mechanism in use (default: SHMEMMALLOCDEFAULT)
  RemoteSpacesMemoryAllocationFailure(
      size_t arg_attempted_size, size_t arg_attempted_alignment,
      FailureMode arg_failure_mode = FailureMode::OutOfMemoryError,
      AllocationMechanism arg_mechanism =
          AllocationMechanism::SHMEMMALLOCDEFAULT) noexcept
      : m_attempted_size(arg_attempted_size),
        m_attempted_alignment(arg_attempted_alignment),
        m_failure_mode(arg_failure_mode),
        m_mechanism(arg_mechanism) {}

  // A failure object without the attempted size/alignment is not meaningful.
  RemoteSpacesMemoryAllocationFailure() noexcept = delete;

  RemoteSpacesMemoryAllocationFailure(
      RemoteSpacesMemoryAllocationFailure const &) noexcept = default;
  RemoteSpacesMemoryAllocationFailure(
      RemoteSpacesMemoryAllocationFailure &&) noexcept = default;

  RemoteSpacesMemoryAllocationFailure &operator=(
      RemoteSpacesMemoryAllocationFailure const &) noexcept = default;
  RemoteSpacesMemoryAllocationFailure &operator=(
      RemoteSpacesMemoryAllocationFailure &&) noexcept = default;

  ~RemoteSpacesMemoryAllocationFailure() noexcept override = default;

  // Returns a static, null-terminated description of the failure category.
  //
  // Every FailureMode enumerator maps to a message, and the result is never
  // nullptr: handlers commonly stream what() directly, which is undefined
  // behaviour for a null pointer.
  [[nodiscard]] const char *what() const noexcept override {
    switch (m_failure_mode) {
      case FailureMode::OutOfMemoryError:
        return "Memory allocation error: out of memory";
      case FailureMode::AllocationNotAligned:
        return "Memory allocation error: allocation result was under-aligned";
      case FailureMode::InvalidAllocationSize:
        return "Memory allocation error: invalid allocation size";
      case FailureMode::Unknown:
        return "Memory allocation error: unknown failure";
    }
    // Only reached if m_failure_mode holds a value outside the enumerators
    // (e.g. produced by a cast); still return a valid string.
    return "Memory allocation error: unrecognized failure mode";
  }

  // Size passed to the constructor.
  [[nodiscard]] size_t attempted_size() const noexcept {
    return m_attempted_size;
  }

  // Alignment passed to the constructor.
  [[nodiscard]] size_t attempted_alignment() const noexcept {
    return m_attempted_alignment;
  }

  // Allocation mechanism passed to the constructor.
  [[nodiscard]] AllocationMechanism allocation_mechanism() const noexcept {
    return m_mechanism;
  }

  // Failure category passed to the constructor.
  [[nodiscard]] FailureMode failure_mode() const noexcept {
    return m_failure_mode;
  }

  // Declared here only; the definitions are not part of this header.
  // NOTE(review): presumably these format a detailed diagnostic from the
  // fields above -- confirm against the implementation file.
  void print_error_message(std::ostream &o) const;
  [[nodiscard]] std::string get_error_message() const;

  // Customization hook for derived classes; the base implementation writes
  // nothing to the stream.
  virtual void append_additional_error_information(std::ostream &) const {}
};

} // namespace Experimental
} // namespace Impl
} // namespace Kokkos
Loading

0 comments on commit 242325c

Please sign in to comment.