perf_test/blas/blas3: Fix device verify
  - The device verification option is not correct: it always
  compares 0s from the HostMirror views because the data was
  never copied to the host.
e10harvey authored and Luc Berger-Vergiat committed May 10, 2021
1 parent 645eb4e commit 2a3eee3
Showing 1 changed file with 50 additions and 26 deletions.
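
The pattern the fix applies, shown as a minimal self-contained sketch: when a view lives in device-only memory, Kokkos::create_mirror_view allocates separate, zero-initialized host storage, so the mirror only holds the device data after an explicit Kokkos::deep_copy, and host-side writes likewise only reach the device after a deep_copy back. The view alias and function names below are illustrative, not taken from KokkosBlas3_gemm_perf_test.hpp.

// Minimal, illustrative sketch (hypothetical names; not the perf_test code).
#include <Kokkos_Core.hpp>
#include <cmath>

using view3d_t = Kokkos::View<double ***>;

// Compare two device views on the host.
bool device_views_match(view3d_t expected, view3d_t actual, double epsilon) {
  // On a device backend these mirrors are fresh, zero-initialized host
  // allocations; without the deep_copy calls below the comparison would
  // only ever see those zeros (the bug this commit fixes).
  auto h_expected = Kokkos::create_mirror_view(expected);
  auto h_actual   = Kokkos::create_mirror_view(actual);
  Kokkos::deep_copy(h_expected, expected);
  Kokkos::deep_copy(h_actual, actual);
  Kokkos::fence();

  for (size_t i = 0; i < h_expected.extent(0); ++i)
    for (size_t j = 0; j < h_expected.extent(1); ++j)
      for (size_t k = 0; k < h_expected.extent(2); ++k)
        if (std::fabs(h_expected(i, j, k) - h_actual(i, j, k)) > epsilon)
          return false;
  return true;
}

// The reverse direction: fill a host mirror element by element, then push it
// back to the device view so later device-side reads see the data.
void fill_on_host_then_copy_back(view3d_t dst, double value) {
  auto h_dst = Kokkos::create_mirror_view(dst);
  for (size_t i = 0; i < h_dst.extent(0); ++i)
    for (size_t j = 0; j < h_dst.extent(1); ++j)
      for (size_t k = 0; k < h_dst.extent(2); ++k)
        h_dst(i, j, k) = value;
  Kokkos::deep_copy(dst, h_dst);  // without this, dst never sees the host writes
  Kokkos::fence();
}

In the diff below, __gemm_do_compare now mirrors and deep-copies both operands before its comparison loops, and __gemm_copy_simd_view_to_3d_view fills h_dst on the host and deep-copies it back to dst before returning.
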
76 changes: 50 additions & 26 deletions perf_test/blas/blas3/KokkosBlas3_gemm_perf_test.hpp
@@ -1315,14 +1315,11 @@ void __do_gemm_parallel_experiment6(options_t options, gemm_args_t gemm_args) {
* @var epsilon: The tolerance to use when comparing.
* @return true if the comparison fails and false if the comparison succeeds.
*/
static inline bool __gemm_print_compare_failure(view_type_3d expected,
view_type_3d actual, int i,
template<class ViewType>
static inline bool __gemm_print_compare_failure(ViewType h_expected,
ViewType h_actual, int i,
int j, int k, double epsilon) {
STATUS;
typename view_type_3d::HostMirror h_expected =
Kokkos::create_mirror_view(expected);
typename view_type_3d::HostMirror h_actual =
Kokkos::create_mirror_view(actual);
auto diff = static_cast<double>(Kokkos::Experimental::fabs(
static_cast<double>(h_expected(i, j, k) - h_actual(i, j, k))));

@@ -1349,22 +1346,32 @@ static inline bool __gemm_do_compare(view_type_3d expected,
double epsilon = Test::epsilon<ScalarType>::value * 1e3;
STATUS;

typename view_type_3d::HostMirror h_expected =
Kokkos::create_mirror_view(expected);
typename view_type_3d::HostMirror h_actual =
Kokkos::create_mirror_view(actual);

// Copy to host for comparison
Kokkos::deep_copy(h_expected, expected);
Kokkos::deep_copy(h_actual, actual);
Kokkos::fence();

if (std::is_same<LayoutType, Kokkos::LayoutRight>::value) {
for (size_t i = 0; i < expected.extent(0); i++) {
for (size_t j = 0; j < expected.extent(1); j++) {
for (size_t k = 0; k < expected.extent(2); k++) {
if (__gemm_print_compare_failure(expected, actual, i, j, k, epsilon))
for (size_t i = 0; i < h_expected.extent(0); i++) {
for (size_t j = 0; j < h_expected.extent(1); j++) {
for (size_t k = 0; k < h_expected.extent(2); k++) {
if (__gemm_print_compare_failure<decltype(h_expected)>(h_expected, h_actual, i, j, k, epsilon))
return true;
}
}
}
}

if (std::is_same<LayoutType, Kokkos::LayoutLeft>::value) {
for (size_t k = 0; k < expected.extent(2); k++) {
for (size_t j = 0; j < expected.extent(1); j++) {
for (size_t i = 0; i < expected.extent(0); i++) {
if (__gemm_print_compare_failure(expected, actual, i, j, k, epsilon))
for (size_t k = 0; k < h_expected.extent(2); k++) {
for (size_t j = 0; j < h_expected.extent(1); j++) {
for (size_t i = 0; i < h_expected.extent(0); i++) {
if (__gemm_print_compare_failure<decltype(h_expected)>(h_expected, h_actual, i, j, k, epsilon))
return true;
}
}
@@ -1380,20 +1387,28 @@ static inline void __gemm_copy_simd_view_to_3d_view(gemm_simd_args_t src,
options_t options) {
using dst_scalar_type = typename dstViewType::value_type;
using src_scalar_type = typename view_type_5d::value_type;
size_t remainder, vector_batch_size, simd_batch_size;
size_t remainder, vector_batch_size, simd_batch_size, last_batch;
bool data_layout_same_as_3d_view = false;
typename dstViewType::HostMirror h_dst =
Kokkos::create_mirror_view(dst);
typename view_type_4d::HostMirror h_src =
Kokkos::create_mirror_view(src.mat_4d);
Kokkos::deep_copy(h_src, src.mat_4d);
Kokkos::fence();

if (options.blas_args.batch_size_last_dim) {
remainder = dst.extent(2) % simd_vector_size;
remainder = dst.extent(2) % simd_internal_vector_size;
vector_batch_size = src.ivec_4d.extent(0);
simd_batch_size = src.ivec_4d.extent(3);
last_batch = dst.extent(2);
if (std::is_same<default_layout, Kokkos::LayoutRight>::value && remainder == 0)
data_layout_same_as_3d_view = true;

} else {
remainder = dst.extent(0) % simd_vector_size;
remainder = dst.extent(0) % simd_internal_vector_size;
vector_batch_size = src.ivec_4d.extent(3);
simd_batch_size = src.ivec_4d.extent(0);
last_batch = dst.extent(0);
if (std::is_same<default_layout, Kokkos::LayoutLeft>::value && remainder == 0)
data_layout_same_as_3d_view = true;
}
@@ -1403,34 +1418,38 @@ static inline void __gemm_copy_simd_view_to_3d_view(gemm_simd_args_t src,
// lies in the correct location and the data can simply be cast to the 3d view.
if (data_layout_same_as_3d_view) {
// We can just re-cast the data to the 3d view but we'll copy it for verification
memcpy(dst.data(), src.ivec_4d.data(),
memcpy(h_dst.data(), h_src.data(),
sizeof(dst_scalar_type) * dst.extent(0) * dst.extent(1) *
dst.extent(2));
Kokkos::deep_copy(dst, h_dst);
Kokkos::fence();
return;
}

// If the remainder is 0, we have simd_vector_size sub-batches to copy out...
// this is a bad data access pattern but for these perf_tests we will support it.
remainder = remainder == 0 ? simd_vector_size : remainder;
// If the remainder is non-zero, we have simd_vector_size sub-batches + remainder to
// copy out.
remainder += simd_internal_vector_size;

// Views needed for slow manual copy
view_type_5d src_raw;
using h_view_type_5d = Kokkos::View<src_scalar_type *****, default_layout, Kokkos::HostSpace>;
using h_subview_type_2d = Kokkos::View<src_scalar_type **, Kokkos::LayoutStride, Kokkos::HostSpace>;
using h_subview_type_3d = Kokkos::View<src_scalar_type ***, Kokkos::LayoutStride, Kokkos::HostSpace>;
using h_subview_type_4d = Kokkos::View<src_scalar_type ****, Kokkos::LayoutStride, Kokkos::HostSpace>;
h_view_type_5d h_src_raw;
h_subview_type_4d h_sv0;
h_subview_type_3d h_sv1;
h_subview_type_2d h_sv2;

// TODO: Clean everything below this point up...
if (std::is_same<default_layout, Kokkos::LayoutRight>::value)
src_raw = view_type_5d((src_scalar_type *)src.ivec_4d.data(), src.ivec_4d.extent(0), src.ivec_4d.extent(1), src.ivec_4d.extent(2), src.ivec_4d.extent(3), simd_internal_vector_size);
h_src_raw = h_view_type_5d((src_scalar_type *)h_src.data(), src.ivec_4d.extent(0), src.ivec_4d.extent(1), src.ivec_4d.extent(2), src.ivec_4d.extent(3), simd_internal_vector_size);
else
src_raw = view_type_5d((src_scalar_type *)src.ivec_4d.data(),
h_src_raw = h_view_type_5d((src_scalar_type *)h_src.data(),
simd_internal_vector_size, src.ivec_4d.extent(0),
src.ivec_4d.extent(1), src.ivec_4d.extent(2),
src.ivec_4d.extent(3));
typename view_type_5d::HostMirror h_src_raw =
Kokkos::create_mirror_view(src_raw);

// The loops below copy each corresponding 2-rank matrix within the simd
// view back to the 3-rank view.
@@ -1457,14 +1476,19 @@ static inline void __gemm_copy_simd_view_to_3d_view(gemm_simd_args_t src,
for (size_t m = 0; m < src.ivec_4d.extent(1); m++) {
for (size_t n = 0; n < src.ivec_4d.extent(2); n++) {
if (options.blas_args.batch_size_last_dim)
dst(m, n, simd_internal_vec_idx + simd_batch_size_idx + vector_batch_idx) = h_sv2(m, n);
h_dst(m, n, simd_internal_vec_idx + simd_batch_size_idx + vector_batch_idx) = h_sv2(m, n);
else
dst(simd_internal_vec_idx + simd_batch_size_idx + vector_batch_idx, m, n) = h_sv2(m, n);
h_dst(simd_internal_vec_idx + simd_batch_size_idx + vector_batch_idx, m, n) = h_sv2(m, n);
}
}
if (simd_internal_vec_idx + simd_batch_size_idx + vector_batch_idx == last_batch - 1)
goto out;
}
}
}
out:
Kokkos::deep_copy(dst, h_dst);
Kokkos::fence();
}

/**