Skip to content

Commit

Permalink
Add specific tests for OMP reductions
Browse files Browse the repository at this point in the history
  • Loading branch information
Thomas Grützmacher committed Jul 20, 2023
1 parent 69ed150 commit a30d10d
Showing 1 changed file with 168 additions and 0 deletions.
168 changes: 168 additions & 0 deletions test/base/kernel_launch_generic.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,7 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#include "common/unified/base/kernel_launch.hpp"


#include <algorithm>
#include <memory>
#include <type_traits>

Expand Down Expand Up @@ -364,6 +365,39 @@ void run1d_reduction(std::shared_ptr<gko::EXEC_TYPE> exec)
// Runs the 1D reduction helper on the executor provided by the fixture.
TEST_F(KernelLaunch, Reduction1D)
{
    run1d_reduction(exec);
}


// Exercises run_kernel_reduction_cached in 1D for every entry of `sizes`.
//
// The kernel maps index i to i + 1 and reduces with max (identity int64{}),
// so the expected result for an input of n elements is n. After each run, the
// size of the temporary array (counted in int64 elements) has to be smaller
// than the number of input elements.
void run1d_reduction_cached(std::shared_ptr<gko::EXEC_TYPE> exec,
                            std::vector<size_type> sizes)
{
    gko::array<int64> result{exec, 1};
    gko::array<char> workspace(exec);
    for (const auto& num_elems : sizes) {
        // Reset the workspace so the size check below only sees what this
        // run requested (presumably clear() releases the storage -- confirm
        // against gko::array)
        workspace.clear();
        gko::kernels::EXEC_NAMESPACE::run_kernel_reduction_cached(
            exec,
            [] GKO_KERNEL(auto idx) {
                static_assert(is_same<decltype(idx), int64>::value, "index");
                return idx + 1;
            },
            [] GKO_KERNEL(auto lhs, auto rhs) { return std::max(lhs, rhs); },
            [] GKO_KERNEL(auto val) { return val; }, int64{},
            result.get_data(), num_elems, workspace);

        const auto expected = static_cast<int64>(num_elems);
        ASSERT_EQ(exec->copy_val_to_host(result.get_const_data()), expected);
        // The workspace holds partial results; it must need fewer int64
        // entries than the input has elements
        const auto workspace_elems =
            workspace.get_num_elems() / sizeof(int64);
        ASSERT_LT(workspace_elems, num_elems);
    }
}

TEST_F(KernelLaunch, Reduction1DCached)
{
    // Every size is kept well above 200 elements: the helper requires the
    // temporary storage to be smaller than the input, which needs enough
    // elements in case the machine has a lot of cores
    const std::vector<size_type> sizes = {1000, 1000000, 1234567, 7654321};

    run1d_reduction_cached(exec, sizes);
}


void run2d_reduction(std::shared_ptr<gko::EXEC_TYPE> exec)
{
gko::array<int64> output{exec, 1};
Expand Down Expand Up @@ -419,6 +453,47 @@ void run2d_reduction(std::shared_ptr<gko::EXEC_TYPE> exec)
// Runs the 2D reduction helper on the executor provided by the fixture.
TEST_F(KernelLaunch, Reduction2D)
{
    run2d_reduction(exec);
}


// Exercises run_kernel_reduction_cached in 2D for every entry of `dims`.
//
// The kernel maps (i, j) to i + j + 2 and reduces with max (identity
// int64{}), so the expected result for a rows x cols input is rows + cols.
// After each run, the size of the temporary array (counted in int64 elements)
// has to be smaller than the total number of input entries.
void run2d_reduction_cached(std::shared_ptr<gko::EXEC_TYPE> exec,
                            std::vector<gko::dim<2>> dims)
{
    gko::array<int64> result{exec, 1};
    gko::array<char> workspace(exec);
    for (const auto& shape : dims) {
        // Reset the workspace so the size check below only sees what this
        // run requested (presumably clear() releases the storage -- confirm
        // against gko::array)
        workspace.clear();
        gko::kernels::EXEC_NAMESPACE::run_kernel_reduction_cached(
            exec,
            [] GKO_KERNEL(auto row, auto col) {
                static_assert(is_same<decltype(row), int64>::value, "index");
                static_assert(is_same<decltype(col), int64>::value, "index");
                return row + col + 2;
            },
            [] GKO_KERNEL(auto lhs, auto rhs) { return std::max(lhs, rhs); },
            [] GKO_KERNEL(auto val) { return val; }, int64{},
            result.get_data(), shape, workspace);

        const auto expected = static_cast<int64>(shape[0] + shape[1]);
        ASSERT_EQ(exec->copy_val_to_host(result.get_const_data()), expected);
        // The workspace holds partial results; it must need fewer int64
        // entries than the input has elements
        const auto workspace_elems =
            workspace.get_num_elems() / sizeof(int64);
        ASSERT_LT(workspace_elems, shape[0] * shape[1]);
    }
}

TEST_F(KernelLaunch, Reduction2DCached)
{
    // The smallest case has 20 * 10 = 200 entries: the helper requires the
    // temporary storage to be smaller than the input, which needs enough
    // elements in case the machine has a lot of cores
    const std::vector<gko::dim<2>> dims = {
        {20, 10},    {10, 3000},  {1000, 5},    {30, 50},
        {1, 100000}, {100000, 1}, {500000, 20}, {20, 500000}};

    run2d_reduction_cached(exec, dims);
}


void run2d_row_reduction(std::shared_ptr<gko::EXEC_TYPE> exec)
{
for (auto num_rows : {0, 100, 1000, 10000}) {
Expand Down Expand Up @@ -468,6 +543,53 @@ void run2d_row_reduction(std::shared_ptr<gko::EXEC_TYPE> exec)
// Runs the 2D row reduction helper on the executor provided by the fixture.
TEST_F(KernelLaunch, ReductionRow2D)
{
    run2d_row_reduction(exec);
}


// Exercises run_kernel_row_reduction_cached for every entry of `dims`.
//
// The kernel maps (i, j) to i + j + 2 and reduces each row with max (identity
// int64{}), so the expected value for row i of a rows x cols input is
// cols + i + 1. The results are written with stride 1 and compared against a
// reference computed on the master executor. After each run, the size of the
// temporary array (counted in int64 elements) is checked against a fixed
// upper bound.
void run2d_row_reduction_cached(std::shared_ptr<gko::EXEC_TYPE> exec,
                                std::vector<gko::dim<2>> dims)
{
    // This assumes at most 256 OpenMP Threads
    // Kept as size_type so the comparison in ASSERT_LT below is
    // unsigned-vs-unsigned, like in the column reduction test
    constexpr size_type max_tmp_elems = 4 * 256;
    const size_type result_stride = 1;
    gko::array<char> temp(exec);
    for (const auto& dim : dims) {
        gko::array<int64> host_ref{exec->get_master(), dim[0]};
        // Allocate the output by size instead of copying host_ref: the
        // reference has not been filled yet, and every entry of the output
        // is expected to be overwritten by the kernel anyway
        gko::array<int64> output{exec, dim[0]};
        temp.clear();
        // max over all columns j of (i + j + 2) is dim[1] + i + 1
        for (size_type row = 0; row < host_ref.get_num_elems(); ++row) {
            host_ref.get_data()[row] = static_cast<int64>(dim[1] + row + 1);
        }

        gko::kernels::EXEC_NAMESPACE::run_kernel_row_reduction_cached(
            exec,
            [] GKO_KERNEL(auto i, auto j) {
                static_assert(is_same<decltype(i), int64>::value, "index");
                static_assert(is_same<decltype(j), int64>::value, "index");
                return i + j + 2;
            },
            [] GKO_KERNEL(auto i, auto j) { return std::max(i, j); },
            [] GKO_KERNEL(auto j) { return j; }, int64{}, output.get_data(),
            result_stride, dim, temp);

        GKO_ASSERT_ARRAY_EQ(host_ref, output);
        // NOTE(review): the bound is the square of max_tmp_elems and does
        // not depend on the input size -- confirm this is the intended limit
        ASSERT_LT(temp.get_num_elems() / sizeof(int64),
                  max_tmp_elems * max_tmp_elems);
    }
}

TEST_F(KernelLaunch, ReductionRowCached)
{
    // Note: the smallest case has 20 * 10 = 200 entries, so every input has
    // at least 200 elements in case the machine has a lot of cores
    const std::vector<gko::dim<2>> dims = {
        {20, 10},    {10, 3000},  {1000, 5},    {30, 50},
        {1, 100000}, {100000, 1}, {500000, 20}, {20, 500000}};

    run2d_row_reduction_cached(exec, dims);
}


void run2d_col_reduction(std::shared_ptr<gko::EXEC_TYPE> exec)
{
// empty, most threads idle, most threads busy, multiple blocks
Expand Down Expand Up @@ -517,3 +639,49 @@ void run2d_col_reduction(std::shared_ptr<gko::EXEC_TYPE> exec)
}

// Runs the 2D column reduction helper on the executor provided by the
// fixture.
TEST_F(KernelLaunch, ReductionCol2D)
{
    run2d_col_reduction(exec);
}


// Exercises run_kernel_col_reduction_cached for every entry of `dims`.
//
// The kernel maps (i, j) to i + j + 2 and reduces each column with max
// (identity int64{}), so the expected value for column j of a rows x cols
// input is rows + j + 1. The results are compared against a reference
// computed on the master executor. After each run, the size of the temporary
// array (counted in int64 elements) has to stay below the larger of 4 * 256
// and the number of input entries.
void run2d_col_reduction_cached(std::shared_ptr<gko::EXEC_TYPE> exec,
                                std::vector<gko::dim<2>> dims)
{
    gko::array<char> temp(exec);
    for (const auto& dim : dims) {
        gko::array<int64> host_ref{exec->get_master(), dim[1]};
        // Allocate the output by size instead of copying host_ref: the
        // reference has not been filled yet, and every entry of the output
        // is expected to be overwritten by the kernel anyway
        gko::array<int64> output{exec, dim[1]};
        temp.clear();
        // max over all rows i of (i + j + 2) is dim[0] + j + 1
        for (size_type col = 0; col < host_ref.get_num_elems(); ++col) {
            host_ref.get_data()[col] = static_cast<int64>(dim[0] + col + 1);
        }

        gko::kernels::EXEC_NAMESPACE::run_kernel_col_reduction_cached(
            exec,
            [] GKO_KERNEL(auto i, auto j) {
                static_assert(is_same<decltype(i), int64>::value, "index");
                static_assert(is_same<decltype(j), int64>::value, "index");
                return i + j + 2;
            },
            [] GKO_KERNEL(auto i, auto j) { return std::max(i, j); },
            [] GKO_KERNEL(auto j) { return j; }, int64{}, output.get_data(),
            dim, temp);

        GKO_ASSERT_ARRAY_EQ(host_ref, output);
        // This assumes at most 256 OpenMP Threads
        const size_type temp_elem_limit =
            std::max(size_type{4 * 256}, dim[0] * dim[1]);
        ASSERT_LT(temp.get_num_elems() / sizeof(int64), temp_elem_limit);
    }
}

TEST_F(KernelLaunch, ReductionColCached)
{
    // Note: the smallest case has 20 * 10 = 200 entries, so every input has
    // at least 200 elements in case the machine has a lot of cores
    const std::vector<gko::dim<2>> dims = {
        {20, 10},    {10, 3000},  {1000, 5},    {30, 50},
        {1, 100000}, {100000, 1}, {500000, 20}, {20, 500000}};

    run2d_col_reduction_cached(exec, dims);
}

0 comments on commit a30d10d

Please sign in to comment.