Skip to content

Commit

Permalink
Small fixes to spgemm, and plug gaps in testing (#1159)
Browse files Browse the repository at this point in the history
* Small fixes to spgemm, and plug gaps in testing

- Simplified interface: don't destroy spgemm subhandle after numeric.
Want to be able to reuse, and user is responsible for creating the
handle anyway.
- Don't default to SPGEMM_CUSPARSE unless cusparse TPL is actually
enabled.
- Don't divide by 0 when A has 0 rows.
- In testing, test the case where A has 0 rows.
- In testing, actually test default algo SPGEMM_KK, since this takes a
slightly different code path than SPGEMM_KK_MEMORY.

* In spgemm test, rename dimensions to m/k/n
  • Loading branch information
brian-kelley authored Nov 1, 2021
1 parent df04b2a commit c451856
Showing 6 changed files with 50 additions and 29 deletions.
6 changes: 6 additions & 0 deletions src/common/KokkosKernels_IOUtils.hpp
Original file line number Diff line number Diff line change
@@ -103,6 +103,12 @@ void kk_sparseMatrix_generate(OrdinalType nrows, OrdinalType ncols,
int varianz = (1.0 * rand() / RAND_MAX - 0.5) * row_size_variance;
int numRowEntries = elements_per_row + varianz;
if (numRowEntries < 0) numRowEntries = 0;
// Clamping numRowEntries above accomplishes 2 things:
// - If ncols is 0, numRowEntries will also be 0
// - With numRowEntries at most 2/3 the number of columns, in the worst
// case
// 90% of insertions will succeed after 6 tries
if (numRowEntries > 0.66 * ncols) numRowEntries = 0.66 * ncols;
rowPtr[row + 1] = rowPtr[row] + numRowEntries;
}
nnz = rowPtr[nrows];
1 change: 0 additions & 1 deletion src/sparse/KokkosSparse_spgemm.hpp
Original file line number Diff line number Diff line change
@@ -92,7 +92,6 @@ void spgemm_numeric(KernelHandle& kh, const AMatrix& A, const bool Amode,
&kh, A.numRows(), B.numRows(), B.numCols(), A.graph.row_map,
A.graph.entries, A.values, Amode, B.graph.row_map, B.graph.entries,
B.values, Bmode, C.graph.row_map, C.graph.entries, C.values);
kh.destroy_spgemm_handle();
}

} // namespace KokkosSparse
4 changes: 4 additions & 0 deletions src/sparse/KokkosSparse_spgemm_handle.hpp
Original file line number Diff line number Diff line change
@@ -547,7 +547,11 @@ class SPGEMMHandle {

#if defined(KOKKOS_ENABLE_CUDA)
if (std::is_same<Kokkos::Cuda, ExecutionSpace>::value) {
#ifdef KOKKOSKERNELS_ENABLE_TPL_CUSPARSE
this->algorithm_type = SPGEMM_CUSPARSE;
#else
this->algorithm_type = SPGEMM_KK;
#endif
#ifdef VERBOSE
std::cout << "Cuda Execution Space, Default Algorithm: SPGEMM_CUSPARSE"
<< std::endl;
8 changes: 6 additions & 2 deletions src/sparse/impl/KokkosSparse_spgemm_impl_kkmem.hpp
Original file line number Diff line number Diff line change
@@ -1364,8 +1364,12 @@ void KokkosSPGEMM<HandleType, a_row_view_t_, a_lno_nnz_view_t_,
if (KokkosKernels::Impl::kk_is_gpu_exec_space<
typename HandleType::HandleExecSpace>()) {
// then chose the best method and parameters.
size_type average_row_nnz = overall_nnz / this->a_row_cnt;
size_t average_row_flops = original_overall_flops / this->a_row_cnt;
size_type average_row_nnz = 0;
size_t average_row_flops = 0;
if (this->a_row_cnt > 0) {
average_row_nnz = overall_nnz / this->a_row_cnt;
average_row_flops = original_overall_flops / this->a_row_cnt;
}
// if we have very low flops per row, or our maximum number of nnz is
// prett small, then we do row-base algorithm.
if (SPGEMM_KK_LP != this->spgemm_algorithm &&
8 changes: 5 additions & 3 deletions src/sparse/impl/KokkosSparse_spgemm_impl_symbolic.hpp
Original file line number Diff line number Diff line change
@@ -1804,9 +1804,11 @@ void KokkosSPGEMM<
#else
size_t original_overall_flops =
this->handle->get_spgemm_handle()->compressed_overall_flops;
size_t estimate_max_nnz =
(sqrt(maxNumRoughNonzeros) * sqrt(original_overall_flops / m)) /
estimate_compress;
size_t estimate_max_nnz = 0;
if (m > 0)
estimate_max_nnz =
(sqrt(maxNumRoughNonzeros) * sqrt(original_overall_flops / m)) /
estimate_compress;
if (KOKKOSKERNELS_VERBOSE) {
std::cout << "\t\t\testimate_max_nnz:" << estimate_max_nnz
<< " maxNumRoughNonzeros:" << maxNumRoughNonzeros
52 changes: 29 additions & 23 deletions unit_test/sparse/Test_Sparse_spgemm.hpp
Original file line number Diff line number Diff line change
@@ -246,9 +246,11 @@ bool is_same_matrix(crsMat_t output_mat_actual, crsMat_t output_mat_reference) {
}
} // namespace Test

// Generate matrices and test all supported spgemm algorithms.
// C := AB, where A is m*k, B is k*n, and C is m*n.
template <typename scalar_t, typename lno_t, typename size_type,
typename device>
void test_spgemm(lno_t numRows, size_type nnz, lno_t bandwidth,
void test_spgemm(lno_t m, lno_t k, lno_t n, size_type nnz, lno_t bandwidth,
lno_t row_size_variance, bool oldInterface = false) {
using namespace Test;
// device::execution_space::initialize();
@@ -260,22 +262,21 @@ void test_spgemm(lno_t numRows, size_type nnz, lno_t bandwidth,
// typedef typename graph_t::entries_type::non_const_type lno_nnz_view_t;
// typedef typename crsMat_t::values_type::non_const_type scalar_view_t;

lno_t numCols = numRows;
// Generate random compressed sparse row matrix. Randomly generated (non-zero)
// values are stored in a 1-D (1 rank) array.
crsMat_t input_mat = KokkosKernels::Impl::kk_generate_sparse_matrix<crsMat_t>(
numRows, numCols, nnz, row_size_variance, bandwidth);
crsMat_t A = KokkosKernels::Impl::kk_generate_sparse_matrix<crsMat_t>(
m, k, nnz, row_size_variance, bandwidth);
crsMat_t B = KokkosKernels::Impl::kk_generate_sparse_matrix<crsMat_t>(
k, n, nnz, row_size_variance, bandwidth);

crsMat_t output_mat2;
if (oldInterface)
run_spgemm_old_interface<crsMat_t, device>(input_mat, input_mat,
SPGEMM_DEBUG, output_mat2);
run_spgemm_old_interface<crsMat_t, device>(A, B, SPGEMM_DEBUG, output_mat2);
else
run_spgemm<crsMat_t, device>(input_mat, input_mat, SPGEMM_DEBUG,
output_mat2);
run_spgemm<crsMat_t, device>(A, B, SPGEMM_DEBUG, output_mat2);

std::vector<SPGEMMAlgorithm> algorithms = {SPGEMM_KK_MEMORY, SPGEMM_KK_SPEED,
SPGEMM_KK_MEMSPEED};
std::vector<SPGEMMAlgorithm> algorithms = {
SPGEMM_KK, SPGEMM_KK_MEMORY, SPGEMM_KK_SPEED, SPGEMM_KK_MEMSPEED};

#ifdef HAVE_KOKKOSKERNELS_MKL
algorithms.push_back(SPGEMM_MKL);
@@ -309,7 +310,7 @@ void test_spgemm(lno_t numRows, size_type nnz, lno_t bandwidth,
}
// if size_type is larger than int, mkl casts it to int.
// it will fail if casting cause overflow.
if (input_mat.values.extent(0) > max_integer) {
if (A.values.extent(0) > max_integer) {
is_expected_to_fail = true;
}

@@ -333,11 +334,10 @@ void test_spgemm(lno_t numRows, size_type nnz, lno_t bandwidth,
int res = 0;
try {
if (oldInterface)
res = run_spgemm_old_interface<crsMat_t, device>(
input_mat, input_mat, spgemm_algorithm, output_mat);
res = run_spgemm_old_interface<crsMat_t, device>(A, B, spgemm_algorithm,
output_mat);
else
res = run_spgemm<crsMat_t, device>(input_mat, input_mat,
spgemm_algorithm, output_mat);
res = run_spgemm<crsMat_t, device>(A, B, spgemm_algorithm, output_mat);
} catch (const char *message) {
EXPECT_TRUE(is_expected_to_fail) << algo;
failed = true;
@@ -433,14 +433,20 @@ void test_issue402() {
<< "KKMEM still has issue 402 bug; C=AA' is incorrect!\n";
}

#define EXECUTE_TEST(SCALAR, ORDINAL, OFFSET, DEVICE) \
TEST_F(TestCategory, \
sparse##_##spgemm##_##SCALAR##_##ORDINAL##_##OFFSET##_##DEVICE) { \
test_spgemm<SCALAR, ORDINAL, OFFSET, DEVICE>(10000, 10000 * 20, 500, 10); \
test_spgemm<SCALAR, ORDINAL, OFFSET, DEVICE>(0, 0, 10, 10); \
test_issue402<SCALAR, ORDINAL, OFFSET, DEVICE>(); \
test_spgemm<SCALAR, ORDINAL, OFFSET, DEVICE>(10000, 10000 * 20, 500, 10, \
true); \
#define EXECUTE_TEST(SCALAR, ORDINAL, OFFSET, DEVICE) \
TEST_F(TestCategory, \
sparse##_##spgemm##_##SCALAR##_##ORDINAL##_##OFFSET##_##DEVICE) { \
test_spgemm<SCALAR, ORDINAL, OFFSET, DEVICE>(10000, 10000, 10000, \
10000 * 20, 500, 10, false); \
test_spgemm<SCALAR, ORDINAL, OFFSET, DEVICE>(10000, 10000, 10000, \
10000 * 20, 500, 10, true); \
test_spgemm<SCALAR, ORDINAL, OFFSET, DEVICE>(0, 0, 0, 0, 10, 10, false); \
test_spgemm<SCALAR, ORDINAL, OFFSET, DEVICE>(0, 0, 0, 0, 10, 10, true); \
test_spgemm<SCALAR, ORDINAL, OFFSET, DEVICE>(0, 12, 5, 0, 10, 0, false); \
test_spgemm<SCALAR, ORDINAL, OFFSET, DEVICE>(0, 12, 5, 0, 10, 0, true); \
test_spgemm<SCALAR, ORDINAL, OFFSET, DEVICE>(10, 10, 0, 0, 10, 10, false); \
test_spgemm<SCALAR, ORDINAL, OFFSET, DEVICE>(10, 10, 0, 0, 10, 10, true); \
test_issue402<SCALAR, ORDINAL, OFFSET, DEVICE>(); \
}

// test_spgemm<SCALAR,ORDINAL,OFFSET,DEVICE>(50000, 50000 * 30, 100, 10);

0 comments on commit c451856

Please sign in to comment.