Small fixes to spgemm, and plug gaps in testing (#1159)

* Small fixes to spgemm, and plug gaps in testing
  - Simplified interface: don't destroy spgemm subhandle after numeric. Want to be able to reuse, and user is responsible for creating the handle anyway.
  - Don't default to SPGEMM_CUSPARSE unless cusparse TPL is actually enabled.
  - Don't divide by 0 when A has 0 rows.
  - In testing, test the case where A has 0 rows.
  - In testing, actually test default algo SPGEMM_KK, since this takes a slightly different code path than SPGEMM_KK_MEMORY.
* In spgemm test, rename dimensions to m/k/n
kokkos · Nov 1, 2021 · c451856 · c451856
1 parent df04b2a
commit c451856
Showing 6 changed files with 50 additions and 29 deletions.
diff --git a/src/common/KokkosKernels_IOUtils.hpp b/src/common/KokkosKernels_IOUtils.hpp
@@ -103,6 +103,12 @@ void kk_sparseMatrix_generate(OrdinalType nrows, OrdinalType ncols,
     int varianz       = (1.0 * rand() / RAND_MAX - 0.5) * row_size_variance;
     int numRowEntries = elements_per_row + varianz;
     if (numRowEntries < 0) numRowEntries = 0;
+    // Clamping numRowEntries below accomplishes 2 things:
+    //  - If ncols is 0, numRowEntries will also be 0.
+    //  - With numRowEntries at most 2/3 the number of columns, in the
+    //    worst case 90% of insertions will still succeed within
+    //    6 tries.
+    if (numRowEntries > 0.66 * ncols) numRowEntries = 0.66 * ncols;
     rowPtr[row + 1] = rowPtr[row] + numRowEntries;
   }
   nnz    = rowPtr[nrows];

diff --git a/src/sparse/KokkosSparse_spgemm.hpp b/src/sparse/KokkosSparse_spgemm.hpp
@@ -92,7 +92,6 @@ void spgemm_numeric(KernelHandle& kh, const AMatrix& A, const bool Amode,
       &kh, A.numRows(), B.numRows(), B.numCols(), A.graph.row_map,
       A.graph.entries, A.values, Amode, B.graph.row_map, B.graph.entries,
       B.values, Bmode, C.graph.row_map, C.graph.entries, C.values);
-  kh.destroy_spgemm_handle();
 }
 
 }  // namespace KokkosSparse

diff --git a/src/sparse/KokkosSparse_spgemm_handle.hpp b/src/sparse/KokkosSparse_spgemm_handle.hpp
@@ -547,7 +547,11 @@ class SPGEMMHandle {
 
 #if defined(KOKKOS_ENABLE_CUDA)
     if (std::is_same<Kokkos::Cuda, ExecutionSpace>::value) {
+#ifdef KOKKOSKERNELS_ENABLE_TPL_CUSPARSE
       this->algorithm_type = SPGEMM_CUSPARSE;
+#else
+      this->algorithm_type = SPGEMM_KK;
+#endif
 #ifdef VERBOSE
       std::cout << "Cuda Execution Space, Default Algorithm: SPGEMM_CUSPARSE"
                 << std::endl;

diff --git a/src/sparse/impl/KokkosSparse_spgemm_impl_kkmem.hpp b/src/sparse/impl/KokkosSparse_spgemm_impl_kkmem.hpp
@@ -1364,8 +1364,12 @@ void KokkosSPGEMM<HandleType, a_row_view_t_, a_lno_nnz_view_t_,
     if (KokkosKernels::Impl::kk_is_gpu_exec_space<
             typename HandleType::HandleExecSpace>()) {
       // then chose the best method and parameters.
-      size_type average_row_nnz = overall_nnz / this->a_row_cnt;
-      size_t average_row_flops  = original_overall_flops / this->a_row_cnt;
+      size_type average_row_nnz = 0;
+      size_t average_row_flops  = 0;
+      if (this->a_row_cnt > 0) {
+        average_row_nnz   = overall_nnz / this->a_row_cnt;
+        average_row_flops = original_overall_flops / this->a_row_cnt;
+      }
       // if we have very low flops per row, or our maximum number of nnz is
       // prett small, then we do row-base algorithm.
       if (SPGEMM_KK_LP != this->spgemm_algorithm &&

diff --git a/src/sparse/impl/KokkosSparse_spgemm_impl_symbolic.hpp b/src/sparse/impl/KokkosSparse_spgemm_impl_symbolic.hpp
@@ -1804,9 +1804,11 @@ void KokkosSPGEMM<
 #else
       size_t original_overall_flops =
           this->handle->get_spgemm_handle()->compressed_overall_flops;
-      size_t estimate_max_nnz =
-          (sqrt(maxNumRoughNonzeros) * sqrt(original_overall_flops / m)) /
-          estimate_compress;
+      size_t estimate_max_nnz = 0;
+      if (m > 0)
+        estimate_max_nnz =
+            (sqrt(maxNumRoughNonzeros) * sqrt(original_overall_flops / m)) /
+            estimate_compress;
       if (KOKKOSKERNELS_VERBOSE) {
         std::cout << "\t\t\testimate_max_nnz:" << estimate_max_nnz
                   << " maxNumRoughNonzeros:" << maxNumRoughNonzeros

diff --git a/unit_test/sparse/Test_Sparse_spgemm.hpp b/unit_test/sparse/Test_Sparse_spgemm.hpp
@@ -246,9 +246,11 @@ bool is_same_matrix(crsMat_t output_mat_actual, crsMat_t output_mat_reference) {
 }
 }  // namespace Test
 
+// Generate matrices and test all supported spgemm algorithms.
+// C := AB, where A is m*k, B is k*n, and C is m*n.
 template <typename scalar_t, typename lno_t, typename size_type,
           typename device>
-void test_spgemm(lno_t numRows, size_type nnz, lno_t bandwidth,
+void test_spgemm(lno_t m, lno_t k, lno_t n, size_type nnz, lno_t bandwidth,
                  lno_t row_size_variance, bool oldInterface = false) {
   using namespace Test;
   // device::execution_space::initialize();
@@ -260,22 +262,21 @@ void test_spgemm(lno_t numRows, size_type nnz, lno_t bandwidth,
   // typedef typename graph_t::entries_type::non_const_type   lno_nnz_view_t;
   // typedef typename crsMat_t::values_type::non_const_type scalar_view_t;
 
-  lno_t numCols = numRows;
   // Generate random compressed sparse row matrix. Randomly generated (non-zero)
   // values are stored in a 1-D (1 rank) array.
-  crsMat_t input_mat = KokkosKernels::Impl::kk_generate_sparse_matrix<crsMat_t>(
-      numRows, numCols, nnz, row_size_variance, bandwidth);
+  crsMat_t A = KokkosKernels::Impl::kk_generate_sparse_matrix<crsMat_t>(
+      m, k, nnz, row_size_variance, bandwidth);
+  crsMat_t B = KokkosKernels::Impl::kk_generate_sparse_matrix<crsMat_t>(
+      k, n, nnz, row_size_variance, bandwidth);
 
   crsMat_t output_mat2;
   if (oldInterface)
-    run_spgemm_old_interface<crsMat_t, device>(input_mat, input_mat,
-                                               SPGEMM_DEBUG, output_mat2);
+    run_spgemm_old_interface<crsMat_t, device>(A, B, SPGEMM_DEBUG, output_mat2);
   else
-    run_spgemm<crsMat_t, device>(input_mat, input_mat, SPGEMM_DEBUG,
-                                 output_mat2);
+    run_spgemm<crsMat_t, device>(A, B, SPGEMM_DEBUG, output_mat2);
 
-  std::vector<SPGEMMAlgorithm> algorithms = {SPGEMM_KK_MEMORY, SPGEMM_KK_SPEED,
-                                             SPGEMM_KK_MEMSPEED};
+  std::vector<SPGEMMAlgorithm> algorithms = {
+      SPGEMM_KK, SPGEMM_KK_MEMORY, SPGEMM_KK_SPEED, SPGEMM_KK_MEMSPEED};
 
 #ifdef HAVE_KOKKOSKERNELS_MKL
   algorithms.push_back(SPGEMM_MKL);
@@ -309,7 +310,7 @@ void test_spgemm(lno_t numRows, size_type nnz, lno_t bandwidth,
         }
         // if size_type is larger than int, mkl casts it to int.
         // it will fail if casting cause overflow.
-        if (input_mat.values.extent(0) > max_integer) {
+        if (A.values.extent(0) > max_integer) {
           is_expected_to_fail = true;
         }
 
@@ -333,11 +334,10 @@ void test_spgemm(lno_t numRows, size_type nnz, lno_t bandwidth,
     int res     = 0;
     try {
       if (oldInterface)
-        res = run_spgemm_old_interface<crsMat_t, device>(
-            input_mat, input_mat, spgemm_algorithm, output_mat);
+        res = run_spgemm_old_interface<crsMat_t, device>(A, B, spgemm_algorithm,
+                                                         output_mat);
       else
-        res = run_spgemm<crsMat_t, device>(input_mat, input_mat,
-                                           spgemm_algorithm, output_mat);
+        res = run_spgemm<crsMat_t, device>(A, B, spgemm_algorithm, output_mat);
     } catch (const char *message) {
       EXPECT_TRUE(is_expected_to_fail) << algo;
       failed = true;
@@ -433,14 +433,20 @@ void test_issue402() {
       << "KKMEM still has issue 402 bug; C=AA' is incorrect!\n";
 }
 
-#define EXECUTE_TEST(SCALAR, ORDINAL, OFFSET, DEVICE)                         \
-  TEST_F(TestCategory,                                                        \
-         sparse##_##spgemm##_##SCALAR##_##ORDINAL##_##OFFSET##_##DEVICE) {    \
-    test_spgemm<SCALAR, ORDINAL, OFFSET, DEVICE>(10000, 10000 * 20, 500, 10); \
-    test_spgemm<SCALAR, ORDINAL, OFFSET, DEVICE>(0, 0, 10, 10);               \
-    test_issue402<SCALAR, ORDINAL, OFFSET, DEVICE>();                         \
-    test_spgemm<SCALAR, ORDINAL, OFFSET, DEVICE>(10000, 10000 * 20, 500, 10,  \
-                                                 true);                       \
+#define EXECUTE_TEST(SCALAR, ORDINAL, OFFSET, DEVICE)                          \
+  TEST_F(TestCategory,                                                         \
+         sparse##_##spgemm##_##SCALAR##_##ORDINAL##_##OFFSET##_##DEVICE) {     \
+    test_spgemm<SCALAR, ORDINAL, OFFSET, DEVICE>(10000, 10000, 10000,          \
+                                                 10000 * 20, 500, 10, false);  \
+    test_spgemm<SCALAR, ORDINAL, OFFSET, DEVICE>(10000, 10000, 10000,          \
+                                                 10000 * 20, 500, 10, true);   \
+    test_spgemm<SCALAR, ORDINAL, OFFSET, DEVICE>(0, 0, 0, 0, 10, 10, false);   \
+    test_spgemm<SCALAR, ORDINAL, OFFSET, DEVICE>(0, 0, 0, 0, 10, 10, true);    \
+    test_spgemm<SCALAR, ORDINAL, OFFSET, DEVICE>(0, 12, 5, 0, 10, 0, false);   \
+    test_spgemm<SCALAR, ORDINAL, OFFSET, DEVICE>(0, 12, 5, 0, 10, 0, true);    \
+    test_spgemm<SCALAR, ORDINAL, OFFSET, DEVICE>(10, 10, 0, 0, 10, 10, false); \
+    test_spgemm<SCALAR, ORDINAL, OFFSET, DEVICE>(10, 10, 0, 0, 10, 10, true);  \
+    test_issue402<SCALAR, ORDINAL, OFFSET, DEVICE>();                          \
   }
 
 // test_spgemm<SCALAR,ORDINAL,OFFSET,DEVICE>(50000, 50000 * 30, 100, 10);