diff --git a/core/factorization/ilu.cpp b/core/factorization/ilu.cpp index 41df4065979..015a5829493 100644 --- a/core/factorization/ilu.cpp +++ b/core/factorization/ilu.cpp @@ -10,6 +10,8 @@ #include #include #include +#include +#include #include "core/base/array_access.hpp" #include "core/config/config_helper.hpp" @@ -52,6 +54,17 @@ Ilu::parse(const config::pnode& config, if (auto& obj = config.get("skip_sorting")) { params.with_skip_sorting(config::get_value(obj)); } + if (auto& obj = config.get("algorithm")) { + using gko::factorization::factorize_algorithm; + auto str = obj.get_string(); + if (str == "sparselib") { + params.with_algorithm(factorize_algorithm::sparselib); + } else if (str == "syncfree") { + params.with_algorithm(factorize_algorithm::syncfree); + } else { + GKO_INVALID_CONFIG_VALUE("algorithm", str); + } + } return params; } @@ -66,7 +79,8 @@ std::unique_ptr> Ilu::generate_l_u( // Converts the system matrix to CSR. // Throws an exception if it is not convertible. - auto local_system_matrix = matrix_type::create(exec); + auto local_system_matrix = share(matrix_type::create(exec)); + std::shared_ptr ilu; as>(system_matrix.get()) ->convert_to(local_system_matrix); @@ -79,16 +93,36 @@ std::unique_ptr> Ilu::generate_l_u( local_system_matrix.get(), false)); // Compute LU factorization - exec->run(ilu_factorization::make_compute_ilu(local_system_matrix.get())); - + if (std::dynamic_pointer_cast(exec) || + parameters_.algorithm == factorize_algorithm::syncfree) { + auto sparsity = + share(gko::matrix::SparsityCsr::create_const( + exec, local_system_matrix->get_size(), + make_const_array_view( + exec, local_system_matrix->get_num_stored_elements(), + local_system_matrix->get_const_col_idxs()), + make_const_array_view( + exec, local_system_matrix->get_size()[0] + 1, + local_system_matrix->get_const_row_ptrs()))); + ilu = + gko::experimental::factorization::Lu::build() + .with_checked_lookup(true) + .with_symbolic_factorization(sparsity) + .on(exec) + ->generate(local_system_matrix) + ->get_combined(); + } else { + exec->run( + ilu_factorization::make_compute_ilu(local_system_matrix.get())); + ilu = local_system_matrix; + } // Separate L and U factors: nnz - const auto matrix_size = local_system_matrix->get_size(); + const auto matrix_size = ilu->get_size(); const auto num_rows = matrix_size[0]; array l_row_ptrs{exec, num_rows + 1}; array u_row_ptrs{exec, num_rows + 1}; exec->run(ilu_factorization::make_initialize_row_ptrs_l_u( - local_system_matrix.get(), l_row_ptrs.get_data(), - u_row_ptrs.get_data())); + ilu.get(), l_row_ptrs.get_data(), u_row_ptrs.get_data())); // Get nnz from device memory auto l_nnz = static_cast(get_element(l_row_ptrs, num_rows)); @@ -107,8 +141,8 @@ std::unique_ptr> Ilu::generate_l_u( std::move(u_row_ptrs), parameters_.u_strategy); // Separate L and U: columns and values - exec->run(ilu_factorization::make_initialize_l_u( - local_system_matrix.get(), l_factor.get(), u_factor.get())); + exec->run(ilu_factorization::make_initialize_l_u(ilu.get(), l_factor.get(), + u_factor.get())); return Composition::create(std::move(l_factor), std::move(u_factor)); diff --git a/core/test/config/factorization.cpp b/core/test/config/factorization.cpp index 9ee196222d3..014fb5e346d 100644 --- a/core/test/config/factorization.cpp +++ b/core/test/config/factorization.cpp @@ -111,6 +111,8 @@ struct Ilu : FactorizationConfigTest, typename gko::matrix::Csr::sparselib>()); config_map["skip_sorting"] = pnode{true}; param.with_skip_sorting(true); + config_map["algorithm"] = pnode{"syncfree"}; + param.with_algorithm(gko::factorization::factorize_algorithm::syncfree); } template @@ -122,6 +124,7 @@ struct Ilu : FactorizationConfigTest, check_strategy(res_param.l_strategy, ans_param.l_strategy); check_strategy(res_param.u_strategy, ans_param.u_strategy); ASSERT_EQ(res_param.skip_sorting, ans_param.skip_sorting); + ASSERT_EQ(res_param.algorithm, ans_param.algorithm); } }; diff --git a/include/ginkgo/core/factorization/ilu.hpp b/include/ginkgo/core/factorization/ilu.hpp index 80f11ab7b6f..2de5ea8bca8 100644 --- a/include/ginkgo/core/factorization/ilu.hpp +++ b/include/ginkgo/core/factorization/ilu.hpp @@ -25,6 +25,14 @@ namespace gko { namespace factorization { +/** + * A helper for algorithm selection in the incomplete factorization. + * sparselib is only available for cuda and hip. + * syncfree is Ginkgo's implementation through the Lu factorization with given + * sparsity. + */ +enum class factorize_algorithm { sparselib, syncfree }; + /** * Represents an incomplete LU factorization -- ILU(0) -- of a sparse matrix. * @@ -94,6 +102,15 @@ class Ilu : public Composition { * incorrect. */ bool GKO_FACTORY_PARAMETER_SCALAR(skip_sorting, false); + + /** + * Select the implementation which is supposed to be used for + * the incomplete factorization. This only matters for the Cuda and Hip + * executor where the choice is between the Ginkgo (syncfree) and the + * cuSPARSE/hipSPARSE (sparselib) implementation. Default is sparselib. + */ + factorize_algorithm GKO_FACTORY_PARAMETER_SCALAR( + algorithm, factorize_algorithm::sparselib); }; GKO_ENABLE_LIN_OP_FACTORY(Ilu, parameters, Factory); GKO_ENABLE_BUILD_METHOD(Factory); diff --git a/test/factorization/CMakeLists.txt b/test/factorization/CMakeLists.txt index e768a48ef05..7a6359093bd 100644 --- a/test/factorization/CMakeLists.txt +++ b/test/factorization/CMakeLists.txt @@ -1,7 +1,7 @@ ginkgo_create_common_test(cholesky_kernels DISABLE_EXECUTORS dpcpp) ginkgo_create_common_test(lu_kernels DISABLE_EXECUTORS dpcpp) ginkgo_create_common_test(ic_kernels DISABLE_EXECUTORS dpcpp omp) -ginkgo_create_common_test(ilu_kernels DISABLE_EXECUTORS dpcpp omp) +ginkgo_create_common_test(ilu_kernels DISABLE_EXECUTORS dpcpp) ginkgo_create_common_test(par_ic_kernels) ginkgo_create_common_test(par_ict_kernels) ginkgo_create_common_test(par_ilu_kernels) diff --git a/test/factorization/ilu_kernels.cpp b/test/factorization/ilu_kernels.cpp index 004b0d34a4f..297f0d6d922 100644 --- a/test/factorization/ilu_kernels.cpp +++ b/test/factorization/ilu_kernels.cpp @@ -55,6 +55,26 @@ TEST_F(Ilu, ComputeILUIsEquivalentToRefSorted) } +TEST_F(Ilu, ComputeILUBySyncfreeIsEquivalentToRefSorted) +{ + auto fact = gko::factorization::Ilu<>::build() + .with_skip_sorting(true) + .on(ref) + ->generate(mtx); + auto dfact = + gko::factorization::Ilu<>::build() + .with_skip_sorting(true) + .with_algorithm(gko::factorization::factorize_algorithm::syncfree) + .on(exec) + ->generate(dmtx); + + GKO_ASSERT_MTX_NEAR(fact->get_l_factor(), dfact->get_l_factor(), 1e-14); + GKO_ASSERT_MTX_NEAR(fact->get_u_factor(), dfact->get_u_factor(), 1e-14); + GKO_ASSERT_MTX_EQ_SPARSITY(fact->get_l_factor(), dfact->get_l_factor()); + GKO_ASSERT_MTX_EQ_SPARSITY(fact->get_u_factor(), dfact->get_u_factor()); +} + + TEST_F(Ilu, ComputeILUIsEquivalentToRefUnsorted) { gko::test::unsort_matrix(mtx, rand_engine); @@ -74,11 +94,19 @@ TEST_F(Ilu, SetsCorrectStrategy) { auto dfact = gko::factorization::Ilu<>::build() .with_l_strategy(std::make_shared()) +#ifdef GKO_COMPILING_OMP + .with_u_strategy(std::make_shared()) +#else .with_u_strategy(std::make_shared(exec)) +#endif .on(exec) ->generate(dmtx); ASSERT_EQ(dfact->get_l_factor()->get_strategy()->get_name(), "merge_path"); +#ifdef GKO_COMPILING_OMP + ASSERT_EQ(dfact->get_u_factor()->get_strategy()->get_name(), "merge_path"); +#else ASSERT_EQ(dfact->get_u_factor()->get_strategy()->get_name(), "load_balance"); +#endif }