Skip to content

Commit

Permalink
add ilu syncfree through lu implementation
Browse files Browse the repository at this point in the history
  • Loading branch information
yhmtsai committed Sep 25, 2024
1 parent cb52f9c commit dfa446c
Show file tree
Hide file tree
Showing 5 changed files with 91 additions and 9 deletions.
50 changes: 42 additions & 8 deletions core/factorization/ilu.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,8 @@
#include <ginkgo/core/base/exception_helpers.hpp>
#include <ginkgo/core/config/config.hpp>
#include <ginkgo/core/config/registry.hpp>
#include <ginkgo/core/factorization/lu.hpp>
#include <ginkgo/core/matrix/sparsity_csr.hpp>

#include "core/base/array_access.hpp"
#include "core/config/config_helper.hpp"
Expand Down Expand Up @@ -52,6 +54,17 @@ Ilu<ValueType, IndexType>::parse(const config::pnode& config,
if (auto& obj = config.get("skip_sorting")) {
params.with_skip_sorting(config::get_value<bool>(obj));
}
if (auto& obj = config.get("algorithm")) {
using gko::factorization::factorize_algorithm;
auto str = obj.get_string();
if (str == "sparselib") {
params.with_algorithm(factorize_algorithm::sparselib);
} else if (str == "syncfree") {
params.with_algorithm(factorize_algorithm::syncfree);
} else {
GKO_INVALID_CONFIG_VALUE("algorithm", str);
}
}
return params;
}

Expand All @@ -66,7 +79,8 @@ std::unique_ptr<Composition<ValueType>> Ilu<ValueType, IndexType>::generate_l_u(

// Converts the system matrix to CSR.
// Throws an exception if it is not convertible.
auto local_system_matrix = matrix_type::create(exec);
auto local_system_matrix = share(matrix_type::create(exec));
std::shared_ptr<const matrix_type> ilu;
as<ConvertibleTo<matrix_type>>(system_matrix.get())
->convert_to(local_system_matrix);

Expand All @@ -79,16 +93,36 @@ std::unique_ptr<Composition<ValueType>> Ilu<ValueType, IndexType>::generate_l_u(
local_system_matrix.get(), false));

// Compute LU factorization
exec->run(ilu_factorization::make_compute_ilu(local_system_matrix.get()));

if (std::dynamic_pointer_cast<const OmpExecutor>(exec) ||
parameters_.algorithm == factorize_algorithm::syncfree) {
auto sparsity =
share(gko::matrix::SparsityCsr<ValueType, IndexType>::create_const(
exec, local_system_matrix->get_size(),
make_const_array_view(
exec, local_system_matrix->get_num_stored_elements(),
local_system_matrix->get_const_col_idxs()),
make_const_array_view(
exec, local_system_matrix->get_size()[0] + 1,
local_system_matrix->get_const_row_ptrs())));
ilu =
gko::experimental::factorization::Lu<ValueType, IndexType>::build()
.with_checked_lookup(true)
.with_symbolic_factorization(sparsity)
.on(exec)
->generate(local_system_matrix)
->get_combined();
} else {
exec->run(
ilu_factorization::make_compute_ilu(local_system_matrix.get()));
ilu = local_system_matrix;
}
// Separate L and U factors: nnz
const auto matrix_size = local_system_matrix->get_size();
const auto matrix_size = ilu->get_size();
const auto num_rows = matrix_size[0];
array<IndexType> l_row_ptrs{exec, num_rows + 1};
array<IndexType> u_row_ptrs{exec, num_rows + 1};
exec->run(ilu_factorization::make_initialize_row_ptrs_l_u(
local_system_matrix.get(), l_row_ptrs.get_data(),
u_row_ptrs.get_data()));
ilu.get(), l_row_ptrs.get_data(), u_row_ptrs.get_data()));

// Get nnz from device memory
auto l_nnz = static_cast<size_type>(get_element(l_row_ptrs, num_rows));
Expand All @@ -107,8 +141,8 @@ std::unique_ptr<Composition<ValueType>> Ilu<ValueType, IndexType>::generate_l_u(
std::move(u_row_ptrs), parameters_.u_strategy);

// Separate L and U: columns and values
exec->run(ilu_factorization::make_initialize_l_u(
local_system_matrix.get(), l_factor.get(), u_factor.get()));
exec->run(ilu_factorization::make_initialize_l_u(ilu.get(), l_factor.get(),
u_factor.get()));

return Composition<ValueType>::create(std::move(l_factor),
std::move(u_factor));
Expand Down
3 changes: 3 additions & 0 deletions core/test/config/factorization.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -111,6 +111,8 @@ struct Ilu : FactorizationConfigTest<gko::factorization::Ilu<float, int>,
typename gko::matrix::Csr<float, int>::sparselib>());
config_map["skip_sorting"] = pnode{true};
param.with_skip_sorting(true);
config_map["algorithm"] = pnode{"syncfree"};
param.with_algorithm(gko::factorization::factorize_algorithm::syncfree);
}

template <typename AnswerType>
Expand All @@ -122,6 +124,7 @@ struct Ilu : FactorizationConfigTest<gko::factorization::Ilu<float, int>,
check_strategy(res_param.l_strategy, ans_param.l_strategy);
check_strategy(res_param.u_strategy, ans_param.u_strategy);
ASSERT_EQ(res_param.skip_sorting, ans_param.skip_sorting);
ASSERT_EQ(res_param.algorithm, ans_param.algorithm);
}
};

Expand Down
17 changes: 17 additions & 0 deletions include/ginkgo/core/factorization/ilu.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,14 @@ namespace gko {
namespace factorization {


/**
 * Selects which implementation carries out the incomplete factorization.
 */
enum class factorize_algorithm {
    /**
     * The sparse library (cuSPARSE/hipSPARSE) implementation. It is only
     * available for cuda and hip.
     */
    sparselib,
    /**
     * Ginkgo's own implementation, realized through the Lu factorization
     * with a given sparsity pattern.
     */
    syncfree
};

/**
* Represents an incomplete LU factorization -- ILU(0) -- of a sparse matrix.
*
Expand Down Expand Up @@ -94,6 +102,15 @@ class Ilu : public Composition<ValueType> {
* incorrect.
*/
bool GKO_FACTORY_PARAMETER_SCALAR(skip_sorting, false);

/**
* Select the implementation which is supposed to be used for
* the incomplete factorization. The choice is between the Ginkgo
* (syncfree) implementation, which goes through the Lu factorization, and
* the cuSPARSE/hipSPARSE (sparselib) implementation. Default is sparselib.
*
* @note Ilu::generate_l_u always takes the Lu-based (syncfree) path when
*       the executor is an OmpExecutor, regardless of this setting. On any
*       other executor the Lu-based path is taken only if syncfree is
*       selected; otherwise the executor's compute_ilu kernel is run.
*/
factorize_algorithm GKO_FACTORY_PARAMETER_SCALAR(
algorithm, factorize_algorithm::sparselib);
};
GKO_ENABLE_LIN_OP_FACTORY(Ilu, parameters, Factory);
GKO_ENABLE_BUILD_METHOD(Factory);
Expand Down
2 changes: 1 addition & 1 deletion test/factorization/CMakeLists.txt
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
ginkgo_create_common_test(cholesky_kernels DISABLE_EXECUTORS dpcpp)
ginkgo_create_common_test(lu_kernels DISABLE_EXECUTORS dpcpp)
ginkgo_create_common_test(ic_kernels DISABLE_EXECUTORS dpcpp omp)
ginkgo_create_common_test(ilu_kernels DISABLE_EXECUTORS dpcpp omp)
ginkgo_create_common_test(ilu_kernels DISABLE_EXECUTORS dpcpp)
ginkgo_create_common_test(par_ic_kernels)
ginkgo_create_common_test(par_ict_kernels)
ginkgo_create_common_test(par_ilu_kernels)
Expand Down
28 changes: 28 additions & 0 deletions test/factorization/ilu_kernels.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -55,6 +55,26 @@ TEST_F(Ilu, ComputeILUIsEquivalentToRefSorted)
}


// Checks that the syncfree ILU generated on the tested executor matches the
// default ILU generated on the reference executor, both in values and in
// sparsity pattern. Sorting is skipped on both sides.
TEST_F(Ilu, ComputeILUBySyncfreeIsEquivalentToRefSorted)
{
    using gko::factorization::factorize_algorithm;
    using factorization_type = gko::factorization::Ilu<>;

    // Expected result: default algorithm on the reference executor.
    auto ref_factory =
        factorization_type::build().with_skip_sorting(true).on(ref);
    auto ref_ilu = ref_factory->generate(mtx);

    // Result under test: syncfree algorithm on the tested executor.
    auto dev_factory = factorization_type::build()
                           .with_skip_sorting(true)
                           .with_algorithm(factorize_algorithm::syncfree)
                           .on(exec);
    auto dev_ilu = dev_factory->generate(dmtx);

    // Values first, then sparsity patterns, for L and U respectively.
    GKO_ASSERT_MTX_NEAR(ref_ilu->get_l_factor(), dev_ilu->get_l_factor(),
                        1e-14);
    GKO_ASSERT_MTX_NEAR(ref_ilu->get_u_factor(), dev_ilu->get_u_factor(),
                        1e-14);
    GKO_ASSERT_MTX_EQ_SPARSITY(ref_ilu->get_l_factor(),
                               dev_ilu->get_l_factor());
    GKO_ASSERT_MTX_EQ_SPARSITY(ref_ilu->get_u_factor(),
                               dev_ilu->get_u_factor());
}


TEST_F(Ilu, ComputeILUIsEquivalentToRefUnsorted)
{
gko::test::unsort_matrix(mtx, rand_engine);
Expand All @@ -74,11 +94,19 @@ TEST_F(Ilu, SetsCorrectStrategy)
{
// Request merge_path for the L factor on every backend; the strategy
// requested for the U factor depends on the backend being compiled.
auto dfact = gko::factorization::Ilu<>::build()
.with_l_strategy(std::make_shared<Csr::merge_path>())
#ifdef GKO_COMPILING_OMP
// NOTE(review): presumably load_balance cannot be set up with an OpenMP
// executor, hence merge_path is requested here as well -- confirm.
.with_u_strategy(std::make_shared<Csr::merge_path>())
#else
.with_u_strategy(std::make_shared<Csr::load_balance>(exec))
#endif
.on(exec)
->generate(dmtx);

// The generated factors must report the names of the requested strategies.
ASSERT_EQ(dfact->get_l_factor()->get_strategy()->get_name(), "merge_path");
#ifdef GKO_COMPILING_OMP
ASSERT_EQ(dfact->get_u_factor()->get_strategy()->get_name(), "merge_path");
#else
ASSERT_EQ(dfact->get_u_factor()->get_strategy()->get_name(),
"load_balance");
#endif
}

0 comments on commit dfa446c

Please sign in to comment.