Skip to content

Commit

Permalink
add checked_lookup into LU
Browse files Browse the repository at this point in the history
  • Loading branch information
yhmtsai committed Sep 25, 2024
1 parent 6df1722 commit cb52f9c
Show file tree
Hide file tree
Showing 10 changed files with 79 additions and 26 deletions.
36 changes: 27 additions & 9 deletions common/cuda_hip/factorization/lu_kernels.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -85,7 +85,7 @@ __global__ __launch_bounds__(default_block_size) void initialize(
}


template <typename ValueType, typename IndexType>
template <bool checked_lookup, typename ValueType, typename IndexType>
__global__ __launch_bounds__(default_block_size) void factorize(
const IndexType* __restrict__ row_ptrs, const IndexType* __restrict__ cols,
const IndexType* __restrict__ storage_offsets,
Expand Down Expand Up @@ -130,8 +130,16 @@ __global__ __launch_bounds__(default_block_size) void factorize(
upper_nz += config::warp_size) {
const auto upper_col = cols[upper_nz];
const auto upper_val = vals[upper_nz];
const auto output_pos = lookup.lookup_unsafe(upper_col) + row_begin;
vals[output_pos] -= scale * upper_val;
if (checked_lookup) {
const auto pos = lookup[upper_col];
if (pos != invalid_index<IndexType>()) {
vals[row_begin + pos] -= scale * upper_val;
}
} else {
const auto output_pos =
lookup.lookup_unsafe(upper_col) + row_begin;
vals[output_pos] -= scale * upper_val;
}
}
}
scheduler.mark_ready();
Expand Down Expand Up @@ -252,19 +260,29 @@ template <typename ValueType, typename IndexType>
void factorize(std::shared_ptr<const DefaultExecutor> exec,
const IndexType* lookup_offsets, const int64* lookup_descs,
const int32* lookup_storage, const IndexType* diag_idxs,
matrix::Csr<ValueType, IndexType>* factors,
matrix::Csr<ValueType, IndexType>* factors, bool checked_lookup,
array<int>& tmp_storage)
{
const auto num_rows = factors->get_size()[0];
if (num_rows > 0) {
syncfree_storage storage(exec, tmp_storage, num_rows);
const auto num_blocks =
ceildiv(num_rows, default_block_size / config::warp_size);
kernel::factorize<<<num_blocks, default_block_size, 0,
exec->get_stream()>>>(
factors->get_const_row_ptrs(), factors->get_const_col_idxs(),
lookup_offsets, lookup_storage, lookup_descs, diag_idxs,
as_device_type(factors->get_values()), storage, num_rows);
if (checked_lookup) {
kernel::factorize<true>
<<<num_blocks, default_block_size, 0, exec->get_stream()>>>(
factors->get_const_row_ptrs(),
factors->get_const_col_idxs(), lookup_offsets,
lookup_storage, lookup_descs, diag_idxs,
as_device_type(factors->get_values()), storage, num_rows);
} else {
kernel::factorize<false>
<<<num_blocks, default_block_size, 0, exec->get_stream()>>>(
factors->get_const_row_ptrs(),
factors->get_const_col_idxs(), lookup_offsets,
lookup_storage, lookup_descs, diag_idxs,
as_device_type(factors->get_values()), storage, num_rows);
}
}
}

Expand Down
11 changes: 7 additions & 4 deletions core/factorization/lu.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -69,6 +69,9 @@ Lu<ValueType, IndexType>::parse(const config::pnode& config,
if (auto& obj = config.get("skip_sorting")) {
params.with_skip_sorting(config::get_value<bool>(obj));
}
if (auto& obj = config.get("checked_lookup")) {
params.with_checked_lookup(config::get_value<bool>(obj));
}

return params;
}
Expand Down Expand Up @@ -160,10 +163,10 @@ std::unique_ptr<LinOp> Lu<ValueType, IndexType>::generate_impl(
storage.get_const_data(), diag_idxs.get_data(), factors.get()));
// run numerical factorization
array<int> tmp{exec};
exec->run(make_factorize(storage_offsets.get_const_data(),
row_descs.get_const_data(),
storage.get_const_data(),
diag_idxs.get_const_data(), factors.get(), tmp));
exec->run(make_factorize(
storage_offsets.get_const_data(), row_descs.get_const_data(),
storage.get_const_data(), diag_idxs.get_const_data(), factors.get(),
parameters_.checked_lookup, tmp));
return factorization_type::create_from_combined_lu(std::move(factors));
}

Expand Down
2 changes: 1 addition & 1 deletion core/factorization/lu_kernels.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -33,7 +33,7 @@ namespace kernels {
const IndexType* lookup_offsets, const int64* lookup_descs, \
const int32* lookup_storage, const IndexType* diag_idxs, \
matrix::Csr<ValueType, IndexType>* factors, \
array<int>& tmp_storage)
bool checked_lookup, array<int>& tmp_storage)


#define GKO_DECLARE_LU_SYMMETRIC_FACTORIZE_SIMPLE(IndexType) \
Expand Down
3 changes: 3 additions & 0 deletions core/test/config/factorization.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -178,6 +178,8 @@ struct Lu : FactorizationConfigTest<
gko::experimental::factorization::symbolic_type::near_symmetric);
config_map["skip_sorting"] = pnode{true};
param.with_skip_sorting(true);
config_map["checked_lookup"] = pnode{true};
param.with_checked_lookup(true);
}

template <typename AnswerType>
Expand All @@ -190,6 +192,7 @@ struct Lu : FactorizationConfigTest<
ans_param.symbolic_factorization);
ASSERT_EQ(res_param.symbolic_algorithm, ans_param.symbolic_algorithm);
ASSERT_EQ(res_param.skip_sorting, ans_param.skip_sorting);
ASSERT_EQ(res_param.checked_lookup, ans_param.checked_lookup);
}
};

Expand Down
2 changes: 1 addition & 1 deletion dpcpp/factorization/lu_kernels.dp.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -39,7 +39,7 @@ template <typename ValueType, typename IndexType>
void factorize(std::shared_ptr<const DefaultExecutor> exec,
const IndexType* lookup_offsets, const int64* lookup_descs,
const int32* lookup_storage, const IndexType* diag_idxs,
matrix::Csr<ValueType, IndexType>* factors,
matrix::Csr<ValueType, IndexType>* factors, bool checked_lookup,
array<int>& tmp_storage) GKO_NOT_IMPLEMENTED;

GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE(GKO_DECLARE_LU_FACTORIZE);
Expand Down
9 changes: 9 additions & 0 deletions include/ginkgo/core/factorization/lu.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -97,6 +97,15 @@ class Lu
* incorrect results or crash.
*/
bool GKO_FACTORY_PARAMETER_SCALAR(skip_sorting, false);

/**
* The symbolic factoization should contains the fill-in information. If
* it is not the case (like Ilu), users might face hang or illegal
* access issue. Please enable this option when the symbolic
* factorization does not contain the full fill-in information. Symbolic
* factorization must still contain the entry for the original matrix.
*/
bool GKO_FACTORY_PARAMETER_SCALAR(checked_lookup, false);
};

/**
Expand Down
15 changes: 11 additions & 4 deletions omp/factorization/lu_kernels.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -66,7 +66,7 @@ template <typename ValueType, typename IndexType>
void factorize(std::shared_ptr<const DefaultExecutor> exec,
const IndexType* lookup_offsets, const int64* lookup_descs,
const int32* lookup_storage, const IndexType* diag_idxs,
matrix::Csr<ValueType, IndexType>* factors,
matrix::Csr<ValueType, IndexType>* factors, bool checked_lookup,
array<int>& tmp_storage)
{
const auto num_rows = factors->get_size()[0];
Expand All @@ -89,8 +89,15 @@ void factorize(std::shared_ptr<const DefaultExecutor> exec,
for (auto dep_nz = dep_diag_idx + 1; dep_nz < dep_end; dep_nz++) {
const auto col = cols[dep_nz];
const auto val = vals[dep_nz];
const auto nz = row_begin + lookup.lookup_unsafe(col);
vals[nz] -= scale * val;
if (checked_lookup) {
const auto idx = lookup[col];
if (idx != invalid_index<IndexType>()) {
vals[row_begin + idx] -= scale * val;
}
} else {
const auto nz = row_begin + lookup.lookup_unsafe(col);
vals[nz] -= scale * val;
}
}
}
}
Expand Down Expand Up @@ -185,4 +192,4 @@ GKO_INSTANTIATE_FOR_EACH_INDEX_TYPE(
} // namespace lu_factorization
} // namespace omp
} // namespace kernels
} // namespace gko
} // namespace gko
15 changes: 11 additions & 4 deletions reference/factorization/lu_kernels.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -65,7 +65,7 @@ template <typename ValueType, typename IndexType>
void factorize(std::shared_ptr<const DefaultExecutor> exec,
const IndexType* lookup_offsets, const int64* lookup_descs,
const int32* lookup_storage, const IndexType* diag_idxs,
matrix::Csr<ValueType, IndexType>* factors,
matrix::Csr<ValueType, IndexType>* factors, bool checked_lookup,
array<int>& tmp_storage)
{
const auto num_rows = factors->get_size()[0];
Expand All @@ -87,8 +87,15 @@ void factorize(std::shared_ptr<const DefaultExecutor> exec,
for (auto dep_nz = dep_diag_idx + 1; dep_nz < dep_end; dep_nz++) {
const auto col = cols[dep_nz];
const auto val = vals[dep_nz];
const auto nz = row_begin + lookup.lookup_unsafe(col);
vals[nz] -= scale * val;
if (checked_lookup) {
const auto idx = lookup[col];
if (idx != invalid_index<IndexType>()) {
vals[row_begin + idx] -= scale * val;
}
} else {
const auto nz = row_begin + lookup.lookup_unsafe(col);
vals[nz] -= scale * val;
}
}
}
}
Expand Down Expand Up @@ -182,4 +189,4 @@ GKO_INSTANTIATE_FOR_EACH_INDEX_TYPE(
} // namespace lu_factorization
} // namespace reference
} // namespace kernels
} // namespace gko
} // namespace gko
4 changes: 3 additions & 1 deletion reference/test/factorization/lu_kernels.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -218,7 +218,7 @@ TYPED_TEST(Lu, KernelFactorizeWorks)
gko::kernels::reference::lu_factorization::factorize(
this->ref, this->storage_offsets.get_const_data(),
this->row_descs.get_const_data(), this->storage.get_const_data(),
diag_idxs.get_const_data(), this->mtx_lu.get(), tmp);
diag_idxs.get_const_data(), this->mtx_lu.get(), false, tmp);

GKO_ASSERT_MTX_NEAR(this->mtx_lu, mtx_lu_ref,
15 * r<value_type>::value);
Expand Down Expand Up @@ -356,6 +356,7 @@ TYPED_TEST(Lu, GenerateIluWithBitmapIsEquivalentToRef)
auto factory =
gko::experimental::factorization::Lu<value_type, index_type>::build()
.with_symbolic_factorization(sparsity)
.with_checked_lookup(true)
.on(this->ref);

auto lu = factory->generate(mtx);
Expand Down Expand Up @@ -389,6 +390,7 @@ TYPED_TEST(Lu, GenerateIluWithHashmapIsEquivalentToRef)
auto factory =
gko::experimental::factorization::Lu<value_type, index_type>::build()
.with_symbolic_factorization(sparsity)
.with_checked_lookup(true)
.on(this->ref);

auto lu = factory->generate(mtx);
Expand Down
8 changes: 6 additions & 2 deletions test/factorization/lu_kernels.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -198,11 +198,11 @@ TYPED_TEST(Lu, KernelFactorizeIsEquivalentToRef)
gko::kernels::reference::lu_factorization::factorize(
this->ref, this->storage_offsets.get_const_data(),
this->row_descs.get_const_data(), this->storage.get_const_data(),
diag_idxs.get_const_data(), this->mtx_lu.get(), tmp);
diag_idxs.get_const_data(), this->mtx_lu.get(), false, tmp);
gko::kernels::GKO_DEVICE_NAMESPACE::lu_factorization::factorize(
this->exec, this->dstorage_offsets.get_const_data(),
this->drow_descs.get_const_data(), this->dstorage.get_const_data(),
ddiag_idxs.get_const_data(), this->dmtx_lu.get(), dtmp);
ddiag_idxs.get_const_data(), this->dmtx_lu.get(), false, dtmp);

GKO_ASSERT_MTX_NEAR(this->mtx_lu, this->dmtx_lu, r<value_type>::value);
});
Expand Down Expand Up @@ -376,10 +376,12 @@ TYPED_TEST(Lu, GenerateIluWithBitmapIsEquivalentToRef)
auto factory =
gko::experimental::factorization::Lu<value_type, index_type>::build()
.with_symbolic_factorization(sparsity)
.with_checked_lookup(true)
.on(this->ref);
auto dfactory =
gko::experimental::factorization::Lu<value_type, index_type>::build()
.with_symbolic_factorization(dsparsity)
.with_checked_lookup(true)
.on(this->exec);

auto lu = factory->generate(mtx);
Expand Down Expand Up @@ -417,10 +419,12 @@ TYPED_TEST(Lu, GenerateIluWithHashmapIsEquivalentToRef)
auto factory =
gko::experimental::factorization::Lu<value_type, index_type>::build()
.with_symbolic_factorization(sparsity)
.with_checked_lookup(true)
.on(this->ref);
auto dfactory =
gko::experimental::factorization::Lu<value_type, index_type>::build()
.with_symbolic_factorization(dsparsity)
.with_checked_lookup(true)
.on(this->exec);

auto lu = factory->generate(mtx);
Expand Down

0 comments on commit cb52f9c

Please sign in to comment.