Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Cuda kernels for Lower triangular solve #336

Merged
merged 19 commits into from
Sep 6, 2019
Merged
Show file tree
Hide file tree
Changes from 16 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 6 additions & 0 deletions core/device_hooks/common_kernels.inc.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -185,6 +185,12 @@ GKO_INSTANTIATE_FOR_EACH_VALUE_TYPE(GKO_DECLARE_CG_STEP_2_KERNEL);
namespace lower_trs {


// Hook-module definition of the lower_trs `perform_transpose` kernel. The
// declaration macro expands to the function signature and GKO_NOT_COMPILED
// supplies the body.
// NOTE(review): presumably the body reports that GKO_HOOK_MODULE was not
// compiled in -- confirm against the GKO_NOT_COMPILED definition.
GKO_DECLARE_LOWER_TRS_CHECK_TRANSPOSABILITY_KERNEL()
GKO_NOT_COMPILED(GKO_HOOK_MODULE);

// Hook-module definition of the lower_trs `init_struct` kernel: signature from
// the declaration macro, body from GKO_NOT_COMPILED.
// NOTE(review): presumably a "module not compiled" stub -- confirm against the
// GKO_NOT_COMPILED definition.
GKO_DECLARE_LOWER_TRS_INIT_STRUCT_KERNEL()
GKO_NOT_COMPILED(GKO_HOOK_MODULE);

// Hook-module definition of the lower_trs `generate` kernel, templated on the
// value and index types: signature from the declaration macro, body from
// GKO_NOT_COMPILED.
// NOTE(review): presumably a "module not compiled" stub -- confirm against the
// GKO_NOT_COMPILED definition.
template <typename ValueType, typename IndexType>
GKO_DECLARE_LOWER_TRS_GENERATE_KERNEL(ValueType, IndexType)
GKO_NOT_COMPILED(GKO_HOOK_MODULE);
Expand Down
34 changes: 27 additions & 7 deletions core/solver/lower_trs.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -48,23 +48,31 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

namespace gko {
namespace solver {


namespace lower_trs {


// Register the four lower_trs kernels as executor operations. This file later
// dispatches them via lower_trs::make_generate, make_init_struct,
// make_perform_transpose and make_solve.
// NOTE(review): those make_<name> factories are presumably generated by
// GKO_REGISTER_OPERATION -- confirm against the macro definition.
GKO_REGISTER_OPERATION(generate, lower_trs::generate);
GKO_REGISTER_OPERATION(init_struct, lower_trs::init_struct);
GKO_REGISTER_OPERATION(perform_transpose, lower_trs::perform_transpose);
GKO_REGISTER_OPERATION(solve, lower_trs::solve);


} // namespace lower_trs


/**
 * Dispatches the lower_trs `init_struct` operation on this solver's executor,
 * handing it the solver's `solve_struct_` member.
 */
template <typename ValueType, typename IndexType>
void LowerTrs<ValueType, IndexType>::init_trs_solve_struct()
{
    auto exec = this->get_executor();
    exec->run(lower_trs::make_init_struct(this->solve_struct_));
}


/**
 * Dispatches the lower_trs `generate` operation on this solver's executor.
 *
 * The operation receives the system matrix, the raw pointer held by
 * `solve_struct_`, and the number of right-hand sides from `parameters_`.
 */
template <typename ValueType, typename IndexType>
void LowerTrs<ValueType, IndexType>::generate()
{
    // Exactly one dispatch: the generate kernel's parameter list is
    // (matrix, solve_struct, num_rhs). A second call passing `b_` does not
    // match that parameter list and must not be issued.
    this->get_executor()->run(lower_trs::make_generate(
        gko::lend(system_matrix_), this->solve_struct_.get(),
        parameters_.num_rhs));
}


Expand All @@ -76,9 +84,21 @@ void LowerTrs<ValueType, IndexType>::apply_impl(const LinOp *b, LinOp *x) const

auto dense_b = as<const Vector>(b);
auto dense_x = as<Vector>(x);

exec->run(
lower_trs::make_solve(gko::lend(system_matrix_), dense_b, dense_x));
bool transposability = false;
std::shared_ptr<Vector> trans_b;
std::shared_ptr<Vector> trans_x;
this->get_executor()->run(
lower_trs::make_perform_transpose(transposability));
if (transposability) {
trans_b = Vector::create(exec, gko::transpose(dense_b->get_size()));
trans_x = Vector::create(exec, gko::transpose(dense_x->get_size()));
} else {
trans_b = Vector::create(exec);
trans_x = Vector::create(exec);
}
exec->run(lower_trs::make_solve(
gko::lend(system_matrix_), this->solve_struct_.get(),
gko::lend(trans_b), gko::lend(trans_x), dense_b, dense_x));
thoasm marked this conversation as resolved.
Show resolved Hide resolved
}


Expand Down
24 changes: 20 additions & 4 deletions core/solver/lower_trs_kernels.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -40,26 +40,42 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#include <ginkgo/core/base/executor.hpp>
#include <ginkgo/core/matrix/csr.hpp>
#include <ginkgo/core/matrix/dense.hpp>
#include <ginkgo/core/solver/lower_trs.hpp>


namespace gko {
namespace kernels {
namespace lower_trs {


// Declares the `perform_transpose` kernel. It takes the executor and a
// `bool &` output flag, `transposability`.
// NOTE(review): the visible caller initialises the flag to false and allocates
// transposed copies of b and x only when the kernel leaves it true; presumably
// the flag means "this backend needs transposed right-hand sides" -- confirm
// against the kernel implementations.
#define GKO_DECLARE_LOWER_TRS_CHECK_TRANSPOSABILITY_KERNEL()            \
    void perform_transpose(std::shared_ptr<const DefaultExecutor> exec, \
                           bool &transposability)


// Declares the `init_struct` kernel. It takes the executor and a non-const
// reference to the shared_ptr that holds the solver's SolveStruct.
// NOTE(review): the by-reference shared_ptr suggests the kernel may (re)assign
// the pointer itself -- confirm against the kernel implementations.
#define GKO_DECLARE_LOWER_TRS_INIT_STRUCT_KERNEL() \
void init_struct(std::shared_ptr<const DefaultExecutor> exec, \
std::shared_ptr<gko::solver::SolveStruct> &solve_struct)
thoasm marked this conversation as resolved.
Show resolved Hide resolved


// Declares the `generate` kernel for a given value/index type pair.
// Parameters: the executor, the CSR system matrix, a raw pointer to the
// solver's SolveStruct, and the number of right-hand sides.
#define GKO_DECLARE_LOWER_TRS_GENERATE_KERNEL(_vtype, _itype)  \
    void generate(std::shared_ptr<const DefaultExecutor> exec, \
                  const matrix::Csr<_vtype, _itype> *matrix,   \
                  gko::solver::SolveStruct *solve_struct,      \
                  const gko::size_type num_rhs)


// Declares the `solve` kernel for a given value/index type pair.
// Parameters: the executor, the CSR system matrix, a raw pointer to the
// solver's SolveStruct, two dense work vectors (`trans_b`, `trans_x`), the
// right-hand side `b` (read-only) and the solution vector `x`.
// NOTE(review): the visible caller sizes `trans_b` / `trans_x` with the
// transposed dimensions of b / x when perform_transpose reports true and
// passes empty vectors otherwise; presumably they are scratch space for
// backends that need transposed right-hand sides -- confirm against the
// kernel implementations.
#define GKO_DECLARE_LOWER_TRS_SOLVE_KERNEL(_vtype, _itype)                     \
    void solve(std::shared_ptr<const DefaultExecutor> exec,                    \
               const matrix::Csr<_vtype, _itype> *matrix,                      \
               gko::solver::SolveStruct *solve_struct,                         \
               matrix::Dense<_vtype> *trans_b, matrix::Dense<_vtype> *trans_x, \
               const matrix::Dense<_vtype> *b, matrix::Dense<_vtype> *x)


#define GKO_DECLARE_ALL_AS_TEMPLATES \
GKO_DECLARE_LOWER_TRS_CHECK_TRANSPOSABILITY_KERNEL(); \
GKO_DECLARE_LOWER_TRS_INIT_STRUCT_KERNEL(); \
template <typename ValueType, typename IndexType> \
GKO_DECLARE_LOWER_TRS_SOLVE_KERNEL(ValueType, IndexType); \
template <typename ValueType, typename IndexType> \
Expand Down
Loading