Skip to content

Commit

Permalink
Update with the new upstream changes.
Browse files Browse the repository at this point in the history
  • Loading branch information
pratikvn committed Sep 6, 2019
1 parent 1e53dbe commit ff77fb1
Show file tree
Hide file tree
Showing 3 changed files with 132 additions and 53 deletions.
14 changes: 7 additions & 7 deletions core/solver/upper_trs_kernels.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -48,7 +48,7 @@ namespace kernels {
namespace upper_trs {


/**
 * Declares the `should_perform_transpose` kernel for the executor type named
 * by `DefaultExecutor`. The kernel reports its decision through the
 * `do_transpose` reference parameter.
 */
#define GKO_DECLARE_UPPER_TRS_SHOULD_PERFORM_TRANSPOSE_KERNEL()                \
    void should_perform_transpose(std::shared_ptr<const DefaultExecutor> exec, \
                                  bool &do_transpose)

Expand All @@ -73,12 +73,12 @@ namespace upper_trs {
const matrix::Dense<_vtype> *b, matrix::Dense<_vtype> *x)


/**
 * Declares all upper_trs kernels in one go. The solve and generate kernels
 * are declared as templates over `ValueType` and `IndexType`; the other two
 * declarations take no type arguments.
 */
#define GKO_DECLARE_ALL_AS_TEMPLATES                          \
    GKO_DECLARE_UPPER_TRS_SHOULD_PERFORM_TRANSPOSE_KERNEL();  \
    GKO_DECLARE_UPPER_TRS_INIT_STRUCT_KERNEL();               \
    template <typename ValueType, typename IndexType>         \
    GKO_DECLARE_UPPER_TRS_SOLVE_KERNEL(ValueType, IndexType); \
    template <typename ValueType, typename IndexType>         \
    GKO_DECLARE_UPPER_TRS_GENERATE_KERNEL(ValueType, IndexType)


Expand Down
95 changes: 83 additions & 12 deletions cuda/solver/upper_trs_kernels.cu
Original file line number Diff line number Diff line change
Expand Up @@ -64,28 +64,99 @@ namespace upper_trs {


/**
 * Sets `do_transpose` according to the CUDA toolkit version this file is
 * compiled against.
 *
 * @param exec  the CUDA executor; not used by this kernel.
 * @param do_transpose  output flag: set to `false` when CUDA_VERSION >= 9020
 *                      and to `true` when CUDA_VERSION < 9020.
 *
 * NOTE(review): when CUDA_VERSION is not defined, neither branch is compiled
 * and `do_transpose` keeps whatever value the caller passed in -- confirm
 * that this is intended.
 */
void should_perform_transpose(std::shared_ptr<const CudaExecutor> exec,
                              bool &do_transpose)
{
#if (defined(CUDA_VERSION) && (CUDA_VERSION >= 9020))


    do_transpose = false;


#elif (defined(CUDA_VERSION) && (CUDA_VERSION < 9020))


    do_transpose = true;


#endif
}


void init_struct(std::shared_ptr<const CudaExecutor> exec,
std::shared_ptr<gko::solver::SolveStruct> &solve_struct)
std::shared_ptr<solver::SolveStruct> &solve_struct)
{
const auto id = exec->get_device_id();
device_guard g(id);
solve_struct = std::shared_ptr<gko::solver::SolveStruct>(
kernels::cuda::cusparse::init_trs_solve_struct(),
[id](gko::solver::SolveStruct *solve_struct_) {
device_guard g(id);
kernels::cuda::cusparse::clear_trs_solve_struct(solve_struct_);
});
solve_struct =
std::shared_ptr<solver::SolveStruct>(new solver::SolveStruct());
}


/**
 * Runs the cuSPARSE analysis phase for an upper triangular CSR matrix and
 * stores the results in `solve_struct`.
 *
 * When `cusparse::is_supported<ValueType, IndexType>` is false the kernel
 * falls through to GKO_NOT_IMPLEMENTED.
 *
 * @param exec  the CUDA executor; provides the cuSPARSE handle and, on the
 *              CUDA_VERSION >= 9020 path, the workspace allocation.
 * @param matrix  the CSR matrix whose values, row pointers and column
 *                indices are handed to the analysis routine.
 * @param solve_struct  holds the matrix descriptor, solve info and, on the
 *                      CUDA_VERSION >= 9020 path, the workspace size and
 *                      workspace pointer that this kernel overwrites.
 * @param num_rhs  number of right-hand sides; only used on the
 *                 CUDA_VERSION >= 9020 path.
 */
template <typename ValueType, typename IndexType>
void generate(std::shared_ptr<const CudaExecutor> exec,
              const matrix::Csr<ValueType, IndexType> *matrix,
              solver::SolveStruct *solve_struct, const gko::size_type num_rhs)
{
    if (cusparse::is_supported<ValueType, IndexType>::value) {
        auto handle = exec->get_cusparse_handle();
        // Mark the factor as upper triangular before any analysis call.
        GKO_ASSERT_NO_CUSPARSE_ERRORS(cusparseSetMatFillMode(
            solve_struct->factor_descr, CUSPARSE_FILL_MODE_UPPER));


#if (defined(CUDA_VERSION) && (CUDA_VERSION >= 9020))


        ValueType one = 1.0;

        // `one` lives on the host, so the handle is switched to host pointer
        // mode for the calls below and switched back to device mode after.
        GKO_ASSERT_NO_CUSPARSE_ERRORS(
            cusparseSetPointerMode(handle, CUSPARSE_POINTER_MODE_HOST));
        // Query the workspace size; the result lands in factor_work_size.
        cusparse::buffer_size_ext(
            handle, solve_struct->algorithm, CUSPARSE_OPERATION_NON_TRANSPOSE,
            CUSPARSE_OPERATION_TRANSPOSE, matrix->get_size()[0], num_rhs,
            matrix->get_num_stored_elements(), &one, solve_struct->factor_descr,
            matrix->get_const_values(), matrix->get_const_row_ptrs(),
            matrix->get_const_col_idxs(), nullptr, num_rhs,
            solve_struct->solve_info, solve_struct->policy,
            &solve_struct->factor_work_size);

        // allocate workspace, releasing any buffer left by an earlier call
        if (solve_struct->factor_work_vec != nullptr) {
            GKO_ASSERT_NO_CUDA_ERRORS(cudaFree(solve_struct->factor_work_vec));
        }
        // NOTE(review): alloc<void *> is given factor_work_size as its
        // count. If alloc counts elements rather than bytes, this reserves
        // sizeof(void *) times the queried size -- confirm against the
        // executor's alloc contract.
        solve_struct->factor_work_vec =
            exec->alloc<void *>(solve_struct->factor_work_size);

        cusparse::csrsm2_analysis(
            handle, solve_struct->algorithm, CUSPARSE_OPERATION_NON_TRANSPOSE,
            CUSPARSE_OPERATION_TRANSPOSE, matrix->get_size()[0], num_rhs,
            matrix->get_num_stored_elements(), &one, solve_struct->factor_descr,
            matrix->get_const_values(), matrix->get_const_row_ptrs(),
            matrix->get_const_col_idxs(), nullptr, num_rhs,
            solve_struct->solve_info, solve_struct->policy,
            solve_struct->factor_work_vec);
        GKO_ASSERT_NO_CUSPARSE_ERRORS(
            cusparseSetPointerMode(handle, CUSPARSE_POINTER_MODE_DEVICE));


#elif (defined(CUDA_VERSION) && (CUDA_VERSION < 9020))


        // This path needs neither a workspace nor num_rhs.
        GKO_ASSERT_NO_CUSPARSE_ERRORS(
            cusparseSetPointerMode(handle, CUSPARSE_POINTER_MODE_HOST));
        cusparse::csrsm_analysis(
            handle, CUSPARSE_OPERATION_NON_TRANSPOSE, matrix->get_size()[0],
            matrix->get_num_stored_elements(), solve_struct->factor_descr,
            matrix->get_const_values(), matrix->get_const_row_ptrs(),
            matrix->get_const_col_idxs(), solve_struct->solve_info);
        GKO_ASSERT_NO_CUSPARSE_ERRORS(
            cusparseSetPointerMode(handle, CUSPARSE_POINTER_MODE_DEVICE));


#endif


    } else {
        GKO_NOT_IMPLEMENTED;
    }
}

// Instantiate generate() through the project's value/index type macro.
GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE(
    GKO_DECLARE_UPPER_TRS_GENERATE_KERNEL);
Expand Down
76 changes: 42 additions & 34 deletions cuda/test/solver/upper_trs_kernels.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -33,14 +33,12 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#include <ginkgo/core/solver/upper_trs.hpp>


#include <gtest/gtest.h>


#include <memory>
#include <random>


#include <cuda.h>
#include <gtest/gtest.h>


#include <ginkgo/core/base/exception.hpp>
Expand All @@ -60,6 +58,7 @@ class UpperTrs : public ::testing::Test {
protected:
using CsrMtx = gko::matrix::Csr<double, gko::int32>;
using Mtx = gko::matrix::Dense<>;

UpperTrs() : rand_engine(30) {}

void SetUp()
Expand Down Expand Up @@ -92,7 +91,32 @@ class UpperTrs : public ::testing::Test {
std::normal_distribution<>(-1.0, 1.0), rand_engine, ref);
}

// Fills the fixture members with an m x m system matrix and m x n
// right-hand-side / solution blocks, then mirrors them on the CUDA executor.
void initialize_data(int m, int n)
{
    // Generated data; the generators are called in the order mtx, b, x.
    mtx = gen_u_mtx(m, m);
    b = gen_mtx(m, n);
    x = gen_mtx(m, n);

    // Reference side: CSR form of the system matrix and a copy of b.
    csr_mtx = CsrMtx::create(ref);
    mtx->convert_to(csr_mtx.get());
    b2 = Mtx::create(ref);
    b2->copy_from(b.get());

    // CUDA side: copies of the CSR matrix, the initial x and b.
    d_csr_mtx = CsrMtx::create(cuda);
    d_csr_mtx->copy_from(csr_mtx.get());
    d_x = Mtx::create(cuda);
    d_x->copy_from(x.get());
    d_b2 = Mtx::create(cuda);
    d_b2->copy_from(b.get());
}

std::shared_ptr<Mtx> b;
std::shared_ptr<Mtx> b2;
std::shared_ptr<Mtx> x;
std::shared_ptr<Mtx> mtx;
std::shared_ptr<CsrMtx> csr_mtx;
std::shared_ptr<Mtx> d_b;
std::shared_ptr<Mtx> d_b2;
std::shared_ptr<Mtx> d_x;
std::shared_ptr<CsrMtx> d_csr_mtx;
std::shared_ptr<gko::ReferenceExecutor> ref;
std::shared_ptr<const gko::CudaExecutor> cuda;
std::ranlux48 rand_engine;
Expand All @@ -103,65 +127,49 @@ TEST_F(UpperTrs, CudaUpperTrsFlagCheckIsCorrect)
{
bool trans_flag = true;
bool expected_flag = false;
#if (defined(CUDA_VERSION) && (CUDA_VERSION >= 9020))
expected_flag = false;
#elif (defined(CUDA_VERSION) && (CUDA_VERSION < 9020))


#if (defined(CUDA_VERSION) && (CUDA_VERSION < 9020))


expected_flag = true;


#endif
gko::kernels::cuda::upper_trs::perform_transpose(cuda, trans_flag);


gko::kernels::cuda::upper_trs::should_perform_transpose(cuda, trans_flag);

ASSERT_EQ(expected_flag, trans_flag);
}


// Solves one 50 x 1 system with a solver built on the reference executor and
// one built on the CUDA executor, then checks both solutions agree to 1e-14.
TEST_F(UpperTrs, CudaSingleRhsApplyIsEquivalentToRef)
{
    // All matrices come from the fixture helper; no local copies are declared
    // here so that nothing shadows the fixture members it fills in.
    initialize_data(50, 1);
    auto upper_trs_factory = gko::solver::UpperTrs<>::build().on(ref);
    auto d_upper_trs_factory = gko::solver::UpperTrs<>::build().on(cuda);
    auto solver = upper_trs_factory->generate(csr_mtx);
    auto d_solver = d_upper_trs_factory->generate(d_csr_mtx);

    solver->apply(b2.get(), x.get());
    d_solver->apply(d_b2.get(), d_x.get());

    GKO_ASSERT_MTX_NEAR(d_x, x, 1e-14);
}


TEST_F(UpperTrs, CudaMultipleRhsApplyIsEquivalentToRef)
{
std::shared_ptr<Mtx> mtx = gen_u_mtx(50, 50);
std::shared_ptr<Mtx> b = gen_mtx(50, 3);
std::shared_ptr<Mtx> x = gen_mtx(50, 3);
std::shared_ptr<CsrMtx> csr_mtx = CsrMtx::create(ref);
mtx->convert_to(csr_mtx.get());
std::shared_ptr<CsrMtx> d_csr_mtx = CsrMtx::create(cuda);
auto d_x = Mtx::create(cuda);
d_x->copy_from(x.get());
d_csr_mtx->copy_from(csr_mtx.get());
std::shared_ptr<Mtx> b2 = Mtx::create(ref);
std::shared_ptr<Mtx> d_b2 = Mtx::create(cuda);
d_b2->copy_from(b.get());
b2->copy_from(b.get());
initialize_data(50, 3);

auto upper_trs_factory =
gko::solver::UpperTrs<>::build().with_num_rhs(3u).on(ref);
auto d_upper_trs_factory =
gko::solver::UpperTrs<>::build().with_num_rhs(3u).on(cuda);
auto solver = upper_trs_factory->generate(csr_mtx);
auto d_solver = d_upper_trs_factory->generate(d_csr_mtx);

solver->apply(b2.get(), x.get());
d_solver->apply(d_b2.get(), d_x.get());

Expand Down

0 comments on commit ff77fb1

Please sign in to comment.