Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

GPU linear solver interface #650

Merged
merged 12 commits into from
Aug 16, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 2 additions & 1 deletion src/Drivers/Sparse/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -84,7 +84,8 @@ if(HIOP_USE_GINKGO)
endif(HIOP_USE_GINKGO)

if(HIOP_USE_RAJA AND HIOP_USE_GPU AND HIOP_USE_CUDA)
add_test(NAME NlpSparseRaja2_1 COMMAND ${RUNCMD} bash -c "$<TARGET_FILE:NlpSparseRajaEx2.exe>" "500" "-cusolver" "-inertiafree" "-selfcheck ")
add_test(NAME NlpSparseRaja2_1 COMMAND ${RUNCMD} "$<TARGET_FILE:NlpSparseRajaEx2.exe>" "500" "-inertiafree" "-selfcheck" "-resolve_cuda_glu")
add_test(NAME NlpSparseRaja2_2 COMMAND ${RUNCMD} "$<TARGET_FILE:NlpSparseRajaEx2.exe>" "500" "-inertiafree" "-selfcheck" "-resolve_cuda_rf")
endif()

add_test(NAME NlpSparse3_1 COMMAND ${RUNCMD} "$<TARGET_FILE:NlpSparseEx3.exe>" "500" "-selfcheck")
Expand Down
83 changes: 51 additions & 32 deletions src/Drivers/Sparse/NlpSparseRajaEx2Driver.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -18,20 +18,20 @@ static bool parse_arguments(int argc,
size_type& n,
bool& self_check,
bool& inertia_free,
bool& use_cusolver,
bool& use_resolve,
bool& use_resolve_cuda_glu,
bool& use_resolve_cuda_rf,
bool& use_ginkgo,
bool& use_ginkgo_cuda,
bool& use_ginkgo_hip)
{
self_check = false;
n = 3;
inertia_free = false;
use_cusolver = false;
use_resolve = false;
use_resolve_cuda_glu = false;
use_resolve_cuda_rf = false;
use_ginkgo = false;
use_ginkgo_cuda = false;
use_ginkgo_cuda = false;
use_ginkgo_hip = false;
switch(argc) {
case 1:
//no arguments
Expand All @@ -43,8 +43,10 @@ static bool parse_arguments(int argc,
self_check = true;
} else if(std::string(argv[4]) == "-inertiafree") {
inertia_free = true;
} else if(std::string(argv[4]) == "-cusolver") {
use_cusolver = true;
} else if(std::string(argv[4]) == "-resolve_cuda_glu") {
use_resolve_cuda_glu = true;
} else if(std::string(argv[4]) == "-resolve_cuda_rf") {
use_resolve_cuda_rf = true;
} else if(std::string(argv[4]) == "-ginkgo"){
use_ginkgo = true;
} else if(std::string(argv[4]) == "-ginkgo_cuda"){
Expand All @@ -66,8 +68,10 @@ static bool parse_arguments(int argc,
self_check = true;
} else if(std::string(argv[3]) == "-inertiafree") {
inertia_free = true;
} else if(std::string(argv[3]) == "-cusolver") {
use_cusolver = true;
} else if(std::string(argv[3]) == "-resolve_cuda_glu") {
use_resolve_cuda_glu = true;
} else if(std::string(argv[3]) == "-resolve_cuda_rf") {
use_resolve_cuda_rf = true;
} else if(std::string(argv[3]) == "-ginkgo"){
use_ginkgo = true;
} else if(std::string(argv[3]) == "-ginkgo_cuda"){
Expand All @@ -89,8 +93,10 @@ static bool parse_arguments(int argc,
self_check = true;
} else if(std::string(argv[2]) == "-inertiafree") {
inertia_free = true;
} else if(std::string(argv[2]) == "-cusolver") {
use_cusolver = true;
} else if(std::string(argv[2]) == "-resolve_cuda_glu") {
use_resolve_cuda_glu = true;
} else if(std::string(argv[2]) == "-resolve_cuda_rf") {
use_resolve_cuda_rf = true;
} else if(std::string(argv[2]) == "-ginkgo"){
use_ginkgo = true;
} else if(std::string(argv[2]) == "-ginkgo_cuda"){
Expand All @@ -112,8 +118,10 @@ static bool parse_arguments(int argc,
self_check = true;
} else if(std::string(argv[1]) == "-inertiafree") {
inertia_free = true;
} else if(std::string(argv[1]) == "-cusolver") {
use_cusolver = true;
} else if(std::string(argv[1]) == "-resolve_cuda_glu") {
use_resolve_cuda_glu = true;
} else if(std::string(argv[1]) == "-resolve_cuda_rf") {
use_resolve_cuda_rf = true;
} else if(std::string(argv[1]) == "-ginkgo"){
use_ginkgo = true;
} else if(std::string(argv[1]) == "-ginkgo_cuda"){
Expand All @@ -134,29 +142,33 @@ static bool parse_arguments(int argc,
return false; // 4 or more arguments
}

// If CUDA is not available, de-select cuSOLVER
// Currently only CUDA backend for ReSolve is available. Unselect ReSolve if CUDA is not enabled
#ifndef HIOP_USE_CUDA
if(use_cusolver) {
if(use_resolve_cuda_glu) {
printf("HiOp built without CUDA support. ");
printf("Using default instead of cuSOLVER ...\n");
use_cusolver = false;
printf("Using default instead of ReSolve ...\n");
use_resolve_cuda_glu = false;
}
#endif

// Use cuSOLVER's LU factorization, if it was configured
#ifdef HIOP_USE_RESOLVE
if(use_cusolver) {
use_resolve = true;
if(use_resolve_cuda_rf) {
printf("HiOp built without CUDA support. ");
printf("Using default instead of ReSolve ...\n");
use_resolve_cuda_rf = false;
}
#endif

// If cuSOLVER was selected, but inertia free approach was not, add inertia-free
if(use_cusolver && !(inertia_free)) {
// If ReSolve was selected, but inertia free approach was not, add inertia-free
if((use_resolve_cuda_glu || use_resolve_cuda_rf) && !(inertia_free)) {
inertia_free = true;
printf("LU solver from cuSOLVER library requires inertia free approach. ");
printf("LU solver from ReSolve library requires inertia free approach. ");
printf("Enabling now ...\n");
}

if(use_resolve_cuda_glu && use_resolve_cuda_rf) {
use_resolve_cuda_rf = false;
printf("You can select either GLU or Rf refactorization with ReSolve, not both. ");
printf("Using default GLU refactorization ...\n");
}

// If Ginkgo is not available, de-select it.
#ifndef HIOP_USE_GINKGO
if(use_ginkgo) {
Expand Down Expand Up @@ -185,7 +197,8 @@ static void usage(const char* exeName)
printf(" '-inertiafree': indicate if inertia free approach should be used [optional]\n");
printf(" '-selfcheck': compares the optimal objective with a previously saved value for the "
"problem specified by 'problem_size'. [optional]\n");
printf(" '-cusolver': use cuSOLVER linear solver [optional]\n");
printf(" '-use_resolve_cuda_glu': use ReSolve linear solver with KLU factorization and cusolverGLU refactorization [optional]\n");
printf(" '-use_resolve_cuda_rf' : use ReSolve linear solver with KLU factorization and cusolverRf refactorization [optional]\n");
printf(" '-ginkgo': use GINKGO linear solver [optional]\n");
}

Expand Down Expand Up @@ -215,12 +228,12 @@ int main(int argc, char **argv)
bool selfCheck = false;
size_type n = 50;
bool inertia_free = false;
bool use_cusolver = false;
bool use_resolve = false;
bool use_ginkgo = false;
bool use_resolve_cuda_glu = false;
bool use_resolve_cuda_rf = false;
bool use_ginkgo = false;
bool use_ginkgo_cuda = false;
bool use_ginkgo_hip = false;
if(!parse_arguments(argc, argv, n, selfCheck, inertia_free, use_cusolver, use_resolve, use_ginkgo, use_ginkgo_cuda, use_ginkgo_hip)) {
bool use_ginkgo_hip = false;
if(!parse_arguments(argc, argv, n, selfCheck, inertia_free, use_resolve_cuda_glu, use_resolve_cuda_rf, use_ginkgo, use_ginkgo_cuda, use_ginkgo_hip)) {
usage(argv[0]);
#ifdef HIOP_USE_MPI
MPI_Finalize();
Expand All @@ -243,6 +256,12 @@ int main(int argc, char **argv)
// only support cusolverLU right now, 2023.02.28
//lsq initialization of the duals fails for this example since the Jacobian is rank deficient
//use zero initialization
nlp.options->SetStringValue("linear_solver_sparse", "resolve");
if(use_resolve_cuda_rf) {
nlp.options->SetStringValue("resolve_refactorization", "rf");
nlp.options->SetIntegerValue("ir_inner_maxit", 20);
nlp.options->SetIntegerValue("ir_outer_maxit", 0);
}
nlp.options->SetStringValue("duals_init", "zero");
nlp.options->SetStringValue("mem_space", "device");
nlp.options->SetStringValue("fact_acceptor", "inertia_free");
Expand Down
30 changes: 23 additions & 7 deletions src/LinAlg/ReSolve/MatrixCsr.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -78,13 +78,7 @@ namespace ReSolve {
if(n_ == 0)
return;

cudaFree(irows_);
cudaFree(jcols_);
cudaFree(vals_);

delete [] irows_host_;
delete [] jcols_host_;
delete [] vals_host_ ;
clear_data();
}

void MatrixCsr::allocate_size(int n)
Expand All @@ -103,6 +97,28 @@ namespace ReSolve {
vals_host_ = new double[nnz_]{0};
}

void MatrixCsr::clear_data()
{
  // Free the device-side CSR arrays; cudaFree(nullptr) is a harmless no-op,
  // so this is safe even if nothing was allocated.
  checkCudaErrors(cudaFree(irows_));
  irows_ = nullptr;
  checkCudaErrors(cudaFree(jcols_));
  jcols_ = nullptr;
  checkCudaErrors(cudaFree(vals_));
  vals_ = nullptr;

  // Free the host-side mirror arrays (delete[] on nullptr is also a no-op).
  delete [] irows_host_;
  irows_host_ = nullptr;
  delete [] jcols_host_;
  jcols_host_ = nullptr;
  delete [] vals_host_;
  vals_host_ = nullptr;

  // Mark the matrix as empty so subsequent use sees a consistent state.
  nnz_ = 0;
  n_ = 0;
}

void MatrixCsr::update_from_host_mirror()
{
checkCudaErrors(cudaMemcpy(irows_, irows_host_, sizeof(int) * (n_+1), cudaMemcpyHostToDevice));
Expand Down
1 change: 1 addition & 0 deletions src/LinAlg/ReSolve/MatrixCsr.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@ class MatrixCsr
~MatrixCsr();
void allocate_size(int n);
void allocate_nnz(int nnz);
void clear_data();

int* get_irows()
{
Expand Down
Loading
Loading