From d806abf828f34246373ce34665cb9e298ae2b40e Mon Sep 17 00:00:00 2001 From: Slaven Peles Date: Sun, 15 Oct 2023 17:27:49 -0400 Subject: [PATCH 01/15] Don't build CUDA examples and tests when CUDA is disabled. --- CMakeLists.txt | 6 +- examples/CMakeLists.txt | 41 +++++++------ examples/r_KLU_KLU.cpp | 23 ++++---- examples/r_KLU_KLU_standalone.cpp | 13 ++--- resolve/cuda/CMakeLists.txt | 2 + resolve/{vector => cuda}/cudaVectorKernels.cu | 2 +- resolve/{vector => cuda}/cudaVectorKernels.h | 0 resolve/matrix/CMakeLists.txt | 8 +++ resolve/vector/CMakeLists.txt | 22 +++---- tests/CMakeLists.txt | 5 +- tests/functionality/CMakeLists.txt | 43 ++++++++------ tests/functionality/testKLU.cpp | 57 +++++++++---------- 12 files changed, 122 insertions(+), 100 deletions(-) rename resolve/{vector => cuda}/cudaVectorKernels.cu (92%) rename resolve/{vector => cuda}/cudaVectorKernels.h (100%) diff --git a/CMakeLists.txt b/CMakeLists.txt index 5715e6e2..635fa390 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -130,10 +130,8 @@ install(FILES "${CMAKE_CURRENT_BINARY_DIR}/ReSolveConfig.cmake" "${CMAKE_CURRENT_BINARY_DIR}/ReSolveConfigVersion.cmake" DESTINATION share/resolve/cmake) -# Add examples (for now only CUDA examples are available) -if(RESOLVE_USE_CUDA) - add_subdirectory(examples) -endif(RESOLVE_USE_CUDA) +# Add usage examples +add_subdirectory(examples) # Add tests add_subdirectory(tests) diff --git a/examples/CMakeLists.txt b/examples/CMakeLists.txt index 73dce68e..6250400b 100644 --- a/examples/CMakeLists.txt +++ b/examples/CMakeLists.txt @@ -14,30 +14,39 @@ target_link_libraries(klu_klu.exe PRIVATE ReSolve) add_executable(klu_klu_standalone.exe r_KLU_KLU_standalone.cpp) target_link_libraries(klu_klu_standalone.exe PRIVATE ReSolve) -# Build example with KLU factorization and GLU refactorization -add_executable(klu_glu.exe r_KLU_GLU.cpp) -target_link_libraries(klu_glu.exe PRIVATE ReSolve) +if(RESOLVE_USE_CUDA) + + # Build example with KLU factorization and GLU refactorization + add_executable(klu_glu.exe r_KLU_GLU.cpp) + target_link_libraries(klu_glu.exe PRIVATE ReSolve) -# Build example with KLU factorization and Rf refactorization -add_executable(klu_rf.exe r_KLU_rf.cpp) -target_link_libraries(klu_rf.exe PRIVATE ReSolve) + # Build example with KLU factorization and Rf refactorization + add_executable(klu_rf.exe r_KLU_rf.cpp) + target_link_libraries(klu_rf.exe PRIVATE ReSolve) -# Build example with KLU factorization, Rf refactorization, and FGMRES iterative refinement -add_executable(klu_rf_fgmres.exe r_KLU_rf_FGMRES.cpp) -target_link_libraries(klu_rf_fgmres.exe PRIVATE ReSolve) + # Build example with KLU factorization, Rf refactorization, and FGMRES iterative refinement + add_executable(klu_rf_fgmres.exe r_KLU_rf_FGMRES.cpp) + target_link_libraries(klu_rf_fgmres.exe PRIVATE ReSolve) -# Build example where matrix is factorized once, refactorized once and then the preconditioner is REUSED -add_executable(klu_rf_fgmres_reuse_refactorization.exe r_KLU_rf_FGMRES_reuse_factorization.cpp) -target_link_libraries(klu_rf_fgmres_reuse_refactorization.exe PRIVATE ReSolve) + # Build example where matrix is factorized once, refactorized once and then the preconditioner is REUSED + add_executable(klu_rf_fgmres_reuse_refactorization.exe r_KLU_rf_FGMRES_reuse_factorization.cpp) + target_link_libraries(klu_rf_fgmres_reuse_refactorization.exe PRIVATE ReSolve) -# Build example where matrix data is updated -add_executable(klu_glu_values_update.exe r_KLU_GLU_matrix_values_update.cpp) -target_link_libraries(klu_glu_values_update.exe PRIVATE ReSolve) + # Build example where matrix data is updated + add_executable(klu_glu_values_update.exe r_KLU_GLU_matrix_values_update.cpp) + target_link_libraries(klu_glu_values_update.exe PRIVATE ReSolve) + +endif(RESOLVE_USE_CUDA) # Install all examples in bin directory -set(installable_executables klu_klu.exe klu_klu_standalone.exe klu_glu.exe klu_rf.exe klu_rf_fgmres.exe klu_glu_values_update.exe) +set(installable_executables klu_klu.exe klu_klu_standalone.exe) + +if(RESOLVE_USE_CUDA) + set(installable_executables ${installable_executables} klu_glu.exe klu_rf.exe klu_rf_fgmres.exe klu_glu_values_update.exe) +endif(RESOLVE_USE_CUDA) + install(TARGETS ${installable_executables} RUNTIME DESTINATION bin) diff --git a/examples/r_KLU_KLU.cpp b/examples/r_KLU_KLU.cpp index 63aad4b5..439c8f41 100644 --- a/examples/r_KLU_KLU.cpp +++ b/examples/r_KLU_KLU.cpp @@ -35,10 +35,9 @@ int main(int argc, char *argv[]) ReSolve::matrix::Coo* A_coo; ReSolve::matrix::Csr* A; - ReSolve::LinAlgWorkspaceCUDA* workspace_CUDA = new ReSolve::LinAlgWorkspaceCUDA; - workspace_CUDA->initializeHandles(); - ReSolve::MatrixHandler* matrix_handler = new ReSolve::MatrixHandler(workspace_CUDA); - ReSolve::VectorHandler* vector_handler = new ReSolve::VectorHandler(workspace_CUDA); + ReSolve::LinAlgWorkspace* workspace = ReSolve::createLinAlgWorkspace("cpu"); + ReSolve::MatrixHandler* matrix_handler = new ReSolve::MatrixHandler(workspace); + ReSolve::VectorHandler* vector_handler = new ReSolve::VectorHandler(workspace); real_type* rhs; real_type* x; @@ -96,7 +95,11 @@ int main(int argc, char *argv[]) ReSolve::io::readAndUpdateMatrix(mat_file, A_coo); ReSolve::io::readAndUpdateRhs(rhs_file, &rhs); } - std::cout<<"Finished reading the matrix and rhs, size: "<getNumRows()<<" x "<getNumColumns()<< ", nnz: "<< A->getNnz()<< ", symmetric? "<symmetric()<< ", Expanded? "<expanded()<getNumRows() + << " x " << A->getNumColumns() + << ", nnz: " << A->getNnz() + << ", symmetric? " << A->symmetric() + << ", Expanded? " << A->expanded() << std::endl; mat_file.close(); rhs_file.close(); @@ -106,8 +109,8 @@ int main(int argc, char *argv[]) vec_rhs->update(rhs, "cpu", "cpu"); vec_rhs->setDataUpdated("cpu"); } else { - matrix_handler->coo2csr(A_coo, A, "cuda"); - vec_rhs->update(rhs, "cpu", "cuda"); + matrix_handler->coo2csr(A_coo, A, "cpu"); + vec_rhs->update(rhs, "cpu", "cpu"); } std::cout<<"COO to CSR completed. Expanded NNZ: "<< A->getNnzExpanded()<solve(vec_rhs, vec_x); std::cout<<"KLU solve status: "<update(rhs, "cpu", "cuda"); + // vec_r->update(rhs, "cpu", "cuda"); matrix_handler->setValuesChanged(true); - matrix_handler->matvec(A, vec_x, vec_r, &ONE, &MINUSONE, "csr", "cuda"); + matrix_handler->matvec(A, vec_x, vec_r, &ONE, &MINUSONE, "csr", "cpu"); real_type* test = vec_r->getData("cpu"); (void) test; // TODO: Do we need `test` variable in this example? - printf("\t 2-Norm of the residual: %16.16e\n", sqrt(vector_handler->dot(vec_r, vec_r, "cuda"))); + printf("\t 2-Norm of the residual: %16.16e\n", sqrt(vector_handler->dot(vec_r, vec_r, "cpu"))); } diff --git a/examples/r_KLU_KLU_standalone.cpp b/examples/r_KLU_KLU_standalone.cpp index fd84efe0..3323b68b 100644 --- a/examples/r_KLU_KLU_standalone.cpp +++ b/examples/r_KLU_KLU_standalone.cpp @@ -31,10 +31,9 @@ int main(int argc, char *argv[]) ReSolve::matrix::Coo* A_coo; ReSolve::matrix::Csr* A; - ReSolve::LinAlgWorkspaceCUDA* workspace_CUDA = new ReSolve::LinAlgWorkspaceCUDA; - workspace_CUDA->initializeHandles(); - ReSolve::MatrixHandler* matrix_handler = new ReSolve::MatrixHandler(workspace_CUDA); - ReSolve::VectorHandler* vector_handler = new ReSolve::VectorHandler(workspace_CUDA); + ReSolve::LinAlgWorkspace* workspace = ReSolve::createLinAlgWorkspace("cpu"); + ReSolve::MatrixHandler* matrix_handler = new ReSolve::MatrixHandler(workspace); + ReSolve::VectorHandler* vector_handler = new ReSolve::VectorHandler(workspace); real_type* rhs; real_type* x; @@ -95,15 +94,15 @@ int main(int argc, char *argv[]) std::cout << "KLU factorization status: " << status << std::endl; status = KLU->solve(vec_rhs, vec_x); std::cout << "KLU solve status: " << status << std::endl; - vec_r->update(rhs, "cpu", "cuda"); + vec_r->update(rhs, "cpu", "cpu"); matrix_handler->setValuesChanged(true); - matrix_handler->matvec(A, vec_x, vec_r, &ONE, &MINUSONE, "csr", "cuda"); + matrix_handler->matvec(A, vec_x, vec_r, &ONE, &MINUSONE, "csr", "cpu"); real_type* test = vec_r->getData("cpu"); (void) test; // TODO: Do we need `test` variable in this example? - printf("\t 2-Norm of the residual: %16.16e\n", sqrt(vector_handler->dot(vec_r, vec_r, "cuda"))); + printf("\t 2-Norm of the residual: %16.16e\n", sqrt(vector_handler->dot(vec_r, vec_r, "cpu"))); diff --git a/resolve/cuda/CMakeLists.txt b/resolve/cuda/CMakeLists.txt index d3ead313..04a9c2ff 100644 --- a/resolve/cuda/CMakeLists.txt +++ b/resolve/cuda/CMakeLists.txt @@ -8,11 +8,13 @@ set(ReSolve_CUDA_SRC cudaKernels.cu + cudaVectorKernels.cu MemoryUtils.cu ) set(ReSolve_CUDA_HEADER_INSTALL cudaKernels.h + cudaVectorKernels.h CudaMemory.hpp cuda_check_errors.hpp ) diff --git a/resolve/vector/cudaVectorKernels.cu b/resolve/cuda/cudaVectorKernels.cu similarity index 92% rename from resolve/vector/cudaVectorKernels.cu rename to resolve/cuda/cudaVectorKernels.cu index ea54a880..a1c53198 100644 --- a/resolve/vector/cudaVectorKernels.cu +++ b/resolve/cuda/cudaVectorKernels.cu @@ -1,5 +1,5 @@ #include "cudaVectorKernels.h" -#include "VectorKernels.hpp" +#include namespace ReSolve { namespace vector { diff --git a/resolve/vector/cudaVectorKernels.h b/resolve/cuda/cudaVectorKernels.h similarity index 100% rename from resolve/vector/cudaVectorKernels.h rename to resolve/cuda/cudaVectorKernels.h diff --git a/resolve/matrix/CMakeLists.txt b/resolve/matrix/CMakeLists.txt index 89ac925d..289f805a 100644 --- a/resolve/matrix/CMakeLists.txt +++ b/resolve/matrix/CMakeLists.txt @@ -1,3 +1,11 @@ +#[[ + +@brief Build ReSolve matrix module + +@author Slaven Peles + +]] + set(Matrix_SRC io.cpp diff --git a/resolve/vector/CMakeLists.txt b/resolve/vector/CMakeLists.txt index 57feb5df..8fa9cd59 100644 --- a/resolve/vector/CMakeLists.txt +++ b/resolve/vector/CMakeLists.txt @@ -1,3 +1,11 @@ +#[[ + +@brief Build ReSolve (multi)vector module + +@author Slaven Peles + +]] + set(Vector_SRC Vector.cpp @@ -5,25 +13,17 @@ set(Vector_SRC ) - -set(Vector_SRC_CUDA - cudaVectorKernels.cu -) - set(Vector_HEADER_INSTALL Vector.hpp VectorHandler.hpp + VectorKernels.hpp ) -if (RESOLVE_USE_CUDA) - set_source_files_properties(${Vector_SRC_CUDA} PROPERTIES LANGUAGE CUDA) +add_library(resolve_vector SHARED ${Vector_SRC}) - # Build shared library ReSolve::vector - add_library(resolve_vector SHARED ${Vector_SRC} ${Vector_SRC_CUDA}) +if (RESOLVE_USE_CUDA) target_link_libraries(resolve_vector PUBLIC resolve_backend_cuda) else() - # Build shared library ReSolve::vector - add_library(resolve_vector SHARED ${Vector_SRC}) target_link_libraries(resolve_vector PUBLIC resolve_backend_cpu) endif() diff --git a/tests/CMakeLists.txt b/tests/CMakeLists.txt index ae24f9db..19b799e4 100644 --- a/tests/CMakeLists.txt +++ b/tests/CMakeLists.txt @@ -6,9 +6,6 @@ ]] -if(RESOLVE_USE_CUDA) - add_subdirectory(functionality) -endif(RESOLVE_USE_CUDA) - +add_subdirectory(functionality) add_subdirectory(unit) diff --git a/tests/functionality/CMakeLists.txt b/tests/functionality/CMakeLists.txt index e2a6b4b6..a6652c26 100644 --- a/tests/functionality/CMakeLists.txt +++ b/tests/functionality/CMakeLists.txt @@ -10,26 +10,31 @@ add_executable(klu_klu_test.exe testKLU.cpp) target_link_libraries(klu_klu_test.exe PRIVATE ReSolve) -# Build KLU+Rf test -add_executable(klu_rf_test.exe testKLU_Rf.cpp) -target_link_libraries(klu_rf_test.exe PRIVATE ReSolve) +if(RESOLVE_USE_CUDA) + + # Build KLU+Rf test + add_executable(klu_rf_test.exe testKLU_Rf.cpp) + target_link_libraries(klu_rf_test.exe PRIVATE ReSolve) -# Build KLU+Rf+fgmres test -add_executable(klu_rf_fgmres_test.exe testKLU_Rf_FGMRES.cpp) -target_link_libraries(klu_rf_fgmres_test.exe PRIVATE ReSolve) + # Build KLU+Rf+fgmres test + add_executable(klu_rf_fgmres_test.exe testKLU_Rf_FGMRES.cpp) + target_link_libraries(klu_rf_fgmres_test.exe PRIVATE ReSolve) + # Build KLU+GLU test + add_executable(klu_glu_test.exe testKLU_GLU.cpp) + target_link_libraries(klu_glu_test.exe PRIVATE ReSolve) - -# Build KLU+GLU test -add_executable(klu_glu_test.exe testKLU_GLU.cpp) -target_link_libraries(klu_glu_test.exe PRIVATE ReSolve) +endif(RESOLVE_USE_CUDA) # Install tests -set(installable_tests - klu_klu_test.exe - klu_rf_test.exe - klu_rf_fgmres_test.exe - klu_glu_test.exe) +set(installable_tests klu_klu_test.exe) + +if(RESOLVE_USE_CUDA) + set(installable_tests ${installable_tests} + klu_rf_test.exe + klu_rf_fgmres_test.exe + klu_glu_test.exe) +endif(RESOLVE_USE_CUDA) install(TARGETS ${installable_tests} RUNTIME DESTINATION bin/resolve/tests/functionality) @@ -40,6 +45,8 @@ install(DIRECTORY data DESTINATION bin/resolve/tests/functionality) set(test_data_dir ${CMAKE_SOURCE_DIR}/tests/functionality/) add_test(NAME klu_klu_test COMMAND $ "${test_data_dir}") -add_test(NAME klu_rf_test COMMAND $ "${test_data_dir}") -add_test(NAME klu_rf_fgmres_test COMMAND $ "${test_data_dir}") -add_test(NAME klu_glu_test COMMAND $ "${test_data_dir}") +if(RESOLVE_USE_CUDA) + add_test(NAME klu_rf_test COMMAND $ "${test_data_dir}") + add_test(NAME klu_rf_fgmres_test COMMAND $ "${test_data_dir}") + add_test(NAME klu_glu_test COMMAND $ "${test_data_dir}") +endif(RESOLVE_USE_CUDA) diff --git a/tests/functionality/testKLU.cpp b/tests/functionality/testKLU.cpp index d5ff9d6d..229dfc1d 100644 --- a/tests/functionality/testKLU.cpp +++ b/tests/functionality/testKLU.cpp @@ -26,10 +26,9 @@ int main(int argc, char *argv[]) int error_sum = 0; int status = 0; - ReSolve::LinAlgWorkspaceCUDA* workspace_CUDA = new ReSolve::LinAlgWorkspaceCUDA; - workspace_CUDA->initializeHandles(); - ReSolve::MatrixHandler* matrix_handler = new ReSolve::MatrixHandler(workspace_CUDA); - ReSolve::VectorHandler* vector_handler = new ReSolve::VectorHandler(workspace_CUDA); + ReSolve::LinAlgWorkspace* workspace = ReSolve::createLinAlgWorkspace("cpu"); + ReSolve::MatrixHandler* matrix_handler = new ReSolve::MatrixHandler(workspace); + ReSolve::VectorHandler* vector_handler = new ReSolve::VectorHandler(workspace); ReSolve::LinSolverDirectKLU* KLU = new ReSolve::LinSolverDirectKLU; KLU->setupParameters(1, 0.1, false); @@ -101,32 +100,32 @@ int main(int argc, char *argv[]) } vec_test->setData(x_data, "cpu"); - vec_r->update(rhs, "cpu", "cuda"); - vec_diff->update(x_data, "cpu", "cuda"); + vec_r->update(rhs, "cpu", "cpu"); + vec_diff->update(x_data, "cpu", "cpu"); - // real_type normXmatrix1 = sqrt(vector_handler->dot(vec_test, vec_test, "cuda")); + // real_type normXmatrix1 = sqrt(vector_handler->dot(vec_test, vec_test, "cpu")); matrix_handler->setValuesChanged(true); - status = matrix_handler->matvec(A, vec_x, vec_r, &ONE, &MINUSONE,"csr","cuda"); + status = matrix_handler->matvec(A, vec_x, vec_r, &ONE, &MINUSONE,"csr","cpu"); error_sum += status; - real_type normRmatrix1 = sqrt(vector_handler->dot(vec_r, vec_r, "cuda")); + real_type normRmatrix1 = sqrt(vector_handler->dot(vec_r, vec_r, "cpu")); //for testing only - control - real_type normXtrue = sqrt(vector_handler->dot(vec_x, vec_x, "cuda")); - real_type normB1 = sqrt(vector_handler->dot(vec_rhs, vec_rhs, "cuda")); + real_type normXtrue = sqrt(vector_handler->dot(vec_x, vec_x, "cpu")); + real_type normB1 = sqrt(vector_handler->dot(vec_rhs, vec_rhs, "cpu")); //compute x-x_true - vector_handler->axpy(&MINUSONE, vec_x, vec_diff, "cuda"); + vector_handler->axpy(&MINUSONE, vec_x, vec_diff, "cpu"); //evaluate its norm - real_type normDiffMatrix1 = sqrt(vector_handler->dot(vec_diff, vec_diff, "cuda")); + real_type normDiffMatrix1 = sqrt(vector_handler->dot(vec_diff, vec_diff, "cpu")); //compute the residual using exact solution - vec_r->update(rhs, "cpu", "cuda"); - status = matrix_handler->matvec(A, vec_test, vec_r, &ONE, &MINUSONE,"csr", "cuda"); + vec_r->update(rhs, "cpu", "cpu"); + status = matrix_handler->matvec(A, vec_test, vec_r, &ONE, &MINUSONE,"csr", "cpu"); error_sum += status; - real_type exactSol_normRmatrix1 = sqrt(vector_handler->dot(vec_r, vec_r, "cuda")); + real_type exactSol_normRmatrix1 = sqrt(vector_handler->dot(vec_r, vec_r, "cpu")); //evaluate the residual ON THE CPU using COMPUTED solution vec_r->update(rhs, "cpu", "cpu"); @@ -134,7 +133,7 @@ int main(int argc, char *argv[]) status = matrix_handler->matvec(A, vec_x, vec_r, &ONE, &MINUSONE,"csr", "cpu"); error_sum += status; - real_type normRmatrix1CPU = sqrt(vector_handler->dot(vec_r, vec_r, "cuda")); + real_type normRmatrix1CPU = sqrt(vector_handler->dot(vec_r, vec_r, "cpu")); std::cout<<"Results (first matrix): "<coo2csr(A_coo, A, "cuda"); - vec_rhs->update(rhs, "cpu", "cuda"); + matrix_handler->coo2csr(A_coo, A, "cpu"); + vec_rhs->update(rhs, "cpu", "cpu"); // and solve it too status = KLU->refactorize(); @@ -174,27 +173,27 @@ int main(int argc, char *argv[]) status = KLU->solve(vec_rhs, vec_x); error_sum += status; - vec_r->update(rhs, "cpu", "cuda"); + vec_r->update(rhs, "cpu", "cpu"); matrix_handler->setValuesChanged(true); - status = matrix_handler->matvec(A, vec_x, vec_r, &ONE, &MINUSONE, "csr", "cuda"); + status = matrix_handler->matvec(A, vec_x, vec_r, &ONE, &MINUSONE, "csr", "cpu"); error_sum += status; - real_type normRmatrix2 = sqrt(vector_handler->dot(vec_r, vec_r, "cuda")); + real_type normRmatrix2 = sqrt(vector_handler->dot(vec_r, vec_r, "cpu")); //for testing only - control - real_type normB2 = sqrt(vector_handler->dot(vec_rhs, vec_rhs, "cuda")); + real_type normB2 = sqrt(vector_handler->dot(vec_rhs, vec_rhs, "cpu")); //compute x-x_true - vec_diff->update(x_data, "cpu", "cuda"); - vector_handler->axpy(&MINUSONE, vec_x, vec_diff, "cuda"); + vec_diff->update(x_data, "cpu", "cpu"); + vector_handler->axpy(&MINUSONE, vec_x, vec_diff, "cpu"); //evaluate its norm - real_type normDiffMatrix2 = sqrt(vector_handler->dot(vec_diff, vec_diff, "cuda")); + real_type normDiffMatrix2 = sqrt(vector_handler->dot(vec_diff, vec_diff, "cpu")); //compute the residual using exact solution - vec_r->update(rhs, "cpu", "cuda"); - status = matrix_handler->matvec(A, vec_test, vec_r, &ONE, &MINUSONE, "csr", "cuda"); + vec_r->update(rhs, "cpu", "cpu"); + status = matrix_handler->matvec(A, vec_test, vec_r, &ONE, &MINUSONE, "csr", "cpu"); error_sum += status; - real_type exactSol_normRmatrix2 = sqrt(vector_handler->dot(vec_r, vec_r, "cuda")); + real_type exactSol_normRmatrix2 = sqrt(vector_handler->dot(vec_r, vec_r, "cpu")); std::cout<<"Results (second matrix): "< Date: Sun, 15 Oct 2023 20:16:00 -0400 Subject: [PATCH 02/15] Create modular (and eventually portable) workspace. --- examples/r_KLU_GLU.cpp | 2 +- examples/r_KLU_GLU_matrix_values_update.cpp | 2 +- examples/r_KLU_KLU.cpp | 2 +- examples/r_KLU_KLU_standalone.cpp | 2 +- examples/r_KLU_rf.cpp | 2 +- examples/r_KLU_rf_FGMRES.cpp | 2 +- .../r_KLU_rf_FGMRES_reuse_factorization.cpp | 2 +- resolve/CMakeLists.txt | 9 +++-- resolve/LinSolverDirectCuSolverGLU.cpp | 1 + resolve/LinSolverDirectCuSolverGLU.hpp | 6 ++- resolve/matrix/MatrixHandler.cpp | 2 +- resolve/vector/VectorHandler.cpp | 2 +- resolve/workspace/CMakeLists.txt | 37 +++++++++++++++++++ resolve/workspace/LinAlgWorkspace.cpp | 13 +++++++ resolve/workspace/LinAlgWorkspace.hpp | 17 +++++++++ .../LinAlgWorkspaceCUDA.cpp} | 24 ++++-------- .../LinAlgWorkspaceCUDA.hpp} | 28 +------------- resolve/workspace/LinAlgWorkspaceFactory.cpp | 25 +++++++++++++ resolve/workspace/LinAlgWorkspaceFactory.hpp | 14 +++++++ tests/functionality/testKLU.cpp | 2 +- tests/functionality/testKLU_GLU.cpp | 2 +- tests/functionality/testKLU_Rf.cpp | 2 +- tests/functionality/testKLU_Rf_FGMRES.cpp | 2 +- tests/unit/matrix/MatrixHandlerTests.hpp | 2 +- tests/unit/vector/GramSchmidtTests.hpp | 2 +- tests/unit/vector/VectorHandlerTests.hpp | 2 +- 26 files changed, 143 insertions(+), 63 deletions(-) create mode 100644 resolve/workspace/CMakeLists.txt create mode 100644 resolve/workspace/LinAlgWorkspace.cpp create mode 100644 resolve/workspace/LinAlgWorkspace.hpp rename resolve/{LinAlgWorkspace.cpp => workspace/LinAlgWorkspaceCUDA.cpp} (84%) rename resolve/{LinAlgWorkspace.hpp => workspace/LinAlgWorkspaceCUDA.hpp} (70%) create mode 100644 resolve/workspace/LinAlgWorkspaceFactory.cpp create mode 100644 resolve/workspace/LinAlgWorkspaceFactory.hpp diff --git a/examples/r_KLU_GLU.cpp b/examples/r_KLU_GLU.cpp index a4c059a9..82c949db 100644 --- a/examples/r_KLU_GLU.cpp +++ b/examples/r_KLU_GLU.cpp @@ -9,7 +9,7 @@ #include #include #include -#include +#include using namespace ReSolve::constants; diff --git a/examples/r_KLU_GLU_matrix_values_update.cpp b/examples/r_KLU_GLU_matrix_values_update.cpp index 96c77591..19d7ec3c 100644 --- a/examples/r_KLU_GLU_matrix_values_update.cpp +++ b/examples/r_KLU_GLU_matrix_values_update.cpp @@ -9,7 +9,7 @@ #include #include #include -#include +#include // this updates the matrix values to simulate what CFD/optimization software does. diff --git a/examples/r_KLU_KLU.cpp b/examples/r_KLU_KLU.cpp index 439c8f41..b1e87203 100644 --- a/examples/r_KLU_KLU.cpp +++ b/examples/r_KLU_KLU.cpp @@ -9,7 +9,7 @@ #include #include #include -#include +#include using namespace ReSolve::constants; diff --git a/examples/r_KLU_KLU_standalone.cpp b/examples/r_KLU_KLU_standalone.cpp index 3323b68b..f0b7c3a4 100644 --- a/examples/r_KLU_KLU_standalone.cpp +++ b/examples/r_KLU_KLU_standalone.cpp @@ -9,7 +9,7 @@ #include #include #include -#include +#include using namespace ReSolve::constants; diff --git a/examples/r_KLU_rf.cpp b/examples/r_KLU_rf.cpp index 9e166d34..f2e96dfe 100644 --- a/examples/r_KLU_rf.cpp +++ b/examples/r_KLU_rf.cpp @@ -10,7 +10,7 @@ #include #include #include -#include +#include using namespace ReSolve::constants; diff --git a/examples/r_KLU_rf_FGMRES.cpp b/examples/r_KLU_rf_FGMRES.cpp index db816206..d9543895 100644 --- a/examples/r_KLU_rf_FGMRES.cpp +++ b/examples/r_KLU_rf_FGMRES.cpp @@ -11,7 +11,7 @@ #include #include #include -#include +#include using namespace ReSolve::constants; diff --git a/examples/r_KLU_rf_FGMRES_reuse_factorization.cpp b/examples/r_KLU_rf_FGMRES_reuse_factorization.cpp index d0ca3a56..d50bdb4a 100644 --- a/examples/r_KLU_rf_FGMRES_reuse_factorization.cpp +++ b/examples/r_KLU_rf_FGMRES_reuse_factorization.cpp @@ -11,7 +11,7 @@ #include #include #include -#include +#include using namespace ReSolve::constants; diff --git a/resolve/CMakeLists.txt b/resolve/CMakeLists.txt index 675fc11f..7551d2c3 100644 --- a/resolve/CMakeLists.txt +++ b/resolve/CMakeLists.txt @@ -11,7 +11,6 @@ add_subdirectory(utilities) set(ReSolve_SRC LinSolver.cpp LinSolverDirectKLU.cpp - LinAlgWorkspace.cpp LinSolverIterativeFGMRES.cpp GramSchmidt.cpp LinSolverDirectCuSolverGLU.cpp @@ -22,7 +21,6 @@ set(ReSolve_SRC set(ReSolve_HEADER_INSTALL Common.hpp cusolver_defs.hpp - LinAlgWorkspace.hpp LinSolver.hpp LinSolverDirectCuSolverGLU.hpp LinSolverDirectCuSolverRf.hpp @@ -39,11 +37,15 @@ if(NOT RESOLVE_USE_GPU) add_subdirectory(cpu) endif() -# First create CUDA backend (this should really be CUDA _API_ backend, separate backend will be needed for CUDA SDK) +# If CUDA support is enabled, create CUDA backend +# (this should really be CUDA _API_ backend, separate backend will be needed for CUDA SDK) if(RESOLVE_USE_CUDA) add_subdirectory(cuda) endif() +# Now, build workspaces +add_subdirectory(workspace) + # Next build vector and matrix objects that may use this backend. add_subdirectory(vector) add_subdirectory(matrix) @@ -66,6 +68,7 @@ set(ReSolve_Targets_List resolve_vector resolve_logger resolve_tpl + resolve_workspace ) if(RESOLVE_USE_GPU) diff --git a/resolve/LinSolverDirectCuSolverGLU.cpp b/resolve/LinSolverDirectCuSolverGLU.cpp index 63413c5b..feaf11b8 100644 --- a/resolve/LinSolverDirectCuSolverGLU.cpp +++ b/resolve/LinSolverDirectCuSolverGLU.cpp @@ -3,6 +3,7 @@ #include #include +#include #include "LinSolverDirectCuSolverGLU.hpp" namespace ReSolve diff --git a/resolve/LinSolverDirectCuSolverGLU.hpp b/resolve/LinSolverDirectCuSolverGLU.hpp index d76e1921..0aa27df9 100644 --- a/resolve/LinSolverDirectCuSolverGLU.hpp +++ b/resolve/LinSolverDirectCuSolverGLU.hpp @@ -1,6 +1,5 @@ #pragma once #include "Common.hpp" -#include #include "LinSolver.hpp" #include "cusolver_defs.hpp" #include @@ -19,6 +18,9 @@ namespace ReSolve class Sparse; } + // Forward declaration of ReSolve handlers workspace + class LinAlgWorkspace; + class LinSolverDirectCuSolverGLU : public LinSolverDirect { using vector_type = vector::Vector; @@ -38,7 +40,7 @@ namespace ReSolve //note: we need cuSolver handle, we can copy it from the workspace to avoid double allocation cusparseMatDescr_t descr_M_; //this is NOT sparse matrix descriptor cusparseMatDescr_t descr_A_; //this is NOT sparse matrix descriptor - LinAlgWorkspace *workspace_;// so we can copy cusparse handle + LinAlgWorkspace* workspace_;// so we can copy cusparse handle cusolverSpHandle_t handle_cusolversp_; cusolverStatus_t status_cusolver_; cusparseStatus_t status_cusparse_; diff --git a/resolve/matrix/MatrixHandler.cpp b/resolve/matrix/MatrixHandler.cpp index 19119731..370d3a53 100644 --- a/resolve/matrix/MatrixHandler.cpp +++ b/resolve/matrix/MatrixHandler.cpp @@ -1,11 +1,11 @@ #include #include -#include #include #include #include #include +#include #include "MatrixHandler.hpp" namespace ReSolve { diff --git a/resolve/vector/VectorHandler.cpp b/resolve/vector/VectorHandler.cpp index 032cc9b1..71bfce9c 100644 --- a/resolve/vector/VectorHandler.cpp +++ b/resolve/vector/VectorHandler.cpp @@ -1,9 +1,9 @@ #include #include -#include #include #include +#include #include "VectorHandler.hpp" namespace ReSolve { diff --git a/resolve/workspace/CMakeLists.txt b/resolve/workspace/CMakeLists.txt new file mode 100644 index 00000000..a50982c8 --- /dev/null +++ b/resolve/workspace/CMakeLists.txt @@ -0,0 +1,37 @@ +#[[ + +@brief Build ReSolve workspace module + +@author Slaven Peles + +]] + +set(ReSolve_Workspace_SRC + LinAlgWorkspace.cpp + LinAlgWorkspaceFactory.cpp +) + +set(ReSolve_Workspace_CUDA_SRC # consider adding a separate dir for cuda-sdk dependent code + LinAlgWorkspaceCUDA.cpp +) + +set(ReSolve_Workspace_HEADER_INSTALL + LinAlgWorkspace.hpp + LinAlgWorkspaceFactory.hpp + LinAlgWorkspaceCUDA.hpp +) + +if(RESOLVE_USE_CUDA) + add_library(resolve_workspace SHARED ${ReSolve_Workspace_SRC} ${ReSolve_Workspace_CUDA_SRC}) + target_link_libraries(resolve_workspace PUBLIC resolve_backend_cuda) +else(RESOLVE_USE_CUDA) + add_library(resolve_workspace SHARED ${ReSolve_Workspace_SRC}) +endif(RESOLVE_USE_CUDA) + +target_include_directories(resolve_workspace INTERFACE + $ + $ +) + +# install include headers +install(FILES ${ReSolve_Workspace_HEADER_INSTALL} DESTINATION include/resolve/workspace) diff --git a/resolve/workspace/LinAlgWorkspace.cpp b/resolve/workspace/LinAlgWorkspace.cpp new file mode 100644 index 00000000..d6d2085e --- /dev/null +++ b/resolve/workspace/LinAlgWorkspace.cpp @@ -0,0 +1,13 @@ +#include "LinAlgWorkspace.hpp" + +namespace ReSolve +{ + LinAlgWorkspace::LinAlgWorkspace() + { + } + + LinAlgWorkspace::~LinAlgWorkspace() + { + } + +} diff --git a/resolve/workspace/LinAlgWorkspace.hpp b/resolve/workspace/LinAlgWorkspace.hpp new file mode 100644 index 00000000..38dc0c19 --- /dev/null +++ b/resolve/workspace/LinAlgWorkspace.hpp @@ -0,0 +1,17 @@ +#pragma once + + +#include + +namespace ReSolve +{ + class LinAlgWorkspace + { + public: + LinAlgWorkspace(); + ~LinAlgWorkspace(); + protected: + MemoryHandler mem_; + }; + +} diff --git a/resolve/LinAlgWorkspace.cpp b/resolve/workspace/LinAlgWorkspaceCUDA.cpp similarity index 84% rename from resolve/LinAlgWorkspace.cpp rename to resolve/workspace/LinAlgWorkspaceCUDA.cpp index 9dee78c5..200ac437 100644 --- a/resolve/LinAlgWorkspace.cpp +++ b/resolve/workspace/LinAlgWorkspaceCUDA.cpp @@ -1,15 +1,7 @@ -#include "LinAlgWorkspace.hpp" +#include namespace ReSolve { - LinAlgWorkspace::LinAlgWorkspace() - { - } - - LinAlgWorkspace::~LinAlgWorkspace() - { - } - LinAlgWorkspaceCUDA::LinAlgWorkspaceCUDA() { handle_cusolversp_ = nullptr; @@ -29,8 +21,6 @@ namespace ReSolve cusolverSpDestroy(handle_cusolversp_); cublasDestroy(handle_cublas_); cusparseDestroySpMat(mat_A_); - // cusparseDestroyDnVec(vec_x_); - // cusparseDestroyDnVec(vec_y_); } void* LinAlgWorkspaceCUDA::getSpmvBuffer() @@ -93,21 +83,23 @@ namespace ReSolve mat_A_ = mat; } - cusparseDnVecDescr_t LinAlgWorkspaceCUDA::getVecX() + cusparseDnVecDescr_t LinAlgWorkspaceCUDA::getVecX() { return vec_x_; } - cusparseDnVecDescr_t LinAlgWorkspaceCUDA::getVecY() + cusparseDnVecDescr_t LinAlgWorkspaceCUDA::getVecY() { return vec_y_; } - bool LinAlgWorkspaceCUDA::matvecSetup(){ + bool LinAlgWorkspaceCUDA::matvecSetup() + { return matvec_setup_done_; } - void LinAlgWorkspaceCUDA::matvecSetupDone() { + void LinAlgWorkspaceCUDA::matvecSetupDone() + { matvec_setup_done_ = true; } @@ -117,4 +109,4 @@ namespace ReSolve cublasCreate(&handle_cublas_); cusolverSpCreate(&handle_cusolversp_); } -} +} // namespace ReSolve diff --git a/resolve/LinAlgWorkspace.hpp b/resolve/workspace/LinAlgWorkspaceCUDA.hpp similarity index 70% rename from resolve/LinAlgWorkspace.hpp rename to resolve/workspace/LinAlgWorkspaceCUDA.hpp index e5c79580..c0d4554c 100644 --- a/resolve/LinAlgWorkspace.hpp +++ b/resolve/workspace/LinAlgWorkspaceCUDA.hpp @@ -4,20 +4,10 @@ #include "cusparse.h" #include "cusolverSp.h" -#include +#include namespace ReSolve { - class LinAlgWorkspace - { - public: - LinAlgWorkspace(); - ~LinAlgWorkspace(); - protected: - MemoryHandler mem_; - }; - - class LinAlgWorkspaceCUDA : public LinAlgWorkspace { public: @@ -67,18 +57,4 @@ namespace ReSolve bool matvec_setup_done_; //check if setup is done for matvec i.e. if buffer is allocated, csr structure is set etc. }; - /// @brief Workspace factory - /// @param[in] memspace memory space ID - /// @return pointer to the linear algebra workspace - inline LinAlgWorkspace* createLinAlgWorkspace(std::string memspace) - { - if (memspace == "cuda") { - LinAlgWorkspaceCUDA* workspace = new LinAlgWorkspaceCUDA(); - workspace->initializeHandles(); - return workspace; - } - // If not CUDA, return default - return (new LinAlgWorkspace()); - } - -} +} // namespace ReSolve diff --git a/resolve/workspace/LinAlgWorkspaceFactory.cpp b/resolve/workspace/LinAlgWorkspaceFactory.cpp new file mode 100644 index 00000000..11980c23 --- /dev/null +++ b/resolve/workspace/LinAlgWorkspaceFactory.cpp @@ -0,0 +1,25 @@ +#pragma once + +#include "LinAlgWorkspaceFactory.hpp" + +namespace ReSolve +{ + /// @brief Workspace factory + /// @param[in] memspace memory space ID + /// @return pointer to the linear algebra workspace + LinAlgWorkspace* createLinAlgWorkspace(std::string memspace) + { + if (memspace == "cuda") { +#ifdef RESOLVE_USE_CUDA + LinAlgWorkspaceCUDA* workspace = new LinAlgWorkspaceCUDA(); + workspace->initializeHandles(); + return workspace; +#else + return nullptr; +#endif + } + // If not CUDA, return default + return (new LinAlgWorkspace()); + } + +} diff --git a/resolve/workspace/LinAlgWorkspaceFactory.hpp b/resolve/workspace/LinAlgWorkspaceFactory.hpp new file mode 100644 index 00000000..2c30738e --- /dev/null +++ b/resolve/workspace/LinAlgWorkspaceFactory.hpp @@ -0,0 +1,14 @@ +#pragma once + +#include + +#include + +#ifdef RESOLVE_USE_CUDA +#include +#endif + +namespace ReSolve +{ + LinAlgWorkspace* createLinAlgWorkspace(std::string memspace); +} diff --git a/tests/functionality/testKLU.cpp b/tests/functionality/testKLU.cpp index 229dfc1d..a60083cd 100644 --- a/tests/functionality/testKLU.cpp +++ b/tests/functionality/testKLU.cpp @@ -9,7 +9,7 @@ #include #include #include -#include +#include //author: KS //functionality test to check whether KLU works correctly. using namespace ReSolve::constants; diff --git a/tests/functionality/testKLU_GLU.cpp b/tests/functionality/testKLU_GLU.cpp index 37f888e4..60dcce50 100644 --- a/tests/functionality/testKLU_GLU.cpp +++ b/tests/functionality/testKLU_GLU.cpp @@ -10,7 +10,7 @@ #include #include #include -#include +#include //author: KS //functionality test to check whether cuSolverGLU works correctly. diff --git a/tests/functionality/testKLU_Rf.cpp b/tests/functionality/testKLU_Rf.cpp index 21aba8b0..50add7cd 100644 --- a/tests/functionality/testKLU_Rf.cpp +++ b/tests/functionality/testKLU_Rf.cpp @@ -11,7 +11,7 @@ #include #include #include -#include +#include //author: KS //functionality test to check whether cuSolverRf works correctly. diff --git a/tests/functionality/testKLU_Rf_FGMRES.cpp b/tests/functionality/testKLU_Rf_FGMRES.cpp index c63a3f99..2dae74b5 100644 --- a/tests/functionality/testKLU_Rf_FGMRES.cpp +++ b/tests/functionality/testKLU_Rf_FGMRES.cpp @@ -12,7 +12,7 @@ #include #include #include -#include +#include //author: KS //functionality test to check whether cuSolverRf/FGMRES works correctly. diff --git a/tests/unit/matrix/MatrixHandlerTests.hpp b/tests/unit/matrix/MatrixHandlerTests.hpp index 18fbe412..41df9f4b 100644 --- a/tests/unit/matrix/MatrixHandlerTests.hpp +++ b/tests/unit/matrix/MatrixHandlerTests.hpp @@ -5,7 +5,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/tests/unit/vector/GramSchmidtTests.hpp b/tests/unit/vector/GramSchmidtTests.hpp index 6f0d305b..2d801c49 100644 --- a/tests/unit/vector/GramSchmidtTests.hpp +++ b/tests/unit/vector/GramSchmidtTests.hpp @@ -6,7 +6,7 @@ #include #include #include -#include +#include namespace ReSolve { namespace tests { diff --git a/tests/unit/vector/VectorHandlerTests.hpp b/tests/unit/vector/VectorHandlerTests.hpp index 0737385d..ad0d3cfb 100644 --- a/tests/unit/vector/VectorHandlerTests.hpp +++ b/tests/unit/vector/VectorHandlerTests.hpp @@ -7,7 +7,7 @@ #include #include #include -#include +#include namespace ReSolve { namespace tests { From d8cf0e66f5a9643d8243a8fae8ec983085b5301c Mon Sep 17 00:00:00 2001 From: Slaven Peles Date: Sun, 15 Oct 2023 21:26:19 -0400 Subject: [PATCH 03/15] Enforce dependencies between CMake options. --- CMakeLists.txt | 7 +++++++ resolve/CMakeLists.txt | 12 ++++++------ resolve/MemoryUtils.hpp | 4 +++- resolve/workspace/LinAlgWorkspace.cpp | 3 +++ resolve/workspace/LinAlgWorkspace.hpp | 1 + resolve/workspace/LinAlgWorkspaceFactory.cpp | 2 -- resolve/workspace/LinAlgWorkspaceFactory.hpp | 4 ++++ 7 files changed, 24 insertions(+), 9 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 635fa390..d3b566ee 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -27,6 +27,13 @@ option(RESOLVE_USE_GPU "Use GPU device for computations" ON) option(RESOLVE_USE_CUDA "Use CUDA language and SDK" ON) set(RESOLVE_CTEST_OUTPUT_DIR ${PROJECT_BINARY_DIR} CACHE PATH "Directory where CTest outputs are saved") +if(RESOLVE_USE_CUDA) + set(RESOLVE_USE_GPU On CACHE BOOL "Using GPU!" FORCE) +else() + set(RESOLVE_USE_GPU Off CACHE BOOL "Using GPU!" FORCE) +endif() + + set(CMAKE_MACOSX_RPATH 1) # set(CMAKE_INSTALL_RPATH "${CMAKE_INSTALL_PREFIX}/lib") # https://gitlab.kitware.com/cmake/community/-/wikis/doc/cmake/RPATH-handling#always-full-rpath diff --git a/resolve/CMakeLists.txt b/resolve/CMakeLists.txt index 7551d2c3..2efac125 100644 --- a/resolve/CMakeLists.txt +++ b/resolve/CMakeLists.txt @@ -71,13 +71,13 @@ set(ReSolve_Targets_List resolve_workspace ) -if(RESOLVE_USE_GPU) - if(RESOLVE_USE_CUDA) - set(ReSolve_Targets_List ${ReSolve_Targets_List} resolve_backend_cuda) - endif() -else(RESOLVE_USE_GPU) +if(RESOLVE_USE_CUDA) + set(ReSolve_Targets_List ${ReSolve_Targets_List} resolve_backend_cuda) +endif() + +if(NOT RESOLVE_USE_GPU) set(ReSolve_Targets_List ${ReSolve_Targets_List} resolve_backend_cpu) -endif(RESOLVE_USE_GPU) +endif() install(TARGETS ${ReSolve_Targets_List} EXPORT ReSolveTargets) diff --git a/resolve/MemoryUtils.hpp b/resolve/MemoryUtils.hpp index 77c2a7af..00f3d653 100644 --- a/resolve/MemoryUtils.hpp +++ b/resolve/MemoryUtils.hpp @@ -48,14 +48,16 @@ namespace ReSolve } // namespace ReSolve -// Check if GPU support is enabled in Re::Solve and set appropriate device memory manager. #ifdef RESOLVE_USE_GPU +// Check if GPU support is enabled in Re::Solve and set appropriate device memory manager. #if defined RESOLVE_USE_CUDA #include using MemoryHandler = ReSolve::MemoryUtils; #elif defined RESOLVE_USE_HIP #error HIP support requested, but not available! Probably a bug in CMake configuration. +#else +#error Unrecognized device, probably bug in CMake configuration #endif #else diff --git a/resolve/workspace/LinAlgWorkspace.cpp b/resolve/workspace/LinAlgWorkspace.cpp index d6d2085e..5e4b55d8 100644 --- a/resolve/workspace/LinAlgWorkspace.cpp +++ b/resolve/workspace/LinAlgWorkspace.cpp @@ -10,4 +10,7 @@ namespace ReSolve { } + void LinAlgWorkspace::initializeHandles() + { + } } diff --git a/resolve/workspace/LinAlgWorkspace.hpp b/resolve/workspace/LinAlgWorkspace.hpp index 38dc0c19..68074eb8 100644 --- a/resolve/workspace/LinAlgWorkspace.hpp +++ b/resolve/workspace/LinAlgWorkspace.hpp @@ -10,6 +10,7 @@ namespace ReSolve public: LinAlgWorkspace(); ~LinAlgWorkspace(); + void initializeHandles(); protected: MemoryHandler mem_; }; diff --git a/resolve/workspace/LinAlgWorkspaceFactory.cpp b/resolve/workspace/LinAlgWorkspaceFactory.cpp index 11980c23..43a10870 100644 --- a/resolve/workspace/LinAlgWorkspaceFactory.cpp +++ b/resolve/workspace/LinAlgWorkspaceFactory.cpp @@ -1,5 +1,3 @@ -#pragma once - #include "LinAlgWorkspaceFactory.hpp" namespace ReSolve diff --git a/resolve/workspace/LinAlgWorkspaceFactory.hpp b/resolve/workspace/LinAlgWorkspaceFactory.hpp index 2c30738e..0f5860c1 100644 --- a/resolve/workspace/LinAlgWorkspaceFactory.hpp +++ b/resolve/workspace/LinAlgWorkspaceFactory.hpp @@ -8,6 +8,10 @@ #include #endif +// #ifndef RESOLVE_USE_CUDA +// using ReSolve::LinAlgWorkspace = ReSolve::LinAlgWorkspaceCUDA; +// #endif + namespace ReSolve { LinAlgWorkspace* createLinAlgWorkspace(std::string memspace); From e40c43dfa95df269150c3a904bce2cb1321b75fe Mon Sep 17 00:00:00 2001 From: Slaven Peles Date: Mon, 16 Oct 2023 12:48:43 -0400 Subject: [PATCH 04/15] Working PIMPL in MatrixHandler; needs lots of cleanup. --- resolve/matrix/CMakeLists.txt | 3 + resolve/matrix/MatrixHandler.cpp | 145 ++-------- resolve/matrix/MatrixHandler.hpp | 6 +- resolve/matrix/MatrixHandlerCpu.cpp | 338 +++++++++++++++++++++++ resolve/matrix/MatrixHandlerCpu.hpp | 66 +++++ resolve/matrix/MatrixHandlerCuda.cpp | 387 +++++++++++++++++++++++++++ resolve/matrix/MatrixHandlerCuda.hpp | 66 +++++ resolve/matrix/MatrixHandlerImpl.hpp | 75 ++++++ 8 files changed, 967 insertions(+), 119 deletions(-) create mode 100644 resolve/matrix/MatrixHandlerCpu.cpp create mode 100644 resolve/matrix/MatrixHandlerCpu.hpp create mode 100644 resolve/matrix/MatrixHandlerCuda.cpp create mode 100644 resolve/matrix/MatrixHandlerCuda.hpp create mode 100644 resolve/matrix/MatrixHandlerImpl.hpp diff --git a/resolve/matrix/CMakeLists.txt b/resolve/matrix/CMakeLists.txt index 289f805a..8c74ad09 100644 --- a/resolve/matrix/CMakeLists.txt +++ b/resolve/matrix/CMakeLists.txt @@ -14,6 +14,8 @@ set(Matrix_SRC Csc.cpp Coo.cpp MatrixHandler.cpp + MatrixHandlerCpu.cpp + MatrixHandlerCuda.cpp ) @@ -24,6 +26,7 @@ set(Matrix_HEADER_INSTALL Csr.hpp Csc.hpp MatrixHandler.hpp + MatrixHandlerImpl.hpp ) # Build shared library ReSolve::matrix diff --git a/resolve/matrix/MatrixHandler.cpp b/resolve/matrix/MatrixHandler.cpp index 370d3a53..30c08991 100644 --- a/resolve/matrix/MatrixHandler.cpp +++ b/resolve/matrix/MatrixHandler.cpp @@ -7,6 +7,8 @@ #include #include #include "MatrixHandler.hpp" +#include "MatrixHandlerCpu.hpp" +#include "MatrixHandlerCuda.hpp" namespace ReSolve { // Create a shortcut name for Logger static class @@ -49,6 +51,8 @@ namespace ReSolve { { this->new_matrix_ = true; this->values_changed_ = true; + cpuImpl_ = new MatrixHandlerCpu(); + cudaImpl_ = new MatrixHandlerCuda(); } MatrixHandler::~MatrixHandler() @@ -58,16 +62,20 @@ namespace ReSolve { MatrixHandler::MatrixHandler(LinAlgWorkspace* new_workspace) { workspace_ = new_workspace; + cpuImpl_ = new MatrixHandlerCpu(new_workspace); + cudaImpl_ = new MatrixHandlerCuda(new_workspace); } bool MatrixHandler::getValuesChanged() { + values_changed_ = cpuImpl_->isValuesChanged(); return this->values_changed_; } void MatrixHandler::setValuesChanged(bool toWhat) { this->values_changed_ = toWhat; + cpuImpl_->setValuesChanged(values_changed_); } int MatrixHandler::coo2csr(matrix::Coo* A_coo, matrix::Csr* A_csr, std::string memspace) @@ -229,131 +237,32 @@ namespace ReSolve { return 0; } - int MatrixHandler::matvec(matrix::Sparse* Ageneric, + /** + * @brief Matrix vector product method result = alpha *A*x + beta * result + * @param A + * @param vec_x + * @param vec_result + * @param[in] alpha + * @param[in] beta + * @param[in] matrixFormat + * @param[in] memspace + * @return result := alpha * A * x + beta * result + */ + int MatrixHandler::matvec(matrix::Sparse* A, vector_type* vec_x, vector_type* vec_result, const real_type* alpha, const real_type* beta, std::string matrixFormat, - std::string memspace) + std::string memspace) { - using namespace constants; - int error_sum = 0; - if (matrixFormat == "csr"){ - matrix::Csr* A = (matrix::Csr*) Ageneric; - cusparseStatus_t status; - //result = alpha *A*x + beta * result - if (memspace == "cuda" ){ - // std::cout << "Matvec on NVIDIA GPU ...\n"; - LinAlgWorkspaceCUDA* workspaceCUDA = (LinAlgWorkspaceCUDA*) workspace_; - cusparseDnVecDescr_t vecx = workspaceCUDA->getVecX(); - //printf("is vec_x NULL? %d\n", vec_x->getData("cuda") == nullptr); - //printf("is vec_result NULL? %d\n", vec_result->getData("cuda") == nullptr); - cusparseCreateDnVec(&vecx, A->getNumRows(), vec_x->getData("cuda"), CUDA_R_64F); - - - cusparseDnVecDescr_t vecAx = workspaceCUDA->getVecY(); - cusparseCreateDnVec(&vecAx, A->getNumRows(), vec_result->getData("cuda"), CUDA_R_64F); - - cusparseSpMatDescr_t matA = workspaceCUDA->getSpmvMatrixDescriptor(); - - void* buffer_spmv = workspaceCUDA->getSpmvBuffer(); - cusparseHandle_t handle_cusparse = workspaceCUDA->getCusparseHandle(); - if (values_changed_){ - status = cusparseCreateCsr(&matA, - A->getNumRows(), - A->getNumColumns(), - A->getNnzExpanded(), - A->getRowData("cuda"), - A->getColData("cuda"), - A->getValues("cuda"), - CUSPARSE_INDEX_32I, - CUSPARSE_INDEX_32I, - CUSPARSE_INDEX_BASE_ZERO, - CUDA_R_64F); - error_sum += status; - values_changed_ = false; - } - if (!workspaceCUDA->matvecSetup()){ - //setup first, allocate, etc. - size_t bufferSize = 0; - - status = cusparseSpMV_bufferSize(handle_cusparse, - CUSPARSE_OPERATION_NON_TRANSPOSE, - &MINUSONE, - matA, - vecx, - &ONE, - vecAx, - CUDA_R_64F, - CUSPARSE_SPMV_CSR_ALG2, - &bufferSize); - error_sum += status; - mem_.deviceSynchronize(); - mem_.allocateBufferOnDevice(&buffer_spmv, bufferSize); - workspaceCUDA->setSpmvMatrixDescriptor(matA); - workspaceCUDA->setSpmvBuffer(buffer_spmv); - - workspaceCUDA->matvecSetupDone(); - } - - status = cusparseSpMV(handle_cusparse, - CUSPARSE_OPERATION_NON_TRANSPOSE, - alpha, - matA, - vecx, - beta, - vecAx, - CUDA_R_64F, - CUSPARSE_SPMV_CSR_ALG2, - buffer_spmv); - error_sum += status; - mem_.deviceSynchronize(); - if (status) - out::error() << "Matvec status: " << status - << "Last error code: " << mem_.getLastDeviceError() << std::endl; - vec_result->setDataUpdated("cuda"); - - cusparseDestroyDnVec(vecx); - cusparseDestroyDnVec(vecAx); - return error_sum; - } else { - if (memspace == "cpu"){ - //cpu csr matvec - index_type* ia = A->getRowData("cpu"); - index_type* ja = A->getColData("cpu"); - real_type* a = A->getValues("cpu"); - - real_type* x_data = vec_x->getData("cpu"); - real_type* result_data = vec_result->getData("cpu"); - real_type sum; - real_type y, t, c; - //Kahan algorithm for stability; Kahan-Babushka version didnt make a difference - for (int i = 0; i < A->getNumRows(); ++i) { - sum = 0.0; - c = 0.0; - for (int j = ia[i]; j < ia[i+1]; ++j) { - y = ( a[j] * x_data[ja[j]]) - c; - t = sum + y; - c = (t - sum) - y; - sum = t; - // sum += ( a[j] * x_data[ja[j]]); - } - sum *= (*alpha); - result_data[i] = result_data[i]*(*beta) + sum; - } - vec_result->setDataUpdated("cpu"); - return 0; - } else { - out::error() << "Not implemented (yet)" << std::endl; - - return 1; - } - } + if (memspace == "cuda" ) { + return cudaImpl_->matvec(A, vec_x, vec_result, alpha, beta, matrixFormat); + } else if (memspace == "cpu") { + return cpuImpl_->matvec(A, vec_x, vec_result, alpha, beta, matrixFormat); } else { - - out::error() << "Not implemented (yet), only csr matvec is implemented" << std::endl; - return 1; + out::error() << "Support for device " << memspace << " not implemented (yet)" << std::endl; + return 1; } } diff --git a/resolve/matrix/MatrixHandler.hpp b/resolve/matrix/MatrixHandler.hpp index 366b5422..ea6abbcb 100644 --- a/resolve/matrix/MatrixHandler.hpp +++ b/resolve/matrix/MatrixHandler.hpp @@ -7,6 +7,8 @@ #include #include +#include + namespace ReSolve { namespace vector @@ -68,7 +70,7 @@ namespace ReSolve { const real_type* beta, std::string matrix_type, std::string memspace); - void Matrix1Norm(matrix::Sparse *A, real_type* norm); + int Matrix1Norm(matrix::Sparse *A, real_type* norm); bool getValuesChanged(); void setValuesChanged(bool toWhat); @@ -78,6 +80,8 @@ namespace ReSolve { bool values_changed_{true}; ///< needed for matvec MemoryHandler mem_; ///< Device memory manager object + MatrixHandlerImpl* cpuImpl_{nullptr}; + MatrixHandlerImpl* cudaImpl_{nullptr}; }; } // namespace ReSolve diff --git a/resolve/matrix/MatrixHandlerCpu.cpp b/resolve/matrix/MatrixHandlerCpu.cpp new file mode 100644 index 00000000..5d61501d --- /dev/null +++ b/resolve/matrix/MatrixHandlerCpu.cpp @@ -0,0 +1,338 @@ +#include + +#include +#include +#include +#include +#include +#include "MatrixHandlerCpu.hpp" + +namespace ReSolve { + // Create a shortcut name for Logger static class + using out = io::Logger; + + // //helper class + // indexPlusValue::indexPlusValue() + // { + // idx_ = 0; + // value_ = 0.0; + // } + + + // indexPlusValue::~indexPlusValue() + // { + // } + + // void indexPlusValue::setIdx(index_type new_idx) + // { + // idx_ = new_idx; + // } + + // void indexPlusValue::setValue(real_type new_value) + // { + // value_ = new_value; + // } + + // index_type indexPlusValue::getIdx() + // { + // return idx_; + // } + + // real_type indexPlusValue::getValue() + // { + // return value_; + // } + // //end of helper class + + MatrixHandlerCpu::MatrixHandlerCpu() + { + // new_matrix_ = true; + // values_changed_ = true; + } + + MatrixHandlerCpu::~MatrixHandlerCpu() + { + } + + MatrixHandlerCpu::MatrixHandlerCpu(LinAlgWorkspace* new_workspace) + { + workspace_ = new_workspace; + } + + // bool MatrixHandlerCpu::getValuesChanged() + // { + // return this->values_changed_; + // } + + // void MatrixHandlerCpu::setValuesChanged(bool toWhat) + // { + // this->values_changed_ = toWhat; + // } + +// int MatrixHandlerCpu::coo2csr(matrix::Coo* A_coo, matrix::Csr* A_csr, std::string memspace) +// { +// //this happens on the CPU not on the GPU +// //but will return whatever memspace requested. + +// //count nnzs first + +// index_type nnz_unpacked = 0; +// index_type nnz = A_coo->getNnz(); +// index_type n = A_coo->getNumRows(); +// bool symmetric = A_coo->symmetric(); +// bool expanded = A_coo->expanded(); + +// index_type* nnz_counts = new index_type[n]; +// std::fill_n(nnz_counts, n, 0); +// index_type* coo_rows = A_coo->getRowData("cpu"); +// index_type* coo_cols = A_coo->getColData("cpu"); +// real_type* coo_vals = A_coo->getValues("cpu"); + +// index_type* diag_control = new index_type[n]; //for DEDUPLICATION of the diagonal +// std::fill_n(diag_control, n, 0); +// index_type nnz_unpacked_no_duplicates = 0; +// index_type nnz_no_duplicates = nnz; + + +// //maybe check if they exist? +// for (index_type i = 0; i < nnz; ++i) +// { +// nnz_counts[coo_rows[i]]++; +// nnz_unpacked++; +// nnz_unpacked_no_duplicates++; +// if ((coo_rows[i] != coo_cols[i])&& (symmetric) && (!expanded)) +// { +// nnz_counts[coo_cols[i]]++; +// nnz_unpacked++; +// nnz_unpacked_no_duplicates++; +// } +// if (coo_rows[i] == coo_cols[i]){ +// if (diag_control[coo_rows[i]] > 0) { +// //duplicate +// nnz_unpacked_no_duplicates--; +// nnz_no_duplicates--; +// } +// diag_control[coo_rows[i]]++; +// } +// } +// A_csr->setExpanded(true); +// A_csr->setNnzExpanded(nnz_unpacked_no_duplicates); +// index_type* csr_ia = new index_type[n+1]; +// std::fill_n(csr_ia, n + 1, 0); +// index_type* csr_ja = new index_type[nnz_unpacked]; +// real_type* csr_a = new real_type[nnz_unpacked]; +// index_type* nnz_shifts = new index_type[n]; +// std::fill_n(nnz_shifts, n , 0); + +// indexPlusValue* tmp = new indexPlusValue[nnz_unpacked]; + +// csr_ia[0] = 0; + +// for (index_type i = 1; i < n + 1; ++i){ +// csr_ia[i] = csr_ia[i - 1] + nnz_counts[i - 1] - (diag_control[i-1] - 1); +// } + +// int r, start; + + +// for (index_type i = 0; i < nnz; ++i){ +// //which row +// r = coo_rows[i]; +// start = csr_ia[r]; + +// if ((start + nnz_shifts[r]) > nnz_unpacked) { +// out::warning() << "index out of bounds (case 1) start: " << start << "nnz_shifts[" << r << "] = " << nnz_shifts[r] << std::endl; +// } +// if ((r == coo_cols[i]) && (diag_control[r] > 1)) {//diagonal, and there are duplicates +// bool already_there = false; +// for (index_type j = start; j < start + nnz_shifts[r]; ++j) +// { +// index_type c = tmp[j].getIdx(); +// if (c == r) { +// real_type val = tmp[j].getValue(); +// val += coo_vals[i]; +// tmp[j].setValue(val); +// already_there = true; +// out::warning() << " duplicate found, row " << c << " adding in place " << j << " current value: " << val << std::endl; +// } +// } +// if (!already_there){ // first time this duplicates appears + +// tmp[start + nnz_shifts[r]].setIdx(coo_cols[i]); +// tmp[start + nnz_shifts[r]].setValue(coo_vals[i]); + +// nnz_shifts[r]++; +// } +// } else {//not diagonal +// tmp[start + nnz_shifts[r]].setIdx(coo_cols[i]); +// tmp[start + nnz_shifts[r]].setValue(coo_vals[i]); +// nnz_shifts[r]++; + +// if ((coo_rows[i] != coo_cols[i]) && (symmetric == 1)) +// { +// r = coo_cols[i]; +// start = csr_ia[r]; + +// if ((start + nnz_shifts[r]) > nnz_unpacked) +// out::warning() << "index out of bounds (case 2) start: " << start << "nnz_shifts[" << r << "] = " << nnz_shifts[r] << std::endl; +// tmp[start + nnz_shifts[r]].setIdx(coo_rows[i]); +// tmp[start + nnz_shifts[r]].setValue(coo_vals[i]); +// nnz_shifts[r]++; +// } +// } +// } +// //now sort whatever is inside rows + +// for (int i = 0; i < n; ++i) +// { + +// //now sorting (and adding 1) +// int colStart = csr_ia[i]; +// int colEnd = csr_ia[i + 1]; +// int length = colEnd - colStart; +// std::sort(&tmp[colStart],&tmp[colStart] + length); +// } + +// for (index_type i = 0; i < nnz_unpacked; ++i) +// { +// csr_ja[i] = tmp[i].getIdx(); +// csr_a[i] = tmp[i].getValue(); +// } +// #if 0 +// for (int i = 0; isetNnz(nnz_no_duplicates); +// if (memspace == "cpu"){ +// A_csr->updateData(csr_ia, csr_ja, csr_a, "cpu", "cpu"); +// } else { +// if (memspace == "cuda"){ +// A_csr->updateData(csr_ia, csr_ja, csr_a, "cpu", "cuda"); +// } else { +// //display error +// } +// } +// delete [] nnz_counts; +// delete [] tmp; +// delete [] nnz_shifts; +// delete [] csr_ia; +// delete [] csr_ja; +// delete [] csr_a; +// delete [] diag_control; + +// return 0; +// } + + int MatrixHandlerCpu::matvec(matrix::Sparse* Ageneric, + vector_type* vec_x, + vector_type* vec_result, + const real_type* alpha, + const real_type* beta, + std::string matrixFormat) + { + using namespace constants; + // int error_sum = 0; + if (matrixFormat == "csr") { + matrix::Csr* A = (matrix::Csr*) Ageneric; + //result = alpha *A*x + beta * result + index_type* ia = A->getRowData("cpu"); + index_type* ja = A->getColData("cpu"); + real_type* a = A->getValues("cpu"); + + real_type* x_data = vec_x->getData("cpu"); + real_type* result_data = vec_result->getData("cpu"); + real_type sum; + real_type y; + real_type t; + real_type c; + + //Kahan algorithm for stability; Kahan-Babushka version didnt make a difference + for (int i = 0; i < A->getNumRows(); ++i) { + sum = 0.0; + c = 0.0; + for (int j = ia[i]; j < ia[i+1]; ++j) { + y = ( a[j] * x_data[ja[j]]) - c; + t = sum + y; + c = (t - sum) - y; + sum = t; + // sum += ( a[j] * x_data[ja[j]]); + } + sum *= (*alpha); + result_data[i] = result_data[i]*(*beta) + sum; + } + vec_result->setDataUpdated("cpu"); + return 0; + } else { + out::error() << "MatVec not implemented (yet) for " + << matrixFormat << " matrix format." << std::endl; + return 1; + } + } + + int MatrixHandlerCpu::Matrix1Norm(matrix::Sparse* /* A */, real_type* /* norm */) + { + return -1; + } + + // int MatrixHandlerCpu::csc2csr(matrix::Csc* A_csc, matrix::Csr* A_csr, std::string memspace) + // { + // //it ONLY WORKS WITH CUDA + // index_type error_sum = 0; + // if (memspace == "cuda") { + // LinAlgWorkspaceCUDA* workspaceCUDA = (LinAlgWorkspaceCUDA*) workspace_; + + // A_csr->allocateMatrixData("cuda"); + // index_type n = A_csc->getNumRows(); + // index_type m = A_csc->getNumRows(); + // index_type nnz = A_csc->getNnz(); + // size_t bufferSize; + // void* d_work; + // cusparseStatus_t status = cusparseCsr2cscEx2_bufferSize(workspaceCUDA->getCusparseHandle(), + // n, + // m, + // nnz, + // A_csc->getValues("cuda"), + // A_csc->getColData("cuda"), + // A_csc->getRowData("cuda"), + // A_csr->getValues("cuda"), + // A_csr->getRowData("cuda"), + // A_csr->getColData("cuda"), + // CUDA_R_64F, + // CUSPARSE_ACTION_NUMERIC, + // CUSPARSE_INDEX_BASE_ZERO, + // CUSPARSE_CSR2CSC_ALG1, + // &bufferSize); + // error_sum += status; + // mem_.allocateBufferOnDevice(&d_work, bufferSize); + // status = cusparseCsr2cscEx2(workspaceCUDA->getCusparseHandle(), + // n, + // m, + // nnz, + // A_csc->getValues("cuda"), + // A_csc->getColData("cuda"), + // A_csc->getRowData("cuda"), + // A_csr->getValues("cuda"), + // A_csr->getRowData("cuda"), + // A_csr->getColData("cuda"), + // CUDA_R_64F, + // CUSPARSE_ACTION_NUMERIC, + // CUSPARSE_INDEX_BASE_ZERO, + // CUSPARSE_CSR2CSC_ALG1, + // d_work); + // error_sum += status; + // return error_sum; + // mem_.deleteOnDevice(d_work); + // } else { + // out::error() << "Not implemented (yet)" << std::endl; + // return -1; + // } + + + // } + +} // namespace ReSolve diff --git a/resolve/matrix/MatrixHandlerCpu.hpp b/resolve/matrix/MatrixHandlerCpu.hpp new file mode 100644 index 00000000..e4de02bf --- /dev/null +++ b/resolve/matrix/MatrixHandlerCpu.hpp @@ -0,0 +1,66 @@ +// this class encapsulates various matrix manipulation operations, commonly required by linear solvers: +// this includes +// (1) Matrix format conversion: coo2csr, csr2csc +// (2) Matrix vector product (SpMV) +// (3) Matrix 1-norm +#pragma once +#include +#include +#include + +namespace ReSolve +{ + namespace vector + { + class Vector; + } + namespace matrix + { + class Sparse; + class Coo; + class Csc; + class Csr; + } + class LinAlgWorkspace; +} + + +namespace ReSolve { + /** + * @class MatrixHandlerCpu + * + * @brief CPU implementation of the matrix handler. + */ + class MatrixHandlerCpu : public MatrixHandlerImpl + { + using vector_type = vector::Vector; + + public: + MatrixHandlerCpu(); + MatrixHandlerCpu(LinAlgWorkspace* workspace); + virtual ~MatrixHandlerCpu(); + + // int csc2csr(matrix::Csc* A_csc, matrix::Csr* A_csr, std::string memspace); //memspace decides on what is returned (cpu or cuda pointer) + // int coo2csr(matrix::Coo* A_coo, matrix::Csr* A_csr, std::string memspace); + + /// Should compute vec_result := alpha*A*vec_x + beta*vec_result, but at least on cpu alpha and beta are flipped + virtual int matvec(matrix::Sparse* A, + vector_type* vec_x, + vector_type* vec_result, + const real_type* alpha, + const real_type* beta, + std::string matrix_type); + virtual int Matrix1Norm(matrix::Sparse *A, real_type* norm); + // bool getValuesChanged(); + // void setValuesChanged(bool toWhat); + + private: + LinAlgWorkspace* workspace_{nullptr}; + // bool new_matrix_{true}; ///< if the structure changed, you need a new handler. + // bool values_changed_{true}; ///< needed for matvec + + MemoryHandler mem_; ///< Device memory manager object + }; + +} // namespace ReSolve + diff --git a/resolve/matrix/MatrixHandlerCuda.cpp b/resolve/matrix/MatrixHandlerCuda.cpp new file mode 100644 index 00000000..bd151cbf --- /dev/null +++ b/resolve/matrix/MatrixHandlerCuda.cpp @@ -0,0 +1,387 @@ +#include + +#include +#include +#include +#include +#include +#include +#include "MatrixHandlerCuda.hpp" + +namespace ReSolve { + // Create a shortcut name for Logger static class + using out = io::Logger; + + // //helper class + // indexPlusValue::indexPlusValue() + // { + // idx_ = 0; + // value_ = 0.0; + // } + + + // indexPlusValue::~indexPlusValue() + // { + // } + + // void indexPlusValue::setIdx(index_type new_idx) + // { + // idx_ = new_idx; + // } + + // void indexPlusValue::setValue(real_type new_value) + // { + // value_ = new_value; + // } + + // index_type indexPlusValue::getIdx() + // { + // return idx_; + // } + + // real_type indexPlusValue::getValue() + // { + // return value_; + // } + // //end of helper class + + MatrixHandlerCuda::MatrixHandlerCuda() + { + // new_matrix_ = true; + values_changed_ = true; + } + + MatrixHandlerCuda::~MatrixHandlerCuda() + { + } + + MatrixHandlerCuda::MatrixHandlerCuda(LinAlgWorkspace* new_workspace) + { + workspace_ = new_workspace; + } + + // bool MatrixHandlerCuda::getValuesChanged() + // { + // return this->values_changed_; + // } + + // void MatrixHandlerCuda::setValuesChanged(bool toWhat) + // { + // this->values_changed_ = toWhat; + // } + +// int MatrixHandlerCuda::coo2csr(matrix::Coo* A_coo, matrix::Csr* A_csr, std::string memspace) +// { +// //this happens on the CPU not on the GPU +// //but will return whatever memspace requested. + +// //count nnzs first + +// index_type nnz_unpacked = 0; +// index_type nnz = A_coo->getNnz(); +// index_type n = A_coo->getNumRows(); +// bool symmetric = A_coo->symmetric(); +// bool expanded = A_coo->expanded(); + +// index_type* nnz_counts = new index_type[n]; +// std::fill_n(nnz_counts, n, 0); +// index_type* coo_rows = A_coo->getRowData("cpu"); +// index_type* coo_cols = A_coo->getColData("cpu"); +// real_type* coo_vals = A_coo->getValues("cpu"); + +// index_type* diag_control = new index_type[n]; //for DEDUPLICATION of the diagonal +// std::fill_n(diag_control, n, 0); +// index_type nnz_unpacked_no_duplicates = 0; +// index_type nnz_no_duplicates = nnz; + + +// //maybe check if they exist? +// for (index_type i = 0; i < nnz; ++i) +// { +// nnz_counts[coo_rows[i]]++; +// nnz_unpacked++; +// nnz_unpacked_no_duplicates++; +// if ((coo_rows[i] != coo_cols[i])&& (symmetric) && (!expanded)) +// { +// nnz_counts[coo_cols[i]]++; +// nnz_unpacked++; +// nnz_unpacked_no_duplicates++; +// } +// if (coo_rows[i] == coo_cols[i]){ +// if (diag_control[coo_rows[i]] > 0) { +// //duplicate +// nnz_unpacked_no_duplicates--; +// nnz_no_duplicates--; +// } +// diag_control[coo_rows[i]]++; +// } +// } +// A_csr->setExpanded(true); +// A_csr->setNnzExpanded(nnz_unpacked_no_duplicates); +// index_type* csr_ia = new index_type[n+1]; +// std::fill_n(csr_ia, n + 1, 0); +// index_type* csr_ja = new index_type[nnz_unpacked]; +// real_type* csr_a = new real_type[nnz_unpacked]; +// index_type* nnz_shifts = new index_type[n]; +// std::fill_n(nnz_shifts, n , 0); + +// indexPlusValue* tmp = new indexPlusValue[nnz_unpacked]; + +// csr_ia[0] = 0; + +// for (index_type i = 1; i < n + 1; ++i){ +// csr_ia[i] = csr_ia[i - 1] + nnz_counts[i - 1] - (diag_control[i-1] - 1); +// } + +// int r, start; + + +// for (index_type i = 0; i < nnz; ++i){ +// //which row +// r = coo_rows[i]; +// start = csr_ia[r]; + +// if ((start + nnz_shifts[r]) > nnz_unpacked) { +// out::warning() << "index out of bounds (case 1) start: " << start << "nnz_shifts[" << r << "] = " << nnz_shifts[r] << std::endl; +// } +// if ((r == coo_cols[i]) && (diag_control[r] > 1)) {//diagonal, and there are duplicates +// bool already_there = false; +// for (index_type j = start; j < start + nnz_shifts[r]; ++j) +// { +// index_type c = tmp[j].getIdx(); +// if (c == r) { +// real_type val = tmp[j].getValue(); +// val += coo_vals[i]; +// tmp[j].setValue(val); +// already_there = true; +// out::warning() << " duplicate found, row " << c << " adding in place " << j << " current value: " << val << std::endl; +// } +// } +// if (!already_there){ // first time this duplicates appears + +// tmp[start + nnz_shifts[r]].setIdx(coo_cols[i]); +// tmp[start + nnz_shifts[r]].setValue(coo_vals[i]); + +// nnz_shifts[r]++; +// } +// } else {//not diagonal +// tmp[start + nnz_shifts[r]].setIdx(coo_cols[i]); +// tmp[start + nnz_shifts[r]].setValue(coo_vals[i]); +// nnz_shifts[r]++; + +// if ((coo_rows[i] != coo_cols[i]) && (symmetric == 1)) +// { +// r = coo_cols[i]; +// start = csr_ia[r]; + +// if ((start + nnz_shifts[r]) > nnz_unpacked) +// out::warning() << "index out of bounds (case 2) start: " << start << "nnz_shifts[" << r << "] = " << nnz_shifts[r] << std::endl; +// tmp[start + nnz_shifts[r]].setIdx(coo_rows[i]); +// tmp[start + nnz_shifts[r]].setValue(coo_vals[i]); +// nnz_shifts[r]++; +// } +// } +// } +// //now sort whatever is inside rows + +// for (int i = 0; i < n; ++i) +// { + +// //now sorting (and adding 1) +// int colStart = csr_ia[i]; +// int colEnd = csr_ia[i + 1]; +// int length = colEnd - colStart; +// std::sort(&tmp[colStart],&tmp[colStart] + length); +// } + +// for (index_type i = 0; i < nnz_unpacked; ++i) +// { +// csr_ja[i] = tmp[i].getIdx(); +// csr_a[i] = tmp[i].getValue(); +// } +// #if 0 +// for (int i = 0; isetNnz(nnz_no_duplicates); +// if (memspace == "cpu"){ +// A_csr->updateData(csr_ia, csr_ja, csr_a, "cpu", "cpu"); +// } else { +// if (memspace == "cuda"){ +// A_csr->updateData(csr_ia, csr_ja, csr_a, "cpu", "cuda"); +// } else { +// //display error +// } +// } +// delete [] nnz_counts; +// delete [] tmp; +// delete [] nnz_shifts; +// delete [] csr_ia; +// delete [] csr_ja; +// delete [] csr_a; +// delete [] diag_control; + +// return 0; +// } + + int MatrixHandlerCuda::matvec(matrix::Sparse* Ageneric, + vector_type* vec_x, + vector_type* vec_result, + const real_type* alpha, + const real_type* beta, + std::string matrixFormat) + { + using namespace constants; + int error_sum = 0; + if (matrixFormat == "csr") { + matrix::Csr* A = dynamic_cast(Ageneric); + //result = alpha *A*x + beta * result + // if (memspace == "cuda" ) { + cusparseStatus_t status; + // std::cout << "Matvec on NVIDIA GPU ...\n"; + LinAlgWorkspaceCUDA* workspaceCUDA = (LinAlgWorkspaceCUDA*) workspace_; + cusparseDnVecDescr_t vecx = workspaceCUDA->getVecX(); + //printf("is vec_x NULL? %d\n", vec_x->getData("cuda") == nullptr); + //printf("is vec_result NULL? %d\n", vec_result->getData("cuda") == nullptr); + cusparseCreateDnVec(&vecx, A->getNumRows(), vec_x->getData("cuda"), CUDA_R_64F); + + + cusparseDnVecDescr_t vecAx = workspaceCUDA->getVecY(); + cusparseCreateDnVec(&vecAx, A->getNumRows(), vec_result->getData("cuda"), CUDA_R_64F); + + cusparseSpMatDescr_t matA = workspaceCUDA->getSpmvMatrixDescriptor(); + + void* buffer_spmv = workspaceCUDA->getSpmvBuffer(); + cusparseHandle_t handle_cusparse = workspaceCUDA->getCusparseHandle(); + if (values_changed_) { + status = cusparseCreateCsr(&matA, + A->getNumRows(), + A->getNumColumns(), + A->getNnzExpanded(), + A->getRowData("cuda"), + A->getColData("cuda"), + A->getValues("cuda"), + CUSPARSE_INDEX_32I, + CUSPARSE_INDEX_32I, + CUSPARSE_INDEX_BASE_ZERO, + CUDA_R_64F); + error_sum += status; + values_changed_ = false; + } + if (!workspaceCUDA->matvecSetup()) { + //setup first, allocate, etc. + size_t bufferSize = 0; + + status = cusparseSpMV_bufferSize(handle_cusparse, + CUSPARSE_OPERATION_NON_TRANSPOSE, + &MINUSONE, + matA, + vecx, + &ONE, + vecAx, + CUDA_R_64F, + CUSPARSE_SPMV_CSR_ALG2, + &bufferSize); + error_sum += status; + mem_.deviceSynchronize(); + mem_.allocateBufferOnDevice(&buffer_spmv, bufferSize); + workspaceCUDA->setSpmvMatrixDescriptor(matA); + workspaceCUDA->setSpmvBuffer(buffer_spmv); + + workspaceCUDA->matvecSetupDone(); + } + + status = cusparseSpMV(handle_cusparse, + CUSPARSE_OPERATION_NON_TRANSPOSE, + alpha, + matA, + vecx, + beta, + vecAx, + CUDA_R_64F, + CUSPARSE_SPMV_CSR_ALG2, + buffer_spmv); + error_sum += status; + mem_.deviceSynchronize(); + if (status) + out::error() << "Matvec status: " << status + << "Last error code: " << mem_.getLastDeviceError() << std::endl; + vec_result->setDataUpdated("cuda"); + + cusparseDestroyDnVec(vecx); + cusparseDestroyDnVec(vecAx); + return error_sum; + } else { + out::error() << "MatVec not implemented (yet) for " + << matrixFormat << " matrix format." << std::endl; + return 1; + } + } + + int MatrixHandlerCuda::Matrix1Norm(matrix::Sparse* /* A */, real_type* /* norm */) + { + return -1; + } + + // int MatrixHandlerCuda::csc2csr(matrix::Csc* A_csc, matrix::Csr* A_csr, std::string memspace) + // { + // //it ONLY WORKS WITH CUDA + // index_type error_sum = 0; + // if (memspace == "cuda") { + // LinAlgWorkspaceCUDA* workspaceCUDA = (LinAlgWorkspaceCUDA*) workspace_; + + // A_csr->allocateMatrixData("cuda"); + // index_type n = A_csc->getNumRows(); + // index_type m = A_csc->getNumRows(); + // index_type nnz = A_csc->getNnz(); + // size_t bufferSize; + // void* d_work; + // cusparseStatus_t status = cusparseCsr2cscEx2_bufferSize(workspaceCUDA->getCusparseHandle(), + // n, + // m, + // nnz, + // A_csc->getValues("cuda"), + // A_csc->getColData("cuda"), + // A_csc->getRowData("cuda"), + // A_csr->getValues("cuda"), + // A_csr->getRowData("cuda"), + // A_csr->getColData("cuda"), + // CUDA_R_64F, + // CUSPARSE_ACTION_NUMERIC, + // CUSPARSE_INDEX_BASE_ZERO, + // CUSPARSE_CSR2CSC_ALG1, + // &bufferSize); + // error_sum += status; + // mem_.allocateBufferOnDevice(&d_work, bufferSize); + // status = cusparseCsr2cscEx2(workspaceCUDA->getCusparseHandle(), + // n, + // m, + // nnz, + // A_csc->getValues("cuda"), + // A_csc->getColData("cuda"), + // A_csc->getRowData("cuda"), + // A_csr->getValues("cuda"), + // A_csr->getRowData("cuda"), + // A_csr->getColData("cuda"), + // CUDA_R_64F, + // CUSPARSE_ACTION_NUMERIC, + // CUSPARSE_INDEX_BASE_ZERO, + // CUSPARSE_CSR2CSC_ALG1, + // d_work); + // error_sum += status; + // return error_sum; + // mem_.deleteOnDevice(d_work); + // } else { + // out::error() << "Not implemented (yet)" << std::endl; + // return -1; + // } + + + // } + +} // namespace ReSolve diff --git a/resolve/matrix/MatrixHandlerCuda.hpp b/resolve/matrix/MatrixHandlerCuda.hpp new file mode 100644 index 00000000..1e411ad1 --- /dev/null +++ b/resolve/matrix/MatrixHandlerCuda.hpp @@ -0,0 +1,66 @@ +// this class encapsulates various matrix manipulation operations, commonly required by linear solvers: +// this includes +// (1) Matrix format conversion: coo2csr, csr2csc +// (2) Matrix vector product (SpMV) +// (3) Matrix 1-norm +#pragma once +#include +#include +#include + +namespace ReSolve +{ + namespace vector + { + class Vector; + } + namespace matrix + { + class Sparse; + class Coo; + class Csc; + class Csr; + } + class LinAlgWorkspaceCUDA; +} + + +namespace ReSolve { + /** + * @class MatrixHandlerCuda + * + * @brief CPU implementation of the matrix handler. + */ + class MatrixHandlerCuda : public MatrixHandlerImpl + { + using vector_type = vector::Vector; + + public: + MatrixHandlerCuda(); + MatrixHandlerCuda(LinAlgWorkspace* workspace); + virtual ~MatrixHandlerCuda(); + + // int csc2csr(matrix::Csc* A_csc, matrix::Csr* A_csr, std::string memspace); //memspace decides on what is returned (cpu or cuda pointer) + // int coo2csr(matrix::Coo* A_coo, matrix::Csr* A_csr, std::string memspace); + + /// Should compute vec_result := alpha*A*vec_x + beta*vec_result, but at least on cpu alpha and beta are flipped + virtual int matvec(matrix::Sparse* A, + vector_type* vec_x, + vector_type* vec_result, + const real_type* alpha, + const real_type* beta, + std::string matrix_type); + virtual int Matrix1Norm(matrix::Sparse *A, real_type* norm); + // bool getValuesChanged(); + // void setValuesChanged(bool toWhat); + + private: + LinAlgWorkspace* workspace_{nullptr}; + // bool new_matrix_{true}; ///< if the structure changed, you need a new handler. + // bool values_changed_{true}; ///< needed for matvec + + MemoryHandler mem_; ///< Device memory manager object + }; + +} // namespace ReSolve + diff --git a/resolve/matrix/MatrixHandlerImpl.hpp b/resolve/matrix/MatrixHandlerImpl.hpp new file mode 100644 index 00000000..5a1d6b6f --- /dev/null +++ b/resolve/matrix/MatrixHandlerImpl.hpp @@ -0,0 +1,75 @@ +// this class encapsulates various matrix manipulation operations, commonly required by linear solvers: +// this includes +// (1) Matrix format conversion: coo2csr, csr2csc +// (2) Matrix vector product (SpMV) +// (3) Matrix 1-norm +#pragma once +#include +#include + +namespace ReSolve +{ + namespace vector + { + class Vector; + } + namespace matrix + { + class Sparse; + class Coo; + class Csc; + class Csr; + } + // class LinAlgWorkspace; +} + + +namespace ReSolve { + /** + * @class MatrixHandlerImpl + * + * @brief Base class for different matrix handler implementations. + */ + class MatrixHandlerImpl + { + using vector_type = vector::Vector; + + public: + MatrixHandlerImpl() + {} + // MatrixHandlerImpl(LinAlgWorkspace* workspace); + virtual ~MatrixHandlerImpl() + {} + + // int csc2csr(matrix::Csc* A_csc, matrix::Csr* A_csr, std::string memspace); //memspace decides on what is returned (cpu or cuda pointer) + // int coo2csr(matrix::Coo* A_coo, matrix::Csr* A_csr, std::string memspace); + + /// Should compute vec_result := alpha*A*vec_x + beta*vec_result, but at least on cpu alpha and beta are flipped + virtual int matvec(matrix::Sparse* A, + vector_type* vec_x, + vector_type* vec_result, + const real_type* alpha, + const real_type* beta, + std::string matrix_type) = 0; + virtual int Matrix1Norm(matrix::Sparse* A, real_type* norm) = 0; + + bool isValuesChanged() + { + return values_changed_; + } + + void setValuesChanged(bool isValuesChanged) + { + values_changed_ = isValuesChanged; + } + + protected: + // LinAlgWorkspace* workspace_{nullptr}; + // bool new_matrix_{true}; ///< if the structure changed, you need a new handler. + bool values_changed_{true}; ///< needed for matvec + + // MemoryHandler mem_; ///< Device memory manager object + }; + +} // namespace ReSolve + From 6ad87aad21a25ecacede4f42c501ac1bdc031a9f Mon Sep 17 00:00:00 2001 From: Slaven Peles Date: Mon, 16 Oct 2023 21:45:48 -0400 Subject: [PATCH 05/15] Separate CPU and CUDA mmethods for csc2csr conversion. --- examples/r_KLU_KLU.cpp | 2 +- resolve/matrix/CMakeLists.txt | 1 - resolve/matrix/MatrixHandler.cpp | 62 ++----------- resolve/matrix/MatrixHandler.hpp | 3 +- resolve/matrix/MatrixHandlerCpu.cpp | 130 ++++++++++++++++----------- resolve/matrix/MatrixHandlerCpu.hpp | 3 +- resolve/matrix/MatrixHandlerCuda.cpp | 123 ++++++++++++------------- resolve/matrix/MatrixHandlerCuda.hpp | 3 +- resolve/matrix/MatrixHandlerImpl.hpp | 2 +- 9 files changed, 146 insertions(+), 183 deletions(-) diff --git a/examples/r_KLU_KLU.cpp b/examples/r_KLU_KLU.cpp index b1e87203..85c84921 100644 --- a/examples/r_KLU_KLU.cpp +++ b/examples/r_KLU_KLU.cpp @@ -132,7 +132,7 @@ int main(int argc, char *argv[]) status = KLU->solve(vec_rhs, vec_x); std::cout<<"KLU solve status: "<update(rhs, "cpu", "cuda"); + vec_r->update(rhs, "cpu", "cpu"); matrix_handler->setValuesChanged(true); diff --git a/resolve/matrix/CMakeLists.txt b/resolve/matrix/CMakeLists.txt index 8c74ad09..5d05df1b 100644 --- a/resolve/matrix/CMakeLists.txt +++ b/resolve/matrix/CMakeLists.txt @@ -26,7 +26,6 @@ set(Matrix_HEADER_INSTALL Csr.hpp Csc.hpp MatrixHandler.hpp - MatrixHandlerImpl.hpp ) # Build shared library ReSolve::matrix diff --git a/resolve/matrix/MatrixHandler.cpp b/resolve/matrix/MatrixHandler.cpp index 30c08991..7005046d 100644 --- a/resolve/matrix/MatrixHandler.cpp +++ b/resolve/matrix/MatrixHandler.cpp @@ -66,12 +66,6 @@ namespace ReSolve { cudaImpl_ = new MatrixHandlerCuda(new_workspace); } - bool MatrixHandler::getValuesChanged() - { - values_changed_ = cpuImpl_->isValuesChanged(); - return this->values_changed_; - } - void MatrixHandler::setValuesChanged(bool toWhat) { this->values_changed_ = toWhat; @@ -269,58 +263,16 @@ namespace ReSolve { int MatrixHandler::csc2csr(matrix::Csc* A_csc, matrix::Csr* A_csr, std::string memspace) { - //it ONLY WORKS WITH CUDA index_type error_sum = 0; if (memspace == "cuda") { - LinAlgWorkspaceCUDA* workspaceCUDA = (LinAlgWorkspaceCUDA*) workspace_; - - A_csr->allocateMatrixData("cuda"); - index_type n = A_csc->getNumRows(); - index_type m = A_csc->getNumRows(); - index_type nnz = A_csc->getNnz(); - size_t bufferSize; - void* d_work; - cusparseStatus_t status = cusparseCsr2cscEx2_bufferSize(workspaceCUDA->getCusparseHandle(), - n, - m, - nnz, - A_csc->getValues("cuda"), - A_csc->getColData("cuda"), - A_csc->getRowData("cuda"), - A_csr->getValues("cuda"), - A_csr->getRowData("cuda"), - A_csr->getColData("cuda"), - CUDA_R_64F, - CUSPARSE_ACTION_NUMERIC, - CUSPARSE_INDEX_BASE_ZERO, - CUSPARSE_CSR2CSC_ALG1, - &bufferSize); - error_sum += status; - mem_.allocateBufferOnDevice(&d_work, bufferSize); - status = cusparseCsr2cscEx2(workspaceCUDA->getCusparseHandle(), - n, - m, - nnz, - A_csc->getValues("cuda"), - A_csc->getColData("cuda"), - A_csc->getRowData("cuda"), - A_csr->getValues("cuda"), - A_csr->getRowData("cuda"), - A_csr->getColData("cuda"), - CUDA_R_64F, - CUSPARSE_ACTION_NUMERIC, - CUSPARSE_INDEX_BASE_ZERO, - CUSPARSE_CSR2CSC_ALG1, - d_work); - error_sum += status; - return error_sum; - mem_.deleteOnDevice(d_work); - } else { - out::error() << "Not implemented (yet)" << std::endl; + return cudaImpl_->csc2csr(A_csc, A_csr); + } else if (memspace == "cpu") { + out::warning() << "Using untested csc2csr on CPU ..." << std::endl; + return cpuImpl_->csc2csr(A_csc, A_csr); + } else { + out::error() << "csc2csr not implemented for " << memspace << " device." << std::endl; return -1; - } - - + } } } // namespace ReSolve diff --git a/resolve/matrix/MatrixHandler.hpp b/resolve/matrix/MatrixHandler.hpp index ea6abbcb..af444abd 100644 --- a/resolve/matrix/MatrixHandler.hpp +++ b/resolve/matrix/MatrixHandler.hpp @@ -7,7 +7,6 @@ #include #include -#include namespace ReSolve { @@ -23,6 +22,7 @@ namespace ReSolve class Csr; } class LinAlgWorkspace; + class MatrixHandlerImpl; } @@ -71,7 +71,6 @@ namespace ReSolve { std::string matrix_type, std::string memspace); int Matrix1Norm(matrix::Sparse *A, real_type* norm); - bool getValuesChanged(); void setValuesChanged(bool toWhat); private: diff --git a/resolve/matrix/MatrixHandlerCpu.cpp b/resolve/matrix/MatrixHandlerCpu.cpp index 5d61501d..d06a4ce5 100644 --- a/resolve/matrix/MatrixHandlerCpu.cpp +++ b/resolve/matrix/MatrixHandlerCpu.cpp @@ -1,4 +1,5 @@ #include +#include #include #include @@ -279,60 +280,83 @@ namespace ReSolve { return -1; } - // int MatrixHandlerCpu::csc2csr(matrix::Csc* A_csc, matrix::Csr* A_csr, std::string memspace) - // { - // //it ONLY WORKS WITH CUDA - // index_type error_sum = 0; - // if (memspace == "cuda") { - // LinAlgWorkspaceCUDA* workspaceCUDA = (LinAlgWorkspaceCUDA*) workspace_; - - // A_csr->allocateMatrixData("cuda"); - // index_type n = A_csc->getNumRows(); - // index_type m = A_csc->getNumRows(); - // index_type nnz = A_csc->getNnz(); - // size_t bufferSize; - // void* d_work; - // cusparseStatus_t status = cusparseCsr2cscEx2_bufferSize(workspaceCUDA->getCusparseHandle(), - // n, - // m, - // nnz, - // A_csc->getValues("cuda"), - // A_csc->getColData("cuda"), - // A_csc->getRowData("cuda"), - // A_csr->getValues("cuda"), - // A_csr->getRowData("cuda"), - // A_csr->getColData("cuda"), - // CUDA_R_64F, - // CUSPARSE_ACTION_NUMERIC, - // CUSPARSE_INDEX_BASE_ZERO, - // CUSPARSE_CSR2CSC_ALG1, - // &bufferSize); - // error_sum += status; - // mem_.allocateBufferOnDevice(&d_work, bufferSize); - // status = cusparseCsr2cscEx2(workspaceCUDA->getCusparseHandle(), - // n, - // m, - // nnz, - // A_csc->getValues("cuda"), - // A_csc->getColData("cuda"), - // A_csc->getRowData("cuda"), - // A_csr->getValues("cuda"), - // A_csr->getRowData("cuda"), - // A_csr->getColData("cuda"), - // CUDA_R_64F, - // CUSPARSE_ACTION_NUMERIC, - // CUSPARSE_INDEX_BASE_ZERO, - // CUSPARSE_CSR2CSC_ALG1, - // d_work); - // error_sum += status; - // return error_sum; - // mem_.deleteOnDevice(d_work); - // } else { - // out::error() << "Not implemented (yet)" << std::endl; - // return -1; - // } + /** + * @authors Slaven Peles , Daniel Reynolds (SMU), and + * David Gardner and Carol Woodward (LLNL) + */ + int MatrixHandlerCpu::csc2csr(matrix::Csc* A_csc, matrix::Csr* A_csr) + { + int error_sum = 0; + assert(A_csc->getNnz() == A_csr->getNnz()); + assert(A_csc->getNumRows() == A_csr->getNumColumns()); + assert(A_csr->getNumRows() == A_csc->getNumColumns()); + index_type nnz = A_csc->getNnz(); + index_type n = A_csc->getNumColumns(); - // } + index_type* rowIdxCsc = A_csc->getRowData("cpu"); + index_type* colPtrCsc = A_csc->getColData("cpu"); + real_type* valuesCsc = A_csc->getValues("cpu"); + + index_type* rowPtrCsr = A_csr->getRowData("cpu"); + index_type* colIdxCsr = A_csr->getColData("cpu"); + real_type* valuesCsr = A_csr->getValues("cpu"); + + // Set all CSR row pointers to zero + for (index_type i = 0; i <= n; ++i) { + rowPtrCsr[i] = 0; + } + + // Set all CSR values and column indices to zero + for (index_type i = 0; i < nnz; ++i) { + colIdxCsr[i] = 0; + valuesCsr[i] = 0.0; + } + + // Compute number of entries per row + for (index_type i = 0; i < nnz; ++i) { + rowPtrCsr[rowIdxCsc[i]]++; + } + + // Compute cumualtive sum of nnz per row + for (index_type row = 0, rowsum = 0; row < n; ++row) + { + // Store value in row pointer to temp + index_type temp = rowPtrCsr[row]; + + // Copy cumulative sum to the row pointer + rowPtrCsr[row] = rowsum; + + // Update row sum + rowsum += temp; + } + rowPtrCsr[n] = nnz; + + for (index_type col = 0; col < n; ++col) + { + // Compute positions of column indices and values in CSR matrix and store them there + // Overwrites CSR row pointers in the process + for (index_type jj = colPtrCsc[col]; jj < colPtrCsc[col+1]; jj++) + { + index_type row = rowIdxCsc[jj]; + index_type dest = rowPtrCsr[row]; + + colIdxCsr[dest] = col; + valuesCsr[dest] = valuesCsc[jj]; + + rowPtrCsr[row]++; + } + } + + // Restore CSR row pointer values + for (index_type row = 0, last = 0; row <= n; row++) + { + index_type temp = rowPtrCsr[row]; + rowPtrCsr[row] = last; + last = temp; + } + + return 0; + } } // namespace ReSolve diff --git a/resolve/matrix/MatrixHandlerCpu.hpp b/resolve/matrix/MatrixHandlerCpu.hpp index e4de02bf..60079a89 100644 --- a/resolve/matrix/MatrixHandlerCpu.hpp +++ b/resolve/matrix/MatrixHandlerCpu.hpp @@ -40,7 +40,7 @@ namespace ReSolve { MatrixHandlerCpu(LinAlgWorkspace* workspace); virtual ~MatrixHandlerCpu(); - // int csc2csr(matrix::Csc* A_csc, matrix::Csr* A_csr, std::string memspace); //memspace decides on what is returned (cpu or cuda pointer) + int csc2csr(matrix::Csc* A_csc, matrix::Csr* A_csr); //memspace decides on what is returned (cpu or cuda pointer) // int coo2csr(matrix::Coo* A_coo, matrix::Csr* A_csr, std::string memspace); /// Should compute vec_result := alpha*A*vec_x + beta*vec_result, but at least on cpu alpha and beta are flipped @@ -51,7 +51,6 @@ namespace ReSolve { const real_type* beta, std::string matrix_type); virtual int Matrix1Norm(matrix::Sparse *A, real_type* norm); - // bool getValuesChanged(); // void setValuesChanged(bool toWhat); private: diff --git a/resolve/matrix/MatrixHandlerCuda.cpp b/resolve/matrix/MatrixHandlerCuda.cpp index bd151cbf..31515398 100644 --- a/resolve/matrix/MatrixHandlerCuda.cpp +++ b/resolve/matrix/MatrixHandlerCuda.cpp @@ -241,7 +241,6 @@ namespace ReSolve { if (matrixFormat == "csr") { matrix::Csr* A = dynamic_cast(Ageneric); //result = alpha *A*x + beta * result - // if (memspace == "cuda" ) { cusparseStatus_t status; // std::cout << "Matvec on NVIDIA GPU ...\n"; LinAlgWorkspaceCUDA* workspaceCUDA = (LinAlgWorkspaceCUDA*) workspace_; @@ -260,16 +259,16 @@ namespace ReSolve { cusparseHandle_t handle_cusparse = workspaceCUDA->getCusparseHandle(); if (values_changed_) { status = cusparseCreateCsr(&matA, - A->getNumRows(), - A->getNumColumns(), - A->getNnzExpanded(), - A->getRowData("cuda"), - A->getColData("cuda"), - A->getValues("cuda"), - CUSPARSE_INDEX_32I, - CUSPARSE_INDEX_32I, - CUSPARSE_INDEX_BASE_ZERO, - CUDA_R_64F); + A->getNumRows(), + A->getNumColumns(), + A->getNnzExpanded(), + A->getRowData("cuda"), + A->getColData("cuda"), + A->getValues("cuda"), + CUSPARSE_INDEX_32I, + CUSPARSE_INDEX_32I, + CUSPARSE_INDEX_BASE_ZERO, + CUDA_R_64F); error_sum += status; values_changed_ = false; } @@ -328,60 +327,52 @@ namespace ReSolve { return -1; } - // int MatrixHandlerCuda::csc2csr(matrix::Csc* A_csc, matrix::Csr* A_csr, std::string memspace) - // { - // //it ONLY WORKS WITH CUDA - // index_type error_sum = 0; - // if (memspace == "cuda") { - // LinAlgWorkspaceCUDA* workspaceCUDA = (LinAlgWorkspaceCUDA*) workspace_; - - // A_csr->allocateMatrixData("cuda"); - // index_type n = A_csc->getNumRows(); - // index_type m = A_csc->getNumRows(); - // index_type nnz = A_csc->getNnz(); - // size_t bufferSize; - // void* d_work; - // cusparseStatus_t status = cusparseCsr2cscEx2_bufferSize(workspaceCUDA->getCusparseHandle(), - // n, - // m, - // nnz, - // A_csc->getValues("cuda"), - // A_csc->getColData("cuda"), - // A_csc->getRowData("cuda"), - // A_csr->getValues("cuda"), - // A_csr->getRowData("cuda"), - // A_csr->getColData("cuda"), - // CUDA_R_64F, - // CUSPARSE_ACTION_NUMERIC, - // CUSPARSE_INDEX_BASE_ZERO, - // CUSPARSE_CSR2CSC_ALG1, - // &bufferSize); - // error_sum += status; - // mem_.allocateBufferOnDevice(&d_work, bufferSize); - // status = cusparseCsr2cscEx2(workspaceCUDA->getCusparseHandle(), - // n, - // m, - // nnz, - // A_csc->getValues("cuda"), - // A_csc->getColData("cuda"), - // A_csc->getRowData("cuda"), - // A_csr->getValues("cuda"), - // A_csr->getRowData("cuda"), - // A_csr->getColData("cuda"), - // CUDA_R_64F, - // CUSPARSE_ACTION_NUMERIC, - // CUSPARSE_INDEX_BASE_ZERO, - // CUSPARSE_CSR2CSC_ALG1, - // d_work); - // error_sum += status; - // return error_sum; - // mem_.deleteOnDevice(d_work); - // } else { - // out::error() << "Not implemented (yet)" << std::endl; - // return -1; - // } - - - // } + int MatrixHandlerCuda::csc2csr(matrix::Csc* A_csc, matrix::Csr* A_csr) + { + index_type error_sum = 0; + LinAlgWorkspaceCUDA* workspaceCUDA = (LinAlgWorkspaceCUDA*) workspace_; + + A_csr->allocateMatrixData("cuda"); + index_type n = A_csc->getNumRows(); + index_type m = A_csc->getNumRows(); + index_type nnz = A_csc->getNnz(); + size_t bufferSize; + void* d_work; + cusparseStatus_t status = cusparseCsr2cscEx2_bufferSize(workspaceCUDA->getCusparseHandle(), + n, + m, + nnz, + A_csc->getValues("cuda"), + A_csc->getColData("cuda"), + A_csc->getRowData("cuda"), + A_csr->getValues("cuda"), + A_csr->getRowData("cuda"), + A_csr->getColData("cuda"), + CUDA_R_64F, + CUSPARSE_ACTION_NUMERIC, + CUSPARSE_INDEX_BASE_ZERO, + CUSPARSE_CSR2CSC_ALG1, + &bufferSize); + error_sum += status; + mem_.allocateBufferOnDevice(&d_work, bufferSize); + status = cusparseCsr2cscEx2(workspaceCUDA->getCusparseHandle(), + n, + m, + nnz, + A_csc->getValues("cuda"), + A_csc->getColData("cuda"), + A_csc->getRowData("cuda"), + A_csr->getValues("cuda"), + A_csr->getRowData("cuda"), + A_csr->getColData("cuda"), + CUDA_R_64F, + CUSPARSE_ACTION_NUMERIC, + CUSPARSE_INDEX_BASE_ZERO, + CUSPARSE_CSR2CSC_ALG1, + d_work); + error_sum += status; + return error_sum; + mem_.deleteOnDevice(d_work); + } } // namespace ReSolve diff --git a/resolve/matrix/MatrixHandlerCuda.hpp b/resolve/matrix/MatrixHandlerCuda.hpp index 1e411ad1..ae0c3709 100644 --- a/resolve/matrix/MatrixHandlerCuda.hpp +++ b/resolve/matrix/MatrixHandlerCuda.hpp @@ -40,7 +40,7 @@ namespace ReSolve { MatrixHandlerCuda(LinAlgWorkspace* workspace); virtual ~MatrixHandlerCuda(); - // int csc2csr(matrix::Csc* A_csc, matrix::Csr* A_csr, std::string memspace); //memspace decides on what is returned (cpu or cuda pointer) + int csc2csr(matrix::Csc* A_csc, matrix::Csr* A_csr); //memspace decides on what is returned (cpu or cuda pointer) // int coo2csr(matrix::Coo* A_coo, matrix::Csr* A_csr, std::string memspace); /// Should compute vec_result := alpha*A*vec_x + beta*vec_result, but at least on cpu alpha and beta are flipped @@ -51,7 +51,6 @@ namespace ReSolve { const real_type* beta, std::string matrix_type); virtual int Matrix1Norm(matrix::Sparse *A, real_type* norm); - // bool getValuesChanged(); // void setValuesChanged(bool toWhat); private: diff --git a/resolve/matrix/MatrixHandlerImpl.hpp b/resolve/matrix/MatrixHandlerImpl.hpp index 5a1d6b6f..ba24d498 100644 --- a/resolve/matrix/MatrixHandlerImpl.hpp +++ b/resolve/matrix/MatrixHandlerImpl.hpp @@ -41,7 +41,7 @@ namespace ReSolve { virtual ~MatrixHandlerImpl() {} - // int csc2csr(matrix::Csc* A_csc, matrix::Csr* A_csr, std::string memspace); //memspace decides on what is returned (cpu or cuda pointer) + virtual int csc2csr(matrix::Csc* A_csc, matrix::Csr* A_csr) = 0; // int coo2csr(matrix::Coo* A_coo, matrix::Csr* A_csr, std::string memspace); /// Should compute vec_result := alpha*A*vec_x + beta*vec_result, but at least on cpu alpha and beta are flipped From 23edbf7548b96e4e1c27cddb3cdb2f86e32745ff Mon Sep 17 00:00:00 2001 From: Slaven Peles Date: Mon, 16 Oct 2023 22:31:37 -0400 Subject: [PATCH 06/15] Method setValuesChanged in MatrixHandler now requires memory space as input. --- examples/r_KLU_GLU.cpp | 2 +- examples/r_KLU_GLU_matrix_values_update.cpp | 2 +- examples/r_KLU_KLU.cpp | 2 +- examples/r_KLU_KLU_standalone.cpp | 2 +- examples/r_KLU_rf.cpp | 2 +- examples/r_KLU_rf_FGMRES.cpp | 6 ++--- .../r_KLU_rf_FGMRES_reuse_factorization.cpp | 6 ++--- resolve/LinSolverIterativeFGMRES.cpp | 2 +- resolve/matrix/MatrixHandler.cpp | 12 ++++++---- resolve/matrix/MatrixHandler.hpp | 5 ++--- resolve/matrix/MatrixHandlerCpu.cpp | 8 +++---- resolve/matrix/MatrixHandlerCpu.hpp | 4 ++-- resolve/matrix/MatrixHandlerCuda.cpp | 8 +++---- resolve/matrix/MatrixHandlerCuda.hpp | 4 ++-- resolve/matrix/MatrixHandlerImpl.hpp | 22 ++++++------------- tests/functionality/testKLU.cpp | 4 ++-- tests/functionality/testKLU_GLU.cpp | 4 ++-- tests/functionality/testKLU_Rf.cpp | 6 ++--- tests/functionality/testKLU_Rf_FGMRES.cpp | 4 ++-- tests/unit/matrix/MatrixHandlerTests.hpp | 2 +- 20 files changed, 51 insertions(+), 56 deletions(-) diff --git a/examples/r_KLU_GLU.cpp b/examples/r_KLU_GLU.cpp index 82c949db..c072eb34 100644 --- a/examples/r_KLU_GLU.cpp +++ b/examples/r_KLU_GLU.cpp @@ -145,7 +145,7 @@ int main(int argc, char *argv[]) vec_r->update(rhs, "cpu", "cuda"); - matrix_handler->setValuesChanged(true); + matrix_handler->setValuesChanged(true, "cuda"); matrix_handler->matvec(A, vec_x, vec_r, &ONE, &MINUSONE,"csr", "cuda"); printf("\t 2-Norm of the residual: %16.16e\n", sqrt(vector_handler->dot(vec_r, vec_r, "cuda"))); diff --git a/examples/r_KLU_GLU_matrix_values_update.cpp b/examples/r_KLU_GLU_matrix_values_update.cpp index 19d7ec3c..af797883 100644 --- a/examples/r_KLU_GLU_matrix_values_update.cpp +++ b/examples/r_KLU_GLU_matrix_values_update.cpp @@ -155,7 +155,7 @@ int main(int argc, char *argv[]) vec_r->update(rhs, "cpu", "cuda"); - matrix_handler->setValuesChanged(true); + matrix_handler->setValuesChanged(true, "cuda"); matrix_handler->matvec(A, vec_x, vec_r, &ONE, &MINUSONE,"csr", "cuda"); printf("\t 2-Norm of the residual: %16.16e\n", sqrt(vector_handler->dot(vec_r, vec_r, "cuda"))); diff --git a/examples/r_KLU_KLU.cpp b/examples/r_KLU_KLU.cpp index 85c84921..e777833a 100644 --- a/examples/r_KLU_KLU.cpp +++ b/examples/r_KLU_KLU.cpp @@ -134,7 +134,7 @@ int main(int argc, char *argv[]) } vec_r->update(rhs, "cpu", "cpu"); - matrix_handler->setValuesChanged(true); + matrix_handler->setValuesChanged(true, "cpu"); matrix_handler->matvec(A, vec_x, vec_r, &ONE, &MINUSONE, "csr", "cpu"); real_type* test = vec_r->getData("cpu"); diff --git a/examples/r_KLU_KLU_standalone.cpp b/examples/r_KLU_KLU_standalone.cpp index f0b7c3a4..daeae43a 100644 --- a/examples/r_KLU_KLU_standalone.cpp +++ b/examples/r_KLU_KLU_standalone.cpp @@ -96,7 +96,7 @@ int main(int argc, char *argv[]) std::cout << "KLU solve status: " << status << std::endl; vec_r->update(rhs, "cpu", "cpu"); - matrix_handler->setValuesChanged(true); + matrix_handler->setValuesChanged(true, "cpu"); matrix_handler->matvec(A, vec_x, vec_r, &ONE, &MINUSONE, "csr", "cpu"); real_type* test = vec_r->getData("cpu"); diff --git a/examples/r_KLU_rf.cpp b/examples/r_KLU_rf.cpp index f2e96dfe..8954724d 100644 --- a/examples/r_KLU_rf.cpp +++ b/examples/r_KLU_rf.cpp @@ -158,7 +158,7 @@ int main(int argc, char *argv[] ) } vec_r->update(rhs, "cpu", "cuda"); - matrix_handler->setValuesChanged(true); + matrix_handler->setValuesChanged(true, "cuda"); matrix_handler->matvec(A, vec_x, vec_r, &ONE, &MINUSONE,"csr", "cuda"); diff --git a/examples/r_KLU_rf_FGMRES.cpp b/examples/r_KLU_rf_FGMRES.cpp index d9543895..52f54a0d 100644 --- a/examples/r_KLU_rf_FGMRES.cpp +++ b/examples/r_KLU_rf_FGMRES.cpp @@ -125,7 +125,7 @@ int main(int argc, char *argv[]) real_type norm_b; if (i < 2){ KLU->setup(A); - matrix_handler->setValuesChanged(true); + matrix_handler->setValuesChanged(true, "cuda"); status = KLU->analyze(); std::cout<<"KLU analysis status: "<factorize(); @@ -135,7 +135,7 @@ int main(int argc, char *argv[]) vec_r->update(rhs, "cpu", "cuda"); norm_b = vector_handler->dot(vec_r, vec_r, "cuda"); norm_b = sqrt(norm_b); - matrix_handler->setValuesChanged(true); + matrix_handler->setValuesChanged(true, "cuda"); matrix_handler->matvec(A, vec_x, vec_r, &ONE, &MINUSONE,"csr", "cuda"); printf("\t 2-Norm of the residual : %16.16e\n", sqrt(vector_handler->dot(vec_r, vec_r, "cuda"))/norm_b); if (i == 1) { @@ -165,7 +165,7 @@ int main(int argc, char *argv[]) norm_b = vector_handler->dot(vec_r, vec_r, "cuda"); norm_b = sqrt(norm_b); - //matrix_handler->setValuesChanged(true); + //matrix_handler->setValuesChanged(true, "cuda"); FGMRES->resetMatrix(A); FGMRES->setupPreconditioner("CuSolverRf", Rf); diff --git a/examples/r_KLU_rf_FGMRES_reuse_factorization.cpp b/examples/r_KLU_rf_FGMRES_reuse_factorization.cpp index d50bdb4a..2c484926 100644 --- a/examples/r_KLU_rf_FGMRES_reuse_factorization.cpp +++ b/examples/r_KLU_rf_FGMRES_reuse_factorization.cpp @@ -126,7 +126,7 @@ int main(int argc, char *argv[]) real_type norm_b; if (i < 2){ KLU->setup(A); - matrix_handler->setValuesChanged(true); + matrix_handler->setValuesChanged(true, "cuda"); status = KLU->analyze(); std::cout<<"KLU analysis status: "<factorize(); @@ -136,7 +136,7 @@ int main(int argc, char *argv[]) vec_r->update(rhs, "cpu", "cuda"); norm_b = vector_handler->dot(vec_r, vec_r, "cuda"); norm_b = sqrt(norm_b); - matrix_handler->setValuesChanged(true); + matrix_handler->setValuesChanged(true, "cuda"); matrix_handler->matvec(A, vec_x, vec_r, &ONE, &MINUSONE,"csr", "cuda"); printf("\t 2-Norm of the residual : %16.16e\n", sqrt(vector_handler->dot(vec_r, vec_r, "cuda"))/norm_b); if (i == 1) { @@ -178,7 +178,7 @@ int main(int argc, char *argv[]) norm_b = vector_handler->dot(vec_r, vec_r, "cuda"); norm_b = sqrt(norm_b); - matrix_handler->setValuesChanged(true); + matrix_handler->setValuesChanged(true, "cuda"); FGMRES->resetMatrix(A); matrix_handler->matvec(A, vec_x, vec_r, &ONE, &MINUSONE,"csr", "cuda"); diff --git a/resolve/LinSolverIterativeFGMRES.cpp b/resolve/LinSolverIterativeFGMRES.cpp index 2a6da732..bd7dca42 100644 --- a/resolve/LinSolverIterativeFGMRES.cpp +++ b/resolve/LinSolverIterativeFGMRES.cpp @@ -308,7 +308,7 @@ namespace ReSolve int LinSolverIterativeFGMRES::resetMatrix(matrix::Sparse* new_matrix) { A_ = new_matrix; - matrix_handler_->setValuesChanged(true); + matrix_handler_->setValuesChanged(true, "cuda"); return 0; } diff --git a/resolve/matrix/MatrixHandler.cpp b/resolve/matrix/MatrixHandler.cpp index 7005046d..6ebadfa7 100644 --- a/resolve/matrix/MatrixHandler.cpp +++ b/resolve/matrix/MatrixHandler.cpp @@ -50,7 +50,6 @@ namespace ReSolve { MatrixHandler::MatrixHandler() { this->new_matrix_ = true; - this->values_changed_ = true; cpuImpl_ = new MatrixHandlerCpu(); cudaImpl_ = new MatrixHandlerCuda(); } @@ -66,10 +65,15 @@ namespace ReSolve { cudaImpl_ = new MatrixHandlerCuda(new_workspace); } - void MatrixHandler::setValuesChanged(bool toWhat) + void MatrixHandler::setValuesChanged(bool isValuesChanged, std::string memspace) { - this->values_changed_ = toWhat; - cpuImpl_->setValuesChanged(values_changed_); + if (memspace == "cpu") { + cpuImpl_->setValuesChanged(isValuesChanged); + } else if (memspace == "cuda") { + cudaImpl_->setValuesChanged(isValuesChanged); + } else { + out::error() << "Unsupported device " << memspace << "\n"; + } } int MatrixHandler::coo2csr(matrix::Coo* A_coo, matrix::Csr* A_csr, std::string memspace) diff --git a/resolve/matrix/MatrixHandler.hpp b/resolve/matrix/MatrixHandler.hpp index af444abd..59ace249 100644 --- a/resolve/matrix/MatrixHandler.hpp +++ b/resolve/matrix/MatrixHandler.hpp @@ -71,15 +71,14 @@ namespace ReSolve { std::string matrix_type, std::string memspace); int Matrix1Norm(matrix::Sparse *A, real_type* norm); - void setValuesChanged(bool toWhat); + void setValuesChanged(bool toWhat, std::string memspace); private: LinAlgWorkspace* workspace_{nullptr}; bool new_matrix_{true}; ///< if the structure changed, you need a new handler. - bool values_changed_{true}; ///< needed for matvec MemoryHandler mem_; ///< Device memory manager object - MatrixHandlerImpl* cpuImpl_{nullptr}; + MatrixHandlerImpl* cpuImpl_{nullptr}; MatrixHandlerImpl* cudaImpl_{nullptr}; }; diff --git a/resolve/matrix/MatrixHandlerCpu.cpp b/resolve/matrix/MatrixHandlerCpu.cpp index d06a4ce5..522d2a1b 100644 --- a/resolve/matrix/MatrixHandlerCpu.cpp +++ b/resolve/matrix/MatrixHandlerCpu.cpp @@ -65,10 +65,10 @@ namespace ReSolve { // return this->values_changed_; // } - // void MatrixHandlerCpu::setValuesChanged(bool toWhat) - // { - // this->values_changed_ = toWhat; - // } + void MatrixHandlerCpu::setValuesChanged(bool values_changed) + { + values_changed_ = values_changed; + } // int MatrixHandlerCpu::coo2csr(matrix::Coo* A_coo, matrix::Csr* A_csr, std::string memspace) // { diff --git a/resolve/matrix/MatrixHandlerCpu.hpp b/resolve/matrix/MatrixHandlerCpu.hpp index 60079a89..fb760750 100644 --- a/resolve/matrix/MatrixHandlerCpu.hpp +++ b/resolve/matrix/MatrixHandlerCpu.hpp @@ -51,12 +51,12 @@ namespace ReSolve { const real_type* beta, std::string matrix_type); virtual int Matrix1Norm(matrix::Sparse *A, real_type* norm); - // void setValuesChanged(bool toWhat); + void setValuesChanged(bool isValuesChanged); private: LinAlgWorkspace* workspace_{nullptr}; // bool new_matrix_{true}; ///< if the structure changed, you need a new handler. - // bool values_changed_{true}; ///< needed for matvec + bool values_changed_{true}; ///< needed for matvec MemoryHandler mem_; ///< Device memory manager object }; diff --git a/resolve/matrix/MatrixHandlerCuda.cpp b/resolve/matrix/MatrixHandlerCuda.cpp index 31515398..f40de0d8 100644 --- a/resolve/matrix/MatrixHandlerCuda.cpp +++ b/resolve/matrix/MatrixHandlerCuda.cpp @@ -65,10 +65,10 @@ namespace ReSolve { // return this->values_changed_; // } - // void MatrixHandlerCuda::setValuesChanged(bool toWhat) - // { - // this->values_changed_ = toWhat; - // } + void MatrixHandlerCuda::setValuesChanged(bool values_changed) + { + values_changed_ = values_changed; + } // int MatrixHandlerCuda::coo2csr(matrix::Coo* A_coo, matrix::Csr* A_csr, std::string memspace) // { diff --git a/resolve/matrix/MatrixHandlerCuda.hpp b/resolve/matrix/MatrixHandlerCuda.hpp index ae0c3709..f2250008 100644 --- a/resolve/matrix/MatrixHandlerCuda.hpp +++ b/resolve/matrix/MatrixHandlerCuda.hpp @@ -51,12 +51,12 @@ namespace ReSolve { const real_type* beta, std::string matrix_type); virtual int Matrix1Norm(matrix::Sparse *A, real_type* norm); - // void setValuesChanged(bool toWhat); + void setValuesChanged(bool isValuesChanged); private: LinAlgWorkspace* workspace_{nullptr}; // bool new_matrix_{true}; ///< if the structure changed, you need a new handler. - // bool values_changed_{true}; ///< needed for matvec + bool values_changed_{true}; ///< needed for matvec MemoryHandler mem_; ///< Device memory manager object }; diff --git a/resolve/matrix/MatrixHandlerImpl.hpp b/resolve/matrix/MatrixHandlerImpl.hpp index ba24d498..015412d0 100644 --- a/resolve/matrix/MatrixHandlerImpl.hpp +++ b/resolve/matrix/MatrixHandlerImpl.hpp @@ -46,27 +46,19 @@ namespace ReSolve { /// Should compute vec_result := alpha*A*vec_x + beta*vec_result, but at least on cpu alpha and beta are flipped virtual int matvec(matrix::Sparse* A, - vector_type* vec_x, - vector_type* vec_result, - const real_type* alpha, - const real_type* beta, - std::string matrix_type) = 0; + vector_type* vec_x, + vector_type* vec_result, + const real_type* alpha, + const real_type* beta, + std::string matrix_type) = 0; virtual int Matrix1Norm(matrix::Sparse* A, real_type* norm) = 0; - bool isValuesChanged() - { - return values_changed_; - } - - void setValuesChanged(bool isValuesChanged) - { - values_changed_ = isValuesChanged; - } + virtual void setValuesChanged(bool isValuesChanged) = 0; protected: // LinAlgWorkspace* workspace_{nullptr}; // bool new_matrix_{true}; ///< if the structure changed, you need a new handler. - bool values_changed_{true}; ///< needed for matvec + // bool values_changed_{true}; ///< needed for matvec // MemoryHandler mem_; ///< Device memory manager object }; diff --git a/tests/functionality/testKLU.cpp b/tests/functionality/testKLU.cpp index a60083cd..70ee83b0 100644 --- a/tests/functionality/testKLU.cpp +++ b/tests/functionality/testKLU.cpp @@ -104,7 +104,7 @@ int main(int argc, char *argv[]) vec_diff->update(x_data, "cpu", "cpu"); // real_type normXmatrix1 = sqrt(vector_handler->dot(vec_test, vec_test, "cpu")); - matrix_handler->setValuesChanged(true); + matrix_handler->setValuesChanged(true, "cpu"); status = matrix_handler->matvec(A, vec_x, vec_r, &ONE, &MINUSONE,"csr","cpu"); error_sum += status; @@ -174,7 +174,7 @@ int main(int argc, char *argv[]) error_sum += status; vec_r->update(rhs, "cpu", "cpu"); - matrix_handler->setValuesChanged(true); + matrix_handler->setValuesChanged(true, "cpu"); status = matrix_handler->matvec(A, vec_x, vec_r, &ONE, &MINUSONE, "csr", "cpu"); error_sum += status; diff --git a/tests/functionality/testKLU_GLU.cpp b/tests/functionality/testKLU_GLU.cpp index 60dcce50..1c66fda1 100644 --- a/tests/functionality/testKLU_GLU.cpp +++ b/tests/functionality/testKLU_GLU.cpp @@ -127,7 +127,7 @@ int main(int argc, char *argv[]) vec_diff->update(x_data, "cpu", "cuda"); // real_type normXmatrix1 = sqrt(vector_handler->dot(vec_test, vec_test, "cuda")); - matrix_handler->setValuesChanged(true); + matrix_handler->setValuesChanged(true, "cuda"); status = matrix_handler->matvec(A, vec_x, vec_r, &ONE, &MINUSONE,"csr","cuda"); error_sum += status; @@ -198,7 +198,7 @@ int main(int argc, char *argv[]) error_sum += status; vec_r->update(rhs, "cpu", "cuda"); - matrix_handler->setValuesChanged(true); + matrix_handler->setValuesChanged(true, "cuda"); status = matrix_handler->matvec(A, vec_x, vec_r, &ONE, &MINUSONE, "csr", "cuda"); error_sum += status; diff --git a/tests/functionality/testKLU_Rf.cpp b/tests/functionality/testKLU_Rf.cpp index 50add7cd..656aa724 100644 --- a/tests/functionality/testKLU_Rf.cpp +++ b/tests/functionality/testKLU_Rf.cpp @@ -111,7 +111,7 @@ int main(int argc, char *argv[]) vec_diff->update(x_data, "cpu", "cuda"); // real_type normXmatrix1 = sqrt(vector_handler->dot(vec_test, vec_test, "cuda")); - matrix_handler->setValuesChanged(true); + matrix_handler->setValuesChanged(true, "cuda"); status = matrix_handler->matvec(A, vec_x, vec_r, &ONE, &MINUSONE,"csr","cuda"); error_sum += status; @@ -194,8 +194,8 @@ int main(int argc, char *argv[]) status = Rf->solve(vec_rhs, vec_x); error_sum += status; - vec_r->update(rhs, "cpu", "cuda"); - matrix_handler->setValuesChanged(true); + vec_r->update(rhs, "cpu", "cuda"); + matrix_handler->setValuesChanged(true, "cuda"); status = matrix_handler->matvec(A, vec_x, vec_r, &ONE, &MINUSONE, "csr", "cuda"); error_sum += status; diff --git a/tests/functionality/testKLU_Rf_FGMRES.cpp b/tests/functionality/testKLU_Rf_FGMRES.cpp index 2dae74b5..14bb0f33 100644 --- a/tests/functionality/testKLU_Rf_FGMRES.cpp +++ b/tests/functionality/testKLU_Rf_FGMRES.cpp @@ -117,7 +117,7 @@ int main(int argc, char *argv[]) vec_diff->update(x_data, "cpu", "cuda"); // real_type normXmatrix1 = sqrt(vector_handler->dot(vec_test, vec_test, "cuda")); - matrix_handler->setValuesChanged(true); + matrix_handler->setValuesChanged(true, "cuda"); //evaluate the residual ||b-Ax|| status = matrix_handler->matvec(A, vec_x, vec_r, &ONE, &MINUSONE,"csr","cuda"); error_sum += status; @@ -221,7 +221,7 @@ int main(int argc, char *argv[]) error_sum += status; vec_r->update(rhs, "cpu", "cuda"); - matrix_handler->setValuesChanged(true); + matrix_handler->setValuesChanged(true, "cuda"); //evaluate final residual status = matrix_handler->matvec(A, vec_x, vec_r, &ONE, &MINUSONE, "csr", "cuda"); diff --git a/tests/unit/matrix/MatrixHandlerTests.hpp b/tests/unit/matrix/MatrixHandlerTests.hpp index 41df9f4b..16fd3215 100644 --- a/tests/unit/matrix/MatrixHandlerTests.hpp +++ b/tests/unit/matrix/MatrixHandlerTests.hpp @@ -57,7 +57,7 @@ class MatrixHandlerTests : TestBase real_type alpha = 2.0/30.0; real_type beta = 2.0; - handler.setValuesChanged(true); + handler.setValuesChanged(true, memspace_); handler.matvec(A, &x, &y, &alpha, &beta, "csr", memspace_); status *= verifyAnswer(y, 4.0, memspace_); From edc7e4c3ba3f5b795bfee73da5cb96718fb89b73 Mon Sep 17 00:00:00 2001 From: Slaven Peles Date: Tue, 17 Oct 2023 11:53:28 -0400 Subject: [PATCH 07/15] Take helper class for index-value pairs outside MatrixHandler sources. --- resolve/matrix/MatrixHandler.cpp | 36 ++----------------- resolve/matrix/MatrixHandler.hpp | 22 ------------ resolve/matrix/MatrixHandlerCpu.cpp | 35 +----------------- resolve/matrix/MatrixHandlerCuda.cpp | 35 +----------------- resolve/utilities/misc/IndexValuePair.hpp | 43 +++++++++++++++++++++++ 5 files changed, 47 insertions(+), 124 deletions(-) create mode 100644 resolve/utilities/misc/IndexValuePair.hpp diff --git a/resolve/matrix/MatrixHandler.cpp b/resolve/matrix/MatrixHandler.cpp index 6ebadfa7..c605ce5a 100644 --- a/resolve/matrix/MatrixHandler.cpp +++ b/resolve/matrix/MatrixHandler.cpp @@ -6,6 +6,7 @@ #include #include #include +#include #include "MatrixHandler.hpp" #include "MatrixHandlerCpu.hpp" #include "MatrixHandlerCuda.hpp" @@ -14,39 +15,6 @@ namespace ReSolve { // Create a shortcut name for Logger static class using out = io::Logger; - //helper class - indexPlusValue::indexPlusValue() - { - idx_ = 0; - value_ = 0.0; - } - - - indexPlusValue::~indexPlusValue() - { - } - - void indexPlusValue::setIdx(index_type new_idx) - { - idx_ = new_idx; - } - - void indexPlusValue::setValue(real_type new_value) - { - value_ = new_value; - } - - index_type indexPlusValue::getIdx() - { - return idx_; - } - - real_type indexPlusValue::getValue() - { - return value_; - } - //end of helper class - MatrixHandler::MatrixHandler() { this->new_matrix_ = true; @@ -131,7 +99,7 @@ namespace ReSolve { index_type* nnz_shifts = new index_type[n]; std::fill_n(nnz_shifts, n , 0); - indexPlusValue* tmp = new indexPlusValue[nnz_unpacked]; + IndexValuePair* tmp = new IndexValuePair[nnz_unpacked]; csr_ia[0] = 0; diff --git a/resolve/matrix/MatrixHandler.hpp b/resolve/matrix/MatrixHandler.hpp index 59ace249..7ba01a08 100644 --- a/resolve/matrix/MatrixHandler.hpp +++ b/resolve/matrix/MatrixHandler.hpp @@ -28,28 +28,6 @@ namespace ReSolve namespace ReSolve { - //helper class - class indexPlusValue - { - public: - indexPlusValue(); - ~indexPlusValue(); - void setIdx (index_type new_idx); - void setValue (real_type new_value); - - index_type getIdx(); - real_type getValue(); - - bool operator < (const indexPlusValue& str) const - { - return (idx_ < str.idx_); - } - - private: - index_type idx_; - real_type value_; - }; - class MatrixHandler { using vector_type = vector::Vector; diff --git a/resolve/matrix/MatrixHandlerCpu.cpp b/resolve/matrix/MatrixHandlerCpu.cpp index 522d2a1b..cc211f5f 100644 --- a/resolve/matrix/MatrixHandlerCpu.cpp +++ b/resolve/matrix/MatrixHandlerCpu.cpp @@ -12,39 +12,6 @@ namespace ReSolve { // Create a shortcut name for Logger static class using out = io::Logger; - // //helper class - // indexPlusValue::indexPlusValue() - // { - // idx_ = 0; - // value_ = 0.0; - // } - - - // indexPlusValue::~indexPlusValue() - // { - // } - - // void indexPlusValue::setIdx(index_type new_idx) - // { - // idx_ = new_idx; - // } - - // void indexPlusValue::setValue(real_type new_value) - // { - // value_ = new_value; - // } - - // index_type indexPlusValue::getIdx() - // { - // return idx_; - // } - - // real_type indexPlusValue::getValue() - // { - // return value_; - // } - // //end of helper class - MatrixHandlerCpu::MatrixHandlerCpu() { // new_matrix_ = true; @@ -125,7 +92,7 @@ namespace ReSolve { // index_type* nnz_shifts = new index_type[n]; // std::fill_n(nnz_shifts, n , 0); -// indexPlusValue* tmp = new indexPlusValue[nnz_unpacked]; +// IndexValuePair* tmp = new IndexValuePair[nnz_unpacked]; // csr_ia[0] = 0; diff --git a/resolve/matrix/MatrixHandlerCuda.cpp b/resolve/matrix/MatrixHandlerCuda.cpp index f40de0d8..deb09180 100644 --- a/resolve/matrix/MatrixHandlerCuda.cpp +++ b/resolve/matrix/MatrixHandlerCuda.cpp @@ -12,39 +12,6 @@ namespace ReSolve { // Create a shortcut name for Logger static class using out = io::Logger; - // //helper class - // indexPlusValue::indexPlusValue() - // { - // idx_ = 0; - // value_ = 0.0; - // } - - - // indexPlusValue::~indexPlusValue() - // { - // } - - // void indexPlusValue::setIdx(index_type new_idx) - // { - // idx_ = new_idx; - // } - - // void indexPlusValue::setValue(real_type new_value) - // { - // value_ = new_value; - // } - - // index_type indexPlusValue::getIdx() - // { - // return idx_; - // } - - // real_type indexPlusValue::getValue() - // { - // return value_; - // } - // //end of helper class - MatrixHandlerCuda::MatrixHandlerCuda() { // new_matrix_ = true; @@ -125,7 +92,7 @@ namespace ReSolve { // index_type* nnz_shifts = new index_type[n]; // std::fill_n(nnz_shifts, n , 0); -// indexPlusValue* tmp = new indexPlusValue[nnz_unpacked]; +// IndexValuePair* tmp = new IndexValuePair[nnz_unpacked]; // csr_ia[0] = 0; diff --git a/resolve/utilities/misc/IndexValuePair.hpp b/resolve/utilities/misc/IndexValuePair.hpp new file mode 100644 index 00000000..b09ccf97 --- /dev/null +++ b/resolve/utilities/misc/IndexValuePair.hpp @@ -0,0 +1,43 @@ +#pragma once + + +namespace ReSolve { + + /// @brief Helper class for COO matrix sorting + class IndexValuePair + { + public: + IndexValuePair() : idx_(0), value_(0.0) + {} + ~IndexValuePair() + {} + void setIdx (index_type new_idx) + { + idx_ = new_idx; + } + void setValue (real_type new_value) + { + value_ = new_value; + } + + index_type getIdx() + { + return idx_; + } + real_type getValue() + { + return value_; + } + + bool operator < (const IndexValuePair& str) const + { + return (idx_ < str.idx_); + } + + private: + index_type idx_; + real_type value_; + }; + +} // namespace ReSolve + From 678a3244d0e424152bb184f44b33e80971ffebf7 Mon Sep 17 00:00:00 2001 From: Slaven Peles Date: Wed, 18 Oct 2023 19:25:51 -0400 Subject: [PATCH 08/15] Complete PIMPL implementation for MatrixHandler. --- examples/r_KLU_KLU.cpp | 6 +- examples/r_KLU_KLU_standalone.cpp | 6 +- resolve/LinSolverIterativeFGMRES.cpp | 4 +- resolve/matrix/MatrixHandler.cpp | 39 ++++- resolve/matrix/MatrixHandler.hpp | 23 ++- resolve/matrix/MatrixHandlerCuda.cpp | 186 ++-------------------- resolve/matrix/MatrixHandlerCuda.hpp | 3 +- tests/functionality/testKLU.cpp | 9 +- tests/functionality/testKLU_GLU.cpp | 2 +- tests/functionality/testKLU_Rf.cpp | 2 +- tests/functionality/testKLU_Rf_FGMRES.cpp | 15 +- tests/unit/matrix/MatrixHandlerTests.hpp | 24 ++- 12 files changed, 117 insertions(+), 202 deletions(-) diff --git a/examples/r_KLU_KLU.cpp b/examples/r_KLU_KLU.cpp index e777833a..bee712bd 100644 --- a/examples/r_KLU_KLU.cpp +++ b/examples/r_KLU_KLU.cpp @@ -35,9 +35,9 @@ int main(int argc, char *argv[]) ReSolve::matrix::Coo* A_coo; ReSolve::matrix::Csr* A; - ReSolve::LinAlgWorkspace* workspace = ReSolve::createLinAlgWorkspace("cpu"); - ReSolve::MatrixHandler* matrix_handler = new ReSolve::MatrixHandler(workspace); - ReSolve::VectorHandler* vector_handler = new ReSolve::VectorHandler(workspace); + ReSolve::LinAlgWorkspace* workspace = new ReSolve::LinAlgWorkspace(); + ReSolve::MatrixHandler* matrix_handler = new ReSolve::MatrixHandler(workspace); + ReSolve::VectorHandler* vector_handler = new ReSolve::VectorHandler(workspace); real_type* rhs; real_type* x; diff --git a/examples/r_KLU_KLU_standalone.cpp b/examples/r_KLU_KLU_standalone.cpp index daeae43a..a1aa1fad 100644 --- a/examples/r_KLU_KLU_standalone.cpp +++ b/examples/r_KLU_KLU_standalone.cpp @@ -31,9 +31,9 @@ int main(int argc, char *argv[]) ReSolve::matrix::Coo* A_coo; ReSolve::matrix::Csr* A; - ReSolve::LinAlgWorkspace* workspace = ReSolve::createLinAlgWorkspace("cpu"); - ReSolve::MatrixHandler* matrix_handler = new ReSolve::MatrixHandler(workspace); - ReSolve::VectorHandler* vector_handler = new ReSolve::VectorHandler(workspace); + ReSolve::LinAlgWorkspace* workspace = new ReSolve::LinAlgWorkspace(); + ReSolve::MatrixHandler* matrix_handler = new ReSolve::MatrixHandler(workspace); + ReSolve::VectorHandler* vector_handler = new ReSolve::VectorHandler(workspace); real_type* rhs; real_type* x; diff --git a/resolve/LinSolverIterativeFGMRES.cpp b/resolve/LinSolverIterativeFGMRES.cpp index bd7dca42..fa63f2d5 100644 --- a/resolve/LinSolverIterativeFGMRES.cpp +++ b/resolve/LinSolverIterativeFGMRES.cpp @@ -66,12 +66,12 @@ namespace ReSolve { if (d_V_ != nullptr) { // cudaFree(d_V_); - delete [] d_V_; + delete d_V_; } if (d_Z_ != nullptr) { // cudaFree(d_Z_); - delete [] d_Z_; + delete d_Z_; } } diff --git a/resolve/matrix/MatrixHandler.cpp b/resolve/matrix/MatrixHandler.cpp index c605ce5a..50dd3082 100644 --- a/resolve/matrix/MatrixHandler.cpp +++ b/resolve/matrix/MatrixHandler.cpp @@ -15,6 +15,15 @@ namespace ReSolve { // Create a shortcut name for Logger static class using out = io::Logger; + /** + * @brief Default constructor + * + * @post Instantiates CPU and CUDA matrix handlers, but does not + * create a workspace. + * + * @todo There is little utility for the default constructor. Rethink its purpose. + * Consider making it private method. + */ MatrixHandler::MatrixHandler() { this->new_matrix_ = true; @@ -22,15 +31,43 @@ namespace ReSolve { cudaImpl_ = new MatrixHandlerCuda(); } + /** + * @brief Destructor + * + */ MatrixHandler::~MatrixHandler() { + if (isCpuEnabled_) delete cpuImpl_; + if (isCudaEnabled_) delete cudaImpl_; } + /** + * @brief Constructor taking pointer to the workspace as its parameter. + * + * @note The CPU implementation currently does not require a workspace. + * The workspace pointer parameter is provided for forward compatibility. + */ MatrixHandler::MatrixHandler(LinAlgWorkspace* new_workspace) { - workspace_ = new_workspace; cpuImpl_ = new MatrixHandlerCpu(new_workspace); + isCpuEnabled_ = true; + isCudaEnabled_ = false; + } + + /** + * @brief Constructor taking pointer to the CUDA workspace as its parameter. + * + * @post A CPU implementation instance is created because it is cheap and + * it does not require a workspace. + * + * @post A CUDA implementation instance is created with supplied workspace. + */ + MatrixHandler::MatrixHandler(LinAlgWorkspaceCUDA* new_workspace) + { + cpuImpl_ = new MatrixHandlerCpu(); cudaImpl_ = new MatrixHandlerCuda(new_workspace); + isCpuEnabled_ = true; + isCudaEnabled_ = true; } void MatrixHandler::setValuesChanged(bool isValuesChanged, std::string memspace) diff --git a/resolve/matrix/MatrixHandler.hpp b/resolve/matrix/MatrixHandler.hpp index 7ba01a08..fed6b593 100644 --- a/resolve/matrix/MatrixHandler.hpp +++ b/resolve/matrix/MatrixHandler.hpp @@ -1,8 +1,3 @@ -// this class encapsulates various matrix manipulation operations, commonly required by linear solvers: -// this includes -// (1) Matrix format conversion: coo2csr, csr2csc -// (2) Matrix vector product (SpMV) -// (3) Matrix 1-norm #pragma once #include #include @@ -22,12 +17,25 @@ namespace ReSolve class Csr; } class LinAlgWorkspace; + class LinAlgWorkspaceCUDA; class MatrixHandlerImpl; } namespace ReSolve { + /** + * @brief this class encapsulates various matrix manipulation operations, + * commonly required by linear solvers. + * + * This includes: + * - Matrix format conversion: coo2csr, csr2csc + * - Matrix vector product (SpMV) + * - Matrix 1-norm + * + * @author Kasia Swirydowicz + * @author Slaven Peles + */ class MatrixHandler { using vector_type = vector::Vector; @@ -35,6 +43,7 @@ namespace ReSolve { public: MatrixHandler(); MatrixHandler(LinAlgWorkspace* workspace); + MatrixHandler(LinAlgWorkspaceCUDA* workspace); ~MatrixHandler(); int csc2csr(matrix::Csc* A_csc, matrix::Csr* A_csr, std::string memspace); //memspace decides on what is returned (cpu or cuda pointer) @@ -52,12 +61,14 @@ namespace ReSolve { void setValuesChanged(bool toWhat, std::string memspace); private: - LinAlgWorkspace* workspace_{nullptr}; bool new_matrix_{true}; ///< if the structure changed, you need a new handler. MemoryHandler mem_; ///< Device memory manager object MatrixHandlerImpl* cpuImpl_{nullptr}; MatrixHandlerImpl* cudaImpl_{nullptr}; + + bool isCpuEnabled_{false}; + bool isCudaEnabled_{false}; }; } // namespace ReSolve diff --git a/resolve/matrix/MatrixHandlerCuda.cpp b/resolve/matrix/MatrixHandlerCuda.cpp index deb09180..15252d85 100644 --- a/resolve/matrix/MatrixHandlerCuda.cpp +++ b/resolve/matrix/MatrixHandlerCuda.cpp @@ -14,7 +14,6 @@ namespace ReSolve { MatrixHandlerCuda::MatrixHandlerCuda() { - // new_matrix_ = true; values_changed_ = true; } @@ -22,186 +21,28 @@ namespace ReSolve { { } - MatrixHandlerCuda::MatrixHandlerCuda(LinAlgWorkspace* new_workspace) + MatrixHandlerCuda::MatrixHandlerCuda(LinAlgWorkspaceCUDA* new_workspace) { workspace_ = new_workspace; } - // bool MatrixHandlerCuda::getValuesChanged() - // { - // return this->values_changed_; - // } + MatrixHandlerCuda::MatrixHandlerCuda(LinAlgWorkspace* new_workspace) + { + workspace_ = (LinAlgWorkspaceCUDA*) new_workspace; + } void MatrixHandlerCuda::setValuesChanged(bool values_changed) { values_changed_ = values_changed; } -// int MatrixHandlerCuda::coo2csr(matrix::Coo* A_coo, matrix::Csr* A_csr, std::string memspace) -// { -// //this happens on the CPU not on the GPU -// //but will return whatever memspace requested. - -// //count nnzs first - -// index_type nnz_unpacked = 0; -// index_type nnz = A_coo->getNnz(); -// index_type n = A_coo->getNumRows(); -// bool symmetric = A_coo->symmetric(); -// bool expanded = A_coo->expanded(); - -// index_type* nnz_counts = new index_type[n]; -// std::fill_n(nnz_counts, n, 0); -// index_type* coo_rows = A_coo->getRowData("cpu"); -// index_type* coo_cols = A_coo->getColData("cpu"); -// real_type* coo_vals = A_coo->getValues("cpu"); - -// index_type* diag_control = new index_type[n]; //for DEDUPLICATION of the diagonal -// std::fill_n(diag_control, n, 0); -// index_type nnz_unpacked_no_duplicates = 0; -// index_type nnz_no_duplicates = nnz; - - -// //maybe check if they exist? -// for (index_type i = 0; i < nnz; ++i) -// { -// nnz_counts[coo_rows[i]]++; -// nnz_unpacked++; -// nnz_unpacked_no_duplicates++; -// if ((coo_rows[i] != coo_cols[i])&& (symmetric) && (!expanded)) -// { -// nnz_counts[coo_cols[i]]++; -// nnz_unpacked++; -// nnz_unpacked_no_duplicates++; -// } -// if (coo_rows[i] == coo_cols[i]){ -// if (diag_control[coo_rows[i]] > 0) { -// //duplicate -// nnz_unpacked_no_duplicates--; -// nnz_no_duplicates--; -// } -// diag_control[coo_rows[i]]++; -// } -// } -// A_csr->setExpanded(true); -// A_csr->setNnzExpanded(nnz_unpacked_no_duplicates); -// index_type* csr_ia = new index_type[n+1]; -// std::fill_n(csr_ia, n + 1, 0); -// index_type* csr_ja = new index_type[nnz_unpacked]; -// real_type* csr_a = new real_type[nnz_unpacked]; -// index_type* nnz_shifts = new index_type[n]; -// std::fill_n(nnz_shifts, n , 0); - -// IndexValuePair* tmp = new IndexValuePair[nnz_unpacked]; - -// csr_ia[0] = 0; - -// for (index_type i = 1; i < n + 1; ++i){ -// csr_ia[i] = csr_ia[i - 1] + nnz_counts[i - 1] - (diag_control[i-1] - 1); -// } - -// int r, start; - - -// for (index_type i = 0; i < nnz; ++i){ -// //which row -// r = coo_rows[i]; -// start = csr_ia[r]; - -// if ((start + nnz_shifts[r]) > nnz_unpacked) { -// out::warning() << "index out of bounds (case 1) start: " << start << "nnz_shifts[" << r << "] = " << nnz_shifts[r] << std::endl; -// } -// if ((r == coo_cols[i]) && (diag_control[r] > 1)) {//diagonal, and there are duplicates -// bool already_there = false; -// for (index_type j = start; j < start + nnz_shifts[r]; ++j) -// { -// index_type c = tmp[j].getIdx(); -// if (c == r) { -// real_type val = tmp[j].getValue(); -// val += coo_vals[i]; -// tmp[j].setValue(val); -// already_there = true; -// out::warning() << " duplicate found, row " << c << " adding in place " << j << " current value: " << val << std::endl; -// } -// } -// if (!already_there){ // first time this duplicates appears - -// tmp[start + nnz_shifts[r]].setIdx(coo_cols[i]); -// tmp[start + nnz_shifts[r]].setValue(coo_vals[i]); - -// nnz_shifts[r]++; -// } -// } else {//not diagonal -// tmp[start + nnz_shifts[r]].setIdx(coo_cols[i]); -// tmp[start + nnz_shifts[r]].setValue(coo_vals[i]); -// nnz_shifts[r]++; - -// if ((coo_rows[i] != coo_cols[i]) && (symmetric == 1)) -// { -// r = coo_cols[i]; -// start = csr_ia[r]; - -// if ((start + nnz_shifts[r]) > nnz_unpacked) -// out::warning() << "index out of bounds (case 2) start: " << start << "nnz_shifts[" << r << "] = " << nnz_shifts[r] << std::endl; -// tmp[start + nnz_shifts[r]].setIdx(coo_rows[i]); -// tmp[start + nnz_shifts[r]].setValue(coo_vals[i]); -// nnz_shifts[r]++; -// } -// } -// } -// //now sort whatever is inside rows - -// for (int i = 0; i < n; ++i) -// { - -// //now sorting (and adding 1) -// int colStart = csr_ia[i]; -// int colEnd = csr_ia[i + 1]; -// int length = colEnd - colStart; -// std::sort(&tmp[colStart],&tmp[colStart] + length); -// } - -// for (index_type i = 0; i < nnz_unpacked; ++i) -// { -// csr_ja[i] = tmp[i].getIdx(); -// csr_a[i] = tmp[i].getValue(); -// } -// #if 0 -// for (int i = 0; isetNnz(nnz_no_duplicates); -// if (memspace == "cpu"){ -// A_csr->updateData(csr_ia, csr_ja, csr_a, "cpu", "cpu"); -// } else { -// if (memspace == "cuda"){ -// A_csr->updateData(csr_ia, csr_ja, csr_a, "cpu", "cuda"); -// } else { -// //display error -// } -// } -// delete [] nnz_counts; -// delete [] tmp; -// delete [] nnz_shifts; -// delete [] csr_ia; -// delete [] csr_ja; -// delete [] csr_a; -// delete [] diag_control; - -// return 0; -// } int MatrixHandlerCuda::matvec(matrix::Sparse* Ageneric, - vector_type* vec_x, - vector_type* vec_result, - const real_type* alpha, - const real_type* beta, - std::string matrixFormat) + vector_type* vec_x, + vector_type* vec_result, + const real_type* alpha, + const real_type* beta, + std::string matrixFormat) { using namespace constants; int error_sum = 0; @@ -209,11 +50,8 @@ namespace ReSolve { matrix::Csr* A = dynamic_cast(Ageneric); //result = alpha *A*x + beta * result cusparseStatus_t status; - // std::cout << "Matvec on NVIDIA GPU ...\n"; - LinAlgWorkspaceCUDA* workspaceCUDA = (LinAlgWorkspaceCUDA*) workspace_; + LinAlgWorkspaceCUDA* workspaceCUDA = workspace_; cusparseDnVecDescr_t vecx = workspaceCUDA->getVecX(); - //printf("is vec_x NULL? %d\n", vec_x->getData("cuda") == nullptr); - //printf("is vec_result NULL? %d\n", vec_result->getData("cuda") == nullptr); cusparseCreateDnVec(&vecx, A->getNumRows(), vec_x->getData("cuda"), CUDA_R_64F); @@ -224,7 +62,7 @@ namespace ReSolve { void* buffer_spmv = workspaceCUDA->getSpmvBuffer(); cusparseHandle_t handle_cusparse = workspaceCUDA->getCusparseHandle(); - if (values_changed_) { + if (values_changed_) { status = cusparseCreateCsr(&matA, A->getNumRows(), A->getNumColumns(), diff --git a/resolve/matrix/MatrixHandlerCuda.hpp b/resolve/matrix/MatrixHandlerCuda.hpp index f2250008..c77eb795 100644 --- a/resolve/matrix/MatrixHandlerCuda.hpp +++ b/resolve/matrix/MatrixHandlerCuda.hpp @@ -38,6 +38,7 @@ namespace ReSolve { public: MatrixHandlerCuda(); MatrixHandlerCuda(LinAlgWorkspace* workspace); + MatrixHandlerCuda(LinAlgWorkspaceCUDA* workspace); virtual ~MatrixHandlerCuda(); int csc2csr(matrix::Csc* A_csc, matrix::Csr* A_csr); //memspace decides on what is returned (cpu or cuda pointer) @@ -54,7 +55,7 @@ namespace ReSolve { void setValuesChanged(bool isValuesChanged); private: - LinAlgWorkspace* workspace_{nullptr}; + LinAlgWorkspaceCUDA* workspace_{nullptr}; // bool new_matrix_{true}; ///< if the structure changed, you need a new handler. bool values_changed_{true}; ///< needed for matvec diff --git a/tests/functionality/testKLU.cpp b/tests/functionality/testKLU.cpp index 70ee83b0..66790c83 100644 --- a/tests/functionality/testKLU.cpp +++ b/tests/functionality/testKLU.cpp @@ -1,6 +1,7 @@ #include #include #include +#include #include #include @@ -9,7 +10,7 @@ #include #include #include -#include +#include //author: KS //functionality test to check whether KLU works correctly. using namespace ReSolve::constants; @@ -26,9 +27,9 @@ int main(int argc, char *argv[]) int error_sum = 0; int status = 0; - ReSolve::LinAlgWorkspace* workspace = ReSolve::createLinAlgWorkspace("cpu"); - ReSolve::MatrixHandler* matrix_handler = new ReSolve::MatrixHandler(workspace); - ReSolve::VectorHandler* vector_handler = new ReSolve::VectorHandler(workspace); + ReSolve::LinAlgWorkspace* workspace = new ReSolve::LinAlgWorkspace(); + ReSolve::MatrixHandler* matrix_handler = new ReSolve::MatrixHandler(workspace); + ReSolve::VectorHandler* vector_handler = new ReSolve::VectorHandler(workspace); ReSolve::LinSolverDirectKLU* KLU = new ReSolve::LinSolverDirectKLU; KLU->setupParameters(1, 0.1, false); diff --git a/tests/functionality/testKLU_GLU.cpp b/tests/functionality/testKLU_GLU.cpp index 1c66fda1..1384f405 100644 --- a/tests/functionality/testKLU_GLU.cpp +++ b/tests/functionality/testKLU_GLU.cpp @@ -30,7 +30,7 @@ int main(int argc, char *argv[]) int error_sum = 0; int status = 0; - ReSolve::LinAlgWorkspaceCUDA* workspace_CUDA = new ReSolve::LinAlgWorkspaceCUDA; + ReSolve::LinAlgWorkspaceCUDA* workspace_CUDA = new ReSolve::LinAlgWorkspaceCUDA(); workspace_CUDA->initializeHandles(); ReSolve::MatrixHandler* matrix_handler = new ReSolve::MatrixHandler(workspace_CUDA); ReSolve::VectorHandler* vector_handler = new ReSolve::VectorHandler(workspace_CUDA); diff --git a/tests/functionality/testKLU_Rf.cpp b/tests/functionality/testKLU_Rf.cpp index 656aa724..b7e725a7 100644 --- a/tests/functionality/testKLU_Rf.cpp +++ b/tests/functionality/testKLU_Rf.cpp @@ -30,7 +30,7 @@ int main(int argc, char *argv[]) int error_sum = 0; int status = 0; - ReSolve::LinAlgWorkspaceCUDA* workspace_CUDA = new ReSolve::LinAlgWorkspaceCUDA; + ReSolve::LinAlgWorkspaceCUDA* workspace_CUDA = new ReSolve::LinAlgWorkspaceCUDA(); workspace_CUDA->initializeHandles(); ReSolve::MatrixHandler* matrix_handler = new ReSolve::MatrixHandler(workspace_CUDA); ReSolve::VectorHandler* vector_handler = new ReSolve::VectorHandler(workspace_CUDA); diff --git a/tests/functionality/testKLU_Rf_FGMRES.cpp b/tests/functionality/testKLU_Rf_FGMRES.cpp index 14bb0f33..3e4b9d61 100644 --- a/tests/functionality/testKLU_Rf_FGMRES.cpp +++ b/tests/functionality/testKLU_Rf_FGMRES.cpp @@ -31,7 +31,7 @@ int main(int argc, char *argv[]) int error_sum = 0; int status = 0; - ReSolve::LinAlgWorkspaceCUDA* workspace_CUDA = new ReSolve::LinAlgWorkspaceCUDA; + ReSolve::LinAlgWorkspaceCUDA* workspace_CUDA = new ReSolve::LinAlgWorkspaceCUDA(); workspace_CUDA->initializeHandles(); ReSolve::MatrixHandler* matrix_handler = new ReSolve::MatrixHandler(workspace_CUDA); ReSolve::VectorHandler* vector_handler = new ReSolve::VectorHandler(workspace_CUDA); @@ -258,5 +258,18 @@ int main(int argc, char *argv[]) } else { std::cout<<"Test 4 (KLU with cuSolverRf refactorization + IR) FAILED, error sum: "<setValuesChanged(true, memspace_); + handler->matvec(A, &x, &y, &alpha, &beta, "csr", memspace_); status *= verifyAnswer(y, 4.0, memspace_); - delete workspace; + delete handler; delete A; return status.report(__func__); @@ -71,6 +70,21 @@ class MatrixHandlerTests : TestBase private: std::string memspace_{"cpu"}; + ReSolve::MatrixHandler* createMatrixHandler() + { + if (memspace_ == "cpu") { + LinAlgWorkspace* workpsace = new LinAlgWorkspace(); + return new MatrixHandler(workpsace); + } else if (memspace_ == "cuda") { + LinAlgWorkspaceCUDA* workspace = new LinAlgWorkspaceCUDA(); + workspace->initializeHandles(); + return new MatrixHandler(workspace); + } else { + std::cout << "Invalid memory space " << memspace_ << "\n"; + } + return nullptr; + } + bool verifyAnswer(vector::Vector& x, real_type answer, std::string memspace) { bool status = true; From cec1aa74393bbe5f563912a8093340a112fcbfaa Mon Sep 17 00:00:00 2001 From: Slaven Peles Date: Thu, 19 Oct 2023 00:24:56 -0400 Subject: [PATCH 09/15] Use PIMPL in VectorHandler class. --- resolve/vector/CMakeLists.txt | 2 + resolve/vector/VectorHandler.cpp | 165 ++++------------ resolve/vector/VectorHandler.hpp | 23 ++- resolve/vector/VectorHandlerCpu.cpp | 159 +++++++++++++++ resolve/vector/VectorHandlerCpu.hpp | 57 ++++++ resolve/vector/VectorHandlerCuda.cpp | 236 +++++++++++++++++++++++ resolve/vector/VectorHandlerCuda.hpp | 57 ++++++ resolve/vector/VectorHandlerImpl.hpp | 57 ++++++ tests/unit/vector/GramSchmidtTests.hpp | 20 +- tests/unit/vector/VectorHandlerTests.hpp | 60 +++--- 10 files changed, 680 insertions(+), 156 deletions(-) create mode 100644 resolve/vector/VectorHandlerCpu.cpp create mode 100644 resolve/vector/VectorHandlerCpu.hpp create mode 100644 resolve/vector/VectorHandlerCuda.cpp create mode 100644 resolve/vector/VectorHandlerCuda.hpp create mode 100644 resolve/vector/VectorHandlerImpl.hpp diff --git a/resolve/vector/CMakeLists.txt b/resolve/vector/CMakeLists.txt index 8fa9cd59..f418bf55 100644 --- a/resolve/vector/CMakeLists.txt +++ b/resolve/vector/CMakeLists.txt @@ -10,6 +10,8 @@ set(Vector_SRC Vector.cpp VectorHandler.cpp + VectorHandlerCpu.cpp + VectorHandlerCuda.cpp ) diff --git a/resolve/vector/VectorHandler.cpp b/resolve/vector/VectorHandler.cpp index 71bfce9c..9648058e 100644 --- a/resolve/vector/VectorHandler.cpp +++ b/resolve/vector/VectorHandler.cpp @@ -4,6 +4,9 @@ #include #include #include +#include +#include +#include #include "VectorHandler.hpp" namespace ReSolve { @@ -14,6 +17,8 @@ namespace ReSolve { */ VectorHandler::VectorHandler() { + cpuImpl_ = new VectorHandlerCpu(); + isCpuEnabled_ = true; } /** @@ -21,9 +26,24 @@ namespace ReSolve { * * @param new_workspace - workspace to be set */ - VectorHandler:: VectorHandler(LinAlgWorkspace* new_workspace) + VectorHandler::VectorHandler(LinAlgWorkspace* new_workspace) { - workspace_ = new_workspace; + cpuImpl_ = new VectorHandlerCpu(new_workspace); + isCpuEnabled_ = true; + } + + /** + * @brief constructor + * + * @param new_workspace - workspace to be set + */ + VectorHandler::VectorHandler(LinAlgWorkspaceCUDA* new_workspace) + { + cudaImpl_ = new VectorHandlerCuda(new_workspace); + cpuImpl_ = new VectorHandlerCpu(); + + isCudaEnabled_ = true; + isCpuEnabled_ = true; } /** @@ -46,28 +66,11 @@ namespace ReSolve { real_type VectorHandler::dot(vector::Vector* x, vector::Vector* y, std::string memspace) { - if (memspace == "cuda" ){ - LinAlgWorkspaceCUDA* workspaceCUDA = (LinAlgWorkspaceCUDA*) workspace_; - cublasHandle_t handle_cublas = workspaceCUDA->getCublasHandle(); - double nrm = 0.0; - cublasStatus_t st= cublasDdot (handle_cublas, x->getSize(), x->getData("cuda"), 1, y->getData("cuda"), 1, &nrm); - if (st!=0) {printf("dot product crashed with code %d \n", st);} - return nrm; + if (memspace == "cuda" ) { + return cudaImpl_->dot(x, y); } else { if (memspace == "cpu") { - real_type* x_data = x->getData("cpu"); - real_type* y_data = y->getData("cpu"); - real_type sum = 0.0; - real_type c = 0.0; - real_type t, y; - for (int i = 0; i < x->getSize(); ++i){ - y = (x_data[i] * y_data[i]) - c; - t = sum + y; - c = (t - sum) - y; - sum = t; - // sum += (x_data[i] * y_data[i]); - } - return sum; + return cpuImpl_->dot(x, y); } else { out::error() << "Not implemented (yet)" << std::endl; return NAN; @@ -85,20 +88,11 @@ namespace ReSolve { */ void VectorHandler::scal(const real_type* alpha, vector::Vector* x, std::string memspace) { - if (memspace == "cuda" ) { - LinAlgWorkspaceCUDA* workspaceCUDA = (LinAlgWorkspaceCUDA*) workspace_; - cublasHandle_t handle_cublas = workspaceCUDA->getCublasHandle(); - cublasStatus_t st = cublasDscal(handle_cublas, x->getSize(), alpha, x->getData("cuda"), 1); - if (st!=0) { - ReSolve::io::Logger::error() << "scal crashed with code " << st << "\n"; - } + if (memspace == "cuda" ) { + cudaImpl_->scal(alpha, x); } else { if (memspace == "cpu") { - real_type* x_data = x->getData("cpu"); - - for (int i = 0; i < x->getSize(); ++i){ - x_data[i] *= (*alpha); - } + cpuImpl_->scal(alpha, x); } else { out::error() << "Not implemented (yet)" << std::endl; } @@ -114,26 +108,14 @@ namespace ReSolve { * @param[in] memspace String containg memspace (cpu or cuda) * */ - void VectorHandler::axpy(const real_type* alpha, vector::Vector* x, vector::Vector* y, std::string memspace ) + void VectorHandler::axpy(const real_type* alpha, vector::Vector* x, vector::Vector* y, std::string memspace) { //AXPY: y = alpha * x + y - if (memspace == "cuda" ) { - LinAlgWorkspaceCUDA* workspaceCUDA = (LinAlgWorkspaceCUDA*) workspace_; - cublasHandle_t handle_cublas = workspaceCUDA->getCublasHandle(); - cublasDaxpy(handle_cublas, - x->getSize(), - alpha, - x->getData("cuda"), - 1, - y->getData("cuda"), - 1); + if (memspace == "cuda" ) { + cudaImpl_->axpy(alpha, x, y); } else { if (memspace == "cpu") { - real_type* x_data = x->getData("cpu"); - real_type* y_data = y->getData("cpu"); - for (int i = 0; i < x->getSize(); ++i){ - y_data[i] = (*alpha) * x_data[i] + y_data[i]; - } + cpuImpl_->axpy(alpha, x, y); } else { out::error() <<"Not implemented (yet)" << std::endl; } @@ -160,38 +142,9 @@ namespace ReSolve { void VectorHandler::gemv(std::string transpose, index_type n, index_type k, const real_type* alpha, const real_type* beta, vector::Vector* V, vector::Vector* y, vector::Vector* x, std::string memspace) { if (memspace == "cuda") { - LinAlgWorkspaceCUDA* workspaceCUDA = (LinAlgWorkspaceCUDA*) workspace_; - cublasHandle_t handle_cublas = workspaceCUDA->getCublasHandle(); - if (transpose == "T") { - - cublasDgemv(handle_cublas, - CUBLAS_OP_T, - n, - k, - alpha, - V->getData("cuda"), - n, - y->getData("cuda"), - 1, - beta, - x->getData("cuda"), - 1); - - } else { - cublasDgemv(handle_cublas, - CUBLAS_OP_N, - n, - k, - alpha, - V->getData("cuda"), - n, - y->getData("cuda"), - 1, - beta, - x->getData("cuda"), - 1); - } - + cudaImpl_->gemv(transpose, n, k, alpha, beta, V, y, x); + } else if (memspace == "cpu") { + cpuImpl_->gemv(transpose, n, k, alpha, beta, V, y, x); } else { out::error() << "Not implemented (yet)" << std::endl; } @@ -213,26 +166,9 @@ namespace ReSolve { { using namespace constants; if (memspace == "cuda") { - if (k < 200) { - mass_axpy(size, k, x->getData("cuda"), y->getData("cuda"),alpha->getData("cuda")); - } else { - LinAlgWorkspaceCUDA* workspaceCUDA = (LinAlgWorkspaceCUDA*) workspace_; - cublasHandle_t handle_cublas = workspaceCUDA->getCublasHandle(); - cublasDgemm(handle_cublas, - CUBLAS_OP_N, - CUBLAS_OP_N, - size, // m - 1, // n - k + 1, // k - &MINUSONE, // alpha - x->getData("cuda"), // A - size, // lda - alpha->getData("cuda"), // B - k + 1, // ldb - &ONE, - y->getData("cuda"), // c - size); // ldc - } + cudaImpl_->massAxpy(size, alpha, k, x, y); + } else if (memspace == "cpu") { + cpuImpl_->massAxpy(size, alpha, k, x, y); } else { out::error() << "Not implemented (yet)" << std::endl; } @@ -254,29 +190,10 @@ namespace ReSolve { */ void VectorHandler::massDot2Vec(index_type size, vector::Vector* V, index_type k, vector::Vector* x, vector::Vector* res, std::string memspace) { - using namespace constants; - if (memspace == "cuda") { - if (k < 200) { - mass_inner_product_two_vectors(size, k, x->getData("cuda") , x->getData(1, "cuda"), V->getData("cuda"), res->getData("cuda")); - } else { - LinAlgWorkspaceCUDA* workspaceCUDA = (LinAlgWorkspaceCUDA*) workspace_; - cublasHandle_t handle_cublas = workspaceCUDA->getCublasHandle(); - cublasDgemm(handle_cublas, - CUBLAS_OP_T, - CUBLAS_OP_N, - k + 1, //m - 2, //n - size, //k - &ONE, //alpha - V->getData("cuda"), //A - size, //lda - x->getData("cuda"), //B - size, //ldb - &ZERO, - res->getData("cuda"), //c - k + 1); //ldc - } + cudaImpl_->massDot2Vec(size, V, k, x, res); + } else if (memspace == "cpu") { + cpuImpl_->massDot2Vec(size, V, k, x, res); } else { out::error() << "Not implemented (yet)" << std::endl; } diff --git a/resolve/vector/VectorHandler.hpp b/resolve/vector/VectorHandler.hpp index bf2d37d7..53ec0bca 100644 --- a/resolve/vector/VectorHandler.hpp +++ b/resolve/vector/VectorHandler.hpp @@ -7,7 +7,9 @@ namespace ReSolve { class Vector; } + class VectorHandlerImpl; class LinAlgWorkspace; + class LinAlgWorkspaceCUDA; } @@ -16,13 +18,14 @@ namespace ReSolve { //namespace vector { public: VectorHandler(); VectorHandler(LinAlgWorkspace* new_workspace); + VectorHandler(LinAlgWorkspaceCUDA* new_workspace); ~VectorHandler(); //y = alpha x + y - void axpy(const real_type* alpha, vector::Vector* x, vector::Vector* y, std::string memspace ); + void axpy(const real_type* alpha, vector::Vector* x, vector::Vector* y, std::string memspace); //dot: x \cdot y - real_type dot(vector::Vector* x, vector::Vector* y, std::string memspace ); + real_type dot(vector::Vector* x, vector::Vector* y, std::string memspace); //scal = alpha * x void scal(const real_type* alpha, vector::Vector* x, std::string memspace); @@ -40,9 +43,21 @@ namespace ReSolve { //namespace vector { * if `transpose = T` (yes), `x = beta*x + alpha*V^T*y`, * where `x` is `[k x 1]`, `V` is `[n x k]` and `y` is `[n x 1]`. */ - void gemv(std::string transpose, index_type n, index_type k, const real_type* alpha, const real_type* beta, vector::Vector* V, vector::Vector* y, vector::Vector* x, std::string memspace); + void gemv(std::string transpose, + index_type n, + index_type k, + const real_type* alpha, + const real_type* beta, + vector::Vector* V, + vector::Vector* y, + vector::Vector* x, + std::string memspace); private: - LinAlgWorkspace* workspace_; + VectorHandlerImpl* cpuImpl_{nullptr}; + VectorHandlerImpl* cudaImpl_{nullptr}; + + bool isCpuEnabled_{false}; + bool isCudaEnabled_{false}; }; } //} // namespace ReSolve::vector diff --git a/resolve/vector/VectorHandlerCpu.cpp b/resolve/vector/VectorHandlerCpu.cpp new file mode 100644 index 00000000..593ba5b0 --- /dev/null +++ b/resolve/vector/VectorHandlerCpu.cpp @@ -0,0 +1,159 @@ +#include + +#include +#include +#include +#include +#include +#include "VectorHandlerCpu.hpp" + +namespace ReSolve { + using out = io::Logger; + + /** + * @brief empty constructor that does absolutely nothing + */ + VectorHandlerCpu::VectorHandlerCpu() + { + } + + /** + * @brief constructor + * + * @param new_workspace - workspace to be set + */ + VectorHandlerCpu:: VectorHandlerCpu(LinAlgWorkspace* new_workspace) + { + workspace_ = new_workspace; + } + + /** + * @brief destructor + */ + VectorHandlerCpu::~VectorHandlerCpu() + { + //delete the workspace TODO + } + + /** + * @brief dot product of two vectors i.e, a = x^Ty + * + * @param[in] x The first vector + * @param[in] y The second vector + * @param[in] memspace String containg memspace (cpu or cuda) + * + * @return dot product (real number) of _x_ and _y_ + */ + + real_type VectorHandlerCpu::dot(vector::Vector* x, vector::Vector* y) + { + real_type* x_data = x->getData("cpu"); + real_type* y_data = y->getData("cpu"); + real_type sum = 0.0; + real_type c = 0.0; + // real_type t, y; + for (int i = 0; i < x->getSize(); ++i) { + real_type y = (x_data[i] * y_data[i]) - c; + real_type t = sum + y; + c = (t - sum) - y; + sum = t; + // sum += (x_data[i] * y_data[i]); + } + return sum; + } + + /** + * @brief scale a vector by a constant i.e, x = alpha*x where alpha is a constant + * + * @param[in] alpha The constant + * @param[in,out] x The vector + * @param memspace string containg memspace (cpu or cuda) + * + */ + void VectorHandlerCpu::scal(const real_type* alpha, vector::Vector* x) + { + real_type* x_data = x->getData("cpu"); + + for (int i = 0; i < x->getSize(); ++i){ + x_data[i] *= (*alpha); + } + } + + /** + * @brief axpy i.e, y = alpha*x+y where alpha is a constant + * + * @param[in] alpha The constant + * @param[in] x The first vector + * @param[in,out] y The second vector (result is return in y) + * @param[in] memspace String containg memspace (cpu or cuda) + * + */ + void VectorHandlerCpu::axpy(const real_type* alpha, vector::Vector* x, vector::Vector* y) + { + //AXPY: y = alpha * x + y + real_type* x_data = x->getData("cpu"); + real_type* y_data = y->getData("cpu"); + for (int i = 0; i < x->getSize(); ++i) { + y_data[i] = (*alpha) * x_data[i] + y_data[i]; + } + } + + /** + * @brief gemv computes matrix-vector product where both matrix and vectors are dense. + * i.e., x = beta*x + alpha*V*y + * + * @param[in] Transpose - yes (T) or no (N) + * @param[in] n Number of rows in (non-transposed) matrix + * @param[in] k Number of columns in (non-transposed) + * @param[in] alpha Constant real number + * @param[in] beta Constant real number + * @param[in] V Multivector containing the matrix, organized columnwise + * @param[in] y Vector, k x 1 if N and n x 1 if T + * @param[in,out] x Vector, n x 1 if N and k x 1 if T + * @param[in] memspace cpu or cuda (for now) + * + * @pre V is stored colum-wise, _n_ > 0, _k_ > 0 + * + */ + void VectorHandlerCpu::gemv(std::string transpose, index_type n, index_type k, const real_type* alpha, const real_type* beta, vector::Vector* V, vector::Vector* y, vector::Vector* x) + { + out::error() << "Not implemented (yet)" << std::endl; + } + + /** + * @brief mass (bulk) axpy i.e, y = y - x*alpha where alpha is a vector + * + * @param[in] size number of elements in y + * @param[in] alpha vector size k x 1 + * @param[in] x (multi)vector size size x k + * @param[in,out] y vector size size x 1 (this is where the result is stored) + * @param[in] memspace string containg memspace (cpu or cuda) + * + * @pre _k_ > 0, _size_ > 0, _size_ = x->getSize() + * + */ + void VectorHandlerCpu::massAxpy(index_type size, vector::Vector* alpha, index_type k, vector::Vector* x, vector::Vector* y) + { + out::error() << "Not implemented (yet)" << std::endl; + } + + /** + * @brief mass (bulk) dot product i.e, V^T x, where V is n x k dense multivector (a dense multivector consisting of k vectors size n) + * and x is k x 2 dense multivector (a multivector consisiting of two vectors size n each) + * + * @param[in] size Number of elements in a single vector in V + * @param[in] V Multivector; k vectors size n x 1 each + * @param[in] k Number of vectors in V + * @param[in] x Multivector; 2 vectors size n x 1 each + * @param[out] res Multivector; 2 vectors size k x 1 each (result is returned in res) + * @param[in] memspace String containg memspace (cpu or cuda) + * + * @pre _size_ > 0, _k_ > 0, size = x->getSize(), _res_ needs to be allocated + * + */ + void VectorHandlerCpu::massDot2Vec(index_type size, vector::Vector* V, index_type k, vector::Vector* x, vector::Vector* res) + { + out::error() << "Not implemented (yet)" << std::endl; + } + +} // namespace ReSolve diff --git a/resolve/vector/VectorHandlerCpu.hpp b/resolve/vector/VectorHandlerCpu.hpp new file mode 100644 index 00000000..a5f09162 --- /dev/null +++ b/resolve/vector/VectorHandlerCpu.hpp @@ -0,0 +1,57 @@ +#pragma once +#include + +namespace ReSolve +{ + namespace vector + { + class Vector; + } + class LinAlgWorkspace; + class VectorHandlerImpl; +} + + +namespace ReSolve { //namespace vector { + class VectorHandlerCpu : public VectorHandlerImpl + { + public: + VectorHandlerCpu(); + VectorHandlerCpu(LinAlgWorkspace* workspace); + virtual ~VectorHandlerCpu(); + + //y = alpha x + y + virtual void axpy(const real_type* alpha, vector::Vector* x, vector::Vector* y); + + //dot: x \cdot y + virtual real_type dot(vector::Vector* x, vector::Vector* y); + + //scal = alpha * x + virtual void scal(const real_type* alpha, vector::Vector* x); + + //mass axpy: x*alpha + y where x is [n x k] and alpha is [k x 1]; x is stored columnwise + virtual void massAxpy(index_type size, vector::Vector* alpha, index_type k, vector::Vector* x, vector::Vector* y); + + //mass dot: V^T x, where V is [n x k] and x is [k x 2], everything is stored and returned columnwise + //Size = n + virtual void massDot2Vec(index_type size, vector::Vector* V, index_type k, vector::Vector* x, vector::Vector* res); + + /** gemv: + * if `transpose = N` (no), `x = beta*x + alpha*V*y`, + * where `x` is `[n x 1]`, `V` is `[n x k]` and `y` is `[k x 1]`. + * if `transpose = T` (yes), `x = beta*x + alpha*V^T*y`, + * where `x` is `[k x 1]`, `V` is `[n x k]` and `y` is `[n x 1]`. + */ + virtual void gemv(std::string transpose, + index_type n, + index_type k, + const real_type* alpha, + const real_type* beta, + vector::Vector* V, + vector::Vector* y, + vector::Vector* x); + private: + LinAlgWorkspace* workspace_; + }; + +} //} // namespace ReSolve::vector diff --git a/resolve/vector/VectorHandlerCuda.cpp b/resolve/vector/VectorHandlerCuda.cpp new file mode 100644 index 00000000..69044f37 --- /dev/null +++ b/resolve/vector/VectorHandlerCuda.cpp @@ -0,0 +1,236 @@ +#include + +#include +#include +#include +#include +#include +#include "VectorHandlerCuda.hpp" + +namespace ReSolve { + using out = io::Logger; + + /** + * @brief empty constructor that does absolutely nothing + */ + VectorHandlerCuda::VectorHandlerCuda() + { + } + + /** + * @brief constructor + * + * @param new_workspace - workspace to be set + */ + VectorHandlerCuda:: VectorHandlerCuda(LinAlgWorkspaceCUDA* new_workspace) + { + workspace_ = new_workspace; + } + + /** + * @brief destructor + */ + VectorHandlerCuda::~VectorHandlerCuda() + { + //delete the workspace TODO + } + + /** + * @brief dot product of two vectors i.e, a = x^Ty + * + * @param[in] x The first vector + * @param[in] y The second vector + * @param[in] memspace String containg memspace (cpu or cuda) + * + * @return dot product (real number) of _x_ and _y_ + */ + + real_type VectorHandlerCuda::dot(vector::Vector* x, vector::Vector* y) + { + LinAlgWorkspaceCUDA* workspaceCUDA = workspace_; + cublasHandle_t handle_cublas = workspaceCUDA->getCublasHandle(); + double nrm = 0.0; + cublasStatus_t st= cublasDdot (handle_cublas, x->getSize(), x->getData("cuda"), 1, y->getData("cuda"), 1, &nrm); + if (st!=0) {printf("dot product crashed with code %d \n", st);} + return nrm; + } + + /** + * @brief scale a vector by a constant i.e, x = alpha*x where alpha is a constant + * + * @param[in] alpha The constant + * @param[in,out] x The vector + * @param memspace string containg memspace (cpu or cuda) + * + */ + void VectorHandlerCuda::scal(const real_type* alpha, vector::Vector* x) + { + LinAlgWorkspaceCUDA* workspaceCUDA = workspace_; + cublasHandle_t handle_cublas = workspaceCUDA->getCublasHandle(); + cublasStatus_t st = cublasDscal(handle_cublas, x->getSize(), alpha, x->getData("cuda"), 1); + if (st!=0) { + ReSolve::io::Logger::error() << "scal crashed with code " << st << "\n"; + } + } + + /** + * @brief axpy i.e, y = alpha*x+y where alpha is a constant + * + * @param[in] alpha The constant + * @param[in] x The first vector + * @param[in,out] y The second vector (result is return in y) + * @param[in] memspace String containg memspace (cpu or cuda) + * + */ + void VectorHandlerCuda::axpy(const real_type* alpha, vector::Vector* x, vector::Vector* y) + { + //AXPY: y = alpha * x + y + LinAlgWorkspaceCUDA* workspaceCUDA = workspace_; + cublasHandle_t handle_cublas = workspaceCUDA->getCublasHandle(); + cublasDaxpy(handle_cublas, + x->getSize(), + alpha, + x->getData("cuda"), + 1, + y->getData("cuda"), + 1); + } + + /** + * @brief gemv computes matrix-vector product where both matrix and vectors are dense. + * i.e., x = beta*x + alpha*V*y + * + * @param[in] Transpose - yes (T) or no (N) + * @param[in] n Number of rows in (non-transposed) matrix + * @param[in] k Number of columns in (non-transposed) + * @param[in] alpha Constant real number + * @param[in] beta Constant real number + * @param[in] V Multivector containing the matrix, organized columnwise + * @param[in] y Vector, k x 1 if N and n x 1 if T + * @param[in,out] x Vector, n x 1 if N and k x 1 if T + * @param[in] memspace cpu or cuda (for now) + * + * @pre V is stored colum-wise, _n_ > 0, _k_ > 0 + * + */ + void VectorHandlerCuda::gemv(std::string transpose, + index_type n, + index_type k, + const real_type* alpha, + const real_type* beta, + vector::Vector* V, + vector::Vector* y, + vector::Vector* x) + { + LinAlgWorkspaceCUDA* workspaceCUDA = workspace_; + cublasHandle_t handle_cublas = workspaceCUDA->getCublasHandle(); + if (transpose == "T") { + + cublasDgemv(handle_cublas, + CUBLAS_OP_T, + n, + k, + alpha, + V->getData("cuda"), + n, + y->getData("cuda"), + 1, + beta, + x->getData("cuda"), + 1); + + } else { + cublasDgemv(handle_cublas, + CUBLAS_OP_N, + n, + k, + alpha, + V->getData("cuda"), + n, + y->getData("cuda"), + 1, + beta, + x->getData("cuda"), + 1); + } + } + + /** + * @brief mass (bulk) axpy i.e, y = y - x*alpha where alpha is a vector + * + * @param[in] size number of elements in y + * @param[in] alpha vector size k x 1 + * @param[in] x (multi)vector size size x k + * @param[in,out] y vector size size x 1 (this is where the result is stored) + * @param[in] memspace string containg memspace (cpu or cuda) + * + * @pre _k_ > 0, _size_ > 0, _size_ = x->getSize() + * + */ + void VectorHandlerCuda::massAxpy(index_type size, vector::Vector* alpha, index_type k, vector::Vector* x, vector::Vector* y) + { + using namespace constants; + if (k < 200) { + mass_axpy(size, k, x->getData("cuda"), y->getData("cuda"),alpha->getData("cuda")); + } else { + LinAlgWorkspaceCUDA* workspaceCUDA = workspace_; + cublasHandle_t handle_cublas = workspaceCUDA->getCublasHandle(); + cublasDgemm(handle_cublas, + CUBLAS_OP_N, + CUBLAS_OP_N, + size, // m + 1, // n + k + 1, // k + &MINUSONE, // alpha + x->getData("cuda"), // A + size, // lda + alpha->getData("cuda"), // B + k + 1, // ldb + &ONE, + y->getData("cuda"), // c + size); // ldc + } + } + + /** + * @brief mass (bulk) dot product i.e, V^T x, where V is n x k dense multivector + * (a dense multivector consisting of k vectors size n) and x is k x 2 dense + * multivector (a multivector consisiting of two vectors size n each) + * + * @param[in] size Number of elements in a single vector in V + * @param[in] V Multivector; k vectors size n x 1 each + * @param[in] k Number of vectors in V + * @param[in] x Multivector; 2 vectors size n x 1 each + * @param[out] res Multivector; 2 vectors size k x 1 each (result is returned in res) + * @param[in] memspace String containg memspace (cpu or cuda) + * + * @pre _size_ > 0, _k_ > 0, size = x->getSize(), _res_ needs to be allocated + * + */ + void VectorHandlerCuda::massDot2Vec(index_type size, vector::Vector* V, index_type k, vector::Vector* x, vector::Vector* res) + { + using namespace constants; + + if (k < 200) { + mass_inner_product_two_vectors(size, k, x->getData("cuda") , x->getData(1, "cuda"), V->getData("cuda"), res->getData("cuda")); + } else { + LinAlgWorkspaceCUDA* workspaceCUDA = workspace_; + cublasHandle_t handle_cublas = workspaceCUDA->getCublasHandle(); + cublasDgemm(handle_cublas, + CUBLAS_OP_T, + CUBLAS_OP_N, + k + 1, //m + 2, //n + size, //k + &ONE, //alpha + V->getData("cuda"), //A + size, //lda + x->getData("cuda"), //B + size, //ldb + &ZERO, + res->getData("cuda"), //c + k + 1); //ldc + } + } + +} // namespace ReSolve diff --git a/resolve/vector/VectorHandlerCuda.hpp b/resolve/vector/VectorHandlerCuda.hpp new file mode 100644 index 00000000..0ee2752d --- /dev/null +++ b/resolve/vector/VectorHandlerCuda.hpp @@ -0,0 +1,57 @@ +#pragma once +#include + +namespace ReSolve +{ + namespace vector + { + class Vector; + } + class LinAlgWorkspaceCUDA; + class VectorHandlerImpl; +} + + +namespace ReSolve { //namespace vector { + class VectorHandlerCuda : public VectorHandlerImpl + { + public: + VectorHandlerCuda(); + VectorHandlerCuda(LinAlgWorkspaceCUDA* workspace); + virtual ~VectorHandlerCuda(); + + //y = alpha x + y + virtual void axpy(const real_type* alpha, vector::Vector* x, vector::Vector* y); + + //dot: x \cdot y + virtual real_type dot(vector::Vector* x, vector::Vector* y); + + //scal = alpha * x + virtual void scal(const real_type* alpha, vector::Vector* x); + + //mass axpy: x*alpha + y where x is [n x k] and alpha is [k x 1]; x is stored columnwise + virtual void massAxpy(index_type size, vector::Vector* alpha, index_type k, vector::Vector* x, vector::Vector* y); + + //mass dot: V^T x, where V is [n x k] and x is [k x 2], everything is stored and returned columnwise + //Size = n + virtual void massDot2Vec(index_type size, vector::Vector* V, index_type k, vector::Vector* x, vector::Vector* res); + + /** gemv: + * if `transpose = N` (no), `x = beta*x + alpha*V*y`, + * where `x` is `[n x 1]`, `V` is `[n x k]` and `y` is `[k x 1]`. + * if `transpose = T` (yes), `x = beta*x + alpha*V^T*y`, + * where `x` is `[k x 1]`, `V` is `[n x k]` and `y` is `[n x 1]`. + */ + virtual void gemv(std::string transpose, + index_type n, + index_type k, + const real_type* alpha, + const real_type* beta, + vector::Vector* V, + vector::Vector* y, + vector::Vector* x); + private: + LinAlgWorkspaceCUDA* workspace_; + }; + +} //} // namespace ReSolve::vector diff --git a/resolve/vector/VectorHandlerImpl.hpp b/resolve/vector/VectorHandlerImpl.hpp new file mode 100644 index 00000000..229a7461 --- /dev/null +++ b/resolve/vector/VectorHandlerImpl.hpp @@ -0,0 +1,57 @@ +#pragma once +#include + +namespace ReSolve +{ + namespace vector + { + class Vector; + } + class VectorHandlerCpu; + class VectorHandlerCuda; +} + + +namespace ReSolve +{ + class VectorHandlerImpl + { + public: + VectorHandlerImpl() + {} + virtual ~VectorHandlerImpl() + {} + + //y = alpha x + y + virtual void axpy(const real_type* alpha, vector::Vector* x, vector::Vector* y ) = 0; + + //dot: x \cdot y + virtual real_type dot(vector::Vector* x, vector::Vector* y ) = 0; + + //scal = alpha * x + virtual void scal(const real_type* alpha, vector::Vector* x) = 0; + + //mass axpy: x*alpha + y where x is [n x k] and alpha is [k x 1]; x is stored columnwise + virtual void massAxpy(index_type size, vector::Vector* alpha, index_type k, vector::Vector* x, vector::Vector* y) = 0; + + //mass dot: V^T x, where V is [n x k] and x is [k x 2], everything is stored and returned columnwise + //Size = n + virtual void massDot2Vec(index_type size, vector::Vector* V, index_type k, vector::Vector* x, vector::Vector* res) = 0; + + /** gemv: + * if `transpose = N` (no), `x = beta*x + alpha*V*y`, + * where `x` is `[n x 1]`, `V` is `[n x k]` and `y` is `[k x 1]`. + * if `transpose = T` (yes), `x = beta*x + alpha*V^T*y`, + * where `x` is `[k x 1]`, `V` is `[n x k]` and `y` is `[n x 1]`. + */ + virtual void gemv(std::string transpose, + index_type n, + index_type k, + const real_type* alpha, + const real_type* beta, + vector::Vector* V, + vector::Vector* y, + vector::Vector* x) = 0; + }; + +} //} // namespace ReSolve::vector diff --git a/tests/unit/vector/GramSchmidtTests.hpp b/tests/unit/vector/GramSchmidtTests.hpp index 2d801c49..eddd800e 100644 --- a/tests/unit/vector/GramSchmidtTests.hpp +++ b/tests/unit/vector/GramSchmidtTests.hpp @@ -66,8 +66,7 @@ namespace ReSolve { break; } - ReSolve::LinAlgWorkspace* workspace = createLinAlgWorkspace(memspace_); - ReSolve::VectorHandler* handler = new ReSolve::VectorHandler(workspace); + ReSolve::VectorHandler* handler = createVectorHandler(); vector::Vector* V = new vector::Vector(N, 3); // we will be using a space of 3 vectors real_type* H = new real_type[6]; //in this case, Hessenberg matrix is 3 x 2 @@ -114,7 +113,7 @@ namespace ReSolve { status *= verifyAnswer(V, 3, handler, memspace_); - delete workspace; + delete handler; delete [] H; delete V; delete GS; @@ -125,6 +124,21 @@ namespace ReSolve { private: std::string memspace_{"cuda"}; + ReSolve::VectorHandler* createVectorHandler() + { + if (memspace_ == "cpu") { // TODO: Fix memory leak here + LinAlgWorkspace* workpsace = new LinAlgWorkspace(); + return new VectorHandler(workpsace); + } else if (memspace_ == "cuda") { + LinAlgWorkspaceCUDA* workspace = new LinAlgWorkspaceCUDA(); + workspace->initializeHandles(); + return new VectorHandler(workspace); + } else { + std::cout << "Invalid memory space " << memspace_ << "\n"; + } + return nullptr; + } + // x is a multivector containing K vectors bool verifyAnswer(vector::Vector* x, index_type K, ReSolve::VectorHandler* handler, std::string memspace) { diff --git a/tests/unit/vector/VectorHandlerTests.hpp b/tests/unit/vector/VectorHandlerTests.hpp index ad0d3cfb..cc84f25b 100644 --- a/tests/unit/vector/VectorHandlerTests.hpp +++ b/tests/unit/vector/VectorHandlerTests.hpp @@ -38,8 +38,7 @@ namespace ReSolve { { TestStatus status; - ReSolve::LinAlgWorkspace* workspace = createLinAlgWorkspace(memspace_); - ReSolve::VectorHandler handler(workspace); + ReSolve::VectorHandler* handler = createVectorHandler(); vector::Vector* x = new vector::Vector(N); vector::Vector* y = new vector::Vector(N); @@ -52,10 +51,10 @@ namespace ReSolve { real_type alpha = 0.5; //the result is a vector with y[i] = 2.5; - handler.axpy(&alpha, x, y, memspace_); + handler->axpy(&alpha, x, y, memspace_); status *= verifyAnswer(y, 2.5, memspace_); - delete workspace; + delete handler; delete x; delete y; @@ -66,8 +65,7 @@ namespace ReSolve { { TestStatus status; - ReSolve::LinAlgWorkspace* workspace = createLinAlgWorkspace(memspace_); - ReSolve::VectorHandler handler(workspace); + ReSolve::VectorHandler* handler = createVectorHandler(); vector::Vector* x = new vector::Vector(N); vector::Vector* y = new vector::Vector(N); @@ -79,7 +77,7 @@ namespace ReSolve { y->setToConst(4.0, memspace_); real_type ans; //the result is N - ans = handler.dot(x, y, memspace_); + ans = handler->dot(x, y, memspace_); bool st = true;; if (ans != (real_type) N) { @@ -88,7 +86,7 @@ namespace ReSolve { } status *= st; - delete workspace; + delete handler; delete x; delete y; @@ -99,8 +97,7 @@ namespace ReSolve { { TestStatus status; - ReSolve::LinAlgWorkspace* workspace = createLinAlgWorkspace(memspace_); - ReSolve::VectorHandler handler(workspace); + ReSolve::VectorHandler* handler = createVectorHandler(); vector::Vector* x = new vector::Vector(N); @@ -111,10 +108,10 @@ namespace ReSolve { real_type alpha = 3.5; //the answer is x[i] = 4.375; - handler.scal(&alpha, x, memspace_); + handler->scal(&alpha, x, memspace_); status *= verifyAnswer(x, 4.375, memspace_); - delete workspace; + delete handler; delete x; return status.report(__func__); @@ -124,8 +121,7 @@ namespace ReSolve { { TestStatus status; - ReSolve::LinAlgWorkspace* workspace = createLinAlgWorkspace(memspace_); - ReSolve::VectorHandler handler(workspace); + ReSolve::VectorHandler* handler = createVectorHandler(); vector::Vector* x = new vector::Vector(N, K); vector::Vector* y = new vector::Vector(N); @@ -148,11 +144,11 @@ namespace ReSolve { index_type r = K % 2; real_type res = (real_type) ((floor((real_type) K / 2.0) + r) * 1.0 + floor((real_type) K / 2.0) * (-0.5)); - handler.massAxpy(N, alpha, K, x, y, memspace_); + handler->massAxpy(N, alpha, K, x, y, memspace_); status *= verifyAnswer(y, 2.0 - res, memspace_); - delete workspace; + delete handler; delete x; delete y; delete alpha; @@ -164,8 +160,7 @@ namespace ReSolve { { TestStatus status; - ReSolve::LinAlgWorkspace* workspace = createLinAlgWorkspace(memspace_); - ReSolve::VectorHandler handler(workspace); + ReSolve::VectorHandler* handler = createVectorHandler(); vector::Vector* x = new vector::Vector(N, K); vector::Vector* y = new vector::Vector(N, 2); @@ -176,11 +171,11 @@ namespace ReSolve { x->setToConst(1.0, memspace_); y->setToConst(-1.0, memspace_); - handler.massDot2Vec(N, x, K, y, res, memspace_); + handler->massDot2Vec(N, x, K, y, res, memspace_); status *= verifyAnswer(res, (-1.0) * (real_type) N, memspace_); - delete workspace; + delete handler; delete x; delete y; delete res; @@ -190,8 +185,8 @@ namespace ReSolve { TestOutcome gemv(index_type N, index_type K) { TestStatus status; - ReSolve::LinAlgWorkspace* workspace = createLinAlgWorkspace(memspace_); - ReSolve::VectorHandler handler(workspace); + // ReSolve::LinAlgWorkspace* workspace = createLinAlgWorkspace(memspace_); + ReSolve::VectorHandler* handler = createVectorHandler(); vector::Vector* V = new vector::Vector(N, K); // for the test with NO TRANSPOSE vector::Vector* yN = new vector::Vector(K); @@ -214,9 +209,9 @@ namespace ReSolve { real_type alpha = -1.0; real_type beta = 1.0; - handler.gemv("N", N, K, &alpha, &beta, V, yN, xN, memspace_); + handler->gemv("N", N, K, &alpha, &beta, V, yN, xN, memspace_); status *= verifyAnswer(xN, (real_type) (K) + 0.5, memspace_); - handler.gemv("T", N, K, &alpha, &beta, V, yT, xT, memspace_); + handler->gemv("T", N, K, &alpha, &beta, V, yT, xT, memspace_); status *= verifyAnswer(xT, (real_type) (N) + 0.5, memspace_); return status.report(__func__); @@ -225,7 +220,22 @@ namespace ReSolve { private: std::string memspace_{"cpu"}; - // we can verify through norm but that would defeat the purpose of testing vector handler... + ReSolve::VectorHandler* createVectorHandler() + { + if (memspace_ == "cpu") { + LinAlgWorkspace* workpsace = new LinAlgWorkspace(); + return new VectorHandler(workpsace); + } else if (memspace_ == "cuda") { + LinAlgWorkspaceCUDA* workspace = new LinAlgWorkspaceCUDA(); + workspace->initializeHandles(); + return new VectorHandler(workspace); + } else { + std::cout << "Invalid memory space " << memspace_ << "\n"; + } + return nullptr; + } + + // we can verify through norm but that would defeat the purpose of testing vector handler ... bool verifyAnswer(vector::Vector* x, real_type answer, std::string memspace) { bool status = true; From 1e8f84a048c1303969b4a70782ed4e5a5fbff1ed Mon Sep 17 00:00:00 2001 From: Slaven Peles Date: Thu, 19 Oct 2023 12:05:12 -0400 Subject: [PATCH 10/15] Remove workspace factory. --- examples/r_KLU_GLU.cpp | 2 +- examples/r_KLU_GLU_matrix_values_update.cpp | 2 +- examples/r_KLU_KLU.cpp | 4 +- examples/r_KLU_KLU_standalone.cpp | 4 +- examples/r_KLU_rf.cpp | 2 +- examples/r_KLU_rf_FGMRES.cpp | 2 +- .../r_KLU_rf_FGMRES_reuse_factorization.cpp | 2 +- resolve/LinSolverDirectCuSolverGLU.cpp | 6 +- resolve/LinSolverDirectCuSolverGLU.hpp | 4 +- resolve/matrix/MatrixHandler.cpp | 5 +- resolve/matrix/MatrixHandler.hpp | 4 +- resolve/matrix/MatrixHandlerCpu.cpp | 162 +----------------- resolve/matrix/MatrixHandlerCpu.hpp | 6 +- resolve/matrix/MatrixHandlerCuda.cpp | 5 - resolve/matrix/MatrixHandlerCuda.hpp | 1 - resolve/matrix/MatrixHandlerImpl.hpp | 6 - resolve/vector/VectorHandler.cpp | 4 +- resolve/vector/VectorHandler.hpp | 4 +- resolve/vector/VectorHandlerCpu.cpp | 17 +- resolve/vector/VectorHandlerCpu.hpp | 6 +- resolve/vector/VectorHandlerCuda.cpp | 2 +- resolve/workspace/CMakeLists.txt | 5 +- resolve/workspace/LinAlgWorkspace.cpp | 16 -- resolve/workspace/LinAlgWorkspace.hpp | 18 +- resolve/workspace/LinAlgWorkspaceCUDA.hpp | 6 +- resolve/workspace/LinAlgWorkspaceCpu.cpp | 16 ++ resolve/workspace/LinAlgWorkspaceCpu.hpp | 18 ++ resolve/workspace/LinAlgWorkspaceFactory.cpp | 23 --- resolve/workspace/LinAlgWorkspaceFactory.hpp | 18 -- tests/functionality/testKLU.cpp | 2 +- tests/functionality/testKLU_GLU.cpp | 2 +- tests/functionality/testKLU_Rf.cpp | 2 +- tests/functionality/testKLU_Rf_FGMRES.cpp | 2 +- tests/unit/matrix/MatrixHandlerTests.hpp | 4 +- tests/unit/vector/GramSchmidtTests.hpp | 4 +- tests/unit/vector/VectorHandlerTests.hpp | 5 +- 36 files changed, 97 insertions(+), 294 deletions(-) delete mode 100644 resolve/workspace/LinAlgWorkspace.cpp create mode 100644 resolve/workspace/LinAlgWorkspaceCpu.cpp create mode 100644 resolve/workspace/LinAlgWorkspaceCpu.hpp delete mode 100644 resolve/workspace/LinAlgWorkspaceFactory.cpp delete mode 100644 resolve/workspace/LinAlgWorkspaceFactory.hpp diff --git a/examples/r_KLU_GLU.cpp b/examples/r_KLU_GLU.cpp index c072eb34..e0831976 100644 --- a/examples/r_KLU_GLU.cpp +++ b/examples/r_KLU_GLU.cpp @@ -9,7 +9,7 @@ #include #include #include -#include +#include using namespace ReSolve::constants; diff --git a/examples/r_KLU_GLU_matrix_values_update.cpp b/examples/r_KLU_GLU_matrix_values_update.cpp index af797883..e6e09673 100644 --- a/examples/r_KLU_GLU_matrix_values_update.cpp +++ b/examples/r_KLU_GLU_matrix_values_update.cpp @@ -9,7 +9,7 @@ #include #include #include -#include +#include // this updates the matrix values to simulate what CFD/optimization software does. diff --git a/examples/r_KLU_KLU.cpp b/examples/r_KLU_KLU.cpp index bee712bd..550144ae 100644 --- a/examples/r_KLU_KLU.cpp +++ b/examples/r_KLU_KLU.cpp @@ -9,7 +9,7 @@ #include #include #include -#include +#include using namespace ReSolve::constants; @@ -35,7 +35,7 @@ int main(int argc, char *argv[]) ReSolve::matrix::Coo* A_coo; ReSolve::matrix::Csr* A; - ReSolve::LinAlgWorkspace* workspace = new ReSolve::LinAlgWorkspace(); + ReSolve::LinAlgWorkspaceCpu* workspace = new ReSolve::LinAlgWorkspaceCpu(); ReSolve::MatrixHandler* matrix_handler = new ReSolve::MatrixHandler(workspace); ReSolve::VectorHandler* vector_handler = new ReSolve::VectorHandler(workspace); real_type* rhs; diff --git a/examples/r_KLU_KLU_standalone.cpp b/examples/r_KLU_KLU_standalone.cpp index a1aa1fad..628eeb06 100644 --- a/examples/r_KLU_KLU_standalone.cpp +++ b/examples/r_KLU_KLU_standalone.cpp @@ -9,7 +9,7 @@ #include #include #include -#include +#include using namespace ReSolve::constants; @@ -31,7 +31,7 @@ int main(int argc, char *argv[]) ReSolve::matrix::Coo* A_coo; ReSolve::matrix::Csr* A; - ReSolve::LinAlgWorkspace* workspace = new ReSolve::LinAlgWorkspace(); + ReSolve::LinAlgWorkspaceCpu* workspace = new ReSolve::LinAlgWorkspaceCpu(); ReSolve::MatrixHandler* matrix_handler = new ReSolve::MatrixHandler(workspace); ReSolve::VectorHandler* vector_handler = new ReSolve::VectorHandler(workspace); real_type* rhs; diff --git a/examples/r_KLU_rf.cpp b/examples/r_KLU_rf.cpp index 8954724d..5dce2dfe 100644 --- a/examples/r_KLU_rf.cpp +++ b/examples/r_KLU_rf.cpp @@ -10,7 +10,7 @@ #include #include #include -#include +#include using namespace ReSolve::constants; diff --git a/examples/r_KLU_rf_FGMRES.cpp b/examples/r_KLU_rf_FGMRES.cpp index 52f54a0d..2e43f9c3 100644 --- a/examples/r_KLU_rf_FGMRES.cpp +++ b/examples/r_KLU_rf_FGMRES.cpp @@ -11,7 +11,7 @@ #include #include #include -#include +#include using namespace ReSolve::constants; diff --git a/examples/r_KLU_rf_FGMRES_reuse_factorization.cpp b/examples/r_KLU_rf_FGMRES_reuse_factorization.cpp index 2c484926..2317680c 100644 --- a/examples/r_KLU_rf_FGMRES_reuse_factorization.cpp +++ b/examples/r_KLU_rf_FGMRES_reuse_factorization.cpp @@ -11,7 +11,7 @@ #include #include #include -#include +#include using namespace ReSolve::constants; diff --git a/resolve/LinSolverDirectCuSolverGLU.cpp b/resolve/LinSolverDirectCuSolverGLU.cpp index feaf11b8..75039ff4 100644 --- a/resolve/LinSolverDirectCuSolverGLU.cpp +++ b/resolve/LinSolverDirectCuSolverGLU.cpp @@ -3,12 +3,12 @@ #include #include -#include +#include #include "LinSolverDirectCuSolverGLU.hpp" namespace ReSolve { - LinSolverDirectCuSolverGLU::LinSolverDirectCuSolverGLU(LinAlgWorkspace* workspace) + LinSolverDirectCuSolverGLU::LinSolverDirectCuSolverGLU(LinAlgWorkspaceCUDA* workspace) { this->workspace_ = workspace; } @@ -26,7 +26,7 @@ namespace ReSolve { int error_sum = 0; - LinAlgWorkspaceCUDA* workspaceCUDA = (LinAlgWorkspaceCUDA*) workspace_; + LinAlgWorkspaceCUDA* workspaceCUDA = workspace_; //get the handle handle_cusolversp_ = workspaceCUDA->getCusolverSpHandle(); A_ = (matrix::Csr*) A; diff --git a/resolve/LinSolverDirectCuSolverGLU.hpp b/resolve/LinSolverDirectCuSolverGLU.hpp index 0aa27df9..a48c8cba 100644 --- a/resolve/LinSolverDirectCuSolverGLU.hpp +++ b/resolve/LinSolverDirectCuSolverGLU.hpp @@ -26,7 +26,7 @@ namespace ReSolve using vector_type = vector::Vector; public: - LinSolverDirectCuSolverGLU(LinAlgWorkspace* workspace); + LinSolverDirectCuSolverGLU(LinAlgWorkspaceCUDA* workspace); ~LinSolverDirectCuSolverGLU(); int refactorize(); @@ -40,7 +40,7 @@ namespace ReSolve //note: we need cuSolver handle, we can copy it from the workspace to avoid double allocation cusparseMatDescr_t descr_M_; //this is NOT sparse matrix descriptor cusparseMatDescr_t descr_A_; //this is NOT sparse matrix descriptor - LinAlgWorkspace* workspace_;// so we can copy cusparse handle + LinAlgWorkspaceCUDA* workspace_;// so we can copy cusparse handle cusolverSpHandle_t handle_cusolversp_; cusolverStatus_t status_cusolver_; cusparseStatus_t status_cusparse_; diff --git a/resolve/matrix/MatrixHandler.cpp b/resolve/matrix/MatrixHandler.cpp index 50dd3082..c4849c18 100644 --- a/resolve/matrix/MatrixHandler.cpp +++ b/resolve/matrix/MatrixHandler.cpp @@ -5,7 +5,7 @@ #include #include #include -#include +#include #include #include "MatrixHandler.hpp" #include "MatrixHandlerCpu.hpp" @@ -47,7 +47,7 @@ namespace ReSolve { * @note The CPU implementation currently does not require a workspace. * The workspace pointer parameter is provided for forward compatibility. */ - MatrixHandler::MatrixHandler(LinAlgWorkspace* new_workspace) + MatrixHandler::MatrixHandler(LinAlgWorkspaceCpu* new_workspace) { cpuImpl_ = new MatrixHandlerCpu(new_workspace); isCpuEnabled_ = true; @@ -272,7 +272,6 @@ namespace ReSolve { int MatrixHandler::csc2csr(matrix::Csc* A_csc, matrix::Csr* A_csr, std::string memspace) { - index_type error_sum = 0; if (memspace == "cuda") { return cudaImpl_->csc2csr(A_csc, A_csr); } else if (memspace == "cpu") { diff --git a/resolve/matrix/MatrixHandler.hpp b/resolve/matrix/MatrixHandler.hpp index fed6b593..e05e6c8f 100644 --- a/resolve/matrix/MatrixHandler.hpp +++ b/resolve/matrix/MatrixHandler.hpp @@ -16,7 +16,7 @@ namespace ReSolve class Csc; class Csr; } - class LinAlgWorkspace; + class LinAlgWorkspaceCpu; class LinAlgWorkspaceCUDA; class MatrixHandlerImpl; } @@ -42,7 +42,7 @@ namespace ReSolve { public: MatrixHandler(); - MatrixHandler(LinAlgWorkspace* workspace); + MatrixHandler(LinAlgWorkspaceCpu* workspace); MatrixHandler(LinAlgWorkspaceCUDA* workspace); ~MatrixHandler(); diff --git a/resolve/matrix/MatrixHandlerCpu.cpp b/resolve/matrix/MatrixHandlerCpu.cpp index cc211f5f..95637402 100644 --- a/resolve/matrix/MatrixHandlerCpu.cpp +++ b/resolve/matrix/MatrixHandlerCpu.cpp @@ -22,7 +22,7 @@ namespace ReSolve { { } - MatrixHandlerCpu::MatrixHandlerCpu(LinAlgWorkspace* new_workspace) + MatrixHandlerCpu::MatrixHandlerCpu(LinAlgWorkspaceCpu* new_workspace) { workspace_ = new_workspace; } @@ -37,164 +37,6 @@ namespace ReSolve { values_changed_ = values_changed; } -// int MatrixHandlerCpu::coo2csr(matrix::Coo* A_coo, matrix::Csr* A_csr, std::string memspace) -// { -// //this happens on the CPU not on the GPU -// //but will return whatever memspace requested. - -// //count nnzs first - -// index_type nnz_unpacked = 0; -// index_type nnz = A_coo->getNnz(); -// index_type n = A_coo->getNumRows(); -// bool symmetric = A_coo->symmetric(); -// bool expanded = A_coo->expanded(); - -// index_type* nnz_counts = new index_type[n]; -// std::fill_n(nnz_counts, n, 0); -// index_type* coo_rows = A_coo->getRowData("cpu"); -// index_type* coo_cols = A_coo->getColData("cpu"); -// real_type* coo_vals = A_coo->getValues("cpu"); - -// index_type* diag_control = new index_type[n]; //for DEDUPLICATION of the diagonal -// std::fill_n(diag_control, n, 0); -// index_type nnz_unpacked_no_duplicates = 0; -// index_type nnz_no_duplicates = nnz; - - -// //maybe check if they exist? -// for (index_type i = 0; i < nnz; ++i) -// { -// nnz_counts[coo_rows[i]]++; -// nnz_unpacked++; -// nnz_unpacked_no_duplicates++; -// if ((coo_rows[i] != coo_cols[i])&& (symmetric) && (!expanded)) -// { -// nnz_counts[coo_cols[i]]++; -// nnz_unpacked++; -// nnz_unpacked_no_duplicates++; -// } -// if (coo_rows[i] == coo_cols[i]){ -// if (diag_control[coo_rows[i]] > 0) { -// //duplicate -// nnz_unpacked_no_duplicates--; -// nnz_no_duplicates--; -// } -// diag_control[coo_rows[i]]++; -// } -// } -// A_csr->setExpanded(true); -// A_csr->setNnzExpanded(nnz_unpacked_no_duplicates); -// index_type* csr_ia = new index_type[n+1]; -// std::fill_n(csr_ia, n + 1, 0); -// index_type* csr_ja = new index_type[nnz_unpacked]; -// real_type* csr_a = new real_type[nnz_unpacked]; -// index_type* nnz_shifts = new index_type[n]; -// std::fill_n(nnz_shifts, n , 0); - -// IndexValuePair* tmp = new IndexValuePair[nnz_unpacked]; - -// csr_ia[0] = 0; - -// for (index_type i = 1; i < n + 1; ++i){ -// csr_ia[i] = csr_ia[i - 1] + nnz_counts[i - 1] - (diag_control[i-1] - 1); -// } - -// int r, start; - - -// for (index_type i = 0; i < nnz; ++i){ -// //which row -// r = coo_rows[i]; -// start = csr_ia[r]; - -// if ((start + nnz_shifts[r]) > nnz_unpacked) { -// out::warning() << "index out of bounds (case 1) start: " << start << "nnz_shifts[" << r << "] = " << nnz_shifts[r] << std::endl; -// } -// if ((r == coo_cols[i]) && (diag_control[r] > 1)) {//diagonal, and there are duplicates -// bool already_there = false; -// for (index_type j = start; j < start + nnz_shifts[r]; ++j) -// { -// index_type c = tmp[j].getIdx(); -// if (c == r) { -// real_type val = tmp[j].getValue(); -// val += coo_vals[i]; -// tmp[j].setValue(val); -// already_there = true; -// out::warning() << " duplicate found, row " << c << " adding in place " << j << " current value: " << val << std::endl; -// } -// } -// if (!already_there){ // first time this duplicates appears - -// tmp[start + nnz_shifts[r]].setIdx(coo_cols[i]); -// tmp[start + nnz_shifts[r]].setValue(coo_vals[i]); - -// nnz_shifts[r]++; -// } -// } else {//not diagonal -// tmp[start + nnz_shifts[r]].setIdx(coo_cols[i]); -// tmp[start + nnz_shifts[r]].setValue(coo_vals[i]); -// nnz_shifts[r]++; - -// if ((coo_rows[i] != coo_cols[i]) && (symmetric == 1)) -// { -// r = coo_cols[i]; -// start = csr_ia[r]; - -// if ((start + nnz_shifts[r]) > nnz_unpacked) -// out::warning() << "index out of bounds (case 2) start: " << start << "nnz_shifts[" << r << "] = " << nnz_shifts[r] << std::endl; -// tmp[start + nnz_shifts[r]].setIdx(coo_rows[i]); -// tmp[start + nnz_shifts[r]].setValue(coo_vals[i]); -// nnz_shifts[r]++; -// } -// } -// } -// //now sort whatever is inside rows - -// for (int i = 0; i < n; ++i) -// { - -// //now sorting (and adding 1) -// int colStart = csr_ia[i]; -// int colEnd = csr_ia[i + 1]; -// int length = colEnd - colStart; -// std::sort(&tmp[colStart],&tmp[colStart] + length); -// } - -// for (index_type i = 0; i < nnz_unpacked; ++i) -// { -// csr_ja[i] = tmp[i].getIdx(); -// csr_a[i] = tmp[i].getValue(); -// } -// #if 0 -// for (int i = 0; isetNnz(nnz_no_duplicates); -// if (memspace == "cpu"){ -// A_csr->updateData(csr_ia, csr_ja, csr_a, "cpu", "cpu"); -// } else { -// if (memspace == "cuda"){ -// A_csr->updateData(csr_ia, csr_ja, csr_a, "cpu", "cuda"); -// } else { -// //display error -// } -// } -// delete [] nnz_counts; -// delete [] tmp; -// delete [] nnz_shifts; -// delete [] csr_ia; -// delete [] csr_ja; -// delete [] csr_a; -// delete [] diag_control; - -// return 0; -// } int MatrixHandlerCpu::matvec(matrix::Sparse* Ageneric, vector_type* vec_x, @@ -253,7 +95,7 @@ namespace ReSolve { */ int MatrixHandlerCpu::csc2csr(matrix::Csc* A_csc, matrix::Csr* A_csr) { - int error_sum = 0; + // int error_sum = 0; TODO: Collect error output! assert(A_csc->getNnz() == A_csr->getNnz()); assert(A_csc->getNumRows() == A_csr->getNumColumns()); assert(A_csr->getNumRows() == A_csc->getNumColumns()); diff --git a/resolve/matrix/MatrixHandlerCpu.hpp b/resolve/matrix/MatrixHandlerCpu.hpp index fb760750..3b6c91c0 100644 --- a/resolve/matrix/MatrixHandlerCpu.hpp +++ b/resolve/matrix/MatrixHandlerCpu.hpp @@ -21,7 +21,7 @@ namespace ReSolve class Csc; class Csr; } - class LinAlgWorkspace; + class LinAlgWorkspaceCpu; } @@ -37,7 +37,7 @@ namespace ReSolve { public: MatrixHandlerCpu(); - MatrixHandlerCpu(LinAlgWorkspace* workspace); + MatrixHandlerCpu(LinAlgWorkspaceCpu* workspace); virtual ~MatrixHandlerCpu(); int csc2csr(matrix::Csc* A_csc, matrix::Csr* A_csr); //memspace decides on what is returned (cpu or cuda pointer) @@ -54,7 +54,7 @@ namespace ReSolve { void setValuesChanged(bool isValuesChanged); private: - LinAlgWorkspace* workspace_{nullptr}; + LinAlgWorkspaceCpu* workspace_{nullptr}; // bool new_matrix_{true}; ///< if the structure changed, you need a new handler. bool values_changed_{true}; ///< needed for matvec diff --git a/resolve/matrix/MatrixHandlerCuda.cpp b/resolve/matrix/MatrixHandlerCuda.cpp index 15252d85..b27a63c0 100644 --- a/resolve/matrix/MatrixHandlerCuda.cpp +++ b/resolve/matrix/MatrixHandlerCuda.cpp @@ -26,11 +26,6 @@ namespace ReSolve { workspace_ = new_workspace; } - MatrixHandlerCuda::MatrixHandlerCuda(LinAlgWorkspace* new_workspace) - { - workspace_ = (LinAlgWorkspaceCUDA*) new_workspace; - } - void MatrixHandlerCuda::setValuesChanged(bool values_changed) { values_changed_ = values_changed; diff --git a/resolve/matrix/MatrixHandlerCuda.hpp b/resolve/matrix/MatrixHandlerCuda.hpp index c77eb795..a8fec606 100644 --- a/resolve/matrix/MatrixHandlerCuda.hpp +++ b/resolve/matrix/MatrixHandlerCuda.hpp @@ -37,7 +37,6 @@ namespace ReSolve { public: MatrixHandlerCuda(); - MatrixHandlerCuda(LinAlgWorkspace* workspace); MatrixHandlerCuda(LinAlgWorkspaceCUDA* workspace); virtual ~MatrixHandlerCuda(); diff --git a/resolve/matrix/MatrixHandlerImpl.hpp b/resolve/matrix/MatrixHandlerImpl.hpp index 015412d0..b901e2b5 100644 --- a/resolve/matrix/MatrixHandlerImpl.hpp +++ b/resolve/matrix/MatrixHandlerImpl.hpp @@ -55,12 +55,6 @@ namespace ReSolve { virtual void setValuesChanged(bool isValuesChanged) = 0; - protected: - // LinAlgWorkspace* workspace_{nullptr}; - // bool new_matrix_{true}; ///< if the structure changed, you need a new handler. - // bool values_changed_{true}; ///< needed for matvec - - // MemoryHandler mem_; ///< Device memory manager object }; } // namespace ReSolve diff --git a/resolve/vector/VectorHandler.cpp b/resolve/vector/VectorHandler.cpp index 9648058e..0feb96a8 100644 --- a/resolve/vector/VectorHandler.cpp +++ b/resolve/vector/VectorHandler.cpp @@ -3,7 +3,7 @@ #include #include #include -#include +#include #include #include #include @@ -26,7 +26,7 @@ namespace ReSolve { * * @param new_workspace - workspace to be set */ - VectorHandler::VectorHandler(LinAlgWorkspace* new_workspace) + VectorHandler::VectorHandler(LinAlgWorkspaceCpu* new_workspace) { cpuImpl_ = new VectorHandlerCpu(new_workspace); isCpuEnabled_ = true; diff --git a/resolve/vector/VectorHandler.hpp b/resolve/vector/VectorHandler.hpp index 53ec0bca..c17d4688 100644 --- a/resolve/vector/VectorHandler.hpp +++ b/resolve/vector/VectorHandler.hpp @@ -8,7 +8,7 @@ namespace ReSolve class Vector; } class VectorHandlerImpl; - class LinAlgWorkspace; + class LinAlgWorkspaceCpu; class LinAlgWorkspaceCUDA; } @@ -17,7 +17,7 @@ namespace ReSolve { //namespace vector { class VectorHandler { public: VectorHandler(); - VectorHandler(LinAlgWorkspace* new_workspace); + VectorHandler(LinAlgWorkspaceCpu* new_workspace); VectorHandler(LinAlgWorkspaceCUDA* new_workspace); ~VectorHandler(); diff --git a/resolve/vector/VectorHandlerCpu.cpp b/resolve/vector/VectorHandlerCpu.cpp index 593ba5b0..f5cc463d 100644 --- a/resolve/vector/VectorHandlerCpu.cpp +++ b/resolve/vector/VectorHandlerCpu.cpp @@ -3,7 +3,7 @@ #include #include #include -#include +#include #include #include "VectorHandlerCpu.hpp" @@ -22,7 +22,7 @@ namespace ReSolve { * * @param new_workspace - workspace to be set */ - VectorHandlerCpu:: VectorHandlerCpu(LinAlgWorkspace* new_workspace) + VectorHandlerCpu:: VectorHandlerCpu(LinAlgWorkspaceCpu* new_workspace) { workspace_ = new_workspace; } @@ -115,7 +115,14 @@ namespace ReSolve { * @pre V is stored colum-wise, _n_ > 0, _k_ > 0 * */ - void VectorHandlerCpu::gemv(std::string transpose, index_type n, index_type k, const real_type* alpha, const real_type* beta, vector::Vector* V, vector::Vector* y, vector::Vector* x) + void VectorHandlerCpu::gemv(std::string /* transpose */, + index_type /* n */, + index_type /* k */, + const real_type* /* alpha */, + const real_type* /* beta */, + vector::Vector* /* V */, + vector::Vector* /* y */, + vector::Vector* /* x */) { out::error() << "Not implemented (yet)" << std::endl; } @@ -132,7 +139,7 @@ namespace ReSolve { * @pre _k_ > 0, _size_ > 0, _size_ = x->getSize() * */ - void VectorHandlerCpu::massAxpy(index_type size, vector::Vector* alpha, index_type k, vector::Vector* x, vector::Vector* y) + void VectorHandlerCpu::massAxpy(index_type /* size */, vector::Vector* /* alpha */, index_type /* k */, vector::Vector* /* x */, vector::Vector* /* y */) { out::error() << "Not implemented (yet)" << std::endl; } @@ -151,7 +158,7 @@ namespace ReSolve { * @pre _size_ > 0, _k_ > 0, size = x->getSize(), _res_ needs to be allocated * */ - void VectorHandlerCpu::massDot2Vec(index_type size, vector::Vector* V, index_type k, vector::Vector* x, vector::Vector* res) + void VectorHandlerCpu::massDot2Vec(index_type /* size */, vector::Vector* /* V */, index_type /* k */, vector::Vector* /* x */, vector::Vector* /* res */) { out::error() << "Not implemented (yet)" << std::endl; } diff --git a/resolve/vector/VectorHandlerCpu.hpp b/resolve/vector/VectorHandlerCpu.hpp index a5f09162..3f2f6133 100644 --- a/resolve/vector/VectorHandlerCpu.hpp +++ b/resolve/vector/VectorHandlerCpu.hpp @@ -7,7 +7,7 @@ namespace ReSolve { class Vector; } - class LinAlgWorkspace; + class LinAlgWorkspaceCpu; class VectorHandlerImpl; } @@ -17,7 +17,7 @@ namespace ReSolve { //namespace vector { { public: VectorHandlerCpu(); - VectorHandlerCpu(LinAlgWorkspace* workspace); + VectorHandlerCpu(LinAlgWorkspaceCpu* workspace); virtual ~VectorHandlerCpu(); //y = alpha x + y @@ -51,7 +51,7 @@ namespace ReSolve { //namespace vector { vector::Vector* y, vector::Vector* x); private: - LinAlgWorkspace* workspace_; + LinAlgWorkspaceCpu* workspace_; }; } //} // namespace ReSolve::vector diff --git a/resolve/vector/VectorHandlerCuda.cpp b/resolve/vector/VectorHandlerCuda.cpp index 69044f37..3c887e85 100644 --- a/resolve/vector/VectorHandlerCuda.cpp +++ b/resolve/vector/VectorHandlerCuda.cpp @@ -3,7 +3,7 @@ #include #include #include -#include +#include #include #include "VectorHandlerCuda.hpp" diff --git a/resolve/workspace/CMakeLists.txt b/resolve/workspace/CMakeLists.txt index a50982c8..c6225487 100644 --- a/resolve/workspace/CMakeLists.txt +++ b/resolve/workspace/CMakeLists.txt @@ -7,8 +7,7 @@ ]] set(ReSolve_Workspace_SRC - LinAlgWorkspace.cpp - LinAlgWorkspaceFactory.cpp + LinAlgWorkspaceCpu.cpp ) set(ReSolve_Workspace_CUDA_SRC # consider adding a separate dir for cuda-sdk dependent code @@ -17,7 +16,7 @@ set(ReSolve_Workspace_CUDA_SRC # consider adding a separate dir for cuda-sdk dep set(ReSolve_Workspace_HEADER_INSTALL LinAlgWorkspace.hpp - LinAlgWorkspaceFactory.hpp + LinAlgWorkspaceCpu.hpp LinAlgWorkspaceCUDA.hpp ) diff --git a/resolve/workspace/LinAlgWorkspace.cpp b/resolve/workspace/LinAlgWorkspace.cpp deleted file mode 100644 index 5e4b55d8..00000000 --- a/resolve/workspace/LinAlgWorkspace.cpp +++ /dev/null @@ -1,16 +0,0 @@ -#include "LinAlgWorkspace.hpp" - -namespace ReSolve -{ - LinAlgWorkspace::LinAlgWorkspace() - { - } - - LinAlgWorkspace::~LinAlgWorkspace() - { - } - - void LinAlgWorkspace::initializeHandles() - { - } -} diff --git a/resolve/workspace/LinAlgWorkspace.hpp b/resolve/workspace/LinAlgWorkspace.hpp index 68074eb8..6da58fda 100644 --- a/resolve/workspace/LinAlgWorkspace.hpp +++ b/resolve/workspace/LinAlgWorkspace.hpp @@ -1,18 +1,8 @@ #pragma once +#include -#include +#ifdef RESOLVE_USE_CUDA +#include +#endif -namespace ReSolve -{ - class LinAlgWorkspace - { - public: - LinAlgWorkspace(); - ~LinAlgWorkspace(); - void initializeHandles(); - protected: - MemoryHandler mem_; - }; - -} diff --git a/resolve/workspace/LinAlgWorkspaceCUDA.hpp b/resolve/workspace/LinAlgWorkspaceCUDA.hpp index c0d4554c..5076563a 100644 --- a/resolve/workspace/LinAlgWorkspaceCUDA.hpp +++ b/resolve/workspace/LinAlgWorkspaceCUDA.hpp @@ -4,11 +4,11 @@ #include "cusparse.h" #include "cusolverSp.h" -#include +#include namespace ReSolve { - class LinAlgWorkspaceCUDA : public LinAlgWorkspace + class LinAlgWorkspaceCUDA { public: LinAlgWorkspaceCUDA(); @@ -55,6 +55,8 @@ namespace ReSolve void* buffer_1norm_; bool matvec_setup_done_; //check if setup is done for matvec i.e. if buffer is allocated, csr structure is set etc. + + MemoryHandler mem_; }; } // namespace ReSolve diff --git a/resolve/workspace/LinAlgWorkspaceCpu.cpp b/resolve/workspace/LinAlgWorkspaceCpu.cpp new file mode 100644 index 00000000..3ed9aa43 --- /dev/null +++ b/resolve/workspace/LinAlgWorkspaceCpu.cpp @@ -0,0 +1,16 @@ +#include "LinAlgWorkspaceCpu.hpp" + +namespace ReSolve +{ + LinAlgWorkspaceCpu::LinAlgWorkspaceCpu() + { + } + + LinAlgWorkspaceCpu::~LinAlgWorkspaceCpu() + { + } + + void LinAlgWorkspaceCpu::initializeHandles() + { + } +} diff --git a/resolve/workspace/LinAlgWorkspaceCpu.hpp b/resolve/workspace/LinAlgWorkspaceCpu.hpp new file mode 100644 index 00000000..00e5f38e --- /dev/null +++ b/resolve/workspace/LinAlgWorkspaceCpu.hpp @@ -0,0 +1,18 @@ +#pragma once + + +#include + +namespace ReSolve +{ + class LinAlgWorkspaceCpu + { + public: + LinAlgWorkspaceCpu(); + ~LinAlgWorkspaceCpu(); + void initializeHandles(); + private: + MemoryHandler mem_; + }; + +} diff --git a/resolve/workspace/LinAlgWorkspaceFactory.cpp b/resolve/workspace/LinAlgWorkspaceFactory.cpp deleted file mode 100644 index 43a10870..00000000 --- a/resolve/workspace/LinAlgWorkspaceFactory.cpp +++ /dev/null @@ -1,23 +0,0 @@ -#include "LinAlgWorkspaceFactory.hpp" - -namespace ReSolve -{ - /// @brief Workspace factory - /// @param[in] memspace memory space ID - /// @return pointer to the linear algebra workspace - LinAlgWorkspace* createLinAlgWorkspace(std::string memspace) - { - if (memspace == "cuda") { -#ifdef RESOLVE_USE_CUDA - LinAlgWorkspaceCUDA* workspace = new LinAlgWorkspaceCUDA(); - workspace->initializeHandles(); - return workspace; -#else - return nullptr; -#endif - } - // If not CUDA, return default - return (new LinAlgWorkspace()); - } - -} diff --git a/resolve/workspace/LinAlgWorkspaceFactory.hpp b/resolve/workspace/LinAlgWorkspaceFactory.hpp deleted file mode 100644 index 0f5860c1..00000000 --- a/resolve/workspace/LinAlgWorkspaceFactory.hpp +++ /dev/null @@ -1,18 +0,0 @@ -#pragma once - -#include - -#include - -#ifdef RESOLVE_USE_CUDA -#include -#endif - -// #ifndef RESOLVE_USE_CUDA -// using ReSolve::LinAlgWorkspace = ReSolve::LinAlgWorkspaceCUDA; -// #endif - -namespace ReSolve -{ - LinAlgWorkspace* createLinAlgWorkspace(std::string memspace); -} diff --git a/tests/functionality/testKLU.cpp b/tests/functionality/testKLU.cpp index 66790c83..f3c1da57 100644 --- a/tests/functionality/testKLU.cpp +++ b/tests/functionality/testKLU.cpp @@ -27,7 +27,7 @@ int main(int argc, char *argv[]) int error_sum = 0; int status = 0; - ReSolve::LinAlgWorkspace* workspace = new ReSolve::LinAlgWorkspace(); + ReSolve::LinAlgWorkspaceCpu* workspace = new ReSolve::LinAlgWorkspaceCpu(); ReSolve::MatrixHandler* matrix_handler = new ReSolve::MatrixHandler(workspace); ReSolve::VectorHandler* vector_handler = new ReSolve::VectorHandler(workspace); ReSolve::LinSolverDirectKLU* KLU = new ReSolve::LinSolverDirectKLU; diff --git a/tests/functionality/testKLU_GLU.cpp b/tests/functionality/testKLU_GLU.cpp index 1384f405..0e9bb4bd 100644 --- a/tests/functionality/testKLU_GLU.cpp +++ b/tests/functionality/testKLU_GLU.cpp @@ -10,7 +10,7 @@ #include #include #include -#include +#include //author: KS //functionality test to check whether cuSolverGLU works correctly. diff --git a/tests/functionality/testKLU_Rf.cpp b/tests/functionality/testKLU_Rf.cpp index b7e725a7..729968f5 100644 --- a/tests/functionality/testKLU_Rf.cpp +++ b/tests/functionality/testKLU_Rf.cpp @@ -11,7 +11,7 @@ #include #include #include -#include +#include //author: KS //functionality test to check whether cuSolverRf works correctly. diff --git a/tests/functionality/testKLU_Rf_FGMRES.cpp b/tests/functionality/testKLU_Rf_FGMRES.cpp index 3e4b9d61..a474e406 100644 --- a/tests/functionality/testKLU_Rf_FGMRES.cpp +++ b/tests/functionality/testKLU_Rf_FGMRES.cpp @@ -12,7 +12,7 @@ #include #include #include -#include +#include //author: KS //functionality test to check whether cuSolverRf/FGMRES works correctly. diff --git a/tests/unit/matrix/MatrixHandlerTests.hpp b/tests/unit/matrix/MatrixHandlerTests.hpp index 3e0ec56d..c94524e3 100644 --- a/tests/unit/matrix/MatrixHandlerTests.hpp +++ b/tests/unit/matrix/MatrixHandlerTests.hpp @@ -5,7 +5,7 @@ #include #include #include -#include +#include #include #include #include @@ -73,7 +73,7 @@ class MatrixHandlerTests : TestBase ReSolve::MatrixHandler* createMatrixHandler() { if (memspace_ == "cpu") { - LinAlgWorkspace* workpsace = new LinAlgWorkspace(); + LinAlgWorkspaceCpu* workpsace = new LinAlgWorkspaceCpu(); return new MatrixHandler(workpsace); } else if (memspace_ == "cuda") { LinAlgWorkspaceCUDA* workspace = new LinAlgWorkspaceCUDA(); diff --git a/tests/unit/vector/GramSchmidtTests.hpp b/tests/unit/vector/GramSchmidtTests.hpp index eddd800e..6f562739 100644 --- a/tests/unit/vector/GramSchmidtTests.hpp +++ b/tests/unit/vector/GramSchmidtTests.hpp @@ -6,7 +6,7 @@ #include #include #include -#include +#include namespace ReSolve { namespace tests { @@ -127,7 +127,7 @@ namespace ReSolve { ReSolve::VectorHandler* createVectorHandler() { if (memspace_ == "cpu") { // TODO: Fix memory leak here - LinAlgWorkspace* workpsace = new LinAlgWorkspace(); + LinAlgWorkspaceCpu* workpsace = new LinAlgWorkspaceCpu(); return new VectorHandler(workpsace); } else if (memspace_ == "cuda") { LinAlgWorkspaceCUDA* workspace = new LinAlgWorkspaceCUDA(); diff --git a/tests/unit/vector/VectorHandlerTests.hpp b/tests/unit/vector/VectorHandlerTests.hpp index cc84f25b..dc6dabb8 100644 --- a/tests/unit/vector/VectorHandlerTests.hpp +++ b/tests/unit/vector/VectorHandlerTests.hpp @@ -7,7 +7,7 @@ #include #include #include -#include +#include namespace ReSolve { namespace tests { @@ -185,7 +185,6 @@ namespace ReSolve { TestOutcome gemv(index_type N, index_type K) { TestStatus status; - // ReSolve::LinAlgWorkspace* workspace = createLinAlgWorkspace(memspace_); ReSolve::VectorHandler* handler = createVectorHandler(); vector::Vector* V = new vector::Vector(N, K); // for the test with NO TRANSPOSE @@ -223,7 +222,7 @@ namespace ReSolve { ReSolve::VectorHandler* createVectorHandler() { if (memspace_ == "cpu") { - LinAlgWorkspace* workpsace = new LinAlgWorkspace(); + LinAlgWorkspaceCpu* workpsace = new LinAlgWorkspaceCpu(); return new VectorHandler(workpsace); } else if (memspace_ == "cuda") { LinAlgWorkspaceCUDA* workspace = new LinAlgWorkspaceCUDA(); From 7a920bed286c1a3d0cdfbc90c59ab61378811d4a Mon Sep 17 00:00:00 2001 From: Slaven Peles Date: Thu, 19 Oct 2023 18:25:04 -0400 Subject: [PATCH 11/15] Put preprocessor guards around CUDA code in unit tests. --- tests/unit/matrix/MatrixHandlerTests.hpp | 4 +++- tests/unit/matrix/runMatrixHandlerTests.cpp | 2 ++ tests/unit/vector/GramSchmidtTests.hpp | 4 +++- tests/unit/vector/VectorHandlerTests.hpp | 4 +++- tests/unit/vector/runGramSchmidtTests.cpp | 2 ++ tests/unit/vector/runVectorHandlerTests.cpp | 2 ++ 6 files changed, 15 insertions(+), 3 deletions(-) diff --git a/tests/unit/matrix/MatrixHandlerTests.hpp b/tests/unit/matrix/MatrixHandlerTests.hpp index c94524e3..0aa476f8 100644 --- a/tests/unit/matrix/MatrixHandlerTests.hpp +++ b/tests/unit/matrix/MatrixHandlerTests.hpp @@ -75,12 +75,14 @@ class MatrixHandlerTests : TestBase if (memspace_ == "cpu") { LinAlgWorkspaceCpu* workpsace = new LinAlgWorkspaceCpu(); return new MatrixHandler(workpsace); +#ifdef RESOLVE_USE_CUDA } else if (memspace_ == "cuda") { LinAlgWorkspaceCUDA* workspace = new LinAlgWorkspaceCUDA(); workspace->initializeHandles(); return new MatrixHandler(workspace); +#endif } else { - std::cout << "Invalid memory space " << memspace_ << "\n"; + std::cout << "ReSolve not built with support for memory space " << memspace_ << "\n"; } return nullptr; } diff --git a/tests/unit/matrix/runMatrixHandlerTests.cpp b/tests/unit/matrix/runMatrixHandlerTests.cpp index d18f0965..6eee90d5 100644 --- a/tests/unit/matrix/runMatrixHandlerTests.cpp +++ b/tests/unit/matrix/runMatrixHandlerTests.cpp @@ -20,6 +20,7 @@ int main(int, char**) std::cout << "\n"; } +#ifdef RESOLVE_USE_CUDA { std::cout << "Running tests with CUDA backend:\n"; ReSolve::tests::MatrixHandlerTests test("cuda"); @@ -30,6 +31,7 @@ int main(int, char**) std::cout << "\n"; } +#endif return result.summary(); } diff --git a/tests/unit/vector/GramSchmidtTests.hpp b/tests/unit/vector/GramSchmidtTests.hpp index 6f562739..9981ea48 100644 --- a/tests/unit/vector/GramSchmidtTests.hpp +++ b/tests/unit/vector/GramSchmidtTests.hpp @@ -129,12 +129,14 @@ namespace ReSolve { if (memspace_ == "cpu") { // TODO: Fix memory leak here LinAlgWorkspaceCpu* workpsace = new LinAlgWorkspaceCpu(); return new VectorHandler(workpsace); +#ifdef RESOLVE_USE_CUDA } else if (memspace_ == "cuda") { LinAlgWorkspaceCUDA* workspace = new LinAlgWorkspaceCUDA(); workspace->initializeHandles(); return new VectorHandler(workspace); +#endif } else { - std::cout << "Invalid memory space " << memspace_ << "\n"; + std::cout << "ReSolve not built with support for memory space " << memspace_ << "\n"; } return nullptr; } diff --git a/tests/unit/vector/VectorHandlerTests.hpp b/tests/unit/vector/VectorHandlerTests.hpp index dc6dabb8..d2f8c73c 100644 --- a/tests/unit/vector/VectorHandlerTests.hpp +++ b/tests/unit/vector/VectorHandlerTests.hpp @@ -224,12 +224,14 @@ namespace ReSolve { if (memspace_ == "cpu") { LinAlgWorkspaceCpu* workpsace = new LinAlgWorkspaceCpu(); return new VectorHandler(workpsace); +#ifdef RESOLVE_USE_CUDA } else if (memspace_ == "cuda") { LinAlgWorkspaceCUDA* workspace = new LinAlgWorkspaceCUDA(); workspace->initializeHandles(); return new VectorHandler(workspace); +#endif } else { - std::cout << "Invalid memory space " << memspace_ << "\n"; + std::cout << "ReSolve not built with support for memory space " << memspace_ << "\n"; } return nullptr; } diff --git a/tests/unit/vector/runGramSchmidtTests.cpp b/tests/unit/vector/runGramSchmidtTests.cpp index 2b5a3a17..e118eb6d 100644 --- a/tests/unit/vector/runGramSchmidtTests.cpp +++ b/tests/unit/vector/runGramSchmidtTests.cpp @@ -7,6 +7,7 @@ int main(int, char**) { ReSolve::tests::TestingResults result; +#ifdef RESOLVE_USE_CUDA { std::cout << "Running tests with CUDA backend:\n"; ReSolve::tests::GramSchmidtTests test("cuda"); @@ -18,6 +19,7 @@ int main(int, char**) result += test.orthogonalize(5000, ReSolve::GramSchmidt::mgs_pm); std::cout << "\n"; } +#endif return result.summary(); } diff --git a/tests/unit/vector/runVectorHandlerTests.cpp b/tests/unit/vector/runVectorHandlerTests.cpp index ae40f5d7..77e99471 100644 --- a/tests/unit/vector/runVectorHandlerTests.cpp +++ b/tests/unit/vector/runVectorHandlerTests.cpp @@ -19,6 +19,7 @@ int main(int, char**) std::cout << "\n"; } +#ifdef RESOLVE_USE_CUDA { std::cout << "Running tests with CUDA backend:\n"; ReSolve::tests::VectorHandlerTests test("cuda"); @@ -34,6 +35,7 @@ int main(int, char**) std::cout << "\n"; } +#endif return result.summary(); } From 5c5bdd85279e0e89efb02e2daaef7b489920a7b0 Mon Sep 17 00:00:00 2001 From: Slaven Peles Date: Thu, 19 Oct 2023 19:46:13 -0400 Subject: [PATCH 12/15] Make CUDA optional dependency (lots of cleanup needed. --- examples/CMakeLists.txt | 1 + examples/r_KLU_KLU.cpp | 1 + examples/r_KLU_KLU_standalone.cpp | 1 + resolve/CMakeLists.txt | 15 +++++++++++++-- resolve/cpu/CMakeLists.txt | 1 + resolve/cpu/cpuVectorKernels.cpp | 15 +++++++++++++++ resolve/matrix/CMakeLists.txt | 24 +++++++++++++++++++----- resolve/matrix/MatrixHandler.cpp | 7 +++++++ resolve/vector/CMakeLists.txt | 24 +++++++++++++++++++----- resolve/vector/VectorHandler.cpp | 9 +++++++-- resolve/workspace/CMakeLists.txt | 17 ++++++++++++----- 11 files changed, 96 insertions(+), 19 deletions(-) create mode 100644 resolve/cpu/cpuVectorKernels.cpp diff --git a/examples/CMakeLists.txt b/examples/CMakeLists.txt index 6250400b..f2d323f4 100644 --- a/examples/CMakeLists.txt +++ b/examples/CMakeLists.txt @@ -14,6 +14,7 @@ target_link_libraries(klu_klu.exe PRIVATE ReSolve) add_executable(klu_klu_standalone.exe r_KLU_KLU_standalone.cpp) target_link_libraries(klu_klu_standalone.exe PRIVATE ReSolve) +# Create CUDA examples if(RESOLVE_USE_CUDA) # Build example with KLU factorization and GLU refactorization diff --git a/examples/r_KLU_KLU.cpp b/examples/r_KLU_KLU.cpp index 550144ae..19c9fc33 100644 --- a/examples/r_KLU_KLU.cpp +++ b/examples/r_KLU_KLU.cpp @@ -1,5 +1,6 @@ #include #include +#include #include #include diff --git a/examples/r_KLU_KLU_standalone.cpp b/examples/r_KLU_KLU_standalone.cpp index 628eeb06..9d658b6e 100644 --- a/examples/r_KLU_KLU_standalone.cpp +++ b/examples/r_KLU_KLU_standalone.cpp @@ -1,5 +1,6 @@ #include #include +#include #include #include diff --git a/resolve/CMakeLists.txt b/resolve/CMakeLists.txt index 2efac125..8dbcc467 100644 --- a/resolve/CMakeLists.txt +++ b/resolve/CMakeLists.txt @@ -8,16 +8,21 @@ add_subdirectory(utilities) +# C++ files set(ReSolve_SRC LinSolver.cpp LinSolverDirectKLU.cpp +) + +# C++ code that links to CUDA SDK libraries +set(ReSolve_CUDASDK_SRC LinSolverIterativeFGMRES.cpp GramSchmidt.cpp LinSolverDirectCuSolverGLU.cpp LinSolverDirectCuSolverRf.cpp ) - +# Header files to be installed set(ReSolve_HEADER_INSTALL Common.hpp cusolver_defs.hpp @@ -71,17 +76,21 @@ set(ReSolve_Targets_List resolve_workspace ) +# If CUDA support is enabled add CUDA SDK specific code and dependencies if(RESOLVE_USE_CUDA) + set(ReSolve_SRC ${ReSolve_SRC} ${ReSolve_CUDASDK_SRC}) set(ReSolve_Targets_List ${ReSolve_Targets_List} resolve_backend_cuda) endif() +# If no GPU support is enabled, link to dummy device backend if(NOT RESOLVE_USE_GPU) set(ReSolve_Targets_List ${ReSolve_Targets_List} resolve_backend_cpu) endif() - +# Set installable targets install(TARGETS ${ReSolve_Targets_List} EXPORT ReSolveTargets) +# Create ReSolve library add_library(ReSolve SHARED ${ReSolve_SRC}) target_include_directories(ReSolve INTERFACE @@ -92,10 +101,12 @@ target_include_directories(ReSolve INTERFACE # TODO: Make this PRIVATE dependency (requires refactoring ReSolve code) target_link_libraries(ReSolve PUBLIC ${ReSolve_Targets_List}) +# Install targets install(TARGETS ReSolve EXPORT ReSolveTargets ARCHIVE DESTINATION lib LIBRARY DESTINATION lib) + # install include headers install(FILES ${ReSolve_HEADER_INSTALL} DESTINATION include/resolve) diff --git a/resolve/cpu/CMakeLists.txt b/resolve/cpu/CMakeLists.txt index cb84dc55..4e58e94a 100644 --- a/resolve/cpu/CMakeLists.txt +++ b/resolve/cpu/CMakeLists.txt @@ -8,6 +8,7 @@ set(ReSolve_CPU_SRC MemoryUtils.cpp + cpuVectorKernels.cpp ) set(ReSolve_CPU_HEADER_INSTALL diff --git a/resolve/cpu/cpuVectorKernels.cpp b/resolve/cpu/cpuVectorKernels.cpp new file mode 100644 index 00000000..26a20413 --- /dev/null +++ b/resolve/cpu/cpuVectorKernels.cpp @@ -0,0 +1,15 @@ +#include +#include + + +namespace ReSolve { namespace vector { + + +void set_array_const(index_type n, real_type val, real_type* arr) +{ + for(index_type i = 0; i < n; ++i) { + arr[i] = val; + } +} + +}} // namespace ReSolve::vector \ No newline at end of file diff --git a/resolve/matrix/CMakeLists.txt b/resolve/matrix/CMakeLists.txt index 5d05df1b..78b1689c 100644 --- a/resolve/matrix/CMakeLists.txt +++ b/resolve/matrix/CMakeLists.txt @@ -6,7 +6,7 @@ ]] - +# C++ code set(Matrix_SRC io.cpp Sparse.cpp @@ -15,10 +15,14 @@ set(Matrix_SRC Coo.cpp MatrixHandler.cpp MatrixHandlerCpu.cpp - MatrixHandlerCuda.cpp ) +# C++ code that depends on CUDA SDK libraries +set(Matrix_CUDASDK_SRC + MatrixHandlerCuda.cpp +) +# Header files to be installed set(Matrix_HEADER_INSTALL io.hpp Sparse.hpp @@ -28,13 +32,23 @@ set(Matrix_HEADER_INSTALL MatrixHandler.hpp ) +# Add CUDA matrix handler if CUDA support is enabled +if(RESOLVE_USE_CUDA) + set(Matrix_SRC ${Matrix_SRC} ${Matrix_CUDASDK_SRC}) +endif() + + # Build shared library ReSolve::matrix add_library(resolve_matrix SHARED ${Matrix_SRC}) +# Link to CUDA ReSolve backend if CUDA is support enabled if (RESOLVE_USE_CUDA) - target_link_libraries(resolve_matrix PUBLIC resolve_backend_cuda) -else() - target_link_libraries(resolve_matrix PUBLIC resolve_backend_cpu) + target_link_libraries(resolve_matrix PUBLIC resolve_backend_cuda) +endif() + +# Link to dummy device backend if GPU support is not enabled +if (NOT RESOLVE_USE_GPU) + target_link_libraries(resolve_matrix PUBLIC resolve_backend_cpu) endif() diff --git a/resolve/matrix/MatrixHandler.cpp b/resolve/matrix/MatrixHandler.cpp index c4849c18..b0ae516e 100644 --- a/resolve/matrix/MatrixHandler.cpp +++ b/resolve/matrix/MatrixHandler.cpp @@ -9,7 +9,10 @@ #include #include "MatrixHandler.hpp" #include "MatrixHandlerCpu.hpp" + +#ifdef RESOLVE_USE_CUDA #include "MatrixHandlerCuda.hpp" +#endif namespace ReSolve { // Create a shortcut name for Logger static class @@ -28,7 +31,9 @@ namespace ReSolve { { this->new_matrix_ = true; cpuImpl_ = new MatrixHandlerCpu(); +#ifdef RESOLVE_USE_CUDA cudaImpl_ = new MatrixHandlerCuda(); +#endif } /** @@ -62,6 +67,7 @@ namespace ReSolve { * * @post A CUDA implementation instance is created with supplied workspace. */ +#ifdef RESOLVE_USE_CUDA MatrixHandler::MatrixHandler(LinAlgWorkspaceCUDA* new_workspace) { cpuImpl_ = new MatrixHandlerCpu(); @@ -69,6 +75,7 @@ namespace ReSolve { isCpuEnabled_ = true; isCudaEnabled_ = true; } +#endif void MatrixHandler::setValuesChanged(bool isValuesChanged, std::string memspace) { diff --git a/resolve/vector/CMakeLists.txt b/resolve/vector/CMakeLists.txt index f418bf55..ca056acb 100644 --- a/resolve/vector/CMakeLists.txt +++ b/resolve/vector/CMakeLists.txt @@ -6,29 +6,43 @@ ]] - +# C++ code set(Vector_SRC Vector.cpp VectorHandler.cpp VectorHandlerCpu.cpp - VectorHandlerCuda.cpp ) +# C++ code that depends on CUDA SDK libraries +set(Vector_CUDASDK_SRC + VectorHandlerCuda.cpp +) +# Header files to be installed set(Vector_HEADER_INSTALL Vector.hpp VectorHandler.hpp VectorKernels.hpp ) +# Add CUDA vector handler if CUDA support is enabled +if(RESOLVE_USE_CUDA) + set(Vector_SRC ${Vector_SRC} ${Vector_CUDASDK_SRC}) +endif() + add_library(resolve_vector SHARED ${Vector_SRC}) +# Link to ReSolve CUDA backend if CUDA is enabled if (RESOLVE_USE_CUDA) - target_link_libraries(resolve_vector PUBLIC resolve_backend_cuda) -else() - target_link_libraries(resolve_vector PUBLIC resolve_backend_cpu) + target_link_libraries(resolve_vector PUBLIC resolve_backend_cuda) endif() +# If no GPU is enabled link to dummy device backend +if(NOT RESOLVE_USE_GPU) + target_link_libraries(resolve_vector PUBLIC resolve_backend_cpu) +endif(NOT RESOLVE_USE_GPU) + + target_include_directories(resolve_vector INTERFACE $ $ diff --git a/resolve/vector/VectorHandler.cpp b/resolve/vector/VectorHandler.cpp index 0feb96a8..8c89cb2f 100644 --- a/resolve/vector/VectorHandler.cpp +++ b/resolve/vector/VectorHandler.cpp @@ -1,14 +1,17 @@ #include +#include #include -#include #include #include #include #include -#include #include "VectorHandler.hpp" +#ifdef RESOLVE_USE_CUDA +#include +#endif + namespace ReSolve { using out = io::Logger; @@ -32,6 +35,7 @@ namespace ReSolve { isCpuEnabled_ = true; } +#ifdef RESOLVE_USE_CUDA /** * @brief constructor * @@ -45,6 +49,7 @@ namespace ReSolve { isCudaEnabled_ = true; isCpuEnabled_ = true; } +#endif /** * @brief destructor diff --git a/resolve/workspace/CMakeLists.txt b/resolve/workspace/CMakeLists.txt index c6225487..673fac4b 100644 --- a/resolve/workspace/CMakeLists.txt +++ b/resolve/workspace/CMakeLists.txt @@ -6,11 +6,13 @@ ]] +# C++ code set(ReSolve_Workspace_SRC LinAlgWorkspaceCpu.cpp ) -set(ReSolve_Workspace_CUDA_SRC # consider adding a separate dir for cuda-sdk dependent code +# C++ code that depends on CUDA SDK libraries +set(ReSolve_Workspace_CUDASDK_SRC LinAlgWorkspaceCUDA.cpp ) @@ -20,12 +22,17 @@ set(ReSolve_Workspace_HEADER_INSTALL LinAlgWorkspaceCUDA.hpp ) +# If cuda is enabled, add CUDA SDK workspace files +if(RESOLVE_USE_CUDA) + set(ReSolve_Workspace_SRC ${ReSolve_Workspace_SRC} ${ReSolve_Workspace_CUDASDK_SRC}) +endif() + +add_library(resolve_workspace SHARED ${ReSolve_Workspace_SRC}) + +# If CUDA is enabled, link to ReSolve CUDA backend if(RESOLVE_USE_CUDA) - add_library(resolve_workspace SHARED ${ReSolve_Workspace_SRC} ${ReSolve_Workspace_CUDA_SRC}) target_link_libraries(resolve_workspace PUBLIC resolve_backend_cuda) -else(RESOLVE_USE_CUDA) - add_library(resolve_workspace SHARED ${ReSolve_Workspace_SRC}) -endif(RESOLVE_USE_CUDA) +endif(RESOLVE_USE_CUDA) target_include_directories(resolve_workspace INTERFACE $ From 977e1dab2b9a8eef2b633bc343e7988ad772f022 Mon Sep 17 00:00:00 2001 From: Slaven Peles Date: Thu, 19 Oct 2023 22:48:39 -0400 Subject: [PATCH 13/15] Fixes to CMake --- resolve/cpu/CMakeLists.txt | 2 ++ resolve/matrix/CMakeLists.txt | 1 + resolve/vector/CMakeLists.txt | 1 + tests/unit/CMakeLists.txt | 8 +++----- 4 files changed, 7 insertions(+), 5 deletions(-) diff --git a/resolve/cpu/CMakeLists.txt b/resolve/cpu/CMakeLists.txt index 4e58e94a..7105655c 100644 --- a/resolve/cpu/CMakeLists.txt +++ b/resolve/cpu/CMakeLists.txt @@ -17,6 +17,8 @@ set(ReSolve_CPU_HEADER_INSTALL # First create dummy backend add_library(resolve_backend_cpu SHARED ${ReSolve_CPU_SRC}) +target_link_libraries(resolve_backend_cpu PRIVATE resolve_logger) + target_include_directories(resolve_backend_cpu INTERFACE $ $ diff --git a/resolve/matrix/CMakeLists.txt b/resolve/matrix/CMakeLists.txt index 78b1689c..554c0ba7 100644 --- a/resolve/matrix/CMakeLists.txt +++ b/resolve/matrix/CMakeLists.txt @@ -40,6 +40,7 @@ endif() # Build shared library ReSolve::matrix add_library(resolve_matrix SHARED ${Matrix_SRC}) +target_link_libraries(resolve_matrix PRIVATE resolve_logger resolve_vector) # Link to CUDA ReSolve backend if CUDA is support enabled if (RESOLVE_USE_CUDA) diff --git a/resolve/vector/CMakeLists.txt b/resolve/vector/CMakeLists.txt index ca056acb..16d53010 100644 --- a/resolve/vector/CMakeLists.txt +++ b/resolve/vector/CMakeLists.txt @@ -31,6 +31,7 @@ if(RESOLVE_USE_CUDA) endif() add_library(resolve_vector SHARED ${Vector_SRC}) +target_link_libraries(resolve_vector PRIVATE resolve_logger) # Link to ReSolve CUDA backend if CUDA is enabled if (RESOLVE_USE_CUDA) diff --git a/tests/unit/CMakeLists.txt b/tests/unit/CMakeLists.txt index d61250fe..f91c2ff7 100644 --- a/tests/unit/CMakeLists.txt +++ b/tests/unit/CMakeLists.txt @@ -6,8 +6,6 @@ ]] -if(RESOLVE_USE_CUDA) - add_subdirectory(matrix) - add_subdirectory(vector) - add_subdirectory(utilities) -endif(RESOLVE_USE_CUDA) +add_subdirectory(matrix) +add_subdirectory(vector) +add_subdirectory(utilities) From 3037513c3e1b59877cc047e56d96f054dffa4886 Mon Sep 17 00:00:00 2001 From: Slaven Peles Date: Thu, 19 Oct 2023 23:57:37 -0400 Subject: [PATCH 14/15] Code cleanup. --- resolve/cuda/CMakeLists.txt | 5 +++- resolve/matrix/MatrixHandler.cpp | 35 ++++++++++++------------ resolve/matrix/MatrixHandler.hpp | 18 +++++++----- resolve/matrix/MatrixHandlerCpu.cpp | 13 ++++----- resolve/matrix/MatrixHandlerCpu.hpp | 10 +------ resolve/matrix/MatrixHandlerCuda.cpp | 5 ---- resolve/matrix/MatrixHandlerCuda.hpp | 14 ++-------- resolve/matrix/MatrixHandlerImpl.hpp | 12 +------- tests/unit/matrix/MatrixHandlerTests.hpp | 4 +-- 9 files changed, 43 insertions(+), 73 deletions(-) diff --git a/resolve/cuda/CMakeLists.txt b/resolve/cuda/CMakeLists.txt index 04a9c2ff..f97267bc 100644 --- a/resolve/cuda/CMakeLists.txt +++ b/resolve/cuda/CMakeLists.txt @@ -21,8 +21,11 @@ set(ReSolve_CUDA_HEADER_INSTALL set_source_files_properties(${ReSolve_CUDA_SRC} PROPERTIES LANGUAGE CUDA) -# First create CUDA backend (this should really be CUDA _API_ backend, separate backend will be needed for CUDA SDK) +# First create CUDA backend +# (this should really be CUDA _API_ backend, +# separate backend will be needed for CUDA SDK) add_library(resolve_backend_cuda SHARED ${ReSolve_CUDA_SRC}) +target_link_libraries(resolve_backend_cuda PRIVATE resolve_logger) target_link_libraries(resolve_backend_cuda PUBLIC resolve_cuda) target_include_directories(resolve_backend_cuda INTERFACE $ diff --git a/resolve/matrix/MatrixHandler.cpp b/resolve/matrix/MatrixHandler.cpp index b0ae516e..8bf4302c 100644 --- a/resolve/matrix/MatrixHandler.cpp +++ b/resolve/matrix/MatrixHandler.cpp @@ -29,11 +29,8 @@ namespace ReSolve { */ MatrixHandler::MatrixHandler() { - this->new_matrix_ = true; - cpuImpl_ = new MatrixHandlerCpu(); -#ifdef RESOLVE_USE_CUDA - cudaImpl_ = new MatrixHandlerCuda(); -#endif + new_matrix_ = true; + cpuImpl_ = new MatrixHandlerCpu(); } /** @@ -59,6 +56,7 @@ namespace ReSolve { isCudaEnabled_ = false; } +#ifdef RESOLVE_USE_CUDA /** * @brief Constructor taking pointer to the CUDA workspace as its parameter. * @@ -67,7 +65,6 @@ namespace ReSolve { * * @post A CUDA implementation instance is created with supplied workspace. */ -#ifdef RESOLVE_USE_CUDA MatrixHandler::MatrixHandler(LinAlgWorkspaceCUDA* new_workspace) { cpuImpl_ = new MatrixHandlerCpu(); @@ -88,13 +85,14 @@ namespace ReSolve { } } + /** + * @brief Converts COO to CSR matrix format. + * + * Conversion takes place on CPU, and then CSR matrix is copied to `memspace`. + */ int MatrixHandler::coo2csr(matrix::Coo* A_coo, matrix::Csr* A_csr, std::string memspace) { - //this happens on the CPU not on the GPU - //but will return whatever memspace requested. - //count nnzs first - index_type nnz_unpacked = 0; index_type nnz = A_coo->getNnz(); index_type n = A_coo->getNumRows(); @@ -248,14 +246,15 @@ namespace ReSolve { } /** - * @brief Matrix vector product method result = alpha *A*x + beta * result - * @param A - * @param vec_x - * @param vec_result - * @param[in] alpha - * @param[in] beta - * @param[in] matrixFormat - * @param[in] memspace + * @brief Matrix vector product: result = alpha * A * x + beta * result + * + * @param[in] A - Sparse matrix + * @param[in] vec_x - Vector multiplied by the matrix + * @param[out] vec_result - Vector where the result is stored + * @param[in] alpha - scalar parameter + * @param[in] beta - scalar parameter + * @param[in] matrixFormat - Only CSR format is supported at this time + * @param[in] memspace - Device where the product is computed * @return result := alpha * A * x + beta * result */ int MatrixHandler::matvec(matrix::Sparse* A, diff --git a/resolve/matrix/MatrixHandler.hpp b/resolve/matrix/MatrixHandler.hpp index e05e6c8f..398a8039 100644 --- a/resolve/matrix/MatrixHandler.hpp +++ b/resolve/matrix/MatrixHandler.hpp @@ -33,6 +33,10 @@ namespace ReSolve { * - Matrix vector product (SpMV) * - Matrix 1-norm * + * The class uses pointer to implementation (PIMPL) idiom to create + * multiple matrix operation implementations running on CUDA and HIP devices + * as well as on CPU. + * * @author Kasia Swirydowicz * @author Slaven Peles */ @@ -46,7 +50,7 @@ namespace ReSolve { MatrixHandler(LinAlgWorkspaceCUDA* workspace); ~MatrixHandler(); - int csc2csr(matrix::Csc* A_csc, matrix::Csr* A_csr, std::string memspace); //memspace decides on what is returned (cpu or cuda pointer) + int csc2csr(matrix::Csc* A_csc, matrix::Csr* A_csr, std::string memspace); int coo2csr(matrix::Coo* A_coo, matrix::Csr* A_csr, std::string memspace); /// Should compute vec_result := alpha*A*vec_x + beta*vec_result, but at least on cpu alpha and beta are flipped @@ -61,14 +65,14 @@ namespace ReSolve { void setValuesChanged(bool toWhat, std::string memspace); private: - bool new_matrix_{true}; ///< if the structure changed, you need a new handler. + bool new_matrix_{true}; ///< if the structure changed, you need a new handler. - MemoryHandler mem_; ///< Device memory manager object - MatrixHandlerImpl* cpuImpl_{nullptr}; - MatrixHandlerImpl* cudaImpl_{nullptr}; + MemoryHandler mem_; ///< Device memory manager object + MatrixHandlerImpl* cpuImpl_{nullptr}; ///< Pointer to CPU implementation + MatrixHandlerImpl* cudaImpl_{nullptr}; ///< Pointer to CUDA implementation - bool isCpuEnabled_{false}; - bool isCudaEnabled_{false}; + bool isCpuEnabled_{false}; ///< true if CPU implementation is instantiated + bool isCudaEnabled_{false}; ///< true if CUDA implementation is instantiated }; } // namespace ReSolve diff --git a/resolve/matrix/MatrixHandlerCpu.cpp b/resolve/matrix/MatrixHandlerCpu.cpp index 95637402..2c434dcb 100644 --- a/resolve/matrix/MatrixHandlerCpu.cpp +++ b/resolve/matrix/MatrixHandlerCpu.cpp @@ -14,8 +14,6 @@ namespace ReSolve { MatrixHandlerCpu::MatrixHandlerCpu() { - // new_matrix_ = true; - // values_changed_ = true; } MatrixHandlerCpu::~MatrixHandlerCpu() @@ -27,17 +25,15 @@ namespace ReSolve { workspace_ = new_workspace; } - // bool MatrixHandlerCpu::getValuesChanged() - // { - // return this->values_changed_; - // } - void MatrixHandlerCpu::setValuesChanged(bool values_changed) { values_changed_ = values_changed; } + /** + * @brief result := alpha * A * x + beta * result + */ int MatrixHandlerCpu::matvec(matrix::Sparse* Ageneric, vector_type* vec_x, vector_type* vec_result, @@ -49,7 +45,6 @@ namespace ReSolve { // int error_sum = 0; if (matrixFormat == "csr") { matrix::Csr* A = (matrix::Csr*) Ageneric; - //result = alpha *A*x + beta * result index_type* ia = A->getRowData("cpu"); index_type* ja = A->getColData("cpu"); real_type* a = A->getValues("cpu"); @@ -90,6 +85,8 @@ namespace ReSolve { } /** + * @brief Convert CSC to CSR matrix on the host + * * @authors Slaven Peles , Daniel Reynolds (SMU), and * David Gardner and Carol Woodward (LLNL) */ diff --git a/resolve/matrix/MatrixHandlerCpu.hpp b/resolve/matrix/MatrixHandlerCpu.hpp index 3b6c91c0..0b0afbd3 100644 --- a/resolve/matrix/MatrixHandlerCpu.hpp +++ b/resolve/matrix/MatrixHandlerCpu.hpp @@ -1,8 +1,3 @@ -// this class encapsulates various matrix manipulation operations, commonly required by linear solvers: -// this includes -// (1) Matrix format conversion: coo2csr, csr2csc -// (2) Matrix vector product (SpMV) -// (3) Matrix 1-norm #pragma once #include #include @@ -40,10 +35,8 @@ namespace ReSolve { MatrixHandlerCpu(LinAlgWorkspaceCpu* workspace); virtual ~MatrixHandlerCpu(); - int csc2csr(matrix::Csc* A_csc, matrix::Csr* A_csr); //memspace decides on what is returned (cpu or cuda pointer) - // int coo2csr(matrix::Coo* A_coo, matrix::Csr* A_csr, std::string memspace); + int csc2csr(matrix::Csc* A_csc, matrix::Csr* A_csr); - /// Should compute vec_result := alpha*A*vec_x + beta*vec_result, but at least on cpu alpha and beta are flipped virtual int matvec(matrix::Sparse* A, vector_type* vec_x, vector_type* vec_result, @@ -55,7 +48,6 @@ namespace ReSolve { private: LinAlgWorkspaceCpu* workspace_{nullptr}; - // bool new_matrix_{true}; ///< if the structure changed, you need a new handler. bool values_changed_{true}; ///< needed for matvec MemoryHandler mem_; ///< Device memory manager object diff --git a/resolve/matrix/MatrixHandlerCuda.cpp b/resolve/matrix/MatrixHandlerCuda.cpp index b27a63c0..3405ba8d 100644 --- a/resolve/matrix/MatrixHandlerCuda.cpp +++ b/resolve/matrix/MatrixHandlerCuda.cpp @@ -12,11 +12,6 @@ namespace ReSolve { // Create a shortcut name for Logger static class using out = io::Logger; - MatrixHandlerCuda::MatrixHandlerCuda() - { - values_changed_ = true; - } - MatrixHandlerCuda::~MatrixHandlerCuda() { } diff --git a/resolve/matrix/MatrixHandlerCuda.hpp b/resolve/matrix/MatrixHandlerCuda.hpp index a8fec606..efd4c356 100644 --- a/resolve/matrix/MatrixHandlerCuda.hpp +++ b/resolve/matrix/MatrixHandlerCuda.hpp @@ -1,8 +1,3 @@ -// this class encapsulates various matrix manipulation operations, commonly required by linear solvers: -// this includes -// (1) Matrix format conversion: coo2csr, csr2csc -// (2) Matrix vector product (SpMV) -// (3) Matrix 1-norm #pragma once #include #include @@ -29,21 +24,17 @@ namespace ReSolve { /** * @class MatrixHandlerCuda * - * @brief CPU implementation of the matrix handler. + * @brief CUDA implementation of the matrix handler. */ class MatrixHandlerCuda : public MatrixHandlerImpl { using vector_type = vector::Vector; public: - MatrixHandlerCuda(); MatrixHandlerCuda(LinAlgWorkspaceCUDA* workspace); virtual ~MatrixHandlerCuda(); - int csc2csr(matrix::Csc* A_csc, matrix::Csr* A_csr); //memspace decides on what is returned (cpu or cuda pointer) - // int coo2csr(matrix::Coo* A_coo, matrix::Csr* A_csr, std::string memspace); - - /// Should compute vec_result := alpha*A*vec_x + beta*vec_result, but at least on cpu alpha and beta are flipped + int csc2csr(matrix::Csc* A_csc, matrix::Csr* A_csr); virtual int matvec(matrix::Sparse* A, vector_type* vec_x, vector_type* vec_result, @@ -55,7 +46,6 @@ namespace ReSolve { private: LinAlgWorkspaceCUDA* workspace_{nullptr}; - // bool new_matrix_{true}; ///< if the structure changed, you need a new handler. bool values_changed_{true}; ///< needed for matvec MemoryHandler mem_; ///< Device memory manager object diff --git a/resolve/matrix/MatrixHandlerImpl.hpp b/resolve/matrix/MatrixHandlerImpl.hpp index b901e2b5..2bef6b3d 100644 --- a/resolve/matrix/MatrixHandlerImpl.hpp +++ b/resolve/matrix/MatrixHandlerImpl.hpp @@ -1,8 +1,3 @@ -// this class encapsulates various matrix manipulation operations, commonly required by linear solvers: -// this includes -// (1) Matrix format conversion: coo2csr, csr2csc -// (2) Matrix vector product (SpMV) -// (3) Matrix 1-norm #pragma once #include #include @@ -20,7 +15,6 @@ namespace ReSolve class Csc; class Csr; } - // class LinAlgWorkspace; } @@ -37,14 +31,11 @@ namespace ReSolve { public: MatrixHandlerImpl() {} - // MatrixHandlerImpl(LinAlgWorkspace* workspace); virtual ~MatrixHandlerImpl() {} virtual int csc2csr(matrix::Csc* A_csc, matrix::Csr* A_csr) = 0; - // int coo2csr(matrix::Coo* A_coo, matrix::Csr* A_csr, std::string memspace); - /// Should compute vec_result := alpha*A*vec_x + beta*vec_result, but at least on cpu alpha and beta are flipped virtual int matvec(matrix::Sparse* A, vector_type* vec_x, vector_type* vec_result, @@ -53,8 +44,7 @@ namespace ReSolve { std::string matrix_type) = 0; virtual int Matrix1Norm(matrix::Sparse* A, real_type* norm) = 0; - virtual void setValuesChanged(bool isValuesChanged) = 0; - + virtual void setValuesChanged(bool isValuesChanged) = 0; }; } // namespace ReSolve diff --git a/tests/unit/matrix/MatrixHandlerTests.hpp b/tests/unit/matrix/MatrixHandlerTests.hpp index 0aa476f8..e203017a 100644 --- a/tests/unit/matrix/MatrixHandlerTests.hpp +++ b/tests/unit/matrix/MatrixHandlerTests.hpp @@ -73,8 +73,8 @@ class MatrixHandlerTests : TestBase ReSolve::MatrixHandler* createMatrixHandler() { if (memspace_ == "cpu") { - LinAlgWorkspaceCpu* workpsace = new LinAlgWorkspaceCpu(); - return new MatrixHandler(workpsace); + LinAlgWorkspaceCpu* workspace = new LinAlgWorkspaceCpu(); + return new MatrixHandler(workspace); #ifdef RESOLVE_USE_CUDA } else if (memspace_ == "cuda") { LinAlgWorkspaceCUDA* workspace = new LinAlgWorkspaceCUDA(); From 126b16aa0f1fe5a9ac11a4c62b232eb78da0c74b Mon Sep 17 00:00:00 2001 From: Slaven Peles Date: Sat, 21 Oct 2023 16:16:06 -0400 Subject: [PATCH 15/15] Fix I/O in examples. --- examples/r_KLU_GLU.cpp | 5 ++- examples/r_KLU_GLU_matrix_values_update.cpp | 5 ++- examples/r_KLU_KLU.cpp | 9 ++--- examples/r_KLU_KLU_standalone.cpp | 7 ++-- examples/r_KLU_rf.cpp | 5 ++- examples/r_KLU_rf_FGMRES.cpp | 12 +++++- .../r_KLU_rf_FGMRES_reuse_factorization.cpp | 38 ++++++++++++++----- runResolve | 6 +-- 8 files changed, 62 insertions(+), 25 deletions(-) diff --git a/examples/r_KLU_GLU.cpp b/examples/r_KLU_GLU.cpp index e0831976..e2cbfde4 100644 --- a/examples/r_KLU_GLU.cpp +++ b/examples/r_KLU_GLU.cpp @@ -1,5 +1,6 @@ #include #include +#include #include #include @@ -148,7 +149,9 @@ int main(int argc, char *argv[]) matrix_handler->setValuesChanged(true, "cuda"); matrix_handler->matvec(A, vec_x, vec_r, &ONE, &MINUSONE,"csr", "cuda"); - printf("\t 2-Norm of the residual: %16.16e\n", sqrt(vector_handler->dot(vec_r, vec_r, "cuda"))); + std::cout << "\t 2-Norm of the residual: " + << std::scientific << std::setprecision(16) + << sqrt(vector_handler->dot(vec_r, vec_r, "cuda")) << "\n"; } // for (int i = 0; i < numSystems; ++i) diff --git a/examples/r_KLU_GLU_matrix_values_update.cpp b/examples/r_KLU_GLU_matrix_values_update.cpp index e6e09673..7d1bb141 100644 --- a/examples/r_KLU_GLU_matrix_values_update.cpp +++ b/examples/r_KLU_GLU_matrix_values_update.cpp @@ -1,5 +1,6 @@ #include #include +#include #include #include @@ -158,7 +159,9 @@ int main(int argc, char *argv[]) matrix_handler->setValuesChanged(true, "cuda"); matrix_handler->matvec(A, vec_x, vec_r, &ONE, &MINUSONE,"csr", "cuda"); - printf("\t 2-Norm of the residual: %16.16e\n", sqrt(vector_handler->dot(vec_r, vec_r, "cuda"))); + std::cout << "\t 2-Norm of the residual: " + << std::scientific << std::setprecision(16) + << sqrt(vector_handler->dot(vec_r, vec_r, "cuda")) << "\n"; } diff --git a/examples/r_KLU_KLU.cpp b/examples/r_KLU_KLU.cpp index 19c9fc33..8b0ea59a 100644 --- a/examples/r_KLU_KLU.cpp +++ b/examples/r_KLU_KLU.cpp @@ -1,5 +1,6 @@ #include #include +#include #include #include @@ -138,12 +139,10 @@ int main(int argc, char *argv[]) matrix_handler->setValuesChanged(true, "cpu"); matrix_handler->matvec(A, vec_x, vec_r, &ONE, &MINUSONE, "csr", "cpu"); - real_type* test = vec_r->getData("cpu"); - (void) test; // TODO: Do we need `test` variable in this example? - - printf("\t 2-Norm of the residual: %16.16e\n", sqrt(vector_handler->dot(vec_r, vec_r, "cpu"))); - + std::cout << "\t 2-Norm of the residual: " + << std::scientific << std::setprecision(16) + << sqrt(vector_handler->dot(vec_r, vec_r, "cpu")) << "\n"; } //now DELETE diff --git a/examples/r_KLU_KLU_standalone.cpp b/examples/r_KLU_KLU_standalone.cpp index 9d658b6e..77e5b97a 100644 --- a/examples/r_KLU_KLU_standalone.cpp +++ b/examples/r_KLU_KLU_standalone.cpp @@ -1,5 +1,6 @@ #include #include +#include #include #include @@ -100,10 +101,10 @@ int main(int argc, char *argv[]) matrix_handler->setValuesChanged(true, "cpu"); matrix_handler->matvec(A, vec_x, vec_r, &ONE, &MINUSONE, "csr", "cpu"); - real_type* test = vec_r->getData("cpu"); - (void) test; // TODO: Do we need `test` variable in this example? - printf("\t 2-Norm of the residual: %16.16e\n", sqrt(vector_handler->dot(vec_r, vec_r, "cpu"))); + std::cout << "\t 2-Norm of the residual: " + << std::scientific << std::setprecision(16) + << sqrt(vector_handler->dot(vec_r, vec_r, "cpu")) << "\n"; diff --git a/examples/r_KLU_rf.cpp b/examples/r_KLU_rf.cpp index 5dce2dfe..01fa0f3c 100644 --- a/examples/r_KLU_rf.cpp +++ b/examples/r_KLU_rf.cpp @@ -1,5 +1,6 @@ #include #include +#include #include #include @@ -162,7 +163,9 @@ int main(int argc, char *argv[] ) matrix_handler->matvec(A, vec_x, vec_r, &ONE, &MINUSONE,"csr", "cuda"); - printf("\t 2-Norm of the residual: %16.16e\n", sqrt(vector_handler->dot(vec_r, vec_r, "cuda"))); + std::cout << "\t 2-Norm of the residual: " + << std::scientific << std::setprecision(16) + << sqrt(vector_handler->dot(vec_r, vec_r, "cuda")) << "\n"; } // for (int i = 0; i < numSystems; ++i) diff --git a/examples/r_KLU_rf_FGMRES.cpp b/examples/r_KLU_rf_FGMRES.cpp index 2e43f9c3..ee674869 100644 --- a/examples/r_KLU_rf_FGMRES.cpp +++ b/examples/r_KLU_rf_FGMRES.cpp @@ -1,5 +1,6 @@ #include #include +#include #include #include @@ -171,12 +172,19 @@ int main(int argc, char *argv[]) matrix_handler->matvec(A, vec_x, vec_r, &ONE, &MINUSONE,"csr", "cuda"); - printf("\t 2-Norm of the residual (before IR): %16.16e\n", sqrt(vector_handler->dot(vec_r, vec_r, "cuda"))/norm_b); + std::cout << "\t 2-Norm of the residual (before IR): " + << std::scientific << std::setprecision(16) + << sqrt(vector_handler->dot(vec_r, vec_r, "cuda"))/norm_b << "\n"; vec_rhs->update(rhs, "cpu", "cuda"); FGMRES->solve(vec_rhs, vec_x); - printf("FGMRES: init nrm: %16.16e final nrm: %16.16e iter: %d \n", FGMRES->getInitResidualNorm()/norm_b, FGMRES->getFinalResidualNorm()/norm_b, FGMRES->getNumIter()); + std::cout << "FGMRES: init nrm: " + << std::scientific << std::setprecision(16) + << FGMRES->getInitResidualNorm()/norm_b + << " final nrm: " + << FGMRES->getFinalResidualNorm()/norm_b + << " iter: " << FGMRES->getNumIter() << "\n"; } } // for (int i = 0; i < numSystems; ++i) diff --git a/examples/r_KLU_rf_FGMRES_reuse_factorization.cpp b/examples/r_KLU_rf_FGMRES_reuse_factorization.cpp index 2317680c..6a520a7a 100644 --- a/examples/r_KLU_rf_FGMRES_reuse_factorization.cpp +++ b/examples/r_KLU_rf_FGMRES_reuse_factorization.cpp @@ -1,5 +1,7 @@ #include #include +#include + #include #include #include @@ -138,7 +140,9 @@ int main(int argc, char *argv[]) norm_b = sqrt(norm_b); matrix_handler->setValuesChanged(true, "cuda"); matrix_handler->matvec(A, vec_x, vec_r, &ONE, &MINUSONE,"csr", "cuda"); - printf("\t 2-Norm of the residual : %16.16e\n", sqrt(vector_handler->dot(vec_r, vec_r, "cuda"))/norm_b); + std::cout << "\t 2-Norm of the residual : " + << std::scientific << std::setprecision(16) + << sqrt(vector_handler->dot(vec_r, vec_r, "cuda"))/norm_b << "\n"; if (i == 1) { ReSolve::matrix::Csc* L_csc = (ReSolve::matrix::Csc*) KLU->getLFactor(); ReSolve::matrix::Csc* U_csc = (ReSolve::matrix::Csc*) KLU->getUFactor(); @@ -147,7 +151,9 @@ int main(int argc, char *argv[]) matrix_handler->csc2csr(L_csc,L, "cuda"); matrix_handler->csc2csr(U_csc,U, "cuda"); - if (L == nullptr) {printf("ERROR");} + if (L == nullptr) { + std::cout << "ERROR\n"; + } index_type* P = KLU->getPOrdering(); index_type* Q = KLU->getQOrdering(); Rf->setup(A, L, U, P, Q); @@ -163,14 +169,17 @@ int main(int argc, char *argv[]) if ((i % 2 == 0)) { status = Rf->refactorize(); - std::cout<<"CUSOLVER RF, using REAL refactorization, refactorization status: "<update(rhs, "cpu", "cuda"); status = Rf->solve(vec_rhs, vec_x); FGMRES->setupPreconditioner("CuSolverRf", Rf); } - //if (i%2!=0) vec_x->setToZero("cuda"); + //if (i%2!=0) vec_x->setToZero("cuda"); real_type norm_x = vector_handler->dot(vec_x, vec_x, "cuda"); - printf("Norm of x(before solve): %16.16e \n", sqrt(norm_x)); + std::cout << "Norm of x (before solve): " + << std::scientific << std::setprecision(16) + << sqrt(norm_x) << "\n"; std::cout<<"CUSOLVER RF solve status: "<update(rhs, "cpu", "cuda"); @@ -183,15 +192,26 @@ int main(int argc, char *argv[]) matrix_handler->matvec(A, vec_x, vec_r, &ONE, &MINUSONE,"csr", "cuda"); - printf("\t 2-Norm of the residual (before IR): %16.16e\n", sqrt(vector_handler->dot(vec_r, vec_r, "cuda"))/norm_b); - printf("\t 2-Norm of the RIGHT HAND SIDE: %16.16e\n", norm_b); + std::cout << "\t 2-Norm of the residual (before IR): " + << std::scientific << std::setprecision(16) + << sqrt(vector_handler->dot(vec_r, vec_r, "cuda"))/norm_b << "\n"; + std::cout << "\t 2-Norm of the RIGHT HAND SIDE: " + << std::scientific << std::setprecision(16) + << norm_b << "\n"; vec_rhs->update(rhs, "cpu", "cuda"); FGMRES->solve(vec_rhs, vec_x); - printf("FGMRES: init nrm: %16.16e final nrm: %16.16e iter: %d \n", FGMRES->getInitResidualNorm()/norm_b, FGMRES->getFinalResidualNorm()/norm_b, FGMRES->getNumIter()); + std::cout << "FGMRES: init nrm: " + << std::scientific << std::setprecision(16) + << FGMRES->getInitResidualNorm()/norm_b + << " final nrm: " + << FGMRES->getFinalResidualNorm()/norm_b + << " iter: " << FGMRES->getNumIter() << "\n"; norm_x = vector_handler->dot(vec_x, vec_x, "cuda"); - printf("Norm of x (after IR): %16.16e \n", sqrt(norm_x)); + std::cout << "Norm of x (after IR): " + << std::scientific << std::setprecision(16) + << sqrt(norm_x) << "\n"; } diff --git a/runResolve b/runResolve index 0a8e5dbf..2d5c67ca 100755 --- a/runResolve +++ b/runResolve @@ -13,9 +13,9 @@ foo=" ${foo} ${i} ${i}" done -matfile="/gpfs/wolf/csc359/scratch/peles/testcases/ACTIVSg10k_AC/matrix_ACTIVSg10k_AC_" +matfile="$HOME/testcases/ACTIVSg10k_AC/matrix_ACTIVSg10k_AC_" -rhsfile="/gpfs/wolf/csc359/scratch/peles/testcases/ACTIVSg10k_AC/rhs_ACTIVSg10k_AC_" +rhsfile="$HOME/testcases/ACTIVSg10k_AC/rhs_ACTIVSg10k_AC_" -./resolve/klu_rf_fgmres.exe $matfile $rhsfile 10 $foo +./$1 $matfile $rhsfile 10 $foo