eth-cscs · kabicm · Nov 11, 2019 · Nov 8, 2019 · Nov 8, 2019 · Nov 8, 2019
diff --git a/CMakeLists.txt b/CMakeLists.txt
@@ -18,16 +18,38 @@ option(COSMA_WITH_APPS "Generate the miniapp targets." ${MASTER_PROJECT})
 option(COSMA_WITH_BENCHMARKS "Generate the benchmark targets." ${MASTER_PROJECT})
 option(COSMA_WITH_INSTALL "Enable installation." ${MASTER_PROJECT})
 option(COSMA_WITH_PROFILING "Enable profiling." OFF)
+option(COSMA_WITH_FOTRAN_BINDINGS "Make Fortran bindings to COSMA." OFF) 
 
-option(COSMA_WITH_GPU "Use CUDA cuBLAS GPU back end." OFF)
-option(COSMA_WITH_MKL_BLAS "Use MKL BLAS back end." OFF)
-option(COSMA_WITH_OPENBLAS "Use OpenBLAS backend." OFF)
-option(COSMA_WITH_NETLIB_BLAS "Use Netlib BLAS backend." OFF)
+set(COSMA_BLAS "MKL" CACHE STRING 
+  "Blas backend. Can be MKL, OPENBLAS, NETLIB, CUDA or ROCM.")
+set_property(CACHE COSMA_BLAS PROPERTY STRINGS 
+  "MKL" "OPENBLAS" "NETLIB" "CUDA" "ROCM")
+
+set(COSMA_SCALAPACK "OFF" CACHE STRING
+  "ScaLAPACK backend. Can be MKL, NETLIB or OFF.")
+set_property(CACHE COSMA_SCALAPACK PROPERTY STRINGS
+  "OFF" "MKL" "NETLIB")
+
+# check if blas backend is valid
+message(STATUS "Selected BLAS backend for COSMA: ${COSMA_BLAS}")
+get_property(BACKEND_LIST CACHE COSMA_BLAS PROPERTY STRINGS)
+if(NOT COSMA_BLAS IN_LIST BACKEND_LIST)
+  message(FATAL_ERROR "Invalid value for COSMA_BLAS!")
+endif()
 
-option(COSMA_WITH_MKL_ScaLAPACK "Use MKL ScaLAPACK back end." OFF) 
-option(COSMA_WITH_NETLIB_ScaLAPACK "Use Netlib ScaLAPACK back end." OFF) 
+# check if scalapack backend is valid
+message(STATUS "Selected ScaLAPACK backend for COSMA: ${COSMA_SCALAPACK}")
+unset(BACKEND_LIST)
+get_property(BACKEND_LIST CACHE COSMA_SCALAPACK PROPERTY STRINGS)
+if(COSMA_SCALAPACK AND NOT COSMA_SCALAPACK IN_LIST BACKEND_LIST)
+  message(FATAL_ERROR "Invalid value for COSMA_SCALAPACK!")
+endif()
 
-option(COSMA_WITH_FOTRAN_BINDINGS "Make Fortran bindings to COSMA." OFF) 
+# CPU-only builds: an enabled ScaLAPACK backend must name the same vendor as the BLAS backend.
+# Operands are quoted so that an empty or variable-shadowed value cannot break the if() parse.
+if (COSMA_SCALAPACK AND NOT "${COSMA_BLAS}" MATCHES "^(CUDA|ROCM)$" AND NOT "${COSMA_BLAS}" STREQUAL "${COSMA_SCALAPACK}")
+  message(FATAL_ERROR "ScaLAPACK backend MUST match BLAS backend if no GPU backend is used!")
+endif()
 
 project(cosma VERSION 0.1 LANGUAGES CXX)
 
@@ -58,38 +80,38 @@ add_subdirectory(libs/grid2grid)
 #
 set(BLAS_TARGET "")
 set(BLAS_DEF "")
-if (COSMA_WITH_MKL_BLAS)
+
+if (${COSMA_BLAS} STREQUAL "MKL")
     find_package(MKL REQUIRED COMPONENTS BLAS_32BIT_OMP)
     set(BLAS_TARGET "mkl::blas_32bit_omp")
     set(BLAS_DEF "COSMA_WITH_MKL_BLAS")
-elseif (COSMA_WITH_GPU)
+elseif (${COSMA_BLAS} STREQUAL "CUDA" OR ${COSMA_BLAS} STREQUAL "ROCM")
     option(TILEDMM_WITH_INSTALL "" ${COSMA_WITH_INSTALL}) 
+    set(TILEMM_GPU_BACKEND "${COSMA_BLAS}" CACHE STRING "" FORCE) # NOTE(review): sibling option is spelled TILEDMM_*; confirm this name against libs/Tiled-MM
     add_subdirectory(libs/Tiled-MM)
     set(BLAS_TARGET "Tiled-MM")
     set(BLAS_DEF "COSMA_HAVE_GPU")
-elseif (COSMA_WITH_OPENBLAS)
+elseif (${COSMA_BLAS} STREQUAL "OPENBLAS")
     find_package(OpenBLAS REQUIRED)
     add_library(openblas SHARED IMPORTED)
     set_target_properties(openblas PROPERTIES 
                                    INTERFACE_INCLUDE_DIRECTORIES "${OpenBLAS_INCLUDE_DIRS}"
                                    IMPORTED_LOCATION "${OpenBLAS_LIBRARIES}")
     set(BLAS_TARGET "openblas")
     set(BLAS_DEF "COSMA_WITH_BLAS")
-elseif (COSMA_WITH_NETLIB_BLAS)
+elseif (${COSMA_BLAS} STREQUAL "NETLIB")
     find_package(cblas NO_MODULE REQUIRED)
     set(BLAS_TARGET "cblas")
     set(BLAS_DEF "COSMA_WITH_BLAS")
-else ()
-    message(FATAL_ERROR "No BLAS back end found! Select a back end and make sure it's installed with support for the cblas interface and CMake config files.")
 endif ()
 
 # (optional) ScaLAPACK providers 
 #
 set(ScaLAPACK_TARGET "")
-if (COSMA_WITH_MKL_ScaLAPACK)
+if ("${COSMA_SCALAPACK}" STREQUAL "MKL")
     find_package(MKL REQUIRED COMPONENTS SCALAPACK_32BIT_OMP)
     set(ScaLAPACK_TARGET "mkl::scalapack_32bit_omp")
-elseif (COSMA_WITH_NETLIB_ScaLAPACK) 
+elseif ("${COSMA_SCALAPACK}" STREQUAL "NETLIB")
     find_package(scalapack REQUIRED)  
     set(ScaLAPACK_TARGET "scalapack")
 else  ()

diff --git a/benchmarks/CMakeLists.txt b/benchmarks/CMakeLists.txt
@@ -12,7 +12,7 @@ foreach(exec ${executables})
     target_link_libraries(${exec} cosma ${cosma_deps})
 endforeach()
 
-if (COSMA_WITH_MKL_BLAS)
+if (${COSMA_BLAS} STREQUAL "MKL")
     add_executable(transpose "transpose.cpp")
     target_link_libraries(transpose cosma ${cosma_deps})
 endif()

diff --git a/cmake/cosmaConfig.cmake.in b/cmake/cosmaConfig.cmake.in
@@ -15,14 +15,27 @@ if(NOT TARGET cosma::cosma)
 
     find_package(MPI)
 
-    if ("@COSMA_WITH_GPU@")
-        find_dependency(Tiled-MM) # bundled
-    elseif ("@COSMA_WITH_MKL_BLAS@")
+    set(COSMA_BLAS "@COSMA_BLAS@")
+    set(COSMA_SCALAPACK "@COSMA_SCALAPACK@")
+
+    if ("${COSMA_BLAS}" STREQUAL "MKL")
         find_package(MKL COMPONENTS BLAS_32BIT_OMP)
+    elseif ("${COSMA_BLAS}" STREQUAL "CUDA" OR "${COSMA_BLAS}" STREQUAL "ROCM")
+        set(TILEMM_GPU_BACKEND "${COSMA_BLAS}" CACHE STRING "" FORCE) # NOTE(review): confirm the name Tiled-MM reads
+        find_dependency(Tiled-MM) # bundled
+    elseif ("${COSMA_BLAS}" STREQUAL "OPENBLAS")
+        find_package(OpenBLAS)
+        add_library(openblas SHARED IMPORTED) # the imported target must exist before its properties are set
+        set_target_properties(openblas PROPERTIES INTERFACE_INCLUDE_DIRECTORIES "${OpenBLAS_INCLUDE_DIRS}"
+                                       IMPORTED_LOCATION "${OpenBLAS_LIBRARIES}")
+    elseif ("${COSMA_BLAS}" STREQUAL "NETLIB")
+        find_package(cblas NO_MODULE)
     endif ()
 
-    if ("@COSMA_WITH_MKL_ScaLAPACK@")
+    if ("${COSMA_SCALAPACK}" STREQUAL "MKL")
         find_package(MKL COMPONENTS SCALAPACK_32BIT_OMP)
+    elseif ("${COSMA_SCALAPACK}" STREQUAL "NETLIB")
+        find_package(scalapack)
     endif ()
 
     if ("@COSMA_WITH_PROFILING@")

diff --git a/miniapp/CMakeLists.txt b/miniapp/CMakeLists.txt
@@ -3,7 +3,7 @@
 ################
 set(executables "cosma-miniapp" "blocking_vs_non_blocking" "cosma-statistics" "scalars_miniapp")
 
-if(COSMA_WITH_MKL_ScaLAPACK OR COSMA_WITH_NETLIB_ScaLAPACK)
+if(COSMA_SCALAPACK)
     add_executable(pdgemm-miniapp "pdgemm-miniapp.cpp")
     target_link_libraries(pdgemm-miniapp PRIVATE cosma_pxgemm options)
     if (COSMA_WITH_PROFILING)

diff --git a/scripts/build.sh b/scripts/build.sh
@@ -35,28 +35,16 @@
 #
 # BLAS (select one of:)
 #
-# `COSMA_WITH_GPU` := ON|OFF (default: OFF)
-#    If `ON` uses the TiledMM (submodule) GPU gemm back-end instead of MKL.
+# `COSMA_BLAS` := MKL|OPENBLAS|NETLIB|CUDA|ROCM (default: MKL)
 #
-# `COSMA_WITH_MKL_BLAS` := ON|OFF (default: OFF)
+# `COSMA_SCALAPACK` := OFF|MKL|NETLIB (default: OFF)
 #
 #    Note: Mixing OpenMP runtimes results in performance issues. If you use 
 #          COSMA within a large application, make sure that a single OpenMP
 #          back end is used. If using GCC, that should be GNU OpenMP, except
 #          on Mac. COSMA automically selects the right OpenMP runtime back end 
 #          based on platform and compiler.
 #
-# `COSMA_WITH_OPENBLAS := ON|OFF (default: OFF)`
-#
-# `COSMA_WITH_NETLIB_BLAS := ON|OFF (default: OFF)`
-#
-#
-# ScaLAPACK (optional)
-#
-# `COSMA_WITH_MKL_ScaLAPACK := ON|OFF (default: OFF)`
-#
-# `COSMA_WITH_NETLIB_ScaLAPACK := ON|OFF (default: OFF)`
-# 
 cmake <FIXME:cosma_source_dir> \
   -D CMAKE_INSTALL_PREFIX=<FIXME:cosma_install_dir> \
 
diff --git a/src/cosma/buffer.cpp b/src/cosma/buffer.cpp
@@ -3,6 +3,10 @@
 #include <cosma/profiler.hpp>
 #include <complex>
 
+#ifdef COSMA_HAVE_GPU
+#include <Tiled-MM/gpu_runtime_api.hpp>
+#endif
+
 namespace cosma {
 
 template<typename T>
@@ -79,10 +83,11 @@ void Buffer<T>::pin_for_gpu() {
         // pin the buffer that will be used in gemm
         int buff_index_to_pin = buff_index_before_gemm();
         auto buffer_to_pin = ctxt_->get_memory_pool().get_buffer_pointer(buffers_[buff_index_to_pin]);
-        auto status = cudaHostRegister(buffer_to_pin,
-                                       buff_sizes_[buff_index_to_pin] * sizeof(T),
-                                       cudaHostRegisterDefault);
-        gpu::cuda_check_status(status);
+        auto status = gpu::runtime_api::host_register(
+            buffer_to_pin,
+            buff_sizes_[buff_index_to_pin] * sizeof(T),
+            gpu::runtime_api::flag::HostRegisterDefault);
+        gpu::check_runtime_status(status);
         pinned_ = true;
 #endif
 }
@@ -136,8 +141,8 @@ void Buffer<T>::free_communication_buffers(bool dry_run) {
         // if pinned buffer is the communication buffer
         if (buff_index_to_pin >= 0) {
             auto buffer_to_pin = ctxt_->get_memory_pool().get_buffer_pointer(buffers_[buff_index_to_pin]);
-            auto status = cudaHostUnregister(buffer_to_pin);
-            gpu::cuda_check_status(status);
+            auto status = gpu::runtime_api::host_unregister(buffer_to_pin);
+            gpu::check_runtime_status(status);
 
             // any cuda kernel call is asynchronous,
             // so make sure it is finished on GPU

diff --git a/tests/CMakeLists.txt b/tests/CMakeLists.txt
@@ -32,13 +32,11 @@ add_cosma_mpi_test(multiply 16 cosma)
 add_cosma_mpi_test(scalar_matmul 8 cosma)
 
 
-if(COSMA_WITH_MKL_ScaLAPACK OR COSMA_WITH_NETLIB_ScaLAPACK)
+if(COSMA_SCALAPACK)
     add_cosma_mpi_test(pdgemm 6 cosma_pxgemm options)
-endif()
 
-# Functional tests
-#
-if(COSMA_WITH_MKL_ScaLAPACK OR COSMA_WITH_NETLIB_ScaLAPACK)
+    # Functional tests
+    #
     add_executable(test.cosma_pdgemm cosma_pdgemm_test.cpp)
     target_link_libraries(test.cosma_pdgemm PRIVATE cosma_pxgemm options)
     target_include_directories(test.cosma_pdgemm PRIVATE .)