Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Added support for ROCm backend of TiledMM. Blas backend selection is now a single option. #18

Merged
merged 4 commits into from
Nov 11, 2019
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
52 changes: 37 additions & 15 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -18,16 +18,38 @@ option(COSMA_WITH_APPS "Generate the miniapp targets." ${MASTER_PROJECT})
option(COSMA_WITH_BENCHMARKS "Generate the benchmark targets." ${MASTER_PROJECT})
option(COSMA_WITH_INSTALL "Enable installation." ${MASTER_PROJECT})
option(COSMA_WITH_PROFILING "Enable profiling." OFF)
option(COSMA_WITH_FOTRAN_BINDINGS "Make Fortran bindings to COSMA." OFF)

option(COSMA_WITH_GPU "Use CUDA cuBLAS GPU back end." OFF)
option(COSMA_WITH_MKL_BLAS "Use MKL BLAS back end." OFF)
option(COSMA_WITH_OPENBLAS "Use OpenBLAS backend." OFF)
option(COSMA_WITH_NETLIB_BLAS "Use Netlib BLAS backend." OFF)
# BLAS backend selection: one string-valued cache entry, defaulting to "MKL".
# The STRINGS property records the accepted choices.
set(COSMA_BLAS
    "MKL"
    CACHE STRING "Blas backend. Can be MKL, OPENBLAS, NETLIB, CUDA or ROCM.")
set_property(
    CACHE COSMA_BLAS
    PROPERTY STRINGS "MKL" "OPENBLAS" "NETLIB" "CUDA" "ROCM")

# ScaLAPACK backend selection: one string-valued cache entry, defaulting to "OFF".
# The STRINGS property records the accepted choices.
set(COSMA_SCALAPACK "OFF" CACHE STRING
    "ScaLAPACK backend. Can be MKL, NETLIB or OFF.")
set_property(CACHE COSMA_SCALAPACK PROPERTY STRINGS
    "OFF" "MKL" "NETLIB")

# Report the selected BLAS backend, then stop configuration unless it is one
# of the values listed in the STRINGS property of the COSMA_BLAS cache entry.
message(STATUS "Selected BLAS backend for COSMA: ${COSMA_BLAS}")
get_property(BACKEND_LIST CACHE COSMA_BLAS PROPERTY STRINGS)
if(COSMA_BLAS IN_LIST BACKEND_LIST)
    # accepted value: nothing to do
else()
    message(FATAL_ERROR "Invalid value for COSMA_BLAS!")
endif()

option(COSMA_WITH_MKL_ScaLAPACK "Use MKL ScaLAPACK back end." OFF)
option(COSMA_WITH_NETLIB_ScaLAPACK "Use Netlib ScaLAPACK back end." OFF)
# Report the selected ScaLAPACK backend. A false value (e.g. OFF) is always
# accepted; any other value must appear in the STRINGS property of the
# COSMA_SCALAPACK cache entry.
message(STATUS "Selected ScaLAPACK backend for COSMA: ${COSMA_SCALAPACK}")
unset(BACKEND_LIST)
get_property(BACKEND_LIST CACHE COSMA_SCALAPACK PROPERTY STRINGS)
if(COSMA_SCALAPACK)
    if(NOT COSMA_SCALAPACK IN_LIST BACKEND_LIST)
        message(FATAL_ERROR "Invalid value for COSMA_SCALAPACK!")
    endif()
endif()

option(COSMA_WITH_FOTRAN_BINDINGS "Make Fortran bindings to COSMA." OFF)
# When the BLAS backend is neither CUDA nor ROCM, an enabled ScaLAPACK backend
# must name the same provider as the BLAS backend.
# Expansions are quoted so that an empty or list-valued cache entry cannot
# produce a malformed if() and the values are compared as plain strings.
if (NOT "${COSMA_BLAS}" STREQUAL "CUDA" AND NOT "${COSMA_BLAS}" STREQUAL "ROCM")
    if (COSMA_SCALAPACK AND NOT "${COSMA_BLAS}" STREQUAL "${COSMA_SCALAPACK}")
        message(FATAL_ERROR "ScaLAPACK backend MUST match BLAS backend if no GPU backend is used!")
    endif()
endif()

project(cosma VERSION 0.1 LANGUAGES CXX)

Expand Down Expand Up @@ -58,38 +80,38 @@ add_subdirectory(libs/grid2grid)
#
set(BLAS_TARGET "")
set(BLAS_DEF "")
if (COSMA_WITH_MKL_BLAS)

if (${COSMA_BLAS} STREQUAL "MKL")
find_package(MKL REQUIRED COMPONENTS BLAS_32BIT_OMP)
set(BLAS_TARGET "mkl::blas_32bit_omp")
set(BLAS_DEF "COSMA_WITH_MKL_BLAS")
elseif (COSMA_WITH_GPU)
elseif (${COSMA_BLAS} STREQUAL "CUDA" OR ${COSMA_BLAS} STREQUAL "ROCM")
option(TILEDMM_WITH_INSTALL "" ${COSMA_WITH_INSTALL})
# Forward the selected GPU backend to the bundled Tiled-MM build.
# Cache signature is set(<var> <value> CACHE <type> <docstring> [FORCE]):
# the docstring must come before FORCE, otherwise no cache entry is written.
# NOTE(review): the option just above is spelled TILEDMM_...; confirm whether
# Tiled-MM reads TILEMM_GPU_BACKEND or TILEDMM_GPU_BACKEND.
set(TILEMM_GPU_BACKEND "${COSMA_BLAS}" CACHE STRING "" FORCE)
add_subdirectory(libs/Tiled-MM)
set(BLAS_TARGET "Tiled-MM")
set(BLAS_DEF "COSMA_HAVE_GPU")
elseif (COSMA_WITH_OPENBLAS)
elseif (${COSMA_BLAS} STREQUAL "OPENBLAS")
find_package(OpenBLAS REQUIRED)
add_library(openblas SHARED IMPORTED)
set_target_properties(openblas PROPERTIES
INTERFACE_INCLUDE_DIRECTORIES "${OpenBLAS_INCLUDE_DIRS}"
IMPORTED_LOCATION "${OpenBLAS_LIBRARIES}")
set(BLAS_TARGET "openblas")
set(BLAS_DEF "COSMA_WITH_BLAS")
elseif (COSMA_WITH_NETLIB_BLAS)
elseif (${COSMA_BLAS} STREQUAL "NETLIB")
find_package(cblas NO_MODULE REQUIRED)
set(BLAS_TARGET "cblas")
set(BLAS_DEF "COSMA_WITH_BLAS")
else ()
message(FATAL_ERROR "No BLAS back end found! Select a back end and make sure it's installed with support for the cblas interface and CMake config files.")
endif ()

# (optional) ScaLAPACK providers
#
set(ScaLAPACK_TARGET "")
if (COSMA_WITH_MKL_ScaLAPACK)
if (${COSMA_SCALAPACK} STREQUAL "MKL")
find_package(MKL REQUIRED COMPONENTS SCALAPACK_32BIT_OMP)
set(ScaLAPACK_TARGET "mkl::scalapack_32bit_omp")
elseif (COSMA_WITH_NETLIB_ScaLAPACK)
elseif (${COSMA_SCALAPACK} STREQUAL "NETLIB")
find_package(scalapack REQUIRED)
set(ScaLAPACK_TARGET "scalapack")
else ()
Expand Down
2 changes: 1 addition & 1 deletion benchmarks/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@ foreach(exec ${executables})
target_link_libraries(${exec} cosma ${cosma_deps})
endforeach()

if (COSMA_WITH_MKL_BLAS)
# The transpose benchmark is only built when MKL is the BLAS backend.
# The expansion is quoted so an empty COSMA_BLAS cannot break the if() syntax.
if ("${COSMA_BLAS}" STREQUAL "MKL")
    add_executable(transpose "transpose.cpp")
    target_link_libraries(transpose cosma ${cosma_deps})
endif()
Expand Down
21 changes: 17 additions & 4 deletions cmake/cosmaConfig.cmake.in
Original file line number Diff line number Diff line change
Expand Up @@ -15,14 +15,27 @@ if(NOT TARGET cosma::cosma)

find_package(MPI)

if ("@COSMA_WITH_GPU@")
find_dependency(Tiled-MM) # bundled
elseif ("@COSMA_WITH_MKL_BLAS@")
set(COSMA_BLAS "@COSMA_BLAS@")
set(COSMA_SCALAPACK "@COSMA_SCALAPACK@")

# Locate the BLAS provider matching the COSMA_BLAS value recorded in this
# package config file. Expansions are quoted so they compare as plain strings.
if ("${COSMA_BLAS}" STREQUAL "MKL")
    find_package(MKL COMPONENTS BLAS_32BIT_OMP)
elseif ("${COSMA_BLAS}" STREQUAL "CUDA" OR "${COSMA_BLAS}" STREQUAL "ROCM")
    # Cache signature is set(<var> <value> CACHE <type> <docstring> [FORCE]):
    # the docstring must come before FORCE, otherwise no cache entry is written.
    # NOTE(review): confirm whether Tiled-MM reads TILEMM_GPU_BACKEND or
    # TILEDMM_GPU_BACKEND.
    set(TILEMM_GPU_BACKEND "${COSMA_BLAS}" CACHE STRING "" FORCE)
    find_dependency(Tiled-MM) # bundled
elseif ("${COSMA_BLAS}" STREQUAL "OPENBLAS")
    find_package(OpenBLAS)
    # set_target_properties() fails on a target that does not exist, so
    # create the imported target first (same form as the top-level build).
    if (NOT TARGET openblas)
        add_library(openblas SHARED IMPORTED)
    endif ()
    set_target_properties(openblas PROPERTIES
        INTERFACE_INCLUDE_DIRECTORIES "${OpenBLAS_INCLUDE_DIRS}"
        IMPORTED_LOCATION "${OpenBLAS_LIBRARIES}")
elseif ("${COSMA_BLAS}" STREQUAL "NETLIB")
    find_package(cblas NO_MODULE)
endif ()

if ("@COSMA_WITH_MKL_ScaLAPACK@")
# Locate the ScaLAPACK provider, if one was selected. COSMA_SCALAPACK takes the
# values OFF, MKL or NETLIB, so those are the names compared against; any other
# value (including OFF) searches for nothing.
if ("${COSMA_SCALAPACK}" STREQUAL "MKL")
    find_package(MKL COMPONENTS SCALAPACK_32BIT_OMP)
elseif ("${COSMA_SCALAPACK}" STREQUAL "NETLIB")
    find_package(scalapack)
endif ()

if ("@COSMA_WITH_PROFILING@")
Expand Down
2 changes: 1 addition & 1 deletion miniapp/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
################
set(executables "cosma-miniapp" "blocking_vs_non_blocking" "cosma-statistics" "scalars_miniapp")

if(COSMA_WITH_MKL_ScaLAPACK OR COSMA_WITH_NETLIB_ScaLAPACK)
if(COSMA_SCALAPACK)
add_executable(pdgemm-miniapp "pdgemm-miniapp.cpp")
target_link_libraries(pdgemm-miniapp PRIVATE cosma_pxgemm options)
if (COSMA_WITH_PROFILING)
Expand Down
16 changes: 2 additions & 14 deletions scripts/build.sh
Original file line number Diff line number Diff line change
Expand Up @@ -35,28 +35,16 @@
#
# BLAS (select one of:)
#
# `COSMA_WITH_GPU` := ON|OFF (default: OFF)
# If `ON` uses the TiledMM (submodule) GPU gemm back-end instead of MKL.
# `COSMA_BLAS` := MKL|OPENBLAS|NETLIB|CUDA|ROCM (default: MKL)
#
# `COSMA_WITH_MKL_BLAS` := ON|OFF (default: OFF)
# `COSMA_SCALAPACK` := OFF|MKL|NETLIB (default: OFF)
#
# Note: Mixing OpenMP runtimes results in performance issues. If you use
# COSMA within a large application, make sure that a single OpenMP
# back end is used. If using GCC, that should be GNU OpenMP, except
# on Mac. COSMA automatically selects the right OpenMP runtime back end
# based on platform and compiler.
#
# `COSMA_WITH_OPENBLAS := ON|OFF (default: OFF)`
#
# `COSMA_WITH_NETLIB_BLAS := ON|OFF (default: OFF)`
#
#
# ScaLAPACK (optional)
#
# `COSMA_WITH_MKL_ScaLAPACK := ON|OFF (default: OFF)`
#
# `COSMA_WITH_NETLIB_ScaLAPACK := ON|OFF (default: OFF)`
#
cmake <FIXME:cosma_source_dir> \
-D CMAKE_INSTALL_PREFIX=<FIXME:cosma_install_dir> \

17 changes: 11 additions & 6 deletions src/cosma/buffer.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,10 @@
#include <cosma/profiler.hpp>
#include <complex>

#ifdef COSMA_HAVE_GPU
#include <Tiled-MM/gpu_runtime_api.hpp>
#endif

namespace cosma {

template<typename T>
Expand Down Expand Up @@ -79,10 +83,11 @@ void Buffer<T>::pin_for_gpu() {
// pin the buffer that will be used in gemm
int buff_index_to_pin = buff_index_before_gemm();
auto buffer_to_pin = ctxt_->get_memory_pool().get_buffer_pointer(buffers_[buff_index_to_pin]);
auto status = cudaHostRegister(buffer_to_pin,
buff_sizes_[buff_index_to_pin] * sizeof(T),
cudaHostRegisterDefault);
gpu::cuda_check_status(status);
auto status = gpu::runtime_api::host_register(
buffer_to_pin,
buff_sizes_[buff_index_to_pin] * sizeof(T),
gpu::runtime_api::flag::HostRegisterDefault);
gpu::check_runtime_status(status);
pinned_ = true;
#endif
}
Expand Down Expand Up @@ -136,8 +141,8 @@ void Buffer<T>::free_communication_buffers(bool dry_run) {
// if pinned buffer is the communication buffer
if (buff_index_to_pin >= 0) {
auto buffer_to_pin = ctxt_->get_memory_pool().get_buffer_pointer(buffers_[buff_index_to_pin]);
auto status = cudaHostUnregister(buffer_to_pin);
gpu::cuda_check_status(status);
auto status = gpu::runtime_api::host_unregister(buffer_to_pin);
gpu::check_runtime_status(status);

// any cuda kernel call is asynchronous,
// so make sure it is finished on GPU
Expand Down
8 changes: 3 additions & 5 deletions tests/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -32,13 +32,11 @@ add_cosma_mpi_test(multiply 16 cosma)
add_cosma_mpi_test(scalar_matmul 8 cosma)


if(COSMA_WITH_MKL_ScaLAPACK OR COSMA_WITH_NETLIB_ScaLAPACK)
if(COSMA_SCALAPACK)
add_cosma_mpi_test(pdgemm 6 cosma_pxgemm options)
endif()

# Functional tests
#
if(COSMA_WITH_MKL_ScaLAPACK OR COSMA_WITH_NETLIB_ScaLAPACK)
# Functional tests
#
add_executable(test.cosma_pdgemm cosma_pdgemm_test.cpp)
target_link_libraries(test.cosma_pdgemm PRIVATE cosma_pxgemm options)
target_include_directories(test.cosma_pdgemm PRIVATE .)
Expand Down