Skip to content

Commit

Permalink
First attempt at HIP implemetation
Browse files Browse the repository at this point in the history
  • Loading branch information
pelesh committed Oct 26, 2023
1 parent ab5584b commit 3bcdccc
Show file tree
Hide file tree
Showing 10 changed files with 350 additions and 7 deletions.
25 changes: 20 additions & 5 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -23,14 +23,17 @@ endif()

option(RESOLVE_TEST_WITH_BSUB "Use `jsrun` instead of `mpirun` commands when running tests" OFF)
option(RESOLVE_USE_KLU "Use KLU, AMD and COLAMD libraries from SuiteSparse" ON)
option(RESOLVE_USE_GPU "Use GPU device for computations" ON)
option(RESOLVE_USE_CUDA "Use CUDA language and SDK" ON)
option(RESOLVE_USE_GPU "Use GPU device for computations" OFF)
option(RESOLVE_USE_CUDA "Use CUDA language and SDK" OFF)
option(RESOLVE_USE_HIP "Use HIP language and ROCm library" OFF)
set(RESOLVE_CTEST_OUTPUT_DIR ${PROJECT_BINARY_DIR} CACHE PATH "Directory where CTest outputs are saved")

if(RESOLVE_USE_CUDA)
set(RESOLVE_USE_GPU On CACHE BOOL "Using GPU!" FORCE)
else()
set(RESOLVE_USE_GPU Off CACHE BOOL "Using GPU!" FORCE)
set(RESOLVE_USE_GPU On CACHE BOOL "Using CUDA GPU!" FORCE)
endif()

if(RESOLVE_USE_HIP)
set(RESOLVE_USE_GPU On CACHE BOOL "Using HIP GPU!" FORCE)
endif()


Expand Down Expand Up @@ -89,6 +92,18 @@ else()
message(STATUS "Not using CUDA")
endif()

if(RESOLVE_USE_HIP)
enable_language(HIP)
check_language(HIP)
include(ReSolveFindHipLibraries)

# set(CMAKE_HIP_FLAGS "${CMAKE_HIP_FLAGS} -D__HIP_PLATFORM_AMD__")
include_directories("/opt/rocm-5.6.0/include")
else()
message(STATUS "Not using HIP")
endif(RESOLVE_USE_HIP)


# The binary dir is already a global include directory
configure_file(
${CMAKE_SOURCE_DIR}/resolve/resolve_defs.hpp.in
Expand Down
15 changes: 15 additions & 0 deletions cmake/ReSolveFindHipLibraries.cmake
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
# Exports target `resolve_hip` which finds all hip libraries needed by resolve.


add_library(resolve_hip INTERFACE)

find_package(hip REQUIRED)
find_package(hipblas REQUIRED)

target_link_libraries(resolve_hip INTERFACE
hip::host
hip::device
roc::hipblas
)

install(TARGETS resolve_hip EXPORT ReSolveTargets)
13 changes: 13 additions & 0 deletions resolve/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -48,6 +48,10 @@ if(RESOLVE_USE_CUDA)
add_subdirectory(cuda)
endif()

if(RESOLVE_USE_HIP)
add_subdirectory(hip)
endif()

# Now, build workspaces
add_subdirectory(workspace)

Expand All @@ -67,6 +71,10 @@ if(RESOLVE_USE_CUDA)
target_link_libraries(resolve_tpl INTERFACE resolve_cuda)
endif(RESOLVE_USE_CUDA)

if(RESOLVE_USE_HIP)
target_link_libraries(resolve_tpl INTERFACE resolve_hip)
endif(RESOLVE_USE_HIP)


set(ReSolve_Targets_List
resolve_matrix
Expand All @@ -82,6 +90,11 @@ if(RESOLVE_USE_CUDA)
set(ReSolve_Targets_List ${ReSolve_Targets_List} resolve_backend_cuda)
endif()

# If HIP support is enabled add HIP SDK specific code and dependencies
if(RESOLVE_USE_HIP)
set(ReSolve_Targets_List ${ReSolve_Targets_List} resolve_backend_hip)
endif()

# If no GPU support is enabled, link to dummy device backend
if(NOT RESOLVE_USE_GPU)
set(ReSolve_Targets_List ${ReSolve_Targets_List} resolve_backend_cpu)
Expand Down
3 changes: 2 additions & 1 deletion resolve/MemoryUtils.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -55,7 +55,8 @@ namespace ReSolve
#include <resolve/cuda/CudaMemory.hpp>
using MemoryHandler = ReSolve::MemoryUtils<ReSolve::memory::Cuda>;
#elif defined RESOLVE_USE_HIP
#error HIP support requested, but not available! Probably a bug in CMake configuration.
#include <resolve/hip/HipMemory.hpp>
using MemoryHandler = ReSolve::MemoryUtils<ReSolve::memory::Hip>;
#else
#error Unrecognized device, probably bug in CMake configuration
#endif
Expand Down
37 changes: 37 additions & 0 deletions resolve/hip/CMakeLists.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,37 @@
#[[

@brief Build ReSolve HIP backend

@author Slaven Peles <peless@ornl.gov>

]]

set(ReSolve_HIP_SRC
# hipKernels.cu
hipVectorKernels.hip
MemoryUtils.hip
)

set(ReSolve_HIP_HEADER_INSTALL
# hipKernels.h
# hipVectorKernels.h
HipMemory.hpp
# hip_check_errors.hpp
)

set_source_files_properties(${ReSolve_HIP_SRC} PROPERTIES LANGUAGE HIP)

# First create HIP backend
# (this should really be HIP _API_ backend,
# separate backend will be needed for HIP SDK)
add_library(resolve_backend_hip SHARED ${ReSolve_HIP_SRC})
target_link_libraries(resolve_backend_hip PRIVATE resolve_logger)
target_link_libraries(resolve_backend_hip PRIVATE resolve_hip)
# target_include_directories(resolve_backend_hip PUBLIC "/opt/rocm-5.6.0/include")
target_include_directories(resolve_backend_hip INTERFACE
$<BUILD_INTERFACE:${CMAKE_SOURCE_DIR}>
$<INSTALL_INTERFACE:include>
)

# install include headers
install(FILES ${ReSolve_HIP_HEADER_INSTALL} DESTINATION include/resolve/hip)
152 changes: 152 additions & 0 deletions resolve/hip/HipMemory.hpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,152 @@
#pragma once

#include <iostream>
#include <hip/hip_runtime.h>

#include "hip_check_errors.hpp"

namespace ReSolve
{
namespace memory
{
/**
* @brief Class containing wrappers for CUDA API functions.
*
* All wrappers are implemented as static functions returning integer
* error code from CUDA API functions.
*
* @author Slaven Peles <peless@ornl.gov>
*/
struct Hip
{
static void deviceSynchronize()
{
hipDeviceSynchronize();
}

static int getLastDeviceError()
{
return static_cast<int>(hipGetLastError());
}

/**
* @brief deletes variable from device
*
* @param v - a variable on the device
*
* @post v is freed from the device
*/
static int deleteOnDevice(void* v)
{
return checkHipErrors(hipFree(v));
}

/**
* @brief allocates array v onto device
*
* @param v - pointer to the array to be allocated on the device
* @param n - number of array elements (int, size_t)
*
* @tparam T - Array element type
* @tparam I - Array index type
*
* @post v is now a array with size n on the device
*/
template <typename I, typename T>
static int allocateArrayOnDevice(T** v, I n)
{
return checkHipErrors(hipMalloc((void**) v, sizeof(T) * n));
}

/**
* @brief allocates buffer v onto device.
*
* The difference from the array is that buffer size is required in bytes,
* not number of elements.
*
* @param v - pointer to the buffer to be allocated on the device
* @param n - size of the buffer in bytes
*
* @tparam T - Buffer element data type type (typically void)
* @tparam I - Buffer size type (typically size_t)
*
* @post v is now a buffer of n bytes
*/
template <typename I, typename T>
static int allocateBufferOnDevice(T** v, I n)
{
return checkHipErrors(hipMalloc((void**) v, n));
}

/**
* @brief Sets elements of device array v to zero
*
* @param v - pointer to the array to be allocated on the device
* @param n - number of the array elements to be set to zero
*
* @tparam T - Array element type
* @tparam I - Array index type
*
* @post First n elements of array v are set to zero
*/
template <typename I, typename T>
static int setZeroArrayOnDevice(T* v, I n)
{
return checkHipErrors(hipMemset(v, 0, sizeof(T) * n));
}

/**
* @brief Copies array `src` from device to the array `dst` on the host.
*
* @param[in] n - size of src array
* @param[in] src - array on device
* @param[out] dst - array on host
*
* @pre `src` is a pointer to an allocated array on the device
* @pre `dst` is allocated to size >= n on the host
* @post Content of `dst` is overwritten by the content of `src`
*/
template <typename I, typename T>
static int copyArrayDeviceToHost(T* dst, const T* src, I n)
{
return checkHipErrors(hipMemcpy(dst, src, sizeof(T) * n, hipMemcpyDeviceToHost));
}

/**
* @brief Copies array `src` to the array `dst` on the device.
*
* @param n - size of src array
* @param src - array on device to be copied
* @param dst - array on device to be copied onto
*
* @pre `src` is a pointer to an allocated array on the device
* @pre `dst` is allocated to size >= n on the device
* @post Content of `dst` is overwritten by the content of `src`
*/
template <typename I, typename T>
static int copyArrayDeviceToDevice(T* dst, const T* src, I n)
{
return checkHipErrors(hipMemcpy(dst, src, sizeof(T) * n, hipMemcpyDeviceToDevice));
}

/**
* @brief Copies array `src` from the host to the array `dst` on the device.
*
* @param n - size of src array
* @param src - array on the host to be copied
* @param dst - array on the device to be copied onto
*
* @pre `src` is a pointer to an allocated array on the host
* @pre `dst` is allocated to size >= n on the device
* @post Content of `dst` is overwritten by the content of `src`
*/
template <typename I, typename T>
static int copyArrayHostToDevice(T* dst, const T* src, I n)
{
return checkHipErrors(hipMemcpy(dst, src, sizeof(T) * n, hipMemcpyHostToDevice));
}

};
}

} //namespace ReSolve
40 changes: 40 additions & 0 deletions resolve/hip/MemoryUtils.hip
Original file line number Diff line number Diff line change
@@ -0,0 +1,40 @@
/**
* @file MemoryUtils.cu
*
* This file includes MemoryUtils.tpp and specifies what functions to
* instantiate from function templates.
*
* @author Slaven Peles <peless@ornl.gov>
*/


#include <iostream>

#include <resolve/Common.hpp>
#include <resolve/MemoryUtils.hpp>

#include <resolve/MemoryUtils.tpp>

namespace ReSolve
{
template void MemoryUtils<memory::Hip>::deviceSynchronize();
template int MemoryUtils<memory::Hip>::getLastDeviceError();
template int MemoryUtils<memory::Hip>::deleteOnDevice(void*);

template int MemoryUtils<memory::Hip>::allocateArrayOnDevice<index_type, real_type>( real_type**, index_type);
template int MemoryUtils<memory::Hip>::allocateArrayOnDevice<index_type, index_type>(index_type**, index_type);

template int MemoryUtils<memory::Hip>::allocateBufferOnDevice<size_t, void>(void** v, size_t n);

template int MemoryUtils<memory::Hip>::setZeroArrayOnDevice<index_type, real_type>( real_type*, index_type);

template int MemoryUtils<memory::Hip>::copyArrayDeviceToHost<index_type, real_type>( real_type*, const real_type*, index_type);
template int MemoryUtils<memory::Hip>::copyArrayDeviceToHost<index_type, index_type>(index_type*, const index_type*, index_type);

template int MemoryUtils<memory::Hip>::copyArrayDeviceToDevice<index_type, real_type>( real_type*, const real_type*, index_type);
template int MemoryUtils<memory::Hip>::copyArrayDeviceToDevice<index_type, index_type>(index_type*, const index_type*, index_type);

template int MemoryUtils<memory::Hip>::copyArrayHostToDevice<index_type, real_type>( real_type*, const real_type*, index_type);
template int MemoryUtils<memory::Hip>::copyArrayHostToDevice<index_type, index_type>(index_type*, const index_type*, index_type);

} //namespace ReSolve
29 changes: 29 additions & 0 deletions resolve/hip/hipVectorKernels.hip
Original file line number Diff line number Diff line change
@@ -0,0 +1,29 @@
// #include "cudaVectorKernels.h"
#include <resolve/Common.hpp>
#include <resolve/vector/VectorKernels.hpp>


namespace ReSolve { namespace vector {

namespace kernels {

// __global__ void set_const(index_type n, real_type val, real_type* arr)
// {
// index_type i = blockIdx.x * blockDim.x + threadIdx.x;
// if(i < n)
// {
// arr[i] = val;
// }
// }

} // namespace kernels

void set_array_const(index_type /* n */, real_type /* val */, real_type* /* arr */)
{
// index_type num_blocks;
// index_type block_size = 512;
// num_blocks = (n + block_size - 1) / block_size;
// kernels::set_const<<<num_blocks, block_size>>>(n, val, arr);
}

}} // namespace ReSolve::vector
29 changes: 29 additions & 0 deletions resolve/hip/hip_check_errors.hpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,29 @@
/**
* @file hip_check_errors.hpp
*
* Contains macro to get error code from CUDA functions and to stream
* appropriate error output to Re::Solve's logger.
*
* @author Kasia Swirydowicz <kasia.swirydowicz@pnnl.gov>
* @author Slaven Peles <peless@ornl.gov>
*/
#pragma once

#include <resolve/utilities/logger/Logger.hpp>

template <typename T>
int check(T result,
char const *const func,
const char *const file,
int const line)
{
if (result) {
ReSolve::io::Logger::error() << "HIP error in function "
<< func << " at " << file << ":" << line
<< ", error# " << result << "\n";
return -1;
}
return 0;
}
// #define checkCudaErrors(val) check((val), #val, __FILE__, __LINE__)
#define checkHipErrors(val) val
Loading

0 comments on commit 3bcdccc

Please sign in to comment.