Skip to content

Commit

Permalink
ROCm 3.3 support (#3623)
Browse files Browse the repository at this point in the history
The `ln -s /opt/rocm/bin/hcc* /opt/rocm/hip/bin/` issue has been worked around by properly setting `HCC_PATH` on the CMake side.
The shutdown issue has been worked around by replacing interrupts with polling (suggested at ROCm/roctracer#22 (comment)). Something is wrong with the destruction order in our code, but I cannot easily identify what. It's not the missing `cudaDestoryStream` though.

Fixes #3620 (according to `ctest -R save_checkpoint_lb.cpu-p3m.cpu-lj-therm.lb_1 --repeat-until-fail 1000`).
Fixes #3587 (according to `ctest -R ek_charged_plate --repeat-until-fail 100`).

**TODO**
- https://github.com/espressomd/docker/blob/master/docker/rocm-python3/Dockerfile-latest needs to be updated to ROCm 3.3 once this pull request is merged.
  • Loading branch information
kodiakhq[bot] authored Apr 3, 2020
2 parents cc2d279 + d0b4c33 commit 23fd9f8
Show file tree
Hide file tree
Showing 4 changed files with 17 additions and 6 deletions.
10 changes: 9 additions & 1 deletion cmake/FindCUDACompiler.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -67,7 +67,12 @@ else()
list(APPEND CMAKE_MODULE_PATH "${ROCM_HOME}/hip/cmake")
find_package(HIP 1.5.18494 QUIET MODULE)
if(HIP_FOUND)
set(HCC_PATH "${HIP_ROOT_DIR}")
if(HIP_VERSION VERSION_LESS "3.1")
set(HCC_PATH "${HIP_ROOT_DIR}")
else()
set(HCC_PATH "${ROCM_HOME}/hcc")
endif()

find_package(HIP MODULE)
message(STATUS "Found HIP compiler: ${HIP_HIPCC_EXECUTABLE}")
set(CUDA 1)
Expand All @@ -83,6 +88,9 @@ else()
HIP_HCC_FLAGS
"-pedantic -Wall -Wextra -Wno-sign-compare -Wno-unused-function -Wno-unused-variable -Wno-unused-parameter -Wno-missing-braces -Wno-gnu-anonymous-struct -Wno-nested-anon-types -Wno-gnu-zero-variadic-macro-arguments"
)
if(NOT HIP_VERSION VERSION_LESS "3.3")
list(APPEND HIP_HCC_FLAGS "-Wno-deprecated-copy")
endif()
if(WARNINGS_ARE_ERRORS)
list(APPEND HIP_HCC_FLAGS "-Werror")
endif()
Expand Down
3 changes: 3 additions & 0 deletions src/core/cuda_init_cuda.cu
Original file line number Diff line number Diff line change
Expand Up @@ -40,6 +40,9 @@ static const int computeCapabilityMinMinor = 0;
const char *cuda_error;

void cuda_init() {
#if defined(__HIPCC__) and not defined(__CUDACC__)
setenv("HSA_ENABLE_INTERRUPT", "0", 1);
#endif
#if defined(__HIPCC__) and not defined(__CUDACC__) and \
HIP_VERSION_PATCH <= 19171 /* i.e. <= v2.4.0 */
// Catch an exception that causes `import espressomd` to crash in
Expand Down
4 changes: 2 additions & 2 deletions src/core/curand_wrapper.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -42,8 +42,8 @@ class philox4x32_10_stateless : private rocrand_device::philox4x32_10_engine {

__forceinline__ __device__ uint4 curand_Philox4x32_10(uint4 counter,
uint2 key) {
philox4x32_10_stateless *e = nullptr;
return (*e)(counter, key);
philox4x32_10_stateless e;
return e(counter, key);
}

#endif
Expand Down
6 changes: 3 additions & 3 deletions src/core/grid_based_algorithms/electrokinetics_cuda.cu
Original file line number Diff line number Diff line change
Expand Up @@ -326,7 +326,7 @@ __device__ void ek_diffusion_migration_lbforce_linkcentered_stencil(

float agrid_inv = 1.0f / ek_parameters_gpu->agrid;
float sqrt2agrid_inv = 1.0f / (sqrtf(2.0f) * ek_parameters_gpu->agrid);
float sqrt2_inv = 1.0f / sqrt(2.0f);
float sqrt2_inv = 1.0f / sqrtf(2.0f);
float twoT_inv = 1.0f / (2.0f * ek_parameters_gpu->T);
float D_inv = 1.0f / ek_parameters_gpu->D[species_index];
float force_conv =
Expand Down Expand Up @@ -1365,7 +1365,7 @@ __device__ void ek_add_fluctuations_to_flux(unsigned int index,
for (int i = 0; i < 9; i++) {

if (i % 4 == 0) {
random_floats = random_wrapper_philox(index, i + 40, philox_counter);
random_floats = ek_random_wrapper_philox(index, i + 40, philox_counter);
random = (random_floats.w - 0.5f) * 2.0f;
} else if (i % 4 == 1) {
random = (random_floats.x - 0.5f) * 2.0f;
Expand All @@ -1389,7 +1389,7 @@ __device__ void ek_add_fluctuations_to_flux(unsigned int index,
powf(2.0f * average_density * diffusion * time_step /
(agrid * agrid),
0.5f) *
random * ek_parameters_gpu->fluctuation_amplitude / sqrt(2.0f);
random * ek_parameters_gpu->fluctuation_amplitude / sqrtf(2.0f);
fluc *=
!(lb_node.boundary[index] || lb_node.boundary[neighborindex[i]]);
#ifdef EK_DEBUG
Expand Down

0 comments on commit 23fd9f8

Please sign in to comment.