Skip to content

Commit

Permalink
Merge fix for ROCm 6.1 issues
Browse files Browse the repository at this point in the history
This fixes some issues with assertions and the bitonic sorting kernels on ROCm 6.x

Related PR: #1670
  • Loading branch information
upsj authored Aug 25, 2024
2 parents 9929854 + acb4ccc commit c09529f
Show file tree
Hide file tree
Showing 2 changed files with 7 additions and 25 deletions.
10 changes: 7 additions & 3 deletions common/cuda_hip/components/sorting.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -113,11 +113,15 @@ struct bitonic_warp {

__forceinline__ __device__ static void merge(ValueType* els, bool reverse)
{
auto tile =
group::tiled_partition<num_threads>(group::this_thread_block());
auto new_reverse = reverse != upper_half();
for (int i = 0; i < num_local; ++i) {
auto other = tile.shfl_xor(els[i], num_threads / 2);
// workaround for ROCm 6.x segfaults on gfx906
#ifdef GKO_COMPILING_CUDA
auto other = __shfl_xor_sync(config::full_lane_mask, els[i],
num_threads / 2, num_threads);
#else
auto other = __shfl_xor(els[i], num_threads / 2, num_threads);
#endif
bitonic_cas(els[i], other, new_reverse);
}
half::merge(els, reverse);
Expand Down
22 changes: 0 additions & 22 deletions include/ginkgo/core/base/types.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -51,30 +51,8 @@
#endif


// GKO_ASSERT(condition): assertion macro with three possible expansions,
// selected at preprocessing time:
//   1. CUDA device code on Apple, or HIP device code, with NDEBUG defined:
//      expands to a no-op; `condition` is not evaluated.
//   2. Same targets without NDEBUG: prints a diagnostic via printf when
//      `condition` is false, but does not terminate execution.
//   3. Everything else: forwards to the standard assert() macro.
// NOTE(review): presumably assert() is unavailable or problematic in the
// device compilations of case 1/2 - the reason is not visible here; confirm.
#if (defined(__CUDA_ARCH__) && defined(__APPLE__)) || \
defined(__HIP_DEVICE_COMPILE__)

#ifdef NDEBUG
// Release builds: assertions compile away entirely.
#define GKO_ASSERT(condition) ((void)0)
#else // NDEBUG
// Poor man's assertions for CUDA device code on Macs and for HIP device code.
// They won't terminate the program, but will at least print the file, line,
// enclosing function and the stringified condition on the screen.
// The printf result is cast to void so both branches of the conditional
// expression have type void.
#define GKO_ASSERT(condition) \
((condition) \
? ((void)0) \
: ((void)printf("%s: %d: %s: Assertion `" #condition "' failed\n", \
__FILE__, __LINE__, __func__)))
#endif // NDEBUG

#else // (defined(__CUDA_ARCH__) && defined(__APPLE__)) ||
// defined(__HIP_DEVICE_COMPILE__)

// Handle assertions normally on other systems: defer to the standard
// assert() macro.
#define GKO_ASSERT(condition) assert(condition)

#endif // (defined(__CUDA_ARCH__) && defined(__APPLE__)) ||
// defined(__HIP_DEVICE_COMPILE__)


// Handle deprecated notices correctly on different systems
// clang-format off
Expand Down

0 comments on commit c09529f

Please sign in to comment.