Skip to content

Commit

Permalink
Added workaround for weird AMD SI Hainan bug
Browse files Browse the repository at this point in the history
  • Loading branch information
CNugteren committed Jul 25, 2018
1 parent 6a8b9e2 commit e8dea34
Show file tree
Hide file tree
Showing 2 changed files with 29 additions and 2 deletions.
6 changes: 6 additions & 0 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,12 @@ option(TESTS "Enable compilation of the correctness tests" OFF)
option(NETLIB "Enable compilation of the CBLAS Netlib API" OFF)
option(CUBLAS "Enables performance comparison against cuBLAS on NVIDIA GPUs" OFF)

# Workarounds for bugs
# AMD_HAINAN is OFF by default. When switched ON, the AMD_HAINAN_WORKAROUND preprocessor symbol is
# defined for the build; the sources test that symbol with #ifdef to enable the workaround code.
option(AMD_HAINAN "Enables workaround for bug in AMD SI Hainan GPUs" OFF)
if(AMD_HAINAN)
add_definitions(-DAMD_HAINAN_WORKAROUND)
endif()

# Select between an OpenCL API (default) or a CUDA API (beta)
option(OPENCL "Build CLBlast with an OpenCL API (default)" ON)
option(CUDA "Build CLBlast with a CUDA API (beta)" OFF)
Expand Down
25 changes: 23 additions & 2 deletions src/clpp11.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -447,8 +447,15 @@ class Program {

// Source-based constructor with memory management
explicit Program(const Context &context, const std::string &source) {
const char *source_ptr = &source[0];
const auto length = source.length();
#ifdef AMD_HAINAN_WORKAROUND
  // Workaround build: append an empty kernel named 'null_kernel' to every program, so that a
  // kernel object of that name can later be created from the same program. The extended string
  // has to stay alive until clCreateProgramWithSource has consumed 'source_ptr'.
  // Note: no debug printing of the source here; dumping the complete kernel source to stdout on
  // every program creation floods the output of any application using the library.
  const std::string source_hainan = source + "\n__kernel void null_kernel() {}\n";
  const char *source_ptr = &source_hainan[0];
  const auto length = source_hainan.length();
#else
  // Regular build: hand the caller's source string to OpenCL unchanged
  const char *source_ptr = &source[0];
  const auto length = source.length();
#endif
auto status = CL_SUCCESS;
program_ = clCreateProgramWithSource(context(), 1, &source_ptr, &length, &status);
CLCudaAPIError::Check(status, "clCreateProgramWithSource");
Expand Down Expand Up @@ -768,6 +775,10 @@ class Kernel {
auto status = CL_SUCCESS;
*kernel_ = clCreateKernel(program->operator()(), name.c_str(), &status);
CLCudaAPIError::Check(status, "clCreateKernel");
#ifdef AMD_HAINAN_WORKAROUND
  // Workaround build: also create the empty 'null_kernel' that is appended to the program source.
  // The shared pointer must own storage before it is written through; dereferencing an empty
  // std::shared_ptr is undefined behaviour. The guard keeps this correct even if the member is
  // already allocated elsewhere (e.g. in a constructor initializer list not shown here).
  // NOTE(review): the deleter releases the handle with a plain clReleaseKernel call; align it
  // with the deleter used for 'kernel_' if that one performs additional error checking.
  if (!null_kernel_) {
    null_kernel_ = std::shared_ptr<cl_kernel>(new cl_kernel(nullptr), [](cl_kernel* k) {
      if (*k) { clReleaseKernel(*k); }
      delete k;
    });
  }
  *null_kernel_ = clCreateKernel(program->operator()(), "null_kernel", &status);
  CLCudaAPIError::Check(status, "clCreateKernel");
#endif
}

// Sets a kernel argument at the indicated position
Expand Down Expand Up @@ -831,12 +842,22 @@ class Kernel {
static_cast<cl_uint>(waitForEventsPlain.size()),
!waitForEventsPlain.empty() ? waitForEventsPlain.data() : nullptr,
event));
#ifdef AMD_HAINAN_WORKAROUND
  // Workaround build: enqueue the empty 'null_kernel' (a single work-item) right after the real
  // kernel. The wait-list is passed as (0, nullptr): OpenCL rejects a non-zero event count
  // combined with a NULL list (CL_INVALID_EVENT_WAIT_LIST), which made every launch with wait
  // events fail. No event is requested either, so the event already returned for the real
  // kernel is not overwritten (and leaked) by this auxiliary launch.
  // NOTE(review): ordering after the real kernel relies on the queue being in-order - confirm.
  const std::vector<size_t> nullRange = {1};
  CheckError(clEnqueueNDRangeKernel(queue(), *null_kernel_, static_cast<cl_uint>(nullRange.size()),
                                    nullptr, nullRange.data(), nullptr,
                                    0, nullptr, nullptr));
#endif
}

// Gives read-only access to the wrapped OpenCL kernel handle
const cl_kernel& operator()() const {
  return *kernel_;
}
private:
// Handle of the kernel created by name from the program (the result of clCreateKernel)
std::shared_ptr<cl_kernel> kernel_;
#ifdef AMD_HAINAN_WORKAROUND
// Handle of the empty "null_kernel"; only present in builds with the AMD Hainan workaround, where
// it is enqueued directly after the real kernel when launching
std::shared_ptr<cl_kernel> null_kernel_;
#endif

// Internal implementation for the recursive SetArguments function.
template <typename T>
Expand Down

0 comments on commit e8dea34

Please sign in to comment.