Skip to content
Merged
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
51 changes: 4 additions & 47 deletions sycl/plugins/opencl/pi_opencl.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -56,7 +56,6 @@ template <class To, class From> To cast(From value) {
// Names of OpenCL extension functions (the *INTEL entry points), stored as
// character arrays. Within this file they are passed as template arguments to
// getExtFuncFromContext, which yields the matching function pointer
// (e.g. clMemBlockingFreeName is used that way in piextUSMFree).
// NOTE(review): CONSTFIX is defined outside this view; presumably it expands
// to the const/constexpr qualifier needed for template-argument use - confirm.
CONSTFIX char clHostMemAllocName[] = "clHostMemAllocINTEL";
CONSTFIX char clDeviceMemAllocName[] = "clDeviceMemAllocINTEL";
CONSTFIX char clSharedMemAllocName[] = "clSharedMemAllocINTEL";
CONSTFIX char clMemFreeName[] = "clMemFreeINTEL";
CONSTFIX char clMemBlockingFreeName[] = "clMemBlockingFreeINTEL";
CONSTFIX char clCreateBufferWithPropertiesName[] =
"clCreateBufferWithPropertiesINTEL";
Expand Down Expand Up @@ -1017,7 +1016,7 @@ pi_result piextUSMSharedAlloc(void **result_ptr, pi_context context,
return RetVal;
}

/// Frees allocated USM memory
/// Frees allocated USM memory in a blocking manner
///
/// \param context is the pi_context of the allocation
/// \param ptr is the memory to be freed
Expand All @@ -1026,52 +1025,10 @@ pi_result piextUSMFree(pi_context context, void *ptr) {
// might be still running.
clMemBlockingFreeINTEL_fn FuncPtr = nullptr;

// We need to use clMemBlockingFreeINTEL here; however, due to a bug in the
// OpenCL CPU runtime, this call fails with CL_INVALID_EVENT on CPU devices in
// certain cases. As a temporary workaround, this function replicates the
// caching of extension function pointers in getExtFuncFromContext, while
// choosing clMemBlockingFreeINTEL for GPU and clMemFreeINTEL for other device
// types.
// TODO: remove this workaround when the new OpenCL CPU runtime version is
// uplifted in CI.
static_assert(
std::is_same<clMemBlockingFreeINTEL_fn, clMemFreeINTEL_fn>::value);
cl_uint deviceCount;
cl_int ret_err =
clGetContextInfo(cast<cl_context>(context), CL_CONTEXT_NUM_DEVICES,
sizeof(cl_uint), &deviceCount, nullptr);

if (ret_err != CL_SUCCESS || deviceCount < 1) {
return PI_ERROR_INVALID_CONTEXT;
}

std::vector<cl_device_id> devicesInCtx(deviceCount);
ret_err = clGetContextInfo(cast<cl_context>(context), CL_CONTEXT_DEVICES,
deviceCount * sizeof(cl_device_id),
devicesInCtx.data(), nullptr);

if (ret_err != CL_SUCCESS) {
return PI_ERROR_INVALID_CONTEXT;
}

bool useBlockingFree = true;
for (const cl_device_id &dev : devicesInCtx) {
cl_device_type devType = CL_DEVICE_TYPE_DEFAULT;
ret_err = clGetDeviceInfo(dev, CL_DEVICE_TYPE, sizeof(cl_device_type),
&devType, nullptr);
if (ret_err != CL_SUCCESS) {
return PI_ERROR_INVALID_DEVICE;
}
useBlockingFree &= devType == CL_DEVICE_TYPE_GPU;
}

pi_result RetVal = PI_ERROR_INVALID_OPERATION;
if (useBlockingFree)
RetVal =
getExtFuncFromContext<clMemBlockingFreeName, clMemBlockingFreeINTEL_fn>(
context, &FuncPtr);
else
RetVal = getExtFuncFromContext<clMemFreeName, clMemFreeINTEL_fn>(context,
&FuncPtr);
RetVal =
getExtFuncFromContext<clMemBlockingFreeName, clMemBlockingFreeINTEL_fn>(
context, &FuncPtr);

if (FuncPtr) {
RetVal = cast<pi_result>(FuncPtr(cast<cl_context>(context), ptr));
Expand Down