From 77b705dccd4dfe59885b6226456b2d3f803c57ec Mon Sep 17 00:00:00 2001 From: Aaron Greig Date: Wed, 18 Oct 2023 14:58:42 +0100 Subject: [PATCH 01/12] [OpenCL] Implement urEventSetCallback and urContextSetExtendedDeleter. --- source/adapters/opencl/context.cpp | 57 ++++++++++++++++++++++-- source/adapters/opencl/enqueue.cpp | 8 ++-- source/adapters/opencl/event.cpp | 69 +++++++++++++++++++++++++++--- 3 files changed, 121 insertions(+), 13 deletions(-) diff --git a/source/adapters/opencl/context.cpp b/source/adapters/opencl/context.cpp index 16c5999160..6bc05c2003 100644 --- a/source/adapters/opencl/context.cpp +++ b/source/adapters/opencl/context.cpp @@ -10,6 +10,10 @@ #include "context.hpp" +#include +#include +#include + ur_result_t cl_adapter::getDevicesFromContext( ur_context_handle_t hContext, std::unique_ptr> &DevicesInCtx) { @@ -130,8 +134,53 @@ UR_APIEXPORT ur_result_t UR_APICALL urContextCreateWithNativeHandle( } UR_APIEXPORT ur_result_t UR_APICALL urContextSetExtendedDeleter( - [[maybe_unused]] ur_context_handle_t hContext, - [[maybe_unused]] ur_context_extended_deleter_t pfnDeleter, - [[maybe_unused]] void *pUserData) { - return UR_RESULT_ERROR_UNSUPPORTED_FEATURE; + ur_context_handle_t hContext, ur_context_extended_deleter_t pfnDeleter, + void *pUserData) { + static std::unordered_map> + ContextCallbackMap; + static std::mutex ContextCallbackMutex; + + { + std::lock_guard Lock(ContextCallbackMutex); + // Callbacks can only be registered once and we need to avoid double + // allocating. + if (ContextCallbackMap.count(hContext) && + ContextCallbackMap[hContext].count(pfnDeleter)) { + return UR_RESULT_SUCCESS; + } + + ContextCallbackMap[hContext].insert(pfnDeleter); + } + + struct ContextCallback { + void execute() { + pfnDeleter(pUserData); + { + std::lock_guard Lock(*CallbackMutex); + (*CallbackMap)[hContext].erase(pfnDeleter); + if ((*CallbackMap)[hContext].empty()) { + CallbackMap->erase(hContext); + } + } + delete this; + } + ur_context_handle_t hContext; + ur_context_extended_deleter_t pfnDeleter; + void *pUserData; + std::unordered_map> *CallbackMap; + std::mutex *CallbackMutex; + }; + auto Callback = + new ContextCallback({hContext, pfnDeleter, pUserData, &ContextCallbackMap, + &ContextCallbackMutex}); + auto ClCallback = [](cl_context, void *pUserData) { + auto *C = static_cast(pUserData); + C->execute(); + }; + CL_RETURN_ON_FAILURE(clSetContextDestructorCallback( + cl_adapter::cast(hContext), ClCallback, Callback)); + + return UR_RESULT_SUCCESS; } diff --git a/source/adapters/opencl/enqueue.cpp b/source/adapters/opencl/enqueue.cpp index 29c5ad672e..5f41878182 100644 --- a/source/adapters/opencl/enqueue.cpp +++ b/source/adapters/opencl/enqueue.cpp @@ -350,9 +350,9 @@ UR_APIEXPORT ur_result_t UR_APICALL urEnqueueReadHostPipe( return mapCLErrorToUR(CLErr); } - clEnqueueReadHostPipeINTEL_fn FuncPtr = nullptr; + cl_ext::clEnqueueReadHostPipeINTEL_fn FuncPtr = nullptr; ur_result_t RetVal = - cl_ext::getExtFuncFromContext( + cl_ext::getExtFuncFromContext( CLContext, cl_ext::ExtFuncPtrCache->clEnqueueReadHostPipeINTELCache, cl_ext::EnqueueReadHostPipeName, &FuncPtr); @@ -382,9 +382,9 @@ UR_APIEXPORT ur_result_t UR_APICALL urEnqueueWriteHostPipe( return mapCLErrorToUR(CLErr); } - clEnqueueWriteHostPipeINTEL_fn FuncPtr = nullptr; + cl_ext::clEnqueueWriteHostPipeINTEL_fn FuncPtr = nullptr; ur_result_t RetVal = - cl_ext::getExtFuncFromContext( + cl_ext::getExtFuncFromContext( CLContext, cl_ext::ExtFuncPtrCache->clEnqueueWriteHostPipeINTELCache, cl_ext::EnqueueWriteHostPipeName, &FuncPtr); diff --git a/source/adapters/opencl/event.cpp b/source/adapters/opencl/event.cpp index 78303a0829..1d75fa7f28 100644 --- a/source/adapters/opencl/event.cpp +++ b/source/adapters/opencl/event.cpp @@ -10,6 +10,10 @@ #include "common.hpp" +#include +#include +#include + cl_event_info convertUREventInfoToCL(const ur_event_info_t PropName) { switch (PropName) { case UR_EVENT_INFO_COMMAND_QUEUE: @@ -128,9 +132,64 @@ UR_APIEXPORT ur_result_t UR_APICALL urEventGetProfilingInfo( UR_APIEXPORT ur_result_t UR_APICALL urEventSetCallback(ur_event_handle_t hEvent, ur_execution_info_t execStatus, ur_event_callback_t pfnNotify, void *pUserData) { - std::ignore = hEvent; - std::ignore = execStatus; - std::ignore = pfnNotify; - std::ignore = pUserData; - return UR_RESULT_ERROR_UNSUPPORTED_FEATURE; + static std::unordered_map> + EventCallbackMap; + static std::mutex EventCallbackMutex; + + { + std::lock_guard Lock(EventCallbackMutex); + // Callbacks can only be registered once and we need to avoid double + // allocating. + if (EventCallbackMap.count(hEvent) && + EventCallbackMap[hEvent].count(pfnNotify)) { + return UR_RESULT_SUCCESS; + } + + EventCallbackMap[hEvent].insert(pfnNotify); + } + + cl_int CallbackType = 0; + switch (execStatus) { + case UR_EXECUTION_INFO_EXECUTION_INFO_SUBMITTED: + CallbackType = CL_SUBMITTED; + break; + case UR_EXECUTION_INFO_EXECUTION_INFO_RUNNING: + CallbackType = CL_RUNNING; + break; + case UR_EXECUTION_INFO_EXECUTION_INFO_COMPLETE: + CallbackType = CL_COMPLETE; + break; + default: + return UR_RESULT_ERROR_INVALID_ENUMERATION; + } + + struct EventCallback { + void execute() { + pfnNotify(hEvent, execStatus, pUserData); + { + std::lock_guard Lock(*CallbackMutex); + (*CallbackMap)[hEvent].erase(pfnNotify); + if ((*CallbackMap)[hEvent].empty()) { + CallbackMap->erase(hEvent); + } + } + delete this; + } + ur_event_handle_t hEvent; + ur_execution_info_t execStatus; + ur_event_callback_t pfnNotify; + void *pUserData; + std::unordered_map> + *CallbackMap; + std::mutex *CallbackMutex; + }; + auto Callback = new EventCallback({hEvent, execStatus, pfnNotify, pUserData, + &EventCallbackMap, &EventCallbackMutex}); + auto ClCallback = [](cl_event, cl_int, void *pUserData) { + auto *C = static_cast(pUserData); + C->execute(); + }; + CL_RETURN_ON_FAILURE(clSetEventCallback(cl_adapter::cast(hEvent), + CallbackType, ClCallback, Callback)); + return UR_RESULT_SUCCESS; } From 2792092121d2f911d9c90517ee89d8ab29552e9d Mon Sep 17 00:00:00 2001 From: Aaron Greig Date: Fri, 20 Oct 2023 09:57:12 +0100 Subject: [PATCH 02/12] [OpenCL] Add more mappings from CL error codes to UR error codes. Also merge urQueueCreate InvalidValueProperties test into InvalidQueueProperties test. --- source/adapters/opencl/common.cpp | 17 +++++++++++++++++ test/conformance/queue/urQueueCreate.cpp | 19 ++++++++----------- 2 files changed, 25 insertions(+), 11 deletions(-) diff --git a/source/adapters/opencl/common.cpp b/source/adapters/opencl/common.cpp index 2b0e7b6a27..77a51694dd 100644 --- a/source/adapters/opencl/common.cpp +++ b/source/adapters/opencl/common.cpp @@ -60,6 +60,23 @@ ur_result_t mapCLErrorToUR(cl_int Result) { return UR_RESULT_ERROR_OUT_OF_RESOURCES; case CL_INVALID_MEM_OBJECT: return UR_RESULT_ERROR_INVALID_MEM_OBJECT; + case CL_INVALID_QUEUE_PROPERTIES: + return UR_RESULT_ERROR_INVALID_QUEUE_PROPERTIES; + case CL_INVALID_BUFFER_SIZE: + return UR_RESULT_ERROR_INVALID_BUFFER_SIZE; + case CL_INVALID_IMAGE_SIZE: + return UR_RESULT_ERROR_INVALID_IMAGE_SIZE; + case CL_INVALID_IMAGE_FORMAT_DESCRIPTOR: + case CL_INVALID_IMAGE_DESCRIPTOR: + return UR_RESULT_ERROR_INVALID_IMAGE_FORMAT_DESCRIPTOR; + case CL_IMAGE_FORMAT_NOT_SUPPORTED: + return UR_RESULT_ERROR_UNSUPPORTED_IMAGE_FORMAT; + case CL_PROFILING_INFO_NOT_AVAILABLE: + return UR_RESULT_ERROR_PROFILING_INFO_NOT_AVAILABLE; + case CL_LINK_PROGRAM_FAILURE: + return UR_RESULT_ERROR_PROGRAM_LINK_FAILURE; + case CL_INVALID_ARG_INDEX: + return UR_RESULT_ERROR_INVALID_KERNEL_ARGUMENT_INDEX; default: return UR_RESULT_ERROR_UNKNOWN; } diff --git a/test/conformance/queue/urQueueCreate.cpp b/test/conformance/queue/urQueueCreate.cpp index 0f99009abd..90813b20a5 100644 --- a/test/conformance/queue/urQueueCreate.cpp +++ b/test/conformance/queue/urQueueCreate.cpp @@ -65,26 +65,23 @@ TEST_P(urQueueCreateTest, InvalidNullPointerQueue) { urQueueCreate(context, device, 0, nullptr)); } -TEST_P(urQueueCreateTest, InvalidValueProperties) { - ur_queue_handle_t queue = nullptr; +TEST_P(urQueueCreateTest, InvalidQueueProperties) { ur_queue_properties_t props = { /*.stype =*/UR_STRUCTURE_TYPE_QUEUE_PROPERTIES, /*.pNext =*/nullptr, /*.flags =*/UR_QUEUE_FLAG_FORCE_UINT32, }; - ASSERT_EQ_RESULT(UR_RESULT_ERROR_INVALID_VALUE, - urQueueCreate(context, device, &props, &queue)); -} -TEST_P(urQueueCreateTest, InvalidQueueProperties) { - ur_queue_properties_t props = { - /*.stype =*/UR_STRUCTURE_TYPE_QUEUE_PROPERTIES, - /*.pNext =*/nullptr, - /*.flags =*/UR_QUEUE_FLAG_PRIORITY_HIGH | UR_QUEUE_FLAG_PRIORITY_LOW, - }; + // Initial value is just not a valid enum + { + ur_queue_handle_t queue = nullptr; + ASSERT_EQ_RESULT(UR_RESULT_ERROR_INVALID_QUEUE_PROPERTIES, + urQueueCreate(context, device, &props, &queue)); + } // It should be an error to specify both low/high priorities { ur_queue_handle_t queue = nullptr; + props.flags = UR_QUEUE_FLAG_PRIORITY_HIGH | UR_QUEUE_FLAG_PRIORITY_LOW; ASSERT_EQ_RESULT(UR_RESULT_ERROR_INVALID_QUEUE_PROPERTIES, urQueueCreate(context, device, &props, &queue)); } From 16e28e289d1ff28fee3036ef5f31fc78a246ada0 Mon Sep 17 00:00:00 2001 From: Aaron Greig Date: Fri, 20 Oct 2023 10:31:36 +0100 Subject: [PATCH 03/12] Update match files to reflect removed test case --- test/conformance/queue/queue_adapter_cuda.match | 1 - test/conformance/queue/queue_adapter_hip.match | 1 - test/conformance/queue/queue_adapter_level_zero.match | 1 - 3 files changed, 3 deletions(-) diff --git a/test/conformance/queue/queue_adapter_cuda.match b/test/conformance/queue/queue_adapter_cuda.match index 3b2f27c1d6..f7967fb388 100644 --- a/test/conformance/queue/queue_adapter_cuda.match +++ b/test/conformance/queue/queue_adapter_cuda.match @@ -1,4 +1,3 @@ -urQueueCreateTest.InvalidValueProperties/NVIDIA_CUDA_BACKEND___{{.*}}_ urQueueCreateTest.InvalidQueueProperties/NVIDIA_CUDA_BACKEND___{{.*}}_ urQueueCreateWithNativeHandleTest.Success/NVIDIA_CUDA_BACKEND___{{.*}}_ urQueueGetInfoTestWithInfoParam.Success/NVIDIA_CUDA_BACKEND___{{.*}}___UR_QUEUE_INFO_DEVICE_DEFAULT diff --git a/test/conformance/queue/queue_adapter_hip.match b/test/conformance/queue/queue_adapter_hip.match index 6cce588dc4..16166a827c 100644 --- a/test/conformance/queue/queue_adapter_hip.match +++ b/test/conformance/queue/queue_adapter_hip.match @@ -1,4 +1,3 @@ -urQueueCreateTest.InvalidValueProperties/AMD_HIP_BACKEND___{{.*}}_ urQueueCreateTest.InvalidQueueProperties/AMD_HIP_BACKEND___{{.*}}_ urQueueCreateWithParamTest.SuccessWithProperties/AMD_HIP_BACKEND___{{.*}}___UR_QUEUE_FLAG_OUT_OF_ORDER_EXEC_MODE_ENABLE urQueueCreateWithParamTest.SuccessWithProperties/AMD_HIP_BACKEND___{{.*}}___UR_QUEUE_FLAG_PROFILING_ENABLE diff --git a/test/conformance/queue/queue_adapter_level_zero.match b/test/conformance/queue/queue_adapter_level_zero.match index 0013d5b397..9ceebd4233 100644 --- a/test/conformance/queue/queue_adapter_level_zero.match +++ b/test/conformance/queue/queue_adapter_level_zero.match @@ -1,3 +1,2 @@ -urQueueCreateTest.InvalidValueProperties/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}_ urQueueCreateTest.InvalidQueueProperties/Intel_R__oneAPI_Unified_Runtime_over_Level_Zero___{{.*}}_ {{Segmentation fault|Aborted}} From 5c8a86bc8659f646135b7dfc548a29c2290ae8eb Mon Sep 17 00:00:00 2001 From: Aaron Greig Date: Fri, 20 Oct 2023 11:57:36 +0100 Subject: [PATCH 04/12] [OpenCL] Fix some unchecked dereferencing of optional params. --- source/adapters/opencl/kernel.cpp | 3 ++- source/adapters/opencl/memory.cpp | 3 ++- 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/source/adapters/opencl/kernel.cpp b/source/adapters/opencl/kernel.cpp index 80b1502854..289ddd81fd 100644 --- a/source/adapters/opencl/kernel.cpp +++ b/source/adapters/opencl/kernel.cpp @@ -199,7 +199,8 @@ urKernelGetSubGroupInfo(ur_kernel_handle_t hKernel, ur_device_handle_t hDevice, } } - *(static_cast(pPropValue)) = static_cast(RetVal); + if (pPropValue) + *(static_cast(pPropValue)) = static_cast(RetVal); if (pPropSizeRet) *pPropSizeRet = sizeof(uint32_t); diff --git a/source/adapters/opencl/memory.cpp b/source/adapters/opencl/memory.cpp index 279faad376..ee3c502006 100644 --- a/source/adapters/opencl/memory.cpp +++ b/source/adapters/opencl/memory.cpp @@ -268,9 +268,10 @@ UR_APIEXPORT ur_result_t UR_APICALL urMemBufferCreate( } } + void *HostPtr = pProperties ? pProperties->pHost : nullptr; *phBuffer = reinterpret_cast(clCreateBuffer( cl_adapter::cast(hContext), static_cast(flags), - size, pProperties->pHost, cl_adapter::cast(&RetErr))); + size, HostPtr, cl_adapter::cast(&RetErr))); CL_RETURN_ON_FAILURE(RetErr); return UR_RESULT_SUCCESS; From bfb3daccc9d88b9484d0544ce8a8d35fd6385234 Mon Sep 17 00:00:00 2001 From: Aaron Greig Date: Wed, 18 Oct 2023 17:20:36 +0100 Subject: [PATCH 05/12] [OpenCL] Implement urEnqueueUSMMemcpy2D and allow large fill patterns. Normally OpenCL limits fill type operations to a max pattern size of 128, this patch includes a workaround to extend that. --- source/adapters/opencl/enqueue.cpp | 49 ++++++++-- source/adapters/opencl/usm.cpp | 144 ++++++++++++++++++++++++----- 2 files changed, 165 insertions(+), 28 deletions(-) diff --git a/source/adapters/opencl/enqueue.cpp b/source/adapters/opencl/enqueue.cpp index 29c5ad672e..ab5126c53f 100644 --- a/source/adapters/opencl/enqueue.cpp +++ b/source/adapters/opencl/enqueue.cpp @@ -178,12 +178,47 @@ UR_APIEXPORT ur_result_t UR_APICALL urEnqueueMemBufferFill( size_t patternSize, size_t offset, size_t size, uint32_t numEventsInWaitList, const ur_event_handle_t *phEventWaitList, ur_event_handle_t *phEvent) { + // CL FillBuffer only allows pattern sizes up to the largest CL type: + // long16/double16 + if (patternSize <= 128) { + CL_RETURN_ON_FAILURE( + clEnqueueFillBuffer(cl_adapter::cast(hQueue), + cl_adapter::cast(hBuffer), pPattern, + patternSize, offset, size, numEventsInWaitList, + cl_adapter::cast(phEventWaitList), + cl_adapter::cast(phEvent))); + return UR_RESULT_SUCCESS; + } + + auto NumValues = size / sizeof(uint64_t); + auto HostBuffer = new uint64_t[NumValues]; + auto NumChunks = patternSize / sizeof(uint64_t); + for (size_t i = 0; i < NumValues; i++) { + HostBuffer[i] = static_cast(pPattern)[i % NumChunks]; + } - CL_RETURN_ON_FAILURE(clEnqueueFillBuffer( + cl_event WriteEvent = nullptr; + auto ClErr = clEnqueueWriteBuffer( cl_adapter::cast(hQueue), - cl_adapter::cast(hBuffer), pPattern, patternSize, offset, size, + cl_adapter::cast(hBuffer), false, offset, size, HostBuffer, numEventsInWaitList, cl_adapter::cast(phEventWaitList), - cl_adapter::cast(phEvent))); + &WriteEvent); + if (ClErr != CL_SUCCESS) { + delete[] HostBuffer; + CL_RETURN_ON_FAILURE(ClErr); + } + + auto DeleteCallback = [](cl_event, cl_int, void *pUserData) { + delete[] static_cast(pUserData); + }; + CL_RETURN_ON_FAILURE( + clSetEventCallback(WriteEvent, CL_COMPLETE, DeleteCallback, HostBuffer)); + + if (phEvent) { + *phEvent = cl_adapter::cast(WriteEvent); + } else { + CL_RETURN_ON_FAILURE(clReleaseEvent(WriteEvent)); + } return UR_RESULT_SUCCESS; } @@ -350,9 +385,9 @@ UR_APIEXPORT ur_result_t UR_APICALL urEnqueueReadHostPipe( return mapCLErrorToUR(CLErr); } - clEnqueueReadHostPipeINTEL_fn FuncPtr = nullptr; + cl_ext::clEnqueueReadHostPipeINTEL_fn FuncPtr = nullptr; ur_result_t RetVal = - cl_ext::getExtFuncFromContext( + cl_ext::getExtFuncFromContext( CLContext, cl_ext::ExtFuncPtrCache->clEnqueueReadHostPipeINTELCache, cl_ext::EnqueueReadHostPipeName, &FuncPtr); @@ -382,9 +417,9 @@ UR_APIEXPORT ur_result_t UR_APICALL urEnqueueWriteHostPipe( return mapCLErrorToUR(CLErr); } - clEnqueueWriteHostPipeINTEL_fn FuncPtr = nullptr; + cl_ext::clEnqueueWriteHostPipeINTEL_fn FuncPtr = nullptr; ur_result_t RetVal = - cl_ext::getExtFuncFromContext( + cl_ext::getExtFuncFromContext( CLContext, cl_ext::ExtFuncPtrCache->clEnqueueWriteHostPipeINTELCache, cl_ext::EnqueueWriteHostPipeName, &FuncPtr); diff --git a/source/adapters/opencl/usm.cpp b/source/adapters/opencl/usm.cpp index afa22ffbb9..d6008d51e7 100644 --- a/source/adapters/opencl/usm.cpp +++ b/source/adapters/opencl/usm.cpp @@ -197,7 +197,6 @@ UR_APIEXPORT ur_result_t UR_APICALL urEnqueueUSMFill( ur_queue_handle_t hQueue, void *ptr, size_t patternSize, const void *pPattern, size_t size, uint32_t numEventsInWaitList, const ur_event_handle_t *phEventWaitList, ur_event_handle_t *phEvent) { - // Have to look up the context from the kernel cl_context CLContext; cl_int CLErr = clGetCommandQueueInfo( @@ -207,20 +206,82 @@ UR_APIEXPORT ur_result_t UR_APICALL urEnqueueUSMFill( return mapCLErrorToUR(CLErr); } - clEnqueueMemFillINTEL_fn FuncPtr = nullptr; - ur_result_t RetVal = cl_ext::getExtFuncFromContext( - CLContext, cl_ext::ExtFuncPtrCache->clEnqueueMemFillINTELCache, - cl_ext::EnqueueMemFillName, &FuncPtr); + if (patternSize <= 128) { + clEnqueueMemFillINTEL_fn EnqueueMemFill = nullptr; + UR_RETURN_ON_FAILURE( + cl_ext::getExtFuncFromContext( + CLContext, cl_ext::ExtFuncPtrCache->clEnqueueMemFillINTELCache, + cl_ext::EnqueueMemFillName, &EnqueueMemFill)); + + CL_RETURN_ON_FAILURE( + EnqueueMemFill(cl_adapter::cast(hQueue), ptr, + pPattern, patternSize, size, numEventsInWaitList, + cl_adapter::cast(phEventWaitList), + cl_adapter::cast(phEvent))); + return UR_RESULT_SUCCESS; + } - if (FuncPtr) { - RetVal = mapCLErrorToUR( - FuncPtr(cl_adapter::cast(hQueue), ptr, pPattern, - patternSize, size, numEventsInWaitList, - cl_adapter::cast(phEventWaitList), - cl_adapter::cast(phEvent))); + // OpenCL only supports pattern sizes as large as the largest CL type + // (double16/long16 - 128 bytes), anything larger we need to do on the host + // side and copy it into the target allocation. + clHostMemAllocINTEL_fn HostMemAlloc = nullptr; + UR_RETURN_ON_FAILURE(cl_ext::getExtFuncFromContext( + CLContext, cl_ext::ExtFuncPtrCache->clHostMemAllocINTELCache, + cl_ext::HostMemAllocName, &HostMemAlloc)); + + clEnqueueMemcpyINTEL_fn USMMemcpy = nullptr; + UR_RETURN_ON_FAILURE(cl_ext::getExtFuncFromContext( + CLContext, cl_ext::ExtFuncPtrCache->clEnqueueMemcpyINTELCache, + cl_ext::EnqueueMemcpyName, &USMMemcpy)); + + clMemBlockingFreeINTEL_fn USMFree = nullptr; + UR_RETURN_ON_FAILURE(cl_ext::getExtFuncFromContext( + CLContext, cl_ext::ExtFuncPtrCache->clMemBlockingFreeINTELCache, + cl_ext::MemBlockingFreeName, &USMFree)); + + cl_int ClErr = CL_SUCCESS; + auto HostBuffer = static_cast( + HostMemAlloc(CLContext, nullptr, size, 0, &ClErr)); + CL_RETURN_ON_FAILURE(ClErr); + + auto NumValues = size / sizeof(uint64_t); + auto NumChunks = patternSize / sizeof(uint64_t); + for (size_t i = 0; i < NumValues; i++) { + HostBuffer[i] = static_cast(pPattern)[i % NumChunks]; } - return RetVal; + cl_event CopyEvent = nullptr; + CL_RETURN_ON_FAILURE(USMMemcpy( + cl_adapter::cast(hQueue), false, ptr, HostBuffer, size, + numEventsInWaitList, cl_adapter::cast(phEventWaitList), + &CopyEvent)); + + struct DeleteCallbackInfo { + clMemBlockingFreeINTEL_fn USMFree; + cl_context CLContext; + void *HostBuffer; + void execute() { + USMFree(CLContext, HostBuffer); + delete this; + } + }; + + auto Info = new DeleteCallbackInfo{USMFree, CLContext, HostBuffer}; + + auto DeleteCallback = [](cl_event, cl_int, void *pUserData) { + static_cast(pUserData)->execute(); + }; + + CL_RETURN_ON_FAILURE( + clSetEventCallback(CopyEvent, CL_COMPLETE, DeleteCallback, Info)); + + if (phEvent) { + *phEvent = cl_adapter::cast(CopyEvent); + } else { + CL_RETURN_ON_FAILURE(clReleaseEvent(CopyEvent)); + } + + return UR_RESULT_SUCCESS; } UR_APIEXPORT ur_result_t UR_APICALL urEnqueueUSMMemcpy( @@ -343,18 +404,59 @@ UR_APIEXPORT ur_result_t UR_APICALL urEnqueueUSMFill2D( [[maybe_unused]] uint32_t numEventsInWaitList, [[maybe_unused]] const ur_event_handle_t *phEventWaitList, [[maybe_unused]] ur_event_handle_t *phEvent) { - return UR_RESULT_ERROR_INVALID_OPERATION; + return UR_RESULT_ERROR_UNSUPPORTED_FEATURE; } UR_APIEXPORT ur_result_t UR_APICALL urEnqueueUSMMemcpy2D( - [[maybe_unused]] ur_queue_handle_t hQueue, [[maybe_unused]] bool blocking, - [[maybe_unused]] void *pDst, [[maybe_unused]] size_t dstPitch, - [[maybe_unused]] const void *pSrc, [[maybe_unused]] size_t srcPitch, - [[maybe_unused]] size_t width, [[maybe_unused]] size_t height, - [[maybe_unused]] uint32_t numEventsInWaitList, - [[maybe_unused]] const ur_event_handle_t *phEventWaitList, - [[maybe_unused]] ur_event_handle_t *phEvent) { - return UR_RESULT_ERROR_INVALID_OPERATION; + ur_queue_handle_t hQueue, bool blocking, void *pDst, size_t dstPitch, + const void *pSrc, size_t srcPitch, size_t width, size_t height, + uint32_t numEventsInWaitList, const ur_event_handle_t *phEventWaitList, + ur_event_handle_t *phEvent) { + cl_context CLContext; + CL_RETURN_ON_FAILURE(clGetCommandQueueInfo( + cl_adapter::cast(hQueue), CL_QUEUE_CONTEXT, + sizeof(cl_context), &CLContext, nullptr)); + + clEnqueueMemcpyINTEL_fn FuncPtr = nullptr; + ur_result_t RetVal = cl_ext::getExtFuncFromContext( + CLContext, cl_ext::ExtFuncPtrCache->clEnqueueMemcpyINTELCache, + cl_ext::EnqueueMemcpyName, &FuncPtr); + + if (!FuncPtr) { + return RetVal; + } + + std::vector Events; + for (size_t HeightIndex = 0; HeightIndex < height; HeightIndex++) { + cl_event Event = nullptr; + auto ClResult = + FuncPtr(cl_adapter::cast(hQueue), false, + static_cast(pDst) + dstPitch * HeightIndex, + static_cast(pSrc) + srcPitch * HeightIndex, + width, numEventsInWaitList, + cl_adapter::cast(phEventWaitList), &Event); + Events.push_back(Event); + if (ClResult != CL_SUCCESS) { + for (const auto &E : Events) { + clReleaseEvent(E); + } + CL_RETURN_ON_FAILURE(ClResult); + } + } + cl_int ClResult = CL_SUCCESS; + if (blocking) { + ClResult = clWaitForEvents(Events.size(), Events.data()); + } + if (phEvent && ClResult == CL_SUCCESS) { + ClResult = clEnqueueBarrierWithWaitList( + cl_adapter::cast(hQueue), Events.size(), + Events.data(), cl_adapter::cast(phEvent)); + } + for (const auto &E : Events) { + clReleaseEvent(E); + } + CL_RETURN_ON_FAILURE(ClResult) + return UR_RESULT_SUCCESS; } UR_APIEXPORT ur_result_t UR_APICALL From 603dcfbb524c7d3b72641c543b88aef551c2c784 Mon Sep 17 00:00:00 2001 From: Aaron Greig Date: Tue, 31 Oct 2023 15:14:22 +0000 Subject: [PATCH 06/12] Address feedback --- source/adapters/opencl/enqueue.cpp | 12 +++++++-- source/adapters/opencl/usm.cpp | 39 +++++++++++++++++++++--------- 2 files changed, 37 insertions(+), 14 deletions(-) diff --git a/source/adapters/opencl/enqueue.cpp b/source/adapters/opencl/enqueue.cpp index ab5126c53f..5dff7066ae 100644 --- a/source/adapters/opencl/enqueue.cpp +++ b/source/adapters/opencl/enqueue.cpp @@ -211,8 +211,16 @@ UR_APIEXPORT ur_result_t UR_APICALL urEnqueueMemBufferFill( auto DeleteCallback = [](cl_event, cl_int, void *pUserData) { delete[] static_cast(pUserData); }; - CL_RETURN_ON_FAILURE( - clSetEventCallback(WriteEvent, CL_COMPLETE, DeleteCallback, HostBuffer)); + ClErr = + clSetEventCallback(WriteEvent, CL_COMPLETE, DeleteCallback, HostBuffer); + if (ClErr != CL_SUCCESS) { + // We can attempt to recover gracefully by attempting to wait for the write + // to finish and deleting the host buffer. + clWaitForEvents(1, &WriteEvent); + delete[] HostBuffer; + clReleaseEvent(WriteEvent); + CL_RETURN_ON_FAILURE(ClErr); + } if (phEvent) { *phEvent = cl_adapter::cast(WriteEvent); diff --git a/source/adapters/opencl/usm.cpp b/source/adapters/opencl/usm.cpp index d6008d51e7..b411fd1bcd 100644 --- a/source/adapters/opencl/usm.cpp +++ b/source/adapters/opencl/usm.cpp @@ -257,24 +257,39 @@ UR_APIEXPORT ur_result_t UR_APICALL urEnqueueUSMFill( &CopyEvent)); struct DeleteCallbackInfo { + DeleteCallbackInfo(clMemBlockingFreeINTEL_fn USMFree, cl_context CLContext, + void *HostBuffer) + : USMFree(USMFree), CLContext(CLContext), HostBuffer(HostBuffer) { + clRetainContext(CLContext); + } + ~DeleteCallbackInfo() { + USMFree(CLContext, HostBuffer); + clReleaseContext(CLContext); + } + DeleteCallbackInfo(const DeleteCallbackInfo &) = delete; + DeleteCallbackInfo &operator=(const DeleteCallbackInfo &) = delete; + clMemBlockingFreeINTEL_fn USMFree; cl_context CLContext; void *HostBuffer; - void execute() { - USMFree(CLContext, HostBuffer); - delete this; - } }; - auto Info = new DeleteCallbackInfo{USMFree, CLContext, HostBuffer}; + auto Info = new DeleteCallbackInfo(USMFree, CLContext, HostBuffer); auto DeleteCallback = [](cl_event, cl_int, void *pUserData) { - static_cast(pUserData)->execute(); + auto Info = static_cast(pUserData); + delete Info; }; - CL_RETURN_ON_FAILURE( - clSetEventCallback(CopyEvent, CL_COMPLETE, DeleteCallback, Info)); - + ClErr = clSetEventCallback(CopyEvent, CL_COMPLETE, DeleteCallback, Info); + if (ClErr != CL_SUCCESS) { + // We can attempt to recover gracefully by attempting to wait for the copy + // to finish and deleting the info struct here. + clWaitForEvents(1, &CopyEvent); + delete Info; + clReleaseEvent(CopyEvent); + CL_RETURN_ON_FAILURE(ClErr); + } if (phEvent) { *phEvent = cl_adapter::cast(CopyEvent); } else { @@ -426,7 +441,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urEnqueueUSMMemcpy2D( return RetVal; } - std::vector Events; + std::vector Events(height); for (size_t HeightIndex = 0; HeightIndex < height; HeightIndex++) { cl_event Event = nullptr; auto ClResult = @@ -435,7 +450,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urEnqueueUSMMemcpy2D( static_cast(pSrc) + srcPitch * HeightIndex, width, numEventsInWaitList, cl_adapter::cast(phEventWaitList), &Event); - Events.push_back(Event); + Events[HeightIndex] = Event; if (ClResult != CL_SUCCESS) { for (const auto &E : Events) { clReleaseEvent(E); @@ -453,7 +468,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urEnqueueUSMMemcpy2D( Events.data(), cl_adapter::cast(phEvent)); } for (const auto &E : Events) { - clReleaseEvent(E); + CL_RETURN_ON_FAILURE(clReleaseEvent(E)); } CL_RETURN_ON_FAILURE(ClResult) return UR_RESULT_SUCCESS; From fc34c26a8fb3ca24795663a4312e2b93aa9635d3 Mon Sep 17 00:00:00 2001 From: Aaron Greig Date: Wed, 1 Nov 2023 11:12:57 +0000 Subject: [PATCH 07/12] [OpenCL] Make USM functions return UR_RESULT_ERROR_INVALID_USM_SIZE Also ignore flags in no-op urEnqueueUSMPrefetch hint. --- source/adapters/opencl/usm.cpp | 93 ++++++++++++++++++++-------------- 1 file changed, 55 insertions(+), 38 deletions(-) diff --git a/source/adapters/opencl/usm.cpp b/source/adapters/opencl/usm.cpp index afa22ffbb9..d8cd067f4b 100644 --- a/source/adapters/opencl/usm.cpp +++ b/source/adapters/opencl/usm.cpp @@ -15,7 +15,6 @@ urUSMHostAlloc(ur_context_handle_t hContext, const ur_usm_desc_t *pUSMDesc, ur_usm_pool_handle_t, size_t size, void **ppMem) { void *Ptr = nullptr; - ur_result_t RetVal = UR_RESULT_ERROR_INVALID_OPERATION; uint32_t Alignment = pUSMDesc ? pUSMDesc->align : 0; cl_mem_alloc_flags_intel Flags = 0; @@ -40,23 +39,28 @@ urUSMHostAlloc(ur_context_handle_t hContext, const ur_usm_desc_t *pUSMDesc, // First we need to look up the function pointer clHostMemAllocINTEL_fn FuncPtr = nullptr; cl_context CLContext = cl_adapter::cast(hContext); - RetVal = cl_ext::getExtFuncFromContext( - CLContext, cl_ext::ExtFuncPtrCache->clHostMemAllocINTELCache, - cl_ext::HostMemAllocName, &FuncPtr); + if (auto UrResult = cl_ext::getExtFuncFromContext( + CLContext, cl_ext::ExtFuncPtrCache->clHostMemAllocINTELCache, + cl_ext::HostMemAllocName, &FuncPtr)) { + return UrResult; + } if (FuncPtr) { - Ptr = FuncPtr(CLContext, Properties, size, Alignment, - cl_adapter::cast(&RetVal)); + cl_int ClResult = CL_SUCCESS; + Ptr = FuncPtr(CLContext, Properties, size, Alignment, &ClResult); + if (ClResult == CL_INVALID_BUFFER_SIZE) { + return UR_RESULT_ERROR_INVALID_USM_SIZE; + } + CL_RETURN_ON_FAILURE(ClResult); } *ppMem = Ptr; - // ensure we aligned the allocation correctly - if (RetVal == UR_RESULT_SUCCESS && Alignment != 0) - assert(reinterpret_cast(*ppMem) % Alignment == 0 && - "allocation not aligned correctly"); + assert((Alignment == 0 || + reinterpret_cast(*ppMem) % Alignment == 0) && + "Allocation not aligned correctly!"); - return RetVal; + return UR_RESULT_SUCCESS; } UR_APIEXPORT ur_result_t UR_APICALL @@ -65,7 +69,6 @@ urUSMDeviceAlloc(ur_context_handle_t hContext, ur_device_handle_t hDevice, size_t size, void **ppMem) { void *Ptr = nullptr; - ur_result_t RetVal = UR_RESULT_ERROR_INVALID_OPERATION; uint32_t Alignment = pUSMDesc ? pUSMDesc->align : 0; cl_mem_alloc_flags_intel Flags = 0; @@ -92,24 +95,30 @@ urUSMDeviceAlloc(ur_context_handle_t hContext, ur_device_handle_t hDevice, // First we need to look up the function pointer clDeviceMemAllocINTEL_fn FuncPtr = nullptr; cl_context CLContext = cl_adapter::cast(hContext); - RetVal = cl_ext::getExtFuncFromContext( - CLContext, cl_ext::ExtFuncPtrCache->clDeviceMemAllocINTELCache, - cl_ext::DeviceMemAllocName, &FuncPtr); + if (auto UrResult = cl_ext::getExtFuncFromContext( + CLContext, cl_ext::ExtFuncPtrCache->clDeviceMemAllocINTELCache, + cl_ext::DeviceMemAllocName, &FuncPtr)) { + return UrResult; + } if (FuncPtr) { + cl_int ClResult = CL_SUCCESS; Ptr = FuncPtr(CLContext, cl_adapter::cast(hDevice), cl_adapter::cast(Properties), size, - Alignment, cl_adapter::cast(&RetVal)); + Alignment, &ClResult); + if (ClResult == CL_INVALID_BUFFER_SIZE) { + return UR_RESULT_ERROR_INVALID_USM_SIZE; + } + CL_RETURN_ON_FAILURE(ClResult); } *ppMem = Ptr; - // ensure we aligned the allocation correctly - if (RetVal == UR_RESULT_SUCCESS && Alignment != 0) - assert(reinterpret_cast(*ppMem) % Alignment == 0 && - "allocation not aligned correctly"); + assert((Alignment == 0 || + reinterpret_cast(*ppMem) % Alignment == 0) && + "Allocation not aligned correctly!"); - return RetVal; + return UR_RESULT_SUCCESS; } UR_APIEXPORT ur_result_t UR_APICALL @@ -118,7 +127,6 @@ urUSMSharedAlloc(ur_context_handle_t hContext, ur_device_handle_t hDevice, size_t size, void **ppMem) { void *Ptr = nullptr; - ur_result_t RetVal = UR_RESULT_ERROR_INVALID_OPERATION; uint32_t Alignment = pUSMDesc ? pUSMDesc->align : 0; cl_mem_alloc_flags_intel Flags = 0; @@ -155,22 +163,29 @@ urUSMSharedAlloc(ur_context_handle_t hContext, ur_device_handle_t hDevice, // First we need to look up the function pointer clSharedMemAllocINTEL_fn FuncPtr = nullptr; cl_context CLContext = cl_adapter::cast(hContext); - RetVal = cl_ext::getExtFuncFromContext( - CLContext, cl_ext::ExtFuncPtrCache->clSharedMemAllocINTELCache, - cl_ext::SharedMemAllocName, &FuncPtr); + if (auto UrResult = cl_ext::getExtFuncFromContext( + CLContext, cl_ext::ExtFuncPtrCache->clSharedMemAllocINTELCache, + cl_ext::SharedMemAllocName, &FuncPtr)) { + return UrResult; + } if (FuncPtr) { + cl_int ClResult = CL_SUCCESS; Ptr = FuncPtr(CLContext, cl_adapter::cast(hDevice), cl_adapter::cast(Properties), size, - Alignment, cl_adapter::cast(&RetVal)); + Alignment, cl_adapter::cast(&ClResult)); + if (ClResult == CL_INVALID_BUFFER_SIZE) { + return UR_RESULT_ERROR_INVALID_USM_SIZE; + } + CL_RETURN_ON_FAILURE(ClResult); } *ppMem = Ptr; - assert(Alignment == 0 || - (RetVal == UR_RESULT_SUCCESS && - reinterpret_cast(*ppMem) % Alignment == 0)); - return RetVal; + assert((Alignment == 0 || + reinterpret_cast(*ppMem) % Alignment == 0) && + "Allocation not aligned correctly!"); + return UR_RESULT_SUCCESS; } UR_APIEXPORT ur_result_t UR_APICALL urUSMFree(ur_context_handle_t hContext, @@ -255,14 +270,11 @@ UR_APIEXPORT ur_result_t UR_APICALL urEnqueueUSMMemcpy( UR_APIEXPORT ur_result_t UR_APICALL urEnqueueUSMPrefetch( ur_queue_handle_t hQueue, [[maybe_unused]] const void *pMem, - [[maybe_unused]] size_t size, ur_usm_migration_flags_t flags, + [[maybe_unused]] size_t size, + [[maybe_unused]] ur_usm_migration_flags_t flags, uint32_t numEventsInWaitList, const ur_event_handle_t *phEventWaitList, ur_event_handle_t *phEvent) { - // flags is currently unused so fail if set - if (flags != 0) - return UR_RESULT_ERROR_INVALID_VALUE; - return mapCLErrorToUR(clEnqueueMarkerWithWaitList( cl_adapter::cast(hQueue), numEventsInWaitList, cl_adapter::cast(phEventWaitList), @@ -387,9 +399,14 @@ urUSMGetMemAllocInfo(ur_context_handle_t hContext, const void *pMem, } if (FuncPtr) { - RetVal = - mapCLErrorToUR(FuncPtr(cl_adapter::cast(hContext), pMem, - PropNameCL, propSize, pPropValue, pPropSizeRet)); + size_t CheckPropSize = 0; + size_t *CheckPropSizeRet = pPropSizeRet ? pPropSizeRet : &CheckPropSize; + RetVal = mapCLErrorToUR(FuncPtr(cl_adapter::cast(hContext), + pMem, PropNameCL, propSize, pPropValue, + CheckPropSizeRet)); + if (pPropValue && *CheckPropSizeRet != propSize) { + return UR_RESULT_ERROR_INVALID_SIZE; + } if (RetVal == UR_RESULT_SUCCESS && pPropValue && propName == UR_USM_ALLOC_INFO_TYPE) { auto *AllocTypeCL = From 371e1b85ac5cee8d52447c0cd8c4eb196ba5878d Mon Sep 17 00:00:00 2001 From: Aaron Greig Date: Wed, 1 Nov 2023 11:46:11 +0000 Subject: [PATCH 08/12] [OpenCL] Retain native handle objects when properties dictate. --- source/adapters/opencl/event.cpp | 13 ++++++++----- source/adapters/opencl/kernel.cpp | 7 +++++-- source/adapters/opencl/memory.cpp | 14 ++++++++------ source/adapters/opencl/program.cpp | 7 +++++-- 4 files changed, 26 insertions(+), 15 deletions(-) diff --git a/source/adapters/opencl/event.cpp b/source/adapters/opencl/event.cpp index 78303a0829..64cf410460 100644 --- a/source/adapters/opencl/event.cpp +++ b/source/adapters/opencl/event.cpp @@ -50,12 +50,15 @@ convertURProfilingInfoToCL(const ur_profiling_info_t PropName) { } } -UR_APIEXPORT ur_result_t UR_APICALL urEventCreateWithNativeHandle( - ur_native_handle_t hNativeEvent, - [[maybe_unused]] ur_context_handle_t hContext, - [[maybe_unused]] const ur_event_native_properties_t *pProperties, - ur_event_handle_t *phEvent) { +UR_APIEXPORT ur_result_t UR_APICALL +urEventCreateWithNativeHandle(ur_native_handle_t hNativeEvent, + [[maybe_unused]] ur_context_handle_t hContext, + const ur_event_native_properties_t *pProperties, + ur_event_handle_t *phEvent) { *phEvent = reinterpret_cast(hNativeEvent); + if (!pProperties || !pProperties->isNativeHandleOwned) { + return urEventRetain(*phEvent); + } return UR_RESULT_SUCCESS; } diff --git a/source/adapters/opencl/kernel.cpp b/source/adapters/opencl/kernel.cpp index 80b1502854..6c688021d7 100644 --- a/source/adapters/opencl/kernel.cpp +++ b/source/adapters/opencl/kernel.cpp @@ -335,9 +335,12 @@ UR_APIEXPORT ur_result_t UR_APICALL urKernelGetNativeHandle( UR_APIEXPORT ur_result_t UR_APICALL urKernelCreateWithNativeHandle( ur_native_handle_t hNativeKernel, ur_context_handle_t, ur_program_handle_t, - const ur_kernel_native_properties_t *, ur_kernel_handle_t *phKernel) { - + const ur_kernel_native_properties_t *pProperties, + ur_kernel_handle_t *phKernel) { *phKernel = reinterpret_cast(hNativeKernel); + if (!pProperties || !pProperties->isNativeHandleOwned) { + return urKernelRetain(*phKernel); + } return UR_RESULT_SUCCESS; } diff --git a/source/adapters/opencl/memory.cpp b/source/adapters/opencl/memory.cpp index 279faad376..8912d333e3 100644 --- a/source/adapters/opencl/memory.cpp +++ b/source/adapters/opencl/memory.cpp @@ -331,10 +331,11 @@ urMemGetNativeHandle(ur_mem_handle_t hMem, ur_native_handle_t *phNativeMem) { UR_APIEXPORT ur_result_t UR_APICALL urMemBufferCreateWithNativeHandle( ur_native_handle_t hNativeMem, [[maybe_unused]] ur_context_handle_t hContext, - [[maybe_unused]] const ur_mem_native_properties_t *pProperties, - ur_mem_handle_t *phMem) { - + const ur_mem_native_properties_t *pProperties, ur_mem_handle_t *phMem) { *phMem = reinterpret_cast(hNativeMem); + if (!pProperties || !pProperties->isNativeHandleOwned) { + return urMemRetain(*phMem); + } return UR_RESULT_SUCCESS; } @@ -343,10 +344,11 @@ UR_APIEXPORT ur_result_t UR_APICALL urMemImageCreateWithNativeHandle( [[maybe_unused]] ur_context_handle_t hContext, [[maybe_unused]] const ur_image_format_t *pImageFormat, [[maybe_unused]] const ur_image_desc_t *pImageDesc, - [[maybe_unused]] const ur_mem_native_properties_t *pProperties, - ur_mem_handle_t *phMem) { - + const ur_mem_native_properties_t *pProperties, ur_mem_handle_t *phMem) { *phMem = reinterpret_cast(hNativeMem); + if (!pProperties || !pProperties->isNativeHandleOwned) { + return urMemRetain(*phMem); + } return UR_RESULT_SUCCESS; } diff --git a/source/adapters/opencl/program.cpp b/source/adapters/opencl/program.cpp index 0beca23dab..954c2dc48f 100644 --- a/source/adapters/opencl/program.cpp +++ b/source/adapters/opencl/program.cpp @@ -299,9 +299,12 @@ UR_APIEXPORT ur_result_t UR_APICALL urProgramGetNativeHandle( UR_APIEXPORT ur_result_t UR_APICALL urProgramCreateWithNativeHandle( ur_native_handle_t hNativeProgram, ur_context_handle_t, - const ur_program_native_properties_t *, ur_program_handle_t *phProgram) { - + const ur_program_native_properties_t *pProperties, + ur_program_handle_t *phProgram) { *phProgram = reinterpret_cast(hNativeProgram); + if (!pProperties || !pProperties->isNativeHandleOwned) { + return urProgramRetain(*phProgram); + } return UR_RESULT_SUCCESS; } From c5fbda04f0050ae81b34e22ee3801bdd9c4a2041 Mon Sep 17 00:00:00 2001 From: Aaron Greig Date: Mon, 6 Nov 2023 12:06:16 +0000 Subject: [PATCH 09/12] [OpenCL] Fix enum passed for urKernelSetExecInfo's USM_PTRS property Also return RESULT_SUCCESS for no-op UR_KERNEL_EXEC_INFO_CACHE_CONFIG hint. --- source/adapters/opencl/kernel.cpp | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/source/adapters/opencl/kernel.cpp b/source/adapters/opencl/kernel.cpp index 80b1502854..ee5559310e 100644 --- a/source/adapters/opencl/kernel.cpp +++ b/source/adapters/opencl/kernel.cpp @@ -284,12 +284,14 @@ UR_APIEXPORT ur_result_t UR_APICALL urKernelSetExecInfo( return UR_RESULT_SUCCESS; } case UR_KERNEL_EXEC_INFO_CACHE_CONFIG: { - /* Setting the cache config is unsupported in OpenCL */ - return UR_RESULT_ERROR_INVALID_ENUMERATION; + // Setting the cache config is unsupported in OpenCL, but this is just a + // hint. + return UR_RESULT_SUCCESS; } case UR_KERNEL_EXEC_INFO_USM_PTRS: { CL_RETURN_ON_FAILURE(clSetKernelExecInfo( - cl_adapter::cast(hKernel), propName, propSize, pPropValue)); + cl_adapter::cast(hKernel), + CL_KERNEL_EXEC_INFO_USM_PTRS_INTEL, propSize, pPropValue)); return UR_RESULT_SUCCESS; } default: { From 6a3c63da12a4777cfe43d44314d812396c8d8811 Mon Sep 17 00:00:00 2001 From: Aaron Greig Date: Fri, 3 Nov 2023 13:56:43 +0000 Subject: [PATCH 10/12] [OpenCL] Return INVALID_SIZE from GetInfo entry points. Also includes a few other GetInfo related fixes: * Add missing device info queries * Add mapping of CL command type to UR command type * Correct mapping of UR_QUEUE_INFO_FLAGS * Add mapping of cl_command_queue_properties to ur_queue_flags_t * Add mapping of cl_unified_shared_memory_type_intel to ur_usm_type_t * Add UNSUPPORTED_ENUMERATION path to KernelGeGroupInfo tests. And a fix related to one of the fixed queries: * Populate pfnReadHostPipe and pfnWriteHostPipe ddi table entries. --- source/adapters/opencl/context.cpp | 13 ++- source/adapters/opencl/device.cpp | 36 ++++++- source/adapters/opencl/event.cpp | 96 ++++++++++++++++--- source/adapters/opencl/kernel.cpp | 47 ++++++++- source/adapters/opencl/memory.cpp | 27 ++++-- source/adapters/opencl/program.cpp | 54 ++++++----- source/adapters/opencl/queue.cpp | 52 ++++++++-- source/adapters/opencl/sampler.cpp | 16 +++- .../adapters/opencl/ur_interface_loader.cpp | 2 + source/adapters/opencl/usm.cpp | 70 +++++++------- .../kernel/urKernelGetGroupInfo.cpp | 16 ++-- 11 files changed, 318 insertions(+), 111 deletions(-) diff --git a/source/adapters/opencl/context.cpp b/source/adapters/opencl/context.cpp index 6bc05c2003..3ada4a3d37 100644 --- a/source/adapters/opencl/context.cpp +++ b/source/adapters/opencl/context.cpp @@ -93,10 +93,17 @@ urContextGetInfo(ur_context_handle_t hContext, ur_context_info_t propName, case UR_CONTEXT_INFO_NUM_DEVICES: case UR_CONTEXT_INFO_DEVICES: case UR_CONTEXT_INFO_REFERENCE_COUNT: { - - CL_RETURN_ON_FAILURE( + size_t CheckPropSize = 0; + auto ClResult = clGetContextInfo(cl_adapter::cast(hContext), CLPropName, - propSize, pPropValue, pPropSizeRet)); + propSize, pPropValue, &CheckPropSize); + if (pPropValue && CheckPropSize != propSize) { + return UR_RESULT_ERROR_INVALID_SIZE; + } + CL_RETURN_ON_FAILURE(ClResult); + if (pPropSizeRet) { + *pPropSizeRet = CheckPropSize; + } return UR_RESULT_SUCCESS; } default: diff --git a/source/adapters/opencl/device.cpp b/source/adapters/opencl/device.cpp index 3fc6f5d491..710ebcfb88 100644 --- a/source/adapters/opencl/device.cpp +++ b/source/adapters/opencl/device.cpp @@ -345,6 +345,23 @@ UR_APIEXPORT ur_result_t UR_APICALL urDeviceGetInfo(ur_device_handle_t hDevice, return ReturnValue(URDeviceType); } + case UR_DEVICE_INFO_DEVICE_ID: { + bool Supported = false; + CL_RETURN_ON_FAILURE(cl_adapter::checkDeviceExtensions( + cl_adapter::cast(hDevice), {"cl_khr_pci_bus_info"}, + Supported)); + + if (!Supported) { + return UR_RESULT_ERROR_UNSUPPORTED_ENUMERATION; + } + + cl_device_pci_bus_info_khr PciInfo = {}; + CL_RETURN_ON_FAILURE(clGetDeviceInfo( + cl_adapter::cast(hDevice), CL_DEVICE_PCI_BUS_INFO_KHR, + sizeof(PciInfo), &PciInfo, nullptr)); + return ReturnValue(PciInfo.pci_device); + } + case UR_DEVICE_INFO_BACKEND_RUNTIME_VERSION: { oclv::OpenCLVersion Version; CL_RETURN_ON_FAILURE(cl_adapter::getDeviceVersion( @@ -760,6 +777,16 @@ UR_APIEXPORT ur_result_t UR_APICALL urDeviceGetInfo(ur_device_handle_t hDevice, return ReturnValue(Supported); } + case UR_DEVICE_INFO_VIRTUAL_MEMORY_SUPPORT: { + return ReturnValue(false); + } + case UR_DEVICE_INFO_HOST_PIPE_READ_WRITE_SUPPORTED: { + bool Supported = false; + CL_RETURN_ON_FAILURE(cl_adapter::checkDeviceExtensions( + cl_adapter::cast(hDevice), + {"cl_intel_program_scope_host_pipe"}, Supported)); + return ReturnValue(Supported); + } case UR_DEVICE_INFO_QUEUE_PROPERTIES: case UR_DEVICE_INFO_QUEUE_ON_DEVICE_PROPERTIES: case UR_DEVICE_INFO_QUEUE_ON_HOST_PROPERTIES: @@ -775,7 +802,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urDeviceGetInfo(ur_device_handle_t hDevice, /* CL type: cl_bitfield / enum * UR type: ur_flags_t (uint32_t) */ - cl_bitfield CLValue; + cl_bitfield CLValue = 0; CL_RETURN_ON_FAILURE( clGetDeviceInfo(cl_adapter::cast(hDevice), CLPropName, sizeof(cl_bitfield), &CLValue, nullptr)); @@ -898,13 +925,12 @@ UR_APIEXPORT ur_result_t UR_APICALL urDeviceGetInfo(ur_device_handle_t hDevice, * sycl/doc/extensions/supported/sycl_ext_intel_device_info.md */ case UR_DEVICE_INFO_UUID: /* This enums have no equivalent in OpenCL */ - case UR_DEVICE_INFO_DEVICE_ID: + case UR_DEVICE_INFO_MAX_REGISTERS_PER_WORK_GROUP: case UR_DEVICE_INFO_GLOBAL_MEM_FREE: case UR_DEVICE_INFO_MEMORY_CLOCK_RATE: case UR_DEVICE_INFO_MEMORY_BUS_WIDTH: - case UR_DEVICE_INFO_ASYNC_BARRIER: - case UR_DEVICE_INFO_HOST_PIPE_READ_WRITE_SUPPORTED: { - return UR_RESULT_ERROR_INVALID_ENUMERATION; + case UR_DEVICE_INFO_ASYNC_BARRIER: { + return UR_RESULT_ERROR_UNSUPPORTED_ENUMERATION; } default: { return UR_RESULT_ERROR_INVALID_ENUMERATION; diff --git a/source/adapters/opencl/event.cpp b/source/adapters/opencl/event.cpp index 850df82648..44862f319f 100644 --- a/source/adapters/opencl/event.cpp +++ b/source/adapters/opencl/event.cpp @@ -54,6 +54,62 @@ convertURProfilingInfoToCL(const ur_profiling_info_t PropName) { } } +const ur_command_t +convertCLCommandTypeToUR(const cl_command_type &CommandType) { + /* Note: the following enums don't have a CL equivalent: + UR_COMMAND_USM_FILL_2D + UR_COMMAND_USM_MEMCPY_2D + UR_COMMAND_DEVICE_GLOBAL_VARIABLE_WRITE + UR_COMMAND_DEVICE_GLOBAL_VARIABLE_READ + UR_COMMAND_READ_HOST_PIPE + UR_COMMAND_WRITE_HOST_PIPE + UR_COMMAND_COMMAND_BUFFER_ENQUEUE_EXP + UR_COMMAND_INTEROP_SEMAPHORE_WAIT_EXP + UR_COMMAND_INTEROP_SEMAPHORE_SIGNAL_EXP */ + switch (CommandType) { + case CL_COMMAND_NDRANGE_KERNEL: + return UR_COMMAND_KERNEL_LAUNCH; + case CL_COMMAND_MARKER: + // CL can't distinguish between UR_COMMAND_EVENTS_WAIT_WITH_BARRIER and + // UR_COMMAND_EVENTS_WAIT. + return UR_COMMAND_EVENTS_WAIT; + case CL_COMMAND_READ_BUFFER: + return UR_COMMAND_MEM_BUFFER_READ; + case CL_COMMAND_WRITE_BUFFER: + return UR_COMMAND_MEM_BUFFER_WRITE; + case CL_COMMAND_READ_BUFFER_RECT: + return UR_COMMAND_MEM_BUFFER_READ_RECT; + case CL_COMMAND_WRITE_BUFFER_RECT: + return UR_COMMAND_MEM_BUFFER_WRITE_RECT; + case CL_COMMAND_COPY_BUFFER: + return UR_COMMAND_MEM_BUFFER_COPY; + case CL_COMMAND_COPY_BUFFER_RECT: + return UR_COMMAND_MEM_BUFFER_COPY_RECT; + case CL_COMMAND_FILL_BUFFER: + return UR_COMMAND_MEM_BUFFER_FILL; + case CL_COMMAND_READ_IMAGE: + return UR_COMMAND_MEM_IMAGE_READ; + case CL_COMMAND_WRITE_IMAGE: + return UR_COMMAND_MEM_IMAGE_WRITE; + case CL_COMMAND_COPY_IMAGE: + return UR_COMMAND_MEM_IMAGE_COPY; + case CL_COMMAND_MAP_BUFFER: + return UR_COMMAND_MEM_BUFFER_MAP; + case CL_COMMAND_UNMAP_MEM_OBJECT: + return UR_COMMAND_MEM_UNMAP; + case CL_COMMAND_MEMFILL_INTEL: + return UR_COMMAND_USM_FILL; + case CL_COMMAND_MEMCPY_INTEL: + return UR_COMMAND_USM_MEMCPY; + case CL_COMMAND_MIGRATEMEM_INTEL: + return UR_COMMAND_USM_PREFETCH; + case CL_COMMAND_MEMADVISE_INTEL: + return UR_COMMAND_USM_ADVISE; + default: + return UR_COMMAND_FORCE_UINT32; + } +} + UR_APIEXPORT ur_result_t UR_APICALL urEventCreateWithNativeHandle(ur_native_handle_t hNativeEvent, [[maybe_unused]] ur_context_handle_t hContext, @@ -97,24 +153,36 @@ UR_APIEXPORT ur_result_t UR_APICALL urEventGetInfo(ur_event_handle_t hEvent, void *pPropValue, size_t *pPropSizeRet) { cl_event_info CLEventInfo = convertUREventInfoToCL(propName); + + size_t CheckPropSize = 0; cl_int RetErr = clGetEventInfo(cl_adapter::cast(hEvent), CLEventInfo, propSize, - pPropValue, pPropSizeRet); + pPropValue, &CheckPropSize); + if (pPropValue && CheckPropSize != propSize) { + return UR_RESULT_ERROR_INVALID_SIZE; + } CL_RETURN_ON_FAILURE(RetErr); + if (pPropSizeRet) { + *pPropSizeRet = CheckPropSize; + } - if (RetErr == CL_SUCCESS && - propName == UR_EVENT_INFO_COMMAND_EXECUTION_STATUS) { - /* If the CL_EVENT_COMMAND_EXECUTION_STATUS info value is CL_QUEUED, change - * it to CL_SUBMITTED. sycl::info::event::event_command_status has no - * equivalent to CL_QUEUED. - * - * FIXME UR Port: This should not be part of the UR adapter. Since PI_QUEUED - * exists, SYCL RT should be changed to handle this situation. In addition, - * SYCL RT is relying on PI_QUEUED status to make sure that the queues are - * flushed. */ - const auto param_value_int = static_cast(pPropValue); - if (*param_value_int == UR_EVENT_STATUS_QUEUED) { - *param_value_int = UR_EVENT_STATUS_SUBMITTED; + if (pPropValue) { + if (propName == UR_EVENT_INFO_COMMAND_TYPE) { + *reinterpret_cast(pPropValue) = convertCLCommandTypeToUR( + *reinterpret_cast(pPropValue)); + } else if (propName == UR_EVENT_INFO_COMMAND_EXECUTION_STATUS) { + /* If the CL_EVENT_COMMAND_EXECUTION_STATUS info value is CL_QUEUED, + * change it to CL_SUBMITTED. sycl::info::event::event_command_status has + * no equivalent to CL_QUEUED. + * + * FIXME UR Port: This should not be part of the UR adapter. Since + * PI_QUEUED exists, SYCL RT should be changed to handle this situation. + * In addition, SYCL RT is relying on PI_QUEUED status to make sure that + * the queues are flushed. */ + const auto param_value_int = static_cast(pPropValue); + if (*param_value_int == UR_EVENT_STATUS_QUEUED) { + *param_value_int = UR_EVENT_STATUS_SUBMITTED; + } } } diff --git a/source/adapters/opencl/kernel.cpp b/source/adapters/opencl/kernel.cpp index 69fcec7b21..e7c8444a17 100644 --- a/source/adapters/opencl/kernel.cpp +++ b/source/adapters/opencl/kernel.cpp @@ -69,10 +69,34 @@ UR_APIEXPORT ur_result_t UR_APICALL urKernelGetInfo(ur_kernel_handle_t hKernel, size_t propSize, void *pPropValue, size_t *pPropSizeRet) { - - CL_RETURN_ON_FAILURE(clGetKernelInfo(cl_adapter::cast(hKernel), - mapURKernelInfoToCL(propName), propSize, - pPropValue, pPropSizeRet)); + // We need this little bit of ugliness because the UR NUM_ARGS property is + // size_t whereas the CL one is cl_uint. We should consider changing that see + // #1038 + if (propName == UR_KERNEL_INFO_NUM_ARGS) { + if (pPropSizeRet) + *pPropSizeRet = sizeof(size_t); + cl_uint NumArgs = 0; + CL_RETURN_ON_FAILURE(clGetKernelInfo(cl_adapter::cast(hKernel), + mapURKernelInfoToCL(propName), + sizeof(NumArgs), &NumArgs, nullptr)); + if (pPropValue) { + if (propSize != sizeof(size_t)) + return UR_RESULT_ERROR_INVALID_SIZE; + *static_cast(pPropValue) = static_cast(NumArgs); + } + } else { + size_t CheckPropSize = 0; + cl_int ClResult = clGetKernelInfo(cl_adapter::cast(hKernel), + mapURKernelInfoToCL(propName), propSize, + pPropValue, &CheckPropSize); + if (pPropValue && CheckPropSize != propSize) { + return UR_RESULT_ERROR_INVALID_SIZE; + } + CL_RETURN_ON_FAILURE(ClResult); + if (pPropSizeRet) { + *pPropSizeRet = CheckPropSize; + } + } return UR_RESULT_SUCCESS; } @@ -101,7 +125,20 @@ UR_APIEXPORT ur_result_t UR_APICALL urKernelGetGroupInfo(ur_kernel_handle_t hKernel, ur_device_handle_t hDevice, ur_kernel_group_info_t propName, size_t propSize, void *pPropValue, size_t *pPropSizeRet) { - + // From the CL spec for GROUP_INFO_GLOBAL: "If device is not a custom device + // and kernel is not a built-in kernel, clGetKernelWorkGroupInfo returns the + // error CL_INVALID_VALUE.". Unfortunately there doesn't seem to be a nice + // way to query whether a kernel is a builtin kernel but this should suffice + // to deter naive use of the query. + if (propName == UR_KERNEL_GROUP_INFO_GLOBAL_WORK_SIZE) { + cl_device_type ClDeviceType; + CL_RETURN_ON_FAILURE( + clGetDeviceInfo(cl_adapter::cast(hDevice), CL_DEVICE_TYPE, + sizeof(ClDeviceType), &ClDeviceType, nullptr)); + if (ClDeviceType != CL_DEVICE_TYPE_CUSTOM) { + return UR_RESULT_ERROR_UNSUPPORTED_ENUMERATION; + } + } CL_RETURN_ON_FAILURE(clGetKernelWorkGroupInfo( cl_adapter::cast(hKernel), cl_adapter::cast(hDevice), diff --git a/source/adapters/opencl/memory.cpp b/source/adapters/opencl/memory.cpp index 87024f2f9a..be9b266f3d 100644 --- a/source/adapters/opencl/memory.cpp +++ b/source/adapters/opencl/memory.cpp @@ -362,9 +362,17 @@ UR_APIEXPORT ur_result_t UR_APICALL urMemGetInfo(ur_mem_handle_t hMemory, UrReturnHelper ReturnValue(propSize, pPropValue, pPropSizeRet); const cl_int CLPropName = mapURMemInfoToCL(propName); - CL_RETURN_ON_FAILURE(clGetMemObjectInfo(cl_adapter::cast(hMemory), - CLPropName, propSize, pPropValue, - pPropSizeRet)); + size_t CheckPropSize = 0; + auto ClResult = + clGetMemObjectInfo(cl_adapter::cast(hMemory), CLPropName, + propSize, pPropValue, &CheckPropSize); + if (pPropValue && CheckPropSize != propSize) { + return UR_RESULT_ERROR_INVALID_SIZE; + } + CL_RETURN_ON_FAILURE(ClResult); + if (pPropSizeRet) { + *pPropSizeRet = CheckPropSize; + } return UR_RESULT_SUCCESS; } @@ -377,9 +385,16 @@ UR_APIEXPORT ur_result_t UR_APICALL urMemImageGetInfo(ur_mem_handle_t hMemory, UrReturnHelper ReturnValue(propSize, pPropValue, pPropSizeRet); const cl_int CLPropName = mapURMemImageInfoToCL(propName); - CL_RETURN_ON_FAILURE(clGetImageInfo(cl_adapter::cast(hMemory), - CLPropName, propSize, pPropValue, - pPropSizeRet)); + size_t CheckPropSize = 0; + auto ClResult = clGetImageInfo(cl_adapter::cast(hMemory), CLPropName, + propSize, pPropValue, &CheckPropSize); + if (pPropValue && CheckPropSize != propSize) { + return UR_RESULT_ERROR_INVALID_SIZE; + } + CL_RETURN_ON_FAILURE(ClResult); + if (pPropSizeRet) { + *pPropSizeRet = CheckPropSize; + } return UR_RESULT_SUCCESS; } diff --git a/source/adapters/opencl/program.cpp b/source/adapters/opencl/program.cpp index 733f2509cb..fad0dd69f7 100644 --- a/source/adapters/opencl/program.cpp +++ b/source/adapters/opencl/program.cpp @@ -176,11 +176,17 @@ static cl_int mapURProgramInfoToCL(ur_program_info_t URPropName) { UR_APIEXPORT ur_result_t UR_APICALL urProgramGetInfo(ur_program_handle_t hProgram, ur_program_info_t propName, size_t propSize, void *pPropValue, size_t *pPropSizeRet) { - - CL_RETURN_ON_FAILURE(clGetProgramInfo(cl_adapter::cast(hProgram), - mapURProgramInfoToCL(propName), - propSize, pPropValue, pPropSizeRet)); - + size_t CheckPropSize = 0; + auto ClResult = clGetProgramInfo(cl_adapter::cast(hProgram), + mapURProgramInfoToCL(propName), propSize, + pPropValue, &CheckPropSize); + if (pPropValue && CheckPropSize != propSize) { + return UR_RESULT_ERROR_INVALID_SIZE; + } + CL_RETURN_ON_FAILURE(ClResult); + if (pPropSizeRet) { + *pPropSizeRet = CheckPropSize; + } return UR_RESULT_SUCCESS; } @@ -249,30 +255,30 @@ UR_APIEXPORT ur_result_t UR_APICALL urProgramGetBuildInfo(ur_program_handle_t hProgram, ur_device_handle_t hDevice, ur_program_build_info_t propName, size_t propSize, void *pPropValue, size_t *pPropSizeRet) { - - UrReturnHelper ReturnValue(propSize, pPropValue, pPropSizeRet); - - switch (propName) { - case UR_PROGRAM_BUILD_INFO_BINARY_TYPE: - cl_program_binary_type cl_value; + if (propName == UR_PROGRAM_BUILD_INFO_BINARY_TYPE) { + UrReturnHelper ReturnValue(propSize, pPropValue, pPropSizeRet); + cl_program_binary_type BinaryType; CL_RETURN_ON_FAILURE(clGetProgramBuildInfo( cl_adapter::cast(hProgram), cl_adapter::cast(hDevice), mapURProgramBuildInfoToCL(propName), sizeof(cl_program_binary_type), - &cl_value, nullptr)); - return ReturnValue(mapCLBinaryTypeToUR(cl_value)); - case UR_PROGRAM_BUILD_INFO_LOG: - case UR_PROGRAM_BUILD_INFO_OPTIONS: - case UR_PROGRAM_BUILD_INFO_STATUS: - CL_RETURN_ON_FAILURE( - clGetProgramBuildInfo(cl_adapter::cast(hProgram), - cl_adapter::cast(hDevice), - mapURProgramBuildInfoToCL(propName), propSize, - pPropValue, pPropSizeRet)); - return UR_RESULT_SUCCESS; - default: - return UR_RESULT_ERROR_INVALID_ENUMERATION; + &BinaryType, nullptr)); + return ReturnValue(mapCLBinaryTypeToUR(BinaryType)); } + size_t CheckPropSize = 0; + cl_int ClErr = clGetProgramBuildInfo(cl_adapter::cast(hProgram), + cl_adapter::cast(hDevice), + mapURProgramBuildInfoToCL(propName), + propSize, pPropValue, &CheckPropSize); + if (pPropValue && CheckPropSize != propSize) { + return UR_RESULT_ERROR_INVALID_SIZE; + } + CL_RETURN_ON_FAILURE(ClErr); + if (pPropSizeRet) { + *pPropSizeRet = CheckPropSize; + } + + return UR_RESULT_SUCCESS; } UR_APIEXPORT ur_result_t UR_APICALL diff --git a/source/adapters/opencl/queue.cpp b/source/adapters/opencl/queue.cpp index 8b5496e619..163d283651 100644 --- a/source/adapters/opencl/queue.cpp +++ b/source/adapters/opencl/queue.cpp @@ -19,7 +19,7 @@ cl_command_queue_info mapURQueueInfoToCL(const ur_queue_info_t PropName) { case UR_QUEUE_INFO_DEVICE_DEFAULT: return CL_QUEUE_DEVICE_DEFAULT; case UR_QUEUE_INFO_FLAGS: - return CL_QUEUE_PROPERTIES_ARRAY; + return CL_QUEUE_PROPERTIES; case UR_QUEUE_INFO_REFERENCE_COUNT: return CL_QUEUE_REFERENCE_COUNT; case UR_QUEUE_INFO_SIZE: @@ -49,6 +49,24 @@ convertURQueuePropertiesToCL(const ur_queue_properties_t *URQueueProperties) { return CLCommandQueueProperties; } +const ur_queue_flags_t +mapCLQueuePropsToUR(const cl_command_queue_properties &Properties) { + ur_queue_flags_t Flags = 0; + if (Properties & CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE) { + Flags |= UR_QUEUE_FLAG_OUT_OF_ORDER_EXEC_MODE_ENABLE; + } + if (Properties & CL_QUEUE_PROFILING_ENABLE) { + Flags |= UR_QUEUE_FLAG_PROFILING_ENABLE; + } + if (Properties & CL_QUEUE_ON_DEVICE) { + Flags |= UR_QUEUE_FLAG_ON_DEVICE; + } + if (Properties & CL_QUEUE_ON_DEVICE_DEFAULT) { + Flags |= UR_QUEUE_FLAG_ON_DEVICE_DEFAULT; + } + return Flags; +} + UR_APIEXPORT ur_result_t UR_APICALL urQueueCreate( ur_context_handle_t hContext, ur_device_handle_t hDevice, const ur_queue_properties_t *pProperties, ur_queue_handle_t *phQueue) { @@ -102,15 +120,35 @@ UR_APIEXPORT ur_result_t UR_APICALL urQueueGetInfo(ur_queue_handle_t hQueue, size_t *pPropSizeRet) { if (propName == UR_QUEUE_INFO_EMPTY) { // OpenCL doesn't provide API to check the status of the queue. - return UR_RESULT_ERROR_INVALID_VALUE; + return UR_RESULT_ERROR_UNSUPPORTED_ENUMERATION; } - cl_command_queue_info CLCommandQueueInfo = mapURQueueInfoToCL(propName); - cl_int RetErr = clGetCommandQueueInfo( - cl_adapter::cast(hQueue), CLCommandQueueInfo, propSize, - pPropValue, pPropSizeRet); - CL_RETURN_ON_FAILURE(RetErr); + // Unfortunately the size of cl_bitfield (unsigned long) doesn't line up with + // our enums (forced to be sizeof(uint32_t)) so this needs special handling. + if (propName == UR_QUEUE_INFO_FLAGS) { + UrReturnHelper ReturnValue(propSize, pPropValue, pPropSizeRet); + + cl_command_queue_properties QueueProperties = 0; + CL_RETURN_ON_FAILURE(clGetCommandQueueInfo( + cl_adapter::cast(hQueue), CLCommandQueueInfo, + sizeof(QueueProperties), &QueueProperties, nullptr)); + + return ReturnValue(mapCLQueuePropsToUR(QueueProperties)); + } else { + size_t CheckPropSize = 0; + cl_int RetErr = clGetCommandQueueInfo( + cl_adapter::cast(hQueue), CLCommandQueueInfo, + propSize, pPropValue, &CheckPropSize); + if (pPropValue && CheckPropSize != propSize) { + return UR_RESULT_ERROR_INVALID_SIZE; + } + CL_RETURN_ON_FAILURE(RetErr); + if (pPropSizeRet) { + *pPropSizeRet = CheckPropSize; + } + } + return UR_RESULT_SUCCESS; } diff --git a/source/adapters/opencl/sampler.cpp b/source/adapters/opencl/sampler.cpp index 0cd4cbed2b..5f58216446 100644 --- a/source/adapters/opencl/sampler.cpp +++ b/source/adapters/opencl/sampler.cpp @@ -154,16 +154,22 @@ ur_result_t urSamplerCreate(ur_context_handle_t hContext, UR_APIEXPORT ur_result_t UR_APICALL urSamplerGetInfo(ur_sampler_handle_t hSampler, ur_sampler_info_t propName, size_t propSize, void *pPropValue, size_t *pPropSizeRet) { - cl_sampler_info SamplerInfo = ur2CLSamplerInfo(propName); static_assert(sizeof(cl_addressing_mode) == sizeof(ur_sampler_addressing_mode_t)); - if (ur_result_t Err = mapCLErrorToUR( - clGetSamplerInfo(cl_adapter::cast(hSampler), SamplerInfo, - propSize, pPropValue, pPropSizeRet))) { - return Err; + size_t CheckPropSize = 0; + ur_result_t Err = mapCLErrorToUR( + clGetSamplerInfo(cl_adapter::cast(hSampler), SamplerInfo, + propSize, pPropValue, &CheckPropSize)); + if (pPropValue && CheckPropSize != propSize) { + return UR_RESULT_ERROR_INVALID_SIZE; + } + CL_RETURN_ON_FAILURE(Err); + if (pPropSizeRet) { + *pPropSizeRet = CheckPropSize; } + // Convert OpenCL returns to UR cl2URSamplerInfoValue(SamplerInfo, pPropValue); diff --git a/source/adapters/opencl/ur_interface_loader.cpp b/source/adapters/opencl/ur_interface_loader.cpp index 32d26cf58c..7333385182 100644 --- a/source/adapters/opencl/ur_interface_loader.cpp +++ b/source/adapters/opencl/ur_interface_loader.cpp @@ -190,6 +190,8 @@ UR_DLLEXPORT ur_result_t UR_APICALL urGetEnqueueProcAddrTable( pDdiTable->pfnUSMMemcpy2D = urEnqueueUSMMemcpy2D; pDdiTable->pfnUSMMemcpy = urEnqueueUSMMemcpy; pDdiTable->pfnUSMPrefetch = urEnqueueUSMPrefetch; + pDdiTable->pfnReadHostPipe = urEnqueueReadHostPipe; + pDdiTable->pfnWriteHostPipe = urEnqueueWriteHostPipe; return UR_RESULT_SUCCESS; } diff --git a/source/adapters/opencl/usm.cpp b/source/adapters/opencl/usm.cpp index 3b77472062..a910a39da5 100644 --- a/source/adapters/opencl/usm.cpp +++ b/source/adapters/opencl/usm.cpp @@ -486,16 +486,31 @@ UR_APIEXPORT ur_result_t UR_APICALL urEnqueueUSMMemcpy2D( return UR_RESULT_SUCCESS; } +const ur_usm_type_t +mapCLUSMTypeToUR(const cl_unified_shared_memory_type_intel &Type) { + switch (Type) { + case CL_MEM_TYPE_HOST_INTEL: + return UR_USM_TYPE_HOST; + case CL_MEM_TYPE_DEVICE_INTEL: + return UR_USM_TYPE_DEVICE; + case CL_MEM_TYPE_SHARED_INTEL: + return UR_USM_TYPE_SHARED; + case CL_MEM_TYPE_UNKNOWN_INTEL: + default: + return UR_USM_TYPE_UNKNOWN; + } +} + UR_APIEXPORT ur_result_t UR_APICALL urUSMGetMemAllocInfo(ur_context_handle_t hContext, const void *pMem, ur_usm_alloc_info_t propName, size_t propSize, void *pPropValue, size_t *pPropSizeRet) { - clGetMemAllocInfoINTEL_fn FuncPtr = nullptr; + clGetMemAllocInfoINTEL_fn GetMemAllocInfo = nullptr; cl_context CLContext = cl_adapter::cast(hContext); - ur_result_t RetVal = cl_ext::getExtFuncFromContext( + UR_RETURN_ON_FAILURE(cl_ext::getExtFuncFromContext( CLContext, cl_ext::ExtFuncPtrCache->clGetMemAllocInfoINTELCache, - cl_ext::GetMemAllocInfoName, &FuncPtr); + cl_ext::GetMemAllocInfoName, &GetMemAllocInfo)); cl_mem_info_intel PropNameCL; switch (propName) { @@ -515,41 +530,24 @@ urUSMGetMemAllocInfo(ur_context_handle_t hContext, const void *pMem, return UR_RESULT_ERROR_INVALID_VALUE; } - if (FuncPtr) { - size_t CheckPropSize = 0; - size_t *CheckPropSizeRet = pPropSizeRet ? pPropSizeRet : &CheckPropSize; - RetVal = mapCLErrorToUR(FuncPtr(cl_adapter::cast(hContext), - pMem, PropNameCL, propSize, pPropValue, - CheckPropSizeRet)); - if (pPropValue && *CheckPropSizeRet != propSize) { - return UR_RESULT_ERROR_INVALID_SIZE; - } - if (RetVal == UR_RESULT_SUCCESS && pPropValue && - propName == UR_USM_ALLOC_INFO_TYPE) { - auto *AllocTypeCL = - static_cast(pPropValue); - ur_usm_type_t AllocTypeUR; - switch (*AllocTypeCL) { - case CL_MEM_TYPE_HOST_INTEL: - AllocTypeUR = UR_USM_TYPE_HOST; - break; - case CL_MEM_TYPE_DEVICE_INTEL: - AllocTypeUR = UR_USM_TYPE_DEVICE; - break; - case CL_MEM_TYPE_SHARED_INTEL: - AllocTypeUR = UR_USM_TYPE_SHARED; - break; - case CL_MEM_TYPE_UNKNOWN_INTEL: - default: - AllocTypeUR = UR_USM_TYPE_UNKNOWN; - break; - } - auto *AllocTypeOut = static_cast(pPropValue); - *AllocTypeOut = AllocTypeUR; - } + size_t CheckPropSize = 0; + cl_int ClErr = + GetMemAllocInfo(cl_adapter::cast(hContext), pMem, PropNameCL, + propSize, pPropValue, &CheckPropSize); + if (pPropValue && CheckPropSize != propSize) { + return UR_RESULT_ERROR_INVALID_SIZE; + } + CL_RETURN_ON_FAILURE(ClErr); + if (pPropSizeRet) { + *pPropSizeRet = CheckPropSize; } - return RetVal; + if (pPropValue && propName == UR_USM_ALLOC_INFO_TYPE) { + *static_cast(pPropValue) = mapCLUSMTypeToUR( + *static_cast(pPropValue)); + } + + return UR_RESULT_SUCCESS; } UR_APIEXPORT ur_result_t UR_APICALL diff --git a/test/conformance/kernel/urKernelGetGroupInfo.cpp b/test/conformance/kernel/urKernelGetGroupInfo.cpp index 7a6066b0b0..5ad6225676 100644 --- a/test/conformance/kernel/urKernelGetGroupInfo.cpp +++ b/test/conformance/kernel/urKernelGetGroupInfo.cpp @@ -22,12 +22,16 @@ TEST_P(urKernelGetGroupInfoTest, Success) { auto property_name = getParam(); size_t property_size = 0; std::vector property_value; - ASSERT_SUCCESS(urKernelGetGroupInfo(kernel, device, property_name, 0, - nullptr, &property_size)); - property_value.resize(property_size); - ASSERT_SUCCESS(urKernelGetGroupInfo(kernel, device, property_name, - property_size, property_value.data(), - nullptr)); + auto result = urKernelGetGroupInfo(kernel, device, property_name, 0, + nullptr, &property_size); + if (result == UR_RESULT_SUCCESS) { + property_value.resize(property_size); + ASSERT_SUCCESS(urKernelGetGroupInfo(kernel, device, property_name, + property_size, + property_value.data(), nullptr)); + } else { + ASSERT_EQ_RESULT(result, UR_RESULT_ERROR_UNSUPPORTED_ENUMERATION); + } } TEST_P(urKernelGetGroupInfoTest, InvalidNullHandleKernel) { From 39eec0c34c561afc68bb1843bed649ae37b974ea Mon Sep 17 00:00:00 2001 From: Aaron Greig Date: Mon, 6 Nov 2023 17:25:29 +0000 Subject: [PATCH 11/12] Remove useless const qualifiers from helper function return types. --- source/adapters/opencl/event.cpp | 3 +-- source/adapters/opencl/queue.cpp | 2 +- source/adapters/opencl/usm.cpp | 2 +- 3 files changed, 3 insertions(+), 4 deletions(-) diff --git a/source/adapters/opencl/event.cpp b/source/adapters/opencl/event.cpp index 44862f319f..87f1f58f1a 100644 --- a/source/adapters/opencl/event.cpp +++ b/source/adapters/opencl/event.cpp @@ -54,8 +54,7 @@ convertURProfilingInfoToCL(const ur_profiling_info_t PropName) { } } -const ur_command_t -convertCLCommandTypeToUR(const cl_command_type &CommandType) { +ur_command_t convertCLCommandTypeToUR(const cl_command_type &CommandType) { /* Note: the following enums don't have a CL equivalent: UR_COMMAND_USM_FILL_2D UR_COMMAND_USM_MEMCPY_2D diff --git a/source/adapters/opencl/queue.cpp b/source/adapters/opencl/queue.cpp index 163d283651..4a39a91ef5 100644 --- a/source/adapters/opencl/queue.cpp +++ b/source/adapters/opencl/queue.cpp @@ -49,7 +49,7 @@ convertURQueuePropertiesToCL(const ur_queue_properties_t *URQueueProperties) { return CLCommandQueueProperties; } -const ur_queue_flags_t +ur_queue_flags_t mapCLQueuePropsToUR(const cl_command_queue_properties &Properties) { ur_queue_flags_t Flags = 0; if (Properties & CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE) { diff --git a/source/adapters/opencl/usm.cpp b/source/adapters/opencl/usm.cpp index a910a39da5..5d46aec2ef 100644 --- a/source/adapters/opencl/usm.cpp +++ b/source/adapters/opencl/usm.cpp @@ -486,7 +486,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urEnqueueUSMMemcpy2D( return UR_RESULT_SUCCESS; } -const ur_usm_type_t +ur_usm_type_t mapCLUSMTypeToUR(const cl_unified_shared_memory_type_intel &Type) { switch (Type) { case CL_MEM_TYPE_HOST_INTEL: From f65473d9315c1319538f481d7a8c82dd2710c933 Mon Sep 17 00:00:00 2001 From: Aaron Greig Date: Mon, 6 Nov 2023 16:35:33 +0000 Subject: [PATCH 12/12] [OpenCL] Add bounds checking to the Enqueue memory operations. This allows us to return UR_ERROR_INVALID_SIZE when we should. Extra checks are only performed on a non-success error code. Also adds a missing bounds check to urMemBufferPartition --- source/adapters/opencl/enqueue.cpp | 180 +++++++++++++++++++++++------ source/adapters/opencl/memory.cpp | 11 +- 2 files changed, 152 insertions(+), 39 deletions(-) diff --git a/source/adapters/opencl/enqueue.cpp b/source/adapters/opencl/enqueue.cpp index 5dff7066ae..ad6eaec88f 100644 --- a/source/adapters/opencl/enqueue.cpp +++ b/source/adapters/opencl/enqueue.cpp @@ -25,6 +25,77 @@ cl_map_flags convertURMapFlagsToCL(ur_map_flags_t URFlags) { return CLFlags; } +ur_result_t ValidateBufferSize(ur_mem_handle_t Buffer, size_t Size, + size_t Origin) { + size_t BufferSize = 0; + CL_RETURN_ON_FAILURE(clGetMemObjectInfo(cl_adapter::cast(Buffer), + CL_MEM_SIZE, sizeof(BufferSize), + &BufferSize, nullptr)); + if (Size + Origin > BufferSize) + return UR_RESULT_ERROR_INVALID_SIZE; + return UR_RESULT_SUCCESS; +} + +ur_result_t ValidateBufferRectSize(ur_mem_handle_t Buffer, + ur_rect_region_t Region, + ur_rect_offset_t Offset) { + size_t BufferSize = 0; + CL_RETURN_ON_FAILURE(clGetMemObjectInfo(cl_adapter::cast(Buffer), + CL_MEM_SIZE, sizeof(BufferSize), + &BufferSize, nullptr)); + if (Offset.x >= BufferSize || Offset.y >= BufferSize || + Offset.z >= BufferSize) { + return UR_RESULT_ERROR_INVALID_SIZE; + } + + if ((Region.width + Offset.x) * (Region.height + Offset.y) * + (Region.depth + Offset.z) > + BufferSize) { + return UR_RESULT_ERROR_INVALID_SIZE; + } + + return UR_RESULT_SUCCESS; +} + +ur_result_t ValidateImageSize(ur_mem_handle_t Image, ur_rect_region_t Region, + ur_rect_offset_t Origin) { + size_t Width = 0; + CL_RETURN_ON_FAILURE(clGetImageInfo(cl_adapter::cast(Image), + CL_IMAGE_WIDTH, sizeof(Width), &Width, + nullptr)); + if (Region.width + Origin.x > Width) { + return UR_RESULT_ERROR_INVALID_SIZE; + } + + size_t Height = 0; + CL_RETURN_ON_FAILURE(clGetImageInfo(cl_adapter::cast(Image), + CL_IMAGE_HEIGHT, sizeof(Height), &Height, + nullptr)); + + // CL returns a height and depth of 0 for images that don't have those + // dimensions, but regions for enqueue operations must set these to 1, so we + // need to make this adjustment to validate. + if (Height == 0) + Height = 1; + + if (Region.height + Origin.y > Height) { + return UR_RESULT_ERROR_INVALID_SIZE; + } + + size_t Depth = 0; + CL_RETURN_ON_FAILURE(clGetImageInfo(cl_adapter::cast(Image), + CL_IMAGE_DEPTH, sizeof(Depth), &Depth, + nullptr)); + if (Depth == 0) + Depth = 1; + + if (Region.depth + Origin.z > Depth) { + return UR_RESULT_ERROR_INVALID_SIZE; + } + + return UR_RESULT_SUCCESS; +} + UR_APIEXPORT ur_result_t UR_APICALL urEnqueueKernelLaunch( ur_queue_handle_t hQueue, ur_kernel_handle_t hKernel, uint32_t workDim, const size_t *pGlobalWorkOffset, const size_t *pGlobalWorkSize, @@ -70,13 +141,16 @@ UR_APIEXPORT ur_result_t UR_APICALL urEnqueueMemBufferRead( size_t offset, size_t size, void *pDst, uint32_t numEventsInWaitList, const ur_event_handle_t *phEventWaitList, ur_event_handle_t *phEvent) { - CL_RETURN_ON_FAILURE(clEnqueueReadBuffer( + auto ClErr = clEnqueueReadBuffer( cl_adapter::cast(hQueue), cl_adapter::cast(hBuffer), blockingRead, offset, size, pDst, numEventsInWaitList, cl_adapter::cast(phEventWaitList), - cl_adapter::cast(phEvent))); + cl_adapter::cast(phEvent)); - return UR_RESULT_SUCCESS; + if (ClErr == CL_INVALID_VALUE) { + UR_RETURN_ON_FAILURE(ValidateBufferSize(hBuffer, size, offset)); + } + return mapCLErrorToUR(ClErr); } UR_APIEXPORT ur_result_t UR_APICALL urEnqueueMemBufferWrite( @@ -84,13 +158,16 @@ UR_APIEXPORT ur_result_t UR_APICALL urEnqueueMemBufferWrite( size_t offset, size_t size, const void *pSrc, uint32_t numEventsInWaitList, const ur_event_handle_t *phEventWaitList, ur_event_handle_t *phEvent) { - CL_RETURN_ON_FAILURE(clEnqueueWriteBuffer( + auto ClErr = clEnqueueWriteBuffer( cl_adapter::cast(hQueue), cl_adapter::cast(hBuffer), blockingWrite, offset, size, pSrc, numEventsInWaitList, cl_adapter::cast(phEventWaitList), - cl_adapter::cast(phEvent))); + cl_adapter::cast(phEvent)); - return UR_RESULT_SUCCESS; + if (ClErr == CL_INVALID_VALUE) { + UR_RETURN_ON_FAILURE(ValidateBufferSize(hBuffer, size, offset)); + } + return mapCLErrorToUR(ClErr); } UR_APIEXPORT ur_result_t UR_APICALL urEnqueueMemBufferReadRect( @@ -101,7 +178,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urEnqueueMemBufferReadRect( uint32_t numEventsInWaitList, const ur_event_handle_t *phEventWaitList, ur_event_handle_t *phEvent) { - CL_RETURN_ON_FAILURE(clEnqueueReadBufferRect( + auto ClErr = clEnqueueReadBufferRect( cl_adapter::cast(hQueue), cl_adapter::cast(hBuffer), blockingRead, cl_adapter::cast(&bufferOrigin), @@ -109,9 +186,12 @@ UR_APIEXPORT ur_result_t UR_APICALL urEnqueueMemBufferReadRect( cl_adapter::cast(®ion), bufferRowPitch, bufferSlicePitch, hostRowPitch, hostSlicePitch, pDst, numEventsInWaitList, cl_adapter::cast(phEventWaitList), - cl_adapter::cast(phEvent))); + cl_adapter::cast(phEvent)); - return UR_RESULT_SUCCESS; + if (ClErr == CL_INVALID_VALUE) { + UR_RETURN_ON_FAILURE(ValidateBufferRectSize(hBuffer, region, bufferOrigin)); + } + return mapCLErrorToUR(ClErr); } UR_APIEXPORT ur_result_t UR_APICALL urEnqueueMemBufferWriteRect( @@ -122,7 +202,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urEnqueueMemBufferWriteRect( uint32_t numEventsInWaitList, const ur_event_handle_t *phEventWaitList, ur_event_handle_t *phEvent) { - CL_RETURN_ON_FAILURE(clEnqueueWriteBufferRect( + auto ClErr = clEnqueueWriteBufferRect( cl_adapter::cast(hQueue), cl_adapter::cast(hBuffer), blockingWrite, cl_adapter::cast(&bufferOrigin), @@ -130,9 +210,12 @@ UR_APIEXPORT ur_result_t UR_APICALL urEnqueueMemBufferWriteRect( cl_adapter::cast(®ion), bufferRowPitch, bufferSlicePitch, hostRowPitch, hostSlicePitch, pSrc, numEventsInWaitList, cl_adapter::cast(phEventWaitList), - cl_adapter::cast(phEvent))); + cl_adapter::cast(phEvent)); - return UR_RESULT_SUCCESS; + if (ClErr == CL_INVALID_VALUE) { + UR_RETURN_ON_FAILURE(ValidateBufferRectSize(hBuffer, region, bufferOrigin)); + } + return mapCLErrorToUR(ClErr); } UR_APIEXPORT ur_result_t UR_APICALL urEnqueueMemBufferCopy( @@ -141,14 +224,18 @@ UR_APIEXPORT ur_result_t UR_APICALL urEnqueueMemBufferCopy( uint32_t numEventsInWaitList, const ur_event_handle_t *phEventWaitList, ur_event_handle_t *phEvent) { - CL_RETURN_ON_FAILURE(clEnqueueCopyBuffer( + auto ClErr = clEnqueueCopyBuffer( cl_adapter::cast(hQueue), cl_adapter::cast(hBufferSrc), cl_adapter::cast(hBufferDst), srcOffset, dstOffset, size, numEventsInWaitList, cl_adapter::cast(phEventWaitList), - cl_adapter::cast(phEvent))); + cl_adapter::cast(phEvent)); - return UR_RESULT_SUCCESS; + if (ClErr == CL_INVALID_VALUE) { + UR_RETURN_ON_FAILURE(ValidateBufferSize(hBufferSrc, size, srcOffset)); + UR_RETURN_ON_FAILURE(ValidateBufferSize(hBufferDst, size, dstOffset)); + } + return mapCLErrorToUR(ClErr); } UR_APIEXPORT ur_result_t UR_APICALL urEnqueueMemBufferCopyRect( @@ -159,7 +246,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urEnqueueMemBufferCopyRect( uint32_t numEventsInWaitList, const ur_event_handle_t *phEventWaitList, ur_event_handle_t *phEvent) { - CL_RETURN_ON_FAILURE(clEnqueueCopyBufferRect( + auto ClErr = clEnqueueCopyBufferRect( cl_adapter::cast(hQueue), cl_adapter::cast(hBufferSrc), cl_adapter::cast(hBufferDst), @@ -168,9 +255,13 @@ UR_APIEXPORT ur_result_t UR_APICALL urEnqueueMemBufferCopyRect( cl_adapter::cast(®ion), srcRowPitch, srcSlicePitch, dstRowPitch, dstSlicePitch, numEventsInWaitList, cl_adapter::cast(phEventWaitList), - cl_adapter::cast(phEvent))); + cl_adapter::cast(phEvent)); - return UR_RESULT_SUCCESS; + if (ClErr == CL_INVALID_VALUE) { + UR_RETURN_ON_FAILURE(ValidateBufferRectSize(hBufferSrc, region, srcOrigin)); + UR_RETURN_ON_FAILURE(ValidateBufferRectSize(hBufferDst, region, dstOrigin)); + } + return mapCLErrorToUR(ClErr); } UR_APIEXPORT ur_result_t UR_APICALL urEnqueueMemBufferFill( @@ -181,13 +272,16 @@ UR_APIEXPORT ur_result_t UR_APICALL urEnqueueMemBufferFill( // CL FillBuffer only allows pattern sizes up to the largest CL type: // long16/double16 if (patternSize <= 128) { - CL_RETURN_ON_FAILURE( - clEnqueueFillBuffer(cl_adapter::cast(hQueue), - cl_adapter::cast(hBuffer), pPattern, - patternSize, offset, size, numEventsInWaitList, - cl_adapter::cast(phEventWaitList), - cl_adapter::cast(phEvent))); - return UR_RESULT_SUCCESS; + auto ClErr = (clEnqueueFillBuffer( + cl_adapter::cast(hQueue), + cl_adapter::cast(hBuffer), pPattern, patternSize, offset, size, + numEventsInWaitList, + cl_adapter::cast(phEventWaitList), + cl_adapter::cast(phEvent))); + if (ClErr != CL_SUCCESS) { + UR_RETURN_ON_FAILURE(ValidateBufferSize(hBuffer, size, offset)); + } + return mapCLErrorToUR(ClErr); } auto NumValues = size / sizeof(uint64_t); @@ -205,6 +299,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urEnqueueMemBufferFill( &WriteEvent); if (ClErr != CL_SUCCESS) { delete[] HostBuffer; + UR_RETURN_ON_FAILURE(ValidateBufferSize(hBuffer, offset, size)); CL_RETURN_ON_FAILURE(ClErr); } @@ -237,15 +332,18 @@ UR_APIEXPORT ur_result_t UR_APICALL urEnqueueMemImageRead( size_t slicePitch, void *pDst, uint32_t numEventsInWaitList, const ur_event_handle_t *phEventWaitList, ur_event_handle_t *phEvent) { - CL_RETURN_ON_FAILURE(clEnqueueReadImage( + auto ClErr = clEnqueueReadImage( cl_adapter::cast(hQueue), cl_adapter::cast(hImage), blockingRead, cl_adapter::cast(&origin), cl_adapter::cast(®ion), rowPitch, slicePitch, pDst, numEventsInWaitList, cl_adapter::cast(phEventWaitList), - cl_adapter::cast(phEvent))); + cl_adapter::cast(phEvent)); - return UR_RESULT_SUCCESS; + if (ClErr == CL_INVALID_VALUE) { + UR_RETURN_ON_FAILURE(ValidateImageSize(hImage, region, origin)); + } + return mapCLErrorToUR(ClErr); } UR_APIEXPORT ur_result_t UR_APICALL urEnqueueMemImageWrite( @@ -254,15 +352,18 @@ UR_APIEXPORT ur_result_t UR_APICALL urEnqueueMemImageWrite( size_t slicePitch, void *pSrc, uint32_t numEventsInWaitList, const ur_event_handle_t *phEventWaitList, ur_event_handle_t *phEvent) { - CL_RETURN_ON_FAILURE(clEnqueueWriteImage( + auto ClErr = clEnqueueWriteImage( cl_adapter::cast(hQueue), cl_adapter::cast(hImage), blockingWrite, cl_adapter::cast(&origin), cl_adapter::cast(®ion), rowPitch, slicePitch, pSrc, numEventsInWaitList, cl_adapter::cast(phEventWaitList), - cl_adapter::cast(phEvent))); + cl_adapter::cast(phEvent)); - return UR_RESULT_SUCCESS; + if (ClErr == CL_INVALID_VALUE) { + UR_RETURN_ON_FAILURE(ValidateImageSize(hImage, region, origin)); + } + return mapCLErrorToUR(ClErr); } UR_APIEXPORT ur_result_t UR_APICALL urEnqueueMemImageCopy( @@ -272,16 +373,20 @@ UR_APIEXPORT ur_result_t UR_APICALL urEnqueueMemImageCopy( uint32_t numEventsInWaitList, const ur_event_handle_t *phEventWaitList, ur_event_handle_t *phEvent) { - CL_RETURN_ON_FAILURE(clEnqueueCopyImage( + auto ClErr = clEnqueueCopyImage( cl_adapter::cast(hQueue), cl_adapter::cast(hImageSrc), cl_adapter::cast(hImageDst), cl_adapter::cast(&srcOrigin), cl_adapter::cast(&dstOrigin), cl_adapter::cast(®ion), numEventsInWaitList, cl_adapter::cast(phEventWaitList), - cl_adapter::cast(phEvent))); + cl_adapter::cast(phEvent)); - return UR_RESULT_SUCCESS; + if (ClErr == CL_INVALID_VALUE) { + UR_RETURN_ON_FAILURE(ValidateImageSize(hImageSrc, region, srcOrigin)); + UR_RETURN_ON_FAILURE(ValidateImageSize(hImageDst, region, dstOrigin)); + } + return mapCLErrorToUR(ClErr); } UR_APIEXPORT ur_result_t UR_APICALL urEnqueueMemBufferMap( @@ -298,9 +403,10 @@ UR_APIEXPORT ur_result_t UR_APICALL urEnqueueMemBufferMap( cl_adapter::cast(phEventWaitList), cl_adapter::cast(phEvent), &Err); - CL_RETURN_ON_FAILURE(Err); - - return UR_RESULT_SUCCESS; + if (Err == CL_INVALID_VALUE) { + UR_RETURN_ON_FAILURE(ValidateBufferSize(hBuffer, size, offset)); + } + return mapCLErrorToUR(Err); } UR_APIEXPORT ur_result_t UR_APICALL urEnqueueMemUnmap( diff --git a/source/adapters/opencl/memory.cpp b/source/adapters/opencl/memory.cpp index be9b266f3d..1a77754c57 100644 --- a/source/adapters/opencl/memory.cpp +++ b/source/adapters/opencl/memory.cpp @@ -319,9 +319,16 @@ UR_APIEXPORT ur_result_t UR_APICALL urMemBufferPartition( *phMem = reinterpret_cast(clCreateSubBuffer( cl_adapter::cast(hBuffer), static_cast(flags), BufferCreateType, &BufferRegion, cl_adapter::cast(&RetErr))); - CL_RETURN_ON_FAILURE(RetErr); - return UR_RESULT_SUCCESS; + if (RetErr == CL_INVALID_VALUE) { + size_t BufferSize = 0; + CL_RETURN_ON_FAILURE(clGetMemObjectInfo(cl_adapter::cast(hBuffer), + CL_MEM_SIZE, sizeof(BufferSize), + &BufferSize, nullptr)); + if (BufferRegion.size + BufferRegion.origin > BufferSize) + return UR_RESULT_ERROR_INVALID_BUFFER_SIZE; + } + return mapCLErrorToUR(RetErr); } UR_APIEXPORT ur_result_t UR_APICALL