From e234fda0701c84df1558d7f01aacd8c103531f7c Mon Sep 17 00:00:00 2001 From: Alexey Sachkov Date: Wed, 9 Jul 2025 16:26:30 +0200 Subject: [PATCH 1/6] Fix _FORTIFY_SOURCE=3 (#19268) The problem here is that the macro is actually handled by glibc and value `3` isn't supported with older compiler/glibc combinations, causing warnings about the macro redefinition. We still have to support older compilers/glibc and therefore two changes were made: - UR skips setting its own `_FORTIFY_SOURCE` in favor of a global one if it is built as part of LLVM (i.e. not standalone) - Before setting `_FORTIFY_SOURCE` globally we check the compiler and fall back to value `2` for older gcc --- cmake/helpers.cmake | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/cmake/helpers.cmake b/cmake/helpers.cmake index a2d6a26cd3..de22fe7580 100644 --- a/cmake/helpers.cmake +++ b/cmake/helpers.cmake @@ -88,7 +88,13 @@ endif() function(add_ur_target_compile_options name) if(NOT MSVC) - target_compile_definitions(${name} PRIVATE -D_FORTIFY_SOURCE=2) + if (NOT LLVM_ENABLE_PROJECTS) + # If UR is built as part of LLVM (i.e. as part of SYCL), then + # _FORTIFY_SOURCE will be set globally in advance to a potentially + # different value. To avoid redefinition errors, only set the + # macro for a "standalone" build. + target_compile_definitions(${name} PRIVATE -D_FORTIFY_SOURCE=2) + endif() target_compile_options(${name} PRIVATE # Warning options -Wall From b31d0a06c186fd1a162232620b47283b34251ba0 Mon Sep 17 00:00:00 2001 From: Ross Brunton Date: Thu, 10 Jul 2025 11:11:37 +0100 Subject: [PATCH 2/6] Spec wording around `PROGRAM_INFO_BINARIES` and test (#19321) The wording of the spec was a bit confusing, so it has been clarified. The conformance test was also updated to be more robust, which unfortunately exposes failures in HIP and Cuda. 
--- include/ur_api.h | 4 ++-- include/ur_print.hpp | 2 +- scripts/core/program.yml | 2 +- scripts/templates/print.hpp.mako | 2 +- test/conformance/program/urProgramGetInfo.cpp | 9 +++++++++ 5 files changed, 14 insertions(+), 5 deletions(-) diff --git a/include/ur_api.h b/include/ur_api.h index 6b996c1a4e..8a2dc3afce 100644 --- a/include/ur_api.h +++ b/include/ur_api.h @@ -5779,8 +5779,8 @@ typedef enum ur_program_info_t { UR_PROGRAM_INFO_IL = 4, /// [size_t[]] Return program binary sizes for each device. UR_PROGRAM_INFO_BINARY_SIZES = 5, - /// [unsigned char[]] Return program binaries for all devices for this - /// Program. These are not null-terminated. + /// [unsigned char *[]] Write program binaries into caller-provided + /// buffers for each device. These are not null-terminated. UR_PROGRAM_INFO_BINARIES = 6, /// [size_t][optional-query] Number of kernels in Program, return type /// size_t. diff --git a/include/ur_print.hpp b/include/ur_print.hpp index ddf0af88eb..f0e7e96c23 100644 --- a/include/ur_print.hpp +++ b/include/ur_print.hpp @@ -9060,7 +9060,7 @@ inline ur_result_t printTagged(std::ostream &os, const void *ptr, } break; case UR_PROGRAM_INFO_BINARIES: { - const unsigned char *tptr = (const unsigned char *)ptr; + const unsigned char *const *tptr = (const unsigned char *const *)ptr; printPtr(os, tptr); } break; case UR_PROGRAM_INFO_NUM_KERNELS: { diff --git a/scripts/core/program.yml b/scripts/core/program.yml index 807b06038b..c6c2a9f83d 100644 --- a/scripts/core/program.yml +++ b/scripts/core/program.yml @@ -385,7 +385,7 @@ etors: - name: BINARY_SIZES desc: "[size_t[]] Return program binary sizes for each device." - name: BINARIES - desc: "[unsigned char[]] Return program binaries for all devices for this Program. These are not null-terminated." + desc: "[unsigned char *[]] Write program binaries into caller-provided buffers for each device. These are not null-terminated." 
- name: NUM_KERNELS desc: "[size_t][optional-query] Number of kernels in Program, return type size_t." - name: KERNEL_NAMES diff --git a/scripts/templates/print.hpp.mako b/scripts/templates/print.hpp.mako index 81d9d795b8..4481847130 100644 --- a/scripts/templates/print.hpp.mako +++ b/scripts/templates/print.hpp.mako @@ -284,7 +284,7 @@ inline std::ostream &operator<<(std::ostream &os, [[maybe_unused]] const ur_bool %>case ${ename}: { %if th.value_traits.is_array(vtype): <% atype = th.value_traits.get_array_name(vtype) %> - %if 'void' in atype: + %if 'void' in atype or '*' in atype: const ${atype} const *tptr = (const ${atype} const*)ptr; %else: const ${atype} *tptr = (const ${atype} *)ptr; diff --git a/test/conformance/program/urProgramGetInfo.cpp b/test/conformance/program/urProgramGetInfo.cpp index 2a2d4ab234..27fda5c804 100644 --- a/test/conformance/program/urProgramGetInfo.cpp +++ b/test/conformance/program/urProgramGetInfo.cpp @@ -153,6 +153,10 @@ TEST_P(urProgramGetInfoTest, SuccessBinarySizes) { } TEST_P(urProgramGetInfoTest, SuccessBinaries) { + // Not implemented correctly on these targets - they copy their own pointer into the output rather than copying the + // binary + UUR_KNOWN_FAILURE_ON(uur::HIP{}, uur::CUDA{}); + size_t binary_sizes_len = 0; std::vector property_value(0); @@ -175,6 +179,11 @@ TEST_P(urProgramGetInfoTest, SuccessBinaries) { urProgramGetInfo(program, UR_PROGRAM_INFO_BINARIES, sizeof(binaries[0]), binaries, nullptr), UR_PROGRAM_INFO_BINARIES); + + // We assume that there is at least 1 non-zero byte in the binary + bool nonzero_found = std::any_of(property_value.begin(), property_value.end(), + [](char c) { return c != 0; }); + ASSERT_TRUE(nonzero_found); } TEST_P(urProgramGetInfoTest, SuccessNumKernels) { From b81b47941ec934f74e3941202156a0e43962992b Mon Sep 17 00:00:00 2001 From: Yang Zhao Date: Thu, 10 Jul 2025 18:14:14 +0800 Subject: [PATCH 3/6] Add e2e tests for memcpy2d (#19335) --- 
source/loader/layers/sanitizer/msan/msan_ddi.cpp | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/source/loader/layers/sanitizer/msan/msan_ddi.cpp b/source/loader/layers/sanitizer/msan/msan_ddi.cpp index 439afeae99..e2bbb166a5 100644 --- a/source/loader/layers/sanitizer/msan/msan_ddi.cpp +++ b/source/loader/layers/sanitizer/msan/msan_ddi.cpp @@ -61,8 +61,7 @@ ur_result_t urEnqueueUSMFill2DFallback(ur_queue_handle_t hQueue, void *pMem, ur_result_t Result = getContext()->urDdiTable.Enqueue.pfnUSMFill2D( hQueue, pMem, pitch, patternSize, pPattern, width, height, numEventsInWaitList, phEventWaitList, phEvent); - if (Result == UR_RESULT_SUCCESS || - Result != UR_RESULT_ERROR_UNSUPPORTED_FEATURE) { + if (Result != UR_RESULT_ERROR_UNSUPPORTED_FEATURE) { return Result; } From cd4188b6c151e3607bc23cf66354002c70da1770 Mon Sep 17 00:00:00 2001 From: Georgi Mirazchiyski Date: Thu, 10 Jul 2025 12:40:08 +0100 Subject: [PATCH 4/6] Add DX11 memory interop (#19217) In DX11 Texture1D and Texture3D cannot be shared between processes or devices, so only Texture2D is used but its layout is adapted (height = 1 for 1D and ArraySlices = depth for 3D) for interop purposes and testing 1D and 3D image operations on it in the SYCL kernel. The new DXGI adapter selection fixes issues with non-matching devices between DX and SYCL which also failed the DX12 tests when more than 1 potential adapter is visible to DirectX. Ideally the introduction of LUIDs to SYCL will resolve that completely when they are properly matched. The future flow should change to - 1) create SYCL device (so the device selection itself can be manipulated via `ONEAPI_DEVICE_SELECTOR`), 2) get DXGI adapter for creating a DX logical device (D3D11 or D3D12) by matching adapter LUIDs (this is going to depend on a LUID device info query extension for SYCL). Some notes on synchronisation: - IKeyedMutex is required for synchronising the access to the shared resource (texture) between devices or processes. 
- Currently the SYCL queue calls wait after submission to execute immediately and block until completion, but we can use ID3D11Fence imported in SYCL to signal the completion of the work to D3D11 via SYCL in the future when this kind of interop is considered. --- include/ur_api.h | 8 ++++++-- include/ur_print.hpp | 3 +++ scripts/core/EXP-BINDLESS-IMAGES.rst | 4 ++++ scripts/core/exp-bindless-images.yml | 2 ++ source/adapters/cuda/image.cpp | 4 ++++ source/adapters/hip/image.cpp | 8 ++++++++ source/adapters/level_zero/image_common.cpp | 3 +++ source/loader/layers/validation/ur_valddi.cpp | 4 ++-- source/loader/ur_libapi.cpp | 6 ++++-- source/ur_api.cpp | 6 ++++-- 10 files changed, 40 insertions(+), 8 deletions(-) diff --git a/include/ur_api.h b/include/ur_api.h index 8a2dc3afce..577bb4d5b2 100644 --- a/include/ur_api.h +++ b/include/ur_api.h @@ -9800,6 +9800,8 @@ typedef enum ur_exp_external_mem_type_t { UR_EXP_EXTERNAL_MEM_TYPE_WIN32_NT_DX12_RESOURCE = 2, /// dma_buf file descriptor UR_EXP_EXTERNAL_MEM_TYPE_DMA_BUF = 3, + /// Win32 NT DirectX 11 resource handle + UR_EXP_EXTERNAL_MEM_TYPE_WIN32_NT_DX11_RESOURCE = 4, /// @cond UR_EXP_EXTERNAL_MEM_TYPE_FORCE_UINT32 = 0x7fffffff /// @endcond @@ -10518,7 +10520,8 @@ UR_APIEXPORT ur_result_t UR_APICALL urBindlessImagesMipmapFreeExp( /// + `NULL == hContext` /// + `NULL == hDevice` /// - ::UR_RESULT_ERROR_INVALID_ENUMERATION -/// + `::UR_EXP_EXTERNAL_MEM_TYPE_DMA_BUF < memHandleType` +/// + `::UR_EXP_EXTERNAL_MEM_TYPE_WIN32_NT_DX11_RESOURCE < +/// memHandleType` /// - ::UR_RESULT_ERROR_INVALID_NULL_POINTER /// + `NULL == pExternalMemDesc` /// + `NULL == phExternalMem` @@ -10675,7 +10678,8 @@ UR_APIEXPORT ur_result_t UR_APICALL urBindlessImagesFreeMappedLinearMemoryExp( /// - ::UR_RESULT_ERROR_INVALID_NULL_HANDLE /// + `NULL == hDevice` /// - ::UR_RESULT_ERROR_INVALID_ENUMERATION -/// + `::UR_EXP_EXTERNAL_MEM_TYPE_DMA_BUF < memHandleType` +/// + `::UR_EXP_EXTERNAL_MEM_TYPE_WIN32_NT_DX11_RESOURCE < +/// memHandleType` /// - 
::UR_RESULT_ERROR_INVALID_NULL_POINTER /// + `NULL == pSupportedRet` /// - ::UR_RESULT_ERROR_INVALID_DEVICE diff --git a/include/ur_print.hpp b/include/ur_print.hpp index f0e7e96c23..7fc43237a2 100644 --- a/include/ur_print.hpp +++ b/include/ur_print.hpp @@ -11321,6 +11321,9 @@ inline std::ostream &operator<<(std::ostream &os, case UR_EXP_EXTERNAL_MEM_TYPE_DMA_BUF: os << "UR_EXP_EXTERNAL_MEM_TYPE_DMA_BUF"; break; + case UR_EXP_EXTERNAL_MEM_TYPE_WIN32_NT_DX11_RESOURCE: + os << "UR_EXP_EXTERNAL_MEM_TYPE_WIN32_NT_DX11_RESOURCE"; + break; default: os << "unknown enumerator"; break; diff --git a/scripts/core/EXP-BINDLESS-IMAGES.rst b/scripts/core/EXP-BINDLESS-IMAGES.rst index c3034dce6d..35d2fedbef 100644 --- a/scripts/core/EXP-BINDLESS-IMAGES.rst +++ b/scripts/core/EXP-BINDLESS-IMAGES.rst @@ -120,6 +120,7 @@ Enums * ${X}_EXP_EXTERNAL_MEM_TYPE_WIN32_NT * ${X}_EXP_EXTERNAL_MEM_TYPE_WIN32_NT_DX12_RESOURCE * ${X}_EXP_EXTERNAL_MEM_TYPE_DMA_BUF + * ${X}_EXP_EXTERNAL_MEM_TYPE_WIN32_NT_DX11_RESOURCE * ${x}_exp_external_semaphore_type_t * ${X}_EXP_EXTERNAL_SEMAPHORE_TYPE_OPAQUE_FD @@ -301,6 +302,9 @@ Changelog | || * ${X}_EXP_EXTERNAL_MEM_TYPE_DMA_BUF | | || * ${x}BindlessImagesSupportsImportingHandleTypeExp | +----------+-------------------------------------------------------------+ +| 26.0 || Added support for importing DX11 resources | +| || * ${X}_EXP_EXTERNAL_MEM_TYPE_WIN32_NT_DX11_RESOURCE | ++----------+-------------------------------------------------------------+ Contributors -------------------------------------------------------------------------------- diff --git a/scripts/core/exp-bindless-images.yml b/scripts/core/exp-bindless-images.yml index 0161eee862..6ace4e7740 100644 --- a/scripts/core/exp-bindless-images.yml +++ b/scripts/core/exp-bindless-images.yml @@ -214,6 +214,8 @@ etors: desc: "Win32 NT DirectX 12 resource handle" - name: DMA_BUF desc: "dma_buf file descriptor" + - name: WIN32_NT_DX11_RESOURCE + desc: "Win32 NT DirectX 11 resource handle" --- 
#-------------------------------------------------------------------------- type: enum desc: "Dictates the type of external semaphore handle." diff --git a/source/adapters/cuda/image.cpp b/source/adapters/cuda/image.cpp index de07bfa39e..4d97b225cb 100644 --- a/source/adapters/cuda/image.cpp +++ b/source/adapters/cuda/image.cpp @@ -1524,6 +1524,10 @@ UR_APIEXPORT ur_result_t UR_APICALL urBindlessImagesImportExternalMemoryExp( extMemDesc.type = CU_EXTERNAL_MEMORY_HANDLE_TYPE_D3D12_RESOURCE; extMemDesc.flags = CUDA_EXTERNAL_MEMORY_DEDICATED; break; + case UR_EXP_EXTERNAL_MEM_TYPE_WIN32_NT_DX11_RESOURCE: + extMemDesc.type = CU_EXTERNAL_MEMORY_HANDLE_TYPE_D3D11_RESOURCE; + extMemDesc.flags = CUDA_EXTERNAL_MEMORY_DEDICATED; + break; default: return UR_RESULT_ERROR_INVALID_VALUE; } diff --git a/source/adapters/hip/image.cpp b/source/adapters/hip/image.cpp index fefd67e0c7..4851b197d6 100644 --- a/source/adapters/hip/image.cpp +++ b/source/adapters/hip/image.cpp @@ -1399,6 +1399,14 @@ UR_APIEXPORT ur_result_t UR_APICALL urBindlessImagesImportExternalMemoryExp( extMemDesc.flags = hipExternalMemoryDedicated; #else return UR_RESULT_ERROR_UNSUPPORTED_FEATURE; +#endif + break; + case UR_EXP_EXTERNAL_MEM_TYPE_WIN32_NT_DX11_RESOURCE: +#if HIP_VERSION >= 50600000 + extMemDesc.type = hipExternalMemoryHandleTypeD3D11Resource; + extMemDesc.flags = hipExternalMemoryDedicated; +#else + return UR_RESULT_ERROR_UNSUPPORTED_FEATURE; #endif break; default: diff --git a/source/adapters/level_zero/image_common.cpp b/source/adapters/level_zero/image_common.cpp index 11bd49a96d..b34a59ad5e 100644 --- a/source/adapters/level_zero/image_common.cpp +++ b/source/adapters/level_zero/image_common.cpp @@ -1268,6 +1268,9 @@ ur_result_t urBindlessImagesImportExternalMemoryExp( case UR_EXP_EXTERNAL_MEM_TYPE_WIN32_NT_DX12_RESOURCE: importWin32->flags = ZE_EXTERNAL_MEMORY_TYPE_FLAG_D3D12_RESOURCE; break; + case UR_EXP_EXTERNAL_MEM_TYPE_WIN32_NT_DX11_RESOURCE: + importWin32->flags = 
ZE_EXTERNAL_MEMORY_TYPE_FLAG_D3D11_TEXTURE; + break; default: delete importWin32; delete externalMemoryData; diff --git a/source/loader/layers/validation/ur_valddi.cpp b/source/loader/layers/validation/ur_valddi.cpp index 73c91de4a1..979eb3ef22 100644 --- a/source/loader/layers/validation/ur_valddi.cpp +++ b/source/loader/layers/validation/ur_valddi.cpp @@ -8314,7 +8314,7 @@ __urdlllocal ur_result_t UR_APICALL urBindlessImagesImportExternalMemoryExp( if (NULL == hDevice) return UR_RESULT_ERROR_INVALID_NULL_HANDLE; - if (UR_EXP_EXTERNAL_MEM_TYPE_DMA_BUF < memHandleType) + if (UR_EXP_EXTERNAL_MEM_TYPE_WIN32_NT_DX11_RESOURCE < memHandleType) return UR_RESULT_ERROR_INVALID_ENUMERATION; } @@ -8558,7 +8558,7 @@ urBindlessImagesSupportsImportingHandleTypeExp( if (NULL == hDevice) return UR_RESULT_ERROR_INVALID_NULL_HANDLE; - if (UR_EXP_EXTERNAL_MEM_TYPE_DMA_BUF < memHandleType) + if (UR_EXP_EXTERNAL_MEM_TYPE_WIN32_NT_DX11_RESOURCE < memHandleType) return UR_RESULT_ERROR_INVALID_ENUMERATION; } diff --git a/source/loader/ur_libapi.cpp b/source/loader/ur_libapi.cpp index 5f5f86bd4f..1261145424 100644 --- a/source/loader/ur_libapi.cpp +++ b/source/loader/ur_libapi.cpp @@ -8041,7 +8041,8 @@ ur_result_t UR_APICALL urBindlessImagesMipmapFreeExp( /// + `NULL == hContext` /// + `NULL == hDevice` /// - ::UR_RESULT_ERROR_INVALID_ENUMERATION -/// + `::UR_EXP_EXTERNAL_MEM_TYPE_DMA_BUF < memHandleType` +/// + `::UR_EXP_EXTERNAL_MEM_TYPE_WIN32_NT_DX11_RESOURCE < +/// memHandleType` /// - ::UR_RESULT_ERROR_INVALID_NULL_POINTER /// + `NULL == pExternalMemDesc` /// + `NULL == phExternalMem` @@ -8250,7 +8251,8 @@ ur_result_t UR_APICALL urBindlessImagesFreeMappedLinearMemoryExp( /// - ::UR_RESULT_ERROR_INVALID_NULL_HANDLE /// + `NULL == hDevice` /// - ::UR_RESULT_ERROR_INVALID_ENUMERATION -/// + `::UR_EXP_EXTERNAL_MEM_TYPE_DMA_BUF < memHandleType` +/// + `::UR_EXP_EXTERNAL_MEM_TYPE_WIN32_NT_DX11_RESOURCE < +/// memHandleType` /// - ::UR_RESULT_ERROR_INVALID_NULL_POINTER /// + `NULL == 
pSupportedRet` /// - ::UR_RESULT_ERROR_INVALID_DEVICE diff --git a/source/ur_api.cpp b/source/ur_api.cpp index 2c685ac2cd..cc69811f57 100644 --- a/source/ur_api.cpp +++ b/source/ur_api.cpp @@ -7027,7 +7027,8 @@ ur_result_t UR_APICALL urBindlessImagesMipmapFreeExp( /// + `NULL == hContext` /// + `NULL == hDevice` /// - ::UR_RESULT_ERROR_INVALID_ENUMERATION -/// + `::UR_EXP_EXTERNAL_MEM_TYPE_DMA_BUF < memHandleType` +/// + `::UR_EXP_EXTERNAL_MEM_TYPE_WIN32_NT_DX11_RESOURCE < +/// memHandleType` /// - ::UR_RESULT_ERROR_INVALID_NULL_POINTER /// + `NULL == pExternalMemDesc` /// + `NULL == phExternalMem` @@ -7199,7 +7200,8 @@ ur_result_t UR_APICALL urBindlessImagesFreeMappedLinearMemoryExp( /// - ::UR_RESULT_ERROR_INVALID_NULL_HANDLE /// + `NULL == hDevice` /// - ::UR_RESULT_ERROR_INVALID_ENUMERATION -/// + `::UR_EXP_EXTERNAL_MEM_TYPE_DMA_BUF < memHandleType` +/// + `::UR_EXP_EXTERNAL_MEM_TYPE_WIN32_NT_DX11_RESOURCE < +/// memHandleType` /// - ::UR_RESULT_ERROR_INVALID_NULL_POINTER /// + `NULL == pSupportedRet` /// - ::UR_RESULT_ERROR_INVALID_DEVICE From 7f8285cd066a2d49b274e212b8f87beea06fd324 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Igor=20Chor=C4=85=C5=BCewicz?= Date: Thu, 10 Jul 2025 08:13:49 -0700 Subject: [PATCH 5/6] Unify logging and leak checking for L0 v1 and v2 (#19328) This is needed so that we can enable V2 adapter by default on certain platforms: https://github.com/intel/llvm/pull/19333 The reason is that we need to load both adapters (legacy and v2) to check the device version. However, loading v2 adapter causes L0 loader to emit logs for all API calls (if ZE_DEBUG=1 is set). Since the legacy adapter used different logic for printing API calls, this would result in printing the same logs twice. This patch fixes that. 
--- source/adapters/level_zero/adapter.cpp | 112 +----------------- source/adapters/level_zero/common.cpp | 23 ---- source/adapters/level_zero/common.hpp | 3 - .../adapters/level_zero/v2/command_buffer.cpp | 2 + 4 files changed, 7 insertions(+), 133 deletions(-) diff --git a/source/adapters/level_zero/adapter.cpp b/source/adapters/level_zero/adapter.cpp index 0c1abe7667..0809ec97ed 100644 --- a/source/adapters/level_zero/adapter.cpp +++ b/source/adapters/level_zero/adapter.cpp @@ -309,12 +309,10 @@ ur_adapter_handle_t_::ur_adapter_handle_t_() if (UrL0Debug & UR_L0_DEBUG_BASIC) { logger.setLegacySink(std::make_unique()); -#ifdef UR_ADAPTER_LEVEL_ZERO_V2 setEnvVar("ZEL_ENABLE_LOADER_LOGGING", "1"); setEnvVar("ZEL_LOADER_LOGGING_LEVEL", "trace"); setEnvVar("ZEL_LOADER_LOG_CONSOLE", "1"); setEnvVar("ZE_ENABLE_VALIDATION_LAYER", "1"); -#endif }; if (UrL0Debug & UR_L0_DEBUG_VALIDATION) { @@ -322,19 +320,12 @@ ur_adapter_handle_t_::ur_adapter_handle_t_() setEnvVar("ZE_ENABLE_PARAMETER_VALIDATION", "1"); } - PlatformCache.Compute = [](Result &result) { - static std::once_flag ZeCallCountInitialized; - try { - std::call_once(ZeCallCountInitialized, []() { - if (UrL0LeaksDebug) { - ZeCallCount = new std::map; - } - }); - } catch (...) { - result = exceptionToResult(std::current_exception()); - return; - } + if (UrL0LeaksDebug) { + setEnvVar("ZE_ENABLE_VALIDATION_LAYER", "1"); + setEnvVar("ZEL_ENABLE_BASIC_LEAK_CHECKER", "1"); + } + PlatformCache.Compute = [](Result &result) { uint32_t UserForcedSysManInit = 0; // Check if the user has disabled the default L0 Env initialization. 
const int UrSysManEnvInitEnabled = [&UserForcedSysManInit] { @@ -426,7 +417,6 @@ ur_adapter_handle_t_::ur_adapter_handle_t_() useInitDrivers = true; } -#ifdef UR_ADAPTER_LEVEL_ZERO_V2 if ((loader_version.major == 1 && loader_version.minor < 21) || (loader_version.major == 1 && loader_version.minor == 21 && loader_version.patch < 2)) { @@ -435,7 +425,6 @@ ur_adapter_handle_t_::ur_adapter_handle_t_() "WARNING: Level Zero Loader version is older than 1.21.2. " "Please update to the latest version for API logging support.\n"); } -#endif } if (useInitDrivers) { @@ -552,97 +541,6 @@ void globalAdapterOnDemandCleanup() { } ur_result_t adapterStateTeardown() { - // Print the balance of various create/destroy native calls. - // The idea is to verify if the number of create(+) and destroy(-) calls are - // matched. - if (ZeCallCount && (UrL0LeaksDebug) != 0) { - bool LeakFound = false; - // clang-format off - // - // The format of this table is such that each row accounts for a - // specific type of objects, and all elements in the raw except the last - // one are allocating objects of that type, while the last element is known - // to deallocate objects of that type. 
- // - std::vector> CreateDestroySet = { - {"zeContextCreate", "zeContextDestroy"}, - {"zeCommandQueueCreate", "zeCommandQueueDestroy"}, - {"zeModuleCreate", "zeModuleDestroy"}, - {"zeKernelCreate", "zeKernelDestroy"}, - {"zeEventPoolCreate", "zeEventPoolDestroy"}, - {"zeCommandListCreateImmediate", "zeCommandListCreate", "zeCommandListDestroy"}, - {"zeEventCreate", "zeEventDestroy"}, - {"zeFenceCreate", "zeFenceDestroy"}, - {"zeImageCreate","zeImageViewCreateExt", "zeImageDestroy"}, - {"zeSamplerCreate", "zeSamplerDestroy"}, - {"zeMemAllocDevice", "zeMemAllocHost", "zeMemAllocShared", "zeMemFree"}, - }; - - // A sample output aimed below is this: - // ------------------------------------------------------------------------ - // zeContextCreate = 1 \---> zeContextDestroy = 1 - // zeCommandQueueCreate = 1 \---> zeCommandQueueDestroy = 1 - // zeModuleCreate = 1 \---> zeModuleDestroy = 1 - // zeKernelCreate = 1 \---> zeKernelDestroy = 1 - // zeEventPoolCreate = 1 \---> zeEventPoolDestroy = 1 - // zeCommandListCreateImmediate = 1 | - // zeCommandListCreate = 1 \---> zeCommandListDestroy = 1 ---> LEAK = 1 - // zeEventCreate = 2 \---> zeEventDestroy = 2 - // zeFenceCreate = 1 \---> zeFenceDestroy = 1 - // zeImageCreate = 0 \---> zeImageDestroy = 0 - // zeSamplerCreate = 0 \---> zeSamplerDestroy = 0 - // zeMemAllocDevice = 0 | - // zeMemAllocHost = 1 | - // zeMemAllocShared = 0 \---> zeMemFree = 1 - // - // clang-format on - // TODO: use logger to print this messages - std::cerr << "Check balance of create/destroy calls\n"; - std::cerr << "----------------------------------------------------------\n"; - std::stringstream ss; - for (const auto &Row : CreateDestroySet) { - int diff = 0; - for (auto I = Row.begin(); I != Row.end();) { - const char *ZeName = (*I).c_str(); - const auto &ZeCount = (*ZeCallCount)[*I]; - - bool First = (I == Row.begin()); - bool Last = (++I == Row.end()); - - if (Last) { - ss << " \\--->"; - diff -= ZeCount; - } else { - diff += ZeCount; - if 
(!First) { - ss << " | "; - std::cerr << ss.str() << "\n"; - ss.str(""); - ss.clear(); - } - } - ss << std::setw(30) << std::right << ZeName; - ss << " = "; - ss << std::setw(5) << std::left << ZeCount; - } - - if (diff) { - LeakFound = true; - ss << " ---> LEAK = " << diff; - } - - std::cerr << ss.str() << '\n'; - ss.str(""); - ss.clear(); - } - - ZeCallCount->clear(); - delete ZeCallCount; - ZeCallCount = nullptr; - if (LeakFound) - return UR_RESULT_ERROR_INVALID_MEM_OBJECT; - } - // Due to multiple DLLMain definitions with SYCL, register to cleanup the // Global Adapter after refcnt is 0 #if defined(_WIN32) diff --git a/source/adapters/level_zero/common.cpp b/source/adapters/level_zero/common.cpp index c41264fe3e..8ed6d7e579 100644 --- a/source/adapters/level_zero/common.cpp +++ b/source/adapters/level_zero/common.cpp @@ -86,8 +86,6 @@ bool setEnvVar(const char *name, const char *value) { ZeUSMImportExtension ZeUSMImport; -std::map *ZeCallCount = nullptr; - void zeParseError(ze_result_t ZeError, const char *&ErrorString) { switch (ZeError) { #define ZE_ERRCASE(ERR) \ @@ -137,31 +135,10 @@ void zeParseError(ze_result_t ZeError, const char *&ErrorString) { } // switch } -#ifdef UR_ADAPTER_LEVEL_ZERO_V2 ze_result_t ZeCall::doCall(ze_result_t ZeResult, const char *, const char *, bool) { return ZeResult; } -#else -ze_result_t ZeCall::doCall(ze_result_t ZeResult, const char *ZeName, - const char *ZeArgs, bool TraceError) { - UR_LOG(DEBUG, "ZE ---> {}{}", ZeName, ZeArgs); - - if (ZeResult == ZE_RESULT_SUCCESS) { - if (UrL0LeaksDebug) { - ++(*ZeCallCount)[ZeName]; - } - return ZE_RESULT_SUCCESS; - } - - if (TraceError) { - const char *ErrorString = "Unknown"; - zeParseError(ZeResult, ErrorString); - UR_LOG(ERR, "Error ({}) in {}", ErrorString, ZeName); - } - return ZeResult; -} -#endif // Specializations for various L0 structures template <> ze_structure_type_t getZeStructureType() { diff --git a/source/adapters/level_zero/common.hpp 
b/source/adapters/level_zero/common.hpp index cfb19f4977..d39f2dcd21 100644 --- a/source/adapters/level_zero/common.hpp +++ b/source/adapters/level_zero/common.hpp @@ -328,9 +328,6 @@ class ZeUSMImportExtension { // Helper wrapper for working with USM import extension in Level Zero. extern ZeUSMImportExtension ZeUSMImport; -// This will count the calls to Level-Zero -extern std::map *ZeCallCount; - // Some opencl extensions we know are supported by all Level Zero devices. constexpr char ZE_SUPPORTED_EXTENSIONS[] = "cl_khr_il_program cl_khr_subgroups cl_intel_subgroups " diff --git a/source/adapters/level_zero/v2/command_buffer.cpp b/source/adapters/level_zero/v2/command_buffer.cpp index bbc200bac6..92118587d4 100644 --- a/source/adapters/level_zero/v2/command_buffer.cpp +++ b/source/adapters/level_zero/v2/command_buffer.cpp @@ -167,6 +167,8 @@ ur_result_t ur_exp_command_buffer_handle_t_::registerExecutionEventUnlocked( } ur_exp_command_buffer_handle_t_::~ur_exp_command_buffer_handle_t_() { + UR_CALL_NOCHECK(commandListManager.lock()->releaseSubmittedKernels()); + if (currentExecution) { currentExecution->release(); } From cfc35c5ce64a81bf36983dcfa654590594513514 Mon Sep 17 00:00:00 2001 From: "github-actions[bot]" Date: Fri, 11 Jul 2025 00:43:00 +0000 Subject: [PATCH 6/6] Update intel/llvm mirror base commit to 5f86594c --- .github/intel-llvm-mirror-base-commit | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/intel-llvm-mirror-base-commit b/.github/intel-llvm-mirror-base-commit index 5a0c5a5ebb..866faaeec3 100644 --- a/.github/intel-llvm-mirror-base-commit +++ b/.github/intel-llvm-mirror-base-commit @@ -1 +1 @@ -92690a39bcb6bd40dce506d08ce4636564f314f2 +5f86594c35c62c45d6928e07a7191c62d69248dd