From 0d6e7692a3e3ee7162053b9570b3ef69d507ba01 Mon Sep 17 00:00:00 2001 From: Jakub Chlanda Date: Fri, 18 Jul 2025 08:34:22 +0200 Subject: [PATCH 1/5] Support UR_DEVICE_INFO_USM_CONTEXT_MEMCPY_SUPPORT_EXP on Cuda (#19267) This is an optimisation that uses a direct memcopy from host to device. --- source/adapters/cuda/device.cpp | 2 +- source/adapters/cuda/usm.cpp | 8 +++++--- 2 files changed, 6 insertions(+), 4 deletions(-) diff --git a/source/adapters/cuda/device.cpp b/source/adapters/cuda/device.cpp index 97b2430748..a77fba9aff 100644 --- a/source/adapters/cuda/device.cpp +++ b/source/adapters/cuda/device.cpp @@ -1164,7 +1164,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urDeviceGetInfo(ur_device_handle_t hDevice, case UR_DEVICE_INFO_LOW_POWER_EVENTS_SUPPORT_EXP: return ReturnValue(false); case UR_DEVICE_INFO_USM_CONTEXT_MEMCPY_SUPPORT_EXP: - return ReturnValue(false); + return ReturnValue(true); case UR_DEVICE_INFO_USE_NATIVE_ASSERT: return ReturnValue(true); case UR_DEVICE_INFO_USM_P2P_SUPPORT_EXP: diff --git a/source/adapters/cuda/usm.cpp b/source/adapters/cuda/usm.cpp index 8549be7af2..c805c1084e 100644 --- a/source/adapters/cuda/usm.cpp +++ b/source/adapters/cuda/usm.cpp @@ -575,7 +575,9 @@ urUSMPoolTrimToExp(ur_context_handle_t hContext, ur_device_handle_t hDevice, } UR_APIEXPORT ur_result_t UR_APICALL urUSMContextMemcpyExp(ur_context_handle_t, - void *, const void *, - size_t) { - return UR_RESULT_ERROR_UNSUPPORTED_FEATURE; + void *pDst, + const void *pSrc, + size_t Size) { + UR_CHECK_ERROR(cuMemcpy((CUdeviceptr)pDst, (CUdeviceptr)pSrc, Size)); + return UR_RESULT_SUCCESS; } From b72c92a289c21816d0681b780fa1e874eb7b8e83 Mon Sep 17 00:00:00 2001 From: Maosu Zhao Date: Fri, 18 Jul 2025 16:39:27 +0800 Subject: [PATCH 2/5] Set maximum supported local/private shadow memory size (#19465) If the number of work-groups is too large, local/private shadow memory consumes a lot of device memory and can easily cause out-of-resource errors, so set an upper limit on its size. 
This commit also includes the following changes: 1. Allocate private base/shadow per sub-group instead of per work-group for GPU devices. 2. Refine the algorithm used to calculate the sub-group linear ID. --- .../sanitizer/asan/asan_interceptor.cpp | 15 ++++++----- .../layers/sanitizer/asan/asan_libdevice.hpp | 4 +++ .../layers/sanitizer/asan/asan_shadow.cpp | 14 +++++----- .../layers/sanitizer/asan/asan_shadow.hpp | 11 ++++---- .../sanitizer/msan/msan_interceptor.cpp | 17 +++++++----- .../layers/sanitizer/msan/msan_libdevice.hpp | 4 +++ .../layers/sanitizer/msan/msan_shadow.cpp | 12 +++++---- .../layers/sanitizer/msan/msan_shadow.hpp | 11 ++++---- .../sanitizer_common/sanitizer_utils.cpp | 27 +++++++++++++++++++ .../sanitizer_common/sanitizer_utils.hpp | 1 + 10 files changed, 80 insertions(+), 36 deletions(-) diff --git a/source/loader/layers/sanitizer/asan/asan_interceptor.cpp b/source/loader/layers/sanitizer/asan/asan_interceptor.cpp index 293268e8c3..5e984d9b93 100644 --- a/source/loader/layers/sanitizer/asan/asan_interceptor.cpp +++ b/source/loader/layers/sanitizer/asan/asan_interceptor.cpp @@ -807,11 +807,14 @@ ur_result_t AsanInterceptor::prepareLaunch( LocalWorkSize[Dim]; } - uint64_t NumWI = 1; + uint64_t NumWILocal = 1; for (uint32_t Dim = 0; Dim < LaunchInfo.WorkDim; ++Dim) { - NumWI *= LaunchInfo.GlobalWorkSize[Dim]; + NumWILocal *= LocalWorkSize[Dim]; } + size_t SGSize = GetSubGroupSize(Kernel, DeviceInfo->Handle); + uint32_t NumSG = ((NumWILocal + SGSize - 1) / SGSize) * NumWG; + // Prepare asan runtime data LaunchInfo.Data.Host.GlobalShadowOffset = DeviceInfo->Shadow->ShadowBegin; LaunchInfo.Data.Host.GlobalShadowOffsetEnd = DeviceInfo->Shadow->ShadowEnd; @@ -841,20 +844,20 @@ ur_result_t AsanInterceptor::prepareLaunch( // Write shadow memory offset for private memory if (getContext()->Options.DetectPrivates) { if (DeviceInfo->Shadow->AllocPrivateShadow( - Queue, NumWI, NumWG, LaunchInfo.Data.Host.PrivateBase, + Queue, NumSG, LaunchInfo.Data.Host.PrivateBase, 
LaunchInfo.Data.Host.PrivateShadowOffset, LaunchInfo.Data.Host.PrivateShadowOffsetEnd) != UR_RESULT_SUCCESS) { UR_LOG_L(getContext()->logger, WARN, "Failed to allocate shadow memory for private memory, " - "maybe the number of workgroup ({}) is too large", - NumWG); + "maybe the number of subgroup ({}) is too large", + NumSG); UR_LOG_L(getContext()->logger, WARN, "Skip checking private memory of kernel <{}>", GetKernelName(Kernel)); LaunchInfo.Data.Host.PrivateShadowOffset = 0; } else { UR_LOG_L(getContext()->logger, INFO, - "ShadowMemory(Private, WorkGroup={}, {} - {})", NumWG, + "ShadowMemory(Private, SubGroup={}, {} - {})", NumSG, (void *)LaunchInfo.Data.Host.PrivateShadowOffset, (void *)LaunchInfo.Data.Host.PrivateShadowOffsetEnd); } diff --git a/source/loader/layers/sanitizer/asan/asan_libdevice.hpp b/source/loader/layers/sanitizer/asan/asan_libdevice.hpp index 441ac48c89..d076f6537f 100644 --- a/source/loader/layers/sanitizer/asan/asan_libdevice.hpp +++ b/source/loader/layers/sanitizer/asan/asan_libdevice.hpp @@ -49,6 +49,10 @@ struct LocalArgsInfo { uint64_t SizeWithRedZone = 0; }; +constexpr uint32_t ASAN_MAX_WG_LOCAL = 8192; + +constexpr uint32_t ASAN_MAX_SG_PRIVATE = 256; + constexpr uint64_t ASAN_MAX_NUM_REPORTS = 10; struct AsanRuntimeData { diff --git a/source/loader/layers/sanitizer/asan/asan_shadow.cpp b/source/loader/layers/sanitizer/asan/asan_shadow.cpp index 5fdbd7417c..209752d718 100644 --- a/source/loader/layers/sanitizer/asan/asan_shadow.cpp +++ b/source/loader/layers/sanitizer/asan/asan_shadow.cpp @@ -247,7 +247,8 @@ ur_result_t ShadowMemoryGPU::AllocLocalShadow(ur_queue_handle_t Queue, uptr &End) { const size_t LocalMemorySize = GetDeviceLocalMemorySize(Device); const size_t RequiredShadowSize = - (NumWG * LocalMemorySize) >> ASAN_SHADOW_SCALE; + (std::min(ASAN_MAX_WG_LOCAL, NumWG) * LocalMemorySize) >> + ASAN_SHADOW_SCALE; static size_t LastAllocedSize = 0; if (RequiredShadowSize > LastAllocedSize) { ur_context_handle_t QueueContext = 
GetContext(Queue); @@ -285,16 +286,17 @@ ur_result_t ShadowMemoryGPU::AllocLocalShadow(ur_queue_handle_t Queue, } ur_result_t ShadowMemoryGPU::AllocPrivateShadow(ur_queue_handle_t Queue, - uint64_t NumWI, uint32_t NumWG, - uptr *&Base, uptr &Begin, - uptr &End) { + uint32_t NumSG, uptr *&Base, + uptr &Begin, uptr &End) { // Trying to allocate private base array and private shadow, and any one of // them fail to allocate would be a failure static size_t LastPrivateBaseAllocedSize = 0; static size_t LastPrivateShadowAllocedSize = 0; + NumSG = std::min(NumSG, ASAN_MAX_SG_PRIVATE); + try { - const size_t NewPrivateBaseSize = NumWI * sizeof(uptr); + const size_t NewPrivateBaseSize = NumSG * sizeof(uptr); if (NewPrivateBaseSize > LastPrivateBaseAllocedSize) { if (PrivateBasePtr) { UR_CALL_THROWS(getContext()->urDdiTable.USM.pfnFree( @@ -317,7 +319,7 @@ ur_result_t ShadowMemoryGPU::AllocPrivateShadow(ur_queue_handle_t Queue, } const size_t NewPrivateShadowSize = - (NumWG * ASAN_PRIVATE_SIZE) >> ASAN_SHADOW_SCALE; + (NumSG * ASAN_PRIVATE_SIZE) >> ASAN_SHADOW_SCALE; if (NewPrivateShadowSize > LastPrivateShadowAllocedSize) { ur_context_handle_t QueueContext = GetContext(Queue); auto ContextInfo = getAsanInterceptor()->getContextInfo(QueueContext); diff --git a/source/loader/layers/sanitizer/asan/asan_shadow.hpp b/source/loader/layers/sanitizer/asan/asan_shadow.hpp index b8ac42fd08..7ab897dc28 100644 --- a/source/loader/layers/sanitizer/asan/asan_shadow.hpp +++ b/source/loader/layers/sanitizer/asan/asan_shadow.hpp @@ -56,9 +56,8 @@ struct ShadowMemory { uptr &Begin, uptr &End) = 0; virtual ur_result_t AllocPrivateShadow(ur_queue_handle_t Queue, - uint64_t NumWI, uint32_t NumWG, - uptr *&Base, uptr &Begin, - uptr &End) = 0; + uint32_t NumSG, uptr *&Base, + uptr &Begin, uptr &End) = 0; ur_context_handle_t Context{}; @@ -90,7 +89,7 @@ struct ShadowMemoryCPU final : public ShadowMemory { return UR_RESULT_SUCCESS; } - ur_result_t AllocPrivateShadow(ur_queue_handle_t, uint64_t, 
uint32_t, uptr *&, + ur_result_t AllocPrivateShadow(ur_queue_handle_t, uint32_t, uptr *&, uptr &Begin, uptr &End) override { Begin = ShadowBegin; End = ShadowEnd; @@ -110,8 +109,8 @@ struct ShadowMemoryGPU : public ShadowMemory { ur_result_t AllocLocalShadow(ur_queue_handle_t Queue, uint32_t NumWG, uptr &Begin, uptr &End) override final; - ur_result_t AllocPrivateShadow(ur_queue_handle_t Queue, uint64_t NumWI, - uint32_t NumWG, uptr *&Base, uptr &Begin, + ur_result_t AllocPrivateShadow(ur_queue_handle_t Queue, uint32_t NumSG, + uptr *&Base, uptr &Begin, uptr &End) override final; ur_mutex VirtualMemMapsMutex; diff --git a/source/loader/layers/sanitizer/msan/msan_interceptor.cpp b/source/loader/layers/sanitizer/msan/msan_interceptor.cpp index 5cc7061b44..de6c9c241c 100644 --- a/source/loader/layers/sanitizer/msan/msan_interceptor.cpp +++ b/source/loader/layers/sanitizer/msan/msan_interceptor.cpp @@ -530,11 +530,14 @@ ur_result_t MsanInterceptor::prepareLaunch( LocalWorkSize[Dim]; } - uint64_t NumWI = 1; + uint64_t NumWILocal = 1; for (uint32_t Dim = 0; Dim < LaunchInfo.WorkDim; ++Dim) { - NumWI *= LaunchInfo.GlobalWorkSize[Dim]; + NumWILocal *= LocalWorkSize[Dim]; } + size_t SGSize = GetSubGroupSize(Kernel, DeviceInfo->Handle); + uint32_t NumSG = ((NumWILocal + SGSize - 1) / SGSize) * NumWG; + // Write shadow memory offset for local memory if (KernelInfo.IsCheckLocals) { if (DeviceInfo->Shadow->AllocLocalShadow( @@ -558,13 +561,13 @@ ur_result_t MsanInterceptor::prepareLaunch( // Write shadow memory offset for private memory if (KernelInfo.IsCheckPrivates) { if (DeviceInfo->Shadow->AllocPrivateShadow( - Queue, NumWI, NumWG, LaunchInfo.Data.Host.PrivateBase, + Queue, NumSG, LaunchInfo.Data.Host.PrivateBase, LaunchInfo.Data.Host.PrivateShadowOffset, LaunchInfo.Data.Host.PrivateShadowOffsetEnd) != UR_RESULT_SUCCESS) { UR_LOG_L(getContext()->logger, WARN, "Failed to allocate shadow memory for private memory, " - "maybe the number of workgroup ({}) is too large", - 
NumWG); + "maybe the number of subgroup ({}) is too large", + NumSG); UR_LOG_L(getContext()->logger, WARN, "Skip checking private memory of kernel <{}>", GetKernelName(Kernel)); @@ -572,8 +575,8 @@ ur_result_t MsanInterceptor::prepareLaunch( } else { UR_LOG_L( getContext()->logger, DEBUG, - "ShadowMemory(Private, WorkGroup={}, PrivateBase={}, Shadow={} - {})", - NumWG, (void *)LaunchInfo.Data.Host.PrivateBase, + "ShadowMemory(Private, SubGroup={}, PrivateBase={}, Shadow={} - {})", + NumSG, (void *)LaunchInfo.Data.Host.PrivateBase, (void *)LaunchInfo.Data.Host.PrivateShadowOffset, (void *)LaunchInfo.Data.Host.PrivateShadowOffsetEnd); } diff --git a/source/loader/layers/sanitizer/msan/msan_libdevice.hpp b/source/loader/layers/sanitizer/msan/msan_libdevice.hpp index 6d2f6917f5..575655fe99 100644 --- a/source/loader/layers/sanitizer/msan/msan_libdevice.hpp +++ b/source/loader/layers/sanitizer/msan/msan_libdevice.hpp @@ -21,6 +21,10 @@ namespace ur_sanitizer_layer { constexpr unsigned MSAN_ORIGIN_GRANULARITY = 4U; +constexpr uint32_t MSAN_MAX_WG_LOCAL = 1024; + +constexpr uint32_t MSAN_MAX_SG_PRIVATE = 32; + struct MsanErrorReport { int Flag = 0; diff --git a/source/loader/layers/sanitizer/msan/msan_shadow.cpp b/source/loader/layers/sanitizer/msan/msan_shadow.cpp index 6243163c10..123dab662f 100644 --- a/source/loader/layers/sanitizer/msan/msan_shadow.cpp +++ b/source/loader/layers/sanitizer/msan/msan_shadow.cpp @@ -378,7 +378,8 @@ ur_result_t MsanShadowMemoryGPU::AllocLocalShadow(ur_queue_handle_t Queue, uint32_t NumWG, uptr &Begin, uptr &End) { const size_t LocalMemorySize = GetDeviceLocalMemorySize(Device); - const size_t RequiredShadowSize = NumWG * LocalMemorySize; + const size_t RequiredShadowSize = + std::min(NumWG, MSAN_MAX_WG_LOCAL) * LocalMemorySize; static size_t LastAllocedSize = 0; if (RequiredShadowSize > LastAllocedSize) { auto ContextInfo = getMsanInterceptor()->getContextInfo(Context); @@ -414,16 +415,17 @@ ur_result_t 
MsanShadowMemoryGPU::AllocLocalShadow(ur_queue_handle_t Queue, } ur_result_t MsanShadowMemoryGPU::AllocPrivateShadow(ur_queue_handle_t Queue, - uint64_t NumWI, - uint32_t NumWG, uptr *&Base, + uint32_t NumSG, uptr *&Base, uptr &Begin, uptr &End) { // Trying to allocate private base array and private shadow, and any one of // them fail to allocate would be a failure static size_t LastPrivateBaseAllocedSize = 0; static size_t LastPrivateShadowAllocedSize = 0; + NumSG = std::min(NumSG, MSAN_MAX_SG_PRIVATE); + try { - const size_t NewPrivateBaseSize = NumWI * sizeof(uptr); + const size_t NewPrivateBaseSize = NumSG * sizeof(uptr); if (NewPrivateBaseSize > LastPrivateBaseAllocedSize) { if (PrivateBasePtr) { UR_CALL_THROWS(getContext()->urDdiTable.USM.pfnFree( @@ -445,7 +447,7 @@ ur_result_t MsanShadowMemoryGPU::AllocPrivateShadow(ur_queue_handle_t Queue, LastPrivateBaseAllocedSize = NewPrivateBaseSize; } - const size_t NewPrivateShadowSize = NumWG * MSAN_PRIVATE_SIZE; + const size_t NewPrivateShadowSize = NumSG * MSAN_PRIVATE_SIZE; if (NewPrivateShadowSize > LastPrivateShadowAllocedSize) { if (PrivateShadowOffset) { diff --git a/source/loader/layers/sanitizer/msan/msan_shadow.hpp b/source/loader/layers/sanitizer/msan/msan_shadow.hpp index e9575b5612..660c93e19f 100644 --- a/source/loader/layers/sanitizer/msan/msan_shadow.hpp +++ b/source/loader/layers/sanitizer/msan/msan_shadow.hpp @@ -53,9 +53,8 @@ struct MsanShadowMemory { uptr &Begin, uptr &End) = 0; virtual ur_result_t AllocPrivateShadow(ur_queue_handle_t Queue, - uint64_t NumWI, uint32_t NumWG, - uptr *&Base, uptr &Begin, - uptr &End) = 0; + uint32_t NumSG, uptr *&Base, + uptr &Begin, uptr &End) = 0; ur_context_handle_t Context{}; @@ -113,7 +112,7 @@ struct MsanShadowMemoryCPU final : public MsanShadowMemory { return UR_RESULT_SUCCESS; } - ur_result_t AllocPrivateShadow(ur_queue_handle_t, uint64_t, uint32_t, uptr *&, + ur_result_t AllocPrivateShadow(ur_queue_handle_t, uint32_t, uptr *&, uptr &Begin, uptr &End) 
override { // This is necessary as msan_rtl use it to check whether detecting private // is enabled @@ -147,8 +146,8 @@ struct MsanShadowMemoryGPU : public MsanShadowMemory { ur_result_t AllocLocalShadow(ur_queue_handle_t Queue, uint32_t NumWG, uptr &Begin, uptr &End) override final; - ur_result_t AllocPrivateShadow(ur_queue_handle_t Queue, uint64_t NumWI, - uint32_t NumWG, uptr *&Base, uptr &Begin, + ur_result_t AllocPrivateShadow(ur_queue_handle_t Queue, uint32_t NumWG, + uptr *&Base, uptr &Begin, uptr &End) override final; virtual size_t GetShadowSize() = 0; diff --git a/source/loader/layers/sanitizer/sanitizer_common/sanitizer_utils.cpp b/source/loader/layers/sanitizer/sanitizer_common/sanitizer_utils.cpp index f8f7c58bf5..2bd8733394 100644 --- a/source/loader/layers/sanitizer/sanitizer_common/sanitizer_utils.cpp +++ b/source/loader/layers/sanitizer/sanitizer_common/sanitizer_utils.cpp @@ -137,6 +137,33 @@ std::string GetKernelName(ur_kernel_handle_t Kernel) { return std::string(KernelNameBuf.data(), KernelNameSize - 1); } +size_t GetSubGroupSize(ur_kernel_handle_t Kernel, ur_device_handle_t Device) { + uint32_t SubGroupSize = 0; + [[maybe_unused]] auto Result = + getContext()->urDdiTable.Kernel.pfnGetSubGroupInfo( + Kernel, Device, UR_KERNEL_SUB_GROUP_INFO_SUB_GROUP_SIZE_INTEL, + sizeof(uint32_t), &SubGroupSize, nullptr); + assert(Result == UR_RESULT_SUCCESS && "getSubGroupSize() failed"); + + // If user doesn't require the subgroup size, choose device supported smallest + // one. 
+ if (SubGroupSize == 0) { + size_t PropertySize = 0; + Result = getContext()->urDdiTable.Device.pfnGetInfo( + Device, UR_DEVICE_INFO_SUB_GROUP_SIZES_INTEL, 0, nullptr, + &PropertySize); + assert(Result == UR_RESULT_SUCCESS && "getDeviceInfo() failed"); + std::vector<uint32_t> SupportedSubGroupSize(PropertySize / + sizeof(uint32_t)); + Result = getContext()->urDdiTable.Device.pfnGetInfo( + Device, UR_DEVICE_INFO_SUB_GROUP_SIZES_INTEL, PropertySize, + SupportedSubGroupSize.data(), nullptr); + assert(Result == UR_RESULT_SUCCESS && "getDeviceInfo() failed"); + SubGroupSize = SupportedSubGroupSize[0]; + } + return SubGroupSize; +} + bool IsUSM(ur_context_handle_t Context, const void *MemPtr) { ur_usm_type_t USMType = GetUSMType(Context, MemPtr); return USMType != UR_USM_TYPE_UNKNOWN; diff --git a/source/loader/layers/sanitizer/sanitizer_common/sanitizer_utils.hpp b/source/loader/layers/sanitizer/sanitizer_common/sanitizer_utils.hpp index 607267ec6c..6024c644a4 100644 --- a/source/loader/layers/sanitizer/sanitizer_common/sanitizer_utils.hpp +++ b/source/loader/layers/sanitizer/sanitizer_common/sanitizer_utils.hpp @@ -48,6 +48,7 @@ ur_device_handle_t GetParentDevice(ur_device_handle_t Device); bool GetDeviceUSMCapability(ur_device_handle_t Device, ur_device_info_t Feature); std::string GetKernelName(ur_kernel_handle_t Kernel); +size_t GetSubGroupSize(ur_kernel_handle_t Kernel, ur_device_handle_t Device); size_t GetDeviceLocalMemorySize(ur_device_handle_t Device); ur_program_handle_t GetProgram(ur_kernel_handle_t Kernel); bool IsUSM(ur_context_handle_t Context, const void *MemPtr); From 4610fca5e7a64c5b87409f20494a53944c7dd99d Mon Sep 17 00:00:00 2001 From: Ross Brunton Date: Fri, 18 Jul 2025 11:28:50 +0100 Subject: [PATCH 3/5] Use `ASSERT` instead of `EXPECT` in test bodies (#19494) `EXPECT` doesn't return from the current function, so a number of tests would keep going if they failed, in one case segfaulting due to writing to memory that was not allocated correctly. 
Standardise the test suite so that it always returns on the first failure using `ASSERT` where possible. --- test/conformance/adapter/urAdapterRelease.cpp | 2 +- test/conformance/adapter/urAdapterRetain.cpp | 2 +- test/conformance/context/urContextRetain.cpp | 2 +- test/conformance/device/urDeviceRelease.cpp | 6 +- test/conformance/device/urDeviceRetain.cpp | 4 +- .../enqueue/urEnqueueEventsWait.cpp | 6 +- .../urEnqueueEventsWaitWithBarrier.cpp | 76 +++++++++---------- .../enqueue/urEnqueueKernelLaunch.cpp | 2 +- .../enqueue/urEnqueueMemBufferCopy.cpp | 8 +- .../enqueue/urEnqueueMemBufferCopyRect.cpp | 22 +++--- .../enqueue/urEnqueueMemBufferReadRect.cpp | 6 +- .../enqueue/urEnqueueMemBufferWriteRect.cpp | 6 +- .../enqueue/urEnqueueMemImageCopy.cpp | 2 +- test/conformance/enqueue/urEnqueueUSMFill.cpp | 4 +- .../enqueue/urEnqueueUSMFill2D.cpp | 4 +- .../enqueue/urEnqueueUSMMemcpy.cpp | 2 +- test/conformance/event/urEventWait.cpp | 12 +-- .../exp_command_buffer/enqueue.cpp | 2 +- .../exp_command_buffer/rect_read.cpp | 2 +- .../exp_command_buffer/release.cpp | 8 +- .../update/invalid_update.cpp | 10 +-- test/conformance/kernel/urKernelRetain.cpp | 2 +- test/conformance/program/urProgramRetain.cpp | 2 +- test/conformance/queue/urQueueRetain.cpp | 2 +- .../virtual_memory/urVirtualMemMap.cpp | 2 +- .../virtual_memory/urVirtualMemReserve.cpp | 6 +- 26 files changed, 101 insertions(+), 101 deletions(-) diff --git a/test/conformance/adapter/urAdapterRelease.cpp b/test/conformance/adapter/urAdapterRelease.cpp index da6dd71070..a1702aaf9e 100644 --- a/test/conformance/adapter/urAdapterRelease.cpp +++ b/test/conformance/adapter/urAdapterRelease.cpp @@ -18,7 +18,7 @@ TEST_P(urAdapterReleaseTest, Success) { &referenceCountBefore, nullptr)); uint32_t referenceCountAfter = 0; - EXPECT_SUCCESS(urAdapterRelease(adapter)); + ASSERT_SUCCESS(urAdapterRelease(adapter)); ASSERT_SUCCESS(urAdapterGetInfo(adapter, UR_ADAPTER_INFO_REFERENCE_COUNT, sizeof(referenceCountAfter), 
&referenceCountAfter, nullptr)); diff --git a/test/conformance/adapter/urAdapterRetain.cpp b/test/conformance/adapter/urAdapterRetain.cpp index 66120f97b3..70f37fc764 100644 --- a/test/conformance/adapter/urAdapterRetain.cpp +++ b/test/conformance/adapter/urAdapterRetain.cpp @@ -17,7 +17,7 @@ TEST_P(urAdapterRetainTest, Success) { &referenceCountBefore, nullptr)); uint32_t referenceCountAfter = 0; - EXPECT_SUCCESS(urAdapterRetain(adapter)); + ASSERT_SUCCESS(urAdapterRetain(adapter)); ASSERT_SUCCESS(urAdapterGetInfo(adapter, UR_ADAPTER_INFO_REFERENCE_COUNT, sizeof(referenceCountAfter), &referenceCountAfter, nullptr)); diff --git a/test/conformance/context/urContextRetain.cpp b/test/conformance/context/urContextRetain.cpp index a6aacef978..b1c5aacda5 100644 --- a/test/conformance/context/urContextRetain.cpp +++ b/test/conformance/context/urContextRetain.cpp @@ -20,7 +20,7 @@ TEST_P(urContextRetainTest, Success) { ASSERT_LT(prevRefCount, refCount); - EXPECT_SUCCESS(urContextRelease(context)); + ASSERT_SUCCESS(urContextRelease(context)); } TEST_P(urContextRetainTest, InvalidNullHandleContext) { diff --git a/test/conformance/device/urDeviceRelease.cpp b/test/conformance/device/urDeviceRelease.cpp index 97aa9d88a8..c7a330e883 100644 --- a/test/conformance/device/urDeviceRelease.cpp +++ b/test/conformance/device/urDeviceRelease.cpp @@ -12,7 +12,7 @@ TEST_P(urDeviceReleaseTest, Success) { uint32_t prevRefCount = 0; ASSERT_SUCCESS(uur::GetObjectReferenceCount(device, prevRefCount)); - EXPECT_SUCCESS(urDeviceRelease(device)); + ASSERT_SUCCESS(urDeviceRelease(device)); uint32_t refCount = 0; ASSERT_SUCCESS(uur::GetObjectReferenceCount(device, refCount)); @@ -46,14 +46,14 @@ TEST_P(urDeviceReleaseTest, SuccessSubdevices) { uint32_t prevRefCount = 0; ASSERT_SUCCESS(uur::GetObjectReferenceCount(sub_device, prevRefCount)); - EXPECT_SUCCESS(urDeviceRelease(sub_device)); + ASSERT_SUCCESS(urDeviceRelease(sub_device)); uint32_t refCount = 0; 
ASSERT_SUCCESS(uur::GetObjectReferenceCount(sub_device, refCount)); ASSERT_GT(prevRefCount, refCount); - EXPECT_SUCCESS(urDeviceRelease(sub_device)); + ASSERT_SUCCESS(urDeviceRelease(sub_device)); } TEST_P(urDeviceReleaseTest, InvalidNullHandle) { diff --git a/test/conformance/device/urDeviceRetain.cpp b/test/conformance/device/urDeviceRetain.cpp index b3a4dc5116..ca89b82af7 100644 --- a/test/conformance/device/urDeviceRetain.cpp +++ b/test/conformance/device/urDeviceRetain.cpp @@ -51,8 +51,8 @@ TEST_P(urDeviceRetainTest, SuccessSubdevices) { ASSERT_LT(prevRefCount, refCount); - EXPECT_SUCCESS(urDeviceRelease(sub_device)); - EXPECT_SUCCESS(urDeviceRelease(sub_device)); + ASSERT_SUCCESS(urDeviceRelease(sub_device)); + ASSERT_SUCCESS(urDeviceRelease(sub_device)); } TEST_P(urDeviceRetainTest, InvalidNullHandle) { diff --git a/test/conformance/enqueue/urEnqueueEventsWait.cpp b/test/conformance/enqueue/urEnqueueEventsWait.cpp index 1d9baa2f14..cd4fe1d3d9 100644 --- a/test/conformance/enqueue/urEnqueueEventsWait.cpp +++ b/test/conformance/enqueue/urEnqueueEventsWait.cpp @@ -92,9 +92,9 @@ TEST_P(urEnqueueEventsWaitTest, Success) { ASSERT_SUCCESS(urEnqueueMemBufferRead(queue2, dst_buffer, true, 0, size, output.data(), 0, nullptr, nullptr)); ASSERT_EQ(input, output); - EXPECT_SUCCESS(urEventRelease(event1)); - EXPECT_SUCCESS(urEventRelease(waitEvent)); - EXPECT_SUCCESS(urEventRelease(event2)); + ASSERT_SUCCESS(urEventRelease(event1)); + ASSERT_SUCCESS(urEventRelease(waitEvent)); + ASSERT_SUCCESS(urEventRelease(event2)); } TEST_P(urEnqueueEventsWaitTest, InvalidNullHandleQueue) { diff --git a/test/conformance/enqueue/urEnqueueEventsWaitWithBarrier.cpp b/test/conformance/enqueue/urEnqueueEventsWaitWithBarrier.cpp index 20806da38a..37486077d1 100644 --- a/test/conformance/enqueue/urEnqueueEventsWaitWithBarrier.cpp +++ b/test/conformance/enqueue/urEnqueueEventsWaitWithBarrier.cpp @@ -41,10 +41,10 @@ struct urEnqueueEventsWaitWithBarrierTest void TearDown() override { if 
(src_buffer) { - EXPECT_SUCCESS(urMemRelease(src_buffer)); + ASSERT_SUCCESS(urMemRelease(src_buffer)); } if (dst_buffer) { - EXPECT_SUCCESS(urMemRelease(dst_buffer)); + ASSERT_SUCCESS(urMemRelease(dst_buffer)); } urMultiQueueTestWithParam::TearDown(); } @@ -110,32 +110,32 @@ TEST_P(urEnqueueEventsWaitWithBarrierTest, Success) { ur_event_handle_t waitEvent = nullptr; ASSERT_SUCCESS(urEnqueueMemBufferCopy(queue1, src_buffer, dst_buffer, 0, 0, size, 0, nullptr, &event1)); - EXPECT_SUCCESS(EnqueueBarrier(queue2, 1, &event1, &waitEvent)); - EXPECT_SUCCESS(urQueueFlush(queue2)); - EXPECT_SUCCESS(urQueueFlush(queue1)); - EXPECT_SUCCESS(urEventWait(1, &waitEvent)); + ASSERT_SUCCESS(EnqueueBarrier(queue2, 1, &event1, &waitEvent)); + ASSERT_SUCCESS(urQueueFlush(queue2)); + ASSERT_SUCCESS(urQueueFlush(queue1)); + ASSERT_SUCCESS(urEventWait(1, &waitEvent)); std::vector output(count, 1); - EXPECT_SUCCESS(urEnqueueMemBufferRead(queue1, dst_buffer, true, 0, size, + ASSERT_SUCCESS(urEnqueueMemBufferRead(queue1, dst_buffer, true, 0, size, output.data(), 0, nullptr, nullptr)); EXPECT_EQ(input, output); - EXPECT_SUCCESS(urEventRelease(waitEvent)); - EXPECT_SUCCESS(urEventRelease(event1)); + ASSERT_SUCCESS(urEventRelease(waitEvent)); + ASSERT_SUCCESS(urEventRelease(event1)); ur_event_handle_t event2 = nullptr; input.assign(count, 420); - EXPECT_SUCCESS(urEnqueueMemBufferWrite(queue2, src_buffer, true, 0, size, + ASSERT_SUCCESS(urEnqueueMemBufferWrite(queue2, src_buffer, true, 0, size, input.data(), 0, nullptr, nullptr)); - EXPECT_SUCCESS(urEnqueueMemBufferCopy(queue2, src_buffer, dst_buffer, 0, 0, + ASSERT_SUCCESS(urEnqueueMemBufferCopy(queue2, src_buffer, dst_buffer, 0, 0, size, 0, nullptr, &event2)); - EXPECT_SUCCESS(EnqueueBarrier(queue1, 1, &event2, &waitEvent)); - EXPECT_SUCCESS(urQueueFlush(queue2)); - EXPECT_SUCCESS(urQueueFlush(queue1)); - EXPECT_SUCCESS(urEventWait(1, &waitEvent)); - EXPECT_SUCCESS(urEnqueueMemBufferRead(queue2, dst_buffer, true, 0, size, + 
ASSERT_SUCCESS(EnqueueBarrier(queue1, 1, &event2, &waitEvent)); + ASSERT_SUCCESS(urQueueFlush(queue2)); + ASSERT_SUCCESS(urQueueFlush(queue1)); + ASSERT_SUCCESS(urEventWait(1, &waitEvent)); + ASSERT_SUCCESS(urEnqueueMemBufferRead(queue2, dst_buffer, true, 0, size, output.data(), 0, nullptr, nullptr)); - EXPECT_SUCCESS(urEventRelease(waitEvent)); - EXPECT_SUCCESS(urEventRelease(event2)); + ASSERT_SUCCESS(urEventRelease(waitEvent)); + ASSERT_SUCCESS(urEventRelease(event2)); EXPECT_EQ(input, output); } @@ -181,15 +181,15 @@ TEST_P(urEnqueueEventsWaitWithBarrierOrderingTest, constexpr uint32_t ONE = 1; urEnqueueMemBufferWrite(queue, buffer, true, 0, sizeof(uint32_t), &ONE, 0, nullptr, &event); - EXPECT_SUCCESS(urEnqueueEventsWaitWithBarrier(queue, 1, &event, nullptr)); - EXPECT_SUCCESS(urEnqueueKernelLaunch(queue, add_kernel, 1, &offset, &count, + ASSERT_SUCCESS(urEnqueueEventsWaitWithBarrier(queue, 1, &event, nullptr)); + ASSERT_SUCCESS(urEnqueueKernelLaunch(queue, add_kernel, 1, &offset, &count, nullptr, 0, nullptr, 0, nullptr, &event)); - EXPECT_SUCCESS(urEnqueueEventsWaitWithBarrier(queue, 1, &event, nullptr)); - EXPECT_SUCCESS(urEnqueueKernelLaunch(queue, mul_kernel, 1, &offset, &count, + ASSERT_SUCCESS(urEnqueueEventsWaitWithBarrier(queue, 1, &event, nullptr)); + ASSERT_SUCCESS(urEnqueueKernelLaunch(queue, mul_kernel, 1, &offset, &count, nullptr, 0, nullptr, 0, nullptr, &event)); - EXPECT_SUCCESS(urEnqueueEventsWaitWithBarrier(queue, 1, &event, nullptr)); + ASSERT_SUCCESS(urEnqueueEventsWaitWithBarrier(queue, 1, &event, nullptr)); addHelper.ValidateBuffer(buffer, sizeof(uint32_t), 4004); } } @@ -212,15 +212,15 @@ TEST_P(urEnqueueEventsWaitWithBarrierOrderingTest, constexpr uint32_t ONE = 1; urEnqueueMemBufferWrite(queue, buffer, true, 0, sizeof(uint32_t), &ONE, 0, nullptr, nullptr); - EXPECT_SUCCESS(urEnqueueEventsWaitWithBarrier(queue, 0, nullptr, &event)); - EXPECT_SUCCESS(urEnqueueKernelLaunch(queue, add_kernel, 1, &offset, &count, + 
ASSERT_SUCCESS(urEnqueueEventsWaitWithBarrier(queue, 0, nullptr, &event)); + ASSERT_SUCCESS(urEnqueueKernelLaunch(queue, add_kernel, 1, &offset, &count, nullptr, 0, nullptr, 1, &event, nullptr)); - EXPECT_SUCCESS(urEnqueueEventsWaitWithBarrier(queue, 0, nullptr, &event)); - EXPECT_SUCCESS(urEnqueueKernelLaunch(queue, mul_kernel, 1, &offset, &count, + ASSERT_SUCCESS(urEnqueueEventsWaitWithBarrier(queue, 0, nullptr, &event)); + ASSERT_SUCCESS(urEnqueueKernelLaunch(queue, mul_kernel, 1, &offset, &count, nullptr, 0, nullptr, 1, &event, nullptr)); - EXPECT_SUCCESS(urEnqueueEventsWaitWithBarrier(queue, 0, nullptr, &event)); + ASSERT_SUCCESS(urEnqueueEventsWaitWithBarrier(queue, 0, nullptr, &event)); addHelper.ValidateBuffer(buffer, sizeof(uint32_t), 4004); } } @@ -242,17 +242,17 @@ TEST_P(urEnqueueEventsWaitWithBarrierOrderingTest, SuccessEventDependencies) { constexpr uint32_t ONE = 1; urEnqueueMemBufferWrite(queue, buffer, true, 0, sizeof(uint32_t), &ONE, 0, nullptr, &event[0]); - EXPECT_SUCCESS( + ASSERT_SUCCESS( urEnqueueEventsWaitWithBarrier(queue, 1, &event[0], &event[1])); - EXPECT_SUCCESS(urEnqueueKernelLaunch(queue, add_kernel, 1, &offset, &count, + ASSERT_SUCCESS(urEnqueueKernelLaunch(queue, add_kernel, 1, &offset, &count, nullptr, 0, nullptr, 1, &event[1], &event[2])); - EXPECT_SUCCESS( + ASSERT_SUCCESS( urEnqueueEventsWaitWithBarrier(queue, 1, &event[2], &event[3])); - EXPECT_SUCCESS(urEnqueueKernelLaunch(queue, mul_kernel, 1, &offset, &count, + ASSERT_SUCCESS(urEnqueueKernelLaunch(queue, mul_kernel, 1, &offset, &count, nullptr, 0, nullptr, 1, &event[3], &event[4])); - EXPECT_SUCCESS( + ASSERT_SUCCESS( urEnqueueEventsWaitWithBarrier(queue, 1, &event[4], &event[5])); addHelper.ValidateBuffer(buffer, sizeof(uint32_t), 4004); } @@ -275,15 +275,15 @@ TEST_P(urEnqueueEventsWaitWithBarrierOrderingTest, constexpr uint32_t ONE = 1; urEnqueueMemBufferWrite(queue, buffer, true, 0, sizeof(uint32_t), &ONE, 0, nullptr, nullptr); - 
EXPECT_SUCCESS(urEnqueueEventsWaitWithBarrier(queue, 0, nullptr, nullptr)); - EXPECT_SUCCESS(urEnqueueKernelLaunch(queue, add_kernel, 1, &offset, &count, + ASSERT_SUCCESS(urEnqueueEventsWaitWithBarrier(queue, 0, nullptr, nullptr)); + ASSERT_SUCCESS(urEnqueueKernelLaunch(queue, add_kernel, 1, &offset, &count, nullptr, 0, nullptr, 0, nullptr, nullptr)); - EXPECT_SUCCESS(urEnqueueEventsWaitWithBarrier(queue, 0, nullptr, nullptr)); - EXPECT_SUCCESS(urEnqueueKernelLaunch(queue, mul_kernel, 1, &offset, &count, + ASSERT_SUCCESS(urEnqueueEventsWaitWithBarrier(queue, 0, nullptr, nullptr)); + ASSERT_SUCCESS(urEnqueueKernelLaunch(queue, mul_kernel, 1, &offset, &count, nullptr, 0, nullptr, 0, nullptr, nullptr)); - EXPECT_SUCCESS(urEnqueueEventsWaitWithBarrier(queue, 0, nullptr, nullptr)); + ASSERT_SUCCESS(urEnqueueEventsWaitWithBarrier(queue, 0, nullptr, nullptr)); addHelper.ValidateBuffer(buffer, sizeof(uint32_t), 4004); } } diff --git a/test/conformance/enqueue/urEnqueueKernelLaunch.cpp b/test/conformance/enqueue/urEnqueueKernelLaunch.cpp index fa3eb3f4b5..c59134660d 100644 --- a/test/conformance/enqueue/urEnqueueKernelLaunch.cpp +++ b/test/conformance/enqueue/urEnqueueKernelLaunch.cpp @@ -631,7 +631,7 @@ TEST_P(urEnqueueKernelLaunchMultiDeviceTest, KernelLaunchReadDifferentQueues) { nullptr, 0, nullptr, nullptr)); // Wait for the queue to finish executing. - EXPECT_SUCCESS(urEnqueueEventsWait(queues[0], 0, nullptr, nullptr)); + ASSERT_SUCCESS(urEnqueueEventsWait(queues[0], 0, nullptr, nullptr)); // Then the remaining queues do blocking reads from the buffer. 
Since the // queues target different devices this checks that any devices memory has diff --git a/test/conformance/enqueue/urEnqueueMemBufferCopy.cpp b/test/conformance/enqueue/urEnqueueMemBufferCopy.cpp index 8adc4a7712..d6127d52fc 100644 --- a/test/conformance/enqueue/urEnqueueMemBufferCopy.cpp +++ b/test/conformance/enqueue/urEnqueueMemBufferCopy.cpp @@ -121,11 +121,11 @@ TEST_P(urEnqueueMemBufferCopyMultiDeviceTest, CopyReadDifferentQueues) { ur_mem_handle_t dst_buffer = nullptr; ASSERT_SUCCESS(urMemBufferCreate(context, UR_MEM_FLAG_READ_ONLY, size, nullptr, &dst_buffer)); - EXPECT_SUCCESS(urEnqueueMemBufferCopy(queues[0], buffer, dst_buffer, 0, 0, + ASSERT_SUCCESS(urEnqueueMemBufferCopy(queues[0], buffer, dst_buffer, 0, 0, size, 0, nullptr, nullptr)); // Wait for the queue to finish executing. - EXPECT_SUCCESS(urEnqueueEventsWait(queues[0], 0, nullptr, nullptr)); + ASSERT_SUCCESS(urEnqueueEventsWait(queues[0], 0, nullptr, nullptr)); // Then the remaining queues do blocking reads from the buffer. 
Since the // queues target different devices this checks that any devices memory has @@ -133,7 +133,7 @@ TEST_P(urEnqueueMemBufferCopyMultiDeviceTest, CopyReadDifferentQueues) { for (unsigned i = 1; i < queues.size(); ++i) { const auto queue = queues[i]; std::vector output(count, 0); - EXPECT_SUCCESS(urEnqueueMemBufferRead(queue, dst_buffer, true, 0, size, + ASSERT_SUCCESS(urEnqueueMemBufferRead(queue, dst_buffer, true, 0, size, output.data(), 0, nullptr, nullptr)); for (unsigned j = 0; j < count; ++j) { EXPECT_EQ(input, output[j]) @@ -141,5 +141,5 @@ TEST_P(urEnqueueMemBufferCopyMultiDeviceTest, CopyReadDifferentQueues) { } } - EXPECT_SUCCESS(urMemRelease(dst_buffer)); + ASSERT_SUCCESS(urMemRelease(dst_buffer)); } diff --git a/test/conformance/enqueue/urEnqueueMemBufferCopyRect.cpp b/test/conformance/enqueue/urEnqueueMemBufferCopyRect.cpp index 6ce24b46c1..6cde8bf1ea 100644 --- a/test/conformance/enqueue/urEnqueueMemBufferCopyRect.cpp +++ b/test/conformance/enqueue/urEnqueueMemBufferCopyRect.cpp @@ -101,30 +101,30 @@ TEST_P(urEnqueueMemBufferCopyRectTestWithParam, Success) { // Fill src buffer with sequentially increasing values. std::vector input(src_buffer_size, 0x0); std::iota(std::begin(input), std::end(input), 0x0); - EXPECT_SUCCESS(urEnqueueMemBufferWrite(queue, src_buffer, + ASSERT_SUCCESS(urEnqueueMemBufferWrite(queue, src_buffer, /* is_blocking */ true, 0, src_buffer_size, input.data(), 0, nullptr, nullptr)); ur_mem_handle_t dst_buffer = nullptr; - EXPECT_SUCCESS(urMemBufferCreate(context, UR_MEM_FLAG_READ_WRITE, + ASSERT_SUCCESS(urMemBufferCreate(context, UR_MEM_FLAG_READ_WRITE, dst_buffer_size, nullptr, &dst_buffer)); // Zero destination buffer to begin with since the write may not cover the // whole buffer. 
const uint8_t zero = 0x0; - EXPECT_SUCCESS(urEnqueueMemBufferFill(queue, dst_buffer, &zero, sizeof(zero), + ASSERT_SUCCESS(urEnqueueMemBufferFill(queue, dst_buffer, &zero, sizeof(zero), 0, dst_buffer_size, 0, nullptr, nullptr)); // Enqueue the rectangular copy between the buffers. - EXPECT_SUCCESS(urEnqueueMemBufferCopyRect( + ASSERT_SUCCESS(urEnqueueMemBufferCopyRect( queue, src_buffer, dst_buffer, src_buffer_origin, dst_buffer_origin, region, src_buffer_row_pitch, src_buffer_slice_pitch, dst_buffer_row_pitch, dst_buffer_slice_pitch, 0, nullptr, nullptr)); std::vector output(dst_buffer_size, 0x0); - EXPECT_SUCCESS(urEnqueueMemBufferRead(queue, dst_buffer, + ASSERT_SUCCESS(urEnqueueMemBufferRead(queue, dst_buffer, /* is_blocking */ true, 0, dst_buffer_size, output.data(), 0, nullptr, nullptr)); @@ -139,8 +139,8 @@ TEST_P(urEnqueueMemBufferCopyRectTestWithParam, Success) { EXPECT_EQ(expected, output); // Cleanup. - EXPECT_SUCCESS(urMemRelease(src_buffer)); - EXPECT_SUCCESS(urMemRelease(dst_buffer)); + ASSERT_SUCCESS(urMemRelease(src_buffer)); + ASSERT_SUCCESS(urMemRelease(dst_buffer)); } struct urEnqueueMemBufferCopyRectTest : uur::urQueueTest { @@ -321,12 +321,12 @@ TEST_P(urEnqueueMemBufferCopyRectMultiDeviceTest, CopyRectReadDifferentQueues) { ur_mem_handle_t dst_buffer = nullptr; ASSERT_SUCCESS(urMemBufferCreate(context, UR_MEM_FLAG_READ_ONLY, size, nullptr, &dst_buffer)); - EXPECT_SUCCESS(urEnqueueMemBufferCopyRect( + ASSERT_SUCCESS(urEnqueueMemBufferCopyRect( queues[0], buffer, dst_buffer, {0, 0, 0}, {0, 0, 0}, {size, 1, 1}, size, size, size, size, 0, nullptr, nullptr)); // Wait for the queue to finish executing. - EXPECT_SUCCESS(urEnqueueEventsWait(queues[0], 0, nullptr, nullptr)); + ASSERT_SUCCESS(urEnqueueEventsWait(queues[0], 0, nullptr, nullptr)); // Then the remaining queues do blocking reads from the buffer. 
Since the // queues target different devices this checks that any devices memory has @@ -334,7 +334,7 @@ TEST_P(urEnqueueMemBufferCopyRectMultiDeviceTest, CopyRectReadDifferentQueues) { for (unsigned i = 1; i < queues.size(); ++i) { const auto queue = queues[i]; std::vector output(count, 0); - EXPECT_SUCCESS(urEnqueueMemBufferRead(queue, dst_buffer, true, 0, size, + ASSERT_SUCCESS(urEnqueueMemBufferRead(queue, dst_buffer, true, 0, size, output.data(), 0, nullptr, nullptr)); for (unsigned j = 0; j < count; ++j) { EXPECT_EQ(input, output[j]) @@ -342,5 +342,5 @@ TEST_P(urEnqueueMemBufferCopyRectMultiDeviceTest, CopyRectReadDifferentQueues) { } } - EXPECT_SUCCESS(urMemRelease(dst_buffer)); + ASSERT_SUCCESS(urMemRelease(dst_buffer)); } diff --git a/test/conformance/enqueue/urEnqueueMemBufferReadRect.cpp b/test/conformance/enqueue/urEnqueueMemBufferReadRect.cpp index d112868dfd..8cbc7f7c3d 100644 --- a/test/conformance/enqueue/urEnqueueMemBufferReadRect.cpp +++ b/test/conformance/enqueue/urEnqueueMemBufferReadRect.cpp @@ -99,13 +99,13 @@ TEST_P(urEnqueueMemBufferReadRectTestWithParam, Success) { // The input will just be sequentially increasing values. std::vector input(buffer_size, 0x0); std::iota(std::begin(input), std::end(input), 0x0); - EXPECT_SUCCESS(urEnqueueMemBufferWrite(queue, buffer, /* isBlocking */ true, + ASSERT_SUCCESS(urEnqueueMemBufferWrite(queue, buffer, /* isBlocking */ true, 0, input.size(), input.data(), 0, nullptr, nullptr)); // Enqueue the rectangular read. std::vector output(host_size, 0x0); - EXPECT_SUCCESS(urEnqueueMemBufferReadRect( + ASSERT_SUCCESS(urEnqueueMemBufferReadRect( queue, buffer, /* isBlocking */ true, buffer_offset, host_offset, region, buffer_row_pitch, buffer_slice_pitch, host_row_pitch, host_slice_pitch, output.data(), 0, nullptr, nullptr)); @@ -119,7 +119,7 @@ TEST_P(urEnqueueMemBufferReadRectTestWithParam, Success) { EXPECT_EQ(expected, output); // Cleanup. 
- EXPECT_SUCCESS(urMemRelease(buffer)); + ASSERT_SUCCESS(urMemRelease(buffer)); } struct urEnqueueMemBufferReadRectTest : public uur::urMemBufferQueueTest { diff --git a/test/conformance/enqueue/urEnqueueMemBufferWriteRect.cpp b/test/conformance/enqueue/urEnqueueMemBufferWriteRect.cpp index 3cc4fb66c1..7ba8dc67fa 100644 --- a/test/conformance/enqueue/urEnqueueMemBufferWriteRect.cpp +++ b/test/conformance/enqueue/urEnqueueMemBufferWriteRect.cpp @@ -106,13 +106,13 @@ TEST_P(urEnqueueMemBufferWriteRectTestWithParam, Success) { std::iota(std::begin(input), std::end(input), 0x0); // Enqueue the rectangular write from that host buffer. - EXPECT_SUCCESS(urEnqueueMemBufferWriteRect( + ASSERT_SUCCESS(urEnqueueMemBufferWriteRect( queue, buffer, /* isBlocking */ true, buffer_origin, host_origin, region, buffer_row_pitch, buffer_slice_pitch, host_row_pitch, host_slice_pitch, input.data(), 0, nullptr, nullptr)); std::vector output(buffer_size, 0x0); - EXPECT_SUCCESS(urEnqueueMemBufferRead(queue, buffer, /* is_blocking */ true, + ASSERT_SUCCESS(urEnqueueMemBufferRead(queue, buffer, /* is_blocking */ true, 0, buffer_size, output.data(), 0, nullptr, nullptr)); @@ -126,7 +126,7 @@ TEST_P(urEnqueueMemBufferWriteRectTestWithParam, Success) { EXPECT_EQ(expected, output); // Cleanup. - EXPECT_SUCCESS(urMemRelease(buffer)); + ASSERT_SUCCESS(urMemRelease(buffer)); } struct urEnqueueMemBufferWriteRectTest : public uur::urMemBufferQueueTest { diff --git a/test/conformance/enqueue/urEnqueueMemImageCopy.cpp b/test/conformance/enqueue/urEnqueueMemImageCopy.cpp index 808e2f3da2..55609cfcbf 100644 --- a/test/conformance/enqueue/urEnqueueMemImageCopy.cpp +++ b/test/conformance/enqueue/urEnqueueMemImageCopy.cpp @@ -290,7 +290,7 @@ TEST_P(urEnqueueMemImageCopyMultiDeviceTest, CopyReadDifferentQueues) { origin, region3D, 0, nullptr, nullptr)); // Wait for the queue to finish executing. 
- EXPECT_SUCCESS(urEnqueueEventsWait(queues[0], 0, nullptr, nullptr)); + ASSERT_SUCCESS(urEnqueueEventsWait(queues[0], 0, nullptr, nullptr)); // The remaining queues do blocking reads from the image1D/2D/3D. Since the // queues target different devices this checks that any devices memory has diff --git a/test/conformance/enqueue/urEnqueueUSMFill.cpp b/test/conformance/enqueue/urEnqueueUSMFill.cpp index 2f27d8f46a..db7901a5eb 100644 --- a/test/conformance/enqueue/urEnqueueUSMFill.cpp +++ b/test/conformance/enqueue/urEnqueueUSMFill.cpp @@ -105,14 +105,14 @@ TEST_P(urEnqueueUSMFillTestWithParam, Success) { ASSERT_SUCCESS(urEnqueueUSMFill(queue, ptr, pattern_size, pattern.data(), size, 0, nullptr, &event)); - EXPECT_SUCCESS(urQueueFlush(queue)); + ASSERT_SUCCESS(urQueueFlush(queue)); ASSERT_SUCCESS(urEventWait(1, &event)); ur_event_status_t event_status; ASSERT_SUCCESS(uur::GetEventInfo( event, UR_EVENT_INFO_COMMAND_EXECUTION_STATUS, event_status)); ASSERT_EQ(event_status, UR_EVENT_STATUS_COMPLETE); - EXPECT_SUCCESS(urEventRelease(event)); + ASSERT_SUCCESS(urEventRelease(event)); ASSERT_NO_FATAL_FAILURE(verifyData()); } diff --git a/test/conformance/enqueue/urEnqueueUSMFill2D.cpp b/test/conformance/enqueue/urEnqueueUSMFill2D.cpp index 80f564273e..c6cdf18401 100644 --- a/test/conformance/enqueue/urEnqueueUSMFill2D.cpp +++ b/test/conformance/enqueue/urEnqueueUSMFill2D.cpp @@ -139,14 +139,14 @@ TEST_P(urEnqueueUSMFill2DTestWithParam, Success) { UUR_ASSERT_SUCCESS_OR_UNSUPPORTED( urEnqueueUSMFill2D(queue, ptr, pitch, pattern_size, pattern.data(), width, height, 0, nullptr, &event)); - EXPECT_SUCCESS(urQueueFlush(queue)); + ASSERT_SUCCESS(urQueueFlush(queue)); ASSERT_SUCCESS(urEventWait(1, &event)); ur_event_status_t event_status; ASSERT_SUCCESS(uur::GetEventInfo( event, UR_EVENT_INFO_COMMAND_EXECUTION_STATUS, event_status)); ASSERT_EQ(event_status, UR_EVENT_STATUS_COMPLETE); - EXPECT_SUCCESS(urEventRelease(event)); + ASSERT_SUCCESS(urEventRelease(event)); 
ASSERT_NO_FATAL_FAILURE(verifyData()); } diff --git a/test/conformance/enqueue/urEnqueueUSMMemcpy.cpp b/test/conformance/enqueue/urEnqueueUSMMemcpy.cpp index 4a4103ac42..7b9bc58ce8 100644 --- a/test/conformance/enqueue/urEnqueueUSMMemcpy.cpp +++ b/test/conformance/enqueue/urEnqueueUSMMemcpy.cpp @@ -105,7 +105,7 @@ TEST_P(urEnqueueUSMMemcpyTest, BlockingWithEvent) { urEventGetInfo(memcpy_event, UR_EVENT_INFO_COMMAND_EXECUTION_STATUS, sizeof(ur_event_status_t), &event_status, nullptr)); ASSERT_EQ(event_status, UR_EVENT_STATUS_COMPLETE); - EXPECT_SUCCESS(urEventRelease(memcpy_event)); + ASSERT_SUCCESS(urEventRelease(memcpy_event)); ASSERT_NO_FATAL_FAILURE(verifyData()); } diff --git a/test/conformance/event/urEventWait.cpp b/test/conformance/event/urEventWait.cpp index 769faf3206..61ce2f1680 100644 --- a/test/conformance/event/urEventWait.cpp +++ b/test/conformance/event/urEventWait.cpp @@ -75,13 +75,13 @@ TEST_P(urEventWaitTest, Success) { size, output.data(), 0, nullptr, &event2)); std::vector events{event1, event2}; - EXPECT_SUCCESS(urQueueFlush(queues[0])); + ASSERT_SUCCESS(urQueueFlush(queues[0])); ASSERT_SUCCESS( urEventWait(static_cast(events.size()), events.data())); ASSERT_EQ(input[0], output); - EXPECT_SUCCESS(urEventRelease(event1)); - EXPECT_SUCCESS(urEventRelease(event2)); + ASSERT_SUCCESS(urEventRelease(event1)); + ASSERT_SUCCESS(urEventRelease(event2)); } using urEventWaitNegativeTest = uur::urQueueTest; @@ -125,7 +125,7 @@ TEST_P(urEventWaitTest, WaitWithMultipleContexts) { } for (auto &event : events) { - EXPECT_SUCCESS(urEventRelease(event)); + ASSERT_SUCCESS(urEventRelease(event)); } } @@ -162,6 +162,6 @@ TEST_P(urEventWaitTest, WithCrossContextDependencies) { urEventWait(static_cast(events.size()), events.data())); ASSERT_EQ(input.front(), output); - EXPECT_SUCCESS(urEventRelease(event1)); - EXPECT_SUCCESS(urEventRelease(event2)); + ASSERT_SUCCESS(urEventRelease(event1)); + ASSERT_SUCCESS(urEventRelease(event2)); } diff --git 
a/test/conformance/exp_command_buffer/enqueue.cpp b/test/conformance/exp_command_buffer/enqueue.cpp index ccbdaca879..afc49540c4 100644 --- a/test/conformance/exp_command_buffer/enqueue.cpp +++ b/test/conformance/exp_command_buffer/enqueue.cpp @@ -201,6 +201,6 @@ TEST_P(urEnqueueCommandBufferExpTest, EnqueueAndRelease) { in_or_out_of_order_queue, cmd_buf_handle, 0, nullptr, nullptr)); // Release the command buffer without explicitly waiting beforehand - EXPECT_SUCCESS(urCommandBufferReleaseExp(cmd_buf_handle)); + ASSERT_SUCCESS(urCommandBufferReleaseExp(cmd_buf_handle)); cmd_buf_handle = nullptr; } diff --git a/test/conformance/exp_command_buffer/rect_read.cpp b/test/conformance/exp_command_buffer/rect_read.cpp index 452bc66ab8..842d0ee4ca 100644 --- a/test/conformance/exp_command_buffer/rect_read.cpp +++ b/test/conformance/exp_command_buffer/rect_read.cpp @@ -132,7 +132,7 @@ TEST_P(urCommandBufferAppendMemBufferReadRectTestWithParam, Success) { // Enqueue the rectangular read. std::vector output(host_size, 0x0); - EXPECT_SUCCESS(urCommandBufferAppendMemBufferReadRectExp( + ASSERT_SUCCESS(urCommandBufferAppendMemBufferReadRectExp( cmd_buf_handle, buffer, buffer_origin, host_origin, region, buffer_row_pitch, buffer_slice_pitch, host_row_pitch, host_slice_pitch, output.data(), 0, nullptr, 0, nullptr, nullptr, nullptr, nullptr)); diff --git a/test/conformance/exp_command_buffer/release.cpp b/test/conformance/exp_command_buffer/release.cpp index 181a7af7d3..42b5389465 100644 --- a/test/conformance/exp_command_buffer/release.cpp +++ b/test/conformance/exp_command_buffer/release.cpp @@ -14,15 +14,15 @@ UUR_INSTANTIATE_DEVICE_TEST_SUITE(urCommandBufferReleaseExpTest); TEST_P(urCommandBufferReleaseExpTest, Success) { // https://github.com/intel/llvm/issues/19139 UUR_KNOWN_FAILURE_ON(uur::OpenCL{}); - EXPECT_SUCCESS(urCommandBufferRetainExp(cmd_buf_handle)); + ASSERT_SUCCESS(urCommandBufferRetainExp(cmd_buf_handle)); uint32_t prev_ref_count = 0; - 
EXPECT_SUCCESS(uur::GetObjectReferenceCount(cmd_buf_handle, prev_ref_count)); + ASSERT_SUCCESS(uur::GetObjectReferenceCount(cmd_buf_handle, prev_ref_count)); - EXPECT_SUCCESS(urCommandBufferReleaseExp(cmd_buf_handle)); + ASSERT_SUCCESS(urCommandBufferReleaseExp(cmd_buf_handle)); uint32_t ref_count = 0; - EXPECT_SUCCESS(uur::GetObjectReferenceCount(cmd_buf_handle, ref_count)); + ASSERT_SUCCESS(uur::GetObjectReferenceCount(cmd_buf_handle, ref_count)); EXPECT_GT(prev_ref_count, ref_count); } diff --git a/test/conformance/exp_command_buffer/update/invalid_update.cpp b/test/conformance/exp_command_buffer/update/invalid_update.cpp index e7a93b1174..dcaab8201f 100644 --- a/test/conformance/exp_command_buffer/update/invalid_update.cpp +++ b/test/conformance/exp_command_buffer/update/invalid_update.cpp @@ -130,7 +130,7 @@ TEST_P(InvalidUpdateTest, NotUpdatableCommandBuffer) { UR_RESULT_ERROR_INVALID_OPERATION); ASSERT_EQ(test_command_handle, nullptr); - EXPECT_SUCCESS(urCommandBufferFinalizeExp(test_cmd_buf_handle)); + ASSERT_SUCCESS(urCommandBufferFinalizeExp(test_cmd_buf_handle)); finalized = true; // Set new value to use for fill at kernel index 1 @@ -168,7 +168,7 @@ TEST_P(InvalidUpdateTest, NotUpdatableCommandBuffer) { EXPECT_EQ(UR_RESULT_ERROR_INVALID_NULL_HANDLE, result); if (test_cmd_buf_handle) { - EXPECT_SUCCESS(urCommandBufferReleaseExp(test_cmd_buf_handle)); + ASSERT_SUCCESS(urCommandBufferReleaseExp(test_cmd_buf_handle)); } } @@ -263,8 +263,8 @@ TEST_P(InvalidUpdateTest, CommandBufferMismatch) { urCommandBufferCreateExp(context, device, &desc, &test_cmd_buf_handle)); EXPECT_NE(test_cmd_buf_handle, nullptr); - EXPECT_SUCCESS(urCommandBufferFinalizeExp(test_cmd_buf_handle)); - EXPECT_SUCCESS(urCommandBufferFinalizeExp(updatable_cmd_buf_handle)); + ASSERT_SUCCESS(urCommandBufferFinalizeExp(test_cmd_buf_handle)); + ASSERT_SUCCESS(urCommandBufferFinalizeExp(updatable_cmd_buf_handle)); finalized = true; // Set new value to use for fill at kernel index 1 @@ -302,7 
+302,7 @@ TEST_P(InvalidUpdateTest, CommandBufferMismatch) { EXPECT_EQ(UR_RESULT_ERROR_INVALID_COMMAND_BUFFER_COMMAND_HANDLE_EXP, result); if (test_cmd_buf_handle) { - EXPECT_SUCCESS(urCommandBufferReleaseExp(test_cmd_buf_handle)); + ASSERT_SUCCESS(urCommandBufferReleaseExp(test_cmd_buf_handle)); } } diff --git a/test/conformance/kernel/urKernelRetain.cpp b/test/conformance/kernel/urKernelRetain.cpp index d64f0a40df..221e038644 100644 --- a/test/conformance/kernel/urKernelRetain.cpp +++ b/test/conformance/kernel/urKernelRetain.cpp @@ -12,7 +12,7 @@ UUR_INSTANTIATE_DEVICE_TEST_SUITE(urKernelRetainTest); TEST_P(urKernelRetainTest, Success) { ASSERT_SUCCESS(urKernelRetain(kernel)); - EXPECT_SUCCESS(urKernelRelease(kernel)); + ASSERT_SUCCESS(urKernelRelease(kernel)); } TEST_P(urKernelRetainTest, InvalidNullHandleKernel) { diff --git a/test/conformance/program/urProgramRetain.cpp b/test/conformance/program/urProgramRetain.cpp index b9a4bcdeb6..623dddf23b 100644 --- a/test/conformance/program/urProgramRetain.cpp +++ b/test/conformance/program/urProgramRetain.cpp @@ -20,7 +20,7 @@ TEST_P(urProgramRetainTest, Success) { ASSERT_LT(prevRefCount, refCount); - EXPECT_SUCCESS(urProgramRetain(program)); + ASSERT_SUCCESS(urProgramRetain(program)); } TEST_P(urProgramRetainTest, InvalidNullHandleProgram) { diff --git a/test/conformance/queue/urQueueRetain.cpp b/test/conformance/queue/urQueueRetain.cpp index 8aad0d59dd..b75a08a2c7 100644 --- a/test/conformance/queue/urQueueRetain.cpp +++ b/test/conformance/queue/urQueueRetain.cpp @@ -22,7 +22,7 @@ TEST_P(urQueueRetainTest, Success) { ASSERT_LT(prevRefCount, refCount); - EXPECT_SUCCESS(urQueueRelease(queue)); + ASSERT_SUCCESS(urQueueRelease(queue)); } TEST_P(urQueueRetainTest, InvalidNullHandleQueue) { diff --git a/test/conformance/virtual_memory/urVirtualMemMap.cpp b/test/conformance/virtual_memory/urVirtualMemMap.cpp index 62bc183f7c..d8484cd841 100644 --- a/test/conformance/virtual_memory/urVirtualMemMap.cpp +++ 
b/test/conformance/virtual_memory/urVirtualMemMap.cpp @@ -17,7 +17,7 @@ UUR_DEVICE_TEST_SUITE_WITH_PARAM( TEST_P(urVirtualMemMapWithFlagsTest, Success) { ASSERT_SUCCESS( urVirtualMemMap(context, virtual_ptr, size, physical_mem, 0, getParam())); - EXPECT_SUCCESS(urVirtualMemUnmap(context, virtual_ptr, size)); + ASSERT_SUCCESS(urVirtualMemUnmap(context, virtual_ptr, size)); } using urVirtualMemMapTest = uur::urVirtualMemTest; diff --git a/test/conformance/virtual_memory/urVirtualMemReserve.cpp b/test/conformance/virtual_memory/urVirtualMemReserve.cpp index ecc1d520a9..2b1eeeebd3 100644 --- a/test/conformance/virtual_memory/urVirtualMemReserve.cpp +++ b/test/conformance/virtual_memory/urVirtualMemReserve.cpp @@ -22,7 +22,7 @@ TEST_P(urVirtualMemReserveTestWithParam, SuccessNoStartPointer) { &virtual_mem_start)); ASSERT_NE(virtual_mem_start, nullptr); - EXPECT_SUCCESS( + ASSERT_SUCCESS( urVirtualMemFree(context, virtual_mem_start, virtual_mem_size)); } @@ -41,8 +41,8 @@ TEST_P(urVirtualMemReserveTestWithParam, SuccessWithStartPointer) { ASSERT_NE(virtual_mem_ptr, nullptr); // both pointers have to be freed - EXPECT_SUCCESS(urVirtualMemFree(context, origin_ptr, page_size)); - EXPECT_SUCCESS(urVirtualMemFree(context, virtual_mem_ptr, page_size)); + ASSERT_SUCCESS(urVirtualMemFree(context, origin_ptr, page_size)); + ASSERT_SUCCESS(urVirtualMemFree(context, virtual_mem_ptr, page_size)); } using urVirtualMemReserveTest = uur::urVirtualMemGranularityTest; From 3622f77a964f0d843a15fb8e3467836affd0169c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Igor=20Chor=C4=85=C5=BCewicz?= Date: Fri, 18 Jul 2025 07:41:36 -0700 Subject: [PATCH 4/5] Remove duplicate devices when creating a context (#19509) V2 stores device handles in a vector of size `Platform->getNumDevices()`. If there are more devices than that, we may have out-of-range access.
--- source/adapters/level_zero/v2/context.cpp | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/source/adapters/level_zero/v2/context.cpp b/source/adapters/level_zero/v2/context.cpp index bf054a493f..b96e05e9d3 100644 --- a/source/adapters/level_zero/v2/context.cpp +++ b/source/adapters/level_zero/v2/context.cpp @@ -44,12 +44,20 @@ populateP2PDevices(size_t maxDevices, return p2pDevices; } +static std::vector +uniqueDevices(uint32_t numDevices, const ur_device_handle_t *phDevices) { + std::vector devices(phDevices, phDevices + numDevices); + std::sort(devices.begin(), devices.end()); + devices.erase(std::unique(devices.begin(), devices.end()), devices.end()); + return devices; +} + ur_context_handle_t_::ur_context_handle_t_(ze_context_handle_t hContext, uint32_t numDevices, const ur_device_handle_t *phDevices, bool ownZeContext) : hContext(hContext, ownZeContext), - hDevices(phDevices, phDevices + numDevices), + hDevices(uniqueDevices(numDevices, phDevices)), commandListCache(hContext, {phDevices[0]->Platform->ZeCopyOffloadExtensionSupported, phDevices[0]->Platform->ZeMutableCmdListExt.Supported}), From 29f3de1938d1f364ff535cbf73affd8a4a34c8f6 Mon Sep 17 00:00:00 2001 From: "github-actions[bot]" Date: Sat, 19 Jul 2025 00:42:32 +0000 Subject: [PATCH 5/5] Update intel/llvm mirror base commit to 22ee417a --- .github/intel-llvm-mirror-base-commit | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/intel-llvm-mirror-base-commit b/.github/intel-llvm-mirror-base-commit index 948fb08ab2..64328d59d3 100644 --- a/.github/intel-llvm-mirror-base-commit +++ b/.github/intel-llvm-mirror-base-commit @@ -1 +1 @@ -25323c85d7091f92bea2c057202612ff941a36d2 +22ee417ae6191b8e3940b82d19ae587092021224