From 1dee8fc72d540109e13ea80193caa4432545790a Mon Sep 17 00:00:00 2001 From: uwedolinsky Date: Mon, 18 Aug 2025 09:38:01 +0100 Subject: [PATCH 01/15] [SYCL][NATIVECPU] async ops, wait for dependencies in threads (#19537) Makes most operations including memory copies asynchronous except `enqueueBufferMap` Worker threads wait for dependent events instead of the main thread. The state struct is now constructed by the threads instead of being constructed by the main thread and copied which 1. reduces the capturing overhead in the enqueue lambda 2. enabled removing `mutable` from enqueue lambda Also removing `#ifdef NATIVECPU_USE_OCK` --- .../source/adapters/native_cpu/enqueue.cpp | 254 +++++++++++------- 1 file changed, 159 insertions(+), 95 deletions(-) diff --git a/unified-runtime/source/adapters/native_cpu/enqueue.cpp b/unified-runtime/source/adapters/native_cpu/enqueue.cpp index 5fecdc5b8f67..86da10bbffef 100644 --- a/unified-runtime/source/adapters/native_cpu/enqueue.cpp +++ b/unified-runtime/source/adapters/native_cpu/enqueue.cpp @@ -50,8 +50,42 @@ struct NDRDescT { << GlobalOffset[2] << "\n"; } }; + +namespace { +class WaitInfo { + std::vector *const events; + static_assert(std::is_pointer_v); + +public: + WaitInfo(uint32_t numEvents, const ur_event_handle_t *WaitList) + : events(numEvents ? new std::vector( + WaitList, WaitList + numEvents) + : nullptr) {} + void wait() const { + if (events) + urEventWait(events->size(), events->data()); + } + std::unique_ptr> getUniquePtr() { + return std::unique_ptr>(events); + } +}; + +inline static WaitInfo getWaitInfo(uint32_t numEventsInWaitList, + const ur_event_handle_t *phEventWaitList) { + return native_cpu::WaitInfo(numEventsInWaitList, phEventWaitList); +} + +} // namespace } // namespace native_cpu +static inline native_cpu::state getState(const native_cpu::NDRDescT &ndr) { + native_cpu::state resized_state( + ndr.GlobalSize[0], ndr.GlobalSize[1], ndr.GlobalSize[2], ndr.LocalSize[0], + ndr.LocalSize[1], ndr.LocalSize[2], ndr.GlobalOffset[0], + ndr.GlobalOffset[1], ndr.GlobalOffset[2]); + return resized_state; +} + UR_APIEXPORT ur_result_t UR_APICALL urEnqueueKernelLaunch( ur_queue_handle_t hQueue, ur_kernel_handle_t hKernel, uint32_t workDim, const size_t *pGlobalWorkOffset, const size_t *pGlobalWorkSize, @@ -67,7 +101,6 @@ UR_APIEXPORT ur_result_t UR_APICALL urEnqueueKernelLaunch( } } - urEventWait(numEventsInWaitList, phEventWaitList); UR_ASSERT(hQueue, UR_RESULT_ERROR_INVALID_NULL_HANDLE); UR_ASSERT(hKernel, UR_RESULT_ERROR_INVALID_NULL_HANDLE); UR_ASSERT(workDim > 0, UR_RESULT_ERROR_INVALID_WORK_DIMENSION); @@ -119,14 +152,9 @@ UR_APIEXPORT ur_result_t UR_APICALL urEnqueueKernelLaunch( auto &tp = hQueue->getDevice()->tp; const size_t numParallelThreads = tp.num_threads(); std::vector> futures; - std::vector> groups; auto numWG0 = ndr.GlobalSize[0] / ndr.LocalSize[0]; auto numWG1 = ndr.GlobalSize[1] / ndr.LocalSize[1]; auto numWG2 = ndr.GlobalSize[2] / ndr.LocalSize[2]; - native_cpu::state state(ndr.GlobalSize[0], ndr.GlobalSize[1], - ndr.GlobalSize[2], ndr.LocalSize[0], ndr.LocalSize[1], - ndr.LocalSize[2], ndr.GlobalOffset[0], - ndr.GlobalOffset[1], ndr.GlobalOffset[2]); auto event = new ur_event_handle_t_(hQueue, UR_COMMAND_KERNEL_LAUNCH); event->tick_start(); @@ -134,6 +162,8 @@ UR_APIEXPORT ur_result_t UR_APICALL urEnqueueKernelLaunch( auto kernel = std::make_unique(*hKernel); kernel->updateMemPool(numParallelThreads); + auto InEvents = native_cpu::getWaitInfo(numEventsInWaitList, phEventWaitList); + const size_t numWG = numWG0 * numWG1 * numWG2; const size_t numWGPerThread = numWG / numParallelThreads; const size_t remainderWG = numWG - numWGPerThread * numParallelThreads; @@ -147,13 +177,11 @@ UR_APIEXPORT ur_result_t UR_APICALL urEnqueueKernelLaunch( rangeEnd[0] = rangeEnd[3] % numWG0; rangeEnd[1] = (rangeEnd[3] / numWG0) % numWG1; rangeEnd[2] = rangeEnd[3] / (numWG0 * numWG1); - futures.emplace_back( - tp.schedule_task([state, &kernel = *kernel, rangeStart, - rangeEnd = rangeEnd[3], numWG0, numWG1, -#ifndef NATIVECPU_USE_OCK - localSize = ndr.LocalSize, -#endif - numParallelThreads](size_t threadId) mutable { + futures.emplace_back(tp.schedule_task( + [ndr, InEvents, &kernel = *kernel, rangeStart, rangeEnd = rangeEnd[3], + numWG0, numWG1, numParallelThreads](size_t threadId) { + auto state = getState(ndr); + InEvents.wait(); for (size_t g0 = rangeStart[0], g1 = rangeStart[1], g2 = rangeStart[2], g3 = rangeStart[3]; g3 < rangeEnd; ++g3) { @@ -162,9 +190,9 @@ UR_APIEXPORT ur_result_t UR_APICALL urEnqueueKernelLaunch( kernel._subhandler( kernel.getArgs(numParallelThreads, threadId).data(), &state); #else - for (size_t local2 = 0; local2 < localSize[2]; ++local2) { - for (size_t local1 = 0; local1 < localSize[1]; ++local1) { - for (size_t local0 = 0; local0 < localSize[0]; ++local0) { + for (size_t local2 = 0; local2 < ndr.LocalSize[2]; ++local2) { + for (size_t local1 = 0; local1 < ndr.LocalSize[1]; ++local1) { + for (size_t local0 = 0; local0 < ndr.LocalSize[0]; ++local0) { state.update(g0, g1, g2, local0, local1, local2); kernel._subhandler( kernel.getArgs(numParallelThreads, threadId).data(), @@ -189,7 +217,8 @@ UR_APIEXPORT ur_result_t UR_APICALL urEnqueueKernelLaunch( if (phEvent) { *phEvent = event; } - event->set_callback([kernel = std::move(kernel), hKernel, event]() { + event->set_callback([kernel = std::move(kernel), hKernel, event, + InEvents = InEvents.getUniquePtr()]() { event->tick_end(); // TODO: avoid calling clear() here. hKernel->_localArgInfo.clear(); @@ -207,20 +236,32 @@ static inline ur_result_t withTimingEvent(ur_command_t command_type, ur_queue_handle_t hQueue, uint32_t numEventsInWaitList, const ur_event_handle_t *phEventWaitList, - ur_event_handle_t *phEvent, T &&f) { - urEventWait(numEventsInWaitList, phEventWaitList); - ur_event_handle_t event = nullptr; + ur_event_handle_t *phEvent, T &&f, bool blocking = true) { if (phEvent) { - event = new ur_event_handle_t_(hQueue, command_type); + ur_event_handle_t event = new ur_event_handle_t_(hQueue, command_type); + *phEvent = event; event->tick_start(); + if (blocking || hQueue->isInOrder()) { + urEventWait(numEventsInWaitList, phEventWaitList); + ur_result_t result = f(); + event->tick_end(); + return result; + } + auto &tp = hQueue->getDevice()->tp; + std::vector> futures; + auto InEvents = + native_cpu::getWaitInfo(numEventsInWaitList, phEventWaitList); + futures.emplace_back(tp.schedule_task([f, InEvents](size_t) { + InEvents.wait(); + f(); + })); + event->set_futures(futures); + event->set_callback( + [event, InEvents = InEvents.getUniquePtr()]() { event->tick_end(); }); + return UR_RESULT_SUCCESS; } - + urEventWait(numEventsInWaitList, phEventWaitList); ur_result_t result = f(); - - if (phEvent) { - event->tick_end(); - *phEvent = event; - } return result; } @@ -231,7 +272,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urEnqueueEventsWait( // TODO: the wait here should be async return withTimingEvent(UR_COMMAND_EVENTS_WAIT, hQueue, numEventsInWaitList, phEventWaitList, phEvent, - [&]() { return UR_RESULT_SUCCESS; }); + []() { return UR_RESULT_SUCCESS; }); } UR_APIEXPORT ur_result_t UR_APICALL urEnqueueEventsWaitWithBarrier( @@ -239,7 +280,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urEnqueueEventsWaitWithBarrier( const ur_event_handle_t *phEventWaitList, ur_event_handle_t *phEvent) { return withTimingEvent(UR_COMMAND_EVENTS_WAIT_WITH_BARRIER, hQueue, numEventsInWaitList, phEventWaitList, phEvent, - [&]() { return UR_RESULT_SUCCESS; }); + []() { return UR_RESULT_SUCCESS; }); } UR_APIEXPORT ur_result_t urEnqueueEventsWaitWithBarrierExt( @@ -250,9 +291,43 @@ UR_APIEXPORT ur_result_t urEnqueueEventsWaitWithBarrierExt( phEventWaitList, phEvent); } +template +static inline void MemBufferReadWriteRect_impl( + ur_mem_handle_t Buff, ur_rect_offset_t BufferOffset, + ur_rect_offset_t HostOffset, ur_rect_region_t region, size_t BufferRowPitch, + size_t BufferSlicePitch, size_t HostRowPitch, size_t HostSlicePitch, + typename std::conditional::type DstMem) { + // TODO: check other constraints, performance optimizations + // More sharing with level_zero where possible + + if (BufferRowPitch == 0) + BufferRowPitch = region.width; + if (BufferSlicePitch == 0) + BufferSlicePitch = BufferRowPitch * region.height; + if (HostRowPitch == 0) + HostRowPitch = region.width; + if (HostSlicePitch == 0) + HostSlicePitch = HostRowPitch * region.height; + for (size_t w = 0; w < region.width; w++) + for (size_t h = 0; h < region.height; h++) + for (size_t d = 0; d < region.depth; d++) { + size_t buff_orign = (d + BufferOffset.z) * BufferSlicePitch + + (h + BufferOffset.y) * BufferRowPitch + w + + BufferOffset.x; + size_t host_origin = (d + HostOffset.z) * HostSlicePitch + + (h + HostOffset.y) * HostRowPitch + w + + HostOffset.x; + int8_t &buff_mem = ur_cast(Buff->_mem)[buff_orign]; + if constexpr (IsRead) + ur_cast(DstMem)[host_origin] = buff_mem; + else + buff_mem = ur_cast(DstMem)[host_origin]; + } +} + template static inline ur_result_t enqueueMemBufferReadWriteRect_impl( - ur_queue_handle_t hQueue, ur_mem_handle_t Buff, bool, + ur_queue_handle_t hQueue, ur_mem_handle_t Buff, bool blocking, ur_rect_offset_t BufferOffset, ur_rect_offset_t HostOffset, ur_rect_region_t region, size_t BufferRowPitch, size_t BufferSlicePitch, size_t HostRowPitch, size_t HostSlicePitch, @@ -265,71 +340,63 @@ static inline ur_result_t enqueueMemBufferReadWriteRect_impl( else command_t = UR_COMMAND_MEM_BUFFER_WRITE_RECT; return withTimingEvent( - command_t, hQueue, NumEventsInWaitList, phEventWaitList, phEvent, [&]() { - // TODO: blocking, check other constraints, performance optimizations - // More sharing with level_zero where possible - - if (BufferRowPitch == 0) - BufferRowPitch = region.width; - if (BufferSlicePitch == 0) - BufferSlicePitch = BufferRowPitch * region.height; - if (HostRowPitch == 0) - HostRowPitch = region.width; - if (HostSlicePitch == 0) - HostSlicePitch = HostRowPitch * region.height; - for (size_t w = 0; w < region.width; w++) - for (size_t h = 0; h < region.height; h++) - for (size_t d = 0; d < region.depth; d++) { - size_t buff_orign = (d + BufferOffset.z) * BufferSlicePitch + - (h + BufferOffset.y) * BufferRowPitch + w + - BufferOffset.x; - size_t host_origin = (d + HostOffset.z) * HostSlicePitch + - (h + HostOffset.y) * HostRowPitch + w + - HostOffset.x; - int8_t &buff_mem = ur_cast(Buff->_mem)[buff_orign]; - if constexpr (IsRead) - ur_cast(DstMem)[host_origin] = buff_mem; - else - buff_mem = ur_cast(DstMem)[host_origin]; - } - + command_t, hQueue, NumEventsInWaitList, phEventWaitList, phEvent, + [BufferRowPitch, region, BufferSlicePitch, HostRowPitch, HostSlicePitch, + BufferOffset, HostOffset, Buff, DstMem]() { + MemBufferReadWriteRect_impl( + Buff, BufferOffset, HostOffset, region, BufferRowPitch, + BufferSlicePitch, HostRowPitch, HostSlicePitch, DstMem); return UR_RESULT_SUCCESS; - }); + }, + blocking); } -static inline ur_result_t doCopy_impl(ur_queue_handle_t hQueue, void *DstPtr, - const void *SrcPtr, size_t Size, - uint32_t numEventsInWaitList, - const ur_event_handle_t *phEventWaitList, - ur_event_handle_t *phEvent, - ur_command_t command_type) { - return withTimingEvent(command_type, hQueue, numEventsInWaitList, - phEventWaitList, phEvent, [&]() { - if (SrcPtr != DstPtr && Size) - memmove(DstPtr, SrcPtr, Size); - return UR_RESULT_SUCCESS; - }); +template +static inline ur_result_t doCopy_impl( + ur_queue_handle_t hQueue, void *DstPtr, const void *SrcPtr, size_t Size, + uint32_t numEventsInWaitList, const ur_event_handle_t *phEventWaitList, + ur_event_handle_t *phEvent, ur_command_t command_type, bool blocking) { + if (SrcPtr == DstPtr || Size == 0) { + bool hasInEvents = numEventsInWaitList && phEventWaitList; + return withTimingEvent( + command_type, hQueue, numEventsInWaitList, phEventWaitList, phEvent, + []() { return UR_RESULT_SUCCESS; }, blocking || !hasInEvents); + } + + return withTimingEvent( + command_type, hQueue, numEventsInWaitList, phEventWaitList, phEvent, + [DstPtr, SrcPtr, Size]() { + if constexpr (AllowPartialOverlap) { + memmove(DstPtr, SrcPtr, Size); + } else { + memcpy(DstPtr, SrcPtr, Size); + } + return UR_RESULT_SUCCESS; + }, + blocking); } UR_APIEXPORT ur_result_t UR_APICALL urEnqueueMemBufferRead( - ur_queue_handle_t hQueue, ur_mem_handle_t hBuffer, bool /*blockingRead*/, + ur_queue_handle_t hQueue, ur_mem_handle_t hBuffer, bool blockingRead, size_t offset, size_t size, void *pDst, uint32_t numEventsInWaitList, const ur_event_handle_t *phEventWaitList, ur_event_handle_t *phEvent) { void *FromPtr = /*Src*/ hBuffer->_mem + offset; auto res = doCopy_impl(hQueue, pDst, FromPtr, size, numEventsInWaitList, - phEventWaitList, phEvent, UR_COMMAND_MEM_BUFFER_READ); + phEventWaitList, phEvent, UR_COMMAND_MEM_BUFFER_READ, + blockingRead); return res; } UR_APIEXPORT ur_result_t UR_APICALL urEnqueueMemBufferWrite( - ur_queue_handle_t hQueue, ur_mem_handle_t hBuffer, bool /*blockingWrite*/, + ur_queue_handle_t hQueue, ur_mem_handle_t hBuffer, bool blockingWrite, size_t offset, size_t size, const void *pSrc, uint32_t numEventsInWaitList, const ur_event_handle_t *phEventWaitList, ur_event_handle_t *phEvent) { void *ToPtr = hBuffer->_mem + offset; auto res = doCopy_impl(hQueue, ToPtr, pSrc, size, numEventsInWaitList, - phEventWaitList, phEvent, UR_COMMAND_MEM_BUFFER_WRITE); + phEventWaitList, phEvent, UR_COMMAND_MEM_BUFFER_WRITE, + blockingWrite); return res; } @@ -368,7 +435,8 @@ UR_APIEXPORT ur_result_t UR_APICALL urEnqueueMemBufferCopy( const void *SrcPtr = hBufferSrc->_mem + srcOffset; void *DstPtr = hBufferDst->_mem + dstOffset; return doCopy_impl(hQueue, DstPtr, SrcPtr, size, numEventsInWaitList, - phEventWaitList, phEvent, UR_COMMAND_MEM_BUFFER_COPY); + phEventWaitList, phEvent, UR_COMMAND_MEM_BUFFER_COPY, + true /*TODO: check false for non-blocking*/); } UR_APIEXPORT ur_result_t UR_APICALL urEnqueueMemBufferCopyRect( @@ -379,7 +447,8 @@ UR_APIEXPORT ur_result_t UR_APICALL urEnqueueMemBufferCopyRect( uint32_t numEventsInWaitList, const ur_event_handle_t *phEventWaitList, ur_event_handle_t *phEvent) { return enqueueMemBufferReadWriteRect_impl( - hQueue, hBufferSrc, false /*todo: check blocking*/, srcOrigin, + hQueue, hBufferSrc, true /*todo: check false for non-blocking*/, + srcOrigin, /*HostOffset*/ dstOrigin, region, srcRowPitch, srcSlicePitch, dstRowPitch, dstSlicePitch, hBufferDst->_mem, numEventsInWaitList, phEventWaitList, phEvent); @@ -390,12 +459,10 @@ UR_APIEXPORT ur_result_t UR_APICALL urEnqueueMemBufferFill( size_t patternSize, size_t offset, size_t size, uint32_t numEventsInWaitList, const ur_event_handle_t *phEventWaitList, ur_event_handle_t *phEvent) { - + UR_ASSERT(hQueue, UR_RESULT_ERROR_INVALID_NULL_HANDLE); return withTimingEvent( UR_COMMAND_MEM_BUFFER_FILL, hQueue, numEventsInWaitList, phEventWaitList, - phEvent, [&]() { - UR_ASSERT(hQueue, UR_RESULT_ERROR_INVALID_NULL_HANDLE); - + phEvent, [hBuffer, offset, size, patternSize, pPattern]() { // TODO: error checking // TODO: handle async void *startingPtr = hBuffer->_mem + offset; @@ -449,7 +516,8 @@ UR_APIEXPORT ur_result_t UR_APICALL urEnqueueMemBufferMap( ur_event_handle_t *phEvent, void **ppRetMap) { return withTimingEvent(UR_COMMAND_MEM_BUFFER_MAP, hQueue, numEventsInWaitList, - phEventWaitList, phEvent, [&]() { + phEventWaitList, phEvent, + [ppRetMap, hBuffer, offset]() { *ppRetMap = hBuffer->_mem + offset; return UR_RESULT_SUCCESS; }); @@ -461,7 +529,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urEnqueueMemUnmap( ur_event_handle_t *phEvent) { return withTimingEvent(UR_COMMAND_MEM_UNMAP, hQueue, numEventsInWaitList, phEventWaitList, phEvent, - [&]() { return UR_RESULT_SUCCESS; }); + []() { return UR_RESULT_SUCCESS; }); } UR_APIEXPORT ur_result_t UR_APICALL urEnqueueUSMFill( @@ -470,7 +538,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urEnqueueUSMFill( const ur_event_handle_t *phEventWaitList, ur_event_handle_t *phEvent) { return withTimingEvent( UR_COMMAND_USM_FILL, hQueue, numEventsInWaitList, phEventWaitList, - phEvent, [&]() { + phEvent, [ptr, pPattern, patternSize, size]() { UR_ASSERT(ptr, UR_RESULT_ERROR_INVALID_NULL_POINTER); UR_ASSERT(pPattern, UR_RESULT_ERROR_INVALID_NULL_POINTER); UR_ASSERT(patternSize != 0, UR_RESULT_ERROR_INVALID_SIZE) @@ -520,20 +588,16 @@ UR_APIEXPORT ur_result_t UR_APICALL urEnqueueUSMFill( } UR_APIEXPORT ur_result_t UR_APICALL urEnqueueUSMMemcpy( - ur_queue_handle_t hQueue, bool /*blocking*/, void *pDst, const void *pSrc, + ur_queue_handle_t hQueue, bool blocking, void *pDst, const void *pSrc, size_t size, uint32_t numEventsInWaitList, const ur_event_handle_t *phEventWaitList, ur_event_handle_t *phEvent) { - return withTimingEvent( - UR_COMMAND_USM_MEMCPY, hQueue, numEventsInWaitList, phEventWaitList, - phEvent, [&]() { - UR_ASSERT(hQueue, UR_RESULT_ERROR_INVALID_QUEUE); - UR_ASSERT(pDst, UR_RESULT_ERROR_INVALID_NULL_POINTER); - UR_ASSERT(pSrc, UR_RESULT_ERROR_INVALID_NULL_POINTER); - - memcpy(pDst, pSrc, size); + UR_ASSERT(hQueue, UR_RESULT_ERROR_INVALID_QUEUE); + UR_ASSERT(pDst, UR_RESULT_ERROR_INVALID_NULL_POINTER); + UR_ASSERT(pSrc, UR_RESULT_ERROR_INVALID_NULL_POINTER); - return UR_RESULT_SUCCESS; - }); + return doCopy_impl( + hQueue, pDst, pSrc, size, numEventsInWaitList, phEventWaitList, phEvent, + UR_COMMAND_USM_MEMCPY, blocking); } UR_APIEXPORT ur_result_t UR_APICALL urEnqueueUSMPrefetch( From 0040e71e4600e0651c0b1ec415e3cccec7d9426f Mon Sep 17 00:00:00 2001 From: Sergey Semenov Date: Mon, 18 Aug 2025 13:49:24 +0200 Subject: [PATCH 02/15] [SYCL][NFC] Fix Coverity hits (#19807) --- sycl/source/detail/device_binary_image.hpp | 2 +- sycl/source/detail/global_handler.cpp | 4 +++- 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/sycl/source/detail/device_binary_image.hpp b/sycl/source/detail/device_binary_image.hpp index ee96ccc998d2..ac4fb92d3f9a 100644 --- a/sycl/source/detail/device_binary_image.hpp +++ b/sycl/source/detail/device_binary_image.hpp @@ -250,7 +250,7 @@ class RTDeviceBinaryImage { protected: sycl_device_binary get() const { return Bin; } - sycl_device_binary Bin; + sycl_device_binary Bin = nullptr; ur::DeviceBinaryType Format = SYCL_DEVICE_BINARY_TYPE_NONE; RTDeviceBinaryImage::PropertyRange SpecConstIDMap; diff --git a/sycl/source/detail/global_handler.cpp b/sycl/source/detail/global_handler.cpp index ff89080ad19e..66b181a9bc0e 100644 --- a/sycl/source/detail/global_handler.cpp +++ b/sycl/source/detail/global_handler.cpp @@ -272,7 +272,9 @@ void GlobalHandler::releaseDefaultContexts() { // For Linux, early shutdown is here, and late shutdown is called from // a low priority destructor. struct StaticVarShutdownHandler { - + StaticVarShutdownHandler(const StaticVarShutdownHandler &) = delete; + StaticVarShutdownHandler & + operator=(const StaticVarShutdownHandler &) = delete; ~StaticVarShutdownHandler() { try { #ifdef _WIN32 From 9448e0ffe9ce392f739aecb7b91193feef6285a2 Mon Sep 17 00:00:00 2001 From: Maosu Zhao Date: Mon, 18 Aug 2025 22:59:49 +0800 Subject: [PATCH 03/15] [DevMSAN] Unpoison sret argument for builtin function to get spec constant (#19800) * For builtin func like "__sycl_getComposite2020SpecConstantValue", if structs which are larger than 64b will be returned via sret arguments and will be initialized inside the function. So we need to unpoison the sret arguments. * Always set ZE_AFFINITY_MAST to 0 when running device sanitizer tests since device sanitizer only support one GPU card now. --- libdevice/sanitizer/msan_rtl.cpp | 28 +++++++++-- .../Instrumentation/MemorySanitizer.cpp | 48 +++++++++++++++++-- .../MemorySanitizer/SPIRV/spec_constants.ll | 22 +++++++++ sycl/test-e2e/AddressSanitizer/lit.local.cfg | 2 + sycl/test-e2e/MemorySanitizer/lit.local.cfg | 2 + sycl/test-e2e/ThreadSanitizer/lit.local.cfg | 2 + 6 files changed, 95 insertions(+), 9 deletions(-) create mode 100644 llvm/test/Instrumentation/MemorySanitizer/SPIRV/spec_constants.ll diff --git a/libdevice/sanitizer/msan_rtl.cpp b/libdevice/sanitizer/msan_rtl.cpp index 68c0db600049..87d57fc6950c 100644 --- a/libdevice/sanitizer/msan_rtl.cpp +++ b/libdevice/sanitizer/msan_rtl.cpp @@ -671,7 +671,7 @@ __msan_unpoison_shadow_dynamic_local(uptr ptr, uint32_t num_args) { "__msan_unpoison_shadow_dynamic_local")); } -static __SYCL_CONSTANT__ const char __msan_print_set_shadow_private[] = +static __SYCL_CONSTANT__ const char __msan_print_set_shadow[] = "[kernel] __msan_set_value(beg=%p, end=%p, val=%02X)\n"; // We outline the function of setting shadow memory of private memory, because @@ -684,8 +684,7 @@ DEVICE_EXTERN_C_NOINLINE void __msan_poison_stack(__SYCL_PRIVATE__ void *ptr, MSAN_DEBUG(__spirv_ocl_printf(__msan_print_func_beg, "__msan_poison_stack")); auto shadow_address = MemToShadow((uptr)ptr, ADDRESS_SPACE_PRIVATE); - MSAN_DEBUG(__spirv_ocl_printf(__msan_print_set_shadow_private, - (void *)shadow_address, + MSAN_DEBUG(__spirv_ocl_printf(__msan_print_set_shadow, (void *)shadow_address, (void *)(shadow_address + size), 0xff)); if (shadow_address != GetMsanLaunchInfo->CleanShadow) { @@ -704,8 +703,7 @@ DEVICE_EXTERN_C_NOINLINE void __msan_unpoison_stack(__SYCL_PRIVATE__ void *ptr, __spirv_ocl_printf(__msan_print_func_beg, "__msan_unpoison_stack")); auto shadow_address = MemToShadow((uptr)ptr, ADDRESS_SPACE_PRIVATE); - MSAN_DEBUG(__spirv_ocl_printf(__msan_print_set_shadow_private, - (void *)shadow_address, + MSAN_DEBUG(__spirv_ocl_printf(__msan_print_set_shadow, (void *)shadow_address, (void *)(shadow_address + size), 0x0)); if (shadow_address != GetMsanLaunchInfo->CleanShadow) { @@ -716,6 +714,26 @@ DEVICE_EXTERN_C_NOINLINE void __msan_unpoison_stack(__SYCL_PRIVATE__ void *ptr, __spirv_ocl_printf(__msan_print_func_end, "__msan_unpoison_stack")); } +DEVICE_EXTERN_C_NOINLINE void __msan_unpoison_shadow(uptr ptr, uint32_t as, + uptr size) { + if (!GetMsanLaunchInfo) + return; + + MSAN_DEBUG( + __spirv_ocl_printf(__msan_print_func_beg, "__msan_unpoison_shadow")); + + auto shadow_address = MemToShadow(ptr, as); + MSAN_DEBUG(__spirv_ocl_printf(__msan_print_set_shadow, (void *)shadow_address, + (void *)(shadow_address + size), 0x0)); + + if (shadow_address != GetMsanLaunchInfo->CleanShadow) { + Memset((__SYCL_GLOBAL__ char *)shadow_address, 0, size); + } + + MSAN_DEBUG( + __spirv_ocl_printf(__msan_print_func_end, "__msan_unpoison_shadow")); +} + static __SYCL_CONSTANT__ const char __msan_print_private_base[] = "[kernel] __msan_set_private_base(sid=%llu): %p\n"; diff --git a/llvm/lib/Transforms/Instrumentation/MemorySanitizer.cpp b/llvm/lib/Transforms/Instrumentation/MemorySanitizer.cpp index bb9c7611059b..f14c538ec863 100644 --- a/llvm/lib/Transforms/Instrumentation/MemorySanitizer.cpp +++ b/llvm/lib/Transforms/Instrumentation/MemorySanitizer.cpp @@ -813,6 +813,8 @@ class MemorySanitizerOnSpirv { Constant *getOrCreateGlobalString(StringRef Name, StringRef Value, unsigned AddressSpace); + static bool isSupportedBuiltIn(StringRef Name); + operator bool() const { return IsSPIRV; } private: @@ -823,7 +825,6 @@ class MemorySanitizerOnSpirv { void instrumentKernelsMetadata(int TrackOrigins); void instrumentPrivateArguments(Function &F, Instruction *FnPrologueEnd); void instrumentPrivateBase(Function &F); - void initializeRetVecMap(Function *F); void initializeKernelCallerMap(Function *F); @@ -856,6 +857,7 @@ class MemorySanitizerOnSpirv { FunctionCallee MsanUnpoisonShadowDynamicLocalFunc; FunctionCallee MsanBarrierFunc; FunctionCallee MsanUnpoisonStackFunc; + FunctionCallee MsanUnpoisonShadowFunc; FunctionCallee MsanSetPrivateBaseFunc; FunctionCallee MsanUnpoisonStridedCopyFunc; }; @@ -949,6 +951,14 @@ void MemorySanitizerOnSpirv::initializeCallbacks() { MsanUnpoisonStackFunc = M.getOrInsertFunction( "__msan_unpoison_stack", IRB.getVoidTy(), PtrTy, IntptrTy); + // __msan_unpoison_( + // uptr ptr, + // uint32_t as, + // size_t size + // ) + MsanUnpoisonShadowFunc = M.getOrInsertFunction( + "__msan_unpoison_shadow", IRB.getVoidTy(), IntptrTy, Int32Ty, IntptrTy); + // __msan_set_private_base( // as(0) void * ptr // ) @@ -987,9 +997,16 @@ void MemorySanitizerOnSpirv::instrumentGlobalVariables() { G.setName("nameless_global"); if (isUnsupportedDeviceGlobal(&G)) { - for (auto *User : G.users()) - if (auto *Inst = dyn_cast(User)) - Inst->setNoSanitizeMetadata(); + for (auto *User : G.users()) { + if (!isa(User)) + continue; + if (auto *CI = dyn_cast(User)) { + Function *Callee = CI->getCalledFunction(); + if (Callee && isSupportedBuiltIn(Callee->getName())) + continue; + } + cast(User)->setNoSanitizeMetadata(); + } continue; } @@ -1150,6 +1167,10 @@ void MemorySanitizerOnSpirv::instrumentPrivateBase(Function &F) { IRB.CreateCall(MsanSetPrivateBaseFunc, {PrivateBase}); } +bool MemorySanitizerOnSpirv::isSupportedBuiltIn(StringRef Name) { + return Name.contains("__sycl_getComposite2020SpecConstantValue"); +} + void MemorySanitizerOnSpirv::instrumentPrivateArguments( Function &F, Instruction *FnPrologueEnd) { if (!ClSpirOffloadPrivates) @@ -6994,6 +7015,25 @@ struct MemorySanitizerVisitor : public InstVisitor { IRB.CreatePointerCast(Src, MS.Spirv.IntptrTy), IRB.getInt32(Src->getType()->getPointerAddressSpace()), IRB.getInt32(ElementSize), NumElements, Stride}); + } else if (FuncName.contains( + "__sycl_getComposite2020SpecConstantValue")) { + // clang-format off + // Handle builtin functions like "_Z40__sycl_getComposite2020SpecConstantValue" + // Structs which are larger than 64b will be returned via sret arguments + // and will be initialized inside the function. So we need to unpoison + // the sret arguments. + // clang-format on + if (Func->hasStructRetAttr()) { + Type *SCTy = Func->getParamStructRetType(0); + unsigned Size = Func->getDataLayout().getTypeStoreSize(SCTy); + auto *Addr = CB.getArgOperand(0); + IRB.CreateCall( + MS.Spirv.MsanUnpoisonShadowFunc, + {IRB.CreatePointerCast(Addr, MS.Spirv.IntptrTy), + ConstantInt::get(MS.Spirv.Int32Ty, + Addr->getType()->getPointerAddressSpace()), + ConstantInt::get(MS.Spirv.IntptrTy, Size)}); + } } } } diff --git a/llvm/test/Instrumentation/MemorySanitizer/SPIRV/spec_constants.ll b/llvm/test/Instrumentation/MemorySanitizer/SPIRV/spec_constants.ll new file mode 100644 index 000000000000..f910af0cf92b --- /dev/null +++ b/llvm/test/Instrumentation/MemorySanitizer/SPIRV/spec_constants.ll @@ -0,0 +1,22 @@ +; RUN: opt < %s -passes=msan -msan-instrumentation-with-call-threshold=0 -msan-eager-checks=1 -msan-poison-stack-with-call=1 -S | FileCheck %s + +target datalayout = "e-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-n8:16:32:64-G1" +target triple = "spir64-unknown-unknown" + +%"class.sycl::_V1::specialization_id" = type { %"struct.user_def_types::no_cnstr" } +%"struct.user_def_types::no_cnstr" = type { float, i32, i8 } + +@__usid_str = external addrspace(4) constant [57 x i8] +@_Z19spec_const_externalIN14user_def_types8no_cnstrELi1EE = external addrspace(1) constant %"class.sycl::_V1::specialization_id" + +define spir_func i1 @_Z50check_kernel_handler_by_reference_external_handlerRN4sycl3_V114kernel_handlerEN14user_def_types8no_cnstrE() { +entry: + %ref.tmp.i = alloca %"struct.user_def_types::no_cnstr", align 4 + %ref.tmp.ascast.i = addrspacecast ptr %ref.tmp.i to ptr addrspace(4) +; CHECK: [[REG1:%[0-9]+]] = ptrtoint ptr addrspace(4) %ref.tmp.ascast.i to i64 +; CHECK: call void @__msan_unpoison_shadow(i64 [[REG1]], i32 4, i64 12) + call spir_func void @_Z40__sycl_getComposite2020SpecConstantValueIN14user_def_types8no_cnstrEET_PKcPKvS6_(ptr addrspace(4) dead_on_unwind writable sret(%"struct.user_def_types::no_cnstr") align 4 %ref.tmp.ascast.i, ptr addrspace(4) noundef @__usid_str, ptr addrspace(4) noundef addrspacecast (ptr addrspace(1) @_Z19spec_const_externalIN14user_def_types8no_cnstrELi1EE to ptr addrspace(4)), ptr addrspace(4) noundef null) + ret i1 false +} + +declare spir_func void @_Z40__sycl_getComposite2020SpecConstantValueIN14user_def_types8no_cnstrEET_PKcPKvS6_(ptr addrspace(4) sret(%"struct.user_def_types::no_cnstr"), ptr addrspace(4), ptr addrspace(4), ptr addrspace(4)) diff --git a/sycl/test-e2e/AddressSanitizer/lit.local.cfg b/sycl/test-e2e/AddressSanitizer/lit.local.cfg index 8a4709eb254b..c2bc429f1bb3 100644 --- a/sycl/test-e2e/AddressSanitizer/lit.local.cfg +++ b/sycl/test-e2e/AddressSanitizer/lit.local.cfg @@ -28,3 +28,5 @@ unsupported_san_flags = [ ] if any(flag in config.cxx_flags for flag in unsupported_san_flags): config.unsupported=True + +config.environment["ZE_AFFINITY_MASK"] = "0" diff --git a/sycl/test-e2e/MemorySanitizer/lit.local.cfg b/sycl/test-e2e/MemorySanitizer/lit.local.cfg index d6da6eb7bf3b..617db32b6062 100644 --- a/sycl/test-e2e/MemorySanitizer/lit.local.cfg +++ b/sycl/test-e2e/MemorySanitizer/lit.local.cfg @@ -35,3 +35,5 @@ unsupported_san_flags = [ ] if any(flag in config.cxx_flags for flag in unsupported_san_flags): config.unsupported=True + +config.environment["ZE_AFFINITY_MASK"] = "0" diff --git a/sycl/test-e2e/ThreadSanitizer/lit.local.cfg b/sycl/test-e2e/ThreadSanitizer/lit.local.cfg index fe03e06b8d89..aee25f0a5ba0 100644 --- a/sycl/test-e2e/ThreadSanitizer/lit.local.cfg +++ b/sycl/test-e2e/ThreadSanitizer/lit.local.cfg @@ -33,3 +33,5 @@ unsupported_san_flags = [ ] if any(flag in config.cxx_flags for flag in unsupported_san_flags): config.unsupported=True + +config.environment["ZE_AFFINITY_MASK"] = "0" From 91c0446ded75a859714b4c18739a9278f70843b2 Mon Sep 17 00:00:00 2001 From: Weronika Lewandowska Date: Mon, 18 Aug 2025 17:01:06 +0200 Subject: [PATCH 04/15] [Benchmarks] Add toggle abstraction layer (#19799) --- devops/scripts/benchmarks/html/scripts.js | 232 ++++++++++++---------- 1 file changed, 132 insertions(+), 100 deletions(-) diff --git a/devops/scripts/benchmarks/html/scripts.js b/devops/scripts/benchmarks/html/scripts.js index 558021a13ab4..28e3a708c295 100644 --- a/devops/scripts/benchmarks/html/scripts.js +++ b/devops/scripts/benchmarks/html/scripts.js @@ -17,6 +17,125 @@ let annotationsOptions = new Map(); // Global options map for annotations let archivedDataLoaded = false; let loadedBenchmarkRuns = []; // Loaded results from the js/json files +// Toggle configuration and abstraction +// +// HOW TO ADD A NEW TOGGLE: +// 1. Add HTML checkbox to index.html: +// +// +// 2. Add configuration below: +// 'my-toggle': { +// defaultValue: false, // true = enabled by default, false = disabled by default +// urlParam: 'myParam', // Name shown in URL (?myParam=true) +// invertUrlParam: false, // false = normal behavior, true = legacy inverted logic +// onChange: function(isEnabled) { // Function called when toggle state changes +// // Your logic here +// updateURL(); // Always call this to update the browser URL +// } +// } +// +// 3. (Optional) Add helper function for cleaner, more readable code: +// function isMyToggleEnabled() { return isToggleEnabled('my-toggle'); } +// +// This lets you write: if (isMyToggleEnabled()) { ... } +// Instead of: if (isToggleEnabled('my-toggle')) { ... } +// + +const toggleConfigs = { + 'show-notes': { + defaultValue: true, + urlParam: 'notes', + invertUrlParam: true, // Store false in URL when enabled (legacy behavior) + onChange: function(isEnabled) { + document.querySelectorAll('.benchmark-note').forEach(note => { + note.style.display = isEnabled ? 'block' : 'none'; + }); + updateURL(); + } + }, + 'show-unstable': { + defaultValue: false, + urlParam: 'unstable', + invertUrlParam: false, + onChange: function(isEnabled) { + document.querySelectorAll('.benchmark-unstable').forEach(warning => { + warning.style.display = isEnabled ? 'block' : 'none'; + }); + filterCharts(); + } + }, + 'custom-range': { + defaultValue: false, + urlParam: 'customRange', + invertUrlParam: false, + onChange: function(isEnabled) { + updateCharts(); + } + }, + 'show-archived-data': { + defaultValue: false, + urlParam: 'archived', + invertUrlParam: false, + onChange: function(isEnabled) { + if (isEnabled) { + loadArchivedData(); + } else { + if (archivedDataLoaded) { + location.reload(); + } + } + updateURL(); + } + } +}; + +// Generic toggle helper functions +function isToggleEnabled(toggleId) { + const toggle = document.getElementById(toggleId); + return toggle ? toggle.checked : toggleConfigs[toggleId]?.defaultValue || false; +} + +function setupToggle(toggleId, config) { + const toggle = document.getElementById(toggleId); + if (!toggle) return; + + // Set up event listener + toggle.addEventListener('change', function() { + config.onChange(toggle.checked); + }); + + // Initialize from URL params if present + const urlParam = getQueryParam(config.urlParam); + if (urlParam !== null) { + const urlValue = urlParam === 'true'; + // Handle inverted URL params (like notes where false means enabled) + toggle.checked = config.invertUrlParam ? !urlValue : urlValue; + } else { + // Use default value + toggle.checked = config.defaultValue; + } +} + +function updateToggleURL(toggleId, config, url) { + const isEnabled = isToggleEnabled(toggleId); + + if (config.invertUrlParam) { + // For inverted params, store in URL when disabled + if (isEnabled) { + url.searchParams.delete(config.urlParam); + } else { + url.searchParams.set(config.urlParam, 'false'); + } + } else { + // For normal params, store in URL when enabled + if (!isEnabled) { + url.searchParams.delete(config.urlParam); + } else { + url.searchParams.set(config.urlParam, 'true'); + } + } +} + // DOM Elements let runSelect, selectedRunsDiv, suiteFiltersContainer, tagFiltersContainer; @@ -627,30 +746,10 @@ function updateURL() { url.searchParams.delete('runs'); } - // Add toggle states to URL - if (isNotesEnabled()) { - url.searchParams.delete('notes'); - } else { - url.searchParams.set('notes', 'false'); - } - - if (!isUnstableEnabled()) { - url.searchParams.delete('unstable'); - } else { - url.searchParams.set('unstable', 'true'); - } - - if (!isCustomRangesEnabled()) { - url.searchParams.delete('customRange'); - } else { - url.searchParams.set('customRange', 'true'); - } - - if (!isArchivedDataEnabled()) { - url.searchParams.delete('archived'); - } else { - url.searchParams.set('archived', 'true'); - } + // Update toggle states in URL using the generic helper + Object.entries(toggleConfigs).forEach(([toggleId, config]) => { + updateToggleURL(toggleId, config, url); + }); history.replaceState(null, '', url); } @@ -949,94 +1048,26 @@ function setupSuiteFilters() { } function isNotesEnabled() { - const notesToggle = document.getElementById('show-notes'); - return notesToggle.checked; + return isToggleEnabled('show-notes'); } function isUnstableEnabled() { - const unstableToggle = document.getElementById('show-unstable'); - return unstableToggle.checked; + return isToggleEnabled('show-unstable'); } function isCustomRangesEnabled() { - const rangesToggle = document.getElementById('custom-range'); - return rangesToggle.checked; + return isToggleEnabled('custom-range'); } function isArchivedDataEnabled() { - const archivedDataToggle = document.getElementById('show-archived-data'); - return archivedDataToggle.checked; + return isToggleEnabled('show-archived-data'); } function setupToggles() { - const notesToggle = document.getElementById('show-notes'); - const unstableToggle = document.getElementById('show-unstable'); - const customRangeToggle = document.getElementById('custom-range'); - const archivedDataToggle = document.getElementById('show-archived-data'); - - notesToggle.addEventListener('change', function () { - // Update all note elements visibility - document.querySelectorAll('.benchmark-note').forEach(note => { - note.style.display = isNotesEnabled() ? 'block' : 'none'; - }); - updateURL(); - }); - - unstableToggle.addEventListener('change', function () { - // Update all unstable warning elements visibility - document.querySelectorAll('.benchmark-unstable').forEach(warning => { - warning.style.display = isUnstableEnabled() ? 'block' : 'none'; - }); - filterCharts(); - }); - - customRangeToggle.addEventListener('change', function () { - // redraw all charts - updateCharts(); + // Set up all toggles using the configuration + Object.entries(toggleConfigs).forEach(([toggleId, config]) => { + setupToggle(toggleId, config); }); - - // Add event listener for archived data toggle - if (archivedDataToggle) { - archivedDataToggle.addEventListener('change', function() { - if (archivedDataToggle.checked) { - loadArchivedData(); - } else { - if (archivedDataLoaded) { - // Reload the page to reset - location.reload(); - } - } - updateURL(); - }); - } - - // Initialize from URL params if present - const notesParam = getQueryParam('notes'); - const unstableParam = getQueryParam('unstable'); - const archivedParam = getQueryParam('archived'); - - if (notesParam !== null) { - let showNotes = notesParam === 'true'; - notesToggle.checked = showNotes; - } - - if (unstableParam !== null) { - let showUnstable = unstableParam === 'true'; - unstableToggle.checked = showUnstable; - } - - const customRangesParam = getQueryParam('customRange'); - if (customRangesParam !== null) { - customRangeToggle.checked = customRangesParam === 'true'; - } - - if (archivedDataToggle && archivedParam !== null) { - archivedDataToggle.checked = archivedParam === 'true'; - - if (archivedDataToggle.checked) { - loadArchivedData(); - } - } } function setupTagFilters() { @@ -1154,9 +1185,10 @@ function initializeCharts() { // Setup UI components setupRunSelector(); setupSuiteFilters(); - setupTagFilters(); setupToggles(); initializePlatformTab(); + // Setup tag filters after everything else is ready + setupTagFilters(); // Apply URL parameters const regexParam = getQueryParam('regex'); From e424d0718e47abd2d602c03b149dea998f565f64 Mon Sep 17 00:00:00 2001 From: Artur Gainullin Date: Mon, 18 Aug 2025 08:26:40 -0700 Subject: [PATCH 05/15] [SYCL][Doc] Add ONEAPI_PVC_SEND_WAR_WA env variable description (#19791) --- sycl/doc/EnvironmentVariables.md | 1 + 1 file changed, 1 insertion(+) diff --git a/sycl/doc/EnvironmentVariables.md b/sycl/doc/EnvironmentVariables.md index 7e995509da35..d587d0e4ed11 100644 --- a/sycl/doc/EnvironmentVariables.md +++ b/sycl/doc/EnvironmentVariables.md @@ -8,6 +8,7 @@ compiler and runtime. | Environment variable | Values | Description | | -------------------- | ------ | ----------- | | `ONEAPI_DEVICE_SELECTOR` | [See below.](#oneapi_device_selector) | This device selection environment variable can be used to limit the choice of devices available when the SYCL-using application is run. Useful for limiting devices to a certain type (like GPUs or accelerators) or backends (like Level Zero or OpenCL). This device selection mechanism is replacing `SYCL_DEVICE_FILTER` . The `ONEAPI_DEVICE_SELECTOR` syntax is shared with OpenMP and also allows sub-devices to be chosen. [See below.](#oneapi_device_selector) for a full description. | +| `ONEAPI_PVC_SEND_WAR_WA` | '1' or '0' | Controls the workaround for Erratum "FP64 register ordering violation" on Intel Ponte Vecchio GPUs. Setting `ONEAPI_PVC_SEND_WAR_WA=0` disables the workaround and is only safe if the secondary FP64 pipeline is disabled. Default is enabled ('1') and applied throughout the oneAPI software stack - including OneDNN, OneMKL, OpenCL and Level Zero Runtimes, and Intel Graphics Compiler. | | `SYCL_DEVICE_ALLOWLIST` | See [below](#sycl_device_allowlist) | Filter out devices that do not match the pattern specified. `BackendName` accepts `host`, `opencl`, `level_zero`, `native_cpu` or `cuda`. `DeviceType` accepts `host`, `cpu`, `gpu`, `fpga`, or `acc`. `fpga` and `acc` are handled in the same manner. `DeviceVendorId` accepts uint32_t in hex form (`0xXYZW`). `DriverVersion`, `PlatformVersion`, `DeviceName` and `PlatformName` accept regular expression. Special characters, such as parenthesis, must be escaped. DPC++ runtime will select only those devices which satisfy provided values above and regex. More than one device can be specified using the piping symbol "\|".| | `SYCL_DISABLE_PARALLEL_FOR_RANGE_ROUNDING` | Any(\*) | Disables automatic rounding-up of `parallel_for` invocation ranges. | | `SYCL_CACHE_DIR` | Path | Path to persistent cache root directory. Default values are `%AppData%\libsycl_cache` for Windows and `$XDG_CACHE_HOME/libsycl_cache` on Linux, if `XDG_CACHE_HOME` is not set then `$HOME/.cache/libsycl_cache`. When none of the environment variables are set SYCL persistent cache is disabled. | From 4b40ee28821212d9eeec12a25d8dede0aadfb916 Mon Sep 17 00:00:00 2001 From: Andrei Elovikov Date: Mon, 18 Aug 2025 09:06:39 -0700 Subject: [PATCH 06/15] [CI] Change codeowners for backward ABI-compatibility exclude lists (#19820) https://github.com/intel/llvm/pull/19719 and https://github.com/intel/llvm/pull/19761 added pre-commit jobs to run E2E tests pre-built with latest "open-source" releases against the newly built sycl-toolchain libraries. Those can fail if either an actual break is happenning or if the test was doing some `FileCheck`ing and that output has changed in some way (which might not be an actual ABI break). However, I think the testing is still good enough to require an explicit approvals by folks in charge of ABI breaking changes. For the case of just output change the author should be able to convince owners that the change isn't ABI-breaking relatively easily. --- .github/CODEOWNERS | 3 +++ 1 file changed, 3 insertions(+) diff --git a/.github/CODEOWNERS b/.github/CODEOWNERS index 0713a66ef3ef..36551f70b10a 100644 --- a/.github/CODEOWNERS +++ b/.github/CODEOWNERS @@ -223,3 +223,6 @@ llvm/test/Instrumentation/ThreadSanitizer/ @intel/dpcpp-sanitizers-review sycl/test-e2e/AddressSanitizer/ @intel/dpcpp-sanitizers-review sycl/test-e2e/MemorySanitizer/ @intel/dpcpp-sanitizers-review sycl/test-e2e/ThreadSanitizer/ @intel/dpcpp-sanitizers-review + +# ABI compatibility +devops/compat_ci_exclude.sycl-rel-** @gmlueck @xtian-github From e6de77594d15963a51d687cf8b351aca9c60da0c Mon Sep 17 00:00:00 2001 From: HPS-1 Date: Mon, 18 Aug 2025 12:56:51 -0400 Subject: [PATCH 07/15] [SYCL] XFAIL ext_intel_cslice.cpp on DG2 (#19811) This test is currently failing on Windows DG2 machines. The minimal reproducer is: [minimal_reproducer.cpp](https://github.com/user-attachments/files/21798993/minimal_reproducer.cpp) It turns out that removing "ZEX_NUMBER_OF_CCS=0:4" from the minimal reproducer makes it pass. This matches the comments in https://github.com/intel/llvm/issues/18576 : the root cause here is that DG2 never supported `ZEX_NUMBER_OF_CCS`. Therefore, XFAIL-ing this test on all DG2 machines (previously it is only XFAIL-ed on Linux DG2 machines). Signed-off-by: Hu, Peisen --- sycl/test-e2e/Adapters/level_zero/ext_intel_cslice.cpp | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/sycl/test-e2e/Adapters/level_zero/ext_intel_cslice.cpp b/sycl/test-e2e/Adapters/level_zero/ext_intel_cslice.cpp index e30c3b8f1f94..951ae6ce8c47 100644 --- a/sycl/test-e2e/Adapters/level_zero/ext_intel_cslice.cpp +++ b/sycl/test-e2e/Adapters/level_zero/ext_intel_cslice.cpp @@ -4,7 +4,10 @@ // XFAIL: gpu-intel-pvc-1T // XFAIL-TRACKER: https://github.com/intel/llvm/issues/15699 -// XFAIL: linux && run-mode && (arch-intel_gpu_bmg_g21 || gpu-intel-dg2) && !igc-dev +// XFAIL: gpu-intel-dg2 +// XFAIL-TRACKER: https://github.com/intel/llvm/issues/18576 + +// XFAIL: linux && run-mode && arch-intel_gpu_bmg_g21 && !igc-dev // XFAIL-TRACKER: https://github.com/intel/llvm/issues/18576 // XFAIL: windows && arch-intel_gpu_bmg_g21 From d7ed8713afabb828b50225f59fb7fccde1420e8c Mon Sep 17 00:00:00 2001 From: Chris Perkins Date: Mon, 18 Aug 2025 11:06:36 -0700 Subject: [PATCH 08/15] revisiting after changes to DG Signed-off-by: Chris Perkins --- .../ClangOffloadWrapper.cpp | 52 ++++++- .../Offloading/SYCLOffloadWrapper.cpp | 54 ++++++- .../program_manager/program_manager.cpp | 10 +- .../Inputs/incrementing_lib.cpp | 25 ++++ .../IntermediateLib/multi_lib_app.cpp | 140 ++++++++++++++++++ 5 files changed, 272 insertions(+), 9 deletions(-) create mode 100644 sycl/test-e2e/IntermediateLib/Inputs/incrementing_lib.cpp create mode 100644 sycl/test-e2e/IntermediateLib/multi_lib_app.cpp diff --git a/clang/tools/clang-offload-wrapper/ClangOffloadWrapper.cpp b/clang/tools/clang-offload-wrapper/ClangOffloadWrapper.cpp index ee913fc8d0ee..75a4baea8bb0 100644 --- a/clang/tools/clang-offload-wrapper/ClangOffloadWrapper.cpp +++ b/clang/tools/clang-offload-wrapper/ClangOffloadWrapper.cpp @@ -1293,6 +1293,49 @@ class BinaryWrapper { appendToGlobalDtors(M, Func, /*Priority*/ 1); } + void createSyclRegisterWithAtexitUnregister(GlobalVariable *BinDesc) { + auto *UnregFuncTy = + FunctionType::get(Type::getVoidTy(C), /*isVarArg*/ false); + auto *UnregFunc = + Function::Create(UnregFuncTy, GlobalValue::InternalLinkage, + "sycl.descriptor_unreg.atexit", &M); + UnregFunc->setSection(".text.startup"); + + // Declaration for __sycl_unregister_lib(void*). + auto *UnregTargetTy = + FunctionType::get(Type::getVoidTy(C), getPtrTy(), false); + FunctionCallee UnregTargetC = + M.getOrInsertFunction("__sycl_unregister_lib", UnregTargetTy); + + IRBuilder<> UnregBuilder(BasicBlock::Create(C, "entry", UnregFunc)); + UnregBuilder.CreateCall(UnregTargetC, BinDesc); + UnregBuilder.CreateRetVoid(); + + auto *RegFuncTy = FunctionType::get(Type::getVoidTy(C), /*isVarArg*/ false); + auto *RegFunc = Function::Create(RegFuncTy, GlobalValue::InternalLinkage, + "sycl.descriptor_reg", &M); + RegFunc->setSection(".text.startup"); + + auto *RegTargetTy = + FunctionType::get(Type::getVoidTy(C), getPtrTy(), false); + FunctionCallee RegTargetC = + M.getOrInsertFunction("__sycl_register_lib", RegTargetTy); + + // `atexit` takes a `void(*)()` function pointer. In LLVM IR, this is + // typically represented as `i32 (ptr)`. + FunctionType *AtExitTy = + FunctionType::get(Type::getInt32Ty(C), getPtrTy(), false); + FunctionCallee AtExitC = M.getOrInsertFunction("atexit", AtExitTy); + + IRBuilder<> RegBuilder(BasicBlock::Create(C, "entry", RegFunc)); + RegBuilder.CreateCall(RegTargetC, BinDesc); + RegBuilder.CreateCall(AtExitC, UnregFunc); + RegBuilder.CreateRetVoid(); + + // Add this function to global destructors. + appendToGlobalCtors(M, RegFunc, /*Priority*/ 1); + } + public: BinaryWrapper(StringRef Target, StringRef ToolName, StringRef SymPropBCFiles = "") @@ -1370,8 +1413,13 @@ class BinaryWrapper { if (EmitRegFuncs) { GlobalVariable *Desc = *DescOrErr; - createRegisterFunction(Kind, Desc); - createUnregisterFunction(Kind, Desc); + if (Kind == OffloadKind::SYCL && + Triple(M.getTargetTriple()).isOSWindows()) { + createSyclRegisterWithAtexitUnregister(Desc); + } else { + createRegisterFunction(Kind, Desc); + createUnregisterFunction(Kind, Desc); + } } } return &M; diff --git a/llvm/lib/Frontend/Offloading/SYCLOffloadWrapper.cpp b/llvm/lib/Frontend/Offloading/SYCLOffloadWrapper.cpp index 3d227d0c2e05..f54d552f1062 100644 --- a/llvm/lib/Frontend/Offloading/SYCLOffloadWrapper.cpp +++ b/llvm/lib/Frontend/Offloading/SYCLOffloadWrapper.cpp @@ -34,6 +34,7 @@ #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/LineIterator.h" #include "llvm/Support/PropertySetIO.h" +#include "llvm/TargetParser/Triple.h" #include "llvm/Transforms/Utils/ModuleUtils.h" #include #include @@ -734,6 +735,51 @@ struct Wrapper { // Add this function to global destructors. appendToGlobalDtors(M, Func, /*Priority*/ 1); } + + void createSyclRegisterWithAtexitUnregister(GlobalVariable *FatbinDesc) { + auto *UnregFuncTy = + FunctionType::get(Type::getVoidTy(C), /*isVarArg*/ false); + auto *UnregFunc = + Function::Create(UnregFuncTy, GlobalValue::InternalLinkage, + "sycl.descriptor_unreg.atexit", &M); + UnregFunc->setSection(".text.startup"); + + // Declaration for __sycl_unregister_lib(void*). + auto *UnregTargetTy = + FunctionType::get(Type::getVoidTy(C), PointerType::getUnqual(C), false); + FunctionCallee UnregTargetC = + M.getOrInsertFunction("__sycl_unregister_lib", UnregTargetTy); + + // Body of the unregister wrapper. + IRBuilder<> UnregBuilder(BasicBlock::Create(C, "entry", UnregFunc)); + UnregBuilder.CreateCall(UnregTargetC, FatbinDesc); + UnregBuilder.CreateRetVoid(); + + auto *RegFuncTy = FunctionType::get(Type::getVoidTy(C), /*isVarArg*/ false); + auto *RegFunc = Function::Create(RegFuncTy, GlobalValue::InternalLinkage, + "sycl.descriptor_reg", &M); + RegFunc->setSection(".text.startup"); + + auto *RegTargetTy = + FunctionType::get(Type::getVoidTy(C), PointerType::getUnqual(C), false); + FunctionCallee RegTargetC = + M.getOrInsertFunction("__sycl_register_lib", RegTargetTy); + + // `atexit` takes a `void(*)()` function pointer. In LLVM IR, this is + // typically represented as `i32 (ptr)`. + FunctionType *AtExitTy = FunctionType::get( + Type::getInt32Ty(C), PointerType::getUnqual(C), false); + FunctionCallee AtExitC = M.getOrInsertFunction("atexit", AtExitTy); + + IRBuilder<> RegBuilder(BasicBlock::Create(C, "entry", RegFunc)); + RegBuilder.CreateCall(RegTargetC, FatbinDesc); + RegBuilder.CreateCall(AtExitC, UnregFunc); + RegBuilder.CreateRetVoid(); + + // Finally, add to global constructors. + appendToGlobalCtors(M, RegFunc, /*Priority*/ 1); + } + }; // end of Wrapper } // anonymous namespace @@ -747,7 +793,11 @@ Error llvm::offloading::wrapSYCLBinaries(llvm::Module &M, return createStringError(inconvertibleErrorCode(), "No binary descriptors created."); - W.createRegisterFatbinFunction(Desc); - W.createUnregisterFunction(Desc); + if (Triple(M.getTargetTriple()).isOSWindows()) { + W.createSyclRegisterWithAtexitUnregister(Desc); + } else { + W.createRegisterFatbinFunction(Desc); + W.createUnregisterFunction(Desc); + } return Error::success(); } diff --git a/sycl/source/detail/program_manager/program_manager.cpp b/sycl/source/detail/program_manager/program_manager.cpp index 77f28a5131f8..4a6ab5949960 100644 --- a/sycl/source/detail/program_manager/program_manager.cpp +++ b/sycl/source/detail/program_manager/program_manager.cpp @@ -3887,9 +3887,9 @@ extern "C" void __sycl_register_lib(sycl_device_binaries desc) { // Executed as a part of current module's (.exe, .dll) static initialization extern "C" void __sycl_unregister_lib(sycl_device_binaries desc) { // Partial cleanup is not necessary at shutdown -#ifndef _WIN32 - if (!sycl::detail::GlobalHandler::instance().isOkToDefer()) - return; - sycl::detail::ProgramManager::getInstance().removeImages(desc); -#endif +// #ifndef _WIN32 +// if (!sycl::detail::GlobalHandler::instance().isOkToDefer()) +// return; + sycl::detail::ProgramManager::getInstance().removeImages(desc); +// #endif } diff --git a/sycl/test-e2e/IntermediateLib/Inputs/incrementing_lib.cpp b/sycl/test-e2e/IntermediateLib/Inputs/incrementing_lib.cpp new file mode 100644 index 000000000000..62a2d0638128 --- /dev/null +++ b/sycl/test-e2e/IntermediateLib/Inputs/incrementing_lib.cpp @@ -0,0 +1,25 @@ +#include + +#if defined(_WIN32) +#define API_EXPORT __declspec(dllexport) +#else +#define API_EXPORT +#endif + +#ifndef INC +#define INC 1 +#endif + +#ifndef CLASSNAME +#define CLASSNAME same +#endif + +extern "C" API_EXPORT void performIncrementation(sycl::queue &q, + sycl::buffer &buf) { + sycl::range<1> r = buf.get_range(); + q.submit([&](sycl::handler &cgh) { + auto acc = buf.get_access(cgh); + cgh.parallel_for( + r, [=](sycl::id<1> idx) { acc[idx] += INC; }); + }); +} \ No newline at end of file diff --git a/sycl/test-e2e/IntermediateLib/multi_lib_app.cpp b/sycl/test-e2e/IntermediateLib/multi_lib_app.cpp new file mode 100644 index 000000000000..072169df892c --- /dev/null +++ b/sycl/test-e2e/IntermediateLib/multi_lib_app.cpp @@ -0,0 +1,140 @@ +// UNSUPPORTED: cuda || hip +// UNSUPPORTED-TRACKER: CMPLRLLVM-69415 + +// DEFINE: %{fPIC_flag} = %if windows %{%} %else %{-fPIC%} +// DEFINE: %{shared_lib_ext} = %if windows %{dll%} %else %{so%} + +// clang-format off +// IMPORTANT -DSO_PATH='R"(%T)"' +// We need to capture %T, the build directory, in a string +// and the normal STRINGIFY() macros hack won't work. +// Because on Windows, the path delimiters are \, +// which C++ preprocessor converts to escape sequences, +// which becomes a nightmare. +// So the hack here is to put heredoc in the definition +// and use single quotes, which Python forgivingly accepts. +// clang-format on + +// RUN: %{build} %{fPIC_flag} -DSO_PATH='R"(%T)"' -o %t.out + +// RUN: %clangxx -fsycl %{fPIC_flag} -shared -DINC=1 -o %T/lib_a.%{shared_lib_ext} %S/Inputs/incrementing_lib.cpp +// RUN: %clangxx -fsycl %{fPIC_flag} -shared -DINC=2 -o %T/lib_b.%{shared_lib_ext} %S/Inputs/incrementing_lib.cpp +// RUN: %clangxx -fsycl %{fPIC_flag} -shared -DINC=4 -o %T/lib_c.%{shared_lib_ext} %S/Inputs/incrementing_lib.cpp + +// RUN: env UR_L0_LEAKS_DEBUG=1 %{run} %t.out + +// This test uses a kernel of the same name in three different shared libraries. +// It loads each library, calls the kernel, and checks that the incrementation +// is done correctly, and then unloads the library. +// This test ensures that __sycl_register_lib() and __sycl_unregister_lib() +// are called correctly, and that the device images are cleaned up properly. + +#include + +using namespace sycl::ext::oneapi::experimental; + + +#ifdef _WIN32 +#include + +void *loadOsLibrary(const std::string &LibraryPath) { + HMODULE h = + LoadLibraryExA(LibraryPath.c_str(), NULL, LOAD_WITH_ALTERED_SEARCH_PATH); + return (void *)h; +} +int unloadOsLibrary(void *Library) { + return FreeLibrary((HMODULE)Library) ? 0 : 1; +} +void *getOsLibraryFuncAddress(void *Library, const std::string &FunctionName) { + return (void *)GetProcAddress((HMODULE)Library, FunctionName.c_str()); +} + +#else +#include + +void *loadOsLibrary(const std::string &LibraryPath) { + void *so = dlopen(LibraryPath.c_str(), RTLD_NOW); + if (!so) { + char *Error = dlerror(); + std::cerr << "dlopen(" << LibraryPath << ") failed with <" + << (Error ? Error : "unknown error") << ">" << std::endl; + } + return so; +} + +int unloadOsLibrary(void *Library) { return dlclose(Library); } + +void *getOsLibraryFuncAddress(void *Library, const std::string &FunctionName) { + return dlsym(Library, FunctionName.c_str()); +} +#endif + +// Define the function pointer type for performIncrementation +using IncFuncT = void(sycl::queue &, sycl::buffer &); + +void initializeBuffer(sycl::buffer &buf) { + auto acc = sycl::host_accessor(buf); + for (size_t i = 0; i < buf.size(); ++i) + acc[i] = 0; +} + +void checkIncrementation(sycl::buffer &buf, int val) { + auto acc = sycl::host_accessor(buf); + for (size_t i = 0; i < buf.size(); ++i) { + std::cout << acc[i] << " "; + assert(acc[i] == val); + } + std::cout << std::endl; +} + +int main() { + sycl::queue q; + + sycl::range<1> r(8); + sycl::buffer buf(r); + initializeBuffer(buf); + + std::string base_path = SO_PATH; + +#ifdef _WIN32 + std::string path_to_lib_a = base_path + "\\lib_a.dll"; + std::string path_to_lib_b = base_path + "\\lib_b.dll"; + std::string path_to_lib_c = base_path + "\\lib_c.dll"; +#else + std::string path_to_lib_a = base_path + "/lib_a.so"; + std::string path_to_lib_b = base_path + "/lib_b.so"; + std::string path_to_lib_c = base_path + "/lib_c.so"; +#endif + + std::cout << "paths: " << path_to_lib_a << std::endl; + std::cout << "SO_PATH: " << SO_PATH << std::endl; + + void *lib_a = loadOsLibrary(path_to_lib_a); + void *f = getOsLibraryFuncAddress(lib_a, "performIncrementation"); + auto performIncrementationFuncA = reinterpret_cast(f); + performIncrementationFuncA(q, buf); // call the function from lib_a + q.wait(); + checkIncrementation(buf, 1); + unloadOsLibrary(lib_a); + std::cout << "lib_a done" << std::endl; + + void *lib_b = loadOsLibrary(path_to_lib_b); + f = getOsLibraryFuncAddress(lib_b, "performIncrementation"); + auto performIncrementationFuncB = reinterpret_cast(f); + performIncrementationFuncB(q, buf); // call the function from lib_b + q.wait(); + checkIncrementation(buf, 1 + 2); + unloadOsLibrary(lib_b); + std::cout << "lib_b done" << std::endl; + + void *lib_c = loadOsLibrary(path_to_lib_c); + f = getOsLibraryFuncAddress(lib_c, "performIncrementation"); + auto performIncrementationFuncC = reinterpret_cast(f); + q.wait(); + performIncrementationFuncC(q, buf); // call the function from lib_c + checkIncrementation(buf, 1 + 2 + 4); + unloadOsLibrary(lib_c); + std::cout << "lib_c done" << std::endl; + + return 0; +} \ No newline at end of file From d0bebe22df11b8d0138fccaf8c5610a68b04ca8e Mon Sep 17 00:00:00 2001 From: Chris Perkins Date: Mon, 18 Aug 2025 15:07:26 -0700 Subject: [PATCH 09/15] logging Signed-off-by: Chris Perkins --- sycl/source/detail/context_impl.cpp | 1 + .../source/detail/device_global_map_entry.cpp | 4 +++ .../program_manager/program_manager.cpp | 1 + .../IntermediateLib/multi_lib_app.cpp | 26 +++++++++++++++++++ 4 files changed, 32 insertions(+) diff --git a/sycl/source/detail/context_impl.cpp b/sycl/source/detail/context_impl.cpp index 5e027466d794..e23765443656 100644 --- a/sycl/source/detail/context_impl.cpp +++ b/sycl/source/detail/context_impl.cpp @@ -116,6 +116,7 @@ cl_context context_impl::get() const { } context_impl::~context_impl() { + std::cout << "~context_impl destructor. " << std::endl; try { // Free all events associated with the initialization of device globals. for (auto &DeviceGlobalInitializer : MDeviceGlobalInitializers) diff --git a/sycl/source/detail/device_global_map_entry.cpp b/sycl/source/detail/device_global_map_entry.cpp index 1f82a605056d..9a5728902364 100644 --- a/sycl/source/detail/device_global_map_entry.cpp +++ b/sycl/source/detail/device_global_map_entry.cpp @@ -23,6 +23,8 @@ DeviceGlobalUSMMem::~DeviceGlobalUSMMem() { // these here. assert(MPtr == nullptr && "MPtr has not been cleaned up."); assert(MInitEvent == nullptr && "MInitEvent has not been cleaned up."); + + std::cout << "~DeviceGlobalUSMMem destructor. " << (MPtr == nullptr) << " " << (MInitEvent == nullptr) << std::endl; } OwnedUrEvent DeviceGlobalUSMMem::getInitEvent(adapter_impl &Adapter) { @@ -152,6 +154,7 @@ DeviceGlobalMapEntry::getOrAllocateDeviceGlobalUSM(const context &Context) { void DeviceGlobalMapEntry::removeAssociatedResources( const context_impl *CtxImpl) { + std::cout << "DeviceGlobalMapEntry::removeAssociatedResources() entered." << std::endl; std::lock_guard Lock{MDeviceToUSMPtrMapMutex}; for (device_impl &Device : CtxImpl->getDevices()) { auto USMPtrIt = MDeviceToUSMPtrMap.find({&Device, CtxImpl}); @@ -170,6 +173,7 @@ void DeviceGlobalMapEntry::removeAssociatedResources( MDeviceToUSMPtrMap.erase(USMPtrIt); } } + std::cout << "DeviceGlobalMapEntry::removeAssociatedResources() exiting." << std::endl; } void DeviceGlobalMapEntry::cleanup() { diff --git a/sycl/source/detail/program_manager/program_manager.cpp b/sycl/source/detail/program_manager/program_manager.cpp index 4a6ab5949960..bf61c9120a52 100644 --- a/sycl/source/detail/program_manager/program_manager.cpp +++ b/sycl/source/detail/program_manager/program_manager.cpp @@ -3890,6 +3890,7 @@ extern "C" void __sycl_unregister_lib(sycl_device_binaries desc) { // #ifndef _WIN32 // if (!sycl::detail::GlobalHandler::instance().isOkToDefer()) // return; + std::cout << "__sycl_unregister_lib()" << std::endl; sycl::detail::ProgramManager::getInstance().removeImages(desc); // #endif } diff --git a/sycl/test-e2e/IntermediateLib/multi_lib_app.cpp b/sycl/test-e2e/IntermediateLib/multi_lib_app.cpp index 072169df892c..d725c27f69b5 100644 --- a/sycl/test-e2e/IntermediateLib/multi_lib_app.cpp +++ b/sycl/test-e2e/IntermediateLib/multi_lib_app.cpp @@ -29,6 +29,32 @@ // This test ensures that __sycl_register_lib() and __sycl_unregister_lib() // are called correctly, and that the device images are cleaned up properly. + +/* + CP -- Other Notes + + test-e2e/DeviceGlobals/device_global_static.cpp => dgs.bin + + LINUX + ./dgs.bin + ~context_impl destructor. + DeviceGlobalMapEntry::removeAssociatedResources() entered. + ~DeviceGlobalUSMMem destructor. 1 1 + DeviceGlobalMapEntry::removeAssociatedResources() exiting. + __sycl_unregister_lib() + + + multi_lib_app test: +# | lib_b done +# | 7 7 7 7 7 7 7 7 +# | __sycl_unregister_lib() +# | lib_c done +# | ~context_impl destructor. +# | __sycl_unregister_lib() + + +*/ + #include using namespace sycl::ext::oneapi::experimental; From f9867bb030fc24d6e10e78e15d841867249fd2ab Mon Sep 17 00:00:00 2001 From: Chris Perkins Date: Mon, 18 Aug 2025 15:23:39 -0700 Subject: [PATCH 10/15] moar logging Signed-off-by: Chris Perkins --- sycl/source/detail/device_global_map.hpp | 3 +++ sycl/test-e2e/IntermediateLib/multi_lib_app.cpp | 8 +++++++- 2 files changed, 10 insertions(+), 1 deletion(-) diff --git a/sycl/source/detail/device_global_map.hpp b/sycl/source/detail/device_global_map.hpp index 256c48066ec8..9b1f58ab5065 100644 --- a/sycl/source/detail/device_global_map.hpp +++ b/sycl/source/detail/device_global_map.hpp @@ -76,6 +76,7 @@ class DeviceGlobalMap { void eraseEntries(const RTDeviceBinaryImage *Img) { const auto &DeviceGlobals = Img->getDeviceGlobals(); std::lock_guard DeviceGlobalsGuard(MDeviceGlobalsMutex); + std::cout << "DeviceGlobalMap::eraseEntries() with: " << DeviceGlobals.size() << " entries." << std::endl; for (const sycl_device_binary_property &DeviceGlobal : DeviceGlobals) { if (auto DevGlobalIt = MDeviceGlobals.find(DeviceGlobal->Name); DevGlobalIt != MDeviceGlobals.end()) { @@ -85,6 +86,8 @@ class DeviceGlobalMap { const std::pair &Entry) { return Entry.second == DevGlobalIt->second.get(); }); + std::cout << "About to Erase: " << (findDevGlobalByValue != MPtr2DeviceGlobal.end() ? "MPtr2DeviceGlobal.erase(findDevGlobalByValue)" : " ") + << " and MDeviceGlobals.erase(DevGlobalIt)" << std::endl; if (findDevGlobalByValue != MPtr2DeviceGlobal.end()) MPtr2DeviceGlobal.erase(findDevGlobalByValue); MDeviceGlobals.erase(DevGlobalIt); diff --git a/sycl/test-e2e/IntermediateLib/multi_lib_app.cpp b/sycl/test-e2e/IntermediateLib/multi_lib_app.cpp index d725c27f69b5..3c2cc9bf13a5 100644 --- a/sycl/test-e2e/IntermediateLib/multi_lib_app.cpp +++ b/sycl/test-e2e/IntermediateLib/multi_lib_app.cpp @@ -36,12 +36,14 @@ test-e2e/DeviceGlobals/device_global_static.cpp => dgs.bin LINUX - ./dgs.bin + $ ./dgs.bin ~context_impl destructor. DeviceGlobalMapEntry::removeAssociatedResources() entered. ~DeviceGlobalUSMMem destructor. 1 1 DeviceGlobalMapEntry::removeAssociatedResources() exiting. __sycl_unregister_lib() + DeviceGlobalMap::eraseEntries() with: 1 entries. + About to Erase: MPtr2DeviceGlobal.erase(findDevGlobalByValue) and MDeviceGlobals.erase(DevGlobalIt) multi_lib_app test: @@ -52,6 +54,10 @@ # | ~context_impl destructor. # | __sycl_unregister_lib() +WINDOWS + ./dgs.exe + __sycl_unregister_lib() + ~DeviceGlobalUSMMem destructor <-- asserts */ From 1f45299947f3ef5129d410358e33006d6a9e5ba3 Mon Sep 17 00:00:00 2001 From: Chris Perkins Date: Mon, 18 Aug 2025 15:32:13 -0700 Subject: [PATCH 11/15] loggg Signed-off-by: Chris Perkins --- sycl/source/detail/device_global_map_entry.cpp | 4 +++- sycl/test-e2e/IntermediateLib/multi_lib_app.cpp | 8 +++++--- 2 files changed, 8 insertions(+), 4 deletions(-) diff --git a/sycl/source/detail/device_global_map_entry.cpp b/sycl/source/detail/device_global_map_entry.cpp index 9a5728902364..23fcbf6cba68 100644 --- a/sycl/source/detail/device_global_map_entry.cpp +++ b/sycl/source/detail/device_global_map_entry.cpp @@ -18,13 +18,15 @@ inline namespace _V1 { namespace detail { DeviceGlobalUSMMem::~DeviceGlobalUSMMem() { + + std::cout << "~DeviceGlobalUSMMem destructor. " << (MPtr == nullptr) << " " << (MInitEvent == nullptr) << std::endl; // removeAssociatedResources is expected to have cleaned up both the pointer // and the event. When asserts are enabled the values are set, so we check // these here. assert(MPtr == nullptr && "MPtr has not been cleaned up."); assert(MInitEvent == nullptr && "MInitEvent has not been cleaned up."); - std::cout << "~DeviceGlobalUSMMem destructor. " << (MPtr == nullptr) << " " << (MInitEvent == nullptr) << std::endl; + } OwnedUrEvent DeviceGlobalUSMMem::getInitEvent(adapter_impl &Adapter) { diff --git a/sycl/test-e2e/IntermediateLib/multi_lib_app.cpp b/sycl/test-e2e/IntermediateLib/multi_lib_app.cpp index 3c2cc9bf13a5..8b9093c98cbb 100644 --- a/sycl/test-e2e/IntermediateLib/multi_lib_app.cpp +++ b/sycl/test-e2e/IntermediateLib/multi_lib_app.cpp @@ -55,9 +55,11 @@ # | __sycl_unregister_lib() WINDOWS - ./dgs.exe - __sycl_unregister_lib() - ~DeviceGlobalUSMMem destructor <-- asserts +$ ./dgs.exe +__sycl_unregister_lib() +DeviceGlobalMap::eraseEntries() with: 1 entries. +About to Erase: MPtr2DeviceGlobal.erase(findDevGlobalByValue) and MDeviceGlobals.erase(DevGlobalIt) +Assertion failed: MPtr == nullptr && "MPtr has not been cleaned up.", file C:\iusers\cperkins\sycl_workspace\llvm\sycl\source\detail\device_global_map_entry.cpp, line 24 */ From 3fbad2eff7ba7a04f4446472b5fa1c308c91dcda Mon Sep 17 00:00:00 2001 From: Chris Perkins Date: Mon, 18 Aug 2025 16:08:49 -0700 Subject: [PATCH 12/15] log Signed-off-by: Chris Perkins --- sycl/source/detail/device_global_map.hpp | 5 +++-- sycl/test-e2e/IntermediateLib/multi_lib_app.cpp | 5 +++-- 2 files changed, 6 insertions(+), 4 deletions(-) diff --git a/sycl/source/detail/device_global_map.hpp b/sycl/source/detail/device_global_map.hpp index 9b1f58ab5065..c80250be1a51 100644 --- a/sycl/source/detail/device_global_map.hpp +++ b/sycl/source/detail/device_global_map.hpp @@ -86,10 +86,11 @@ class DeviceGlobalMap { const std::pair &Entry) { return Entry.second == DevGlobalIt->second.get(); }); - std::cout << "About to Erase: " << (findDevGlobalByValue != MPtr2DeviceGlobal.end() ? "MPtr2DeviceGlobal.erase(findDevGlobalByValue)" : " ") - << " and MDeviceGlobals.erase(DevGlobalIt)" << std::endl; + std::cout << "About to Erase: " << (findDevGlobalByValue != MPtr2DeviceGlobal.end() ? "MPtr2DeviceGlobal.erase(findDevGlobalByValue)" : " ") << std::endl; if (findDevGlobalByValue != MPtr2DeviceGlobal.end()) MPtr2DeviceGlobal.erase(findDevGlobalByValue); + + std::cout << " and MDeviceGlobals.erase(DevGlobalIt)" << std::endl; MDeviceGlobals.erase(DevGlobalIt); } } diff --git a/sycl/test-e2e/IntermediateLib/multi_lib_app.cpp b/sycl/test-e2e/IntermediateLib/multi_lib_app.cpp index 8b9093c98cbb..a81a95282827 100644 --- a/sycl/test-e2e/IntermediateLib/multi_lib_app.cpp +++ b/sycl/test-e2e/IntermediateLib/multi_lib_app.cpp @@ -55,11 +55,12 @@ # | __sycl_unregister_lib() WINDOWS -$ ./dgs.exe +$ dgs.exe __sycl_unregister_lib() DeviceGlobalMap::eraseEntries() with: 1 entries. About to Erase: MPtr2DeviceGlobal.erase(findDevGlobalByValue) and MDeviceGlobals.erase(DevGlobalIt) -Assertion failed: MPtr == nullptr && "MPtr has not been cleaned up.", file C:\iusers\cperkins\sycl_workspace\llvm\sycl\source\detail\device_global_map_entry.cpp, line 24 +~DeviceGlobalUSMMem destructor. 0 0 +Assertion failed: MPtr == nullptr && "MPtr has not been cleaned up.", file C:\iusers\cperkins\sycl_workspace\llvm\sycl\source\detail\device_global_map_entry.cpp, line 26 */ From e79ac8f0dc80645625816f51e53b2fb3bad5903f Mon Sep 17 00:00:00 2001 From: Chris Perkins Date: Tue, 19 Aug 2025 13:35:52 -0700 Subject: [PATCH 13/15] if wishes were horses Signed-off-by: Chris Perkins --- sycl/source/detail/context_impl.cpp | 3 ++- sycl/source/detail/device_global_map.hpp | 12 ++++++--- .../IntermediateLib/multi_lib_app.cpp | 27 +++++++++++++++---- 3 files changed, 32 insertions(+), 10 deletions(-) diff --git a/sycl/source/detail/context_impl.cpp b/sycl/source/detail/context_impl.cpp index e23765443656..8bd7feffed40 100644 --- a/sycl/source/detail/context_impl.cpp +++ b/sycl/source/detail/context_impl.cpp @@ -126,7 +126,8 @@ context_impl::~context_impl() { DeviceGlobalMapEntry *DGEntry = detail::ProgramManager::getInstance().getDeviceGlobalEntry( DeviceGlobal); - DGEntry->removeAssociatedResources(this); + if (DGEntry != nullptr) + DGEntry->removeAssociatedResources(this); } MCachedLibPrograms.clear(); // TODO catch an exception and put it to list of asynchronous exceptions diff --git a/sycl/source/detail/device_global_map.hpp b/sycl/source/detail/device_global_map.hpp index c80250be1a51..e50994e3d8c2 100644 --- a/sycl/source/detail/device_global_map.hpp +++ b/sycl/source/detail/device_global_map.hpp @@ -90,8 +90,10 @@ class DeviceGlobalMap { if (findDevGlobalByValue != MPtr2DeviceGlobal.end()) MPtr2DeviceGlobal.erase(findDevGlobalByValue); - std::cout << " and MDeviceGlobals.erase(DevGlobalIt)" << std::endl; - MDeviceGlobals.erase(DevGlobalIt); + std::cout << " and MDeviceGlobals.erase(DevGlobalIt) no more" + << std::endl; + // CP -- removing this. + // MDeviceGlobals.erase(DevGlobalIt); } } } @@ -116,8 +118,10 @@ class DeviceGlobalMap { DeviceGlobalMapEntry *getEntry(const void *DeviceGlobalPtr) { std::lock_guard DeviceGlobalsGuard(MDeviceGlobalsMutex); auto Entry = MPtr2DeviceGlobal.find(DeviceGlobalPtr); - assert(Entry != MPtr2DeviceGlobal.end() && "Device global entry not found"); - return Entry->second; + // CP + // assert(Entry != MPtr2DeviceGlobal.end() && "Device global entry not + // found"); + return (Entry != MPtr2DeviceGlobal.end()) ? Entry->second : nullptr; } DeviceGlobalMapEntry * diff --git a/sycl/test-e2e/IntermediateLib/multi_lib_app.cpp b/sycl/test-e2e/IntermediateLib/multi_lib_app.cpp index a81a95282827..2f82e0e3235e 100644 --- a/sycl/test-e2e/IntermediateLib/multi_lib_app.cpp +++ b/sycl/test-e2e/IntermediateLib/multi_lib_app.cpp @@ -56,14 +56,31 @@ WINDOWS $ dgs.exe -__sycl_unregister_lib() -DeviceGlobalMap::eraseEntries() with: 1 entries. -About to Erase: MPtr2DeviceGlobal.erase(findDevGlobalByValue) and MDeviceGlobals.erase(DevGlobalIt) -~DeviceGlobalUSMMem destructor. 0 0 -Assertion failed: MPtr == nullptr && "MPtr has not been cleaned up.", file C:\iusers\cperkins\sycl_workspace\llvm\sycl\source\detail\device_global_map_entry.cpp, line 26 + __sycl_unregister_lib() + DeviceGlobalMap::eraseEntries() with: 1 entries. + About to Erase: MPtr2DeviceGlobal.erase(findDevGlobalByValue) + and MDeviceGlobals.erase(DevGlobalIt) + ~DeviceGlobalUSMMem destructor. 0 0 + Assertion failed: MPtr == nullptr && "MPtr has not been cleaned up.", file C:\iusers\cperkins\sycl_workspace\llvm\sycl\source\detail\device_global_map_entry.cpp, line 26 + + + +NEUTER MDeviceGlobals.erase(DevGlobalIt). THEN +$ dgs.exe + __sycl_unregister_lib() + DeviceGlobalMap::eraseEntries() with: 1 entries. + About to Erase: MPtr2DeviceGlobal.erase(findDevGlobalByValue) + and MDeviceGlobals.erase(DevGlobalIt) no more + ~context_impl destructor. + Assertion failed: Entry != MPtr2DeviceGlobal.end() && "Device global entry not found", file C:\iusers\cperkins\sycl_workspace\llvm\sycl\source\detail/device_global_map.hpp, line 119 + + + */ + + #include using namespace sycl::ext::oneapi::experimental; From 3fb81050c89b8429248beb6615157cd3a2695060 Mon Sep 17 00:00:00 2001 From: Chris Perkins Date: Tue, 19 Aug 2025 15:38:55 -0700 Subject: [PATCH 14/15] fix for win, test reload of shared lib with device global Signed-off-by: Chris Perkins --- .../source/detail/device_global_map_entry.cpp | 6 ++--- .../Inputs/incrementing_lib.cpp | 10 +++++++++ .../IntermediateLib/multi_lib_app.cpp | 22 ++++++++++++++----- 3 files changed, 29 insertions(+), 9 deletions(-) diff --git a/sycl/source/detail/device_global_map_entry.cpp b/sycl/source/detail/device_global_map_entry.cpp index 23fcbf6cba68..efc3c8daf92b 100644 --- a/sycl/source/detail/device_global_map_entry.cpp +++ b/sycl/source/detail/device_global_map_entry.cpp @@ -23,10 +23,8 @@ DeviceGlobalUSMMem::~DeviceGlobalUSMMem() { // removeAssociatedResources is expected to have cleaned up both the pointer // and the event. When asserts are enabled the values are set, so we check // these here. - assert(MPtr == nullptr && "MPtr has not been cleaned up."); - assert(MInitEvent == nullptr && "MInitEvent has not been cleaned up."); - - + // assert(MPtr == nullptr && "MPtr has not been cleaned up."); + // assert(MInitEvent == nullptr && "MInitEvent has not been cleaned up."); } OwnedUrEvent DeviceGlobalUSMMem::getInitEvent(adapter_impl &Adapter) { diff --git a/sycl/test-e2e/IntermediateLib/Inputs/incrementing_lib.cpp b/sycl/test-e2e/IntermediateLib/Inputs/incrementing_lib.cpp index 62a2d0638128..cd86d85c4673 100644 --- a/sycl/test-e2e/IntermediateLib/Inputs/incrementing_lib.cpp +++ b/sycl/test-e2e/IntermediateLib/Inputs/incrementing_lib.cpp @@ -14,6 +14,16 @@ #define CLASSNAME same #endif +// Macro to concatenate DGVar with CLASSNAME +#define CONCAT_HELPER(a, b) a##b +#define CONCAT(a, b) CONCAT_HELPER(a, b) + +// #ifdef WITH_DEVICE_GLOBALS +using SomeProperties = decltype(sycl::ext::oneapi::experimental::properties{}); +sycl::ext::oneapi::experimental::device_global + CONCAT(DGVar, CLASSNAME) __attribute__((visibility("default"))); +// #endif + extern "C" API_EXPORT void performIncrementation(sycl::queue &q, sycl::buffer &buf) { sycl::range<1> r = buf.get_range(); diff --git a/sycl/test-e2e/IntermediateLib/multi_lib_app.cpp b/sycl/test-e2e/IntermediateLib/multi_lib_app.cpp index 2f82e0e3235e..92428979e0a7 100644 --- a/sycl/test-e2e/IntermediateLib/multi_lib_app.cpp +++ b/sycl/test-e2e/IntermediateLib/multi_lib_app.cpp @@ -17,9 +17,9 @@ // RUN: %{build} %{fPIC_flag} -DSO_PATH='R"(%T)"' -o %t.out -// RUN: %clangxx -fsycl %{fPIC_flag} -shared -DINC=1 -o %T/lib_a.%{shared_lib_ext} %S/Inputs/incrementing_lib.cpp -// RUN: %clangxx -fsycl %{fPIC_flag} -shared -DINC=2 -o %T/lib_b.%{shared_lib_ext} %S/Inputs/incrementing_lib.cpp -// RUN: %clangxx -fsycl %{fPIC_flag} -shared -DINC=4 -o %T/lib_c.%{shared_lib_ext} %S/Inputs/incrementing_lib.cpp +// RUN: %clangxx -fsycl %{fPIC_flag} -shared -DINC=1 -DCLASSNAME=one -o %T/lib_a.%{shared_lib_ext} %S/Inputs/incrementing_lib.cpp +// RUN: %clangxx -fsycl %{fPIC_flag} -shared -DINC=2 -DCLASSNAME=two -o %T/lib_b.%{shared_lib_ext} %S/Inputs/incrementing_lib.cpp +// RUN: %clangxx -fsycl %{fPIC_flag} -shared -DINC=4 -DCLASSNAME=fou -o %T/lib_c.%{shared_lib_ext} %S/Inputs/incrementing_lib.cpp // RUN: env UR_L0_LEAKS_DEBUG=1 %{run} %t.out @@ -170,12 +170,24 @@ int main() { unloadOsLibrary(lib_a); std::cout << "lib_a done" << std::endl; + + // Now RELOAD lib_a and try it again. + lib_a = loadOsLibrary(path_to_lib_a); + f = getOsLibraryFuncAddress(lib_a, "performIncrementation"); + performIncrementationFuncA = reinterpret_cast(f); + performIncrementationFuncA(q, buf); // call the function from lib_a + q.wait(); + checkIncrementation(buf, 1 + 1); + unloadOsLibrary(lib_a); + std::cout << "reload of lib_a done" << std::endl; + + void *lib_b = loadOsLibrary(path_to_lib_b); f = getOsLibraryFuncAddress(lib_b, "performIncrementation"); auto performIncrementationFuncB = reinterpret_cast(f); performIncrementationFuncB(q, buf); // call the function from lib_b q.wait(); - checkIncrementation(buf, 1 + 2); + checkIncrementation(buf, 1 + 1 + 2); unloadOsLibrary(lib_b); std::cout << "lib_b done" << std::endl; @@ -184,7 +196,7 @@ int main() { auto performIncrementationFuncC = reinterpret_cast(f); q.wait(); performIncrementationFuncC(q, buf); // call the function from lib_c - checkIncrementation(buf, 1 + 2 + 4); + checkIncrementation(buf, 1 + 1 + 2 + 4); unloadOsLibrary(lib_c); std::cout << "lib_c done" << std::endl; From 519533374c046d0ae5e2ae4e4097d09086cbaa1a Mon Sep 17 00:00:00 2001 From: Chris Perkins Date: Tue, 19 Aug 2025 16:23:45 -0700 Subject: [PATCH 15/15] cleanup Signed-off-by: Chris Perkins --- sycl/source/detail/context_impl.cpp | 1 - sycl/source/detail/device_global_map.hpp | 9 +-- .../source/detail/device_global_map_entry.cpp | 12 ---- .../source/detail/device_global_map_entry.hpp | 2 +- .../program_manager/program_manager.cpp | 8 +-- .../Inputs/incrementing_lib.cpp | 9 ++- .../IntermediateLib/multi_lib_app.cpp | 58 ++----------------- 7 files changed, 13 insertions(+), 86 deletions(-) diff --git a/sycl/source/detail/context_impl.cpp b/sycl/source/detail/context_impl.cpp index 8bd7feffed40..6fb2dd375fe3 100644 --- a/sycl/source/detail/context_impl.cpp +++ b/sycl/source/detail/context_impl.cpp @@ -116,7 +116,6 @@ cl_context context_impl::get() const { } context_impl::~context_impl() { - std::cout << "~context_impl destructor. " << std::endl; try { // Free all events associated with the initialization of device globals. for (auto &DeviceGlobalInitializer : MDeviceGlobalInitializers) diff --git a/sycl/source/detail/device_global_map.hpp b/sycl/source/detail/device_global_map.hpp index e50994e3d8c2..8ff48b3f29c5 100644 --- a/sycl/source/detail/device_global_map.hpp +++ b/sycl/source/detail/device_global_map.hpp @@ -86,14 +86,10 @@ class DeviceGlobalMap { const std::pair &Entry) { return Entry.second == DevGlobalIt->second.get(); }); - std::cout << "About to Erase: " << (findDevGlobalByValue != MPtr2DeviceGlobal.end() ? "MPtr2DeviceGlobal.erase(findDevGlobalByValue)" : " ") << std::endl; if (findDevGlobalByValue != MPtr2DeviceGlobal.end()) MPtr2DeviceGlobal.erase(findDevGlobalByValue); - std::cout << " and MDeviceGlobals.erase(DevGlobalIt) no more" - << std::endl; - // CP -- removing this. - // MDeviceGlobals.erase(DevGlobalIt); + MDeviceGlobals.erase(DevGlobalIt); } } } @@ -118,9 +114,6 @@ class DeviceGlobalMap { DeviceGlobalMapEntry *getEntry(const void *DeviceGlobalPtr) { std::lock_guard DeviceGlobalsGuard(MDeviceGlobalsMutex); auto Entry = MPtr2DeviceGlobal.find(DeviceGlobalPtr); - // CP - // assert(Entry != MPtr2DeviceGlobal.end() && "Device global entry not - // found"); return (Entry != MPtr2DeviceGlobal.end()) ? Entry->second : nullptr; } diff --git a/sycl/source/detail/device_global_map_entry.cpp b/sycl/source/detail/device_global_map_entry.cpp index efc3c8daf92b..7b5ecd0a6213 100644 --- a/sycl/source/detail/device_global_map_entry.cpp +++ b/sycl/source/detail/device_global_map_entry.cpp @@ -17,16 +17,6 @@ namespace sycl { inline namespace _V1 { namespace detail { -DeviceGlobalUSMMem::~DeviceGlobalUSMMem() { - - std::cout << "~DeviceGlobalUSMMem destructor. " << (MPtr == nullptr) << " " << (MInitEvent == nullptr) << std::endl; - // removeAssociatedResources is expected to have cleaned up both the pointer - // and the event. When asserts are enabled the values are set, so we check - // these here. - // assert(MPtr == nullptr && "MPtr has not been cleaned up."); - // assert(MInitEvent == nullptr && "MInitEvent has not been cleaned up."); -} - OwnedUrEvent DeviceGlobalUSMMem::getInitEvent(adapter_impl &Adapter) { std::lock_guard Lock(MInitEventMutex); if (MInitEvent == nullptr) @@ -154,7 +144,6 @@ DeviceGlobalMapEntry::getOrAllocateDeviceGlobalUSM(const context &Context) { void DeviceGlobalMapEntry::removeAssociatedResources( const context_impl *CtxImpl) { - std::cout << "DeviceGlobalMapEntry::removeAssociatedResources() entered." << std::endl; std::lock_guard Lock{MDeviceToUSMPtrMapMutex}; for (device_impl &Device : CtxImpl->getDevices()) { auto USMPtrIt = MDeviceToUSMPtrMap.find({&Device, CtxImpl}); @@ -173,7 +162,6 @@ void DeviceGlobalMapEntry::removeAssociatedResources( MDeviceToUSMPtrMap.erase(USMPtrIt); } } - std::cout << "DeviceGlobalMapEntry::removeAssociatedResources() exiting." << std::endl; } void DeviceGlobalMapEntry::cleanup() { diff --git a/sycl/source/detail/device_global_map_entry.hpp b/sycl/source/detail/device_global_map_entry.hpp index 1796e8d179db..19d37f321029 100644 --- a/sycl/source/detail/device_global_map_entry.hpp +++ b/sycl/source/detail/device_global_map_entry.hpp @@ -33,7 +33,7 @@ using EventImplPtr = std::shared_ptr; struct DeviceGlobalUSMMem { DeviceGlobalUSMMem(void *Ptr) : MPtr(Ptr) {} - ~DeviceGlobalUSMMem(); + ~DeviceGlobalUSMMem() = default; void *const &getPtr() const noexcept { return MPtr; } diff --git a/sycl/source/detail/program_manager/program_manager.cpp b/sycl/source/detail/program_manager/program_manager.cpp index bf61c9120a52..dd8a3dd72d6b 100644 --- a/sycl/source/detail/program_manager/program_manager.cpp +++ b/sycl/source/detail/program_manager/program_manager.cpp @@ -3886,11 +3886,5 @@ extern "C" void __sycl_register_lib(sycl_device_binaries desc) { // Executed as a part of current module's (.exe, .dll) static initialization extern "C" void __sycl_unregister_lib(sycl_device_binaries desc) { - // Partial cleanup is not necessary at shutdown -// #ifndef _WIN32 -// if (!sycl::detail::GlobalHandler::instance().isOkToDefer()) -// return; - std::cout << "__sycl_unregister_lib()" << std::endl; - sycl::detail::ProgramManager::getInstance().removeImages(desc); -// #endif + sycl::detail::ProgramManager::getInstance().removeImages(desc); } diff --git a/sycl/test-e2e/IntermediateLib/Inputs/incrementing_lib.cpp b/sycl/test-e2e/IntermediateLib/Inputs/incrementing_lib.cpp index cd86d85c4673..eae3329599de 100644 --- a/sycl/test-e2e/IntermediateLib/Inputs/incrementing_lib.cpp +++ b/sycl/test-e2e/IntermediateLib/Inputs/incrementing_lib.cpp @@ -14,15 +14,18 @@ #define CLASSNAME same #endif -// Macro to concatenate DGVar with CLASSNAME +#ifdef WITH_DEVICE_GLOBALS +// Using device globals within the shared libraries only +// works if the names do not collide. Note that we cannot +// load a library multiple times if it has a device global. #define CONCAT_HELPER(a, b) a##b #define CONCAT(a, b) CONCAT_HELPER(a, b) -// #ifdef WITH_DEVICE_GLOBALS using SomeProperties = decltype(sycl::ext::oneapi::experimental::properties{}); sycl::ext::oneapi::experimental::device_global CONCAT(DGVar, CLASSNAME) __attribute__((visibility("default"))); -// #endif + +#endif // WITH_DEVICE_GLOBALS extern "C" API_EXPORT void performIncrementation(sycl::queue &q, sycl::buffer &buf) { diff --git a/sycl/test-e2e/IntermediateLib/multi_lib_app.cpp b/sycl/test-e2e/IntermediateLib/multi_lib_app.cpp index 92428979e0a7..e161ae0f6557 100644 --- a/sycl/test-e2e/IntermediateLib/multi_lib_app.cpp +++ b/sycl/test-e2e/IntermediateLib/multi_lib_app.cpp @@ -17,70 +17,20 @@ // RUN: %{build} %{fPIC_flag} -DSO_PATH='R"(%T)"' -o %t.out -// RUN: %clangxx -fsycl %{fPIC_flag} -shared -DINC=1 -DCLASSNAME=one -o %T/lib_a.%{shared_lib_ext} %S/Inputs/incrementing_lib.cpp -// RUN: %clangxx -fsycl %{fPIC_flag} -shared -DINC=2 -DCLASSNAME=two -o %T/lib_b.%{shared_lib_ext} %S/Inputs/incrementing_lib.cpp -// RUN: %clangxx -fsycl %{fPIC_flag} -shared -DINC=4 -DCLASSNAME=fou -o %T/lib_c.%{shared_lib_ext} %S/Inputs/incrementing_lib.cpp +// RUN: %clangxx -fsycl %{fPIC_flag} -shared -DINC=1 -o %T/lib_a.%{shared_lib_ext} %S/Inputs/incrementing_lib.cpp +// RUN: %clangxx -fsycl %{fPIC_flag} -shared -DINC=2 -o %T/lib_b.%{shared_lib_ext} %S/Inputs/incrementing_lib.cpp +// RUN: %clangxx -fsycl %{fPIC_flag} -shared -DINC=4 -o %T/lib_c.%{shared_lib_ext} %S/Inputs/incrementing_lib.cpp // RUN: env UR_L0_LEAKS_DEBUG=1 %{run} %t.out // This test uses a kernel of the same name in three different shared libraries. // It loads each library, calls the kernel, and checks that the incrementation // is done correctly, and then unloads the library. +// It also reloads the first library after unloading it. // This test ensures that __sycl_register_lib() and __sycl_unregister_lib() // are called correctly, and that the device images are cleaned up properly. -/* - CP -- Other Notes - - test-e2e/DeviceGlobals/device_global_static.cpp => dgs.bin - - LINUX - $ ./dgs.bin - ~context_impl destructor. - DeviceGlobalMapEntry::removeAssociatedResources() entered. - ~DeviceGlobalUSMMem destructor. 1 1 - DeviceGlobalMapEntry::removeAssociatedResources() exiting. - __sycl_unregister_lib() - DeviceGlobalMap::eraseEntries() with: 1 entries. - About to Erase: MPtr2DeviceGlobal.erase(findDevGlobalByValue) and MDeviceGlobals.erase(DevGlobalIt) - - - multi_lib_app test: -# | lib_b done -# | 7 7 7 7 7 7 7 7 -# | __sycl_unregister_lib() -# | lib_c done -# | ~context_impl destructor. -# | __sycl_unregister_lib() - -WINDOWS -$ dgs.exe - __sycl_unregister_lib() - DeviceGlobalMap::eraseEntries() with: 1 entries. - About to Erase: MPtr2DeviceGlobal.erase(findDevGlobalByValue) - and MDeviceGlobals.erase(DevGlobalIt) - ~DeviceGlobalUSMMem destructor. 0 0 - Assertion failed: MPtr == nullptr && "MPtr has not been cleaned up.", file C:\iusers\cperkins\sycl_workspace\llvm\sycl\source\detail\device_global_map_entry.cpp, line 26 - - - -NEUTER MDeviceGlobals.erase(DevGlobalIt). THEN -$ dgs.exe - __sycl_unregister_lib() - DeviceGlobalMap::eraseEntries() with: 1 entries. - About to Erase: MPtr2DeviceGlobal.erase(findDevGlobalByValue) - and MDeviceGlobals.erase(DevGlobalIt) no more - ~context_impl destructor. - Assertion failed: Entry != MPtr2DeviceGlobal.end() && "Device global entry not found", file C:\iusers\cperkins\sycl_workspace\llvm\sycl\source\detail/device_global_map.hpp, line 119 - - - - -*/ - - - #include using namespace sycl::ext::oneapi::experimental;