From 3e8e258896b52415ceea33754b2eab722e349896 Mon Sep 17 00:00:00 2001 From: Igor Chorazewicz Date: Tue, 6 May 2025 16:55:12 +0000 Subject: [PATCH 1/2] [SYCL][UR][L0] Fix urEnqueueEventsWait The path for non-immediate command lists was incorrect: Queue->synchronize() was called without first closing and executing the open command lists, leading to a deadlock. Fix this by adding a call to executeAllOpenCommandLists() before synchronizing. --- .../source/adapters/level_zero/event.cpp | 40 +++++++++---------- 1 file changed, 19 insertions(+), 21 deletions(-) diff --git a/unified-runtime/source/adapters/level_zero/event.cpp b/unified-runtime/source/adapters/level_zero/event.cpp index c53bfa1089100..807e3b8b2fa2c 100644 --- a/unified-runtime/source/adapters/level_zero/event.cpp +++ b/unified-runtime/source/adapters/level_zero/event.cpp @@ -114,35 +114,33 @@ ur_result_t urEnqueueEventsWait( false /*OKToBatchCommand*/); } - { - // If wait-list is empty, then this particular command should wait until - // all previous enqueued commands to the command-queue have completed. - // - // TODO: find a way to do that without blocking the host. + // If wait-list is empty, then this particular command should wait until + // all previous enqueued commands to the command-queue have completed. + // + // TODO: find a way to do that without blocking the host. - // Lock automatically releases when this goes out of scope. 
+ std::scoped_lock lock(Queue->Mutex); - if (OutEvent) { - UR_CALL(createEventAndAssociateQueue(Queue, OutEvent, - UR_COMMAND_EVENTS_WAIT, - Queue->CommandListMap.end(), false, - /* IsInternal */ false)); - } + if (OutEvent) { + UR_CALL(createEventAndAssociateQueue(Queue, OutEvent, + UR_COMMAND_EVENTS_WAIT, + Queue->CommandListMap.end(), false, + /* IsInternal */ false)); + } - UR_CALL(Queue->synchronize()); + UR_CALL(Queue->executeAllOpenCommandLists()); + UR_CALL(Queue->synchronize()); - if (OutEvent) { - Queue->LastCommandEvent = reinterpret_cast<ur_event_handle_t>(*OutEvent); + if (OutEvent) { + Queue->LastCommandEvent = reinterpret_cast<ur_event_handle_t>(*OutEvent); - if (!(*OutEvent)->CounterBasedEventsEnabled) - ZE2UR_CALL(zeEventHostSignal, ((*OutEvent)->ZeEvent)); - (*OutEvent)->Completed = true; - } + if (!(*OutEvent)->CounterBasedEventsEnabled) + ZE2UR_CALL(zeEventHostSignal, ((*OutEvent)->ZeEvent)); + (*OutEvent)->Completed = true; } if (!Queue->UsingImmCmdLists) { - std::unique_lock Lock(Queue->Mutex); resetCommandLists(Queue); } From 601b23ad97c880efdd4b4a0fa04897a9567fe890 Mon Sep 17 00:00:00 2001 From: Igor Chorazewicz Date: Tue, 6 May 2025 20:07:04 +0000 Subject: [PATCH 2/2] Add test --- .../enqueue/urEnqueueEventsWait.cpp | 27 +++++++++++++++++++ 1 file changed, 27 insertions(+) diff --git a/unified-runtime/test/conformance/enqueue/urEnqueueEventsWait.cpp b/unified-runtime/test/conformance/enqueue/urEnqueueEventsWait.cpp index adbdf8d0e1065..1d9baa2f149c5 100644 --- a/unified-runtime/test/conformance/enqueue/urEnqueueEventsWait.cpp +++ b/unified-runtime/test/conformance/enqueue/urEnqueueEventsWait.cpp @@ -38,6 +38,33 @@ struct urEnqueueEventsWaitTest : uur::urMultiQueueTest { UUR_INSTANTIATE_DEVICE_TEST_SUITE(urEnqueueEventsWaitTest); +TEST_P(urEnqueueEventsWaitTest, SuccessWithEmptyWaitList) { + void *ptr1, *ptr2; + size_t size = 1024; + size_t count = size / sizeof(int); + + ASSERT_SUCCESS( + urUSMDeviceAlloc(context, device, nullptr, nullptr, size, &ptr1)); + ASSERT_SUCCESS( + 
urUSMDeviceAlloc(context, device, nullptr, nullptr, size, &ptr2)); + + std::vector<int> input(count, 99); + std::vector<int> output(count, 0); + + ASSERT_SUCCESS(urEnqueueUSMMemcpy(queue1, false, ptr1, input.data(), size, 0, + nullptr, nullptr)); + ASSERT_SUCCESS( + urEnqueueUSMMemcpy(queue1, false, ptr2, ptr1, size, 0, nullptr, nullptr)); + ASSERT_SUCCESS(urEnqueueUSMMemcpy(queue1, false, output.data(), ptr2, size, 0, + nullptr, nullptr)); + + ur_event_handle_t event; + ASSERT_SUCCESS(urEnqueueEventsWait(queue1, 0, nullptr, &event)); + ASSERT_SUCCESS(urEventWait(1, &event)); + + ASSERT_EQ(input, output); +} + TEST_P(urEnqueueEventsWaitTest, Success) { UUR_KNOWN_FAILURE_ON(uur::LevelZero{}, uur::NativeCPU{});