Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
104 changes: 60 additions & 44 deletions source/adapters/level_zero/event.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -171,48 +171,63 @@ ur_result_t urEnqueueEventsWaitWithBarrier(
std::scoped_lock<ur_shared_mutex> lock(Queue->Mutex);

// Helper function for appending a barrier to a command list.
auto insertBarrierIntoCmdList =
[&Queue](ur_command_list_ptr_t CmdList,
const _ur_ze_event_list_t &EventWaitList,
ur_event_handle_t &Event, bool IsInternal) {
UR_CALL(createEventAndAssociateQueue(
Queue, &Event, UR_COMMAND_EVENTS_WAIT_WITH_BARRIER, CmdList,
IsInternal, false));

Event->WaitList = EventWaitList;

// For in-order queue we don't need a real barrier, just wait for
// requested events in potentially different queues and add a "barrier"
// event signal because it is already guaranteed that previous commands
// in this queue are completed when the signal is started.
//
// Only consideration here is that when profiling is used, signalEvent
// cannot be used if EventWaitList.Length == 0. In those cases, we need
// to fall back directly to a barrier to have correct timestamps. See here:
// https://spec.oneapi.io/level-zero/latest/core/api.html?highlight=appendsignalevent#_CPPv430zeCommandListAppendSignalEvent24ze_command_list_handle_t17ze_event_handle_t
//
// TODO: this and other special handling of in-order queues to be
// updated when/if Level Zero adds native support for in-order queues.
//
if (Queue->isInOrderQueue() && InOrderBarrierBySignal &&
!Queue->isProfilingEnabled()) {
// If we are using driver in order lists, then append wait on events
// is unnecessary and we can signal the event created.
if (EventWaitList.Length && !CmdList->second.IsInOrderList) {
ZE2UR_CALL(zeCommandListAppendWaitOnEvents,
(CmdList->first, EventWaitList.Length,
EventWaitList.ZeEventList));
auto insertBarrierIntoCmdList = [&Queue](ur_command_list_ptr_t CmdList,
_ur_ze_event_list_t &EventWaitList,
ur_event_handle_t &Event,
bool IsInternal) {
UR_CALL(createEventAndAssociateQueue(Queue, &Event,
UR_COMMAND_EVENTS_WAIT_WITH_BARRIER,
CmdList, IsInternal, false));

Event->WaitList = EventWaitList;

// For in-order queue we don't need a real barrier, just wait for
// requested events in potentially different queues and add a "barrier"
// event signal because it is already guaranteed that previous commands
// in this queue are completed when the signal is started.
//
// Only consideration here is that when profiling is used, signalEvent
// cannot be used if EventWaitList.Length == 0. In those cases, we need
// to fall back directly to a barrier to have correct timestamps. See here:
// https://spec.oneapi.io/level-zero/latest/core/api.html?highlight=appendsignalevent#_CPPv430zeCommandListAppendSignalEvent24ze_command_list_handle_t17ze_event_handle_t
//
// TODO: this and other special handling of in-order queues to be
// updated when/if Level Zero adds native support for in-order queues.
//
if (Queue->isInOrderQueue() && InOrderBarrierBySignal &&
!Queue->isProfilingEnabled()) {
// If we are using driver in order lists, then append wait on events
// is unnecessary IF the cmdlists match.
if (EventWaitList.Length) {
if (CmdList->second.IsInOrderList) {
for (unsigned i = EventWaitList.Length; i-- < 0;) {
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

/unified-runtime-src/source/adapters/level_zero/event.cpp:203:55: warning: comparison of unsigned expression in ‘< 0’ is always false [-Wtype-limits]
203 | for (unsigned i = EventWaitList.Length; i-- < 0;) {
| ~~~~^~~

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yup, we are aware of that typo. Thanks. This patch will solve the problem: #2134

// If the event is from the same cmdlist, we can remove it
// from the waitlist.
if (EventWaitList.UrEventList[i]->CommandList == CmdList) {
EventWaitList.Length--;
if (EventWaitList.Length != i) {
std::swap(EventWaitList.UrEventList[i],
EventWaitList.UrEventList[EventWaitList.Length]);
std::swap(EventWaitList.ZeEventList[i],
EventWaitList.ZeEventList[EventWaitList.Length]);
}
}
}
ZE2UR_CALL(zeCommandListAppendSignalEvent,
(CmdList->first, Event->ZeEvent));
} else {
ZE2UR_CALL(zeCommandListAppendBarrier,
(CmdList->first, Event->ZeEvent, EventWaitList.Length,
EventWaitList.ZeEventList));
}
ZE2UR_CALL(
zeCommandListAppendWaitOnEvents,
(CmdList->first, EventWaitList.Length, EventWaitList.ZeEventList));
}
ZE2UR_CALL(zeCommandListAppendSignalEvent,
(CmdList->first, Event->ZeEvent));
} else {
ZE2UR_CALL(zeCommandListAppendBarrier,
(CmdList->first, Event->ZeEvent, EventWaitList.Length,
EventWaitList.ZeEventList));
}

return UR_RESULT_SUCCESS;
};
return UR_RESULT_SUCCESS;
};

// If the queue is in-order then each command in it effectively acts as a
// barrier, so we don't need to do anything except if we were requested
Expand Down Expand Up @@ -349,9 +364,9 @@ ur_result_t urEnqueueEventsWaitWithBarrier(
// command-lists.
std::vector<ur_event_handle_t> EventWaitVector(CmdLists.size());
for (size_t I = 0; I < CmdLists.size(); ++I) {
UR_CALL(insertBarrierIntoCmdList(CmdLists[I], _ur_ze_event_list_t{},
EventWaitVector[I],
true /*IsInternal*/));
_ur_ze_event_list_t waitlist;
UR_CALL(insertBarrierIntoCmdList(
CmdLists[I], waitlist, EventWaitVector[I], true /*IsInternal*/));
}
// If there were multiple queues we need to create a "convergence" event to
// be our active barrier. This convergence event is signalled by a barrier
Expand All @@ -376,8 +391,9 @@ ur_result_t urEnqueueEventsWaitWithBarrier(
// If there is only a single queue then insert a barrier and the single
// result event can be used as our active barrier and used as the return
// event. Take into account whether output event is discarded or not.
UR_CALL(insertBarrierIntoCmdList(CmdLists[0], _ur_ze_event_list_t{},
ResultEvent, IsInternal));
_ur_ze_event_list_t waitlist;
UR_CALL(insertBarrierIntoCmdList(CmdLists[0], waitlist, ResultEvent,
IsInternal));
}

// Execute each command list so the barriers can be encountered.
Expand Down
1 change: 1 addition & 0 deletions test/adapters/level_zero/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@ if(UR_BUILD_ADAPTER_L0)
SOURCES
urProgramLink.cpp
urKernelCreateWithNativeHandle.cpp
urEventCreateWithNativeHandle.cpp
ENVIRONMENT
"UR_ADAPTERS_FORCE_LOAD=\"$<TARGET_FILE:ur_adapter_level_zero>\""
)
Expand Down
109 changes: 109 additions & 0 deletions test/adapters/level_zero/urEventCreateWithNativeHandle.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,109 @@
// Copyright (C) 2024 Intel Corporation
// Part of the Unified-Runtime Project, under the Apache License v2.0 with LLVM Exceptions.
// See LICENSE.TXT
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception

#include "ur_api.h"
#include "uur/checks.h"
#include "ze_api.h"
#include <cstring>
#include <thread>
#include <uur/fixtures.h>

// Brings the `ms` duration literal into scope for the sleep in the test below.
using namespace std::chrono_literals;
// The Level Zero event native-handle tests reuse the uur::urQueueTest fixture.
using urLevelZeroEventNativeHandleTest = uur::urQueueTest;
UUR_INSTANTIATE_DEVICE_TEST_SUITE_P(urLevelZeroEventNativeHandleTest);

// Size, in bytes, of each host buffer used by the memcpy commands in the test.
#define TEST_MEMCPY_SIZE 4096

// Verifies that a command enqueued behind urEnqueueEventsWaitWithBarrier does
// not execute while one of the barrier's wait events -- a UR event wrapping a
// native Level Zero event -- is still unsignalled, and that it does execute
// once that native event is signalled from the host.
TEST_P(urLevelZeroEventNativeHandleTest, WaitForNative) {
    // Create a host-visible Level Zero event pool holding a single event.
    ze_event_pool_desc_t desc;
    desc.stype = ZE_STRUCTURE_TYPE_EVENT_POOL_DESC;
    desc.pNext = nullptr;
    desc.count = 1;
    desc.flags = ZE_EVENT_POOL_FLAG_HOST_VISIBLE;

    ur_native_handle_t nativeContext;
    ASSERT_SUCCESS(urContextGetNativeHandle(context, &nativeContext));

    ur_native_handle_t nativeDevice;
    ASSERT_SUCCESS(urDeviceGetNativeHandle(device, &nativeDevice));

    ze_event_pool_handle_t pool = nullptr;

    ASSERT_EQ(
        zeEventPoolCreate(reinterpret_cast<ze_context_handle_t>(nativeContext),
                          &desc, 1,
                          reinterpret_cast<ze_device_handle_t *>(&nativeDevice),
                          &pool),
        ZE_RESULT_SUCCESS);

    ze_event_desc_t eventDesc;
    eventDesc.pNext = nullptr;
    eventDesc.stype = ZE_STRUCTURE_TYPE_EVENT_DESC;
    eventDesc.index = 0;
    eventDesc.signal = ZE_EVENT_SCOPE_FLAG_HOST;
    eventDesc.wait = 0;

    ze_event_handle_t zeEvent;
    ASSERT_EQ(zeEventCreate(pool, &eventDesc, &zeEvent), ZE_RESULT_SUCCESS);

    // Wrap the native event in a UR event; the test keeps ownership of the
    // native handle and destroys it itself at the end.
    ur_event_native_properties_t pprops;
    pprops.isNativeHandleOwned = false;
    pprops.pNext = nullptr;
    pprops.stype = UR_STRUCTURE_TYPE_EVENT_NATIVE_PROPERTIES;

    ur_event_handle_t urEvent;
    ASSERT_SUCCESS(urEventCreateWithNativeHandle(
        reinterpret_cast<ur_native_handle_t>(zeEvent), context, &pprops,
        &urEvent));

    // Raw allocations are kept on purpose: if the test bails out early the
    // buffers are leaked rather than freed while enqueued copies may still
    // reference them.
    int *src = static_cast<int *>(malloc(TEST_MEMCPY_SIZE));
    ASSERT_NE(src, nullptr);
    memset(src, 0xc, TEST_MEMCPY_SIZE);

    int *dst = static_cast<int *>(malloc(TEST_MEMCPY_SIZE));
    ASSERT_NE(dst, nullptr);
    memset(dst, 0, TEST_MEMCPY_SIZE);

    int *dst2 = static_cast<int *>(malloc(TEST_MEMCPY_SIZE));
    ASSERT_NE(dst2, nullptr);
    memset(dst2, 0, TEST_MEMCPY_SIZE);

    ur_event_handle_t memcpyEvent2;
    ASSERT_SUCCESS(urEnqueueUSMMemcpy(queue, false, dst2, src,
                                      TEST_MEMCPY_SIZE, 0, nullptr,
                                      &memcpyEvent2));

    ur_event_handle_t memcpyEvent3;
    ASSERT_SUCCESS(urEnqueueUSMMemcpy(queue, false, dst2, src,
                                      TEST_MEMCPY_SIZE, 0, nullptr,
                                      &memcpyEvent3));

    // just to make wait lists contain more than 1 event
    ur_event_handle_t events[] = {memcpyEvent2, urEvent, memcpyEvent3};

    ur_event_handle_t waitEvent;
    ASSERT_SUCCESS(
        urEnqueueEventsWaitWithBarrier(queue, 3, events, &waitEvent));

    ur_event_handle_t memcpyEvent;
    ASSERT_SUCCESS(urEnqueueUSMMemcpy(queue, false, dst, src,
                                      TEST_MEMCPY_SIZE, 1, &waitEvent,
                                      &memcpyEvent));

    // urQueueFinish would hang, so we flush and then wait
    // some time to make sure the gpu had plenty of time
    // to do the memcpy.
    EXPECT_EQ(urQueueFlush(queue), UR_RESULT_SUCCESS);
    std::this_thread::sleep_for(500ms);

    // The copy into dst must still be blocked on the unsignalled native
    // event. Non-fatal on purpose: the event below must still be signalled
    // so the queue can drain and the resources can be released.
    EXPECT_NE(memcmp(src, dst, TEST_MEMCPY_SIZE), 0);

    // Fatal: if the signal fails, urQueueFinish below would never return.
    ASSERT_EQ(zeEventHostSignal(zeEvent), ZE_RESULT_SUCCESS);

    EXPECT_EQ(urQueueFinish(queue), UR_RESULT_SUCCESS);

    // With the native event signalled, the barrier is released and the copy
    // into dst has completed.
    EXPECT_EQ(memcmp(src, dst, TEST_MEMCPY_SIZE), 0);

    free(src);
    free(dst);
    free(dst2);
    EXPECT_EQ(urEventRelease(urEvent), UR_RESULT_SUCCESS);
    EXPECT_EQ(urEventRelease(waitEvent), UR_RESULT_SUCCESS);
    EXPECT_EQ(urEventRelease(memcpyEvent), UR_RESULT_SUCCESS);
    EXPECT_EQ(urEventRelease(memcpyEvent2), UR_RESULT_SUCCESS);
    EXPECT_EQ(urEventRelease(memcpyEvent3), UR_RESULT_SUCCESS);
    EXPECT_EQ(zeEventDestroy(zeEvent), ZE_RESULT_SUCCESS);
    EXPECT_EQ(zeEventPoolDestroy(pool), ZE_RESULT_SUCCESS);
}
Loading