diff --git a/CMakeLists.txt b/CMakeLists.txt index c1aebe3b10..c539dc080b 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -4,7 +4,7 @@ # SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception cmake_minimum_required(VERSION 3.20.0 FATAL_ERROR) -project(unified-runtime VERSION 0.10.4) +project(unified-runtime VERSION 0.10.5) # Check if unified runtime is built as a standalone project. if(CMAKE_SOURCE_DIR STREQUAL CMAKE_CURRENT_SOURCE_DIR OR UR_STANDALONE_BUILD) diff --git a/source/adapters/level_zero/context.cpp b/source/adapters/level_zero/context.cpp index 041f088d04..98c99f0685 100644 --- a/source/adapters/level_zero/context.cpp +++ b/source/adapters/level_zero/context.cpp @@ -529,6 +529,8 @@ ur_result_t ur_context_handle_t_::getFreeSlotInExistingOrNewPool( counterBasedExt.flags = ZE_EVENT_POOL_COUNTER_BASED_EXP_FLAG_NON_IMMEDIATE; } + logger::debug("ze_event_pool_desc_t counter based flags set to: {}", + counterBasedExt.flags); ZeEventPoolDesc.pNext = &counterBasedExt; } diff --git a/source/adapters/level_zero/event.cpp b/source/adapters/level_zero/event.cpp index f4dee0d661..7c040ccde3 100644 --- a/source/adapters/level_zero/event.cpp +++ b/source/adapters/level_zero/event.cpp @@ -197,7 +197,9 @@ ur_queue_handle_legacy_t_::enqueueEventsWaitWithBarrier( ///< [in] handle of the // if (Queue->isInOrderQueue() && InOrderBarrierBySignal && !Queue->isProfilingEnabled()) { - if (EventWaitList.Length) { + // If we are using driver in order lists, then append wait on events + // is unnecessary and we can signal the event created. + if (EventWaitList.Length && !CmdList->second.IsInOrderList) { ZE2UR_CALL(zeCommandListAppendWaitOnEvents, (CmdList->first, EventWaitList.Length, EventWaitList.ZeEventList)); @@ -1537,8 +1539,13 @@ ur_result_t _ur_ze_event_list_t::createAndRetainUrZeEventList( ZE2UR_CALL(zeCommandListAppendWaitOnEvents, (ZeCommandList, 1u, &EventList[I]->ZeEvent)); - if (!MultiDeviceEvent->CounterBasedEventsEnabled) + if (!MultiDeviceEvent->CounterBasedEventsEnabled) { ZE2UR_CALL(zeEventHostSignal, (MultiDeviceZeEvent)); + } else { + ZE2UR_CALL(zeCommandListAppendSignalEvent, + (ZeCommandList, MultiDeviceZeEvent)); + } + MultiDeviceEvent->Completed = true; UR_CALL(Queue->executeCommandList(CommandList, /* IsBlocking */ false, /* OkToBatchCommand */ true)); diff --git a/source/adapters/level_zero/kernel.cpp b/source/adapters/level_zero/kernel.cpp index 9e5670ae5d..e2514d251a 100644 --- a/source/adapters/level_zero/kernel.cpp +++ b/source/adapters/level_zero/kernel.cpp @@ -598,11 +598,19 @@ ur_result_t ur_queue_handle_legacy_t_::enqueueDeviceGlobalVariableWrite( auto Queue = this; std::scoped_lock lock(Queue->Mutex); + ze_module_handle_t ZeModule{}; + auto It = Program->ZeModuleMap.find(Queue->Device->ZeDevice); + if (It != Program->ZeModuleMap.end()) { + ZeModule = It->second; + } else { + ZeModule = Program->ZeModule; + } + // Find global variable pointer size_t GlobalVarSize = 0; void *GlobalVarPtr = nullptr; ZE2UR_CALL(zeModuleGetGlobalPointer, - (Program->ZeModule, Name, &GlobalVarSize, &GlobalVarPtr)); + (ZeModule, Name, &GlobalVarSize, &GlobalVarPtr)); if (GlobalVarSize < Offset + Count) { setErrorMessage("Write device global variable is out of range.", UR_RESULT_ERROR_INVALID_VALUE, diff --git a/source/adapters/level_zero/queue.cpp b/source/adapters/level_zero/queue.cpp index 34da252c74..1a25851514 100644 --- a/source/adapters/level_zero/queue.cpp +++ b/source/adapters/level_zero/queue.cpp @@ -2371,6 +2371,7 @@ ur_queue_handle_legacy_t_::ur_queue_group_t::getImmCmdList() { ZeCommandQueueDesc.ordinal = QueueOrdinal; ZeCommandQueueDesc.index = QueueIndex; ZeCommandQueueDesc.mode = ZE_COMMAND_QUEUE_MODE_ASYNCHRONOUS; + bool isInOrderList = false; const char *Priority = "Normal"; if (Queue->isPriorityLow()) { ZeCommandQueueDesc.priority = ZE_COMMAND_QUEUE_PRIORITY_PRIORITY_LOW; @@ -2386,6 +2387,7 @@ ur_queue_handle_legacy_t_::ur_queue_group_t::getImmCmdList() { } if (Queue->Device->useDriverInOrderLists() && Queue->isInOrderQueue()) { + isInOrderList = true; ZeCommandQueueDesc.flags |= ZE_COMMAND_QUEUE_FLAG_IN_ORDER; } @@ -2434,7 +2436,7 @@ ur_queue_handle_legacy_t_::ur_queue_group_t::getImmCmdList() { ZeCommandList, ur_command_list_info_t( nullptr, true, false, nullptr, ZeCommandQueueDesc, - Queue->useCompletionBatching(), true, false, true)}) + Queue->useCompletionBatching(), true, isInOrderList, true)}) .first; return ImmCmdLists[Index];