From 4cac1ab5b9b54f900b85c4ea98857ffd5b7cf18b Mon Sep 17 00:00:00 2001 From: smaslov Date: Fri, 12 May 2023 17:31:45 -0700 Subject: [PATCH 1/4] [SYCL][L0] Optimize barrier for in-order queue Signed-off-by: smaslov --- sycl/plugins/level_zero/pi_level_zero.cpp | 43 ++++++++++++++++------- 1 file changed, 30 insertions(+), 13 deletions(-) diff --git a/sycl/plugins/level_zero/pi_level_zero.cpp b/sycl/plugins/level_zero/pi_level_zero.cpp index 6a45313f132da..027987682602f 100644 --- a/sycl/plugins/level_zero/pi_level_zero.cpp +++ b/sycl/plugins/level_zero/pi_level_zero.cpp @@ -5744,20 +5744,37 @@ pi_result piEnqueueEventsWaitWithBarrier(pi_queue Queue, std::scoped_lock lock(Queue->Mutex); // Helper function for appending a barrier to a command list. - auto insertBarrierIntoCmdList = - [&Queue](pi_command_list_ptr_t CmdList, - const _pi_ze_event_list_t &EventWaitList, pi_event &Event, - bool IsInternal) { - if (auto Res = createEventAndAssociateQueue( - Queue, &Event, PI_COMMAND_TYPE_USER, CmdList, IsInternal)) - return Res; + auto insertBarrierIntoCmdList = [&Queue]( + pi_command_list_ptr_t CmdList, + const _pi_ze_event_list_t &EventWaitList, + pi_event &Event, bool IsInternal) { + if (auto Res = createEventAndAssociateQueue( + Queue, &Event, PI_COMMAND_TYPE_USER, CmdList, IsInternal)) + return Res; - Event->WaitList = EventWaitList; - ZE_CALL(zeCommandListAppendBarrier, - (CmdList->first, Event->ZeEvent, EventWaitList.Length, - EventWaitList.ZeEventList)); - return PI_SUCCESS; - }; + Event->WaitList = EventWaitList; + + // For in-order queue we don't need a real barrier, just add a "barrier" + // event signal because it is already guaranteed that previous commands + // are completed when the signal is started. 
+ if (Queue->isInOrderQueue()) { + ZE_CALL(zeCommandListAppendSignalEvent, (CmdList->first, Event->ZeEvent)); + } else { + ZE_CALL(zeCommandListAppendBarrier, + (CmdList->first, Event->ZeEvent, EventWaitList.Length, + EventWaitList.ZeEventList)); + } + return PI_SUCCESS; + }; + + // If the queue is in-order then each command in it effectively acts as a + // barrier, so we don't need to do anything except if we were requested + // a "barrier" event to be created. If we have to create an event then + // the "barrier" will just be a signal of that event. + // + if (Queue->isInOrderQueue() && OutEvent == nullptr) { + return PI_SUCCESS; + } pi_event InternalEvent; bool IsInternal = OutEvent == nullptr; From c6d2215323f74e9dbf489574fc5f8f2bbbfd6bdc Mon Sep 17 00:00:00 2001 From: smaslov Date: Mon, 15 May 2023 19:46:40 -0700 Subject: [PATCH 2/4] add UR_L0_IN_ORDER_BARRIER_BY_SIGNAL to control new behavior Signed-off-by: smaslov --- sycl/plugins/level_zero/pi_level_zero.cpp | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-) diff --git a/sycl/plugins/level_zero/pi_level_zero.cpp b/sycl/plugins/level_zero/pi_level_zero.cpp index 027987682602f..4f41cf7c6069f 100644 --- a/sycl/plugins/level_zero/pi_level_zero.cpp +++ b/sycl/plugins/level_zero/pi_level_zero.cpp @@ -961,6 +961,13 @@ static const zeCommandListBatchConfig ZeCommandListBatchCopyConfig = [] { return ZeCommandListBatchConfig(IsCopy{true}); }(); +// Control if wait with barrier is implemented by signal of an event +// as opposed to a true barrier command for in-order queue. +static const bool InOrderBarrierBySignal = [] { + const char *UrRet = std::getenv("UR_L0_IN_ORDER_BARRIER_BY_SIGNAL"); + return (UrRet ? 
std::atoi(UrRet) : true); +}(); + _pi_queue::_pi_queue(std::vector &ComputeQueues, std::vector &CopyQueues, pi_context Context, pi_device Device, @@ -5757,7 +5764,10 @@ pi_result piEnqueueEventsWaitWithBarrier(pi_queue Queue, // For in-order queue we don't need a real barrier, just add a "barrier" // event signal because it is already guaranteed that previous commands // are completed when the signal is started. - if (Queue->isInOrderQueue()) { + // TODO: this and other special handling of in-order queues to be + // updated when/if Level Zero adds native support for in-order queues. + // + if (Queue->isInOrderQueue() && InOrderBarrierBySignal) { ZE_CALL(zeCommandListAppendSignalEvent, (CmdList->first, Event->ZeEvent)); } else { ZE_CALL(zeCommandListAppendBarrier, From 91101918cf5cfba26538fe43c6f56baa8f848038 Mon Sep 17 00:00:00 2001 From: smaslov Date: Thu, 18 May 2023 18:15:55 -0700 Subject: [PATCH 3/4] rework based on reviews Signed-off-by: smaslov --- sycl/plugins/level_zero/pi_level_zero.cpp | 26 ++++++++++++++++++----- 1 file changed, 21 insertions(+), 5 deletions(-) diff --git a/sycl/plugins/level_zero/pi_level_zero.cpp b/sycl/plugins/level_zero/pi_level_zero.cpp index 4f41cf7c6069f..c274d9ca592d9 100644 --- a/sycl/plugins/level_zero/pi_level_zero.cpp +++ b/sycl/plugins/level_zero/pi_level_zero.cpp @@ -5755,19 +5755,35 @@ pi_result piEnqueueEventsWaitWithBarrier(pi_queue Queue, pi_command_list_ptr_t CmdList, const _pi_ze_event_list_t &EventWaitList, pi_event &Event, bool IsInternal) { + // For in-order queue and empty wait-list just use the last command + // event as the barrier event. 
+ if (Queue->isInOrderQueue() && !EventWaitList.Length && + Queue->LastCommandEvent) { + PI_CALL(piEventRetain(Queue->LastCommandEvent)); + Event = Queue->LastCommandEvent; + return PI_SUCCESS; + } + if (auto Res = createEventAndAssociateQueue( Queue, &Event, PI_COMMAND_TYPE_USER, CmdList, IsInternal)) return Res; Event->WaitList = EventWaitList; - // For in-order queue we don't need a real barrier, just add a "barrier" - // event signal because it is already guaranteed that previous commands + // For in-order queue we don't need a real barrier, just wait for requested + // events in potentially different queues and add a "barrier" event signal + // because it is already guaranteed that previous commands in this queue // are completed when the signal is started. + // // TODO: this and other special handling of in-order queues to be // updated when/if Level Zero adds native support for in-order queues. // if (Queue->isInOrderQueue() && InOrderBarrierBySignal) { + if (EventWaitList.Length) { + ZE_CALL( + zeCommandListAppendWaitOnEvents, + (CmdList->first, EventWaitList.Length, EventWaitList.ZeEventList)); + } ZE_CALL(zeCommandListAppendSignalEvent, (CmdList->first, Event->ZeEvent)); } else { ZE_CALL(zeCommandListAppendBarrier, @@ -5779,10 +5795,10 @@ pi_result piEnqueueEventsWaitWithBarrier(pi_queue Queue, // If the queue is in-order then each command in it effectively acts as a // barrier, so we don't need to do anything except if we were requested - // a "barrier" event to be created. If we have to create an event then - // the "barrier" will just be a signal of that event. + // a "barrier" event to be created. Or if we need to wait for events in + // potentially different queues. 
// - if (Queue->isInOrderQueue() && OutEvent == nullptr) { + if (Queue->isInOrderQueue() && NumEventsInWaitList == 0 && !OutEvent) { return PI_SUCCESS; } From a8453559ea6043f7dcc75cfdf5cc2ff52c5766bf Mon Sep 17 00:00:00 2001 From: smaslov Date: Thu, 18 May 2023 20:05:29 -0700 Subject: [PATCH 4/4] check if not discarded Signed-off-by: smaslov --- sycl/plugins/level_zero/pi_level_zero.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sycl/plugins/level_zero/pi_level_zero.cpp b/sycl/plugins/level_zero/pi_level_zero.cpp index c274d9ca592d9..8b8cf3d2858e4 100644 --- a/sycl/plugins/level_zero/pi_level_zero.cpp +++ b/sycl/plugins/level_zero/pi_level_zero.cpp @@ -5758,7 +5758,7 @@ pi_result piEnqueueEventsWaitWithBarrier(pi_queue Queue, // For in-order queue and empty wait-list just use the last command // event as the barrier event. if (Queue->isInOrderQueue() && !EventWaitList.Length && - Queue->LastCommandEvent) { + Queue->LastCommandEvent && !Queue->LastCommandEvent->IsDiscarded) { PI_CALL(piEventRetain(Queue->LastCommandEvent)); Event = Queue->LastCommandEvent; return PI_SUCCESS;