@@ -961,6 +961,13 @@ static const zeCommandListBatchConfig ZeCommandListBatchCopyConfig = [] {
961961 return ZeCommandListBatchConfig (IsCopy{true });
962962}();
963963
// Controls how a wait-with-barrier on an in-order queue is implemented:
// by signaling an event (the default) as opposed to appending a true barrier
// command.
//
// The choice is read once, during static initialization, from the
// environment variable UR_L0_IN_ORDER_BARRIER_BY_SIGNAL:
//   - unset: signal-based implementation (true);
//   - set:   the value is parsed with std::atoi; a result of 0 (which
//            includes non-numeric text) selects the true barrier command,
//            any non-zero result selects the event signal.
static const bool InOrderBarrierBySignal = [] {
  // The name must match exactly what users export; a stray space inside the
  // literal would make the lookup silently fail and pin the default.
  const char *UrRet = std::getenv("UR_L0_IN_ORDER_BARRIER_BY_SIGNAL");
  return UrRet ? std::atoi(UrRet) != 0 : true;
}();
970+
964971_pi_queue::_pi_queue (std::vector<ze_command_queue_handle_t > &ComputeQueues,
965972 std::vector<ze_command_queue_handle_t > &CopyQueues,
966973 pi_context Context, pi_device Device,
@@ -5744,20 +5751,56 @@ pi_result piEnqueueEventsWaitWithBarrier(pi_queue Queue,
57445751 std::scoped_lock<ur_shared_mutex> lock (Queue->Mutex );
57455752
57465753 // Helper function for appending a barrier to a command list.
5747- auto insertBarrierIntoCmdList =
5748- [&Queue](pi_command_list_ptr_t CmdList,
5749- const _pi_ze_event_list_t &EventWaitList, pi_event &Event,
5750- bool IsInternal) {
5751- if (auto Res = createEventAndAssociateQueue (
5752- Queue, &Event, PI_COMMAND_TYPE_USER, CmdList, IsInternal))
5753- return Res;
5754+ auto insertBarrierIntoCmdList = [&Queue](
5755+ pi_command_list_ptr_t CmdList,
5756+ const _pi_ze_event_list_t &EventWaitList,
5757+ pi_event &Event, bool IsInternal) {
5758+ // For in-order queue and empty wait-list just use the last command
5759+ // event as the barrier event.
5760+ if (Queue->isInOrderQueue () && !EventWaitList.Length &&
5761+ Queue->LastCommandEvent && !Queue->LastCommandEvent ->IsDiscarded ) {
5762+ PI_CALL (piEventRetain (Queue->LastCommandEvent ));
5763+ Event = Queue->LastCommandEvent ;
5764+ return PI_SUCCESS;
5765+ }
57545766
5755- Event->WaitList = EventWaitList;
5756- ZE_CALL (zeCommandListAppendBarrier,
5757- (CmdList->first , Event->ZeEvent , EventWaitList.Length ,
5758- EventWaitList.ZeEventList ));
5759- return PI_SUCCESS;
5760- };
5767+ if (auto Res = createEventAndAssociateQueue (
5768+ Queue, &Event, PI_COMMAND_TYPE_USER, CmdList, IsInternal))
5769+ return Res;
5770+
5771+ Event->WaitList = EventWaitList;
5772+
5773+ // For in-order queue we don't need a real barrier, just wait for requested
5774+ // events in potentially different queues and add a "barrier" event signal
5775+ // because it is already guaranteed that previous commands in this queue
5776+ // are completed when the signal is started.
5777+ //
5778+ // TODO: this and other special handling of in-order queues to be
5779+ // updated when/if Level Zero adds native support for in-order queues.
5780+ //
5781+ if (Queue->isInOrderQueue () && InOrderBarrierBySignal) {
5782+ if (EventWaitList.Length ) {
5783+ ZE_CALL (
5784+ zeCommandListAppendWaitOnEvents,
5785+ (CmdList->first , EventWaitList.Length , EventWaitList.ZeEventList ));
5786+ }
5787+ ZE_CALL (zeCommandListAppendSignalEvent, (CmdList->first , Event->ZeEvent ));
5788+ } else {
5789+ ZE_CALL (zeCommandListAppendBarrier,
5790+ (CmdList->first , Event->ZeEvent , EventWaitList.Length ,
5791+ EventWaitList.ZeEventList ));
5792+ }
5793+ return PI_SUCCESS;
5794+ };
5795+
5796+ // If the queue is in-order then each command in it effectively acts as a
5797+ // barrier, so there is nothing to do unless the caller requested a
5798+ // "barrier" event to be created or we need to wait for events in
5799+ // potentially different queues.
5800+ //
5801+ if (Queue->isInOrderQueue () && NumEventsInWaitList == 0 && !OutEvent) {
5802+ return PI_SUCCESS;
5803+ }
57615804
57625805 pi_event InternalEvent;
57635806 bool IsInternal = OutEvent == nullptr ;
0 commit comments