@@ -1134,6 +1134,31 @@ _pi_queue::_pi_queue(std::vector<ze_command_queue_handle_t> &ComputeQueues,
11341134 CopyCommandBatch.QueueBatchSize = ZeCommandListBatchCopyConfig.startSize ();
11351135}
11361136
1137+ // Reset signalled command lists in the queue and put them to the cache of
1138+ // command lists. A caller must not lock the queue mutex.
1139+ pi_result _pi_queue::resetCommandLists () {
1140+ // We check for command lists that have been already signalled, but have not
1141+ // been added to the available list yet. Each command list has a fence
1142+ // associated which tracks if a command list has completed dispatch of its
1143+ // commands and is ready for reuse. If a command list is found to have been
1144+ // signalled, then the command list & fence are reset and command list is
1145+ // returned to the command list cache. All events associated with command list
1146+ // are cleaned up if command list was reset.
1147+ std::scoped_lock Lock (this ->Mutex );
1148+ for (auto &&it = CommandListMap.begin (); it != CommandListMap.end (); ++it) {
1149+ // It is possible that the fence was already noted as signalled and
1150+ // reset. In that case the InUse flag will be false.
1151+ if (it->second .InUse ) {
1152+ ze_result_t ZeResult =
1153+ ZE_CALL_NOCHECK (zeFenceQueryStatus, (it->second .ZeFence ));
1154+ if (ZeResult == ZE_RESULT_SUCCESS) {
1155+ PI_CALL (resetCommandList (it, true ));
1156+ }
1157+ }
1158+ }
1159+ return PI_SUCCESS;
1160+ }
1161+
11371162// Retrieve an available command list to be used in a PI call.
11381163pi_result
11391164_pi_context::getAvailableCommandList (pi_queue Queue,
@@ -1213,28 +1238,6 @@ _pi_context::getAvailableCommandList(pi_queue Queue,
12131238 }
12141239 }
12151240
1216- // If there are no available command lists in the cache, then we check for
1217- // command lists that have already signalled, but have not been added to the
1218- // available list yet. Each command list has a fence associated which tracks
1219- // if a command list has completed dispatch of its commands and is ready for
1220- // reuse. If a command list is found to have been signalled, then the
1221- // command list & fence are reset and we return.
1222- for (auto it = Queue->CommandListMap .begin ();
1223- it != Queue->CommandListMap .end (); ++it) {
1224- // Make sure this is the command list type needed.
1225- if (UseCopyEngine != it->second .isCopy (Queue))
1226- continue ;
1227-
1228- ze_result_t ZeResult =
1229- ZE_CALL_NOCHECK (zeFenceQueryStatus, (it->second .ZeFence ));
1230- if (ZeResult == ZE_RESULT_SUCCESS) {
1231- Queue->resetCommandList (it, false );
1232- CommandList = it;
1233- CommandList->second .InUse = true ;
1234- return PI_SUCCESS;
1235- }
1236- }
1237-
12381241 // If there are no available command lists nor signalled command lists, then
12391242 // we must create another command list if we have not exceed the maximum
12401243 // command lists we can create.
@@ -3392,51 +3395,54 @@ pi_result piQueueFinish(pi_queue Queue) {
33923395
33933396 Queue->synchronize ();
33943397 return PI_SUCCESS;
3395- }
3396-
3397- std::unique_lock Lock (Queue->Mutex );
3398- std::vector<ze_command_queue_handle_t > ZeQueues;
3398+ } else {
3399+ std::unique_lock Lock (Queue->Mutex );
3400+ std::vector<ze_command_queue_handle_t > ZeQueues;
33993401
3400- // execute any command list that may still be open.
3401- if (auto Res = Queue->executeAllOpenCommandLists ())
3402- return Res;
3402+ // execute any command list that may still be open.
3403+ if (auto Res = Queue->executeAllOpenCommandLists ())
3404+ return Res;
34033405
3404- // Make a copy of queues to sync and release the lock.
3405- ZeQueues = Queue->CopyQueueGroup .ZeQueues ;
3406- std::copy (Queue->ComputeQueueGroup .ZeQueues .begin (),
3407- Queue->ComputeQueueGroup .ZeQueues .end (),
3408- std::back_inserter (ZeQueues));
3406+ // Make a copy of queues to sync and release the lock.
3407+ ZeQueues = Queue->CopyQueueGroup .ZeQueues ;
3408+ std::copy (Queue->ComputeQueueGroup .ZeQueues .begin (),
3409+ Queue->ComputeQueueGroup .ZeQueues .end (),
3410+ std::back_inserter (ZeQueues));
34093411
3410- // Remember the last command's event.
3411- auto LastCommandEvent = Queue->LastCommandEvent ;
3412+ // Remember the last command's event.
3413+ auto LastCommandEvent = Queue->LastCommandEvent ;
34123414
3413- // Don't hold a lock to the queue's mutex while waiting.
3414- // This allows continue working with the queue from other threads.
3415- // TODO: this currently exhibits some issues in the driver, so
3416- // we control this with an env var. Remove this control when
3417- // we settle one way or the other.
3418- static bool HoldLock =
3419- std::getenv (" SYCL_PI_LEVEL_ZERO_QUEUE_FINISH_HOLD_LOCK" ) != nullptr ;
3420- if (!HoldLock) {
3421- Lock.unlock ();
3422- }
3415+ // Don't hold a lock to the queue's mutex while waiting.
3416+ // This allows continue working with the queue from other threads.
3417+ // TODO: this currently exhibits some issues in the driver, so
3418+ // we control this with an env var. Remove this control when
3419+ // we settle one way or the other.
3420+ static bool HoldLock =
3421+ std::getenv (" SYCL_PI_LEVEL_ZERO_QUEUE_FINISH_HOLD_LOCK" ) != nullptr ;
3422+ if (!HoldLock) {
3423+ Lock.unlock ();
3424+ }
34233425
3424- for (auto ZeQueue : ZeQueues) {
3425- if (ZeQueue)
3426- ZE_CALL (zeHostSynchronize, (ZeQueue));
3427- }
3426+ for (auto ZeQueue : ZeQueues) {
3427+ if (ZeQueue)
3428+ ZE_CALL (zeHostSynchronize, (ZeQueue));
3429+ }
34283430
3429- // Prevent unneeded already finished events to show up in the wait list.
3430- // We can only do so if nothing else was submitted to the queue
3431- // while we were synchronizing it.
3432- if (!HoldLock) {
3433- std::scoped_lock Lock (Queue->Mutex );
3434- if (LastCommandEvent == Queue->LastCommandEvent ) {
3431+ // Prevent unneeded already finished events to show up in the wait list.
3432+ // We can only do so if nothing else was submitted to the queue
3433+ // while we were synchronizing it.
3434+ if (!HoldLock) {
3435+ std::scoped_lock Lock (Queue->Mutex );
3436+ if (LastCommandEvent == Queue->LastCommandEvent ) {
3437+ Queue->LastCommandEvent = nullptr ;
3438+ }
3439+ } else {
34353440 Queue->LastCommandEvent = nullptr ;
34363441 }
3437- } else {
3438- Queue->LastCommandEvent = nullptr ;
34393442 }
3443+ // Reset signalled command lists and return them back to the cache of
3444+ // available command lists.
3445+ Queue->resetCommandLists ();
34403446 return PI_SUCCESS;
34413447}
34423448
@@ -5865,25 +5871,30 @@ pi_result piEnqueueEventsWait(pi_queue Queue, pi_uint32 NumEventsInWaitList,
58655871 return Queue->executeCommandList (CommandList);
58665872 }
58675873
5868- // If wait-list is empty, then this particular command should wait until
5869- // all previous enqueued commands to the command-queue have completed.
5870- //
5871- // TODO: find a way to do that without blocking the host.
5874+ {
5875+ // If wait-list is empty, then this particular command should wait until
5876+ // all previous enqueued commands to the command-queue have completed.
5877+ //
5878+ // TODO: find a way to do that without blocking the host.
58725879
5873- // Lock automatically releases when this goes out of scope.
5874- std::scoped_lock lock (Queue->Mutex );
5880+ // Lock automatically releases when this goes out of scope.
5881+ std::scoped_lock lock (Queue->Mutex );
58755882
5876- auto Res = createEventAndAssociateQueue (Queue, Event, PI_COMMAND_TYPE_USER,
5877- Queue->CommandListMap .end ());
5878- if (Res != PI_SUCCESS)
5879- return Res;
5883+ auto Res = createEventAndAssociateQueue (Queue, Event, PI_COMMAND_TYPE_USER,
5884+ Queue->CommandListMap .end ());
5885+ if (Res != PI_SUCCESS)
5886+ return Res;
5887+
5888+ Queue->synchronize ();
58805889
5881- Queue->synchronize ();
5890+ Queue->LastCommandEvent = *Event;
5891+
5892+ ZE_CALL (zeEventHostSignal, ((*Event)->ZeEvent ));
5893+ (*Event)->Completed = true ;
5894+ }
58825895
5883- Queue->LastCommandEvent = *Event ;
5896+ Queue->resetCommandLists () ;
58845897
5885- ZE_CALL (zeEventHostSignal, ((*Event)->ZeEvent ));
5886- (*Event)->Completed = true ;
58875898 return PI_SUCCESS;
58885899}
58895900
0 commit comments