@@ -449,6 +449,59 @@ ur_exp_command_buffer_handle_t_::ur_exp_command_buffer_handle_t_(
449449 urDeviceRetain (Device);
450450}
451451
452+ ur_result_t ur_exp_command_buffer_handle_t_::cleanupCommandBufferResources () {
453+ ur_result_t RetResult = UR_RESULT_SUCCESS;
454+ if (SignalEvent) {
455+ ur_result_t Result = CleanupCompletedEvent (SignalEvent, false );
456+ RetResult = (Result != UR_RESULT_SUCCESS) ? Result : RetResult;
457+ }
458+ if (WaitEvent) {
459+ ur_result_t Result = CleanupCompletedEvent (WaitEvent, false );
460+ RetResult = (Result != UR_RESULT_SUCCESS) ? Result : RetResult;
461+ }
462+ if (AllResetEvent) {
463+ ur_result_t Result = CleanupCompletedEvent (AllResetEvent, false );
464+ RetResult = (Result != UR_RESULT_SUCCESS) ? Result : RetResult;
465+ }
466+
467+ // Release events added to the command_buffer
468+ for (auto &Sync : SyncPoints) {
469+ auto &Event = Sync.second ;
470+ ur_result_t Result = CleanupCompletedEvent (Event, false );
471+ RetResult = (Result != UR_RESULT_SUCCESS) ? Result : RetResult;
472+ }
473+
474+ auto ReleaseIndirectMem = [](ur_kernel_handle_t Kernel) {
475+ if (IndirectAccessTrackingEnabled) {
476+ // urKernelRelease is called by CleanupCompletedEvent(Event) as soon as
477+ // kernel execution has finished. This is the place where we need to
478+ // release memory allocations. If kernel is not in use (not submitted by
479+ // some other thread) then release referenced memory allocations. As a
480+ // result, memory can be deallocated and context can be removed from
481+ // container in the platform. That's why we need to lock a mutex here.
482+ ur_platform_handle_t Platform = Kernel->Program ->Context ->getPlatform ();
483+ std::scoped_lock<ur_shared_mutex> ContextsLock (Platform->ContextsMutex );
484+
485+ if (--Kernel->SubmissionsCount == 0 ) {
486+ // Kernel is not submitted for execution, release referenced memory
487+ // allocations.
488+ for (auto &MemAlloc : Kernel->MemAllocs ) {
489+ // std::pair<void *const, MemAllocRecord> *, Hash
490+ USMFreeHelper (MemAlloc->second .Context , MemAlloc->first ,
491+ MemAlloc->second .OwnNativeHandle );
492+ }
493+ Kernel->MemAllocs .clear ();
494+ }
495+ }
496+ };
497+
498+ for (auto &AssociatedKernel : KernelsList) {
499+ ReleaseIndirectMem (AssociatedKernel);
500+ }
501+
502+ return RetResult;
503+ }
504+
452505// The ur_exp_command_buffer_handle_t_ destructor releases all the memory
453506// objects allocated for command_buffer management.
454507ur_exp_command_buffer_handle_t_::~ur_exp_command_buffer_handle_t_ () {
@@ -475,22 +528,18 @@ ur_exp_command_buffer_handle_t_::~ur_exp_command_buffer_handle_t_() {
475528
476529 // Release additional signal and wait events used by command_buffer
477530 if (SignalEvent) {
478- CleanupCompletedEvent (SignalEvent, false );
479531 urEventReleaseInternal (SignalEvent);
480532 }
481533 if (WaitEvent) {
482- CleanupCompletedEvent (WaitEvent, false );
483534 urEventReleaseInternal (WaitEvent);
484535 }
485536 if (AllResetEvent) {
486- CleanupCompletedEvent (AllResetEvent, false );
487537 urEventReleaseInternal (AllResetEvent);
488538 }
489539
490540 // Release events added to the command_buffer
491541 for (auto &Sync : SyncPoints) {
492542 auto &Event = Sync.second ;
493- CleanupCompletedEvent (Event, false );
494543 urEventReleaseInternal (Event);
495544 }
496545
@@ -500,32 +549,7 @@ ur_exp_command_buffer_handle_t_::~ur_exp_command_buffer_handle_t_() {
500549 ZE_CALL_NOCHECK (zeFenceDestroy, (ZeFence));
501550 }
502551
503- auto ReleaseIndirectMem = [](ur_kernel_handle_t Kernel) {
504- if (IndirectAccessTrackingEnabled) {
505- // urKernelRelease is called by CleanupCompletedEvent(Event) as soon as
506- // kernel execution has finished. This is the place where we need to
507- // release memory allocations. If kernel is not in use (not submitted by
508- // some other thread) then release referenced memory allocations. As a
509- // result, memory can be deallocated and context can be removed from
510- // container in the platform. That's why we need to lock a mutex here.
511- ur_platform_handle_t Platform = Kernel->Program ->Context ->getPlatform ();
512- std::scoped_lock<ur_shared_mutex> ContextsLock (Platform->ContextsMutex );
513-
514- if (--Kernel->SubmissionsCount == 0 ) {
515- // Kernel is not submitted for execution, release referenced memory
516- // allocations.
517- for (auto &MemAlloc : Kernel->MemAllocs ) {
518- // std::pair<void *const, MemAllocRecord> *, Hash
519- USMFreeHelper (MemAlloc->second .Context , MemAlloc->first ,
520- MemAlloc->second .OwnNativeHandle );
521- }
522- Kernel->MemAllocs .clear ();
523- }
524- }
525- };
526-
527552 for (auto &AssociatedKernel : KernelsList) {
528- ReleaseIndirectMem (AssociatedKernel);
529553 urKernelRelease (AssociatedKernel);
530554 }
531555}
@@ -727,8 +751,9 @@ urCommandBufferReleaseExp(ur_exp_command_buffer_handle_t CommandBuffer) {
727751 if (!CommandBuffer->RefCount .decrementAndTest ())
728752 return UR_RESULT_SUCCESS;
729753
754+ ur_result_t Result = CommandBuffer->cleanupCommandBufferResources ();
730755 delete CommandBuffer;
731- return UR_RESULT_SUCCESS ;
756+ return Result ;
732757}
733758
734759UR_APIEXPORT ur_result_t UR_APICALL
0 commit comments