@@ -99,6 +99,16 @@ static const bool UseMultipleCmdlistBarriers = [] {
9999 return std::stoi (UseMultipleCmdlistBarriersFlag) > 0 ;
100100}();
101101
// This is an experimental option that allows disabling the caching of events
// in the context. It is controlled by the environment variable
// SYCL_PI_LEVEL_ZERO_DISABLE_EVENTS_CACHING:
//   - unset, or a value that does not parse to a non-zero integer: caching
//     stays enabled (the flag is false);
//   - any value that parses to a non-zero integer: caching is disabled.
static const bool DisableEventsCaching = [] {
  // The variable name must match exactly; a stray leading space inside the
  // literal would make the lookup miss the documented variable.
  const char *DisableEventsCachingFlag =
      std::getenv("SYCL_PI_LEVEL_ZERO_DISABLE_EVENTS_CACHING");
  if (!DisableEventsCachingFlag)
    return false;
  // Use strtol rather than stoi: stoi throws on non-numeric or out-of-range
  // input, and an exception escaping a static initializer terminates the
  // process. strtol yields 0 for non-numeric text, which keeps caching on.
  return std::strtol(DisableEventsCachingFlag, nullptr, 10) != 0;
}();
111+
102112// This class encapsulates actions taken along with a call to Level Zero API.
103113class ZeCall {
104114private:
@@ -468,10 +478,18 @@ _pi_context::getFreeSlotInExistingOrNewPool(ze_event_pool_handle_t &Pool,
468478 std::list<ze_event_pool_handle_t > *ZePoolCache =
469479 getZeEventPoolCache (HostVisible, ProfilingEnabled);
470480
471- // Remove full pool from the cache.
472481 if (!ZePoolCache->empty ()) {
473482 if (NumEventsAvailableInEventPool[ZePoolCache->front ()] == 0 ) {
474- ZePoolCache->erase (ZePoolCache->begin ());
483+ if (DisableEventsCaching) {
484+ // Remove full pool from the cache if events caching is disabled.
485+ ZePoolCache->erase (ZePoolCache->begin ());
486+ } else {
487+ // If event caching is enabled then we don't destroy events so there is
488+ // no need to remove pool from the cache and add it back when it has
489+ // available slots. Just keep it in the tail of the cache so that all
490+ // pools can be destroyed during context destruction.
491+ ZePoolCache->push_front (nullptr );
492+ }
475493 }
476494 }
477495 if (ZePoolCache->empty ()) {
@@ -868,7 +886,18 @@ pi_result _pi_context::initialize() {
868886pi_result _pi_context::finalize () {
869887 // This function is called when pi_context is deallocated, piContextRelease.
870888 // There could be some memory that may have not been deallocated.
871- // For example, event pool caches would be still alive.
889+ // For example, event and event pool caches would be still alive.
890+
891+ if (!DisableEventsCaching) {
892+ std::scoped_lock Lock (EventCacheMutex);
893+ for (auto &EventCache : EventCaches) {
894+ for (auto Event : EventCache) {
895+ ZE_CALL (zeEventDestroy, (Event->ZeEvent ));
896+ delete Event;
897+ }
898+ EventCache.clear ();
899+ }
900+ }
872901 {
873902 std::scoped_lock Lock (ZeEventPoolCacheMutex);
874903 for (auto &ZePoolCache : ZeEventPoolCache) {
@@ -5430,24 +5459,67 @@ _pi_event::getOrCreateHostVisibleEvent(ze_event_handle_t &ZeHostVisibleEvent) {
54305459 return PI_SUCCESS;
54315460}
54325461
5462+ pi_result _pi_event::reset () {
5463+ Queue = nullptr ;
5464+ CleanedUp = false ;
5465+ Completed = false ;
5466+ CommandData = nullptr ;
5467+ CommandType = PI_COMMAND_TYPE_USER;
5468+ WaitList = {};
5469+ RefCount.reset ();
5470+
5471+ if (!isHostVisible ())
5472+ HostVisibleEvent = nullptr ;
5473+
5474+ ZE_CALL (zeEventHostReset, (ZeEvent));
5475+ return PI_SUCCESS;
5476+ }
5477+
5478+ pi_event _pi_context::getEventFromCache (bool HostVisible, bool WithProfiling) {
5479+ std::scoped_lock Lock (EventCacheMutex);
5480+ auto Cache = getEventCache (HostVisible, WithProfiling);
5481+ if (Cache->empty ())
5482+ return nullptr ;
5483+
5484+ auto It = Cache->begin ();
5485+ pi_event Event = *It;
5486+ Cache->erase (It);
5487+ return Event;
5488+ }
5489+
5490+ void _pi_context::addEventToCache (pi_event Event) {
5491+ std::scoped_lock Lock (EventCacheMutex);
5492+ auto Cache =
5493+ getEventCache (Event->isHostVisible (), Event->isProfilingEnabled ());
5494+ Event->reset ();
5495+ Cache->emplace_back (Event);
5496+ }
5497+
54335498// Helper function for creating a PI event.
54345499// The "Queue" argument specifies the PI queue where a command is submitted.
54355500// The "HostVisible" argument specifies if event needs to be allocated from
54365501// a host-visible pool.
54375502//
54385503static pi_result EventCreate (pi_context Context, pi_queue Queue,
54395504 bool HostVisible, pi_event *RetEvent) {
5440-
54415505 bool ProfilingEnabled =
54425506 !Queue || (Queue->Properties & PI_QUEUE_PROFILING_ENABLE) != 0 ;
54435507
5444- size_t Index = 0 ;
5508+ if (auto CachedEvent =
5509+ Context->getEventFromCache (HostVisible, ProfilingEnabled)) {
5510+ *RetEvent = CachedEvent;
5511+ return PI_SUCCESS;
5512+ }
5513+
5514+ ze_event_handle_t ZeEvent;
54455515 ze_event_pool_handle_t ZeEventPool = {};
5516+
5517+ size_t Index = 0 ;
5518+
54465519 if (auto Res = Context->getFreeSlotInExistingOrNewPool (
54475520 ZeEventPool, Index, HostVisible, ProfilingEnabled))
54485521 return Res;
54495522
5450- ze_event_handle_t ZeEvent;
54515523 ZeStruct<ze_event_desc_t > ZeEventDesc;
54525524 ZeEventDesc.index = Index;
54535525 ZeEventDesc.wait = 0 ;
@@ -5456,9 +5528,9 @@ static pi_result EventCreate(pi_context Context, pi_queue Queue,
54565528 ZeEventDesc.signal = ZE_EVENT_SCOPE_FLAG_HOST;
54575529 } else {
54585530 //
5459- // Set the scope to "device" for every event. This is sufficient for global
5460- // device access and peer device access. If needed to be seen on the host
5461- // we are doing special handling, see EventsScope options.
5531+ // Set the scope to "device" for every event. This is sufficient for
5532+ // global device access and peer device access. If needed to be seen on
5533+ // the host we are doing special handling, see EventsScope options.
54625534 //
54635535 // TODO: see if "sub-device" (ZE_EVENT_SCOPE_FLAG_SUBDEVICE) can better be
54645536 // used in some circumstances.
@@ -5819,7 +5891,12 @@ pi_result piEventRelease(pi_event Event) {
58195891 Event->CommandData = nullptr ;
58205892 }
58215893 if (Event->OwnZeEvent ) {
5822- ZE_CALL (zeEventDestroy, (Event->ZeEvent ));
5894+ if (DisableEventsCaching) {
5895+ ZE_CALL (zeEventDestroy, (Event->ZeEvent ));
5896+ auto Context = Event->Context ;
5897+ if (auto Res = Context->decrementUnreleasedEventsInPool (Event))
5898+ return Res;
5899+ }
58235900 }
58245901 // It is possible that host-visible event was never created.
58255902 // In case it was check if that's different from this same event
@@ -5829,18 +5906,19 @@ pi_result piEventRelease(pi_event Event) {
58295906 PI_CALL (piEventRelease (Event->HostVisibleEvent ));
58305907 }
58315908
5832- auto Context = Event->Context ;
5833- if (auto Res = Context->decrementUnreleasedEventsInPool (Event))
5834- return Res;
5835-
58365909 // We intentionally incremented the reference counter when an event is
58375910 // created so that we can avoid pi_queue is released before the associated
58385911 // pi_event is released. Here we have to decrement it so pi_queue
58395912 // can be released successfully.
58405913 if (Event->Queue ) {
58415914 PI_CALL (piQueueReleaseInternal (Event->Queue ));
58425915 }
5843- delete Event;
5916+
5917+ if (DisableEventsCaching || !Event->OwnZeEvent ) {
5918+ delete Event;
5919+ } else {
5920+ Event->Context ->addEventToCache (Event);
5921+ }
58445922
58455923 return PI_SUCCESS;
58465924}
0 commit comments