Skip to content
Merged
Show file tree
Hide file tree
Changes from 2 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
108 changes: 93 additions & 15 deletions sycl/plugins/level_zero/pi_level_zero.cpp
100755 → 100644
Original file line number Diff line number Diff line change
Expand Up @@ -99,6 +99,16 @@ static const bool UseMultipleCmdlistBarriers = [] {
return std::stoi(UseMultipleCmdlistBarriersFlag) > 0;
}();

// Experimental option: setting the environment variable
// SYCL_PI_LEVEL_ZERO_DISABLE_EVENTS_CACHING to a non-zero integer disables
// caching of events in the context. When the variable is not set, caching
// stays enabled.
static const bool DisableEventsCaching = [] {
  if (const char *EnvValue =
          std::getenv("SYCL_PI_LEVEL_ZERO_DISABLE_EVENTS_CACHING"))
    return std::stoi(EnvValue) != 0;
  return false;
}();

// This class encapsulates actions taken along with a call to Level Zero API.
class ZeCall {
private:
Expand Down Expand Up @@ -468,10 +478,18 @@ _pi_context::getFreeSlotInExistingOrNewPool(ze_event_pool_handle_t &Pool,
std::list<ze_event_pool_handle_t> *ZePoolCache =
getZeEventPoolCache(HostVisible, ProfilingEnabled);

// Remove full pool from the cache.
if (!ZePoolCache->empty()) {
if (NumEventsAvailableInEventPool[ZePoolCache->front()] == 0) {
ZePoolCache->erase(ZePoolCache->begin());
if (DisableEventsCaching) {
// Remove full pool from the cache if events caching is disabled.
ZePoolCache->erase(ZePoolCache->begin());
} else {
// If event caching is enabled then we don't destroy events so there is
// no need to remove pool from the cache and add it back when it has
// available slots. Just keep it in the tail of the cache so that all
// pools can be destroyed during context destruction.
ZePoolCache->push_front(nullptr);
}
}
}
if (ZePoolCache->empty()) {
Expand Down Expand Up @@ -868,7 +886,18 @@ pi_result _pi_context::initialize() {
pi_result _pi_context::finalize() {
// This function is called when pi_context is deallocated, piContextRelease.
// There could be some memory that may have not been deallocated.
// For example, event pool caches would be still alive.
// For example, event and event pool caches would be still alive.

if (!DisableEventsCaching) {
std::scoped_lock Lock(EventCacheMutex);
for (auto &EventCache : EventCaches) {
for (auto Event : EventCache) {
ZE_CALL(zeEventDestroy, (Event->ZeEvent));
delete Event;
}
EventCache.clear();
}
}
{
std::scoped_lock Lock(ZeEventPoolCacheMutex);
for (auto &ZePoolCache : ZeEventPoolCache) {
Expand Down Expand Up @@ -5430,24 +5459,67 @@ _pi_event::getOrCreateHostVisibleEvent(ze_event_handle_t &ZeHostVisibleEvent) {
return PI_SUCCESS;
}

// Puts this _pi_event back into its initial state so the object can be
// handed out again: command/queue bookkeeping is cleared, the reference count
// is set back to 1 and the Level Zero event is reset through
// zeEventHostReset. Returns PI_SUCCESS once the reset call has gone through.
pi_result _pi_event::reset() {
  // Forget the command and queue this event was previously associated with.
  CommandType = PI_COMMAND_TYPE_USER;
  CommandData = nullptr;
  Queue = nullptr;
  WaitList = {};

  // Clear the status flags and start over with a single reference.
  Completed = false;
  CleanedUp = false;
  RefCount.reset(1);

  // HostVisibleEvent is dropped unless isHostVisible() reports true.
  if (!isHostVisible())
    HostVisibleEvent = nullptr;

  ZE_CALL(zeEventHostReset, (ZeEvent));
  return PI_SUCCESS;
}

// Removes and returns the event at the front of the cache selected by the
// HostVisible/WithProfiling combination. Returns nullptr when that cache is
// empty. The lookup and removal are done while holding EventCacheMutex.
pi_event _pi_context::getEventFromCache(bool HostVisible, bool WithProfiling) {
  std::scoped_lock Lock(EventCacheMutex);
  auto *Events = getEventCache(HostVisible, WithProfiling);
  if (!Events->empty()) {
    pi_event Reused = Events->front();
    Events->pop_front();
    return Reused;
  }
  return nullptr;
}

// Resets the given event and appends it to the cache that matches its
// host-visibility and profiling settings. Runs under EventCacheMutex.
void _pi_context::addEventToCache(pi_event Event) {
  std::scoped_lock Lock(EventCacheMutex);

  // The destination cache is chosen from the event's properties before the
  // event is reset.
  const bool IsHostVisible = Event->isHostVisible();
  const bool HasProfiling = Event->isProfilingEnabled();
  auto *Events = getEventCache(IsHostVisible, HasProfiling);

  // Note: the pi_result returned by reset() is not checked here.
  Event->reset();
  Events->push_back(Event);
}

// Helper function for creating a PI event.
// The "Queue" argument specifies the PI queue where a command is submitted.
// The "HostVisible" argument specifies if event needs to be allocated from
// a host-visible pool.
//
static pi_result EventCreate(pi_context Context, pi_queue Queue,
bool HostVisible, pi_event *RetEvent) {

bool ProfilingEnabled =
!Queue || (Queue->Properties & PI_QUEUE_PROFILING_ENABLE) != 0;

size_t Index = 0;
if (auto CachedEvent =
Context->getEventFromCache(HostVisible, ProfilingEnabled)) {
*RetEvent = CachedEvent;
return PI_SUCCESS;
}

ze_event_handle_t ZeEvent;
ze_event_pool_handle_t ZeEventPool = {};

size_t Index = 0;

if (auto Res = Context->getFreeSlotInExistingOrNewPool(
ZeEventPool, Index, HostVisible, ProfilingEnabled))
return Res;

ze_event_handle_t ZeEvent;
ZeStruct<ze_event_desc_t> ZeEventDesc;
ZeEventDesc.index = Index;
ZeEventDesc.wait = 0;
Expand All @@ -5456,9 +5528,9 @@ static pi_result EventCreate(pi_context Context, pi_queue Queue,
ZeEventDesc.signal = ZE_EVENT_SCOPE_FLAG_HOST;
} else {
//
// Set the scope to "device" for every event. This is sufficient for global
// device access and peer device access. If needed to be seen on the host
// we are doing special handling, see EventsScope options.
// Set the scope to "device" for every event. This is sufficient for
// global device access and peer device access. If needed to be seen on
// the host we are doing special handling, see EventsScope options.
//
// TODO: see if "sub-device" (ZE_EVENT_SCOPE_FLAG_SUBDEVICE) can better be
// used in some circumstances.
Expand Down Expand Up @@ -5819,7 +5891,12 @@ pi_result piEventRelease(pi_event Event) {
Event->CommandData = nullptr;
}
if (Event->OwnZeEvent) {
ZE_CALL(zeEventDestroy, (Event->ZeEvent));
if (DisableEventsCaching) {
ZE_CALL(zeEventDestroy, (Event->ZeEvent));
auto Context = Event->Context;
if (auto Res = Context->decrementUnreleasedEventsInPool(Event))
return Res;
}
}
// It is possible that host-visible event was never created.
// In case it was check if that's different from this same event
Expand All @@ -5829,18 +5906,19 @@ pi_result piEventRelease(pi_event Event) {
PI_CALL(piEventRelease(Event->HostVisibleEvent));
}

auto Context = Event->Context;
if (auto Res = Context->decrementUnreleasedEventsInPool(Event))
return Res;

// We intentionally incremented the reference counter when an event is
// created so that we can avoid pi_queue is released before the associated
// pi_event is released. Here we have to decrement it so pi_queue
// can be released successfully.
if (Event->Queue) {
PI_CALL(piQueueReleaseInternal(Event->Queue));
}
delete Event;

if (DisableEventsCaching || !Event->OwnZeEvent) {
delete Event;
} else {
Event->Context->addEventToCache(Event);
}

return PI_SUCCESS;
}
Expand Down
25 changes: 25 additions & 0 deletions sycl/plugins/level_zero/pi_level_zero.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -276,6 +276,8 @@ template <class T> struct ZeCache : private T {
struct ReferenceCounter {
ReferenceCounter(pi_uint32 InitVal) : RefCount{InitVal} {}

void reset(pi_uint32 InitVal) { RefCount = InitVal; }
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

this is strange that it takes an arbitrary integer (I think you only call it with "1")
why is this needed?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I agree, fixed.


// Used when retaining an object.
void increment() { RefCount++; }

Expand Down Expand Up @@ -750,6 +752,12 @@ struct _pi_context : _pi_object {
// when kernel has finished execution.
std::unordered_map<void *, MemAllocRecord> MemAllocs;

// Get pi_event from cache.
pi_event getEventFromCache(bool HostVisible, bool WithProfiling);

// Add pi_event to cache.
void addEventToCache(pi_event);

private:
// If context contains one device then return this device.
// If context contains sub-devices of the same device, then return this parent
Expand Down Expand Up @@ -798,6 +806,20 @@ struct _pi_context : _pi_object {
// Mutex to control operations on event pool caches and the helper maps
// holding the current pool usage counts.
pi_mutex ZeEventPoolCacheMutex;

// Mutex to control operations on event caches.
pi_mutex EventCacheMutex;

// Caches for events.
std::vector<std::list<pi_event>> EventCaches{4};

// Returns a pointer to the event cache that corresponds to the requested
// host-visibility and profiling mode. Slots in EventCaches are laid out as:
//   [0] host-visible, with profiling      [1] host-visible, no profiling
//   [2] not host-visible, with profiling  [3] not host-visible, no profiling
auto getEventCache(bool HostVisible, bool WithProfiling) {
  const size_t VisibilityBase = HostVisible ? 0 : 2;
  const size_t ProfilingOffset = WithProfiling ? 0 : 1;
  return &EventCaches[VisibilityBase + ProfilingOffset];
}
};

struct _pi_queue : _pi_object {
Expand Down Expand Up @@ -1350,6 +1372,9 @@ struct _pi_event : _pi_object {
// L0 event (if any) is not guranteed to have been signalled, or
// being visible to the host at all.
bool Completed = {false};

// Reset _pi_event object.
pi_result reset();
};

struct _pi_program : _pi_object {
Expand Down