From 610acbe99ab97784c664db44515eea50b40b07cd Mon Sep 17 00:00:00 2001 From: Sergey V Maslov Date: Thu, 20 Jan 2022 14:59:31 -0800 Subject: [PATCH 1/6] [SYCL] mode where only last command in each batch yields a host-visible event Signed-off-by: Sergey V Maslov --- sycl/plugins/level_zero/pi_level_zero.cpp | 147 ++++++++++++++++------ 1 file changed, 111 insertions(+), 36 deletions(-) diff --git a/sycl/plugins/level_zero/pi_level_zero.cpp b/sycl/plugins/level_zero/pi_level_zero.cpp index cb279a3ba0c4d..90c6868e86af8 100644 --- a/sycl/plugins/level_zero/pi_level_zero.cpp +++ b/sycl/plugins/level_zero/pi_level_zero.cpp @@ -186,12 +186,22 @@ static void zePrint(const char *Format, ...) { } } -// Controls whether device-scope events are used. -static const bool ZeAllHostVisibleEvents = [] { +// Controls whether device-scope events are used, and how. +static const enum EventsScope { + AllHostVisible, + OnDemandHostVisibleProxy, + LastCommandInBatchHostVisible +} EventsScope = [] { const auto DeviceEventsStr = std::getenv("SYCL_PI_LEVEL_ZERO_DEVICE_SCOPE_EVENTS"); - bool result = (DeviceEventsStr ? (std::atoi(DeviceEventsStr) == 0) : true); - return result; + + switch (DeviceEventsStr ? 
std::atoi(DeviceEventsStr) : 2) { + case 1: + return OnDemandHostVisibleProxy; + case 2: + return LastCommandInBatchHostVisible; + } + return AllHostVisible; }(); // Maximum number of events that can be present in an event ZePool is captured @@ -415,7 +425,7 @@ _pi_context::getFreeSlotInExistingOrNewPool(ze_event_pool_handle_t &Pool, ze_event_pool_flag_t ZePoolFlag = {}; std::list *ZePoolCache; - if (ZeAllHostVisibleEvents) { + if (EventsScope == AllHostVisible) { ZePoolFlag = ZE_EVENT_POOL_FLAG_HOST_VISIBLE; ZePoolCache = &ZeEventPoolCache; } else if (HostVisible) { @@ -479,7 +489,8 @@ pi_result _pi_context::decrementUnreleasedEventsInPool(pi_event Event) { NumEventsAvailableInEventPool[Event->ZeEventPool] = MaxNumEventsPerPool; } - if (Event->ZeHostVisibleEventPool) { + if (Event->ZeHostVisibleEventPool && + EventsScope == OnDemandHostVisibleProxy) { if (NumEventsUnreleasedInEventPool[Event->ZeHostVisibleEventPool] == 0) die("Invalid host visible event release: host visible event pool doesn't " "have unreleased events"); @@ -1313,6 +1324,52 @@ pi_result _pi_queue::executeCommandList(pi_command_list_ptr_t CommandList, KernelsToBeSubmitted.clear(); } + // In this mode all inner-batch events have device visibility only, + // and we want the last command in the batch to signal a host-visible + // event that anybody waiting for any event in the batch will + // really be using. + // + if (EventsScope == LastCommandInBatchHostVisible) { + // Create a "proxy" host-visible event. + // + size_t Index; + ze_event_pool_handle_t ZeEventPool = {}; + if (auto Res = + Context->getFreeSlotInExistingOrNewPool(ZeEventPool, Index, true)) + return Res; + + ze_event_handle_t ZeHostVisibleEvent = nullptr; + ZeStruct ZeEventDesc; + ZeEventDesc.signal = ZE_EVENT_SCOPE_FLAG_HOST; + ZeEventDesc.wait = 0; + ZeEventDesc.index = Index; + ZE_CALL(zeEventCreate, (ZeEventPool, &ZeEventDesc, &ZeHostVisibleEvent)); + + // Wrap it into PI event as we need it reference-counted. 
+ pi_event HostVisibleEvent = new _pi_event( + ZeHostVisibleEvent, ZeEventPool, Context, PI_COMMAND_TYPE_USER, true); + + // Update each command's event in the command-list to "see" this + // proxy event as a host-visible counterpart. + for (auto &Event : CommandList->second.EventList) { + Event->ZeHostVisibleEventPool = ZeEventPool; + Event->ZeHostVisibleEvent = + reinterpret_cast(HostVisibleEvent); + PI_CALL(piEventRetain(HostVisibleEvent)); + } + + // Release this event once such that it is destroyed when the last event + // from this command-list is destroyed. + // PI_CALL(piEventRelease(HostVisibleEvent)); + + // Finally set to signal the host-visible event at the end of the + // command-list. + // TODO: see if we need a barrier here (or explicit wait for all events in + // the batch). + ZE_CALL(zeCommandListAppendSignalEvent, + (CommandList->first, ZeHostVisibleEvent)); + } + // Close the command list and have it ready for dispatch. ZE_CALL(zeCommandListClose, (CommandList->first)); // Offload command list to the GPU for asynchronous execution @@ -1767,8 +1824,11 @@ pi_result piPlatformsGet(pi_uint32 NumEntries, pi_platform *Platforms, if (NumPlatforms) *NumPlatforms = PiPlatformsCache->size(); - zePrint("Using %s events\n", - ZeAllHostVisibleEvents ? "all host-visible" : "device-only"); + zePrint("Using events scope: %s\n", + EventsScope == AllHostVisible ? "all host-visible" + : EventsScope == OnDemandHostVisibleProxy + ? 
"on demand host-visible proxy" + : "only last command in a batch is host-visible"); return PI_SUCCESS; } @@ -4668,9 +4728,13 @@ pi_result piextKernelGetNativeHandle(pi_kernel Kernel, // Events // ze_event_handle_t _pi_event::getHostVisibleEvent() const { - if (ZeAllHostVisibleEvents) { + if (EventsScope == AllHostVisible) { return ZeEvent; } else if (ZeHostVisibleEvent) { + if (EventsScope == LastCommandInBatchHostVisible) { + auto Event = reinterpret_cast(ZeHostVisibleEvent); + return Event->ZeEvent; + } return ZeHostVisibleEvent; } else { return nullptr; @@ -4680,24 +4744,22 @@ ze_event_handle_t _pi_event::getHostVisibleEvent() const { pi_result _pi_event::getOrCreateHostVisibleEvent(ze_event_handle_t &HostVisibleEvent) { - if (ZeAllHostVisibleEvents) { + if (EventsScope == AllHostVisible) { HostVisibleEvent = ZeEvent; } else if (ZeHostVisibleEvent) { HostVisibleEvent = ZeHostVisibleEvent; } else { + if (EventsScope != OnDemandHostVisibleProxy) + die("getOrCreateHostVisibleEvent: missing host-visible event"); + + // Create a "proxy" host-visible event on demand. + // size_t Index; ze_event_pool_handle_t ZeEventPool = {}; if (auto Res = Context->getFreeSlotInExistingOrNewPool(ZeEventPool, Index, true)) return Res; - // Create a "proxy" host-visible event. - // - // TODO: consider creating just single host-visible proxy event to - // represent multiple device-scope events. E.g. have a host-visible - // event at the end of each command-list to represent device-scope - // events from every command in that command-list. - // ZeStruct ZeEventDesc; ZeEventDesc.signal = ZE_EVENT_SCOPE_FLAG_HOST; ZeEventDesc.wait = 0; @@ -4745,17 +4807,18 @@ pi_result piEventCreate(pi_context Context, pi_event *RetEvent) { ZeStruct ZeEventDesc; ZeEventDesc.index = Index; ZeEventDesc.wait = 0; - // - // Set the scope to "device" for every event. This is sufficient for global - // device access and peer device access. 
If needed to be waited on the host - // we are doing special handling, see piEventsWait. - // - // TODO: see if "sub-device" (ZE_EVENT_SCOPE_FLAG_SUBDEVICE) can better be - // used in some circumstances. - // - if (ZeAllHostVisibleEvents) { + + if (EventsScope == AllHostVisible) { ZeEventDesc.signal = ZE_EVENT_SCOPE_FLAG_HOST; } else { + // + // Set the scope to "device" for every event. This is sufficient for global + // device access and peer device access. If needed to be seen on the host + // we are doing special handling, see EventsScope options. + // + // TODO: see if "sub-device" (ZE_EVENT_SCOPE_FLAG_SUBDEVICE) can better be + // used in some circumstances. + // ZeEventDesc.signal = 0; } @@ -5031,15 +5094,17 @@ pi_result piEventsWait(pi_uint32 NumEvents, const pi_event *EventList) { if (NumEvents && !EventList) { return PI_INVALID_EVENT; } - // Make sure to add all host-visible "proxy" event signals if needed. - // This ensures that all signalling commands are submitted below and - // thus proxy events can be waited without a deadlock. - // - for (uint32_t I = 0; I < NumEvents; I++) { - ze_event_handle_t ZeHostVisibleEvent; - if (auto Res = - EventList[I]->getOrCreateHostVisibleEvent(ZeHostVisibleEvent)) - return Res; + if (EventsScope == OnDemandHostVisibleProxy) { + // Make sure to add all host-visible "proxy" event signals if needed. + // This ensures that all signalling commands are submitted below and + // thus proxy events can be waited without a deadlock. 
+ // + for (uint32_t I = 0; I < NumEvents; I++) { + ze_event_handle_t ZeHostVisibleEvent; + if (auto Res = + EventList[I]->getOrCreateHostVisibleEvent(ZeHostVisibleEvent)) + return Res; + } } // Submit dependent open command lists for execution, if any for (uint32_t I = 0; I < NumEvents; I++) { @@ -5090,6 +5155,8 @@ pi_result piEventSetStatus(pi_event Event, pi_int32 ExecutionStatus) { } pi_result piEventRetain(pi_event Event) { + // fprintf(stderr, "piEventRetain(%p): %d\n", (void*)Event, + // (int)Event->RefCount); ++(Event->RefCount); return PI_SUCCESS; } @@ -5099,6 +5166,8 @@ pi_result piEventRelease(pi_event Event) { } static pi_result EventRelease(pi_event Event, pi_queue LockedQueue) { + // fprintf(stderr, "piEventRelease(%p): %d\n", (void*)Event, + // (int)Event->RefCount); PI_ASSERT(Event, PI_INVALID_EVENT); if (!Event->RefCount) { die("piEventRelease: called on a destroyed event"); @@ -5119,7 +5188,13 @@ static pi_result EventRelease(pi_event Event, pi_queue LockedQueue) { ZE_CALL(zeEventDestroy, (Event->ZeEvent)); } if (Event->ZeHostVisibleEvent) { - ZE_CALL(zeEventDestroy, (Event->ZeHostVisibleEvent)); + // Destroy the host-visible proxy event when it is owned by this event. 
+ if (EventsScope == OnDemandHostVisibleProxy) { + ZE_CALL(zeEventDestroy, (Event->ZeHostVisibleEvent)); + } else if (EventsScope == LastCommandInBatchHostVisible) { + PI_CALL(piEventRelease( + reinterpret_cast(Event->ZeHostVisibleEvent))); + } } auto Context = Event->Context; From cdbd9169b9677b78eaca3fc4594f928f5bbe2891 Mon Sep 17 00:00:00 2001 From: Sergey V Maslov Date: Fri, 21 Jan 2022 12:48:07 -0800 Subject: [PATCH 2/6] refactor into more production-worthy code Signed-off-by: Sergey V Maslov --- sycl/plugins/level_zero/pi_level_zero.cpp | 173 +++++++++------------- sycl/plugins/level_zero/pi_level_zero.hpp | 25 ++-- 2 files changed, 82 insertions(+), 116 deletions(-) diff --git a/sycl/plugins/level_zero/pi_level_zero.cpp b/sycl/plugins/level_zero/pi_level_zero.cpp index 90c6868e86af8..f0b354c3e5e7f 100644 --- a/sycl/plugins/level_zero/pi_level_zero.cpp +++ b/sycl/plugins/level_zero/pi_level_zero.cpp @@ -32,6 +32,8 @@ extern "C" { // Forward declarartions. static pi_result EventRelease(pi_event Event, pi_queue LockedQueue); static pi_result QueueRelease(pi_queue Queue, pi_queue LockedQueue); +static pi_result EventCreate(pi_context Context, bool HostVisible, + pi_event *RetEvent); } namespace { @@ -188,8 +190,17 @@ static void zePrint(const char *Format, ...) { // Controls whether device-scope events are used, and how. static const enum EventsScope { + // All events are created host-visible (the default mode) AllHostVisible, + // All events are created with device-scope and only when + // host waits them or queries their status that a proxy + // host-visible event is created and set to signal after + // original event signals. OnDemandHostVisibleProxy, + // All events are created with device-scope and only + // when a batch of commands is submitted for execution a + // last command in that batch is added to signal host-visible + // completion of each command in this batch. 
LastCommandInBatchHostVisible } EventsScope = [] { const auto DeviceEventsStr = @@ -425,14 +436,11 @@ _pi_context::getFreeSlotInExistingOrNewPool(ze_event_pool_handle_t &Pool, ze_event_pool_flag_t ZePoolFlag = {}; std::list *ZePoolCache; - if (EventsScope == AllHostVisible) { - ZePoolFlag = ZE_EVENT_POOL_FLAG_HOST_VISIBLE; - ZePoolCache = &ZeEventPoolCache; - } else if (HostVisible) { + if (HostVisible) { ZePoolFlag = ZE_EVENT_POOL_FLAG_HOST_VISIBLE; ZePoolCache = &ZeHostVisibleEventPoolCache; } else { - ZePoolCache = &ZeEventPoolCache; + ZePoolCache = &ZeDeviceScopeEventPoolCache; } // Remove full pool from the cache. @@ -478,31 +486,24 @@ pi_result _pi_context::decrementUnreleasedEventsInPool(pi_event Event) { return PI_SUCCESS; } + std::list *ZePoolCache; + if (Event->IsHostVisible()) { + ZePoolCache = &ZeHostVisibleEventPoolCache; + } else { + ZePoolCache = &ZeDeviceScopeEventPoolCache; + } + // Put the empty pool to the cache of the pools. std::lock_guard Lock(ZeEventPoolCacheMutex); if (NumEventsUnreleasedInEventPool[Event->ZeEventPool] == 0) die("Invalid event release: event pool doesn't have unreleased events"); if (--NumEventsUnreleasedInEventPool[Event->ZeEventPool] == 0) { - if (ZeEventPoolCache.front() != Event->ZeEventPool) { - ZeEventPoolCache.push_back(Event->ZeEventPool); + if (ZePoolCache->front() != Event->ZeEventPool) { + ZePoolCache->push_back(Event->ZeEventPool); } NumEventsAvailableInEventPool[Event->ZeEventPool] = MaxNumEventsPerPool; } - if (Event->ZeHostVisibleEventPool && - EventsScope == OnDemandHostVisibleProxy) { - if (NumEventsUnreleasedInEventPool[Event->ZeHostVisibleEventPool] == 0) - die("Invalid host visible event release: host visible event pool doesn't " - "have unreleased events"); - if (--NumEventsUnreleasedInEventPool[Event->ZeHostVisibleEventPool] == 0) { - if (ZeHostVisibleEventPoolCache.front() != - Event->ZeHostVisibleEventPool) { - ZeHostVisibleEventPoolCache.push_back(Event->ZeHostVisibleEventPool); - } - 
NumEventsAvailableInEventPool[Event->ZeHostVisibleEventPool] = - MaxNumEventsPerPool; - } - } return PI_SUCCESS; } @@ -799,12 +800,12 @@ pi_result _pi_context::finalize() { // For example, event pool caches would be still alive. { std::lock_guard Lock(ZeEventPoolCacheMutex); - for (auto &ZePool : ZeEventPoolCache) + for (auto &ZePool : ZeDeviceScopeEventPoolCache) ZE_CALL(zeEventPoolDestroy, (ZePool)); for (auto &ZePool : ZeHostVisibleEventPoolCache) ZE_CALL(zeEventPoolDestroy, (ZePool)); - ZeEventPoolCache.clear(); + ZeDeviceScopeEventPoolCache.clear(); ZeHostVisibleEventPoolCache.clear(); } @@ -1332,42 +1333,29 @@ pi_result _pi_queue::executeCommandList(pi_command_list_ptr_t CommandList, if (EventsScope == LastCommandInBatchHostVisible) { // Create a "proxy" host-visible event. // - size_t Index; - ze_event_pool_handle_t ZeEventPool = {}; - if (auto Res = - Context->getFreeSlotInExistingOrNewPool(ZeEventPool, Index, true)) - return Res; - - ze_event_handle_t ZeHostVisibleEvent = nullptr; - ZeStruct ZeEventDesc; - ZeEventDesc.signal = ZE_EVENT_SCOPE_FLAG_HOST; - ZeEventDesc.wait = 0; - ZeEventDesc.index = Index; - ZE_CALL(zeEventCreate, (ZeEventPool, &ZeEventDesc, &ZeHostVisibleEvent)); - - // Wrap it into PI event as we need it reference-counted. - pi_event HostVisibleEvent = new _pi_event( - ZeHostVisibleEvent, ZeEventPool, Context, PI_COMMAND_TYPE_USER, true); + pi_event HostVisibleEvent; + PI_CALL(EventCreate(Context, true, &HostVisibleEvent)); // Update each command's event in the command-list to "see" this // proxy event as a host-visible counterpart. for (auto &Event : CommandList->second.EventList) { - Event->ZeHostVisibleEventPool = ZeEventPool; - Event->ZeHostVisibleEvent = - reinterpret_cast(HostVisibleEvent); + Event->HostVisibleEvent = HostVisibleEvent; PI_CALL(piEventRetain(HostVisibleEvent)); } - // Release this event once such that it is destroyed when the last event - // from this command-list is destroyed. 
- // PI_CALL(piEventRelease(HostVisibleEvent)); + // Decrement the reference count by 1 so all the remaining references + // are from the other commands in this batch. This host-visible event + // will be destroyed after all events in the batch are gone. + PI_CALL(piEventRelease(HostVisibleEvent)); + // Indicate no cleanup is needed for this PI event as it is special. + HostVisibleEvent->CleanedUp = true; // Finally set to signal the host-visible event at the end of the // command-list. // TODO: see if we need a barrier here (or explicit wait for all events in // the batch). ZE_CALL(zeCommandListAppendSignalEvent, - (CommandList->first, ZeHostVisibleEvent)); + (CommandList->first, HostVisibleEvent->ZeEvent)); } // Close the command list and have it ready for dispatch. @@ -1536,9 +1524,10 @@ pi_result _pi_ze_event_list_t::createAndRetainPiZeEventList( auto ZeEvent = EventList[I]->ZeEvent; // Poll of the host-visible events. - auto ZeEventHostVisible = EventList[I]->getHostVisibleEvent(); - if (FilterEventWaitList && ZeEventHostVisible) { - auto Res = ZE_CALL_NOCHECK(zeEventQueryStatus, (ZeEventHostVisible)); + auto HostVisibleEvent = EventList[I]->HostVisibleEvent; + if (FilterEventWaitList && HostVisibleEvent) { + auto Res = + ZE_CALL_NOCHECK(zeEventQueryStatus, (HostVisibleEvent->ZeEvent)); if (Res == ZE_RESULT_SUCCESS) { // Event has already completed, don't put it into the list continue; @@ -4727,47 +4716,16 @@ pi_result piextKernelGetNativeHandle(pi_kernel Kernel, // // Events // -ze_event_handle_t _pi_event::getHostVisibleEvent() const { - if (EventsScope == AllHostVisible) { - return ZeEvent; - } else if (ZeHostVisibleEvent) { - if (EventsScope == LastCommandInBatchHostVisible) { - auto Event = reinterpret_cast(ZeHostVisibleEvent); - return Event->ZeEvent; - } - return ZeHostVisibleEvent; - } else { - return nullptr; - } -} - pi_result -_pi_event::getOrCreateHostVisibleEvent(ze_event_handle_t &HostVisibleEvent) { 
+_pi_event::getOrCreateHostVisibleEvent(ze_event_handle_t &ZeHostVisibleEvent) { - if (EventsScope == AllHostVisible) { - HostVisibleEvent = ZeEvent; - } else if (ZeHostVisibleEvent) { - HostVisibleEvent = ZeHostVisibleEvent; - } else { + if (!HostVisibleEvent) { if (EventsScope != OnDemandHostVisibleProxy) die("getOrCreateHostVisibleEvent: missing host-visible event"); // Create a "proxy" host-visible event on demand. - // - size_t Index; - ze_event_pool_handle_t ZeEventPool = {}; - if (auto Res = - Context->getFreeSlotInExistingOrNewPool(ZeEventPool, Index, true)) - return Res; - - ZeStruct ZeEventDesc; - ZeEventDesc.signal = ZE_EVENT_SCOPE_FLAG_HOST; - ZeEventDesc.wait = 0; - ZeEventDesc.index = Index; - - ZE_CALL(zeEventCreate, (ZeEventPool, &ZeEventDesc, &ZeHostVisibleEvent)); - ZeHostVisibleEventPool = ZeEventPool; - HostVisibleEvent = ZeHostVisibleEvent; + PI_CALL(EventCreate(Context, true, &HostVisibleEvent)); + HostVisibleEvent->CleanedUp = true; // Submit the command(s) signalling the proxy event to the queue. 
// We have to first submit a wait for the device-only event for which this @@ -4788,19 +4746,23 @@ _pi_event::getOrCreateHostVisibleEvent(ze_event_handle_t &HostVisibleEvent) { ZE_CALL(zeCommandListAppendWaitOnEvents, (CommandList->first, 1, &ZeEvent)); ZE_CALL(zeCommandListAppendSignalEvent, - (CommandList->first, ZeHostVisibleEvent)); + (CommandList->first, HostVisibleEvent->ZeEvent)); if (auto Res = Queue->executeCommandList(CommandList, false, OkToBatch)) return Res; } } + + ZeHostVisibleEvent = HostVisibleEvent->ZeEvent; return PI_SUCCESS; } -pi_result piEventCreate(pi_context Context, pi_event *RetEvent) { +static pi_result EventCreate(pi_context Context, bool HostVisible, + pi_event *RetEvent) { size_t Index = 0; ze_event_pool_handle_t ZeEventPool = {}; - if (auto Res = Context->getFreeSlotInExistingOrNewPool(ZeEventPool, Index)) + if (auto Res = Context->getFreeSlotInExistingOrNewPool(ZeEventPool, Index, + HostVisible)) return Res; ze_event_handle_t ZeEvent; @@ -4808,7 +4770,7 @@ pi_result piEventCreate(pi_context Context, pi_event *RetEvent) { ZeEventDesc.index = Index; ZeEventDesc.wait = 0; - if (EventsScope == AllHostVisible) { + if (HostVisible) { ZeEventDesc.signal = ZE_EVENT_SCOPE_FLAG_HOST; } else { // @@ -4834,9 +4796,17 @@ pi_result piEventCreate(pi_context Context, pi_event *RetEvent) { } catch (...) { return PI_ERROR_UNKNOWN; } + + if (HostVisible) + (*RetEvent)->HostVisibleEvent = *RetEvent; + return PI_SUCCESS; } +pi_result piEventCreate(pi_context Context, pi_event *RetEvent) { + return EventCreate(Context, EventsScope == AllHostVisible, RetEvent); +} + pi_result piEventGetInfo(pi_event Event, pi_event_info ParamName, size_t ParamValueSize, void *ParamValue, size_t *ParamValueSizeRet) { @@ -4882,10 +4852,11 @@ pi_result piEventGetInfo(pi_event Event, pi_event_info ParamName, // Make sure that we query a host-visible event only. 
// If one wasn't yet created then don't create it here as well, and // just conservatively return that event is not yet completed. - auto ZeHostVisibleEvent = Event->getHostVisibleEvent(); - if (ZeHostVisibleEvent) { + auto HostVisibleEvent = Event->HostVisibleEvent; + if (HostVisibleEvent) { ze_result_t ZeResult; - ZeResult = ZE_CALL_NOCHECK(zeEventQueryStatus, (ZeHostVisibleEvent)); + ZeResult = + ZE_CALL_NOCHECK(zeEventQueryStatus, (HostVisibleEvent->ZeEvent)); if (ZeResult == ZE_RESULT_SUCCESS) { return getInfo(ParamValueSize, ParamValue, ParamValueSizeRet, pi_int32{CL_COMPLETE}); // Untie from OpenCL @@ -5120,10 +5091,11 @@ pi_result piEventsWait(pi_uint32 NumEvents, const pi_event *EventList) { } } for (uint32_t I = 0; I < NumEvents; I++) { - ze_event_handle_t ZeEvent = EventList[I]->getHostVisibleEvent(); - if (!ZeEvent) + auto HostVisibleEvent = EventList[I]->HostVisibleEvent; + if (!HostVisibleEvent) die("The host-visible proxy event missing"); + ze_event_handle_t ZeEvent = HostVisibleEvent->ZeEvent; zePrint("ZeEvent = %#lx\n", pi_cast(ZeEvent)); ZE_CALL(zeHostSynchronize, (ZeEvent)); @@ -5155,8 +5127,6 @@ pi_result piEventSetStatus(pi_event Event, pi_int32 ExecutionStatus) { } pi_result piEventRetain(pi_event Event) { - // fprintf(stderr, "piEventRetain(%p): %d\n", (void*)Event, - // (int)Event->RefCount); ++(Event->RefCount); return PI_SUCCESS; } @@ -5166,8 +5136,6 @@ pi_result piEventRelease(pi_event Event) { } static pi_result EventRelease(pi_event Event, pi_queue LockedQueue) { - // fprintf(stderr, "piEventRelease(%p): %d\n", (void*)Event, - // (int)Event->RefCount); PI_ASSERT(Event, PI_INVALID_EVENT); if (!Event->RefCount) { die("piEventRelease: called on a destroyed event"); @@ -5187,14 +5155,9 @@ static pi_result EventRelease(pi_event Event, pi_queue LockedQueue) { if (Event->OwnZeEvent) { ZE_CALL(zeEventDestroy, (Event->ZeEvent)); } - if (Event->ZeHostVisibleEvent) { - // Destroy the host-visible proxy event when it is owned by this event. 
- if (EventsScope == OnDemandHostVisibleProxy) { - ZE_CALL(zeEventDestroy, (Event->ZeHostVisibleEvent)); - } else if (EventsScope == LastCommandInBatchHostVisible) { - PI_CALL(piEventRelease( - reinterpret_cast(Event->ZeHostVisibleEvent))); - } + if (Event->HostVisibleEvent != Event) { + // Decrement ref-count of the host-visible proxy event. + PI_CALL(piEventRelease(Event->HostVisibleEvent)); } auto Context = Event->Context; diff --git a/sycl/plugins/level_zero/pi_level_zero.hpp b/sycl/plugins/level_zero/pi_level_zero.hpp index ec5f4c3fdafdb..ec96eb1e88c04 100644 --- a/sycl/plugins/level_zero/pi_level_zero.hpp +++ b/sycl/plugins/level_zero/pi_level_zero.hpp @@ -558,7 +558,7 @@ struct _pi_context : _pi_object { // pool then create new one. The HostVisible parameter tells if we need a // slot for a host-visible event. pi_result getFreeSlotInExistingOrNewPool(ze_event_pool_handle_t &, size_t &, - bool HostVisible = false); + bool HostVisible); // Decrement number of events living in the pool upon event destroy // and return the pool to the cache if there are no unreleased events. @@ -595,7 +595,7 @@ struct _pi_context : _pi_object { // head. In case there is no next pool, a new pool is created and made the // head. // - std::list ZeEventPoolCache; + std::list ZeDeviceScopeEventPoolCache; // Cache of event pools to which host-visible events are added to. std::list ZeHostVisibleEventPoolCache; @@ -958,18 +958,21 @@ struct _pi_event : _pi_object { // Level Zero event pool handle. ze_event_pool_handle_t ZeEventPool; - // In case we use device-only events/pools these are their host-visible - // counterparts. The idea is that two Level-Zero events co-exist: - // - one is always created with device-scope and used for GPU book-keeping. - // - the other host-visible proxy event is created on demand when we need - // to query/wait on a device-scope event from the host. + // In case we use device-only events this holds their host-visible + // counterpart. 
If this event is itself host-visible then HostVisibleEvent + // points to this event. If this event is not host-visible then this field can + // be: 1) null, meaning that a host-visible event wasn't yet created 2) a PI + // event created internally that host will actually be redirected + // to wait/query instead of this PI event. // - ze_event_handle_t ZeHostVisibleEvent = {nullptr}; - ze_event_pool_handle_t ZeHostVisibleEventPool = {nullptr}; + // The HostVisibleEvent is a reference counted PI event and can be used by + // more than just this one event, depending on the mode (see EventsScope). + // + pi_event HostVisibleEvent = {nullptr}; + bool IsHostVisible() const { return this == HostVisibleEvent; } + // Get the host-visible event or create one and enqueue its signal. pi_result getOrCreateHostVisibleEvent(ze_event_handle_t &HostVisibleEvent); - // Return the host-visible event if one was already created before, or null. - ze_event_handle_t getHostVisibleEvent() const; // Level Zero command list where the command signaling this event was appended // to. This is currently used to remember/destroy the command list after all From 2a74debc1e77b8823303133518b399c6606d3e99 Mon Sep 17 00:00:00 2001 From: Sergey V Maslov Date: Mon, 24 Jan 2022 12:18:28 -0800 Subject: [PATCH 3/6] change the default back to all-host-visible for now Signed-off-by: Sergey V Maslov --- sycl/plugins/level_zero/pi_level_zero.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sycl/plugins/level_zero/pi_level_zero.cpp b/sycl/plugins/level_zero/pi_level_zero.cpp index f0b354c3e5e7f..c71d8c6a983ea 100644 --- a/sycl/plugins/level_zero/pi_level_zero.cpp +++ b/sycl/plugins/level_zero/pi_level_zero.cpp @@ -206,7 +206,7 @@ static const enum EventsScope { const auto DeviceEventsStr = std::getenv("SYCL_PI_LEVEL_ZERO_DEVICE_SCOPE_EVENTS"); - switch (DeviceEventsStr ? std::atoi(DeviceEventsStr) : 2) { + switch (DeviceEventsStr ? 
std::atoi(DeviceEventsStr) : 0) { case 1: return OnDemandHostVisibleProxy; case 2: From a12721c77d7caa8e062743f26e2201d36418c47b Mon Sep 17 00:00:00 2001 From: Sergey V Maslov Date: Mon, 24 Jan 2022 17:02:19 -0800 Subject: [PATCH 4/6] address review comments Signed-off-by: Sergey V Maslov --- sycl/doc/EnvironmentVariables.md | 8 +++++--- sycl/plugins/level_zero/pi_level_zero.cpp | 5 ++++- 2 files changed, 9 insertions(+), 4 deletions(-) diff --git a/sycl/doc/EnvironmentVariables.md b/sycl/doc/EnvironmentVariables.md index b59bca96a0654..a85024aeef2cb 100644 --- a/sycl/doc/EnvironmentVariables.md +++ b/sycl/doc/EnvironmentVariables.md @@ -1,4 +1,4 @@ -# Environment Variables +#Environment Variables This document describes environment variables that are having effect on DPC++ compiler and runtime. @@ -27,7 +27,9 @@ compiler and runtime. ### `SYCL_DEVICE_ALLOWLIST` A list of devices and their driver version following the pattern: -`BackendName:XXX,DeviceType:YYY,DeviceVendorId:0xXYZW,DriverVersion:{{X.Y.Z.W}}`. +`BackendName:XXX,DeviceType:YYY,DeviceVendorId:0xXYZW,DriverVersion:{ + { X.Y.Z.W } +}`. Also may contain `PlatformVersion`, `DeviceName` and `PlatformName`. There is no fixed order of properties in the pattern. @@ -145,7 +147,7 @@ variables in production code. | `SYCL_PI_LEVEL_ZERO_FILTER_EVENT_WAIT_LIST` | Integer | When set to 0, disables filtering of signaled events from wait lists when using the Level Zero backend. The default is 1. | | `SYCL_PI_LEVEL_ZERO_USE_COPY_ENGINE` | Any(\*) | This environment variable enables users to control use of copy engines for copy operations. If the value is an integer, it will allow the use of copy engines, if available in the device, in Level Zero plugin to transfer SYCL buffer or image data between the host and/or device(s) and to fill SYCL buffer or image data in device or shared memory. 
The value of this environment variable can also be a pair of the form "lower_index:upper_index" where the indices point to copy engines in a list of all available copy engines. The default is 1. | | `SYCL_PI_LEVEL_ZERO_USE_COPY_ENGINE_FOR_D2D_COPY` (experimental) | Integer | Allows the use of copy engine, if available in the device, in Level Zero plugin for device to device copy operations. The default is 0. This option is experimental and will be removed once heuristics are added to make a decision about use of copy engine for device to device copy operations. | -| `SYCL_PI_LEVEL_ZERO_DEVICE_SCOPE_EVENTS` | Any(\*) | Enable support of device-scope events whose state is not visible to the host. If enabled the Level Zero plugin would create all events having device-scope only and create proxy host-visible events for them when their status is needed (wait/query) on the host. The default is 0, meaning all events are host-visible. | +| `SYCL_PI_LEVEL_ZERO_DEVICE_SCOPE_EVENTS` | Any(\*) | Enable support of device-scope events whose state is not visible to the host. If enabled mode is SYCL_PI_LEVEL_ZERO_DEVICE_SCOPE_EVENTS=1 the Level Zero plugin would create all events having device-scope only and create proxy host-visible events for them when their status is needed (wait/query) on the host. If enabled mode is SYCL_PI_LEVEL_ZERO_DEVICE_SCOPE_EVENTS=2 the Level Zero plugin would create all events having device-scope and add proxy host-visible event at the end of each command-list submission. The default is 0, meaning all events are host-visible. 
| ## Debugging variables for CUDA Plugin diff --git a/sycl/plugins/level_zero/pi_level_zero.cpp b/sycl/plugins/level_zero/pi_level_zero.cpp index c71d8c6a983ea..e2718a022db6b 100644 --- a/sycl/plugins/level_zero/pi_level_zero.cpp +++ b/sycl/plugins/level_zero/pi_level_zero.cpp @@ -5155,7 +5155,10 @@ static pi_result EventRelease(pi_event Event, pi_queue LockedQueue) { if (Event->OwnZeEvent) { ZE_CALL(zeEventDestroy, (Event->ZeEvent)); } - if (Event->HostVisibleEvent != Event) { + // It is possible that host-visible event was never created. + // In case it was check if that's different from this same event + // and release a reference to it. + if (Event->HostVisibleEvent && Event->HostVisibleEvent != Event) { // Decrement ref-count of the host-visible proxy event. PI_CALL(piEventRelease(Event->HostVisibleEvent)); } From 6fc775b1d80f16c0d043501281660a40d35d0dba Mon Sep 17 00:00:00 2001 From: smaslov-intel Date: Mon, 24 Jan 2022 19:41:49 -0600 Subject: [PATCH 5/6] Update sycl/doc/EnvironmentVariables.md --- sycl/doc/EnvironmentVariables.md | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/sycl/doc/EnvironmentVariables.md b/sycl/doc/EnvironmentVariables.md index a85024aeef2cb..7df1163057e13 100644 --- a/sycl/doc/EnvironmentVariables.md +++ b/sycl/doc/EnvironmentVariables.md @@ -27,9 +27,7 @@ compiler and runtime. ### `SYCL_DEVICE_ALLOWLIST` A list of devices and their driver version following the pattern: -`BackendName:XXX,DeviceType:YYY,DeviceVendorId:0xXYZW,DriverVersion:{ - { X.Y.Z.W } -}`. +`BackendName:XXX,DeviceType:YYY,DeviceVendorId:0xXYZW,DriverVersion:{{X.Y.Z.W}}`. Also may contain `PlatformVersion`, `DeviceName` and `PlatformName`. There is no fixed order of properties in the pattern. 
From dd287b54abe825683801d6328fef35eb5155d1dd Mon Sep 17 00:00:00 2001 From: smaslov-intel Date: Mon, 24 Jan 2022 19:43:56 -0600 Subject: [PATCH 6/6] Update sycl/doc/EnvironmentVariables.md --- sycl/doc/EnvironmentVariables.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sycl/doc/EnvironmentVariables.md b/sycl/doc/EnvironmentVariables.md index 7df1163057e13..5f1c386a93cac 100644 --- a/sycl/doc/EnvironmentVariables.md +++ b/sycl/doc/EnvironmentVariables.md @@ -1,4 +1,4 @@ -#Environment Variables +# Environment Variables This document describes environment variables that are having effect on DPC++ compiler and runtime.