@@ -32,8 +32,8 @@ extern "C" {
3232// Forward declarations.
3333static pi_result EventRelease (pi_event Event, pi_queue LockedQueue);
3434static pi_result QueueRelease (pi_queue Queue, pi_queue LockedQueue);
35- static pi_result EventCreate (pi_context Context, bool HostVisible ,
36- pi_event *RetEvent);
35+ static pi_result EventCreate (pi_context Context, pi_queue Queue ,
36+ bool HostVisible, pi_event *RetEvent);
3737}
3838
3939namespace {
@@ -428,20 +428,13 @@ pi_result _pi_mem::removeMapping(void *MappedTo, Mapping &MapInfo) {
428428
429429pi_result
430430_pi_context::getFreeSlotInExistingOrNewPool (ze_event_pool_handle_t &Pool,
431- size_t &Index, bool HostVisible) {
431+ size_t &Index, bool HostVisible,
432+ bool ProfilingEnabled) {
432433 // Lock while updating event pool machinery.
433434 std::lock_guard<std::mutex> Lock (ZeEventPoolCacheMutex);
434435
435- // Setup for host-visible pool as needed.
436- ze_event_pool_flag_t ZePoolFlag = {};
437- std::list<ze_event_pool_handle_t > *ZePoolCache;
438-
439- if (HostVisible) {
440- ZePoolFlag = ZE_EVENT_POOL_FLAG_HOST_VISIBLE;
441- ZePoolCache = &ZeHostVisibleEventPoolCache;
442- } else {
443- ZePoolCache = &ZeDeviceScopeEventPoolCache;
444- }
436+ std::list<ze_event_pool_handle_t > *ZePoolCache =
437+ getZeEventPoolCache (HostVisible, ProfilingEnabled);
445438
446439 // Remove full pool from the cache.
447440 if (!ZePoolCache->empty ()) {
@@ -460,7 +453,12 @@ _pi_context::getFreeSlotInExistingOrNewPool(ze_event_pool_handle_t &Pool,
460453 if (*ZePool == nullptr ) {
461454 ZeStruct<ze_event_pool_desc_t > ZeEventPoolDesc;
462455 ZeEventPoolDesc.count = MaxNumEventsPerPool;
463- ZeEventPoolDesc.flags = ZePoolFlag | ZE_EVENT_POOL_FLAG_KERNEL_TIMESTAMP;
456+ ZeEventPoolDesc.flags = 0 ;
457+ if (HostVisible)
458+ ZeEventPoolDesc.flags |= ZE_EVENT_POOL_FLAG_HOST_VISIBLE;
459+ if (ProfilingEnabled)
460+ ZeEventPoolDesc.flags |= ZE_EVENT_POOL_FLAG_KERNEL_TIMESTAMP;
461+ zePrint (" ze_event_pool_desc_t flags set to: %d\n " , ZeEventPoolDesc.flags );
464462
465463 std::vector<ze_device_handle_t > ZeDevices;
466464 std::for_each (Devices.begin (), Devices.end (),
@@ -486,12 +484,8 @@ pi_result _pi_context::decrementUnreleasedEventsInPool(pi_event Event) {
486484 return PI_SUCCESS;
487485 }
488486
489- std::list<ze_event_pool_handle_t > *ZePoolCache;
490- if (Event->IsHostVisible ()) {
491- ZePoolCache = &ZeHostVisibleEventPoolCache;
492- } else {
493- ZePoolCache = &ZeDeviceScopeEventPoolCache;
494- }
487+ std::list<ze_event_pool_handle_t > *ZePoolCache =
488+ getZeEventPoolCache (Event->isHostVisible (), Event->isProfilingEnabled ());
495489
496490 // Put the empty pool to the cache of the pools.
497491 std::lock_guard<std::mutex> Lock (ZeEventPoolCacheMutex);
@@ -611,13 +605,15 @@ inline static void piQueueRetainNoLock(pi_queue Queue) { Queue->RefCount++; }
611605// \param Event a pointer to hold the newly created pi_event
612606// \param CommandType various command type determined by the caller
613607// \param CommandList is the command list where the event is added
614- inline static pi_result
615- createEventAndAssociateQueue (pi_queue Queue, pi_event *Event,
616- pi_command_type CommandType,
617- pi_command_list_ptr_t CommandList) {
618- pi_result Res = piEventCreate (Queue->Context , Event);
619- if (Res != PI_SUCCESS)
620- return Res;
608+ // \param ForceHostVisible tells if the event must be created in
609+ // the host-visible pool
610+ inline static pi_result createEventAndAssociateQueue (
611+ pi_queue Queue, pi_event *Event, pi_command_type CommandType,
612+ pi_command_list_ptr_t CommandList, bool ForceHostVisible = false ) {
613+
614+ PI_CALL (EventCreate (Queue->Context , Queue,
615+ ForceHostVisible ? true : EventsScope == AllHostVisible,
616+ Event));
621617
622618 (*Event)->Queue = Queue;
623619 (*Event)->CommandType = CommandType;
@@ -806,13 +802,11 @@ pi_result _pi_context::finalize() {
806802 // For example, event pool caches would be still alive.
807803 {
808804 std::lock_guard<std::mutex> Lock (ZeEventPoolCacheMutex);
809- for (auto &ZePool : ZeDeviceScopeEventPoolCache)
810- ZE_CALL (zeEventPoolDestroy, (ZePool));
811- for (auto &ZePool : ZeHostVisibleEventPoolCache)
812- ZE_CALL (zeEventPoolDestroy, (ZePool));
813-
814- ZeDeviceScopeEventPoolCache.clear ();
815- ZeHostVisibleEventPoolCache.clear ();
805+ for (auto &ZePoolCache : ZeEventPoolCache) {
806+ for (auto &ZePool : ZePoolCache)
807+ ZE_CALL (zeEventPoolDestroy, (ZePool));
808+ ZePoolCache.clear ();
809+ }
816810 }
817811
818812 // Destroy the command list used for initializations
@@ -841,8 +835,7 @@ pi_result _pi_context::finalize() {
841835
842836bool _pi_queue::isInOrderQueue () const {
843837 // If out-of-order queue property is not set, then this is an in-order queue.
844- return ((this ->PiQueueProperties & PI_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE) ==
845- 0 );
838+ return ((this ->Properties & PI_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE) == 0 );
846839}
847840
848841pi_result _pi_queue::resetCommandList (pi_command_list_ptr_t CommandList,
@@ -1032,11 +1025,10 @@ static const zeCommandListBatchConfig ZeCommandListBatchCopyConfig = [] {
10321025_pi_queue::_pi_queue (ze_command_queue_handle_t Queue,
10331026 std::vector<ze_command_queue_handle_t > &CopyQueues,
10341027 pi_context Context, pi_device Device,
1035- bool OwnZeCommandQueue,
1036- pi_queue_properties PiQueueProperties)
1028+ bool OwnZeCommandQueue, pi_queue_properties Properties)
10371029 : ZeComputeCommandQueue{Queue}, ZeCopyCommandQueues{CopyQueues},
10381030 Context{Context}, Device{Device}, OwnZeCommandQueue{OwnZeCommandQueue},
1039- PiQueueProperties (PiQueueProperties ) {
1031+ Properties (Properties ) {
10401032 ComputeCommandBatch.OpenCommandList = CommandListMap.end ();
10411033 CopyCommandBatch.OpenCommandList = CommandListMap.end ();
10421034 ComputeCommandBatch.QueueBatchSize =
@@ -1350,7 +1342,10 @@ pi_result _pi_queue::executeCommandList(pi_command_list_ptr_t CommandList,
13501342 // Create a "proxy" host-visible event.
13511343 //
13521344 pi_event HostVisibleEvent;
1353- PI_CALL (EventCreate (Context, true , &HostVisibleEvent));
1345+ auto Res = createEventAndAssociateQueue (
1346+ this , &HostVisibleEvent, PI_COMMAND_TYPE_USER, CommandList, true );
1347+ if (Res)
1348+ return Res;
13541349
13551350 // Update each command's event in the command-list to "see" this
13561351 // proxy event as a host-visible counterpart.
@@ -1359,10 +1354,14 @@ pi_result _pi_queue::executeCommandList(pi_command_list_ptr_t CommandList,
13591354 PI_CALL (piEventRetain (HostVisibleEvent));
13601355 }
13611356
1362- // Decrement the reference count by 1 so all the remaining references
1363- // are from the other commands in this batch. This host-visible event
1364- // will be destroyed after all events in the batch are gone.
1357+ // Decrement the reference count of the event such that all the remaining
1358+ // references are from the other commands in this batch. This host-visible
1359+ // event will not be waited/released by SYCL RT, so it must be destroyed
1360+ // after all events in the batch are gone.
13651361 PI_CALL (piEventRelease (HostVisibleEvent));
1362+ PI_CALL (piEventRelease (HostVisibleEvent));
1363+ PI_CALL (piEventRelease (HostVisibleEvent));
1364+
13661365 // Indicate no cleanup is needed for this PI event as it is special.
13671366 HostVisibleEvent->CleanedUp = true ;
13681367
@@ -2105,7 +2104,7 @@ pi_result piDevicesGet(pi_platform Platform, pi_device_type DeviceType,
21052104 *NumDevices = ZeDeviceCount;
21062105
21072106 if (NumEntries == 0 ) {
2108- // Devices should be nullptr when querying the number of devices
2107+ // Devices should be nullptr when querying the number of devices.
21092108 PI_ASSERT (Devices == nullptr , PI_INVALID_VALUE);
21102109 return PI_SUCCESS;
21112110 }
@@ -4955,10 +4954,6 @@ _pi_event::getOrCreateHostVisibleEvent(ze_event_handle_t &ZeHostVisibleEvent) {
49554954 if (EventsScope != OnDemandHostVisibleProxy)
49564955 die (" getOrCreateHostVisibleEvent: missing host-visible event" );
49574956
4958- // Create a "proxy" host-visible event on demand.
4959- PI_CALL (EventCreate (Context, true , &HostVisibleEvent));
4960- HostVisibleEvent->CleanedUp = true ;
4961-
49624957 // Submit the command(s) signalling the proxy event to the queue.
49634958 // We have to first submit a wait for the device-only event for which this
49644959 // proxy is created.
@@ -4975,6 +4970,13 @@ _pi_event::getOrCreateHostVisibleEvent(ze_event_handle_t &ZeHostVisibleEvent) {
49754970 Queue, CommandList, false /* UseCopyEngine */ , OkToBatch))
49764971 return Res;
49774972
4973+ // Create a "proxy" host-visible event.
4974+ auto Res = createEventAndAssociateQueue (
4975+ Queue, &HostVisibleEvent, PI_COMMAND_TYPE_USER, CommandList, true );
4976+ // HostVisibleEvent->CleanedUp = true;
4977+ if (Res != PI_SUCCESS)
4978+ return Res;
4979+
49784980 ZE_CALL (zeCommandListAppendWaitOnEvents,
49794981 (CommandList->first , 1 , &ZeEvent));
49804982 ZE_CALL (zeCommandListAppendSignalEvent,
@@ -4989,12 +4991,21 @@ _pi_event::getOrCreateHostVisibleEvent(ze_event_handle_t &ZeHostVisibleEvent) {
49894991 return PI_SUCCESS;
49904992}
49914993
4992- static pi_result EventCreate (pi_context Context, bool HostVisible,
4993- pi_event *RetEvent) {
4994+ // Helper function for creating a PI event.
4995+ // The "Queue" argument specifies the PI queue where a command is submitted.
4996+ // The "HostVisible" argument specifies if event needs to be allocated from
4997+ // a host-visible pool.
4998+ //
4999+ static pi_result EventCreate (pi_context Context, pi_queue Queue,
5000+ bool HostVisible, pi_event *RetEvent) {
5001+
5002+ bool ProfilingEnabled =
5003+ !Queue || (Queue->Properties & PI_QUEUE_PROFILING_ENABLE) != 0 ;
5004+
49945005 size_t Index = 0 ;
49955006 ze_event_pool_handle_t ZeEventPool = {};
4996- if (auto Res = Context->getFreeSlotInExistingOrNewPool (ZeEventPool, Index,
4997- HostVisible))
5007+ if (auto Res = Context->getFreeSlotInExistingOrNewPool (
5008+ ZeEventPool, Index, HostVisible, ProfilingEnabled ))
49985009 return Res;
49995010
50005011 ze_event_handle_t ZeEvent;
@@ -5035,8 +5046,9 @@ static pi_result EventCreate(pi_context Context, bool HostVisible,
50355046 return PI_SUCCESS;
50365047}
50375048
5049+ // External PI API entry
50385050pi_result piEventCreate (pi_context Context, pi_event *RetEvent) {
5039- return EventCreate (Context, EventsScope == AllHostVisible, RetEvent);
5051+ return EventCreate (Context, nullptr , EventsScope == AllHostVisible, RetEvent);
50405052}
50415053
50425054pi_result piEventGetInfo (pi_event Event, pi_event_info ParamName,
@@ -5102,6 +5114,11 @@ pi_result piEventGetProfilingInfo(pi_event Event, pi_profiling_info ParamName,
51025114
51035115 PI_ASSERT (Event, PI_INVALID_EVENT);
51045116
5117+ if (Event->Queue &&
5118+ (Event->Queue ->Properties & PI_QUEUE_PROFILING_ENABLE) == 0 ) {
5119+ return PI_PROFILING_INFO_NOT_AVAILABLE;
5120+ }
5121+
51055122 uint64_t ZeTimerResolution =
51065123 Event->Queue
51075124 ? Event->Queue ->Device ->ZeDeviceProperties ->timerResolution
@@ -5374,7 +5391,7 @@ static pi_result EventRelease(pi_event Event, pi_queue LockedQueue) {
53745391 // and release a reference to it.
53755392 if (Event->HostVisibleEvent && Event->HostVisibleEvent != Event) {
53765393 // Decrement ref-count of the host-visible proxy event.
5377- PI_CALL (piEventRelease (Event->HostVisibleEvent ));
5394+ PI_CALL (EventRelease (Event->HostVisibleEvent , LockedQueue ));
53785395 }
53795396
53805397 auto Context = Event->Context ;
0 commit comments