@@ -463,32 +463,66 @@ UR_APIEXPORT ur_result_t UR_APICALL urQueueGetNativeHandle(
463463 // Lock automatically releases when this goes out of scope.
464464 std::shared_lock<ur_shared_mutex> lock (Queue->Mutex );
465465
466- auto ZeQueue = ur_cast<ze_command_queue_handle_t *>(NativeQueue);
467-
468- // Extract a Level Zero compute queue handle from the given PI queue
466+ // Get handle to this thread's queue group.
469467 auto &QueueGroup = Queue->getQueueGroup (false /* compute*/ );
470- uint32_t QueueGroupOrdinalUnused;
471- *ZeQueue = QueueGroup.getZeQueue (&QueueGroupOrdinalUnused);
468+
469+ if (Queue->UsingImmCmdLists ) {
470+ auto ZeCmdList = ur_cast<ze_command_list_handle_t *>(NativeQueue);
471+ // Extract the Level Zero command list handle from the given PI queue
472+ *ZeCmdList = QueueGroup.getImmCmdList ()->first ;
473+ // TODO: How to pass this up in the urQueueGetNativeHandle interface?
474+ // *NativeHandleDesc = true;
475+ } else {
476+ auto ZeQueue = ur_cast<ze_command_queue_handle_t *>(NativeQueue);
477+
478+ // Extract a Level Zero compute queue handle from the given PI queue
479+ auto &QueueGroup = Queue->getQueueGroup (false /* compute*/ );
480+ uint32_t QueueGroupOrdinalUnused;
481+ *ZeQueue = QueueGroup.getZeQueue (&QueueGroupOrdinalUnused);
482+ // TODO: How to pass this up in the urQueueGetNativeHandle interface?
483+ // *NativeHandleDesc = false;
484+ }
472485
473486 return UR_RESULT_SUCCESS;
474487}
475488
489+ void ur_queue_handle_t_::pi_queue_group_t::setImmCmdList (
490+ ze_command_list_handle_t ZeCommandList) {
491+ ImmCmdLists = std::vector<ur_command_list_ptr_t >(
492+ 1 ,
493+ Queue->CommandListMap
494+ .insert (std::pair<ze_command_list_handle_t , pi_command_list_info_t >{
495+ ZeCommandList, {nullptr , true , false , nullptr , 0 }})
496+ .first );
497+ }
498+
476499UR_APIEXPORT ur_result_t UR_APICALL urQueueCreateWithNativeHandle (
477500 ur_native_handle_t NativeQueue, // /< [in] the native handle of the queue.
478501 ur_context_handle_t Context, // /< [in] handle of the context object
479502 ur_device_handle_t Device, // /
480- const ur_queue_native_properties_t *Properties , // /
503+ const ur_queue_native_properties_t *NativeProperties , // /
481504 ur_queue_handle_t
482505 *RetQueue // /< [out] pointer to the handle of the queue object created.
483506) {
484- auto ZeQueue = ur_cast<ze_command_queue_handle_t >(NativeQueue);
485- // Assume this is the "0" index queue in the compute command-group.
486- std::vector<ze_command_queue_handle_t > ZeQueues{ZeQueue};
507+ bool OwnNativeHandle = false ;
508+ ur_queue_flags_t Flags{};
487509
488- // TODO: see what we can do to correctly initialize PI queue for
489- // compute vs. copy Level-Zero queue. Currently we will send
490- // all commands to the "ZeQueue".
491- std::vector<ze_command_queue_handle_t > ZeroCopyQueues;
510+ if (NativeProperties) {
511+ OwnNativeHandle = NativeProperties->isNativeHandleOwned ;
512+ if (NativeProperties->pNext ) {
513+ const ur_base_properties_t *extendedProperties =
514+ reinterpret_cast <const ur_base_properties_t *>(
515+ NativeProperties->pNext );
516+ if (extendedProperties->stype == UR_STRUCTURE_TYPE_QUEUE_PROPERTIES) {
517+ const ur_queue_properties_t *UrProperties =
518+ reinterpret_cast <const ur_queue_properties_t *>(extendedProperties);
519+ Flags = UrProperties->flags ;
520+ }
521+ }
522+ }
523+
524+ // TODO: How to pass this up in the urQueueCreateWithNativeHandle interface?
525+ int32_t NativeHandleDesc = 0 ;
492526
493527 // Get the device handle from first device in the platform
494528 // Maybe this is not completely correct.
@@ -502,15 +536,42 @@ UR_APIEXPORT ur_result_t UR_APICALL urQueueCreateWithNativeHandle(
502536 nullptr ));
503537 }
504538
505- try {
506- ur_queue_handle_t_ *Queue =
507- new ur_queue_handle_t_ (ZeQueues, ZeroCopyQueues, Context, UrDevice,
508- Properties->isNativeHandleOwned );
509- *RetQueue = reinterpret_cast <ur_queue_handle_t >(Queue);
510- } catch (const std::bad_alloc &) {
511- return UR_RESULT_ERROR_OUT_OF_RESOURCES;
512- } catch (...) {
513- return UR_RESULT_ERROR_UNKNOWN;
539+ // The NativeHandleDesc has value 1 if the native handle is an immediate
540+ // command list.
541+ if (NativeHandleDesc == 1 ) {
542+ std::vector<ze_command_queue_handle_t > ComputeQueues{nullptr };
543+ std::vector<ze_command_queue_handle_t > CopyQueues;
544+
545+ try {
546+ ur_queue_handle_t_ *Queue = new ur_queue_handle_t_ (
547+ ComputeQueues, CopyQueues, Context, UrDevice, OwnNativeHandle, Flags);
548+ *RetQueue = reinterpret_cast <ur_queue_handle_t >(Queue);
549+ } catch (const std::bad_alloc &) {
550+ return UR_RESULT_ERROR_OUT_OF_RESOURCES;
551+ } catch (...) {
552+ return UR_RESULT_ERROR_UNKNOWN;
553+ }
554+ auto &InitialGroup = (*RetQueue)->ComputeQueueGroupsByTID .begin ()->second ;
555+ InitialGroup.setImmCmdList (ur_cast<ze_command_list_handle_t >(NativeQueue));
556+ } else {
557+ auto ZeQueue = ur_cast<ze_command_queue_handle_t >(NativeQueue);
558+ // Assume this is the "0" index queue in the compute command-group.
559+ std::vector<ze_command_queue_handle_t > ZeQueues{ZeQueue};
560+
561+ // TODO: see what we can do to correctly initialize PI queue for
562+ // compute vs. copy Level-Zero queue. Currently we will send
563+ // all commands to the "ZeQueue".
564+ std::vector<ze_command_queue_handle_t > ZeroCopyQueues;
565+
566+ try {
567+ ur_queue_handle_t_ *Queue = new ur_queue_handle_t_ (
568+ ZeQueues, ZeroCopyQueues, Context, UrDevice, OwnNativeHandle, Flags);
569+ *RetQueue = reinterpret_cast <ur_queue_handle_t >(Queue);
570+ } catch (const std::bad_alloc &) {
571+ return UR_RESULT_ERROR_OUT_OF_RESOURCES;
572+ } catch (...) {
573+ return UR_RESULT_ERROR_UNKNOWN;
574+ }
514575 }
515576
516577 return UR_RESULT_SUCCESS;
@@ -757,6 +818,8 @@ ur_queue_handle_t_::ur_queue_handle_t_(
757818 bool OwnZeCommandQueue, ur_queue_flags_t Properties, int ForceComputeIndex)
758819 : Context{Context}, Device{Device}, OwnZeCommandQueue{OwnZeCommandQueue},
759820 Properties (Properties) {
821+ // Set the type of commandlists the queue will use.
822+ UsingImmCmdLists = Device->useImmediateCommandLists ();
760823 // Compute group initialization.
761824 // First, see if the queue's device allows for round-robin or it is
762825 // fixed to one particular compute CCS (it is so for sub-sub-devices).
@@ -766,7 +829,7 @@ ur_queue_handle_t_::ur_queue_handle_t_(
766829 ComputeQueueGroup.ZeQueues = ComputeQueues;
767830 // Create space to hold immediate commandlists corresponding to the
768831 // ZeQueues
769- if (Device-> ImmCommandListUsed ) {
832+ if (UsingImmCmdLists ) {
770833 ComputeQueueGroup.ImmCmdLists = std::vector<ur_command_list_ptr_t >(
771834 ComputeQueueGroup.ZeQueues .size (), CommandListMap.end ());
772835 }
@@ -798,7 +861,7 @@ ur_queue_handle_t_::ur_queue_handle_t_(
798861 die (" No compute queue available/allowed." );
799862 }
800863 }
801- if (Device-> ImmCommandListUsed ) {
864+ if (UsingImmCmdLists ) {
802865 // Create space to hold immediate commandlists corresponding to the
803866 // ZeQueues
804867 ComputeQueueGroup.ImmCmdLists = std::vector<ur_command_list_ptr_t >(
0 commit comments