diff --git a/sycl/source/detail/cg.hpp b/sycl/source/detail/cg.hpp index e92f9b40eff45..5f3229c3ed798 100644 --- a/sycl/source/detail/cg.hpp +++ b/sycl/source/detail/cg.hpp @@ -20,6 +20,8 @@ #include // for kernel_impl #include // for kernel_bundle_impl +#include + #include // for assert #include // for shared_ptr, unique_ptr #include // for size_t @@ -253,7 +255,6 @@ class CGExecKernel : public CG { std::shared_ptr MSyclKernel; std::shared_ptr MKernelBundle; std::vector MArgs; - KernelNameStrT MKernelName; DeviceKernelInfo &MDeviceKernelInfo; std::vector> MStreams; std::vector> MAuxiliaryResources; @@ -269,7 +270,7 @@ class CGExecKernel : public CG { std::shared_ptr SyclKernel, std::shared_ptr KernelBundle, CG::StorageInitHelper CGData, std::vector Args, - KernelNameStrT KernelName, DeviceKernelInfo &DeviceKernelInfo, + DeviceKernelInfo &DeviceKernelInfo, std::vector> Streams, std::vector> AuxiliaryResources, CGType Type, ur_kernel_cache_config_t KernelCacheConfig, @@ -278,8 +279,7 @@ class CGExecKernel : public CG { : CG(Type, std::move(CGData), std::move(loc)), MNDRDesc(NDRDesc), MHostKernel(std::move(HKernel)), MSyclKernel(std::move(SyclKernel)), MKernelBundle(std::move(KernelBundle)), MArgs(std::move(Args)), - MKernelName(std::move(KernelName)), MDeviceKernelInfo(DeviceKernelInfo), - MStreams(std::move(Streams)), + MDeviceKernelInfo(DeviceKernelInfo), MStreams(std::move(Streams)), MAuxiliaryResources(std::move(AuxiliaryResources)), MAlternativeKernels{}, MKernelCacheConfig(std::move(KernelCacheConfig)), MKernelIsCooperative(KernelIsCooperative), @@ -291,7 +291,9 @@ class CGExecKernel : public CG { CGExecKernel(const CGExecKernel &CGExec) = default; const std::vector &getArguments() const { return MArgs; } - KernelNameStrRefT getKernelName() const { return MKernelName; } + std::string_view getKernelName() const { + return static_cast(MDeviceKernelInfo.Name); + } const std::vector> &getStreams() const { return MStreams; } diff --git a/sycl/source/detail/graph/graph_impl.cpp b/sycl/source/detail/graph/graph_impl.cpp index 4583efbe881dc..70b96eaa8660e 100644 --- a/sycl/source/detail/graph/graph_impl.cpp +++ b/sycl/source/detail/graph/graph_impl.cpp @@ -739,9 +739,8 @@ ur_exp_command_buffer_sync_point_t exec_graph_impl::enqueueNodeDirect( CGExec->MLine, CGExec->MColumn); std::tie(CmdTraceEvent, InstanceID) = emitKernelInstrumentationData( sycl::detail::GSYCLStreamID, CGExec->MSyclKernel, CodeLoc, - CGExec->MIsTopCodeLoc, CGExec->MKernelName.data(), - CGExec->MDeviceKernelInfo, nullptr, CGExec->MNDRDesc, - CGExec->MKernelBundle.get(), CGExec->MArgs); + CGExec->MIsTopCodeLoc, CGExec->MDeviceKernelInfo, nullptr, + CGExec->MNDRDesc, CGExec->MKernelBundle.get(), CGExec->MArgs); if (CmdTraceEvent) sycl::detail::emitInstrumentationGeneral(sycl::detail::GSYCLStreamID, InstanceID, CmdTraceEvent, @@ -1401,14 +1400,14 @@ void exec_graph_impl::update(std::shared_ptr GraphImpl) { sycl::detail::CGExecKernel *TargetCGExec = static_cast( MNodeStorage[i]->MCommandGroup.get()); - KernelNameStrRefT TargetKernelName = TargetCGExec->getKernelName(); + std::string_view TargetKernelName = TargetCGExec->getKernelName(); sycl::detail::CGExecKernel *SourceCGExec = static_cast( GraphImpl->MNodeStorage[i]->MCommandGroup.get()); - KernelNameStrRefT SourceKernelName = SourceCGExec->getKernelName(); + std::string_view SourceKernelName = SourceCGExec->getKernelName(); - if (TargetKernelName.compare(SourceKernelName) != 0) { + if (TargetKernelName != SourceKernelName) { std::stringstream ErrorStream( "Cannot update using a graph with mismatched kernel " "types. Source node type "); @@ -1568,14 +1567,14 @@ void exec_graph_impl::populateURKernelUpdateStructs( UrKernel = Kernel->getHandleRef(); EliminatedArgMask = Kernel->getKernelArgMask(); } else if (auto SyclKernelImpl = - KernelBundleImplPtr - ? KernelBundleImplPtr->tryGetKernel(ExecCG.MKernelName) - : std::shared_ptr{nullptr}) { + KernelBundleImplPtr ? KernelBundleImplPtr->tryGetKernel( + ExecCG.MDeviceKernelInfo.Name) + : std::shared_ptr{nullptr}) { UrKernel = SyclKernelImpl->getHandleRef(); EliminatedArgMask = SyclKernelImpl->getKernelArgMask(); } else { BundleObjs = sycl::detail::ProgramManager::getInstance().getOrCreateKernel( - ContextImpl, DeviceImpl, ExecCG.MKernelName, ExecCG.MDeviceKernelInfo); + ContextImpl, DeviceImpl, ExecCG.MDeviceKernelInfo); UrKernel = BundleObjs->MKernelHandle; EliminatedArgMask = BundleObjs->MKernelArgMask; } diff --git a/sycl/source/detail/graph/node_impl.hpp b/sycl/source/detail/graph/node_impl.hpp index fdcae10a5ceb4..1c7291aad2156 100644 --- a/sycl/source/detail/graph/node_impl.hpp +++ b/sycl/source/detail/graph/node_impl.hpp @@ -341,7 +341,8 @@ class node_impl : public std::enable_shared_from_this { static_cast(MCommandGroup.get()); sycl::detail::CGExecKernel *ExecKernelB = static_cast(Node.MCommandGroup.get()); - return ExecKernelA->MKernelName.compare(ExecKernelB->MKernelName) == 0; + return std::string_view{ExecKernelA->MDeviceKernelInfo.Name} == + std::string_view{ExecKernelB->MDeviceKernelInfo.Name}; } case sycl::detail::CGType::CopyUSM: { sycl::detail::CGCopyUSM *CopyA = @@ -543,7 +544,9 @@ class node_impl : public std::enable_shared_from_this { Stream << "CGExecKernel \\n"; sycl::detail::CGExecKernel *Kernel = static_cast(MCommandGroup.get()); - Stream << "NAME = " << Kernel->MKernelName << "\\n"; + Stream << "NAME = " + << static_cast(Kernel->MDeviceKernelInfo.Name) + << "\\n"; if (Verbose) { Stream << "ARGS = \\n"; for (size_t i = 0; i < Kernel->MArgs.size(); i++) { diff --git a/sycl/source/detail/handler_impl.hpp b/sycl/source/detail/handler_impl.hpp index a8b217a1e64fa..f62bedc72d553 100644 --- a/sycl/source/detail/handler_impl.hpp +++ b/sycl/source/detail/handler_impl.hpp @@ -60,6 +60,11 @@ class handler_impl { HandlerSubmissionState::EXPLICIT_KERNEL_BUNDLE_STATE; } + KernelNameStrRefT getKernelName() const { + assert(MDeviceKernelInfoPtr); + return static_cast(MDeviceKernelInfoPtr->Name); + } + /// Registers mutually exclusive submission states. HandlerSubmissionState MSubmissionState = HandlerSubmissionState::NO_STATE; diff --git a/sycl/source/detail/program_manager/program_manager.cpp b/sycl/source/detail/program_manager/program_manager.cpp index d1dbac8460959..c99df57c534b4 100644 --- a/sycl/source/detail/program_manager/program_manager.cpp +++ b/sycl/source/detail/program_manager/program_manager.cpp @@ -1084,25 +1084,32 @@ ProgramManager::getBuiltURProgram(const BinImgWithDeps &ImgWithDeps, FastKernelCacheValPtr ProgramManager::getOrCreateKernel( context_impl &ContextImpl, device_impl &DeviceImpl, - KernelNameStrRefT KernelName, DeviceKernelInfo &DeviceKernelInfo, - const NDRDescT &NDRDesc) { + DeviceKernelInfo &DeviceKernelInfo, const NDRDescT &NDRDesc) { if constexpr (DbgProgMgr > 0) { std::cerr << ">>> ProgramManager::getOrCreateKernel(" << &ContextImpl - << ", " << &DeviceImpl << ", " << KernelName << ")\n"; + << ", " << &DeviceImpl << ", " + << static_cast(DeviceKernelInfo.Name) << ")\n"; } KernelProgramCache &Cache = ContextImpl.getKernelProgramCache(); ur_device_handle_t UrDevice = DeviceImpl.getHandleRef(); if (SYCLConfig::get()) { - if (auto KernelCacheValPtr = Cache.tryToGetKernelFast( - KernelName, UrDevice, DeviceKernelInfo.getKernelSubcache())) { + if (auto KernelCacheValPtr = + Cache.tryToGetKernelFast(DeviceKernelInfo.Name, UrDevice, + DeviceKernelInfo.getKernelSubcache())) { return KernelCacheValPtr; } } - Managed Program = - getBuiltURProgram(ContextImpl, DeviceImpl, KernelName, NDRDesc); + Managed Program = getBuiltURProgram( + ContextImpl, DeviceImpl, DeviceKernelInfo.Name, NDRDesc); +#ifdef __INTEL_PREVIEW_BREAKING_CHANGES +// Simplify this once `DeviceKernelInfo.Name`'s type is known. +// Using `decltype(auto)` insteado of just `auto` to get reference when +// possible. +#endif + decltype(auto) KernelName = KernelNameStrRefT{DeviceKernelInfo.Name}; auto BuildF = [this, &Program, &KernelName, &ContextImpl] { adapter_impl &Adapter = ContextImpl.getAdapter(); Managed Kernel{Adapter}; @@ -1125,7 +1132,8 @@ FastKernelCacheValPtr ProgramManager::getOrCreateKernel( return std::make_pair(std::move(Kernel), ArgMask); }; - auto GetCachedBuildF = [&Cache, &KernelName, &Program]() { + auto GetCachedBuildF = [&Cache, &KernelName = DeviceKernelInfo.Name, + &Program]() { return Cache.getOrInsertKernel(Program, KernelName); }; @@ -1147,7 +1155,7 @@ FastKernelCacheValPtr ProgramManager::getOrCreateKernel( auto ret_val = std::make_shared( KernelArgMaskPair.first.retain(), &(BuildResult->MBuildResultMutex), KernelArgMaskPair.second, std::move(Program), ContextImpl.getAdapter()); - Cache.saveKernel(KernelName, UrDevice, ret_val, + Cache.saveKernel(DeviceKernelInfo.Name, UrDevice, ret_val, DeviceKernelInfo.getKernelSubcache()); return ret_val; } diff --git a/sycl/source/detail/program_manager/program_manager.hpp b/sycl/source/detail/program_manager/program_manager.hpp index fd1a686bf14b0..8f18f2b7078b1 100644 --- a/sycl/source/detail/program_manager/program_manager.hpp +++ b/sycl/source/detail/program_manager/program_manager.hpp @@ -200,7 +200,6 @@ class ProgramManager { FastKernelCacheValPtr getOrCreateKernel(context_impl &ContextImpl, device_impl &DeviceImpl, - KernelNameStrRefT KernelName, DeviceKernelInfo &DeviceKernelInfo, const NDRDescT &NDRDesc = {}); diff --git a/sycl/source/detail/scheduler/commands.cpp b/sycl/source/detail/scheduler/commands.cpp index 3d1f1b6dbce20..dad5d976b3114 100644 --- a/sycl/source/detail/scheduler/commands.cpp +++ b/sycl/source/detail/scheduler/commands.cpp @@ -1985,7 +1985,7 @@ std::string instrumentationGetKernelName( void instrumentationAddExtraKernelMetadata( xpti_td *&CmdTraceEvent, const NDRDescT &NDRDesc, detail::kernel_bundle_impl *KernelBundleImplPtr, - KernelNameStrRefT KernelName, DeviceKernelInfo &DeviceKernelInfo, + DeviceKernelInfo &DeviceKernelInfo, const std::shared_ptr &SyclKernel, queue_impl *Queue, std::vector &CGArgs) // CGArgs are not const since they could be // sorted in this function @@ -2002,7 +2002,7 @@ void instrumentationAddExtraKernelMetadata( EliminatedArgMask = SyclKernel->getKernelArgMask(); } else if (auto SyclKernelImpl = KernelBundleImplPtr - ? KernelBundleImplPtr->tryGetKernel(KernelName) + ? KernelBundleImplPtr->tryGetKernel(DeviceKernelInfo.Name) : std::shared_ptr{nullptr}) { EliminatedArgMask = SyclKernelImpl->getKernelArgMask(); } else if (Queue) { @@ -2011,8 +2011,7 @@ void instrumentationAddExtraKernelMetadata( // by graph API, when a modifiable graph is finalized. FastKernelCacheValPtr FastKernelCacheVal = detail::ProgramManager::getInstance().getOrCreateKernel( - Queue->getContextImpl(), Queue->getDeviceImpl(), KernelName, - DeviceKernelInfo); + Queue->getContextImpl(), Queue->getDeviceImpl(), DeviceKernelInfo); EliminatedArgMask = FastKernelCacheVal->MKernelArgMask; } @@ -2099,9 +2098,8 @@ std::pair emitKernelInstrumentationData( xpti::stream_id_t StreamID, const std::shared_ptr &SyclKernel, const detail::code_location &CodeLoc, bool IsTopCodeLoc, - const std::string_view SyclKernelName, DeviceKernelInfo &DeviceKernelInfo, - queue_impl *Queue, const NDRDescT &NDRDesc, - detail::kernel_bundle_impl *KernelBundleImplPtr, + DeviceKernelInfo &DeviceKernelInfo, queue_impl *Queue, + const NDRDescT &NDRDesc, detail::kernel_bundle_impl *KernelBundleImplPtr, std::vector &CGArgs) { auto XptiObjects = std::make_pair(nullptr, -1); @@ -2111,8 +2109,9 @@ std::pair emitKernelInstrumentationData( void *Address = nullptr; std::optional FromSource; - std::string KernelName = instrumentationGetKernelName( - SyclKernel, CodeLoc.functionName(), SyclKernelName, Address, FromSource); + std::string KernelName = + instrumentationGetKernelName(SyclKernel, CodeLoc.functionName(), + DeviceKernelInfo.Name, Address, FromSource); auto &[CmdTraceEvent, InstanceID] = XptiObjects; @@ -2137,10 +2136,9 @@ std::pair emitKernelInstrumentationData( if (Queue) xpti::framework::stash_tuple(XPTI_QUEUE_INSTANCE_ID_KEY, getQueueID(Queue)); - instrumentationAddExtraKernelMetadata( - CmdTraceEvent, NDRDesc, KernelBundleImplPtr, - std::string(SyclKernelName), DeviceKernelInfo, SyclKernel, Queue, - CGArgs); + instrumentationAddExtraKernelMetadata(CmdTraceEvent, NDRDesc, + KernelBundleImplPtr, DeviceKernelInfo, + SyclKernel, Queue, CGArgs); xptiNotifySubscribers( StreamID, NotificationTraceType, detail::GSYCLGraphEvent, CmdTraceEvent, @@ -2195,8 +2193,8 @@ void ExecCGCommand::emitInstrumentationData() { reinterpret_cast(MCommandGroup.get()); instrumentationAddExtraKernelMetadata( CmdTraceEvent, KernelCG->MNDRDesc, KernelCG->getKernelBundle().get(), - KernelCG->MKernelName, KernelCG->MDeviceKernelInfo, - KernelCG->MSyclKernel, MQueue.get(), KernelCG->MArgs); + KernelCG->MDeviceKernelInfo, KernelCG->MSyclKernel, MQueue.get(), + KernelCG->MArgs); } xptiNotifySubscribers( @@ -2397,20 +2395,17 @@ static ur_result_t SetKernelParamsAndLaunch( const std::function &getMemAllocationFunc, bool IsCooperative, bool KernelUsesClusterLaunch, uint32_t WorkGroupMemorySize, const RTDeviceBinaryImage *BinImage, - KernelNameStrRefT KernelName, DeviceKernelInfo &DeviceKernelInfo, - void *KernelFuncPtr = nullptr, int KernelNumArgs = 0, - detail::kernel_param_desc_t (*KernelParamDescGetter)(int) = nullptr, - bool KernelHasSpecialCaptures = true) { + DeviceKernelInfo &DeviceKernelInfo, void *KernelFuncPtr = nullptr) { adapter_impl &Adapter = Queue.getAdapter(); if (SYCLConfig::get()) { std::vector Empty; Kernel = Scheduler::getInstance().completeSpecConstMaterialization( - Queue, BinImage, KernelName, + Queue, BinImage, DeviceKernelInfo.Name, DeviceImageImpl ? DeviceImageImpl->get_spec_const_blob_ref() : Empty); } - if (KernelFuncPtr && !KernelHasSpecialCaptures) { + if (KernelFuncPtr && !DeviceKernelInfo.HasSpecialCaptures) { auto setFunc = [&Adapter, Kernel, KernelFuncPtr](const detail::kernel_param_desc_t &ParamDesc, size_t NextTrueIndex) { @@ -2432,8 +2427,8 @@ static ur_result_t SetKernelParamsAndLaunch( throw std::runtime_error("Direct kernel argument copy failed."); } }; - applyFuncOnFilteredArgs(EliminatedArgMask, KernelNumArgs, - KernelParamDescGetter, setFunc); + applyFuncOnFilteredArgs(EliminatedArgMask, DeviceKernelInfo.NumParams, + DeviceKernelInfo.ParamDescGetter, setFunc); } else { auto setFunc = [&Adapter, Kernel, &DeviceImageImpl, &getMemAllocationFunc, &Queue](detail::ArgDesc &Arg, size_t NextTrueIndex) { @@ -2539,7 +2534,7 @@ getCGKernelInfo(const CGExecKernel &CommandGroup, context_impl &ContextImpl, EliminatedArgMask = Kernel->getKernelArgMask(); } else if (auto SyclKernelImpl = KernelBundleImplPtr ? KernelBundleImplPtr->tryGetKernel( - CommandGroup.MKernelName) + CommandGroup.MDeviceKernelInfo.Name) : std::shared_ptr{nullptr}) { UrKernel = SyclKernelImpl->getHandleRef(); DeviceImageImpl = &SyclKernelImpl->getDeviceImage(); @@ -2547,8 +2542,7 @@ getCGKernelInfo(const CGExecKernel &CommandGroup, context_impl &ContextImpl, } else { FastKernelCacheValPtr FastKernelCacheVal = sycl::detail::ProgramManager::getInstance().getOrCreateKernel( - ContextImpl, DeviceImpl, CommandGroup.MKernelName, - CommandGroup.MDeviceKernelInfo); + ContextImpl, DeviceImpl, CommandGroup.MDeviceKernelInfo); UrKernel = FastKernelCacheVal->MKernelHandle; EliminatedArgMask = FastKernelCacheVal->MKernelArgMask; // To keep UrKernel valid, we return FastKernelCacheValPtr. @@ -2662,15 +2656,12 @@ ur_result_t enqueueImpCommandBufferKernel( void enqueueImpKernel( queue_impl &Queue, NDRDescT &NDRDesc, std::vector &Args, detail::kernel_bundle_impl *KernelBundleImplPtr, - const detail::kernel_impl *MSyclKernel, KernelNameStrRefT KernelName, - DeviceKernelInfo &DeviceKernelInfo, + const detail::kernel_impl *MSyclKernel, DeviceKernelInfo &DeviceKernelInfo, std::vector &RawEvents, detail::event_impl *OutEventImpl, const std::function &getMemAllocationFunc, ur_kernel_cache_config_t KernelCacheConfig, const bool KernelIsCooperative, const bool KernelUsesClusterLaunch, const size_t WorkGroupMemorySize, - const RTDeviceBinaryImage *BinImage, void *KernelFuncPtr, int KernelNumArgs, - detail::kernel_param_desc_t (*KernelParamDescGetter)(int), - bool KernelHasSpecialCaptures) { + const RTDeviceBinaryImage *BinImage, void *KernelFuncPtr) { // Run OpenCL kernel context_impl &ContextImpl = Queue.getContextImpl(); device_impl &DeviceImpl = Queue.getDeviceImpl(); @@ -2699,7 +2690,7 @@ void enqueueImpKernel( EliminatedArgMask = MSyclKernel->getKernelArgMask(); } else if ((SyclKernelImpl = KernelBundleImplPtr - ? KernelBundleImplPtr->tryGetKernel(KernelName) + ? KernelBundleImplPtr->tryGetKernel(DeviceKernelInfo.Name) : std::shared_ptr{nullptr})) { Kernel = SyclKernelImpl->getHandleRef(); DeviceImageImpl = &SyclKernelImpl->getDeviceImage(); @@ -2710,7 +2701,7 @@ void enqueueImpKernel( KernelMutex = SyclKernelImpl->getCacheMutex(); } else { KernelCacheVal = detail::ProgramManager::getInstance().getOrCreateKernel( - ContextImpl, DeviceImpl, KernelName, DeviceKernelInfo, NDRDesc); + ContextImpl, DeviceImpl, DeviceKernelInfo, NDRDesc); Kernel = KernelCacheVal->MKernelHandle; KernelMutex = KernelCacheVal->MMutex; Program = KernelCacheVal->MProgramHandle; @@ -2757,8 +2748,7 @@ void enqueueImpKernel( Queue, Args, DeviceImageImpl, Kernel, NDRDesc, EventsWaitList, OutEventImpl, EliminatedArgMask, getMemAllocationFunc, KernelIsCooperative, KernelUsesClusterLaunch, WorkGroupMemorySize, - BinImage, KernelName, DeviceKernelInfo, KernelFuncPtr, KernelNumArgs, - KernelParamDescGetter, KernelHasSpecialCaptures); + BinImage, DeviceKernelInfo, KernelFuncPtr); } if (UR_RESULT_SUCCESS != Error) { // If we have got non-success error code, let's analyze it to emit nice @@ -3232,7 +3222,7 @@ ur_result_t ExecCGCommand::enqueueImpQueue() { const std::shared_ptr &SyclKernel = ExecKernel->MSyclKernel; - KernelNameStrRefT KernelName = ExecKernel->MKernelName; + KernelNameStrRefT KernelName = ExecKernel->MDeviceKernelInfo.Name; if (!EventImpl) { // Kernel only uses assert if it's non interop one @@ -3250,8 +3240,8 @@ ur_result_t ExecCGCommand::enqueueImpQueue() { } enqueueImpKernel( *MQueue, NDRDesc, Args, ExecKernel->getKernelBundle().get(), - SyclKernel.get(), KernelName, ExecKernel->MDeviceKernelInfo, RawEvents, - EventImpl, getMemAllocationFunc, ExecKernel->MKernelCacheConfig, + SyclKernel.get(), ExecKernel->MDeviceKernelInfo, RawEvents, EventImpl, + getMemAllocationFunc, ExecKernel->MKernelCacheConfig, ExecKernel->MKernelIsCooperative, ExecKernel->MKernelUsesClusterLaunch, ExecKernel->MKernelWorkGroupMemorySize, BinImage); diff --git a/sycl/source/detail/scheduler/commands.hpp b/sycl/source/detail/scheduler/commands.hpp index 046764dcd7054..202d49ec68382 100644 --- a/sycl/source/detail/scheduler/commands.hpp +++ b/sycl/source/detail/scheduler/commands.hpp @@ -627,16 +627,13 @@ class MemCpyCommandHost : public Command { void enqueueImpKernel( queue_impl &Queue, NDRDescT &NDRDesc, std::vector &Args, detail::kernel_bundle_impl *KernelBundleImplPtr, - const detail::kernel_impl *MSyclKernel, KernelNameStrRefT KernelName, - DeviceKernelInfo &DeviceKernelInfo, + const detail::kernel_impl *MSyclKernel, DeviceKernelInfo &DeviceKernelInfo, std::vector &RawEvents, detail::event_impl *OutEventImpl, const std::function &getMemAllocationFunc, ur_kernel_cache_config_t KernelCacheConfig, bool KernelIsCooperative, const bool KernelUsesClusterLaunch, const size_t WorkGroupMemorySize, const RTDeviceBinaryImage *BinImage = nullptr, - void *KernelFuncPtr = nullptr, int KernelNumArgs = 0, - detail::kernel_param_desc_t (*KernelParamDescGetter)(int) = nullptr, - bool KernelHasSpecialCaptures = true); + void *KernelFuncPtr = nullptr); /// The exec CG command enqueues execution of kernel or explicit memory /// operation. @@ -694,9 +691,8 @@ std::pair emitKernelInstrumentationData( xpti::stream_id_t StreamID, const std::shared_ptr &SyclKernel, const detail::code_location &CodeLoc, bool IsTopCodeLoc, - std::string_view SyclKernelName, DeviceKernelInfo &DeviceKernelInfo, - queue_impl *Queue, const NDRDescT &NDRDesc, - detail::kernel_bundle_impl *KernelBundleImplPtr, + DeviceKernelInfo &DeviceKernelInfo, queue_impl *Queue, + const NDRDescT &NDRDesc, detail::kernel_bundle_impl *KernelBundleImplPtr, std::vector &CGArgs); #endif diff --git a/sycl/source/handler.cpp b/sycl/source/handler.cpp index 783ca053336ef..ddb09c5f76653 100644 --- a/sycl/source/handler.cpp +++ b/sycl/source/handler.cpp @@ -583,11 +583,11 @@ event handler::finalize() { !(MKernel && MKernel->isInterop()) && (KernelBundleImpPtr->empty() || KernelBundleImpPtr->hasSYCLOfflineImages()) && - !KernelBundleImpPtr->tryGetKernel(toKernelNameStrT(MKernelName))) { + !KernelBundleImpPtr->tryGetKernel(impl->getKernelName())) { detail::device_impl &Dev = impl->get_device(); kernel_id KernelID = detail::ProgramManager::getInstance().getSYCLKernelID( - toKernelNameStrT(MKernelName)); + impl->getKernelName()); bool KernelInserted = KernelBundleImpPtr->add_kernel( KernelID, detail::createSyclObjFromImpl(Dev)); // If kernel was not inserted and the bundle is in input mode we try @@ -668,9 +668,8 @@ event handler::finalize() { if (xptiEnabled) { std::tie(CmdTraceEvent, InstanceID) = emitKernelInstrumentationData( detail::GSYCLStreamID, MKernel, MCodeLoc, impl->MIsTopCodeLoc, - MKernelName.data(), *impl->MDeviceKernelInfoPtr, - impl->get_queue_or_null(), impl->MNDRDesc, KernelBundleImpPtr, - impl->MArgs); + *impl->MDeviceKernelInfoPtr, impl->get_queue_or_null(), + impl->MNDRDesc, KernelBundleImpPtr, impl->MArgs); detail::emitInstrumentationGeneral(detail::GSYCLStreamID, InstanceID, CmdTraceEvent, xpti::trace_task_begin, nullptr); @@ -678,18 +677,16 @@ event handler::finalize() { #endif const detail::RTDeviceBinaryImage *BinImage = nullptr; if (detail::SYCLConfig::get()) { - BinImage = detail::retrieveKernelBinary( - impl->get_queue(), toKernelNameStrT(MKernelName)); + BinImage = detail::retrieveKernelBinary(impl->get_queue(), + impl->getKernelName()); assert(BinImage && "Failed to obtain a binary image."); } enqueueImpKernel( impl->get_queue(), impl->MNDRDesc, impl->MArgs, KernelBundleImpPtr, - MKernel.get(), toKernelNameStrT(MKernelName), - *impl->MDeviceKernelInfoPtr, RawEvents, ResultEvent.get(), nullptr, - impl->MKernelCacheConfig, impl->MKernelIsCooperative, - impl->MKernelUsesClusterLaunch, impl->MKernelWorkGroupMemorySize, - BinImage, impl->MKernelFuncPtr, impl->MKernelNumArgs, - impl->MKernelParamDescGetter, impl->MKernelHasSpecialCaptures); + MKernel.get(), *impl->MDeviceKernelInfoPtr, RawEvents, + ResultEvent.get(), nullptr, impl->MKernelCacheConfig, + impl->MKernelIsCooperative, impl->MKernelUsesClusterLaunch, + impl->MKernelWorkGroupMemorySize, BinImage, impl->MKernelFuncPtr); #ifdef XPTI_ENABLE_INSTRUMENTATION if (xptiEnabled) { // Emit signal only when event is created @@ -746,10 +743,9 @@ event handler::finalize() { CommandGroup.reset(new detail::CGExecKernel( impl->MNDRDesc, std::move(MHostKernel), std::move(MKernel), std::move(impl->MKernelBundle), std::move(impl->CGData), - std::move(impl->MArgs), toKernelNameStrT(MKernelName), - *impl->MDeviceKernelInfoPtr, std::move(MStreamStorage), - std::move(impl->MAuxiliaryResources), getType(), - impl->MKernelCacheConfig, impl->MKernelIsCooperative, + std::move(impl->MArgs), *impl->MDeviceKernelInfoPtr, + std::move(MStreamStorage), std::move(impl->MAuxiliaryResources), + getType(), impl->MKernelCacheConfig, impl->MKernelIsCooperative, impl->MKernelUsesClusterLaunch, impl->MKernelWorkGroupMemorySize, MCodeLoc)); break; diff --git a/sycl/unittests/program_manager/arg_mask/EliminatedArgMask.cpp b/sycl/unittests/program_manager/arg_mask/EliminatedArgMask.cpp index e6543927894a4..244110aff12cf 100644 --- a/sycl/unittests/program_manager/arg_mask/EliminatedArgMask.cpp +++ b/sycl/unittests/program_manager/arg_mask/EliminatedArgMask.cpp @@ -148,11 +148,10 @@ class MockHandler : public sycl::handler { std::move(impl->MNDRDesc), std::move(CGH->MHostKernel), std::move(CGH->MKernel), std::move(impl->MKernelBundle), std::move(impl->CGData), std::move(impl->MArgs), - CGH->MKernelName.data(), *impl->MDeviceKernelInfoPtr, - std::move(CGH->MStreamStorage), std::move(impl->MAuxiliaryResources), - impl->MCGType, {}, impl->MKernelIsCooperative, - impl->MKernelUsesClusterLaunch, impl->MKernelWorkGroupMemorySize, - CGH->MCodeLoc)); + *impl->MDeviceKernelInfoPtr, std::move(CGH->MStreamStorage), + std::move(impl->MAuxiliaryResources), impl->MCGType, {}, + impl->MKernelIsCooperative, impl->MKernelUsesClusterLaunch, + impl->MKernelWorkGroupMemorySize, CGH->MCodeLoc)); break; } default: @@ -184,7 +183,7 @@ const sycl::detail::KernelArgMask *getKernelArgMaskFromBundle( << "Expect command group to contain kernel bundle"; auto SyclKernelImpl = - KernelBundleImplPtr->tryGetKernel(ExecKernel->MKernelName); + KernelBundleImplPtr->tryGetKernel(ExecKernel->MDeviceKernelInfo.Name); EXPECT_TRUE(SyclKernelImpl != nullptr); sycl::detail::device_image_impl &DeviceImageImpl = SyclKernelImpl->getDeviceImage(); @@ -194,7 +193,7 @@ const sycl::detail::KernelArgMask *getKernelArgMaskFromBundle( !ExecKernel->MSyclKernel->isCreatedFromSource()); return sycl::detail::ProgramManager::getInstance().getEliminatedKernelArgMask( - Program, ExecKernel->MKernelName); + Program, ExecKernel->MDeviceKernelInfo.Name); } // After both kernels are compiled ProgramManager.NativePrograms contains info diff --git a/sycl/unittests/scheduler/SchedulerTestUtils.hpp b/sycl/unittests/scheduler/SchedulerTestUtils.hpp index c3bdb342170de..a7572e0b97c6f 100644 --- a/sycl/unittests/scheduler/SchedulerTestUtils.hpp +++ b/sycl/unittests/scheduler/SchedulerTestUtils.hpp @@ -253,9 +253,7 @@ class MockHandler : public sycl::handler { return impl->CGData.MEvents; } std::vector &getArgs() { return impl->MArgs; } - sycl::detail::KernelNameStrT getKernelName() { - return toKernelNameStrT(MKernelName); - } + std::string_view getKernelName() { return impl->MDeviceKernelInfoPtr->Name; } std::shared_ptr &getKernel() { return MKernel; } std::shared_ptr &getHostTask() { return impl->MHostTask; @@ -303,7 +301,7 @@ class MockHandlerCustomFinalize : public MockHandler { CommandGroup.reset(new sycl::detail::CGExecKernel( getNDRDesc(), std::move(getHostKernel()), getKernel(), std::move(impl->MKernelBundle), std::move(CGData), getArgs(), - getKernelName(), *impl->MDeviceKernelInfoPtr, getStreamStorage(), + *impl->MDeviceKernelInfoPtr, getStreamStorage(), impl->MAuxiliaryResources, getType(), {}, impl->MKernelIsCooperative, impl->MKernelUsesClusterLaunch, impl->MKernelWorkGroupMemorySize, getCodeLoc())); diff --git a/sycl/unittests/scheduler/StreamInitDependencyOnHost.cpp b/sycl/unittests/scheduler/StreamInitDependencyOnHost.cpp index 2b6d0cc563431..55a2893fdf7e8 100644 --- a/sycl/unittests/scheduler/StreamInitDependencyOnHost.cpp +++ b/sycl/unittests/scheduler/StreamInitDependencyOnHost.cpp @@ -33,9 +33,9 @@ class MockHandlerStreamInit : public MockHandler { detail::CG::StorageInitHelper(getArgsStorage(), getAccStorage(), getSharedPtrStorage(), getRequirements(), getEvents()), - getArgs(), getKernelName(), *impl->MDeviceKernelInfoPtr, - getStreamStorage(), std::move(impl->MAuxiliaryResources), getType(), - {}, impl->MKernelIsCooperative, impl->MKernelUsesClusterLaunch, + getArgs(), *impl->MDeviceKernelInfoPtr, getStreamStorage(), + std::move(impl->MAuxiliaryResources), getType(), {}, + impl->MKernelIsCooperative, impl->MKernelUsesClusterLaunch, impl->MKernelWorkGroupMemorySize, getCodeLoc())); break; }