diff --git a/sycl/include/sycl/detail/compile_time_kernel_info.hpp b/sycl/include/sycl/detail/compile_time_kernel_info.hpp new file mode 100644 index 0000000000000..f2eb59e874cd8 --- /dev/null +++ b/sycl/include/sycl/detail/compile_time_kernel_info.hpp @@ -0,0 +1,51 @@ +//==------------------- compile_time_kernel_info.hpp -----------------------==// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +#pragma once + +#include +#include + +namespace sycl { +inline namespace _V1 { +namespace detail { +inline namespace compile_time_kernel_info_v1 { + +// This is being passed across ABI boundary, so we don't use std::string_view, +// at least for as long as we support user apps built with GNU libstdc++'s +// pre-C++11 ABI. +struct CompileTimeKernelInfoTy { + detail::string_view Name; + unsigned NumParams = 0; + bool IsESIMD = false; + detail::string_view FileName{}; + detail::string_view FunctionName{}; + unsigned LineNumber = 0; + unsigned ColumnNumber = 0; + int64_t KernelSize = 0; + using ParamDescGetterT = kernel_param_desc_t (*)(int); + ParamDescGetterT ParamDescGetter = nullptr; + bool HasSpecialCaptures = true; +}; + +template +inline constexpr CompileTimeKernelInfoTy CompileTimeKernelInfo{ + std::string_view(getKernelName()), + getKernelNumParams(), + isKernelESIMD(), + std::string_view(getKernelFileName()), + std::string_view(getKernelFunctionName()), + getKernelLineNumber(), + getKernelColumnNumber(), + getKernelSize(), + &getKernelParamDesc, + hasSpecialCaptures()}; + +} // namespace compile_time_kernel_info_v1 +} // namespace detail +} // namespace _V1 +} // namespace sycl diff --git a/sycl/include/sycl/detail/get_device_kernel_info.hpp b/sycl/include/sycl/detail/get_device_kernel_info.hpp new file mode 100644 index 0000000000000..021f4077b9a3a --- /dev/null +++ b/sycl/include/sycl/detail/get_device_kernel_info.hpp @@ -0,0 +1,39 @@ +//==--------------------- get_device_kernel_info.hpp -----------------------==// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +#pragma once + +#include +#include + +namespace sycl { +inline namespace _V1 { +namespace detail { + +class DeviceKernelInfo; +// Lifetime of the underlying `DeviceKernelInfo` is tied to the availability of +// the `sycl_device_binaries` corresponding to this kernel. In other words, once +// user library is unloaded (see __sycl_unregister_lib), program manager destoys +// this `DeviceKernelInfo` object and the reference returned from here becomes +// stale. +__SYCL_EXPORT DeviceKernelInfo & +getDeviceKernelInfo(const CompileTimeKernelInfoTy &); + +template DeviceKernelInfo &getDeviceKernelInfo() { + static DeviceKernelInfo &Info = + getDeviceKernelInfo(CompileTimeKernelInfo); + return Info; +} + +#ifndef __INTEL_PREVIEW_BREAKING_CHANGES +struct KernelNameBasedCacheT; +__SYCL_EXPORT KernelNameBasedCacheT *createKernelNameBasedCache(); +#endif + +} // namespace detail +} // namespace _V1 +} // namespace sycl diff --git a/sycl/include/sycl/detail/kernel_desc.hpp b/sycl/include/sycl/detail/kernel_desc.hpp index 09d294d1b2d9e..ae01f46c57052 100644 --- a/sycl/include/sycl/detail/kernel_desc.hpp +++ b/sycl/include/sycl/detail/kernel_desc.hpp @@ -277,7 +277,6 @@ template constexpr bool hasSpecialCaptures() { } return FoundSpecialCapture; } - } // namespace detail } // namespace _V1 } // namespace sycl diff --git a/sycl/include/sycl/detail/kernel_name_based_cache.hpp b/sycl/include/sycl/detail/kernel_name_based_cache.hpp deleted file mode 100644 index 6bd2e38edc8e7..0000000000000 --- a/sycl/include/sycl/detail/kernel_name_based_cache.hpp +++ /dev/null @@ -1,29 +0,0 @@ -//==--------------------- kernel_name_based_cache.hpp ----------------------==// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// -#pragma once - -#include - -namespace sycl { -inline namespace _V1 { -namespace detail { - -struct KernelNameBasedCacheT; -__SYCL_EXPORT KernelNameBasedCacheT *createKernelNameBasedCache(); - -// Retrieves a cache pointer unique to a kernel name type that can be used to -// avoid kernel name based lookup in the runtime. -template -KernelNameBasedCacheT *getKernelNameBasedCache() { - static KernelNameBasedCacheT *Instance = createKernelNameBasedCache(); - return Instance; -} - -} // namespace detail -} // namespace _V1 -} // namespace sycl diff --git a/sycl/include/sycl/handler.hpp b/sycl/include/sycl/handler.hpp index 0185c611bec59..a17358775e812 100644 --- a/sycl/include/sycl/handler.hpp +++ b/sycl/include/sycl/handler.hpp @@ -14,11 +14,11 @@ #include #include #include +#include #include #include #include #include -#include #include #include #include @@ -863,6 +863,7 @@ class __SYCL_EXPORT handler { constexpr std::string_view KernelNameStr = detail::getKernelName(); MKernelName = KernelNameStr; + setDeviceKernelInfoPtr(&detail::getDeviceKernelInfo()); } else { // In case w/o the integration header it is necessary to process // accessors from the list(which are associated with this handler) as @@ -870,7 +871,6 @@ class __SYCL_EXPORT handler { // later during finalize. setArgsToAssociatedAccessors(); } - setKernelNameBasedCachePtr(detail::getKernelNameBasedCache()); // If the kernel lambda is callable with a kernel_handler argument, manifest // the associated kernel handler. @@ -3685,8 +3685,11 @@ class __SYCL_EXPORT handler { sycl::handler &h, size_t size, const ext::oneapi::experimental::memory_pool &pool); +#ifndef __INTEL_PREVIEW_BREAKING_CHANGES void setKernelNameBasedCachePtr( detail::KernelNameBasedCacheT *KernelNameBasedCachePtr); +#endif + void setDeviceKernelInfoPtr(detail::DeviceKernelInfo *DeviceKernelInfoPtr); queue getQueue(); diff --git a/sycl/source/CMakeLists.txt b/sycl/source/CMakeLists.txt index 24a471eacb0f6..38ffd232fcbbe 100644 --- a/sycl/source/CMakeLists.txt +++ b/sycl/source/CMakeLists.txt @@ -288,7 +288,8 @@ set(SYCL_COMMON_SOURCES "detail/kernel_compiler/kernel_compiler_opencl.cpp" "detail/kernel_compiler/kernel_compiler_sycl.cpp" "detail/kernel_impl.cpp" - "detail/kernel_name_based_cache.cpp" + "detail/get_device_kernel_info.cpp" + "detail/device_kernel_info.cpp" "detail/kernel_program_cache.cpp" "detail/memory_export.cpp" "detail/memory_manager.cpp" diff --git a/sycl/source/detail/cg.hpp b/sycl/source/detail/cg.hpp index 87e7b088951ac..e92f9b40eff45 100644 --- a/sycl/source/detail/cg.hpp +++ b/sycl/source/detail/cg.hpp @@ -254,7 +254,7 @@ class CGExecKernel : public CG { std::shared_ptr MKernelBundle; std::vector MArgs; KernelNameStrT MKernelName; - KernelNameBasedCacheT *MKernelNameBasedCachePtr; + DeviceKernelInfo &MDeviceKernelInfo; std::vector> MStreams; std::vector> MAuxiliaryResources; /// Used to implement ext_oneapi_graph dynamic_command_group. Stores the list @@ -269,8 +269,7 @@ class CGExecKernel : public CG { std::shared_ptr SyclKernel, std::shared_ptr KernelBundle, CG::StorageInitHelper CGData, std::vector Args, - KernelNameStrT KernelName, - KernelNameBasedCacheT *KernelNameBasedCachePtr, + KernelNameStrT KernelName, DeviceKernelInfo &DeviceKernelInfo, std::vector> Streams, std::vector> AuxiliaryResources, CGType Type, ur_kernel_cache_config_t KernelCacheConfig, @@ -279,8 +278,7 @@ class CGExecKernel : public CG { : CG(Type, std::move(CGData), std::move(loc)), MNDRDesc(NDRDesc), MHostKernel(std::move(HKernel)), MSyclKernel(std::move(SyclKernel)), MKernelBundle(std::move(KernelBundle)), MArgs(std::move(Args)), - MKernelName(std::move(KernelName)), - MKernelNameBasedCachePtr(KernelNameBasedCachePtr), + MKernelName(std::move(KernelName)), MDeviceKernelInfo(DeviceKernelInfo), MStreams(std::move(Streams)), MAuxiliaryResources(std::move(AuxiliaryResources)), MAlternativeKernels{}, MKernelCacheConfig(std::move(KernelCacheConfig)), diff --git a/sycl/source/detail/device_kernel_info.cpp b/sycl/source/detail/device_kernel_info.cpp new file mode 100644 index 0000000000000..30f2db1ec40bc --- /dev/null +++ b/sycl/source/detail/device_kernel_info.cpp @@ -0,0 +1,89 @@ +//==---------------------- device_kernel_info.cpp ----------------------==// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +#include +#include + +namespace sycl { +inline namespace _V1 { +namespace detail { + +DeviceKernelInfo::DeviceKernelInfo(const CompileTimeKernelInfoTy &Info) + : CompileTimeKernelInfoTy(Info) +#ifndef __INTEL_PREVIEW_BREAKING_CHANGES + , + Name(Info.Name.data()) +#endif +{ + init(Name.data()); +} + +void DeviceKernelInfo::init(KernelNameStrRefT KernelName) { + auto &PM = detail::ProgramManager::getInstance(); + MUsesAssert = PM.kernelUsesAssert(KernelName); + MImplicitLocalArgPos = PM.kernelImplicitLocalArgPos(KernelName); +#ifndef __INTEL_PREVIEW_BREAKING_CHANGES + MInitialized.store(true); +#endif +} + +#ifndef __INTEL_PREVIEW_BREAKING_CHANGES +void DeviceKernelInfo::initIfNeeded(KernelNameStrRefT KernelName) { + if (!MInitialized.load()) + init(KernelName); +} +#endif + +template +inline constexpr bool operator==(const CompileTimeKernelInfoTy &LHS, + const OtherTy &RHS) { + // TODO replace with std::tie(...) == std::tie(...) once there is + // implicit conversion from detail to std string_view. + return std::string_view{LHS.Name} == std::string_view{RHS.Name} && + LHS.NumParams == RHS.NumParams && LHS.IsESIMD == RHS.IsESIMD && + std::string_view{LHS.FileName} == std::string_view{RHS.FileName} && + std::string_view{LHS.FunctionName} == + std::string_view{RHS.FunctionName} && + LHS.LineNumber == RHS.LineNumber && + LHS.ColumnNumber == RHS.ColumnNumber && + LHS.KernelSize == RHS.KernelSize && + LHS.ParamDescGetter == RHS.ParamDescGetter && + LHS.HasSpecialCaptures == RHS.HasSpecialCaptures; +} + +void DeviceKernelInfo::setCompileTimeInfoIfNeeded( + const CompileTimeKernelInfoTy &Info) { + if (isCompileTimeInfoSet()) + CompileTimeKernelInfoTy::operator=(Info); + assert(isCompileTimeInfoSet()); + assert(Info == *this); +} + +FastKernelSubcacheT &DeviceKernelInfo::getKernelSubcache() { + assertInitialized(); + return MFastKernelSubcache; +} +bool DeviceKernelInfo::usesAssert() { + assertInitialized(); + return MUsesAssert; +} +const std::optional &DeviceKernelInfo::getImplicitLocalArgPos() { + assertInitialized(); + return MImplicitLocalArgPos; +} + +bool DeviceKernelInfo::isCompileTimeInfoSet() const { return KernelSize != 0; } + +void DeviceKernelInfo::assertInitialized() { +#ifndef __INTEL_PREVIEW_BREAKING_CHANGES + assert(MInitialized.load() && "Data needs to be initialized before use"); +#endif +} + +} // namespace detail +} // namespace _V1 +} // namespace sycl diff --git a/sycl/source/detail/kernel_name_based_cache_t.hpp b/sycl/source/detail/device_kernel_info.hpp similarity index 57% rename from sycl/source/detail/kernel_name_based_cache_t.hpp rename to sycl/source/detail/device_kernel_info.hpp index e7c9b049a3199..acf524030b684 100644 --- a/sycl/source/detail/kernel_name_based_cache_t.hpp +++ b/sycl/source/detail/device_kernel_info.hpp @@ -1,4 +1,4 @@ -//==-------------------- kernel_name_based_cache_t.hpp ---------------------==// +//==---------------------- device_kernel_info.hpp ----------------------==// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. @@ -10,6 +10,8 @@ #include #include #include +#include +#include #include #include @@ -23,9 +25,9 @@ using FastKernelCacheKeyT = std::pair; struct FastKernelCacheVal { Managed MKernelHandle; /* UR kernel. */ - std::mutex *MMutex; /* Mutex guarding this kernel. When - caching is disabled, the pointer is - nullptr. */ + std::mutex *MMutex; /* Mutex guarding this kernel. When + caching is disabled, the pointer is + nullptr. */ const KernelArgMask *MKernelArgMask; /* Eliminated kernel argument mask. */ Managed MProgramHandle; /* UR program handle corresponding to this kernel. */ @@ -71,18 +73,53 @@ struct FastKernelEntryT { using FastKernelSubcacheEntriesT = std::vector; +// Structure for caching built kernels with a specific name. +// Used by instances of the kernel program cache class (potentially multiple). struct FastKernelSubcacheT { FastKernelSubcacheEntriesT Entries; FastKernelSubcacheMutexT Mutex; }; -struct KernelNameBasedCacheT { - FastKernelSubcacheT FastKernelSubcache; - std::optional UsesAssert; - // Implicit local argument position is represented by an optional int, this - // uses another optional on top of that to represent lazy initialization of - // the cached value. - std::optional> ImplicitLocalArgPos; +// This class aggregates information specific to device kernels (i.e. +// information that is uniform between different submissions of the same +// kernel). Pointers to instances of this class are stored in header function +// templates as a static variable to avoid repeated runtime lookup overhead. +// TODO Currently this class duplicates information fetched from the program +// manager. Instead, we should merge all of this information +// into this structure and get rid of the other KernelName -> * maps. +class DeviceKernelInfo : public CompileTimeKernelInfoTy { +public: +#ifndef __INTEL_PREVIEW_BREAKING_CHANGES + // Needs to own the kernel name string in non-preview builds since we pass it + // using a temporary string instead of a string view there. + std::string Name; +#endif + +#ifndef __INTEL_PREVIEW_BREAKING_CHANGES + DeviceKernelInfo() = default; +#endif + DeviceKernelInfo(const CompileTimeKernelInfoTy &Info); + + void init(KernelNameStrRefT KernelName); +#ifndef __INTEL_PREVIEW_BREAKING_CHANGES + void initIfNeeded(KernelNameStrRefT KernelName); +#endif + void setCompileTimeInfoIfNeeded(const CompileTimeKernelInfoTy &Info); + + FastKernelSubcacheT &getKernelSubcache(); + bool usesAssert(); + const std::optional &getImplicitLocalArgPos(); + +private: + void assertInitialized(); + bool isCompileTimeInfoSet() const; + +#ifndef __INTEL_PREVIEW_BREAKING_CHANGES + std::atomic MInitialized = false; +#endif + FastKernelSubcacheT MFastKernelSubcache; + bool MUsesAssert; + std::optional MImplicitLocalArgPos; }; } // namespace detail diff --git a/sycl/source/detail/kernel_name_based_cache.cpp b/sycl/source/detail/get_device_kernel_info.cpp similarity index 60% rename from sycl/source/detail/kernel_name_based_cache.cpp rename to sycl/source/detail/get_device_kernel_info.cpp index 17356e7f38fc8..084eeeb60d714 100644 --- a/sycl/source/detail/kernel_name_based_cache.cpp +++ b/sycl/source/detail/get_device_kernel_info.cpp @@ -1,4 +1,4 @@ -//==--------------------- kernel_name_based_cache.cpp ----------------------==// +//==-------------------- get_device_kernel_info.cpp --------------------==// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. @@ -6,16 +6,24 @@ // //===----------------------------------------------------------------------===// +#include + #include -#include +#include namespace sycl { inline namespace _V1 { namespace detail { +#ifndef __INTEL_PREVIEW_BREAKING_CHANGES KernelNameBasedCacheT *createKernelNameBasedCache() { return GlobalHandler::instance().createKernelNameBasedCache(); } +#endif + +DeviceKernelInfo &getDeviceKernelInfo(const CompileTimeKernelInfoTy &Info) { + return ProgramManager::getInstance().getOrCreateDeviceKernelInfo(Info); +} } // namespace detail } // namespace _V1 diff --git a/sycl/source/detail/global_handler.cpp b/sycl/source/detail/global_handler.cpp index 5d22ce3831d56..94a7e9f7b70dc 100644 --- a/sycl/source/detail/global_handler.cpp +++ b/sycl/source/detail/global_handler.cpp @@ -13,8 +13,8 @@ #include #include +#include #include -#include #include #include #include @@ -231,12 +231,15 @@ ThreadPool &GlobalHandler::getHostTaskThreadPool() { return TP; } +#ifndef __INTEL_PREVIEW_BREAKING_CHANGES KernelNameBasedCacheT *GlobalHandler::createKernelNameBasedCache() { - static std::deque &KernelNameBasedCaches = - getOrCreate(MKernelNameBasedCaches); - LockGuard LG{MKernelNameBasedCaches.Lock}; - return &KernelNameBasedCaches.emplace_back(); + static std::deque &DeviceKernelInfoStorage = + getOrCreate(MDeviceKernelInfoStorage); + LockGuard LG{MDeviceKernelInfoStorage.Lock}; + return reinterpret_cast( + &DeviceKernelInfoStorage.emplace_back()); } +#endif void GlobalHandler::releaseDefaultContexts() { // Release shared-pointers to SYCL objects. @@ -372,9 +375,11 @@ void shutdown_late() { Handler->MScheduler.Inst.reset(nullptr); Handler->MProgramManager.Inst.reset(nullptr); - // Cache stores handles to the adapter, so clear it before - // releasing adapters. - Handler->MKernelNameBasedCaches.Inst.reset(nullptr); +#ifndef __INTEL_PREVIEW_BREAKING_CHANGES + // Kernel cache, which is part of device kernel info, + // stores handles to the adapter, so clear it before releasing adapters. + Handler->MDeviceKernelInfoStorage.Inst.reset(nullptr); +#endif // Clear the adapters and reset the instance if it was there. Handler->unloadAdapters(); diff --git a/sycl/source/detail/global_handler.hpp b/sycl/source/detail/global_handler.hpp index 915924738474b..ec7bf7da48b6a 100644 --- a/sycl/source/detail/global_handler.hpp +++ b/sycl/source/detail/global_handler.hpp @@ -11,7 +11,9 @@ #include #include +#ifndef __INTEL_PREVIEW_BREAKING_CHANGES #include +#endif #include #include @@ -27,7 +29,10 @@ class adapter_impl; class ods_target_list; class XPTIRegistry; class ThreadPool; +#ifndef __INTEL_PREVIEW_BREAKING_CHANGES struct KernelNameBasedCacheT; +class DeviceKernelInfo; +#endif /// Wrapper class for global data structures with non-trivial destructors. /// @@ -73,7 +78,9 @@ class GlobalHandler { ods_target_list &getOneapiDeviceSelectorTargets(const std::string &InitValue); XPTIRegistry &getXPTIRegistry(); ThreadPool &getHostTaskThreadPool(); +#ifndef __INTEL_PREVIEW_BREAKING_CHANGES KernelNameBasedCacheT *createKernelNameBasedCache(); +#endif static void registerStaticVarShutdownHandler(); bool isOkToDefer() const; @@ -125,7 +132,9 @@ class GlobalHandler { InstWithLock MXPTIRegistry; // Thread pool for host task and event callbacks execution InstWithLock MHostTaskThreadPool; - InstWithLock> MKernelNameBasedCaches; +#ifndef __INTEL_PREVIEW_BREAKING_CHANGES + InstWithLock> MDeviceKernelInfoStorage; +#endif }; } // namespace detail } // namespace _V1 diff --git a/sycl/source/detail/graph/graph_impl.cpp b/sycl/source/detail/graph/graph_impl.cpp index 968100d2d9ea4..4583efbe881dc 100644 --- a/sycl/source/detail/graph/graph_impl.cpp +++ b/sycl/source/detail/graph/graph_impl.cpp @@ -740,7 +740,7 @@ ur_exp_command_buffer_sync_point_t exec_graph_impl::enqueueNodeDirect( std::tie(CmdTraceEvent, InstanceID) = emitKernelInstrumentationData( sycl::detail::GSYCLStreamID, CGExec->MSyclKernel, CodeLoc, CGExec->MIsTopCodeLoc, CGExec->MKernelName.data(), - CGExec->MKernelNameBasedCachePtr, nullptr, CGExec->MNDRDesc, + CGExec->MDeviceKernelInfo, nullptr, CGExec->MNDRDesc, CGExec->MKernelBundle.get(), CGExec->MArgs); if (CmdTraceEvent) sycl::detail::emitInstrumentationGeneral(sycl::detail::GSYCLStreamID, @@ -1575,8 +1575,7 @@ void exec_graph_impl::populateURKernelUpdateStructs( EliminatedArgMask = SyclKernelImpl->getKernelArgMask(); } else { BundleObjs = sycl::detail::ProgramManager::getInstance().getOrCreateKernel( - ContextImpl, DeviceImpl, ExecCG.MKernelName, - ExecCG.MKernelNameBasedCachePtr); + ContextImpl, DeviceImpl, ExecCG.MKernelName, ExecCG.MDeviceKernelInfo); UrKernel = BundleObjs->MKernelHandle; EliminatedArgMask = BundleObjs->MKernelArgMask; } diff --git a/sycl/source/detail/handler_impl.hpp b/sycl/source/detail/handler_impl.hpp index edf32dfa80f7e..a8b217a1e64fa 100644 --- a/sycl/source/detail/handler_impl.hpp +++ b/sycl/source/detail/handler_impl.hpp @@ -243,8 +243,9 @@ class handler_impl { bool MKernelIsESIMD = false; bool MKernelHasSpecialCaptures = true; - // A pointer to a kernel name based cache retrieved on the application side. - KernelNameBasedCacheT *MKernelNameBasedCachePtr = nullptr; + // A pointer to device kernel information. Cached on the application side in + // headers or retrieved from program manager. + DeviceKernelInfo *MDeviceKernelInfoPtr = nullptr; }; } // namespace detail diff --git a/sycl/source/detail/kernel_program_cache.hpp b/sycl/source/detail/kernel_program_cache.hpp index e8bf8e5bba047..8106795804654 100644 --- a/sycl/source/detail/kernel_program_cache.hpp +++ b/sycl/source/detail/kernel_program_cache.hpp @@ -10,8 +10,8 @@ #include "sycl/exception.hpp" #include +#include #include -#include #include #include #include @@ -219,25 +219,18 @@ class KernelProgramCache { class FastKernelSubcacheWrapper { public: - FastKernelSubcacheWrapper(FastKernelSubcacheT *CachePtr, + FastKernelSubcacheWrapper(FastKernelSubcacheT &Subcache, ur_context_handle_t UrContext) - : MSubcachePtr{CachePtr}, MUrContext{UrContext} { - if (!MSubcachePtr) { - MOwnsSubcache = true; - MSubcachePtr = new FastKernelSubcacheT(); - } - } + : MSubcachePtr{&Subcache}, MUrContext{UrContext} {} FastKernelSubcacheWrapper(const FastKernelSubcacheWrapper &) = delete; FastKernelSubcacheWrapper(FastKernelSubcacheWrapper &&Other) - : MSubcachePtr{Other.MSubcachePtr}, MOwnsSubcache{Other.MOwnsSubcache}, - MUrContext{Other.MUrContext} { + : MSubcachePtr{Other.MSubcachePtr}, MUrContext{Other.MUrContext} { Other.MSubcachePtr = nullptr; } FastKernelSubcacheWrapper & operator=(const FastKernelSubcacheWrapper &) = delete; FastKernelSubcacheWrapper &operator=(FastKernelSubcacheWrapper &&Other) { MSubcachePtr = Other.MSubcachePtr; - MOwnsSubcache = Other.MOwnsSubcache; MUrContext = Other.MUrContext; Other.MSubcachePtr = nullptr; return *this; @@ -247,11 +240,6 @@ class KernelProgramCache { if (!MSubcachePtr) return; - if (MOwnsSubcache) { - delete MSubcachePtr; - return; - } - // Single subcache might be used by different contexts. // Remove all entries from the subcache that are associated with the // current context. @@ -268,7 +256,6 @@ class KernelProgramCache { private: FastKernelSubcacheT *MSubcachePtr = nullptr; - bool MOwnsSubcache = false; ur_context_handle_t MUrContext = nullptr; }; @@ -455,18 +442,9 @@ class KernelProgramCache { FastKernelCacheValPtr tryToGetKernelFast(KernelNameStrRefT KernelName, ur_device_handle_t Device, - FastKernelSubcacheT *KernelSubcacheHint) { - FastKernelCacheWriteLockT Lock(MFastKernelCacheMutex); - if (!KernelSubcacheHint) { - auto It = MFastKernelCache.try_emplace( - KernelName, - FastKernelSubcacheWrapper(KernelSubcacheHint, getURContext())); - KernelSubcacheHint = &It.first->second.get(); - } - - const FastKernelSubcacheEntriesT &SubcacheEntries = - KernelSubcacheHint->Entries; - FastKernelSubcacheReadLockT SubcacheLock{KernelSubcacheHint->Mutex}; + FastKernelSubcacheT &KernelSubcache) { + const FastKernelSubcacheEntriesT &SubcacheEntries = KernelSubcache.Entries; + FastKernelSubcacheReadLockT SubcacheLock{KernelSubcache.Mutex}; ur_context_handle_t Context = getURContext(); const FastKernelCacheKeyT RequiredKey(Device, Context); // Search for the kernel in the subcache. @@ -484,7 +462,7 @@ class KernelProgramCache { void saveKernel(KernelNameStrRefT KernelName, ur_device_handle_t Device, const FastKernelCacheValPtr &CacheVal, - FastKernelSubcacheT *KernelSubcacheHint) { + FastKernelSubcacheT &KernelSubcache) { if (SYCLConfig:: isProgramCacheEvictionEnabled()) { // Save kernel in fast cache only if the corresponding program is also @@ -504,15 +482,13 @@ class KernelProgramCache { // if no insertion took place, then some other thread has already inserted // smth in the cache traceKernel("Kernel inserted.", KernelName, true); - auto It = MFastKernelCache.try_emplace( - KernelName, - FastKernelSubcacheWrapper(KernelSubcacheHint, getURContext())); - KernelSubcacheHint = &It.first->second.get(); + MFastKernelCache.try_emplace( + KernelName, FastKernelSubcacheWrapper(KernelSubcache, getURContext())); - FastKernelSubcacheWriteLockT SubcacheLock{KernelSubcacheHint->Mutex}; + FastKernelSubcacheWriteLockT SubcacheLock{KernelSubcache.Mutex}; ur_context_handle_t Context = getURContext(); - KernelSubcacheHint->Entries.emplace_back( - FastKernelCacheKeyT(Device, Context), CacheVal); + KernelSubcache.Entries.emplace_back(FastKernelCacheKeyT(Device, Context), + CacheVal); } // Expects locked program cache diff --git a/sycl/source/detail/program_manager/program_manager.cpp b/sycl/source/detail/program_manager/program_manager.cpp index 77f28a5131f8a..f1a84b92b622e 100644 --- a/sycl/source/detail/program_manager/program_manager.cpp +++ b/sycl/source/detail/program_manager/program_manager.cpp @@ -1084,8 +1084,8 @@ ProgramManager::getBuiltURProgram(const BinImgWithDeps &ImgWithDeps, FastKernelCacheValPtr ProgramManager::getOrCreateKernel( context_impl &ContextImpl, device_impl &DeviceImpl, - KernelNameStrRefT KernelName, - KernelNameBasedCacheT *KernelNameBasedCachePtr, const NDRDescT &NDRDesc) { + KernelNameStrRefT KernelName, DeviceKernelInfo &DeviceKernelInfo, + const NDRDescT &NDRDesc) { if constexpr (DbgProgMgr > 0) { std::cerr << ">>> ProgramManager::getOrCreateKernel(" << &ContextImpl << ", " << &DeviceImpl << ", " << KernelName << ")\n"; @@ -1093,12 +1093,9 @@ FastKernelCacheValPtr ProgramManager::getOrCreateKernel( KernelProgramCache &Cache = ContextImpl.getKernelProgramCache(); ur_device_handle_t UrDevice = DeviceImpl.getHandleRef(); - FastKernelSubcacheT *CacheHintPtr = - KernelNameBasedCachePtr ? &KernelNameBasedCachePtr->FastKernelSubcache - : nullptr; if (SYCLConfig::get()) { - if (auto KernelCacheValPtr = - Cache.tryToGetKernelFast(KernelName, UrDevice, CacheHintPtr)) { + if (auto KernelCacheValPtr = Cache.tryToGetKernelFast( + KernelName, UrDevice, DeviceKernelInfo.getKernelSubcache())) { return KernelCacheValPtr; } } @@ -1150,7 +1147,8 @@ FastKernelCacheValPtr ProgramManager::getOrCreateKernel( auto ret_val = std::make_shared( KernelArgMaskPair.first.retain(), &(BuildResult->MBuildResultMutex), KernelArgMaskPair.second, std::move(Program), ContextImpl.getAdapter()); - Cache.saveKernel(KernelName, UrDevice, ret_val, CacheHintPtr); + Cache.saveKernel(KernelName, UrDevice, ret_val, + DeviceKernelInfo.getKernelSubcache()); return ret_val; } @@ -1814,24 +1812,27 @@ void ProgramManager::cacheKernelImplicitLocalArg( } } -std::optional ProgramManager::kernelImplicitLocalArgPos( - KernelNameStrRefT KernelName, - KernelNameBasedCacheT *KernelNameBasedCachePtr) const { - auto getLocalArgPos = [&]() -> std::optional { - auto it = m_KernelImplicitLocalArgPos.find(KernelName); - if (it != m_KernelImplicitLocalArgPos.end()) - return it->second; - return {}; - }; +std::optional +ProgramManager::kernelImplicitLocalArgPos(KernelNameStrRefT KernelName) const { + auto it = m_KernelImplicitLocalArgPos.find(KernelName); + if (it != m_KernelImplicitLocalArgPos.end()) + return it->second; + return {}; +} - if (!KernelNameBasedCachePtr) - return getLocalArgPos(); - std::optional> &ImplicitLocalArgPos = - KernelNameBasedCachePtr->ImplicitLocalArgPos; - if (!ImplicitLocalArgPos.has_value()) { - ImplicitLocalArgPos = getLocalArgPos(); - } - return ImplicitLocalArgPos.value(); +DeviceKernelInfo &ProgramManager::getOrCreateDeviceKernelInfo( + const CompileTimeKernelInfoTy &Info) { + auto Result = + m_DeviceKernelInfoMap.try_emplace(KernelNameStrT{Info.Name.data()}, Info); + Result.first->second.setCompileTimeInfoIfNeeded(Info); + return Result.first->second; +} + +DeviceKernelInfo & +ProgramManager::getOrCreateDeviceKernelInfo(KernelNameStrRefT KernelName) { + auto Result = m_DeviceKernelInfoMap.try_emplace( + KernelName, CompileTimeKernelInfoTy{std::string_view(KernelName)}); + return Result.first->second; } static bool isBfloat16DeviceLibImage(sycl_device_binary RawImg, @@ -2152,55 +2153,6 @@ void ProgramManager::removeImages(sycl_device_binaries DeviceBinary) { // Drop the kernel argument mask map m_EliminatedKernelArgMasks.erase(Img); - // Unmap the unique kernel IDs for the offload entries - for (sycl_offload_entry EntriesIt = EntriesB; EntriesIt != EntriesE; - EntriesIt = EntriesIt->Increment()) { - detail::KernelNameStrT Name = EntriesIt->GetName(); - // Drop entry for service kernel - if (Name.find("__sycl_service_kernel__") != std::string::npos) { - removeFromMultimapByVal(m_ServiceKernels, Name, Img); - continue; - } - - // Exported device functions won't have a kernel ID - if (m_ExportedSymbolImages.find(std::string(Name)) != - m_ExportedSymbolImages.end()) { - continue; - } - - auto Name2IDIt = m_KernelName2KernelIDs.find(Name); - if (Name2IDIt != m_KernelName2KernelIDs.end()) - removeFromMultimapByVal(m_KernelIDs2BinImage, Name2IDIt->second, Img); - - auto RefCountIt = m_KernelNameRefCount.find(Name); - assert(RefCountIt != m_KernelNameRefCount.end()); - int &RefCount = RefCountIt->second; - assert(RefCount > 0); - - // Remove everything associated with this KernelName if this is the last - // image referencing it. - if (--RefCount == 0) { - // TODO aggregate all these maps into a single one since their entries - // share lifetime. - m_KernelUsesAssert.erase(Name); - m_KernelImplicitLocalArgPos.erase(Name); - m_KernelNameRefCount.erase(RefCountIt); - if (Name2IDIt != m_KernelName2KernelIDs.end()) - m_KernelName2KernelIDs.erase(Name2IDIt); - } - } - - // Drop reverse mapping - m_BinImg2KernelIDs.erase(Img); - - // Unregister exported symbol -> Img pair (needs to happen after the ID - // unmap loop) - for (const sycl_device_binary_property &ESProp : - Img->getExportedSymbols()) { - removeFromMultimapByVal(m_ExportedSymbolImages, ESProp->Name, Img, - /*AssertContains*/ false); - } - for (const sycl_device_binary_property &VFProp : Img->getVirtualFunctions()) { std::string StrValue = DeviceBinaryProperty(VFProp).asCString(); @@ -2258,6 +2210,56 @@ void ProgramManager::removeImages(sycl_device_binaries DeviceBinary) { } } + // Unmap the unique kernel IDs for the offload entries + for (sycl_offload_entry EntriesIt = EntriesB; EntriesIt != EntriesE; + EntriesIt = EntriesIt->Increment()) { + detail::KernelNameStrT Name = EntriesIt->GetName(); + // Drop entry for service kernel + if (Name.find("__sycl_service_kernel__") != std::string::npos) { + removeFromMultimapByVal(m_ServiceKernels, Name, Img); + continue; + } + + // Exported device functions won't have a kernel ID + if (m_ExportedSymbolImages.find(std::string(Name)) != + m_ExportedSymbolImages.end()) { + continue; + } + + auto Name2IDIt = m_KernelName2KernelIDs.find(Name); + if (Name2IDIt != m_KernelName2KernelIDs.end()) + removeFromMultimapByVal(m_KernelIDs2BinImage, Name2IDIt->second, Img); + + auto RefCountIt = m_KernelNameRefCount.find(Name); + assert(RefCountIt != m_KernelNameRefCount.end()); + int &RefCount = RefCountIt->second; + assert(RefCount > 0); + + // Remove everything associated with this KernelName if this is the last + // image referencing it. + if (--RefCount == 0) { + // TODO aggregate all these maps into a single one since their entries + // share lifetime. + m_KernelUsesAssert.erase(Name); + m_KernelImplicitLocalArgPos.erase(Name); + m_DeviceKernelInfoMap.erase(Name); + m_KernelNameRefCount.erase(RefCountIt); + if (Name2IDIt != m_KernelName2KernelIDs.end()) + m_KernelName2KernelIDs.erase(Name2IDIt); + } + } + + // Drop reverse mapping + m_BinImg2KernelIDs.erase(Img); + + // Unregister exported symbol -> Img pair (needs to happen after the ID + // unmap loop) + for (const sycl_device_binary_property &ESProp : + Img->getExportedSymbols()) { + removeFromMultimapByVal(m_ExportedSymbolImages, ESProp->Name, Img, + /*AssertContains*/ false); + } + m_DeviceImages.erase(DevImgIt); } } diff --git a/sycl/source/detail/program_manager/program_manager.hpp b/sycl/source/detail/program_manager/program_manager.hpp index eaea458f95e84..63a1a61b55c6c 100644 --- a/sycl/source/detail/program_manager/program_manager.hpp +++ b/sycl/source/detail/program_manager/program_manager.hpp @@ -11,9 +11,9 @@ #include #include #include +#include #include #include -#include #include #include #include @@ -198,11 +198,11 @@ class ProgramManager { const DevImgPlainWithDeps *DevImgWithDeps = nullptr, const SerializedObj &SpecConsts = {}); - FastKernelCacheValPtr - getOrCreateKernel(context_impl &ContextImpl, device_impl &DeviceImpl, - KernelNameStrRefT KernelName, - KernelNameBasedCacheT *KernelNameBasedCachePtr, - const NDRDescT &NDRDesc = {}); + FastKernelCacheValPtr getOrCreateKernel(context_impl &ContextImpl, + device_impl &DeviceImpl, + KernelNameStrRefT KernelName, + DeviceKernelInfo &DeviceKernelInfo, + const NDRDescT &NDRDesc = {}); ur_kernel_handle_t getCachedMaterializedKernel( KernelNameStrRefT KernelName, @@ -367,23 +367,18 @@ class ProgramManager { ~ProgramManager() = default; template - bool kernelUsesAssert(const NameT &KernelName, - KernelNameBasedCacheT *KernelNameBasedCachePtr) const { - if (!KernelNameBasedCachePtr) - return m_KernelUsesAssert.find(KernelName) != m_KernelUsesAssert.end(); - - std::optional &UsesAssert = KernelNameBasedCachePtr->UsesAssert; - if (!UsesAssert.has_value()) - UsesAssert = - m_KernelUsesAssert.find(KernelName) != m_KernelUsesAssert.end(); - return UsesAssert.value(); + bool kernelUsesAssert(const NameT &KernelName) const { + return m_KernelUsesAssert.find(KernelName) != m_KernelUsesAssert.end(); } SanitizerType kernelUsesSanitizer() const { return m_SanitizerFoundInImage; } - std::optional kernelImplicitLocalArgPos( - KernelNameStrRefT KernelName, - KernelNameBasedCacheT *KernelNameBasedCachePtr) const; + std::optional + kernelImplicitLocalArgPos(KernelNameStrRefT KernelName) const; + + DeviceKernelInfo & + getOrCreateDeviceKernelInfo(const CompileTimeKernelInfoTy &Info); + DeviceKernelInfo &getOrCreateDeviceKernelInfo(KernelNameStrRefT KernelName); std::set getRawDeviceImages(const std::vector &KernelIDs); @@ -461,7 +456,7 @@ class ProgramManager { /// Keeps track of binary image to kernel name reference count. /// Used for checking if the last image referencing the kernel name - /// is removed in order to trigger cleanup of kernel name based information. + /// is removed in order to trigger cleanup of kernel specific information. /// Access must be guarded by the m_KernelIDsMutex mutex. std::unordered_map m_KernelNameRefCount; @@ -541,6 +536,10 @@ class ProgramManager { KernelUsesAssertSet m_KernelUsesAssert; std::unordered_map m_KernelImplicitLocalArgPos; + // Map for storing device kernel information. Runtime lookup should be avoided + // by caching the pointers when possible. + std::unordered_map m_DeviceKernelInfoMap; + // Sanitizer type used in device image SanitizerType m_SanitizerFoundInImage; diff --git a/sycl/source/detail/queue_impl.hpp b/sycl/source/detail/queue_impl.hpp index 6066ed6b3de50..ca800e1511032 100644 --- a/sycl/source/detail/queue_impl.hpp +++ b/sycl/source/detail/queue_impl.hpp @@ -872,9 +872,7 @@ class queue_impl : public std::enable_shared_from_this { // Kernel only uses assert if it's non interop one KernelUsesAssert = (!Handler.MKernel || Handler.MKernel->hasSYCLMetadata()) && - ProgramManager::getInstance().kernelUsesAssert( - Handler.MKernelName.data(), - Handler.impl->MKernelNameBasedCachePtr); + Handler.impl->MDeviceKernelInfoPtr->usesAssert(); auto &PostProcess = *PostProcessorFunc; PostProcess(IsKernel, KernelUsesAssert, Event); diff --git a/sycl/source/detail/scheduler/commands.cpp b/sycl/source/detail/scheduler/commands.cpp index 19beb3235e21b..3d1f1b6dbce20 100644 --- a/sycl/source/detail/scheduler/commands.cpp +++ b/sycl/source/detail/scheduler/commands.cpp @@ -1985,8 +1985,7 @@ std::string instrumentationGetKernelName( void instrumentationAddExtraKernelMetadata( xpti_td *&CmdTraceEvent, const NDRDescT &NDRDesc, detail::kernel_bundle_impl *KernelBundleImplPtr, - KernelNameStrRefT KernelName, - KernelNameBasedCacheT *KernelNameBasedCachePtr, + KernelNameStrRefT KernelName, DeviceKernelInfo &DeviceKernelInfo, const std::shared_ptr &SyclKernel, queue_impl *Queue, std::vector &CGArgs) // CGArgs are not const since they could be // sorted in this function @@ -2013,7 +2012,7 @@ void instrumentationAddExtraKernelMetadata( FastKernelCacheValPtr FastKernelCacheVal = detail::ProgramManager::getInstance().getOrCreateKernel( Queue->getContextImpl(), Queue->getDeviceImpl(), KernelName, - KernelNameBasedCachePtr); + DeviceKernelInfo); EliminatedArgMask = FastKernelCacheVal->MKernelArgMask; } @@ -2100,9 +2099,9 @@ std::pair emitKernelInstrumentationData( xpti::stream_id_t StreamID, const std::shared_ptr &SyclKernel, const detail::code_location &CodeLoc, bool IsTopCodeLoc, - const std::string_view SyclKernelName, - KernelNameBasedCacheT *KernelNameBasedCachePtr, queue_impl *Queue, - const NDRDescT &NDRDesc, detail::kernel_bundle_impl *KernelBundleImplPtr, + const std::string_view SyclKernelName, DeviceKernelInfo &DeviceKernelInfo, + queue_impl *Queue, const NDRDescT &NDRDesc, + detail::kernel_bundle_impl *KernelBundleImplPtr, std::vector &CGArgs) { auto XptiObjects = std::make_pair(nullptr, -1); @@ -2140,7 +2139,7 @@ std::pair emitKernelInstrumentationData( getQueueID(Queue)); instrumentationAddExtraKernelMetadata( CmdTraceEvent, NDRDesc, KernelBundleImplPtr, - std::string(SyclKernelName), KernelNameBasedCachePtr, SyclKernel, Queue, + std::string(SyclKernelName), DeviceKernelInfo, SyclKernel, Queue, CGArgs); xptiNotifySubscribers( @@ -2196,7 +2195,7 @@ void ExecCGCommand::emitInstrumentationData() { reinterpret_cast(MCommandGroup.get()); instrumentationAddExtraKernelMetadata( CmdTraceEvent, KernelCG->MNDRDesc, KernelCG->getKernelBundle().get(), - KernelCG->MKernelName, KernelCG->MKernelNameBasedCachePtr, + KernelCG->MKernelName, KernelCG->MDeviceKernelInfo, KernelCG->MSyclKernel, MQueue.get(), KernelCG->MArgs); } @@ -2398,8 +2397,7 @@ static ur_result_t SetKernelParamsAndLaunch( const std::function &getMemAllocationFunc, bool IsCooperative, bool KernelUsesClusterLaunch, uint32_t WorkGroupMemorySize, const RTDeviceBinaryImage *BinImage, - KernelNameStrRefT KernelName, - KernelNameBasedCacheT *KernelNameBasedCachePtr, + KernelNameStrRefT KernelName, DeviceKernelInfo &DeviceKernelInfo, void *KernelFuncPtr = nullptr, int KernelNumArgs = 0, detail::kernel_param_desc_t (*KernelParamDescGetter)(int) = nullptr, bool KernelHasSpecialCaptures = true) { @@ -2445,9 +2443,8 @@ static ur_result_t SetKernelParamsAndLaunch( applyFuncOnFilteredArgs(EliminatedArgMask, Args, setFunc); } - std::optional ImplicitLocalArg = - ProgramManager::getInstance().kernelImplicitLocalArgPos( - KernelName, KernelNameBasedCachePtr); + const std::optional &ImplicitLocalArg = + DeviceKernelInfo.getImplicitLocalArgPos(); // Set the implicit local memory buffer to support // get_work_group_scratch_memory. This is for backend not supporting // CUDA-style local memory setting. Note that we may have -1 as a position, @@ -2551,7 +2548,7 @@ getCGKernelInfo(const CGExecKernel &CommandGroup, context_impl &ContextImpl, FastKernelCacheValPtr FastKernelCacheVal = sycl::detail::ProgramManager::getInstance().getOrCreateKernel( ContextImpl, DeviceImpl, CommandGroup.MKernelName, - CommandGroup.MKernelNameBasedCachePtr); + CommandGroup.MDeviceKernelInfo); UrKernel = FastKernelCacheVal->MKernelHandle; EliminatedArgMask = FastKernelCacheVal->MKernelArgMask; // To keep UrKernel valid, we return FastKernelCacheValPtr. @@ -2666,7 +2663,7 @@ void enqueueImpKernel( queue_impl &Queue, NDRDescT &NDRDesc, std::vector &Args, detail::kernel_bundle_impl *KernelBundleImplPtr, const detail::kernel_impl *MSyclKernel, KernelNameStrRefT KernelName, - KernelNameBasedCacheT *KernelNameBasedCachePtr, + DeviceKernelInfo &DeviceKernelInfo, std::vector &RawEvents, detail::event_impl *OutEventImpl, const std::function &getMemAllocationFunc, ur_kernel_cache_config_t KernelCacheConfig, const bool KernelIsCooperative, @@ -2713,7 +2710,7 @@ void enqueueImpKernel( KernelMutex = SyclKernelImpl->getCacheMutex(); } else { KernelCacheVal = detail::ProgramManager::getInstance().getOrCreateKernel( - ContextImpl, DeviceImpl, KernelName, KernelNameBasedCachePtr, NDRDesc); + ContextImpl, DeviceImpl, KernelName, DeviceKernelInfo, NDRDesc); Kernel = KernelCacheVal->MKernelHandle; KernelMutex = KernelCacheVal->MMutex; Program = KernelCacheVal->MProgramHandle; @@ -2760,8 +2757,8 @@ void enqueueImpKernel( Queue, Args, DeviceImageImpl, Kernel, NDRDesc, EventsWaitList, OutEventImpl, EliminatedArgMask, getMemAllocationFunc, KernelIsCooperative, KernelUsesClusterLaunch, WorkGroupMemorySize, - BinImage, KernelName, KernelNameBasedCachePtr, KernelFuncPtr, - KernelNumArgs, KernelParamDescGetter, KernelHasSpecialCaptures); + BinImage, KernelName, DeviceKernelInfo, KernelFuncPtr, KernelNumArgs, + KernelParamDescGetter, KernelHasSpecialCaptures); } if (UR_RESULT_SUCCESS != Error) { // If we have got non-success error code, let's analyze it to emit nice @@ -3239,10 +3236,8 @@ ur_result_t ExecCGCommand::enqueueImpQueue() { if (!EventImpl) { // Kernel only uses assert if it's non interop one - bool KernelUsesAssert = - (!SyclKernel || SyclKernel->hasSYCLMetadata()) && - ProgramManager::getInstance().kernelUsesAssert( - KernelName, ExecKernel->MKernelNameBasedCachePtr); + bool KernelUsesAssert = (!SyclKernel || SyclKernel->hasSYCLMetadata()) && + ExecKernel->MDeviceKernelInfo.usesAssert(); if (KernelUsesAssert) { EventImpl = MEvent.get(); } @@ -3255,10 +3250,9 @@ ur_result_t ExecCGCommand::enqueueImpQueue() { } enqueueImpKernel( *MQueue, NDRDesc, Args, ExecKernel->getKernelBundle().get(), - SyclKernel.get(), KernelName, ExecKernel->MKernelNameBasedCachePtr, - RawEvents, EventImpl, getMemAllocationFunc, - ExecKernel->MKernelCacheConfig, ExecKernel->MKernelIsCooperative, - ExecKernel->MKernelUsesClusterLaunch, + SyclKernel.get(), KernelName, ExecKernel->MDeviceKernelInfo, RawEvents, + EventImpl, getMemAllocationFunc, ExecKernel->MKernelCacheConfig, + ExecKernel->MKernelIsCooperative, ExecKernel->MKernelUsesClusterLaunch, ExecKernel->MKernelWorkGroupMemorySize, BinImage); return UR_RESULT_SUCCESS; diff --git a/sycl/source/detail/scheduler/commands.hpp b/sycl/source/detail/scheduler/commands.hpp index 0fa9835ae1345..046764dcd7054 100644 --- a/sycl/source/detail/scheduler/commands.hpp +++ b/sycl/source/detail/scheduler/commands.hpp @@ -628,7 +628,7 @@ void enqueueImpKernel( queue_impl &Queue, NDRDescT &NDRDesc, std::vector &Args, detail::kernel_bundle_impl *KernelBundleImplPtr, const detail::kernel_impl *MSyclKernel, KernelNameStrRefT KernelName, - KernelNameBasedCacheT *KernelNameBasedCachePtr, + DeviceKernelInfo &DeviceKernelInfo, std::vector &RawEvents, detail::event_impl *OutEventImpl, const std::function &getMemAllocationFunc, ur_kernel_cache_config_t KernelCacheConfig, bool KernelIsCooperative, @@ -694,9 +694,9 @@ std::pair emitKernelInstrumentationData( xpti::stream_id_t StreamID, const std::shared_ptr &SyclKernel, const detail::code_location &CodeLoc, bool IsTopCodeLoc, - std::string_view SyclKernelName, - KernelNameBasedCacheT *KernelNameBasedCachePtr, queue_impl *Queue, - const NDRDescT &NDRDesc, detail::kernel_bundle_impl *KernelBundleImplPtr, + std::string_view SyclKernelName, DeviceKernelInfo &DeviceKernelInfo, + queue_impl *Queue, const NDRDescT &NDRDesc, + detail::kernel_bundle_impl *KernelBundleImplPtr, std::vector &CGArgs); #endif diff --git a/sycl/source/handler.cpp b/sycl/source/handler.cpp index 79bcdbf62aa47..397f01983add4 100644 --- a/sycl/source/handler.cpp +++ b/sycl/source/handler.cpp @@ -542,6 +542,17 @@ event handler::finalize() { } if (type == detail::CGType::Kernel) { + if (impl->MDeviceKernelInfoPtr) { +#ifndef __INTEL_PREVIEW_BREAKING_CHANGES + impl->MDeviceKernelInfoPtr->initIfNeeded(toKernelNameStrT(MKernelName)); +#endif + } else { + // Fetch the device kernel info pointer if it hasn't been set (e.g. + // in kernel bundle or free function cases). + impl->MDeviceKernelInfoPtr = + &detail::ProgramManager::getInstance().getOrCreateDeviceKernelInfo( + toKernelNameStrT(MKernelName)); + } // If there were uses of set_specialization_constant build the kernel_bundle detail::kernel_bundle_impl *KernelBundleImpPtr = getOrInsertHandlerKernelBundlePtr(/*Insert=*/false); @@ -615,10 +626,8 @@ event handler::finalize() { !impl->MEventNeeded && impl->get_queue().supportsDiscardingPiEvents(); if (DiscardEvent) { // Kernel only uses assert if it's non interop one - bool KernelUsesAssert = - !(MKernel && MKernel->isInterop()) && - detail::ProgramManager::getInstance().kernelUsesAssert( - toKernelNameStrT(MKernelName), impl->MKernelNameBasedCachePtr); + bool KernelUsesAssert = !(MKernel && MKernel->isInterop()) && + impl->MDeviceKernelInfoPtr->usesAssert(); DiscardEvent = !KernelUsesAssert; } @@ -638,7 +647,7 @@ event handler::finalize() { if (xptiEnabled) { std::tie(CmdTraceEvent, InstanceID) = emitKernelInstrumentationData( detail::GSYCLStreamID, MKernel, MCodeLoc, impl->MIsTopCodeLoc, - MKernelName.data(), impl->MKernelNameBasedCachePtr, + MKernelName.data(), *impl->MDeviceKernelInfoPtr, impl->get_queue_or_null(), impl->MNDRDesc, KernelBundleImpPtr, impl->MArgs); detail::emitInstrumentationGeneral(detail::GSYCLStreamID, InstanceID, @@ -655,8 +664,8 @@ event handler::finalize() { enqueueImpKernel( impl->get_queue(), impl->MNDRDesc, impl->MArgs, KernelBundleImpPtr, MKernel.get(), toKernelNameStrT(MKernelName), - impl->MKernelNameBasedCachePtr, RawEvents, ResultEvent.get(), - nullptr, impl->MKernelCacheConfig, impl->MKernelIsCooperative, + *impl->MDeviceKernelInfoPtr, RawEvents, ResultEvent.get(), nullptr, + impl->MKernelCacheConfig, impl->MKernelIsCooperative, impl->MKernelUsesClusterLaunch, impl->MKernelWorkGroupMemorySize, BinImage, impl->MKernelFuncPtr, impl->MKernelNumArgs, impl->MKernelParamDescGetter, impl->MKernelHasSpecialCaptures); @@ -717,7 +726,7 @@ event handler::finalize() { impl->MNDRDesc, std::move(MHostKernel), std::move(MKernel), std::move(impl->MKernelBundle), std::move(impl->CGData), std::move(impl->MArgs), toKernelNameStrT(MKernelName), - impl->MKernelNameBasedCachePtr, std::move(MStreamStorage), + *impl->MDeviceKernelInfoPtr, std::move(MStreamStorage), std::move(impl->MAuxiliaryResources), getType(), impl->MKernelCacheConfig, impl->MKernelIsCooperative, impl->MKernelUsesClusterLaunch, impl->MKernelWorkGroupMemorySize, @@ -2598,9 +2607,18 @@ void handler::setNDRangeDescriptor(sycl::range<1> NumWorkItems, impl->MNDRDesc = NDRDescT{NumWorkItems, LocalSize, Offset}; } +#ifndef __INTEL_PREVIEW_BREAKING_CHANGES void handler::setKernelNameBasedCachePtr( sycl::detail::KernelNameBasedCacheT *KernelNameBasedCachePtr) { - impl->MKernelNameBasedCachePtr = KernelNameBasedCachePtr; + setDeviceKernelInfoPtr(reinterpret_cast( + KernelNameBasedCachePtr)); +} +#endif + +void handler::setDeviceKernelInfoPtr( + sycl::detail::DeviceKernelInfo *DeviceKernelInfoPtr) { + assert(!impl->MDeviceKernelInfoPtr && "Already set!"); + impl->MDeviceKernelInfoPtr = DeviceKernelInfoPtr; } void handler::setKernelInfo( diff --git a/sycl/test/abi/sycl_symbols_linux.dump b/sycl/test/abi/sycl_symbols_linux.dump index f41c07ee394b7..e9f65ce662488 100644 --- a/sycl/test/abi/sycl_symbols_linux.dump +++ b/sycl/test/abi/sycl_symbols_linux.dump @@ -3314,6 +3314,7 @@ _ZN4sycl3_V16detail18get_kernel_id_implENS1_11string_viewE _ZN4sycl3_V16detail18make_kernel_bundleEmRKNS0_7contextENS0_12bundle_stateENS0_7backendE _ZN4sycl3_V16detail18make_kernel_bundleEmRKNS0_7contextEbNS0_12bundle_stateENS0_7backendE _ZN4sycl3_V16detail18stringifyErrorCodeEi +_ZN4sycl3_V16detail19getDeviceKernelInfoERKNS1_27compile_time_kernel_info_v123CompileTimeKernelInfoTyE _ZN4sycl3_V16detail19kernel_bundle_plain21ext_oneapi_get_kernelENS1_11string_viewE _ZN4sycl3_V16detail19kernel_bundle_plain21ext_oneapi_has_kernelENS1_11string_viewE _ZN4sycl3_V16detail19kernel_bundle_plain28ext_oneapi_has_device_globalENS1_11string_viewE @@ -3334,6 +3335,7 @@ _ZN4sycl3_V16detail21LocalAccessorBaseHost7getSizeEv _ZN4sycl3_V16detail21LocalAccessorBaseHostC1ENS0_5rangeILi3EEEiiRKNS0_13property_listE _ZN4sycl3_V16detail21LocalAccessorBaseHostC2ENS0_5rangeILi3EEEiiRKNS0_13property_listE _ZN4sycl3_V16detail22addHostAccessorAndWaitEPNS1_16AccessorImplHostE +_ZN4sycl3_V16detail22free_function_info_map3addEPKPKcPKjj _ZN4sycl3_V16detail22get_kernel_bundle_implERKNS0_7contextERKSt6vectorINS0_6deviceESaIS6_EENS0_12bundle_stateE _ZN4sycl3_V16detail22get_kernel_bundle_implERKNS0_7contextERKSt6vectorINS0_6deviceESaIS6_EENS0_12bundle_stateERKSt8functionIFbRKSt10shared_ptrINS1_17device_image_implEEEE _ZN4sycl3_V16detail22get_kernel_bundle_implERKNS0_7contextERKSt6vectorINS0_6deviceESaIS6_EERKNS0_4spanIcLm18446744073709551615EEENS0_12bundle_stateE @@ -3590,6 +3592,7 @@ _ZN4sycl3_V17handler21setKernelWorkGroupMemEm _ZN4sycl3_V17handler21setUserFacingNodeTypeENS0_3ext6oneapi12experimental9node_typeE _ZN4sycl3_V17handler22ext_oneapi_fill2d_implEPvmPKvmmm _ZN4sycl3_V17handler22memcpyFromDeviceGlobalEPvPKvbmm +_ZN4sycl3_V17handler22setDeviceKernelInfoPtrEPNS0_6detail16DeviceKernelInfoE _ZN4sycl3_V17handler22setHandlerKernelBundleENS0_6kernelE _ZN4sycl3_V17handler22setHandlerKernelBundleERKSt10shared_ptrINS0_6detail18kernel_bundle_implEE _ZN4sycl3_V17handler22setKernelClusterLaunchENS0_5rangeILi1EEE @@ -3850,7 +3853,6 @@ _ZNK4sycl3_V16detail19kernel_bundle_plain5emptyEv _ZNK4sycl3_V16detail21LocalAccessorBaseHost11getPropListEv _ZNK4sycl3_V16detail21LocalAccessorBaseHost6getPtrEv _ZNK4sycl3_V16detail21LocalAccessorBaseHost7getSizeEv -_ZN4sycl3_V16detail22free_function_info_map3addEPKPKcPKjj _ZNK4sycl3_V16detail28SampledImageAccessorBaseHost10getSamplerEv _ZNK4sycl3_V16detail28SampledImageAccessorBaseHost11getPropListEv _ZNK4sycl3_V16detail28SampledImageAccessorBaseHost12getNumOfDimsEv diff --git a/sycl/test/abi/sycl_symbols_windows.dump b/sycl/test/abi/sycl_symbols_windows.dump index 8d6235784776e..eae017c88eac8 100644 --- a/sycl/test/abi/sycl_symbols_windows.dump +++ b/sycl/test/abi/sycl_symbols_windows.dump @@ -229,9 +229,9 @@ ??$get_info_impl@Unative_vector_width_int@device@info@_V1@sycl@@@device@_V1@sycl@@AEBAIXZ ??$get_info_impl@Unative_vector_width_long@device@info@_V1@sycl@@@device@_V1@sycl@@AEBAIXZ ??$get_info_impl@Unative_vector_width_short@device@info@_V1@sycl@@@device@_V1@sycl@@AEBAIXZ +??$get_info_impl@Unode_mask@device@info@intel@ext@_V1@sycl@@@device@_V1@sycl@@AEBAIXZ ??$get_info_impl@Unum_args@kernel@info@_V1@sycl@@@kernel@_V1@sycl@@AEBAIXZ ??$get_info_impl@Unum_compute_units@device@info@oneapi@ext@_V1@sycl@@@device@_V1@sycl@@AEBA_KXZ -??$get_info_impl@Unode_mask@device@info@intel@ext@_V1@sycl@@@device@_V1@sycl@@AEBAIXZ ??$get_info_impl@Uopencl_c_version@device@info@_V1@sycl@@@device@_V1@sycl@@AEBA?AVstring@detail@12@XZ ??$get_info_impl@Uparent_device@device@info@_V1@sycl@@@device@_V1@sycl@@AEBA?AV012@XZ ??$get_info_impl@Upartition_affinity_domains@device@info@_V1@sycl@@@device@_V1@sycl@@AEBA?AV?$vector@W4partition_affinity_domain@info@_V1@sycl@@V?$allocator@W4partition_affinity_domain@info@_V1@sycl@@@std@@@std@@XZ @@ -4064,6 +4064,7 @@ ?getCurrentDSODir@OSUtil@detail@_V1@sycl@@SA?AV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@XZ ?getDeviceBackend@handler@_V1@sycl@@AEBA?AW4backend@23@XZ ?getDeviceFromHandler@detail@_V1@sycl@@YA?AVdevice@23@AEAVhandler@23@@Z +?getDeviceKernelInfo@detail@_V1@sycl@@YAAEAVDeviceKernelInfo@123@AEBUCompileTimeKernelInfoTy@compile_time_kernel_info_v1@123@@Z ?getDirName@OSUtil@detail@_V1@sycl@@SA?AV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@PEBD@Z ?getElemSize@AccessorBaseHost@detail@_V1@sycl@@QEBAIXZ ?getElementSize@LocalAccessorBaseHost@detail@_V1@sycl@@QEAAHXZ @@ -4188,8 +4189,8 @@ ?get_impl@handler@_V1@sycl@@AEAAPEAVhandler_impl@detail@23@XZ ?get_kernel@kernel_bundle_plain@detail@_V1@sycl@@IEBA?AVkernel@34@AEBVkernel_id@34@@Z ?get_kernel_bundle@kernel@_V1@sycl@@QEBA?AV?$kernel_bundle@$01@23@XZ -?get_kernel_bundle_impl@detail@_V1@sycl@@YA?AV?$shared_ptr@Vkernel_bundle_impl@detail@_V1@sycl@@@std@@AEBVcontext@23@AEBV?$vector@Vdevice@_V1@sycl@@V?$allocator@Vdevice@_V1@sycl@@@std@@@5@AEBV?$vector@Vkernel_id@_V1@sycl@@V?$allocator@Vkernel_id@_V1@sycl@@@std@@@5@W4bundle_state@23@@Z ?get_kernel_bundle_impl@detail@_V1@sycl@@YA?AV?$shared_ptr@Vkernel_bundle_impl@detail@_V1@sycl@@@std@@AEBVcontext@23@AEBV?$vector@Vdevice@_V1@sycl@@V?$allocator@Vdevice@_V1@sycl@@@std@@@5@AEBV?$span@D$0?0@23@W4bundle_state@23@@Z +?get_kernel_bundle_impl@detail@_V1@sycl@@YA?AV?$shared_ptr@Vkernel_bundle_impl@detail@_V1@sycl@@@std@@AEBVcontext@23@AEBV?$vector@Vdevice@_V1@sycl@@V?$allocator@Vdevice@_V1@sycl@@@std@@@5@AEBV?$vector@Vkernel_id@_V1@sycl@@V?$allocator@Vkernel_id@_V1@sycl@@@std@@@5@W4bundle_state@23@@Z ?get_kernel_bundle_impl@detail@_V1@sycl@@YA?AV?$shared_ptr@Vkernel_bundle_impl@detail@_V1@sycl@@@std@@AEBVcontext@23@AEBV?$vector@Vdevice@_V1@sycl@@V?$allocator@Vdevice@_V1@sycl@@@std@@@5@W4bundle_state@23@@Z ?get_kernel_bundle_impl@detail@_V1@sycl@@YA?AV?$shared_ptr@Vkernel_bundle_impl@detail@_V1@sycl@@@std@@AEBVcontext@23@AEBV?$vector@Vdevice@_V1@sycl@@V?$allocator@Vdevice@_V1@sycl@@@std@@@5@W4bundle_state@23@AEBV?$function@$$A6A_NAEBV?$shared_ptr@Vdevice_image_impl@detail@_V1@sycl@@@std@@@Z@5@@Z ?get_kernel_id_impl@detail@_V1@sycl@@YA?AVkernel_id@23@Vstring_view@123@@Z @@ -4408,6 +4409,7 @@ ?setArgsHelper@handler@_V1@sycl@@AEAAXH@Z ?setArgsToAssociatedAccessors@handler@_V1@sycl@@AEAAXXZ ?setDevice@HostProfilingInfo@detail@_V1@sycl@@QEAAXPEAVdevice_impl@234@@Z +?setDeviceKernelInfoPtr@handler@_V1@sycl@@AEAAXPEAVDeviceKernelInfo@detail@23@@Z ?setHandlerKernelBundle@handler@_V1@sycl@@AEAAXAEBV?$shared_ptr@Vkernel_bundle_impl@detail@_V1@sycl@@@std@@@Z ?setHandlerKernelBundle@handler@_V1@sycl@@AEAAXVkernel@23@@Z ?setKernelCacheConfig@handler@_V1@sycl@@AEAAXW4StableKernelCacheConfig@123@@Z @@ -4429,8 +4431,8 @@ ?setNDRangeDescriptor@handler@_V1@sycl@@AEAAXV?$range@$02@23@0V?$id@$02@23@@Z ?setNDRangeDescriptor@handler@_V1@sycl@@AEAAXV?$range@$02@23@V?$id@$02@23@@Z ?setNDRangeDescriptor@handler@_V1@sycl@@AEAAXV?$range@$02@23@_N@Z -?setNDRangeDescriptorPadded@handler@_V1@sycl@@AEAAXV?$range@$02@23@V?$id@$02@23@H@Z ?setNDRangeDescriptorPadded@handler@_V1@sycl@@AEAAXV?$range@$02@23@0V?$id@$02@23@H@Z +?setNDRangeDescriptorPadded@handler@_V1@sycl@@AEAAXV?$range@$02@23@V?$id@$02@23@H@Z ?setNDRangeDescriptorPadded@handler@_V1@sycl@@AEAAXV?$range@$02@23@_NH@Z ?setNDRangeUsed@handler@_V1@sycl@@AEAAX_N@Z ?setStateExplicitKernelBundle@handler@_V1@sycl@@AEAAXXZ diff --git a/sycl/test/include_deps/sycl_detail_core.hpp.cpp b/sycl/test/include_deps/sycl_detail_core.hpp.cpp index 3087928f17c3f..9a0791c5e9133 100644 --- a/sycl/test/include_deps/sycl_detail_core.hpp.cpp +++ b/sycl/test/include_deps/sycl_detail_core.hpp.cpp @@ -130,6 +130,8 @@ // CHECK-NEXT: CL/cl_version.h // CHECK-NEXT: CL/cl_platform.h // CHECK-NEXT: CL/cl_ext.h +// CHECK-NEXT: detail/get_device_kernel_info.hpp +// CHECK-NEXT: detail/compile_time_kernel_info.hpp // CHECK-NEXT: detail/id_queries_fit_in_int.hpp // CHECK-NEXT: detail/kernel_launch_helper.hpp // CHECK-NEXT: ext/intel/experimental/fp_control_kernel_properties.hpp @@ -138,7 +140,6 @@ // CHECK-NEXT: ext/oneapi/kernel_properties/properties.hpp // CHECK-NEXT: ext/oneapi/work_group_scratch_memory.hpp // CHECK-NEXT: detail/sycl_local_mem_builtins.hpp -// CHECK-NEXT: detail/kernel_name_based_cache.hpp // CHECK-NEXT: detail/kernel_name_str_t.hpp // CHECK-NEXT: detail/reduction_forward.hpp // CHECK-NEXT: detail/ur.hpp diff --git a/sycl/unittests/Extensions/CommandGraph/CommandGraph.cpp b/sycl/unittests/Extensions/CommandGraph/CommandGraph.cpp index 82fc0ed56c09d..dedd4ebbcb407 100644 --- a/sycl/unittests/Extensions/CommandGraph/CommandGraph.cpp +++ b/sycl/unittests/Extensions/CommandGraph/CommandGraph.cpp @@ -9,6 +9,13 @@ #include +class Kernel1; +class Kernel2; +class Kernel3; +MOCK_INTEGRATION_HEADER(Kernel1) +MOCK_INTEGRATION_HEADER(Kernel2) +MOCK_INTEGRATION_HEADER(Kernel3) + using namespace sycl; using namespace sycl::ext::oneapi; @@ -630,7 +637,7 @@ TEST_F(CommandGraphTest, TransitiveRecordingShortcuts) { Graph1.begin_recording(Q1); - auto GraphEvent1 = Q1.single_task([=] {}); + auto GraphEvent1 = Q1.single_task([=] {}); ASSERT_EQ(Q1.ext_oneapi_get_state(), ext::oneapi::experimental::queue_state::recording); ASSERT_EQ(Q2.ext_oneapi_get_state(), @@ -638,7 +645,7 @@ TEST_F(CommandGraphTest, TransitiveRecordingShortcuts) { ASSERT_EQ(Q3.ext_oneapi_get_state(), ext::oneapi::experimental::queue_state::executing); - auto GraphEvent2 = Q2.single_task(GraphEvent1, [=] {}); + auto GraphEvent2 = Q2.single_task(GraphEvent1, [=] {}); ASSERT_EQ(Q1.ext_oneapi_get_state(), ext::oneapi::experimental::queue_state::recording); ASSERT_EQ(Q2.ext_oneapi_get_state(), @@ -646,8 +653,8 @@ TEST_F(CommandGraphTest, TransitiveRecordingShortcuts) { ASSERT_EQ(Q3.ext_oneapi_get_state(), ext::oneapi::experimental::queue_state::executing); - auto GraphEvent3 = Q3.parallel_for(range<1>{1024}, GraphEvent1, - [=](item<1> Id) {}); + auto GraphEvent3 = + Q3.parallel_for(range<1>{1024}, GraphEvent1, [=](item<1> Id) {}); ASSERT_EQ(Q1.ext_oneapi_get_state(), ext::oneapi::experimental::queue_state::recording); ASSERT_EQ(Q2.ext_oneapi_get_state(), diff --git a/sycl/unittests/Extensions/CommandGraph/CommonReferenceSemantics.cpp b/sycl/unittests/Extensions/CommandGraph/CommonReferenceSemantics.cpp index c6a9333cb02a5..d6fd4d1ec5e4b 100644 --- a/sycl/unittests/Extensions/CommandGraph/CommonReferenceSemantics.cpp +++ b/sycl/unittests/Extensions/CommandGraph/CommonReferenceSemantics.cpp @@ -12,6 +12,9 @@ using namespace sycl; using namespace sycl::ext::oneapi; +class MockKernel; +MOCK_INTEGRATION_HEADER(MockKernel) + /** * Checks that the operators and constructors of graph related classes meet the * common reference semantics. @@ -70,8 +73,9 @@ TEST_F(CommandGraphTest, NodeSemantics) { experimental::command_graph Graph(Queue.get_context(), Queue.get_device()); auto Factory = [&]() { - return Graph.add( - [&](handler &CGH) { CGH.parallel_for(1, [=](item<1> Item) {}); }); + return Graph.add([&](handler &CGH) { + CGH.parallel_for(1, [=](item<1> Item) {}); + }); }; ASSERT_NO_FATAL_FAILURE(testSemantics(Factory)); } @@ -80,7 +84,9 @@ TEST_F(CommandGraphTest, DynamicCGSemantics) { sycl::queue Queue; experimental::command_graph Graph(Queue.get_context(), Queue.get_device()); - auto CGF = [&](handler &CGH) { CGH.parallel_for(1, [=](item<1> Item) {}); }; + auto CGF = [&](handler &CGH) { + CGH.parallel_for(1, [=](item<1> Item) {}); + }; auto Factory = [&]() { return experimental::dynamic_command_group(Graph, {CGF}); @@ -185,8 +191,9 @@ TEST_F(CommandGraphTest, NodeHash) { experimental::command_graph Graph(Queue.get_context(), Queue.get_device()); auto Factory = [&]() { - return Graph.add( - [&](handler &CGH) { CGH.parallel_for(1, [=](item<1> Item) {}); }); + return Graph.add([&](handler &CGH) { + CGH.parallel_for(1, [=](item<1> Item) {}); + }); }; ASSERT_NO_FATAL_FAILURE(testHash(Factory)); } @@ -195,7 +202,9 @@ TEST_F(CommandGraphTest, DynamicCommandGroupHash) { sycl::queue Queue; experimental::command_graph Graph(Queue.get_context(), Queue.get_device()); - auto CGF = [&](handler &CGH) { CGH.parallel_for(1, [=](item<1> Item) {}); }; + auto CGF = [&](handler &CGH) { + CGH.parallel_for(1, [=](item<1> Item) {}); + }; auto Factory = [&]() { return experimental::dynamic_command_group(Graph, {CGF}); diff --git a/sycl/unittests/Extensions/CommandGraph/Exceptions.cpp b/sycl/unittests/Extensions/CommandGraph/Exceptions.cpp index 42444dff82898..1a635a751229c 100644 --- a/sycl/unittests/Extensions/CommandGraph/Exceptions.cpp +++ b/sycl/unittests/Extensions/CommandGraph/Exceptions.cpp @@ -5,6 +5,7 @@ // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// +#include #include "Common.hpp" @@ -346,7 +347,7 @@ TEST_F(CommandGraphTest, Reductions) { { try { Graph.add([&](handler &CGH) { - CGH.parallel_for( + CGH.parallel_for( range<1>{1}, reduction(&ReduVar, int{0}, sycl::plus<>()), [=](item<1> idx, auto &Sum) {}); }); @@ -366,7 +367,7 @@ TEST_F(CommandGraphTest, Streams) { try { Graph.add([&](handler &CGH) { sycl::stream Out(WorkItems * 16, 16, CGH); - CGH.parallel_for( + CGH.parallel_for( range<1>(WorkItems), [=](item<1> id) { Out << id.get_linear_id() << sycl::endl; }); }); @@ -422,7 +423,7 @@ TEST_F(CommandGraphTest, WorkGroupScratchMemoryCheck) { { try { Graph.add([&](handler &CGH) { - CGH.parallel_for( + CGH.parallel_for( range<1>{1}, ext::oneapi::experimental::properties{ ext::oneapi::experimental::work_group_scratch_size( @@ -679,11 +680,11 @@ TEST_F(CommandGraphTest, TransitiveRecordingWrongContext) { Graph.begin_recording(Q1); auto GraphEvent1 = - Q1.submit([&](handler &CGH) { CGH.single_task([=] {}); }); + Q1.submit([&](handler &CGH) { CGH.single_task([=] {}); }); ASSERT_THROW(Q2.submit([&](handler &CGH) { CGH.depends_on(GraphEvent1); - CGH.single_task([=] {}); + CGH.single_task([=] {}); }), sycl::exception); } @@ -710,11 +711,11 @@ TEST_F(CommandGraphTest, TransitiveRecordingWrongDevice) { Graph.begin_recording(Q1); auto GraphEvent1 = - Q1.submit([&](handler &CGH) { CGH.single_task([=] {}); }); + Q1.submit([&](handler &CGH) { CGH.single_task([=] {}); }); ASSERT_THROW(Q2.submit([&](handler &CGH) { CGH.depends_on(GraphEvent1); - CGH.single_task([=] {}); + CGH.single_task([=] {}); }), sycl::exception); } @@ -736,11 +737,11 @@ TEST_F(CommandGraphTest, RecordingWrongGraphDep) { Graph2.begin_recording(Q2); auto GraphEvent1 = - Q1.submit([&](handler &CGH) { CGH.single_task([=] {}); }); + Q1.submit([&](handler &CGH) { CGH.single_task([=] {}); }); ASSERT_THROW(Q2.submit([&](handler &CGH) { CGH.depends_on(GraphEvent1); - CGH.single_task([=] {}); + CGH.single_task([=] {}); }), sycl::exception); } @@ -779,23 +780,27 @@ TEST_F(CommandGraphTest, DynamicCommandGroupMismatchEventEdges) { Graph.begin_recording(Queue); auto EventA = Queue.submit([&](handler &CGH) { - CGH.parallel_for(N, [=](item<1> Item) { PtrA[Item.get_id()] = 1; }); + CGH.parallel_for( + N, [=](item<1> Item) { PtrA[Item.get_id()] = 1; }); }); auto EventB = Queue.submit([&](handler &CGH) { - CGH.parallel_for(N, [=](item<1> Item) { PtrB[Item.get_id()] = 4; }); + CGH.parallel_for( + N, [=](item<1> Item) { PtrB[Item.get_id()] = 4; }); }); Graph.end_recording(); auto CGFA = [&](handler &CGH) { CGH.depends_on(EventA); - CGH.parallel_for(N, [=](item<1> Item) { PtrA[Item.get_id()] += 2; }); + CGH.parallel_for( + N, [=](item<1> Item) { PtrA[Item.get_id()] += 2; }); }; auto CGFB = [&](handler &CGH) { CGH.depends_on(EventB); - CGH.parallel_for(N, [=](item<1> Item) { PtrB[Item.get_id()] += 0xA; }); + CGH.parallel_for( + N, [=](item<1> Item) { PtrB[Item.get_id()] += 0xA; }); }; experimental::dynamic_command_group DynCG(Graph, {CGFA, CGFB}); @@ -817,12 +822,14 @@ TEST_F(CommandGraphTest, DynamicCommandGroupBufferThrows) { auto CGFA = [&](handler &CGH) { auto Acc = Buf.get_access(CGH); - CGH.parallel_for(N, [=](item<1> Item) { Acc[Item.get_id()] = 2; }); + CGH.parallel_for( + N, [=](item<1> Item) { Acc[Item.get_id()] = 2; }); }; auto CGFB = [&](handler &CGH) { auto Acc = Buf.get_access(CGH); - CGH.parallel_for(N, [=](item<1> Item) { Acc[Item.get_id()] = 0xA; }); + CGH.parallel_for( + N, [=](item<1> Item) { Acc[Item.get_id()] = 0xA; }); }; experimental::dynamic_command_group DynCG(Graph, {CGFA, CGFB}); @@ -846,12 +853,14 @@ TEST_F(CommandGraphTest, DynamicCommandGroupBufferHostAccThrows) { {experimental::property::graph::assume_buffer_outlives_graph{}}}; auto CGFA = [&](handler &CGH) { - CGH.parallel_for(N, [=](item<1> Item) { Ptr[Item.get_id()] = 2; }); + CGH.parallel_for( + N, [=](item<1> Item) { Ptr[Item.get_id()] = 2; }); }; auto CGFB = [&](handler &CGH) { auto Acc = Buf.get_access(CGH); - CGH.parallel_for(N, [=](item<1> Item) { Acc[Item.get_id()] = 0xA; }); + CGH.parallel_for( + N, [=](item<1> Item) { Acc[Item.get_id()] = 0xA; }); }; experimental::dynamic_command_group DynCG(Graph, {CGFA, CGFB}); @@ -882,24 +891,28 @@ TEST_F(CommandGraphTest, DynamicCommandGroupMismatchAccessorEdges) { Queue.submit([&](handler &CGH) { auto AccA = BufA.get_access(CGH); - CGH.parallel_for(N, [=](item<1> Item) { AccA[Item.get_id()] = 1; }); + CGH.parallel_for( + N, [=](item<1> Item) { AccA[Item.get_id()] = 1; }); }); Queue.submit([&](handler &CGH) { auto AccB = BufB.get_access(CGH); - CGH.parallel_for(N, [=](item<1> Item) { AccB[Item.get_id()] = 4; }); + CGH.parallel_for( + N, [=](item<1> Item) { AccB[Item.get_id()] = 4; }); }); Graph.end_recording(); auto CGFA = [&](handler &CGH) { auto AccA = BufA.get_access(CGH); - CGH.parallel_for(N, [=](item<1> Item) { AccA[Item.get_id()] += 2; }); + CGH.parallel_for( + N, [=](item<1> Item) { AccA[Item.get_id()] += 2; }); }; auto CGFB = [&](handler &CGH) { auto AccB = BufB.get_access(CGH); - CGH.parallel_for(N, [=](item<1> Item) { AccB[Item.get_id()] += 0xA; }); + CGH.parallel_for( + N, [=](item<1> Item) { AccB[Item.get_id()] += 0xA; }); }; experimental::dynamic_command_group DynCG(Graph, {CGFA, CGFB}); diff --git a/sycl/unittests/Extensions/WorkGroupMemoryBackendArgument.cpp b/sycl/unittests/Extensions/WorkGroupMemoryBackendArgument.cpp index 8febd9676fb9f..97159ba3a278d 100644 --- a/sycl/unittests/Extensions/WorkGroupMemoryBackendArgument.cpp +++ b/sycl/unittests/Extensions/WorkGroupMemoryBackendArgument.cpp @@ -8,6 +8,7 @@ #include #include +#include #include #include @@ -20,7 +21,8 @@ using arg_type = syclext::work_group_memory; namespace sycl { inline namespace _V1 { namespace detail { -template <> struct KernelInfo { +template <> +struct KernelInfo : public unittest::MockKernelInfoBase { static constexpr unsigned getNumParams() { return 1; } static constexpr const detail::kernel_param_desc_t &getParamDesc(int) { return WorkGroupMemory; diff --git a/sycl/unittests/helpers/MockKernelInfo.hpp b/sycl/unittests/helpers/MockKernelInfo.hpp index 836346eddd11d..fd1b1ed9435a9 100644 --- a/sycl/unittests/helpers/MockKernelInfo.hpp +++ b/sycl/unittests/helpers/MockKernelInfo.hpp @@ -24,6 +24,11 @@ struct MockKernelInfoBase { static constexpr bool callsThisItem() { return false; } static constexpr bool callsAnyThisFreeFunction() { return false; } static constexpr int64_t getKernelSize() { return 1; } + + static constexpr const char *getFileName() { return ""; } + static constexpr const char *getFunctionName() { return ""; } + static constexpr unsigned getLineNumber() { return 0; } + static constexpr unsigned getColumnNumber() { return 0; } }; } // namespace unittest diff --git a/sycl/unittests/helpers/TestKernel.hpp b/sycl/unittests/helpers/TestKernel.hpp index 85e6f28c5f673..2bf3e7a043018 100644 --- a/sycl/unittests/helpers/TestKernel.hpp +++ b/sycl/unittests/helpers/TestKernel.hpp @@ -10,9 +10,12 @@ #include "MockDeviceImage.hpp" #include "MockKernelInfo.hpp" +#include class TestKernel; class TestKernelWithAcc; +class TestKernelWithStream; +class TestKernelWithPtr; namespace sycl { inline namespace _V1 { @@ -44,11 +47,37 @@ struct KernelInfo : public unittest::MockKernelInfoBase { static constexpr unsigned getColumnNumber() { return 8; } }; +template <> +struct KernelInfo : public unittest::MockKernelInfoBase { + static constexpr const char *getName() { return "TestKernelWithStream"; } + static constexpr int64_t getKernelSize() { return sizeof(sycl::stream); } + static constexpr const char *getFileName() { return "TestKernel.hpp"; } + static constexpr const char *getFunctionName() { + return "TestKernelWithStreamFunctionName"; + } + static constexpr unsigned getLineNumber() { return 15; } + static constexpr unsigned getColumnNumber() { return 8; } +}; + +template <> +struct KernelInfo : public unittest::MockKernelInfoBase { + static constexpr const char *getName() { return "TestKernelWithPtr"; } + static constexpr int64_t getKernelSize() { return sizeof(void *); } + static constexpr const char *getFileName() { return "TestKernel.hpp"; } + static constexpr const char *getFunctionName() { + return "TestKernelWithPtrFunctionName"; + } + static constexpr unsigned getLineNumber() { return 16; } + static constexpr unsigned getColumnNumber() { return 8; } +}; + } // namespace detail } // namespace _V1 } // namespace sycl static sycl::unittest::MockDeviceImage Imgs[] = { sycl::unittest::generateDefaultImage({"TestKernel"}), - sycl::unittest::generateDefaultImage({"TestKernelWithAcc"})}; -static sycl::unittest::MockDeviceImageArray<2> ImgArray{Imgs}; + sycl::unittest::generateDefaultImage({"TestKernelWithAcc"}), + sycl::unittest::generateDefaultImage({"TestKernelWithStream"}), + sycl::unittest::generateDefaultImage({"TestKernelWithPtr"})}; +static sycl::unittest::MockDeviceImageArray<4> ImgArray{Imgs}; diff --git a/sycl/unittests/program_manager/Cleanup.cpp b/sycl/unittests/program_manager/Cleanup.cpp index 4a39f06ee5250..a75d02b6beff6 100644 --- a/sycl/unittests/program_manager/Cleanup.cpp +++ b/sycl/unittests/program_manager/Cleanup.cpp @@ -61,6 +61,12 @@ class ProgramManagerExposed : public sycl::detail::ProgramManager { return NativePrograms; } + std::unordered_map & + getDeviceKernelInfoMap() { + return m_DeviceKernelInfoMap; + } + std::unordered_map & getKernelNameRefCount() { return m_KernelNameRefCount; @@ -307,6 +313,9 @@ void checkAllInvolvedContainers(ProgramManagerExposed &PM, checkContainer(PM.getVFSet2BinImage(), ExpectedEntryCount, generateRefNames(ImgIds, "VF"), "VFSet2BinImage " + CommentPostfix); + checkContainer(PM.getDeviceKernelInfoMap(), ExpectedEntryCount, + generateRefNames(ImgIds, "Kernel"), + "Device kernel info map " + CommentPostfix); checkContainer(PM.getKernelNameRefCount(), ExpectedEntryCount, generateRefNames(ImgIds, "Kernel"), "Kernel name reference count " + CommentPostfix); @@ -366,6 +375,10 @@ TEST(ImageRemoval, BaseContainers) { generateRefName("B", "HostPipe").c_str()); PM.addOrInitHostPipeEntry(PipeC::get_host_ptr(), generateRefName("C", "HostPipe").c_str()); + std::vector KernelNames = + generateRefNames({"A", "B", "C"}, "Kernel"); + for (const std::string &Name : KernelNames) + PM.getOrCreateDeviceKernelInfo(Name); checkAllInvolvedContainers(PM, ImagesToRemove.size() + ImagesToKeep.size(), {"A", "B", "C"}, "check failed before removal"); @@ -389,6 +402,8 @@ TEST(ImageRemoval, MultipleImagesPerEntry) { convertAndAddImages(PM, ImagesToRemoveSameEntries, NativeImagesForRemoval, TestBinaries); + std::string KernelName = generateRefName("A", "Kernel"); + PM.getOrCreateDeviceKernelInfo(KernelName); checkAllInvolvedContainers( PM, ImagesToRemoveSameEntries.size() + ImagesToKeepSameEntries.size(), /*ExpectedEntryCount*/ 1, {"A"}, "check failed before removal", diff --git a/sycl/unittests/program_manager/arg_mask/EliminatedArgMask.cpp b/sycl/unittests/program_manager/arg_mask/EliminatedArgMask.cpp index 27f860fb25659..e6543927894a4 100644 --- a/sycl/unittests/program_manager/arg_mask/EliminatedArgMask.cpp +++ b/sycl/unittests/program_manager/arg_mask/EliminatedArgMask.cpp @@ -148,7 +148,7 @@ class MockHandler : public sycl::handler { std::move(impl->MNDRDesc), std::move(CGH->MHostKernel), std::move(CGH->MKernel), std::move(impl->MKernelBundle), std::move(impl->CGData), std::move(impl->MArgs), - CGH->MKernelName.data(), impl->MKernelNameBasedCachePtr, + CGH->MKernelName.data(), *impl->MDeviceKernelInfoPtr, std::move(CGH->MStreamStorage), std::move(impl->MAuxiliaryResources), impl->MCGType, {}, impl->MKernelIsCooperative, impl->MKernelUsesClusterLaunch, impl->MKernelWorkGroupMemorySize, diff --git a/sycl/unittests/scheduler/SchedulerTestUtils.hpp b/sycl/unittests/scheduler/SchedulerTestUtils.hpp index 30966b26e742a..c3bdb342170de 100644 --- a/sycl/unittests/scheduler/SchedulerTestUtils.hpp +++ b/sycl/unittests/scheduler/SchedulerTestUtils.hpp @@ -303,7 +303,7 @@ class MockHandlerCustomFinalize : public MockHandler { CommandGroup.reset(new sycl::detail::CGExecKernel( getNDRDesc(), std::move(getHostKernel()), getKernel(), std::move(impl->MKernelBundle), std::move(CGData), getArgs(), - getKernelName(), impl->MKernelNameBasedCachePtr, getStreamStorage(), + getKernelName(), *impl->MDeviceKernelInfoPtr, getStreamStorage(), impl->MAuxiliaryResources, getType(), {}, impl->MKernelIsCooperative, impl->MKernelUsesClusterLaunch, impl->MKernelWorkGroupMemorySize, getCodeLoc())); diff --git a/sycl/unittests/scheduler/StreamInitDependencyOnHost.cpp b/sycl/unittests/scheduler/StreamInitDependencyOnHost.cpp index d038004b1e1e4..2b6d0cc563431 100644 --- a/sycl/unittests/scheduler/StreamInitDependencyOnHost.cpp +++ b/sycl/unittests/scheduler/StreamInitDependencyOnHost.cpp @@ -33,7 +33,7 @@ class MockHandlerStreamInit : public MockHandler { detail::CG::StorageInitHelper(getArgsStorage(), getAccStorage(), getSharedPtrStorage(), getRequirements(), getEvents()), - getArgs(), getKernelName(), impl->MKernelNameBasedCachePtr, + getArgs(), getKernelName(), *impl->MDeviceKernelInfoPtr, getStreamStorage(), std::move(impl->MAuxiliaryResources), getType(), {}, impl->MKernelIsCooperative, impl->MKernelUsesClusterLaunch, impl->MKernelWorkGroupMemorySize, getCodeLoc()));