-
Notifications
You must be signed in to change notification settings - Fork 801
[WIP] No handler submit #18842
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: sycl
Are you sure you want to change the base?
[WIP] No handler submit #18842
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -2784,6 +2784,112 @@ class __SYCL_EXPORT queue : public detail::OwnerLessBase<queue> { | |
| CodeLoc); | ||
| } | ||
|
|
||
| // no_handler | ||
|
|
||
| private: | ||
| // NOTE: the name of this function - "kernel_single_task" - is used by the | ||
|
||
| // Front End to determine kernel invocation kind. | ||
| template <typename KernelName, typename KernelType, typename... Props> | ||
| #ifdef __SYCL_DEVICE_ONLY__ | ||
| [[__sycl_detail__::add_ir_attributes_function( | ||
| "sycl-single-task", | ||
| ext::oneapi::experimental::detail::PropertyMetaInfo<Props>::name..., | ||
| nullptr, | ||
| ext::oneapi::experimental::detail::PropertyMetaInfo<Props>::value...)]] | ||
| #endif | ||
|
|
||
| __SYCL_KERNEL_ATTR__ static void | ||
| kernel_single_task(const KernelType &KernelFunc) { | ||
| #ifdef __SYCL_DEVICE_ONLY__ | ||
| KernelFunc(); | ||
| #else | ||
| (void)KernelFunc; | ||
| #endif | ||
| } | ||
|
|
||
| // NOTE: the name of these functions - "kernel_parallel_for" - are used by the | ||
| // Front End to determine kernel invocation kind. | ||
| template <typename KernelName, typename ElementType, typename KernelType, | ||
| typename... Props> | ||
| #ifdef __SYCL_DEVICE_ONLY__ | ||
| [[__sycl_detail__::add_ir_attributes_function( | ||
| ext::oneapi::experimental::detail::PropertyMetaInfo<Props>::name..., | ||
| ext::oneapi::experimental::detail::PropertyMetaInfo<Props>::value...)]] | ||
| #endif | ||
| __SYCL_KERNEL_ATTR__ static void | ||
| kernel_parallel_for(const KernelType &KernelFunc) { | ||
| #ifdef __SYCL_DEVICE_ONLY__ | ||
| KernelFunc(detail::Builder::getElement(detail::declptr<ElementType>())); | ||
| #else | ||
| (void)KernelFunc; | ||
| #endif | ||
| } | ||
|
|
||
| template <int Dims> static sycl::range<3> padRange(sycl::range<Dims> Range) { | ||
| if constexpr (Dims == 3) { | ||
| return Range; | ||
| } else { | ||
| sycl::range<3> Res{0, 0, 0}; | ||
| for (int I = 0; I < Dims; ++I) | ||
| Res[I] = Range[I]; | ||
| return Res; | ||
| } | ||
| } | ||
|
|
||
| template <int Dims> static sycl::id<3> padId(sycl::id<Dims> Id) { | ||
| if constexpr (Dims == 3) { | ||
| return Id; | ||
| } else { | ||
| sycl::id<3> Res{0, 0, 0}; | ||
| for (int I = 0; I < Dims; ++I) | ||
| Res[I] = Id[I]; | ||
| return Res; | ||
| } | ||
| } | ||
|
|
||
| template <typename KernelName, typename KernelType, int Dims> | ||
| void submit_no_handler(nd_range<Dims> Range, const KernelType &KernelFunc) const { | ||
|
|
||
| using NameT = | ||
| typename detail::get_kernel_name_t<KernelName, KernelType>::name; | ||
|
|
||
| const char *KernelN = detail::getKernelName<NameT>(); | ||
| KernelType Kernel = KernelFunc; | ||
| void *KernelFuncPtr = reinterpret_cast<void *>(&Kernel); | ||
| int KernelNumParams = detail::getKernelNumParams<NameT>(); | ||
| detail::kernel_param_desc_t (*KernelParamDescGetter)(int) = &(detail::getKernelParamDesc<NameT>); | ||
| bool IsKernelESIMD = detail::isKernelESIMD<NameT>(); | ||
| bool HasSpecialCapt = detail::hasSpecialCaptures<NameT>(); | ||
| detail::KernelNameBasedCacheT *KernelNameBasedCachePtr = detail::getKernelNameBasedCache<NameT>(); | ||
|
|
||
| assert(HasSpecialCapt == false); | ||
| assert(IsKernelESIMD == false); | ||
|
|
||
| submit_no_handler_impl(Range, KernelN, KernelFuncPtr, KernelNumParams, KernelParamDescGetter, | ||
| KernelNameBasedCachePtr); | ||
| } | ||
|
|
||
| public: | ||
| /// single_task version not using handler | ||
| template <typename KernelName = detail::auto_name, typename KernelType> | ||
| void single_task_no_handler(const KernelType &KernelFunc) const { | ||
|
|
||
| kernel_single_task<KernelName, KernelType, | ||
| ext::oneapi::experimental::empty_properties_t>(KernelFunc); | ||
| submit_no_handler<KernelName, KernelType, 1>(nd_range<1>{}, KernelFunc); | ||
| } | ||
|
|
||
| template <typename KernelName = detail::auto_name, int Dims, | ||
| typename KernelType> | ||
| void parallel_for_no_handler(nd_range<Dims> Range, const KernelType &KernelFunc) const { | ||
|
|
||
| kernel_parallel_for<KernelName, sycl::nd_item<Dims>, KernelType, | ||
| ext::oneapi::experimental::empty_properties_t>(KernelFunc); | ||
| submit_no_handler<KernelName, KernelType, Dims>(Range, KernelFunc); | ||
| } | ||
|
|
||
|
|
||
|
|
||
| /// parallel_for version with a kernel represented as a lambda + range that | ||
| /// specifies global size only. | ||
| /// | ||
|
|
@@ -3686,6 +3792,13 @@ class __SYCL_EXPORT queue : public detail::OwnerLessBase<queue> { | |
| const detail::code_location &CodeLoc, | ||
| bool IsTopCodeLoc) const; | ||
|
|
||
| // no_handler | ||
|
|
||
| template<int Dims> | ||
| void submit_no_handler_impl(nd_range<Dims> Range, const char *KernelName, void *KernelFunc, | ||
| int KernelNumParams, detail::kernel_param_desc_t (*KernelParamDescGetter)(int), | ||
| detail::KernelNameBasedCacheT *KernelNameBasedCachePtr) const; | ||
|
|
||
| /// Submits a command group function object to the queue, in order to be | ||
| /// scheduled for execution on the device. | ||
| /// | ||
|
|
||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -415,6 +415,119 @@ queue_impl::submit_impl(const detail::type_erased_cgfo_ty &CGF, | |
| } | ||
| #endif | ||
|
|
||
| // no_handler | ||
|
|
||
| void queue_impl::extractArgsAndReqsFromLambda( | ||
| char *LambdaPtr, detail::kernel_param_desc_t (*ParamDescGetter)(int), | ||
| size_t NumKernelParams, std::vector<ArgDesc> &Args) { | ||
slawekptak marked this conversation as resolved.
Outdated
Show resolved
Hide resolved
|
||
| size_t IndexShift = 0; | ||
|
|
||
| Args.reserve(NumKernelParams); | ||
|
|
||
| for (size_t I = 0; I < NumKernelParams; ++I) { | ||
| detail::kernel_param_desc_t ParamDesc = ParamDescGetter(I); | ||
| void *Ptr = LambdaPtr + ParamDesc.offset; | ||
| const detail::kernel_param_kind_t &Kind = ParamDesc.kind; | ||
| const int &Size = ParamDesc.info; | ||
|
|
||
| Args.emplace_back(Kind, Ptr, Size, I + IndexShift); | ||
| } | ||
| } | ||
|
|
||
| void queue_impl::submit_no_handler( | ||
| const std::shared_ptr<queue_impl> &Self, | ||
slawekptak marked this conversation as resolved.
Outdated
Show resolved
Hide resolved
|
||
| detail::NDRDescT NDRDesc, const char *KernelName, | ||
|
||
| void *KernelFunc, int KernelNumParams, | ||
| detail::kernel_param_desc_t (*KernelParamDescGetter)(int), | ||
| detail::KernelNameBasedCacheT *KernelNameBasedCachePtr) { | ||
|
||
|
|
||
| std::vector<ur_event_handle_t> RawEvents; | ||
| std::vector<detail::ArgDesc> Args; | ||
|
|
||
| assert(!MQueue->hasCommandGraph()); | ||
|
|
||
| // TODO external event | ||
|
|
||
| bool KernelFastPath = true; | ||
slawekptak marked this conversation as resolved.
Outdated
Show resolved
Hide resolved
|
||
|
|
||
| std::unique_lock<std::mutex> Lock(MMutex); | ||
|
|
||
| EventImplPtr &LastEvent = MDefaultGraphDeps.LastEventPtr; | ||
|
|
||
| if (isInOrder() && LastEvent && !Scheduler::CheckEventReadiness(MContext, LastEvent)) { | ||
| KernelFastPath = false; | ||
| } | ||
|
|
||
| if (KernelFastPath) { | ||
| EventImplPtr &LastEvent = MDefaultGraphDeps.LastEventPtr; | ||
|
|
||
| if (isInOrder() && LastEvent && LastEvent->getHandle()) { | ||
| RawEvents.push_back(LastEvent->getHandle()); | ||
| } | ||
|
|
||
| enqueueImpKernel( | ||
| Self, | ||
| NDRDesc, // MNDRDesc | ||
| Args, | ||
| nullptr, // KernelBundleImpPtr | ||
| nullptr, // MKernel | ||
| KernelName, | ||
| KernelNameBasedCachePtr, // MKernelNameBasedCachePtr | ||
| RawEvents, | ||
| nullptr, // out event | ||
| nullptr, // getMemAllocationFunc | ||
| UR_KERNEL_CACHE_CONFIG_DEFAULT, // MKernelCacheConfig | ||
| false, // MKernelIsCooperative | ||
| false, // MKernelUsesClusterLaunch | ||
| 0, // MKernelWorkGroupMemorySize | ||
| nullptr, // BinImage | ||
| KernelFunc, // MKernelFuncPtr | ||
| KernelNumParams, // MKernelNumArgs | ||
| KernelParamDescGetter, // MKernelParamDescGetter | ||
| false); // MKernelHasSpecialCaptures | ||
| } else { | ||
| std::unique_ptr<detail::CG> CommandGroup; | ||
| detail::CG::StorageInitHelper CGData; | ||
| std::vector<detail::ArgDesc> Args; | ||
| std::vector<std::shared_ptr<detail::stream_impl>> StreamStorage; | ||
| std::vector<std::shared_ptr<const void>> AuxiliaryResources; | ||
| detail::code_location CodeLoc = {}; | ||
|
|
||
| extractArgsAndReqsFromLambda((char *)KernelFunc, KernelParamDescGetter, | ||
| KernelNumParams, Args); | ||
|
|
||
| EventImplPtr &LastEvent = MDefaultGraphDeps.LastEventPtr; | ||
| CGData.MEvents.push_back(LastEvent); | ||
|
|
||
| CommandGroup.reset(new detail::CGExecKernel( | ||
| std::move(NDRDesc), | ||
| nullptr, // MHostKernel | ||
| nullptr, // MKernel | ||
| nullptr, // MKernelBundle | ||
| std::move(CGData), // CGData | ||
| std::move(Args), // MArgs | ||
| KernelName, // MKernelName | ||
| KernelNameBasedCachePtr, // MKernelNameBasedCachePtr | ||
| std::move(StreamStorage), // MStreamStorage | ||
| std::move(AuxiliaryResources), // MAuxiliaryResources | ||
| detail::CGType::Kernel, | ||
| UR_KERNEL_CACHE_CONFIG_DEFAULT, // MKernelCacheConfig | ||
| false, // MKernelIsCooperative | ||
| false, // MKernelUsesClusterLaunch | ||
| 0, // MKernelWorkGroupMemorySize | ||
| CodeLoc)); // MCodeLoc | ||
|
|
||
| detail::EventImplPtr EventImpl = detail::Scheduler::getInstance().addCG( | ||
| std::move(CommandGroup), | ||
| Self, // MQueue | ||
| false); // MEventNeeded | ||
|
|
||
| if (isInOrder()) { | ||
| MDefaultGraphDeps.LastEventPtr = EventImpl; | ||
| } | ||
| } | ||
slawekptak marked this conversation as resolved.
Outdated
Show resolved
Hide resolved
|
||
| } | ||
|
|
||
| template <typename HandlerFuncT> | ||
| event queue_impl::submitWithHandler(const std::vector<event> &DepEvents, | ||
| bool CallerNeedsEvent, | ||
|
|
||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Why don't we call it
queue::launch_grouped?