-
Notifications
You must be signed in to change notification settings - Fork 798
[SYCL] Implement free function kernel enqueue functions #20698
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: sycl
Are you sure you want to change the base?
Changes from 13 commits
b5ac52a
63d860c
00e0f0d
cd92d0c
4621ff6
76e0f8b
ce2a16b
e88b0f9
b1b3ce9
8b685ea
4cc1d15
4e7847d
51e7aff
4dc3224
0b6a0ac
a7c592e
c65ffbc
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,96 @@ | ||
| //==------ launch_config.hpp ------- SYCL kernel launch configuration -----==// | ||
| // | ||
| // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. | ||
| // See https://llvm.org/LICENSE.txt for license information. | ||
| // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception | ||
| // | ||
| //===--------------------------------------------------------------------===// | ||
|
|
||
| #pragma once | ||
|
|
||
| #include <sycl/ext/oneapi/properties/properties.hpp> | ||
| #include <sycl/ext/oneapi/properties/property.hpp> | ||
|
|
||
| namespace sycl { | ||
| inline namespace _V1 { | ||
| template <int Dimensions> class nd_range; | ||
| template <int Dimensions> class range; | ||
|
|
||
| namespace ext::oneapi::experimental { | ||
| namespace detail { | ||
| struct AllowCTADTag; | ||
| // Trait for identifying sycl::range and sycl::nd_range. | ||
| template <typename RangeT> struct is_range_or_nd_range : std::false_type {}; | ||
| template <int Dimensions> | ||
| struct is_range_or_nd_range<range<Dimensions>> : std::true_type {}; | ||
| template <int Dimensions> | ||
| struct is_range_or_nd_range<nd_range<Dimensions>> : std::true_type {}; | ||
|
|
||
| template <typename RangeT> | ||
| constexpr bool is_range_or_nd_range_v = is_range_or_nd_range<RangeT>::value; | ||
|
|
||
| template <typename LCRangeT, typename LCPropertiesT> struct LaunchConfigAccess; | ||
|
|
||
| // Checks that none of the properties in the property list has compile-time | ||
| // effects on the kernel. | ||
| template <typename T> | ||
| struct NoPropertyHasCompileTimeKernelEffect : std::false_type {}; | ||
| template <typename... Ts> | ||
| struct NoPropertyHasCompileTimeKernelEffect<properties_t<Ts...>> { | ||
| static constexpr bool value = | ||
| !(HasCompileTimeEffect<Ts>::value || ... || false); | ||
| }; | ||
| } // namespace detail | ||
|
|
||
| // Available only when Range is range or nd_range | ||
| template < | ||
| typename RangeT, typename PropertiesT = empty_properties_t, | ||
| typename = std::enable_if_t< | ||
| ext::oneapi::experimental::detail::is_range_or_nd_range_v<RangeT>>> | ||
| class launch_config { | ||
| static_assert(ext::oneapi::experimental::detail:: | ||
| NoPropertyHasCompileTimeKernelEffect<PropertiesT>::value, | ||
| "launch_config does not allow properties with compile-time " | ||
| "kernel effects."); | ||
|
|
||
| public: | ||
| launch_config(RangeT Range, PropertiesT Properties = {}) | ||
| : MRange{Range}, MProperties{Properties} {} | ||
|
|
||
| private: | ||
| RangeT MRange; | ||
| PropertiesT MProperties; | ||
|
|
||
| const RangeT &getRange() const noexcept { return MRange; } | ||
|
|
||
| const PropertiesT &getProperties() const noexcept { return MProperties; } | ||
|
|
||
| template <typename LCRangeT, typename LCPropertiesT> | ||
| friend struct detail::LaunchConfigAccess; | ||
| }; | ||
|
|
||
| #ifdef __cpp_deduction_guides | ||
| // CTAD work-around to avoid warning from GCC when using default deduction | ||
| // guidance. | ||
| launch_config(detail::AllowCTADTag) | ||
| -> launch_config<void, empty_properties_t, void>; | ||
| #endif // __cpp_deduction_guides | ||
|
|
||
| namespace detail { | ||
| // Helper for accessing the members of launch_config. | ||
| template <typename LCRangeT, typename LCPropertiesT> struct LaunchConfigAccess { | ||
| LaunchConfigAccess(const launch_config<LCRangeT, LCPropertiesT> &LaunchConfig) | ||
| : MLaunchConfig{LaunchConfig} {} | ||
|
|
||
| const launch_config<LCRangeT, LCPropertiesT> &MLaunchConfig; | ||
|
|
||
| const LCRangeT &getRange() const noexcept { return MLaunchConfig.getRange(); } | ||
|
|
||
| const LCPropertiesT &getProperties() const noexcept { | ||
| return MLaunchConfig.getProperties(); | ||
| } | ||
| }; | ||
| } // namespace detail | ||
| } // namespace ext::oneapi::experimental | ||
| } // namespace _V1 | ||
| } // namespace sycl |
| Original file line number | Diff line number | Diff line change | ||||
|---|---|---|---|---|---|---|
|
|
@@ -12,10 +12,13 @@ | |||||
|
|
||||||
| #include <sycl/detail/common.hpp> | ||||||
| #include <sycl/event.hpp> | ||||||
| #include <sycl/ext/oneapi/experimental/detail/properties/launch_config.hpp> | ||||||
| #include <sycl/ext/oneapi/experimental/enqueue_types.hpp> | ||||||
| #include <sycl/ext/oneapi/experimental/free_function_traits.hpp> | ||||||
| #include <sycl/ext/oneapi/experimental/graph.hpp> | ||||||
| #include <sycl/ext/oneapi/properties/properties.hpp> | ||||||
| #include <sycl/handler.hpp> | ||||||
| #include <sycl/kernel_bundle.hpp> | ||||||
| #include <sycl/nd_range.hpp> | ||||||
| #include <sycl/queue.hpp> | ||||||
| #include <sycl/range.hpp> | ||||||
|
|
@@ -25,78 +28,6 @@ inline namespace _V1 { | |||||
| namespace ext::oneapi::experimental { | ||||||
|
|
||||||
| namespace detail { | ||||||
| // Trait for identifying sycl::range and sycl::nd_range. | ||||||
| template <typename RangeT> struct is_range_or_nd_range : std::false_type {}; | ||||||
| template <int Dimensions> | ||||||
| struct is_range_or_nd_range<range<Dimensions>> : std::true_type {}; | ||||||
| template <int Dimensions> | ||||||
| struct is_range_or_nd_range<nd_range<Dimensions>> : std::true_type {}; | ||||||
|
|
||||||
| template <typename RangeT> | ||||||
| constexpr bool is_range_or_nd_range_v = is_range_or_nd_range<RangeT>::value; | ||||||
|
|
||||||
| template <typename LCRangeT, typename LCPropertiesT> struct LaunchConfigAccess; | ||||||
|
|
||||||
| // Checks that none of the properties in the property list has compile-time | ||||||
| // effects on the kernel. | ||||||
| template <typename T> | ||||||
| struct NoPropertyHasCompileTimeKernelEffect : std::false_type {}; | ||||||
| template <typename... Ts> | ||||||
| struct NoPropertyHasCompileTimeKernelEffect<properties_t<Ts...>> { | ||||||
| static constexpr bool value = | ||||||
| !(HasCompileTimeEffect<Ts>::value || ... || false); | ||||||
| }; | ||||||
| } // namespace detail | ||||||
|
|
||||||
| // Available only when Range is range or nd_range | ||||||
| template < | ||||||
| typename RangeT, typename PropertiesT = empty_properties_t, | ||||||
| typename = std::enable_if_t< | ||||||
| ext::oneapi::experimental::detail::is_range_or_nd_range_v<RangeT>>> | ||||||
| class launch_config { | ||||||
| static_assert(ext::oneapi::experimental::detail:: | ||||||
| NoPropertyHasCompileTimeKernelEffect<PropertiesT>::value, | ||||||
| "launch_config does not allow properties with compile-time " | ||||||
| "kernel effects."); | ||||||
|
|
||||||
| public: | ||||||
| launch_config(RangeT Range, PropertiesT Properties = {}) | ||||||
| : MRange{Range}, MProperties{Properties} {} | ||||||
|
|
||||||
| private: | ||||||
| RangeT MRange; | ||||||
| PropertiesT MProperties; | ||||||
|
|
||||||
| const RangeT &getRange() const noexcept { return MRange; } | ||||||
|
|
||||||
| const PropertiesT &getProperties() const noexcept { return MProperties; } | ||||||
|
|
||||||
| template <typename LCRangeT, typename LCPropertiesT> | ||||||
| friend struct detail::LaunchConfigAccess; | ||||||
| }; | ||||||
|
|
||||||
| #ifdef __cpp_deduction_guides | ||||||
| // CTAD work-around to avoid warning from GCC when using default deduction | ||||||
| // guidance. | ||||||
| launch_config(detail::AllowCTADTag) | ||||||
| -> launch_config<void, empty_properties_t, void>; | ||||||
| #endif // __cpp_deduction_guides | ||||||
|
|
||||||
| namespace detail { | ||||||
| // Helper for accessing the members of launch_config. | ||||||
| template <typename LCRangeT, typename LCPropertiesT> struct LaunchConfigAccess { | ||||||
| LaunchConfigAccess(const launch_config<LCRangeT, LCPropertiesT> &LaunchConfig) | ||||||
| : MLaunchConfig{LaunchConfig} {} | ||||||
|
|
||||||
| const launch_config<LCRangeT, LCPropertiesT> &MLaunchConfig; | ||||||
|
|
||||||
| const LCRangeT &getRange() const noexcept { return MLaunchConfig.getRange(); } | ||||||
|
|
||||||
| const LCPropertiesT &getProperties() const noexcept { | ||||||
| return MLaunchConfig.getProperties(); | ||||||
| } | ||||||
| }; | ||||||
|
|
||||||
| template <typename CommandGroupFunc, typename PropertiesT> | ||||||
| void submit_impl(const queue &Q, PropertiesT Props, CommandGroupFunc &&CGF, | ||||||
| const sycl::detail::code_location &CodeLoc) { | ||||||
|
|
@@ -357,6 +288,75 @@ void nd_launch(queue Q, launch_config<nd_range<Dimensions>, Properties> Config, | |||||
| }); | ||||||
| } | ||||||
|
|
||||||
| // Free function kernel enqueue functions | ||||||
| template <auto *Func, typename... ArgsT> | ||||||
| void single_task(queue Q, kernel_function_s<Func> KernelFunc, ArgsT &&...Args) { | ||||||
| (void)KernelFunc; | ||||||
| submit(Q, [&](handler &CGH) { | ||||||
| single_task(CGH, KernelFunc, std::forward<ArgsT>(Args)...); | ||||||
| }); | ||||||
|
Comment on lines
+365
to
+367
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Do we have a
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. No, there is no
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. We can have it in
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I chose to keep it like this rather than use something like
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Correct. But it means that we need to create this infrastructure. My initial understanding is that the implementation of the free function kernel enqueue should be implemented using handler-less path (
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Tagging @gmlueck. If need be, I will revamp this PR to implement handler-less path for free function kernels but I'd like to consult the spec writers first.
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. What is the question? Are you asking if it is important to optimize the "submit" functions that take free-function kernels? The answer to that is "yes". In fact, the team asking for the free-function kernels is the same team that wants to reduce the launch overhead. Therefore, I'm certain that they will also care about the launch overhead when using free function kernels. If the question is specifically about
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. It is a question of the implementation, not about the spec. Did I miss something?
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. The spec team can give insight into the intention of the end-user client that motivated the design of the spec which can potentially impact implementation decisions. Greg's response cleared this up for me. |
||||||
| } | ||||||
|
|
||||||
| template <auto *Func, typename... ArgsT> | ||||||
| void single_task(handler &CGH, kernel_function_s<Func> KernelFunc, | ||||||
| ArgsT &&...Args) { | ||||||
| (void)KernelFunc; | ||||||
| queue Q = CGH.getQueue(); | ||||||
| sycl::kernel_bundle Bndl = | ||||||
| get_kernel_bundle<Func, sycl::bundle_state::executable>(Q.get_context()); | ||||||
|
||||||
| std::shared_ptr<detail::queue_impl> impl; | |
| queue(std::shared_ptr<detail::queue_impl> impl) : impl(impl) {} |
and similar for
sycl::context.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
So, based on the discussion in #20764 and the fact that these remarks are in my opinion out of scope for this PR, I suggest these be handled separately. The getQueue and getContext functions used in my code here should then automatically reap the benefits of that refactoring without requiring a lot of changes(hopefully none).
| Original file line number | Diff line number | Diff line change | ||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
|
|
@@ -32,6 +32,7 @@ | |||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
| #include <sycl/ext/oneapi/device_global/device_global.hpp> | ||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
| #include <sycl/ext/oneapi/device_global/properties.hpp> | ||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
| #include <sycl/ext/oneapi/experimental/cluster_group_prop.hpp> | ||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
| #include <sycl/ext/oneapi/experimental/detail/properties/launch_config.hpp> | ||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
| #include <sycl/ext/oneapi/experimental/graph.hpp> | ||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
| #include <sycl/ext/oneapi/experimental/raw_kernel_arg.hpp> | ||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
| #include <sycl/ext/oneapi/experimental/use_root_sync_prop.hpp> | ||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
|
|
@@ -146,6 +147,15 @@ class pipe; | |||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
| } | ||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
|
|
||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
| namespace ext ::oneapi ::experimental { | ||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
| template <auto *> struct kernel_function_s; | ||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
| template <auto *Func, typename... Args> | ||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
| void single_task(handler &, kernel_function_s<Func>, Args &&...); | ||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
| template <auto *Func, int Dimensions, typename... Args> | ||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
| void nd_launch(handler &, nd_range<Dimensions>, kernel_function_s<Func>, | ||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
| Args &&...); | ||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
| template <auto *Func, int Dimensions, typename Properties, typename... Args> | ||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
| void nd_launch(handler &, launch_config<nd_range<Dimensions>, Properties>, | ||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
| kernel_function_s<Func>, Args &&...); | ||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
|
||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
| namespace detail { | |
| class HandlerAccess { | |
| public: | |
| static void internalProfilingTagImpl(handler &Handler) { | |
| Handler.internalProfilingTagImpl(); | |
| } | |
| template <typename RangeT, typename PropertiesT> | |
| static void parallelForImpl(handler &Handler, RangeT Range, PropertiesT Props, | |
| kernel Kernel) { | |
| Handler.parallel_for_impl(Range, Props, Kernel); | |
| } | |
| static void swap(handler &LHS, handler &RHS) { | |
| std::swap(LHS.implOwner, RHS.implOwner); | |
| std::swap(LHS.impl, RHS.impl); | |
| std::swap(LHS.MLocalAccStorage, RHS.MLocalAccStorage); | |
| std::swap(LHS.MStreamStorage, RHS.MStreamStorage); | |
| std::swap(LHS.MKernelName, RHS.MKernelName); | |
| std::swap(LHS.MKernel, RHS.MKernel); | |
| std::swap(LHS.MSrcPtr, RHS.MSrcPtr); | |
| std::swap(LHS.MDstPtr, RHS.MDstPtr); | |
| std::swap(LHS.MLength, RHS.MLength); | |
| std::swap(LHS.MPattern, RHS.MPattern); | |
| std::swap(LHS.MHostKernel, RHS.MHostKernel); | |
| std::swap(LHS.MCodeLoc, RHS.MCodeLoc); | |
| } | |
| // pre/postProcess are used only for reductions right now, but the | |
| // abstractions they provide aren't reduction-specific. The main problem they | |
| // solve is | |
| // | |
| // # User code | |
| // q.submit([&](handler &cgh) { | |
| // set_dependencies(cgh); | |
| // enqueue_whatever(cgh); | |
| // }); // single submission | |
| // | |
| // that needs to be implemented as multiple enqueues involving | |
| // pre-/post-processing internally. SYCL prohibits recursive submits from | |
| // inside control group function object (lambda above) so we need some | |
| // internal interface to implement that. | |
| __SYCL_EXPORT static void preProcess(handler &CGH, type_erased_cgfo_ty F); | |
| __SYCL_EXPORT static void postProcess(handler &CGH, type_erased_cgfo_ty F); | |
| template <class FunctorTy> | |
| static void preProcess(handler &CGH, FunctorTy &Func) { | |
| preProcess(CGH, type_erased_cgfo_ty{Func}); | |
| } | |
| template <class FunctorTy> | |
| static void postProcess(handler &CGH, FunctorTy &Func) { | |
| postProcess(CGH, type_erased_cgfo_ty{Func}); | |
| } | |
| }; | |
| } // namespace detail |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Yes, that has been added to access getQueue. Now that you've brought HandlerAccess to my attention, it seems like a better solution so I'll try to migrate it over there instead.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I've added the getQueue function to HandlerAccess that just dispatches to the getQueue function of the handler.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
same below.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Thanks for the catch, the functions that take
queueactually use the parameter, it's thehandlerones that don't.There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
This has been taken care of.