Skip to content

Handler-less kernel submit API #19294

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Draft
wants to merge 4 commits into
base: sycl
Choose a base branch
from
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
23 changes: 23 additions & 0 deletions sycl/include/sycl/ext/oneapi/experimental/enqueue_functions.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -110,6 +110,16 @@ event submit_with_event_impl(const queue &Q, PropertiesT Props,
return Q.submit_with_event<__SYCL_USE_FALLBACK_ASSERT>(
Props, detail::type_erased_cgfo_ty{CGF}, nullptr, CodeLoc);
}

/// Handler-less counterpart of the command-group overload above: forwards an
/// nd_range kernel submission to the matching `queue::submit_with_event`
/// member template and returns the event it produces.
///
/// \param Q          Queue to submit to.
/// \param Props      Submission properties, passed through unchanged.
/// \param Range      nd_range the kernel is launched over.
/// \param KernelFunc Kernel callable.
/// \param CodeLoc    Code location of the original call site.
template <typename PropertiesT, typename KernelName, typename KernelType,
          int Dims>
event submit_with_event_impl(const queue &Q, PropertiesT Props,
                             nd_range<Dims> Range,
                             const KernelType &KernelFunc,
                             const sycl::detail::code_location &CodeLoc) {
  // Name the first template argument so the call below reads more easily.
  constexpr bool UseFallbackAssert = __SYCL_USE_FALLBACK_ASSERT;
  return Q.submit_with_event<UseFallbackAssert, PropertiesT, KernelName,
                             KernelType, Dims>(Props, Range, KernelFunc,
                                               CodeLoc);
}
} // namespace detail

template <typename CommandGroupFunc, typename PropertiesT>
Expand Down Expand Up @@ -144,6 +154,19 @@ event submit_with_event(const queue &Q, CommandGroupFunc &&CGF,
std::forward<CommandGroupFunc>(CGF), CodeLoc);
}

#ifdef __DPCPP_ENABLE_UNFINISHED_NO_CGH_SUBMIT
/// Submits an nd_range kernel to \p Q without going through a command-group
/// handler and returns the resulting event.
///
/// \param Q          Queue to submit to.
/// \param Props      Submission properties.
/// \param Range      nd_range the kernel is launched over.
/// \param KernelFunc Kernel callable.
/// \param CodeLoc    Code location of the caller; defaults to the call site.
template <typename PropertiesT, typename KernelName = sycl::detail::auto_name,
          typename KernelType, int Dims>
event submit_with_event(const queue &Q, PropertiesT Props,
                        nd_range<Dims> Range, const KernelType &KernelFunc,
                        const sycl::detail::code_location &CodeLoc =
                            sycl::detail::code_location::current()) {
  // Alias keeps the fully qualified (non-ADL) call readable.
  namespace EnqueueImpl = sycl::ext::oneapi::experimental::detail;
  return EnqueueImpl::submit_with_event_impl<PropertiesT, KernelName,
                                             KernelType, Dims>(
      Q, Props, Range, KernelFunc, CodeLoc);
}
#endif

template <typename KernelName = sycl::detail::auto_name, typename KernelType>
void single_task(handler &CGH, const KernelType &KernelObj) {
CGH.single_task<KernelName>(KernelObj);
Expand Down
118 changes: 118 additions & 0 deletions sycl/include/sycl/queue.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -149,6 +149,36 @@ class __SYCL_EXPORT SubmissionInfo {
ext::oneapi::experimental::event_mode_enum::none;
};

// Pointer to a function that takes an int and returns a
// detail::kernel_param_desc_t (presumably the int is the kernel parameter
// index -- TODO confirm).
using KernelParamDescGetterFuncPtr = detail::kernel_param_desc_t (*)(int);

// This class is intended to store the kernel runtime information,
// extracted from the compile time kernel structures.
class __SYCL_EXPORT KernelRuntimeInfo {
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Please define the copy/move constructors and assignment operators. I guess they can be declared as deleted, right?

public:
// Creates an empty record; every field keeps its in-class default value.
KernelRuntimeInfo() {}

// Mutable access to the stored kernel name (a non-owning std::string_view).
std::string_view &KernelName() { return MKernelName; }
// Mutable access to the owning pointer that holds the host kernel object.
std::unique_ptr<detail::HostKernelBase> &HostKernel() { return MHostKernel; }
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Should it be a raw pointer as a return type?

// Mutable access to the stored number of kernel arguments.
int &KernelNumArgs() { return MKernelNumArgs; }
// Mutable access to the function pointer used to obtain kernel parameter
// descriptors.
KernelParamDescGetterFuncPtr &KernelParamDescGetter() {
return MKernelParamDescGetter;
}
// Mutable access to the flag recording whether the kernel is an ESIMD kernel.
bool &KernelIsESIMD() { return MKernelIsESIMD; }
// Mutable access to the flag recording whether the kernel has special
// captures.
bool &KernelHasSpecialCaptures() { return MKernelHasSpecialCaptures; }
// Mutable access to the stored (non-owning, raw) pointer to the
// kernel-name-based cache.
detail::KernelNameBasedCacheT *&KernelNameBasedCachePtr() {
return MKernelNameBasedCachePtr;
}

private:
// Kernel name; a view, so the referenced characters are owned elsewhere.
std::string_view MKernelName;
// Owning pointer to the host kernel object; null until set.
std::unique_ptr<detail::HostKernelBase> MHostKernel;
// Number of kernel arguments; 0 until set.
int MKernelNumArgs = 0;
// Function used to obtain kernel parameter descriptors; null until set.
KernelParamDescGetterFuncPtr MKernelParamDescGetter = nullptr;
// Whether the kernel is an ESIMD kernel; false until set.
bool MKernelIsESIMD = false;
// Whether the kernel has special captures. Defaults to true (presumably the
// conservative choice -- TODO confirm).
bool MKernelHasSpecialCaptures = true;
// Raw pointer to the kernel-name-based cache; null until set.
detail::KernelNameBasedCacheT *MKernelNameBasedCachePtr = nullptr;
};

} // namespace v1
} // namespace detail

Expand All @@ -167,6 +197,13 @@ template <typename CommandGroupFunc, typename PropertiesT>
event submit_with_event_impl(const queue &Q, PropertiesT Props,
CommandGroupFunc &&CGF,
const sycl::detail::code_location &CodeLoc);

// Forward declaration of the handler-less (nd_range + kernel functor)
// overload of submit_with_event_impl.
template <typename PropertiesT, typename KernelName,
typename KernelType, int Dims>
event submit_with_event_impl(const queue &Q, PropertiesT Props,
nd_range<Dims> Range,
const KernelType &KernelFunc,
const sycl::detail::code_location &CodeLoc);
} // namespace detail
} // namespace ext::oneapi::experimental

Expand Down Expand Up @@ -3215,11 +3252,17 @@ class __SYCL_EXPORT queue : public detail::OwnerLessBase<queue> {
parallel_for(nd_range<Dims> Range, RestT &&...Rest) {
constexpr detail::code_location CodeLoc = getCodeLocation<KernelName>();
detail::tls_code_loc_t TlsCodeLocCapture(CodeLoc);
#ifdef __DPCPP_ENABLE_UNFINISHED_NO_CGH_SUBMIT
return submit_with_event<false, ext::oneapi::experimental::empty_properties_t,
KernelName>(sycl::ext::oneapi::experimental::empty_properties_t{},
Range, Rest..., CodeLoc);
#else
return submit(
[&](handler &CGH) {
CGH.template parallel_for<KernelName>(Range, Rest...);
},
TlsCodeLocCapture.query());
#endif
}

/// parallel_for version with a kernel represented as a lambda + nd_range that
Expand Down Expand Up @@ -3596,6 +3639,13 @@ class __SYCL_EXPORT queue : public detail::OwnerLessBase<queue> {
const queue &Q, PropertiesT Props, CommandGroupFunc &&CGF,
const sycl::detail::code_location &CodeLoc);

// Befriend the handler-less (nd_range + kernel functor) overload of
// ext::oneapi::experimental::detail::submit_with_event_impl so that it can
// reach this class's non-public members.
template <typename PropertiesT, typename KernelName,
typename KernelType, int Dims>
friend event ext::oneapi::experimental::detail::submit_with_event_impl(
const queue &Q, PropertiesT Props, nd_range<Dims> Range,
const KernelType &KernelFunc,
const sycl::detail::code_location &CodeLoc);

template <typename PropertiesT>
void ProcessSubmitProperties(PropertiesT Props,
detail::v1::SubmissionInfo &SI) const {
Expand All @@ -3609,6 +3659,36 @@ class __SYCL_EXPORT queue : public detail::OwnerLessBase<queue> {
}
}

template <int Dims, typename LambdaArgType> struct TransformUserItemType {
using type = std::conditional_t<
std::is_convertible_v<nd_item<Dims>, LambdaArgType>, nd_item<Dims>,
std::conditional_t<std::is_convertible_v<item<Dims>, LambdaArgType>,
item<Dims>, LambdaArgType>>;
};

template <typename PropertiesT, typename KernelName, typename KernelType,
int Dims>
void ProcessKernelRuntimeInfo(
PropertiesT Props, const KernelType &KernelFunc,
detail::v1::KernelRuntimeInfo &KRInfo) const {
using NameT =
typename detail::get_kernel_name_t<KernelName, KernelType>::name;
using LambdaArgType = sycl::detail::lambda_arg_type<KernelType, item<Dims>>;
using TransformedArgType = std::conditional_t<
std::is_integral<LambdaArgType>::value && Dims == 1, item<Dims>,
typename TransformUserItemType<Dims, LambdaArgType>::type>;

KRInfo.HostKernel().reset(
new detail::HostKernel<KernelType, TransformedArgType, Dims>(
KernelFunc));
KRInfo.KernelName() = detail::getKernelName<NameT>();
KRInfo.KernelNumArgs() = detail::getKernelNumParams<NameT>();
KRInfo.KernelParamDescGetter() = &(detail::getKernelParamDesc<NameT>);
KRInfo.KernelIsESIMD() = detail::isKernelESIMD<NameT>();
KRInfo.KernelHasSpecialCaptures() = detail::hasSpecialCaptures<NameT>();
KRInfo.KernelNameBasedCachePtr() = detail::getKernelNameBasedCache<NameT>();
}

#ifndef __INTEL_PREVIEW_BREAKING_CHANGES
/// TODO: Unused. Remove these when ABI-break window is open.
/// Not using `type_erased_cgfo_ty` on purpose.
Expand Down Expand Up @@ -3680,6 +3760,24 @@ class __SYCL_EXPORT queue : public detail::OwnerLessBase<queue> {
const detail::code_location &CodeLoc,
bool IsTopCodeLoc) const;

event submit_with_event_impl(
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

What about eventless? It is not done yet, right?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yes, I think it would be similar, so I've skipped it for now.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

What is the disadvantage of returning optional<event> and keeping a flag somewhere (probably in SubmissionInfo, since this is a mode of submission) that indicates whether the submission is in event or eventless mode? I am thinking about the bunch of functions that pass arguments along the call chain: duplicating all of them (one set for event, one for eventless) does not look good. What do you think?

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I am not sure if returning std::optional<event> is a good idea because of ABI concerns. It might not have a stable ABI across compiler versions or even different standard libraries (libstdc++ vs libc++).

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I am not sure if returning std::optional<event> is a good idea because of ABI concerns. It might not have a stable ABI across compiler versions or even different standard libraries (libstdc++ vs libc++).

Yes, good point.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Probably, sycl::detail::optional might be considered.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

sycl::detail::optional might work, good idea

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

But we still need to take care of keeping the layout of sycl::detail::optional stable. I am not sure that we guarantee that today.

I think having two versions (that return sycl::event and return void) might be a good alternative.

nd_range<1> Range,
const detail::v1::SubmissionInfo &ExtSubmitInfo,
const detail::v1::KernelRuntimeInfo &KRInfo,
const detail::code_location &CodeLoc, bool IsTopCodeLoc) const;

/// A template-free version of the handler-less submit_with_event for a
/// 2-dimensional nd_range, as a const member function.
event submit_with_event_impl(
nd_range<2> Range,
const detail::v1::SubmissionInfo &ExtSubmitInfo,
const detail::v1::KernelRuntimeInfo &KRInfo,
const detail::code_location &CodeLoc, bool IsTopCodeLoc) const;

/// A template-free version of the handler-less submit_with_event for a
/// 3-dimensional nd_range, as a const member function.
event submit_with_event_impl(
nd_range<3> Range,
const detail::v1::SubmissionInfo &ExtSubmitInfo,
const detail::v1::KernelRuntimeInfo &KRInfo,
const detail::code_location &CodeLoc, bool IsTopCodeLoc) const;

/// A template-free version of submit_without_event as const member function.
void submit_without_event_impl(const detail::type_erased_cgfo_ty &CGH,
const detail::v1::SubmissionInfo &SubmitInfo,
Expand Down Expand Up @@ -3763,6 +3861,26 @@ class __SYCL_EXPORT queue : public detail::OwnerLessBase<queue> {
TlsCodeLocCapture.isToplevel());
}

/// Handler-less submission of an nd_range kernel.
///
/// Translates \p Props into a SubmissionInfo, gathers the kernel's runtime
/// description into a KernelRuntimeInfo, and hands both to the template-free
/// submit_with_event_impl overload for \p Range.
///
/// \tparam UseFallbackAssert Currently unused (see TODO below).
/// \param Props      Submission properties.
/// \param Range      nd_range the kernel is launched over.
/// \param KernelFunc Kernel callable.
/// \param CodeLoc    Code location of the caller; defaults to the call site.
/// \return The event returned by submit_with_event_impl.
template <bool UseFallbackAssert, typename PropertiesT, typename KernelName,
          typename KernelType, int Dims>
event submit_with_event(PropertiesT Props, nd_range<Dims> Range,
                        const KernelType &KernelFunc,
                        const detail::code_location &CodeLoc =
                            detail::code_location::current()) const {
  // Constructed first so it is alive for the whole submission.
  detail::tls_code_loc_t CodeLocScope(CodeLoc);

  detail::v1::SubmissionInfo SubmitInfo{};
  ProcessSubmitProperties(Props, SubmitInfo);

  detail::v1::KernelRuntimeInfo KernelInfo{};
  ProcessKernelRuntimeInfo<PropertiesT, KernelName, KernelType, Dims>(
      Props, KernelFunc, KernelInfo);

  // TODO UseFallbackAssert

  return submit_with_event_impl(Range, SubmitInfo, KernelInfo,
                                CodeLocScope.query(),
                                CodeLocScope.isToplevel());
}

/// Submits a command group function object to the queue, in order to be
/// scheduled for execution on the device.
///
Expand Down
39 changes: 39 additions & 0 deletions sycl/source/detail/queue_impl.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -370,6 +370,45 @@ class queue_impl : public std::enable_shared_from_this<queue_impl> {
return createSyclObjFromImpl<event>(ResEvent);
}

/// Placeholder for the handler-less submission path with a 1-dimensional
/// nd_range. Every argument is currently ignored and a default-constructed
/// event is returned.
event submit_with_event(nd_range<1> Range,
                        const detail::v1::SubmissionInfo &SubmitInfo,
                        const detail::v1::KernelRuntimeInfo &KRInfo,
                        const detail::code_location &CodeLoc,
                        bool IsTopCodeLoc) {
  // Nothing is consumed yet; mark the parameters as intentionally unused.
  static_cast<void>(Range);
  static_cast<void>(SubmitInfo);
  static_cast<void>(KRInfo);
  static_cast<void>(CodeLoc);
  static_cast<void>(IsTopCodeLoc);
  return event{};
}

/// Placeholder for the handler-less submission path with a 2-dimensional
/// nd_range. Every argument is currently ignored and a default-constructed
/// event is returned.
event submit_with_event(nd_range<2> Range,
                        const detail::v1::SubmissionInfo &SubmitInfo,
                        const detail::v1::KernelRuntimeInfo &KRInfo,
                        const detail::code_location &CodeLoc,
                        bool IsTopCodeLoc) {
  // Nothing is consumed yet; mark the parameters as intentionally unused.
  static_cast<void>(Range);
  static_cast<void>(SubmitInfo);
  static_cast<void>(KRInfo);
  static_cast<void>(CodeLoc);
  static_cast<void>(IsTopCodeLoc);
  return event{};
}

/// Placeholder for the handler-less submission path with a 3-dimensional
/// nd_range. Every argument is currently ignored and a default-constructed
/// event is returned.
event submit_with_event(nd_range<3> Range,
                        const detail::v1::SubmissionInfo &SubmitInfo,
                        const detail::v1::KernelRuntimeInfo &KRInfo,
                        const detail::code_location &CodeLoc,
                        bool IsTopCodeLoc) {
  // Nothing is consumed yet; mark the parameters as intentionally unused.
  static_cast<void>(Range);
  static_cast<void>(SubmitInfo);
  static_cast<void>(KRInfo);
  static_cast<void>(CodeLoc);
  static_cast<void>(IsTopCodeLoc);
  return event{};
}

void submit_without_event(const detail::type_erased_cgfo_ty &CGF,
const v1::SubmissionInfo &SubmitInfo,
const detail::code_location &Loc,
Expand Down
27 changes: 27 additions & 0 deletions sycl/source/queue.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -312,6 +312,33 @@ event queue::submit_with_event_impl(
return impl->submit_with_event(CGH, SubmitInfo, CodeLoc, IsTopCodeLoc);
}

// Handler-less submission, nd_range<1> flavour: forwards every argument
// unchanged to the queue implementation object and returns its event.
event queue::submit_with_event_impl(
    nd_range<1> Range, const detail::v1::SubmissionInfo &SubmitInfo,
    const detail::v1::KernelRuntimeInfo &KRInfo,
    const detail::code_location &CodeLoc, bool IsTopCodeLoc) const {
  return impl->submit_with_event(Range, SubmitInfo, KRInfo, CodeLoc,
                                 IsTopCodeLoc);
}

// Handler-less submission, nd_range<2> flavour: forwards every argument
// unchanged to the queue implementation object and returns its event.
event queue::submit_with_event_impl(
    nd_range<2> Range, const detail::v1::SubmissionInfo &SubmitInfo,
    const detail::v1::KernelRuntimeInfo &KRInfo,
    const detail::code_location &CodeLoc, bool IsTopCodeLoc) const {
  return impl->submit_with_event(Range, SubmitInfo, KRInfo, CodeLoc,
                                 IsTopCodeLoc);
}

// Handler-less submission, nd_range<3> flavour: forwards every argument
// unchanged to the queue implementation object and returns its event.
event queue::submit_with_event_impl(
    nd_range<3> Range, const detail::v1::SubmissionInfo &SubmitInfo,
    const detail::v1::KernelRuntimeInfo &KRInfo,
    const detail::code_location &CodeLoc, bool IsTopCodeLoc) const {
  return impl->submit_with_event(Range, SubmitInfo, KRInfo, CodeLoc,
                                 IsTopCodeLoc);
}

void queue::submit_without_event_impl(
const detail::type_erased_cgfo_ty &CGH,
const detail::v1::SubmissionInfo &SubmitInfo,
Expand Down
Loading