diff --git a/adoc/chapters/architecture.adoc b/adoc/chapters/architecture.adoc index 0e77dc256..005f33d8a 100644 --- a/adoc/chapters/architecture.adoc +++ b/adoc/chapters/architecture.adoc @@ -1901,6 +1901,7 @@ always matches the byte order of the devices. This allows data to be copied between the host and the devices without any byte swapping. +[[subsec:example.sycl.application]] == Example SYCL application Below is a more complex example application, combining some of the features diff --git a/adoc/config/rouge/lib/rouge/lexers/sycl.rb b/adoc/config/rouge/lib/rouge/lexers/sycl.rb index d8e3ca6cd..4883cb29a 100644 --- a/adoc/config/rouge/lib/rouge/lexers/sycl.rb +++ b/adoc/config/rouge/lib/rouge/lexers/sycl.rb @@ -436,6 +436,14 @@ class Sycl < Cpp replace_me # Replace with list of actual keywords ) + # Exposition-only identifiers + sycl_exposition_only = %w( + register-events + register-accessors + register-kernel-bundle + has-tracking + ) + # Here are some interesting tokens # https://pygments.org/docs/tokens/ unused in C++ we can reuse # in SYCL mode: @@ -466,6 +474,8 @@ class Sycl < Cpp # Insert some specific rules at the beginning of the statement # rule of the C++ lexer prepend :statements do + rule %r/(?:#{sycl_exposition_only.join('|')})\b/, + Generic::Emph rule %r/(?:#{sycl_data_types.join('|')})\b/, Keyword::Pseudo rule %r/(?:#{sycl_functions.join('|')})\b/, diff --git a/adoc/config/rouge/lib/rouge/themes/sycl_spec.rb b/adoc/config/rouge/lib/rouge/themes/sycl_spec.rb index 95a8c34f7..b5e5d2129 100644 --- a/adoc/config/rouge/lib/rouge/themes/sycl_spec.rb +++ b/adoc/config/rouge/lib/rouge/themes/sycl_spec.rb @@ -33,7 +33,8 @@ class SYCLspec < Github style Comment::Single, :fg => '#9acd32' # Use a clearer white background style Text, :bg => '#ffffff' - + # Render exposition-only functions in italics to match ISO C++ + style Generic::Emph, :fg => '#000000', :italic => true end end end diff --git a/adoc/extensions/index.adoc b/adoc/extensions/index.adoc index 520ad7dd7..ee94c1cdd 100644 --- a/adoc/extensions/index.adoc +++ b/adoc/extensions/index.adoc @@ -14,4 +14,5 @@ include::sycl_khr_default_context.adoc[leveloffset=2] include::sycl_khr_queue_empty_query.adoc[leveloffset=2] include::sycl_khr_group_interface.adoc[leveloffset=2] include::sycl_khr_max_work_group_queries.adoc[leveloffset=2] -include::sycl_khr_queue_flush.adoc[leveloffset=2] \ No newline at end of file +include::sycl_khr_queue_flush.adoc[leveloffset=2] +include::sycl_khr_free_function_commands.adoc[leveloffset=2] \ No newline at end of file diff --git a/adoc/extensions/sycl_khr_free_function_commands.adoc b/adoc/extensions/sycl_khr_free_function_commands.adoc new file mode 100644 index 000000000..2498d89b3 --- /dev/null +++ b/adoc/extensions/sycl_khr_free_function_commands.adoc @@ -0,0 +1,1053 @@ += sycl_khr_free_function_commands + +This extension provides an alternative mechanism for submitting commands to a +device via free-functions that require developers to opt-in to the handling of +requirements and the creation of [code]#event# objects. + +The creation of [code]#event# objects may incur overheads that increase the +latency of submitting commands to devices, even if the [code]#event# object is +immediately discarded and never used. +Similarly, using a [code]#handler# can incur overheads even when there are no +dependent events or accessors registered. +Requiring developers to opt-in to these features is therefore expected to +improve the performance of many SYCL programs, by ensuring that SYCL developers +only pay the associated costs when necessary. + +== Dependencies + +This extension has no dependencies on other extensions. + +== Feature test macro + +An implementation supporting this extension must predefine the macro +[code]#SYCL_KHR_FREE_FUNCTION_COMMANDS# to one of the values defined in the +table below. + +[%header,cols="1,5"] +|=== +|Value +|Description + +|1 +|Initial version of this extension. +|=== + +== Usage example + +The example below rewrites the application from +<> to demonstrate the usage of this extension. + +[source,role=synopsis] +---- +#include +#include +using namespace sycl; // (optional) avoids need for "sycl::" before SYCL names. + +// Size of the matrices. +constexpr size_t N = 2000; +constexpr size_t M = 3000; + +int main() { + // Create a queue to work on. + queue myQueue; + + // Create some 2D arrays of float for our matrices. + float* a = malloc_shared(N * M, myQueue); + float* b = malloc_shared(N * M, myQueue); + float* c = malloc_shared(N * M, myQueue); + + // Launch an asynchronous kernel to initialize a. + khr::launch(myQueue, range<2> { N, M }, [=](id<2> index) { + size_t i = index[0]; + size_t j = index[1]; + a[i * M + j] = i * 2 + j; + }); + + // Launch an asynchronous kernel to initialize b. + khr::launch(myQueue, range<2> { N, M }, [=](id<2> index) { + size_t i = index[0]; + size_t j = index[1]; + b[i * M + j] = i * 2014 + j * 42; + }); + + // Ensure that the two previous kernels complete before enqueueing more work. + // This does not block the host, but enforces dependencies on the device. + khr::command_barrier(myQueue); + + // Launch an asynchronous kernel to compute matrix addition c = a + b. + // Require that the scheduler create an event to track completion. + range<2> local = { 2, 2 }; + range<2> global = { N, M }; + auto reqs = khr::requirements(khr::tracking()); + auto ev = khr::launch_grouped(myQueue, global, local, reqs, [=](nd_item<2> it) { + size_t i = it.get_global_id(0); + size_t j = it.get_global_id(1); + size_t index = i * M + j; + c[index] = a[index] + b[index]; + }); + + // Wait for all three kernels to complete before accessing the results. + // This blocks the host until all previous kernels have completed. + ev->wait(); + + std::cout << std::endl << "Result:" << std::endl; + for (size_t i = 0; i < N; i++) { + for (size_t j = 0; j < M; j++) { + size_t index = i * M + j; + // Compare the result to the analytic value. + if (c[index] != i * (2 + 2014) + j * (1 + 42)) { + std::cout << "Wrong value " << c[index] << " on element " << i << " " + << j << std::endl; + exit(-1); + } + } + } + + std::cout << "Good computation!" << std::endl; + return 0; +} +---- + +== Requirements + +An instance of the [code]#requirements# class template encapsulates all of the +requirements for scheduling a command. + +[source,role=synopsis] +---- +namespace sycl::khr { + +template +struct is_requirement; + +template +static constexpr bool is_requirement_v = is_requirement::value_type; + +template +class requirements { +public: + // Only available if: all Requirements satisfy is_requirement_v + requirements(Requirements... values); + + void register-accessors(handler& h) const; // exposition only + void register-events(handler& h) const; // exposition only + void register-kernel-bundle(handler& h) const; // exposition only + bool has-tracking() const; // exposition only +}; + +class tracking { +public: + tracking(bool enabled=true); +}; + +} +---- + +Each instance of a type listed below defines a specific scheduling requirement. +For each type, the [code]#is_requirement# type trait is specialized such that +[code]#is_requirement_v# returns [code]#true#. + +* [code]#event#: The command must not begin executing until the event is + complete. + +* [code]#std::vector#: The command must not begin executing until all + events in the vector are complete. + +* [code]#accessor#: The command must not begin executing until the + [code]#buffer# associated with the [code]#accessor# can be accessed in a + manner compatible with the specified [code]#access_mode#. + The [code]#accessor# must have an [code]#AccessTarget# of + [code]#target::device# or [code]#target::host_task#. + +* [code]#tracking#: The command must be submitted such that its status can be + tracked via an [code]#event# when the [code]#tracking# object is constructed + with an [code]#enabled# value of [code]#true#. + +* [code]#kernel_bundle#: The command must be submitted + using a <> from the kernel bundle. + +''' + +.[apititle]#Default constructor# +[source,role=synopsis,id=api:requirements-ctor] +---- +template +requirements(Requirements... values); +---- + +_Constraints_: + +* [code]#is_requirement_v# returns [code]#true# for each type in + [code]#Requirements#; +* [code]#Requirements# contains at most one + [code]#kernel_bundle#; and +* [code]#Requirements# contains at most one [code]#tracking#. + +_Effects_: Constructs a [code]#requirements# object representing the set of +requirements specified via the [code]#values# parameter pack. + +_Remarks_: Unless otherwise specified, if an instance of a requirement appears +more than once in the [code]#values# parameter pack, the [code]#requirements# +object behaves as if it had only been specified once. + +''' + +.[apititle]#requirements::register-accessors# +[source,role=synopsis,id=api:register-accessors] +---- +void register-accessors(handler& h) const; +---- + +This function is exposition only. +It is shown only to help specify the effect of the functions below under "New +free functions". + +_Effects_: Calls [code]#h.require# for each [code]#accessor# passed to the +constructor of this [code]#requirements# object. + +''' + +.[apititle]#requirements::register-events# +[source,role=synopsis,id=api:register-events] +---- +void register-events(handler& h) const; +---- + +This function is exposition only. +It is shown only to help specify the effect of the functions below under "New +free functions". + +_Effects_: Calls [code]#h.depends_on# for each [code]#event# or +[code]#std::vector# passed to the constructor of this +[code]#requirements# object. + +''' + +.[apititle]#requirements::register-kernel-bundle# +[source,role=synopsis,id=api:register-kernel-bundle] +---- +void register-kernel-bundle(handler& h) const; +---- + +This function is exposition only. +It is shown only to help specify the effect of the functions below under "New +free functions". + +_Effects_: Calls [code]#h.use_kernel_bundle# if a [code]#kernel_bundle# in +executable state was passed to the constructor of this [code]#requirements# +object and has no effect otherwise. + +''' + +.[apititle]#requirements::has-tracking# +[source,role=synopsis,id=api:has-tracking] +---- +bool has-tracking() const; +---- + +This function is exposition only. +It is shown only to help specify the effect of the functions below under "New +free functions". + +_Returns_: [code]#true# if this [code]#requirements# object was constructed with +a [code]#tracking# object with tracking enabled, and [code]#false# otherwise. + +''' + +.[apititle]#tracking# constructor +[source,role=synopsis,id=api:tracking-ctor] +---- +namespace sycl::khr { + +tracking(bool enabled=true); + +} +---- + +_Effects_: Construct a [code]#tracking# object, representing a requirement that +a command must be submitted such that its state can be tracked via an +[code]#event# when [code]#enabled# is [code]#true#. + +{note}If an [code]#event# is _not_ required, [code]#tracking(false)# should be +expected to introduce a small amount of overhead compared to providing no +[code]#tracking# requirement.{endnote} + +== New free functions + +=== Kernel launch + +// Launch a basic parallel_for with a function object. +// New form of queue::parallel_for(range, ...) +.[apititle]#launch# (kernel function) +[source,role=synopsis,id=api:launch] +---- +namespace sycl::khr { + +template +std::optional launch(const queue& q, range<1> r, const requirements& reqs, const KernelType& k); (1) + +template +std::optional launch(const queue& q, range<2> r, const requirements& reqs, const KernelType& k); (2) + +template +std::optional launch(const queue& q, range<3> r, const requirements& reqs, const KernelType& k); (3) + +template +std::optional launch(const queue& q, range<1> r, const KernelType& k); (4) + +template +std::optional launch(const queue& q, range<2> r, const KernelType& k); (5) + +template +std::optional launch(const queue& q, range<3> r, const KernelType& k); (6) + +} +---- + +_Constraints_ (1-3): Any accessor in [code]#Requirements# must have a target of +[code]#target::device#. + +_Effects_ (1-3): Equivalent to: + +[source,sycl] +---- +event ev = q.submit([&](handler& h) { + reqs.register-events(h); + reqs.register-accessors(h); + reqs.register-kernel-bundle(h); + h.parallel_for(r, k); +}); +return (reqs.has-tracking()) ? ev : std::nullopt; +---- + +_Effects_ (4-6): Equivalent to: [code]#return launch(q, r, {}, k);#. + +''' + +// Launch a basic parallel_for with a sycl::kernel object. +// New form of handler::parallel_for(range, ...) without set_args. +.[apititle]#launch# (kernel object) +[source,role=synopsis,id=api:launch-kernel] +---- +namespace sycl::khr { + +template +std::optional launch(const queue& q, range<1> r, const requirements& reqs, const kernel& k, Args&&... args); (1) + +template +std::optional launch(const queue& q, range<2> r, const requirements& reqs, const kernel& k, Args&&... args); (2) + +template +std::optional launch(const queue& q, range<3> r, const requirements& reqs, const kernel& k, Args&&... args); (3) + +template +std::optional launch(const queue& q, range<1> r, const kernel& k, Args&&... args); (4) + +template +std::optional launch(const queue& q, range<2> r, const kernel& k, Args&&... args); (5) + +template +std::optional launch(const queue& q, range<3> r, const kernel& k, Args&&... args); (6) + +} +---- + +_Constraints_ (1-3): + +* [code]#Requirements# does not contain a [code]#kernel_bundle#; and +* [code]#Requirements# does not contain any accessors. + +_Effects_ (1-3): Equivalent to: + +[source,sycl] +---- +event ev = q.submit([&](handler& h) { + reqs.register-events(h); + h.set_args(args...); + h.parallel_for(r, k); +}); +return (reqs.has-tracking()) ? ev : std::nullopt; +---- + +_Effects_ (4-6): Equivalent to: [code]#+return launch(q, r, {}, k, args...);+#. + +''' + +// Launch a basic parallel_for with a function object and reductions. +// New form of parallel_for(range, reduction, ...) +.[apititle]#launch_reduce# (kernel function) +[source,role=synopsis,id=api:launch_reduce] +---- +namespace sycl::khr { + +template +std::optional launch_reduce(const queue& q, range<1> r, const requirements& reqs, const KernelType& k, Reductions&&... reductions); (1) + +template +std::optional launch_reduce(const queue& q, range<2> r, const requirements& reqs, const KernelType& k, Reductions&&... reductions); (2) + +template +std::optional launch_reduce(const queue& q, range<3> r, const requirements& reqs, const KernelType& k, Reductions&&... reductions); (3) + +template +std::optional launch_reduce(const queue& q, range<1> r, const KernelType& k, Reductions&&... reductions); (4) + +template +std::optional launch_reduce(const queue& q, range<2> r, const KernelType& k, Reductions&&... reductions); (5) + +template +std::optional launch_reduce(const queue& q, range<3> r, const KernelType& k, Reductions&&... reductions); (6) + +} +---- +_Constraints_ (1-3): + +* The parameter pack consists of 1 or more objects created by the +[code]#reduction# function; and +* Any accessor in [code]#Requirements# must have a target of +[code]#target::device#. + +_Constraints_ (4-6): The parameter pack consists of 1 or more objects created by +the [code]#reduction# function. + +_Effects_ (1-3): Equivalent to: + +[source,sycl] +---- +event ev = q.submit([&](handler& h) { + reqs.register-events(h); + reqs.register-accessors(h); + reqs.register-kernel-bundle(h); + h.parallel_for(r, reductions..., k); +}); +return (reqs.has-tracking()) ? ev : std::nullopt; +---- + +_Effects_ (4-6): Equivalent to [code]#+return launch_reduce(q, r, {}, +reductions...);+#. + +''' + +// Launch an ND-range parallel_for with a function object. +// New form of parallel_for(nd_range, ...) +.[apititle]#launch_grouped# (kernel function) +[source,role=synopsis,id=api:launch_grouped] +---- +namespace sycl::khr { + +template +std::optional launch_grouped(const queue& q, range<1> r, range<1> size, const requirements& reqs, const KernelType& k); (1) + +template +std::optional launch_grouped(const queue& q, range<2> r, range<2> size, const requirements& reqs, const KernelType& k); (2) + +template +std::optional launch_grouped(const queue& q, range<3> r, range<3> size, const requirements& reqs, const KernelType& k); (3) + +template +std::optional launch_grouped(const queue& q, range<1> r, range<1> size, const KernelType& k); (4) + +template +std::optional launch_grouped(const queue& q, range<2> r, range<2> size, const KernelType& k); (5) + +template +std::optional launch_grouped(const queue& q, range<3> r, range<3> size, const KernelType& k); (6) + +} +---- + +_Constraints_ (1-3): Any accessor in [code]#Requirements# must have a target of +[code]#target::device#. + +_Effects_ (1-3): Equivalent to: + +[source,sycl] +---- +event ev = q.submit([&](handler& h) { + reqs.register-events(h); + reqs.register-accessors(h); + reqs.register-kernel-bundle(h); + h.parallel_for(nd_range(r, size), k); +}); +return (reqs.has-tracking()) ? ev : std::nullopt; +---- + +_Effects_ (4-6): Equivalent to [code]#return launch_grouped(q, r, size, {}, +k);#. + +''' + +// Launch an ND-range parallel_for with a sycl::kernel object. +// New form of parallel_for(nd_range, ...) without set_args. +.[apititle]#launch_grouped# (kernel object) +[source,role=synopsis,id=api:launch_grouped-kernel] +---- +namespace sycl::khr { + +template +std::optional launch_grouped(const queue& q, range<1> r, range<1> size, const requirements& reqs, const kernel& k, Args&&... args); (1) + +template +std::optional launch_grouped(const queue& q, range<2> r, range<2> size, const requirements& reqs, const kernel& k, Args&&... args); (2) + +template +std::optional launch_grouped(const queue& q, range<3> r, range<3> size, const requirements& reqs, const kernel& k, Args&&... args); (3) + +template +std::optional launch_grouped(const queue& q, range<1> r, range<1> size, const kernel& k, Args&&... args); (4) + +template +std::optional launch_grouped(const queue& q, range<2> r, range<2> size, const kernel& k, Args&&... args); (5) + +template +std::optional launch_grouped(const queue& q, range<2> r, range<2> size, const kernel& k, Args&&... args); (6) + +} +---- +_Constraints_ (1-3): + +* [code]#Requirements# does not contain a [code]#kernel_bundle#; and +* [code]#Requirements# does not contain any accessors. + +_Effects_ (1-3): Equivalent to: + +[source,sycl] +---- +event ev = q.submit([&](handler& h) { + reqs.register-events(h); + h.set_args(args...); + h.parallel_for(nd_range(r, size), k); +}); +return (reqs.has-tracking()) ? ev : std::nullopt; +---- + +_Effects_ (4-6): Equivalent to: [code]#+return launch_grouped(q, r, size, {}, k, +args...);+#. + +''' + +// Launch an ND-range parallel_for with a function object and reductions. +// New form of parallel_for(nd_range, ...) +.[apititle]#launch_grouped_reduce# (kernel function) +[source,role=synopsis,id=api:launch_grouped_reduce] +---- +namespace sycl::khr { + +template +std::optional launch_grouped_reduce(const queue& q, range<1> r, range<1> size, const requirements& reqs, const KernelType& k, Reductions&&... reductions); (1) + +template +std::optional launch_grouped_reduce(const queue& q, range<2> r, range<2> size, const requirements& reqs, const KernelType& k, Reductions&&... reductions); (2) + +template +std::optional launch_grouped_reduce(const queue& q, range<3> r, range<3> size, const requirements& reqs, const KernelType& k, Reductions&&... reductions); (3) + +template +std::optional launch_grouped_reduce(const queue& q, range<1> r, range<1> size, const KernelType& k, Reductions&&... reductions); (4) + +template +std::optional launch_grouped_reduce(const queue& q, range<2> r, range<2> size, const KernelType& k, Reductions&&... reductions); (5) + +template +std::optional launch_grouped_reduce(const queue& q, range<3> r, range<3> size, const KernelType& k, Reductions&&... reductions); (6) + +} +---- +_Constraints_ (1-3): + +* The parameter pack consists of 1 or more objects created by the +[code]#reduction# function; and +* Any accessor in [code]#Requirements# must have a target of +[code]#target::device#. + +_Constraints_ (4-6): The parameter pack consists of 1 or more objects created by +the [code]#reduction# function. + +_Effects_ (1-3): Equivalent to: + +[source,sycl] +---- +event ev = q.submit([&](handler& h) { + reqs.register-events(h); + reqs.register-accessors(h); + reqs.register-kernel-bundle(h); + h.parallel_for(nd_range(r, size), reductions..., k); +}); +return (reqs.has-tracking()) ? ev : std::nullopt; +---- + +_Effects_ (4-6): Equivalent to [code]#+return launch_grouped_reduce(q, r, size, +{}, k, reductions...);+#. + +''' + +// Launch a single work-item with a function object. +// New form of single_task(...) +.[apititle]#launch_task# (kernel function) +[source,role=synopsis,id=api:launch_task] +---- +namespace sycl::khr { + +template +std::optional launch_task(const queue& q, const requirements& reqs, const KernelType& k); (1) + +template +std::optional launch_task(const queue& q, const KernelType& k); (2) + +} +---- + +_Constraints_ (1): Any accessor in [code]#Requirements# must have a target of +[code]#target::device#. + +_Effects_ (1): Equivalent to: + +[source,sycl] +---- +event ev = q.submit([&](handler& h) { + reqs.register-events(h); + reqs.register-accessors(h); + reqs.register-kernel-bundle(h); + h.single_task(k); +}); +return (reqs.has-tracking()) ? ev : std::nullopt; +---- + +_Effects_ (2): Equivalent to [code]#return launch_task(q, {}, k);#. + +''' + +// Launch a single work-item with a sycl::kernel object. +// New form of single_task(...) without set_args. +.[apititle]#launch_task# (kernel object) +[source,role=synopsis,id=api:launch_task-kernel] +---- +namespace sycl::khr { + +template +std::optional launch_task(const queue& q, const requirements& reqs, const kernel& k, Args&&... args); (1) + +template +std::optional launch_task(const queue& q, const kernel& k, Args&&... args); (2) + +} +---- +_Constraints_ (1): + +* [code]#Requirements# does not contain a [code]#kernel_bundle#; and +* [code]#Requirements# does not contain any accessors. + +_Effects_ (1): Equivalent to: + +[source,sycl] +---- +event ev = q.submit([&](handler& h) { + reqs.register-events(h); + h.set_args(args...); + h.single_task(k); +}); +return (reqs.has-tracking()) ? ev : std::nullopt; +---- + +_Effects_ (2): Equivalent to [code]#+return launch_task(q, {}, k, args...);+#. + +''' + +=== Memory operations + +.[apititle]#memcpy# +[source,role=synopsis,id=api:memcpy] +---- +namespace sycl::khr { + +template +std::optional memcpy(const queue& q, void* dest, const void* src, size_t numBytes, const requirements& reqs = {}); + +} +---- + +_Constraints_: + +* [code]#Requirements# does not contain a [code]#kernel_bundle#; and +* [code]#Requirements# does not contain any accessors. + +_Effects_: Equivalent to: + +[source,sycl] +---- +event ev = q.submit([&](handler& h) { + reqs.register-events(h); + h.memcpy(dest, src, numBytes); +}); +return (reqs.has-tracking()) ? ev : std::nullopt; +---- + +''' + +.[apititle]#copy# (USM pointers) +[source,role=synopsis,id=api:copy-pointer] +---- +namespace sycl::khr { + +template +std::optional copy(const queue& q, const T* src, T* dest, size_t count, const requirements& reqs = {}); + +} +---- + +Copies between two USM pointers. + +_Constraints_: + +* [code]#T# is <>; +* [code]#Requirements# does not contain a [code]#kernel_bundle#; and +* [code]#Requirements# does not contain any accessors. + +_Preconditions_: + +* [code]#src# is a host pointer or a pointer within a USM allocation that is + accessible on the device; +* [code]#dest# is a host pointer or a pointer within a USM allocation that is + accessible on the device; +* [code]#src# and [code]#dest# both point to allocations of at least + [code]#count# elements of type [code]#T#; and +* If either [code]#src# or [code]#dest# is a pointer to a USM allocation, that + allocation was created from the same context associated with [code]#q#. + +_Effects_: Equivalent to: + +[source,sycl] +---- +event ev = q.submit([&](handler& h) { + reqs.register-events(h); + h.copy(src, dest, count); +}); +return (reqs.has-tracking()) ? ev : std::nullopt; +---- + +''' + +.[apititle]#copy# (accessors, host to device) +[source,role=synopsis,id=api:copy-accessor-h2d] +---- +namespace sycl::khr { + +template +std::optional copy(const queue& q, const SrcT* src, accessor dest, const requirements& reqs = {}); (1) + +template +std::optional copy(const queue& q, std::shared_ptr src, accessor dest, const requirements& reqs = {}); (2) + +} +---- + +Copies from host to device. + +_Constraints_: + +* [code]#SrcT# and [code]#DestT# are <>; +* [code]#DestMode# is [code]#access_mode::write# or + [code]#access_mode::read_write#; +* [code]#Requirements# does not contain a [code]#kernel_bundle#; and +* [code]#Requirements# does not contain any accessors. + +_Preconditions_: + +* [code]#src# is a host pointer; and +* [code]#src# points to an allocation of at least as many bytes as the range + represented by [code]#dest#. + +_Effects_: Equivalent to: + +[source,sycl] +---- +event ev = q.submit([&](handler& h) { + reqs.register-events(h); + h.require(dest); + h.copy(src, dest); +}); +return (reqs.has-tracking()) ? ev : std::nullopt; +---- + +''' + +.[apititle]#copy# (accessors, device to host) +[source,role=synopsis,id=api:copy-accessor-d2h] +---- +namespace sycl::khr { + +template +std::optional copy(const queue& q, accessor src, DestT* dest, const requirements& reqs = {}); (1) + +template +std::optional copy(const queue& q, accessor src, std::shared_ptr dest, const requirements& reqs = {}); (2) + +} +---- + +Copies from device to host. + +_Constraints_: + +* [code]#SrcT# and [code]#DestT# are <>; +* [code]#DestMode# is [code]#access_mode::read# or + [code]#access_mode::read_write#; +* [code]#Requirements# does not contain a [code]#kernel_bundle#; and +* [code]#Requirements# does not contain any accessors. + +_Preconditions_: + +* [code]#dest# is a host pointer; and +* [code]#dest# points to an allocation of at least as many bytes as the range + represented by [code]#src#. + +_Effects_: Equivalent to: + +[source,sycl] +---- +event ev = q.submit([&](handler& h) { + reqs.register-events(h); + h.require(src); + h.copy(src, dest); +}); +return (reqs.has-tracking()) ? ev : std::nullopt; +---- + +''' + +.[apititle]#copy# (accessors, device to device) +[source,role=synopsis,id=api:copy-accessor-d2d] +---- +namespace sycl::khr { + +template +std::optional copy(const queue& q, accessor src, accessor dest, const requirements& reqs = {}); + +} +---- + +Copies between two device accessors. + +_Constraints_: + +* [code]#SrcT# and [code]#DestT# are <>; +* [code]#SrcMode# is [code]#access_mode::read# or + [code]#access_mode::read_write#; +* [code]#DestMode# is [code]#access_mode::write# or + [code]#access_mode::read_write#; +* [code]#Requirements# does not contain a [code]#kernel_bundle#; and +* [code]#Requirements# does not contain any accessors. + +_Effects_: Equivalent to: + +[source,sycl] +---- +event ev = q.submit([&](handler& h) { + reqs.register-events(h); + h.require(src); + h.require(dest); + h.copy(src, dest); +}); +---- + +_Throws_: A synchronous [code]#exception# with the [code]#errc::invalid# error +code if [code]#dest.get_count() < src.get_count()#. + +''' + +.[apititle]#memset# +[source,role=synopsis,id=api:memset] +---- +namespace sycl::khr { + +template +std::optional memset(const queue& q, void* ptr, int value, size_t numBytes, const requirements& reqs = {}); + +} +---- + +_Constraints_: + +* [code]#Requirements# does not contain a [code]#kernel_bundle#; and +* [code]#Requirements# does not contain any accessors. + +_Effects_: Equivalent to: + +[source,sycl] +---- +event ev = q.submit([&](handler& h) { + reqs.register-events(h); + h.memset(ptr, value, numBytes); +}); +return (reqs.has-tracking()) ? ev : std::nullopt; +---- + +''' + +.[apititle]#fill# +[source,role=synopsis,id=api:fill] +---- +namespace sycl::khr { + +template +std::optional fill(const queue& q, T* ptr, const T& pattern, size_t count, const requirements& reqs = {}); (1) + +template +std::optional fill(const queue& q, accessor dest, const T& src, const requirements& reqs = {}); (2) + +} +---- + +_Constraints (1)_: + +* [code]#T# is <>; and +* [code]#Requirements# does not contain a [code]#kernel_bundle#; and +* [code]#Requirements# does not contain any accessors. + +_Constraints (2)_: + +* [code]#Requirements# does not contain a [code]#kernel_bundle#; and +* [code]#Requirements# does not contain any accessors. + +_Effects (1)_: Equivalent to: + +[source,sycl] +---- +event ev = q.submit([&](handler& h) { + reqs.register-events(h); + h.fill(ptr, pattern, count); +}); +return (reqs.has-tracking()) ? ev : std::nullopt; +---- + +_Effects (2)_: Equivalent to: + +[source,sycl] +---- +event ev = q.submit([&](handler& h) { + reqs.register-events(h); + h.fill(dest, src); +}); +return (reqs.has-tracking()) ? ev : std::nullopt; +---- + +''' + +.[apititle]#update_host# +[source,role=synopsis,id=api:update_host] +---- +namespace sycl::khr { + +template +std::optional update_host(const queue& q, accessor acc, const requirements& reqs = {}); + +} +---- + +_Constraints_: + +* [code]#Requirements# does not contain a [code]#kernel_bundle#; and +* [code]#Requirements# does not contain any accessors. + +_Effects_: Equivalent to: + +[source,sycl] +---- +event ev = q.submit([&](handler& h) { + reqs.register-events(h); + h.require(acc); + h.update_host(acc); +}); +return (reqs.has-tracking()) ? ev : std::nullopt; +---- + +''' + +.[apititle]#prefetch# +[source,role=synopsis,id=api:prefetch] +---- +namespace sycl::khr { + +template +std::optional prefetch(const queue& q, void* ptr, size_t numBytes, const requirements& reqs = {}); + +} +---- +_Constraints_: + +* [code]#Requirements# does not contain a [code]#kernel_bundle#; and +* [code]#Requirements# does not contain any accessors. + +_Effects_: Equivalent to: + +[source,sycl] +---- +event ev = q.submit([&](handler& h) { + reqs.register-events(h); + h.prefetch(ptr, numBytes); +}); +return (reqs.has-tracking()) ? ev : std::nullopt; +---- + +''' + +.[apititle]#mem_advise# +[source,role=synopsis,id=api:mem_advise] +---- +namespace sycl::khr { + +template +std::optional mem_advise(const queue& q, void* ptr, size_t numBytes, int advice, const requirements& reqs = {}); + +} +---- +_Constraints_: + +* [code]#Requirements# does not contain a [code]#kernel_bundle#; and +* [code]#Requirements# does not contain any accessors. + +_Effects_: Equivalent to: + +[source,sycl] +---- +event ev = q.submit([&](handler& h) { + reqs.register-events(h); + h.mem_advise(ptr, numBytes, advice); +}); +return (reqs.has-tracking()) ? ev : std::nullopt; +---- + +''' + +=== Command and event barriers + +.[apititle]#command_barrier# +[source,role=synopsis,id=api:command_barrier] +---- +namespace sycl::khr { + +template +std::optional command_barrier(const queue& q, const requirements& reqs = {}); + +} +---- +_Constraints_: + +* [code]#Requirements# does not contain a [code]#kernel_bundle#; and +* [code]#Requirements# does not contain any accessors. + +_Effects_: Enqueues a command barrier. +Any commands submitted after this barrier cannot begin execution until: + +* All commands previously submitted to this queue have completed; and +* All requirements in [code]#reqs# are satisfied. + +{note}If a [code]#command_barrier# is submitted to an in-order queue with no +requirements, then this operation may be a no-op.{endnote} + +''' + +.[apititle]#event_barrier# +[source,role=synopsis,id=api:event_barrier] +---- +namespace sycl::khr { + +template +std::optional event_barrier(const queue& q, const requirements& reqs = {}); + +} +---- +_Constraints_: + +* [code]#Requirements# does not contain a [code]#kernel_bundle#; and +* [code]#Requirements# does not contain any accessors. + +_Effects_: Enqueues an event barrier. +Any commands submitted after this barrier cannot begin execution until all +requirements in [code]#reqs# are satisfied. + +{note}If an [code]#event_barrier# is submitted with no requirements, then this +operation may be a no-op.{endnote} + +'''