diff --git a/clang/include/clang/Basic/Attr.td b/clang/include/clang/Basic/Attr.td index 4b8c33bcf469f..c4d7a2a027057 100644 --- a/clang/include/clang/Basic/Attr.td +++ b/clang/include/clang/Basic/Attr.td @@ -1621,12 +1621,14 @@ def SYCLType: InheritableAttr { let Subjects = SubjectList<[CXXRecord, Enum], ErrorDiag>; let LangOpts = [SYCLIsDevice, SilentlyIgnoreSYCLIsHost]; let Args = [EnumArgument<"Type", "SYCLType", /*is_string=*/true, - ["accessor", "local_accessor", "work_group_memory", "dynamic_work_group_memory", + ["accessor", "local_accessor", "dynamic_local_accessor", + "work_group_memory", "dynamic_work_group_memory", "specialization_id", "kernel_handler", "buffer_location", "no_alias", "accessor_property_list", "group", "private_memory", "aspect", "annotated_ptr", "annotated_arg", "stream", "sampler", "host_pipe", "multi_ptr"], - ["accessor", "local_accessor", "work_group_memory", "dynamic_work_group_memory", + ["accessor", "local_accessor", "dynamic_local_accessor", + "work_group_memory", "dynamic_work_group_memory", "specialization_id", "kernel_handler", "buffer_location", "no_alias", "accessor_property_list", "group", "private_memory", "aspect", "annotated_ptr", "annotated_arg", diff --git a/clang/include/clang/Sema/SemaSYCL.h b/clang/include/clang/Sema/SemaSYCL.h index 28a9c1859638a..8aac24b8d0079 100644 --- a/clang/include/clang/Sema/SemaSYCL.h +++ b/clang/include/clang/Sema/SemaSYCL.h @@ -64,7 +64,8 @@ class SYCLIntegrationHeader { kind_stream, kind_work_group_memory, kind_dynamic_work_group_memory, - kind_last = kind_dynamic_work_group_memory + kind_dynamic_accessor, + kind_last = kind_dynamic_accessor }; public: diff --git a/clang/lib/Sema/SemaSYCL.cpp b/clang/lib/Sema/SemaSYCL.cpp index ab0864aaf41fe..4a25585ea15e8 100644 --- a/clang/lib/Sema/SemaSYCL.cpp +++ b/clang/lib/Sema/SemaSYCL.cpp @@ -94,7 +94,8 @@ bool SemaSYCL::isSyclType(QualType Ty, SYCLTypeAttr::SYCLType TypeName) { static bool isSyclAccessorType(QualType Ty) { return SemaSYCL::isSyclType(Ty, SYCLTypeAttr::accessor) || - SemaSYCL::isSyclType(Ty, SYCLTypeAttr::local_accessor); + SemaSYCL::isSyclType(Ty, SYCLTypeAttr::local_accessor) || + SemaSYCL::isSyclType(Ty, SYCLTypeAttr::dynamic_local_accessor); } // FIXME: Accessor property lists should be modified to use compile-time @@ -1152,7 +1153,8 @@ static QualType GetSYCLKernelObjectType(const FunctionDecl *KernelCaller) { /// \return the target of given SYCL accessor type static target getAccessTarget(QualType FieldTy, const ClassTemplateSpecializationDecl *AccTy) { - if (SemaSYCL::isSyclType(FieldTy, SYCLTypeAttr::local_accessor)) + if (SemaSYCL::isSyclType(FieldTy, SYCLTypeAttr::local_accessor) || + SemaSYCL::isSyclType(FieldTy, SYCLTypeAttr::dynamic_local_accessor)) return local; return static_cast( @@ -4815,7 +4817,15 @@ class SyclKernelIntHeaderCreator : public SyclKernelFieldHandler { int Dims = static_cast( AccTy->getTemplateArgs()[1].getAsIntegral().getExtValue()); int Info = getAccessTarget(FieldTy, AccTy) | (Dims << 11); - Header.addParamDesc(SYCLIntegrationHeader::kind_accessor, Info, + + SYCLIntegrationHeader::kernel_param_kind_t ParamKind = + SYCLIntegrationHeader::kind_accessor; + + if (SemaSYCL::isSyclType(FieldTy, SYCLTypeAttr::dynamic_local_accessor)) { + ParamKind = SYCLIntegrationHeader::kind_dynamic_accessor; + } + + Header.addParamDesc(ParamKind, Info, CurOffset + offsetOf(RD, BC.getType()->getAsCXXRecordDecl())); } else if (SemaSYCL::isSyclType(FieldTy, SYCLTypeAttr::work_group_memory)) { @@ -4841,8 +4851,14 @@ class SyclKernelIntHeaderCreator : public SyclKernelFieldHandler { AccTy->getTemplateArgs()[1].getAsIntegral().getExtValue()); int Info = getAccessTarget(FieldTy, AccTy) | (Dims << 11); - Header.addParamDesc(SYCLIntegrationHeader::kind_accessor, Info, - CurOffset + offsetOf(FD, FieldTy)); + SYCLIntegrationHeader::kernel_param_kind_t ParamKind = + SYCLIntegrationHeader::kind_accessor; + + if (SemaSYCL::isSyclType(FieldTy, SYCLTypeAttr::dynamic_local_accessor)) { + ParamKind = SYCLIntegrationHeader::kind_dynamic_accessor; + } + + Header.addParamDesc(ParamKind, Info, CurOffset + offsetOf(FD, FieldTy)); } else if (SemaSYCL::isSyclType(FieldTy, SYCLTypeAttr::stream)) { addParam(FD, FieldTy, SYCLIntegrationHeader::kind_stream); } else if (SemaSYCL::isSyclType(FieldTy, SYCLTypeAttr::work_group_memory)) { @@ -6025,6 +6041,7 @@ static const char *paramKind2Str(KernelParamKind K) { CASE(pointer); CASE(work_group_memory); CASE(dynamic_work_group_memory); + CASE(dynamic_accessor); } return ""; diff --git a/clang/test/CodeGenSYCL/Inputs/sycl.hpp b/clang/test/CodeGenSYCL/Inputs/sycl.hpp index 74b5f68edc34c..1cfd0d3ff272c 100644 --- a/clang/test/CodeGenSYCL/Inputs/sycl.hpp +++ b/clang/test/CodeGenSYCL/Inputs/sycl.hpp @@ -680,6 +680,19 @@ __SYCL_TYPE(dynamic_work_group_memory) dynamic_work_group_memory { work_group_memory LocalMem; }; +template +class __attribute__((sycl_special_class)) +__SYCL_TYPE(dynamic_local_accessor) dynamic_local_accessor { +public: + dynamic_local_accessor() = default; + + void __init(__attribute((opencl_local)) DataT *Ptr) { this->LocalMem.__init(Ptr); } + local_accessor get() const { return LocalMem; } + +private: + local_accessor LocalMem; +}; + template class buffer { diff --git a/clang/test/CodeGenSYCL/dynamic_local_accessor.cpp b/clang/test/CodeGenSYCL/dynamic_local_accessor.cpp new file mode 100644 index 0000000000000..64ac94d8d9e67 --- /dev/null +++ b/clang/test/CodeGenSYCL/dynamic_local_accessor.cpp @@ -0,0 +1,39 @@ +// RUN: %clang_cc1 -fsycl-is-device -triple spir64-unknown-unknown -disable-llvm-passes -emit-llvm %s -o %t.ll +// RUN: FileCheck < %t.ll %s --check-prefix CHECK-IR +// RUN: %clang_cc1 -fsycl-is-device -triple spir64-unknown-unknown -fsycl-int-header=%t.h %s +// RUN: FileCheck < %t.h %s --check-prefix CHECK-INT-HEADER +// +// Tests for dynamic_local_accessor kernel parameter using the dummy implementation in Inputs/sycl.hpp. +// The first two RUN commands verify that the init call is generated with the correct arguments in LLVM IR +// and the second two RUN commands verify the contents of the integration header produced by the frontend. +// +// CHECK-IR: define dso_local spir_kernel void @ +// CHECK-IR-SAME: ptr addrspace(3) noundef align 4 [[PTR:%[a-zA-Z0-9_]+]] +// +// CHECK-IR: [[PTR]].addr = alloca ptr addrspace(3), align 8 +// CHECK-IR: [[PTR]].addr.ascast = addrspacecast ptr [[PTR]].addr to ptr addrspace(4) +// CHECK-IR: store ptr addrspace(3) [[PTR]], ptr addrspace(4) [[PTR]].addr.ascast, align 8 +// CHECK-IR: [[PTR_LOAD:%[a-zA-Z0-9_]+]] = load ptr addrspace(3), ptr addrspace(4) [[PTR]].addr.ascast, align 8 +// +// CHECK-IR: call spir_func void @{{.*}}__init{{.*}}(ptr addrspace(4) noundef align 8 dereferenceable_or_null(8) %{{[a-zA-Z0-9_]+}}, ptr addrspace(3) noundef [[PTR_LOAD]]) +// +// CHECK-INT-HEADER: const kernel_param_desc_t kernel_signatures[] = { +// CHECK-INT-HEADER-NEXT: //--- _ZTSZZ4mainENKUlRN4sycl3_V17handlerEE_clES2_EUlNS0_4itemILi1EEEE_ +// CHECK-INT-HEADER-NEXT: { kernel_param_kind_t::kind_dynamic_local_accessor, {{[4,8]}}, 0 }, +// CHECK-INT-HEADER-EMPTY: +// CHECK-INT-HEADER-NEXT: { kernel_param_kind_t::kind_invalid, -987654321, -987654321 }, +// CHECK-INT-HEADER-NEXT: }; + +#include "Inputs/sycl.hpp" + +int main() { + sycl::queue Q; + sycl::dynamic_local_accessor dynLocalAcc; + Q.submit([&](sycl::handler &CGH) { + sycl::range<1> ndr; + CGH.parallel_for(ndr, [=](sycl::item<1> it) { + auto localAcc = dynLocalAcc.get(); + int *ptr = &localAcc; }); + }); + return 0; +} diff --git a/sycl-jit/common/include/Kernel.h b/sycl-jit/common/include/Kernel.h new file mode 100644 index 0000000000000..e69de29bb2d1d diff --git a/sycl/include/sycl/accessor.hpp b/sycl/include/sycl/accessor.hpp index 06f95f8faa1d6..74fcbdec299ec 100644 --- a/sycl/include/sycl/accessor.hpp +++ b/sycl/include/sycl/accessor.hpp @@ -215,6 +215,7 @@ namespace sycl { inline namespace _V1 { class stream; + namespace ext::intel::esimd::detail { // Forward declare a "back-door" access class to support ESIMD. class AccessorPrivateProxy; @@ -227,6 +228,10 @@ template > class accessor; +namespace ext::oneapi::experimental { +template class dynamic_local_accessor; +} + namespace detail { template @@ -344,6 +349,7 @@ class accessor_common { typename AccType = accessor> + class AccessorSubscript { static constexpr int Dims = Dimensions; @@ -2148,6 +2154,7 @@ class __SYCL_SPECIAL_CLASS local_accessor_base : #endif public detail::accessor_common { + protected: constexpr static int AdjustedDim = Dimensions == 0 ? 1 : Dimensions; @@ -2638,6 +2645,7 @@ class __SYCL_EBO __SYCL_SPECIAL_CLASS __SYCL_TYPE(local_accessor) local_accessor private: friend class sycl::ext::intel::esimd::detail::AccessorPrivateProxy; + template friend class ext::oneapi::experimental::dynamic_local_accessor; }; template class command_graph; class raw_kernel_arg; template class work_group_memory; +//template class local_accessor; namespace detail { // List of sycl features and extensions which are not supported by graphs. Used @@ -107,6 +108,8 @@ class node_impl; class graph_impl; class exec_graph_impl; class dynamic_parameter_impl; +class dynamic_work_group_memory_impl; +class dynamic_local_accessor_impl; class dynamic_command_group_impl; } // namespace detail @@ -517,6 +520,8 @@ class __SYCL_EXPORT dynamic_parameter_base { dynamic_parameter_base() = default; #endif + dynamic_parameter_base(const std::shared_ptr& impl); + dynamic_parameter_base( sycl::ext::oneapi::experimental::command_graph Graph); @@ -546,53 +551,54 @@ class __SYCL_EXPORT dynamic_parameter_base { void updateAccessor(const sycl::detail::AccessorBaseHost *Acc); - void updateWorkGroupMem(size_t BufferSize); - std::shared_ptr impl; template friend const decltype(Obj::impl) & sycl::detail::getSyclObjImpl(const Obj &SyclObject); + + friend class handler; //FIXME }; -class dynamic_work_group_memory_base -#ifndef __SYCL_DEVICE_ONLY__ - : public dynamic_parameter_base -#endif -{ +class __SYCL_EXPORT dynamic_work_group_memory_base + : public dynamic_parameter_base { + public: dynamic_work_group_memory_base() = default; -#ifndef __SYCL_DEVICE_ONLY__ -#ifdef __INTEL_PREVIEW_BREAKING_CHANGES - dynamic_work_group_memory_base(size_t Size) - : dynamic_parameter_base(), BufferSize(Size) {} +#ifdef __INTEL_PREVIEW_BREAKING_CHANGES + dynamic_work_group_memory_base(size_t BufferSizeInBytes); #endif // TODO: Remove in next ABI breaking window dynamic_work_group_memory_base( - experimental::command_graph Graph, size_t Size) - : dynamic_parameter_base(Graph), BufferSize(Size) {} -#else - dynamic_work_group_memory_base(size_t Size) : BufferSize(Size) {} - dynamic_work_group_memory_base( - experimental::command_graph /*Graph*/, - size_t Size) - : BufferSize(Size) {} -#endif + experimental::command_graph Graph, + size_t BufferSizeInBytes); -private: -#ifdef __SYCL_DEVICE_ONLY__ - [[maybe_unused]] unsigned char Padding[sizeof(dynamic_parameter_base)]; -#endif - size_t BufferSize{}; - friend class sycl::handler; +protected: + void updateWorkGroupMem(size_t NewBufferSizeInBytes); }; + +class __SYCL_EXPORT dynamic_local_accessor_base: public dynamic_parameter_base { +public: + dynamic_local_accessor_base() = default; + + dynamic_local_accessor_base( + experimental::command_graph Graph, + sycl::range<3> AllocationSize, int Dims, int ElemSize, const property_list &PropList); + +protected: + void updateLocalAccessor(sycl::range<3> NewAllocationSize); +}; + } // namespace detail template class __SYCL_SPECIAL_CLASS __SYCL_TYPE(dynamic_work_group_memory) dynamic_work_group_memory - : public detail::dynamic_work_group_memory_base { +#ifndef __SYCL_DEVICE_ONLY__ + : public detail::dynamic_work_group_memory_base +#endif +{ public: // Check that DataT is an unbounded array type. static_assert(std::is_array_v && std::extent_v == 0); @@ -623,9 +629,15 @@ __SYCL_TYPE(dynamic_work_group_memory) dynamic_work_group_memory /// @param Graph The graph associated with this object. /// @param Num Number of elements in the unbounded array DataT. dynamic_work_group_memory( - experimental::command_graph Graph, size_t Num) + [[maybe_unused]] experimental::command_graph + Graph, + [[maybe_unused]] size_t Num) +#ifndef __SYCL_DEVICE_ONLY__ : detail::dynamic_work_group_memory_base( - Graph, Num * sizeof(std::remove_extent_t)) {} + Graph, Num * sizeof(std::remove_extent_t)) +#endif + { + } work_group_memory get() const { #ifndef __SYCL_DEVICE_ONLY__ @@ -641,8 +653,7 @@ __SYCL_TYPE(dynamic_work_group_memory) dynamic_work_group_memory /// @param Num The new number of elements in the unbounded array. void update([[maybe_unused]] size_t Num) { #ifndef __SYCL_DEVICE_ONLY__ - detail::dynamic_parameter_base::updateWorkGroupMem( - Num * sizeof(std::remove_extent_t)); + updateWorkGroupMem(Num * sizeof(std::remove_extent_t)); #endif } @@ -656,6 +667,82 @@ __SYCL_TYPE(dynamic_work_group_memory) dynamic_work_group_memory void __init(decoratedPtr Ptr) { this->WorkGroupMem.__init(Ptr); } #endif + +#ifdef __SYCL_DEVICE_ONLY__ + [[maybe_unused]] unsigned char + Padding[sizeof(detail::dynamic_work_group_memory_base)]; +#endif +}; + +template +class __SYCL_SPECIAL_CLASS +__SYCL_TYPE(dynamic_local_accessor) dynamic_local_accessor +#ifndef __SYCL_DEVICE_ONLY__ + : public detail::dynamic_local_accessor_base +#endif +{ +public: + static_assert(Dimensions > 0 && Dimensions <= 3); + + // Frontend requires special types to have a default constructor in order to + // have a uniform way of initializing an object of special type to then call + // the __init method on it. This is purely an implementation detail and not + // part of the spec. + // TODO: Revisit this once https://github.com/intel/llvm/issues/16061 is + // closed. + dynamic_local_accessor() = default; + + /// Constructs a new dynamic_local_accessor object. + /// @param Graph The graph associated with this object. + /// @param AllocationSize The size of the local accessor. + /// @param PropList List of properties for the underlying accessor. + dynamic_local_accessor( + [[maybe_unused]] experimental::command_graph + Graph, + [[maybe_unused]] range AllocationSize, + [[maybe_unused]] const property_list &PropList = {}) +#ifndef __SYCL_DEVICE_ONLY__ + : detail::dynamic_local_accessor_base( + Graph, detail::convertToArrayOfN<3, 1>(AllocationSize), Dimensions, + sizeof(DataT), PropList) +#endif + { + } + + local_accessor get() const { +#ifndef __SYCL_DEVICE_ONLY__ + throw sycl::exception(sycl::make_error_code(errc::invalid), + "Error: dynamic_local_accessor::get() can be only " + "called on the device!"); +#endif + return LocalAccessor; + } + + /// Updates on the host this dynamic_local_accessor and all registered + /// nodes with a new size. + /// @param Num The new number of elements in the unbounded array. + void update([[maybe_unused]] range NewAllocationSize) { +#ifndef __SYCL_DEVICE_ONLY__ + updateLocalAccessor(detail::convertToArrayOfN<3, 1>(NewAllocationSize)); +#endif + } + +private: + local_accessor + LocalAccessor; // FIXME is this needed on the host? + +#ifdef __SYCL_DEVICE_ONLY__ + void __init(typename local_accessor::ConcreteASPtrType Ptr, + range AccessRange, range range, + id id) { + this->LocalAccessor.__init(Ptr, AccessRange, range, id); + } +#endif + +#ifdef __SYCL_DEVICE_ONLY__ + [[maybe_unused]] unsigned char + Padding[sizeof(detail::dynamic_local_accessor_base)]; +#endif }; template diff --git a/sycl/include/sycl/handler.hpp b/sycl/include/sycl/handler.hpp index bfda64639683c..cc08c0ad02288 100644 --- a/sycl/include/sycl/handler.hpp +++ b/sycl/include/sycl/handler.hpp @@ -161,9 +161,13 @@ __SYCL_EXPORT void *async_malloc_from_pool(sycl::handler &h, size_t size, } // namespace ext::oneapi::experimental namespace ext::oneapi::experimental::detail { -class graph_impl; class dynamic_parameter_base; class dynamic_work_group_memory_base; +class dynamic_local_accessor_base; +class graph_impl; +class dynamic_parameter_impl; +class dynamic_work_group_memory_impl; +class dynamic_local_accessor_impl; } // namespace ext::oneapi::experimental::detail namespace detail { @@ -705,16 +709,40 @@ class __SYCL_EXPORT handler { *static_cast(detail::getValueFromDynamicParameter(DynamicParam)); // Set the arg in the handler as normal setArgHelper(ArgIndex, std::move(ArgValue)); + + ext::oneapi::experimental::detail::dynamic_parameter_impl *DynParamImpl = + detail::getSyclObjImpl(DynamicParam).get(); + // Register the dynamic parameter with the handler for later association // with the node being added - registerDynamicParameter(DynamicParam, ArgIndex); + registerDynamicParameter(DynParamImpl, ArgIndex); } - // setArgHelper for graph dynamic_work_group_memory void setArgHelper(int ArgIndex, ext::oneapi::experimental::detail::dynamic_work_group_memory_base - &DynWorkGroupBase); + &DynWorkGroupBase) { + + ext::oneapi::experimental::detail::dynamic_parameter_impl *DynParamImpl = + detail::getSyclObjImpl(DynWorkGroupBase).get(); + + addArg(detail::kernel_param_kind_t::kind_dynamic_work_group_memory, + DynParamImpl, 0, ArgIndex); + registerDynamicParameter(DynParamImpl, ArgIndex); + } + + void + setArgHelper(int ArgIndex, + ext::oneapi::experimental::detail::dynamic_local_accessor_base + &DynLocalAccessorBase) { + + ext::oneapi::experimental::detail::dynamic_parameter_impl *DynParamImpl = + detail::getSyclObjImpl(DynLocalAccessorBase).get(); + + addArg(detail::kernel_param_kind_t::kind_dynamic_accessor, DynParamImpl, 0, + ArgIndex); + registerDynamicParameter(DynParamImpl, ArgIndex); + } // setArgHelper for the raw_kernel_arg extension type. void setArgHelper(int ArgIndex, @@ -726,11 +754,11 @@ class __SYCL_EXPORT handler { /// Registers a dynamic parameter with the handler for later association with /// the node being created - /// @param DynamicParamBase + /// @param DynamicParamImpl /// @param ArgIndex void registerDynamicParameter( - ext::oneapi::experimental::detail::dynamic_parameter_base - &DynamicParamBase, + ext::oneapi::experimental::detail::dynamic_parameter_impl + *DynamicParamImpl, int ArgIndex); /// Verifies the kernel bundle to be used if any is set. This throws a @@ -1802,20 +1830,36 @@ class __SYCL_EXPORT handler { // set_arg for graph dynamic_parameters template void set_arg(int argIndex, - ext::oneapi::experimental::dynamic_parameter &dynamicParam) { + [[maybe_unused]] ext::oneapi::experimental::dynamic_parameter &dynamicParam) { setArgHelper(argIndex, dynamicParam); } // set_arg for graph dynamic_work_group_memory template - void set_arg( - int argIndex, - ext::oneapi::experimental::dynamic_work_group_memory - &dynWorkGroupMem) { + void + set_arg([[maybe_unused]] int argIndex, + [[maybe_unused]] ext::oneapi::experimental::dynamic_work_group_memory< + DataT, PropertyListT> &DynWorkGroupMem) { + +#ifndef __SYCL_DEVICE_ONLY__ ext::oneapi::experimental::detail::dynamic_work_group_memory_base - &dynWorkGroupBase = dynWorkGroupMem; - setArgHelper(argIndex, dynWorkGroupBase); + &DynWorkGroupBase = DynWorkGroupMem; + setArgHelper(argIndex, DynWorkGroupBase); +#endif + } + + // set_arg for graph dynamic_local_accessor + template + void + set_arg([[maybe_unused]] int argIndex, + [[maybe_unused]] ext::oneapi::experimental::dynamic_local_accessor< + DataT, Dimensions> &DynLocalAccessor) { +#ifndef __SYCL_DEVICE_ONLY__ + ext::oneapi::experimental::detail::dynamic_local_accessor_base + &DynLocalAccessorBase = DynLocalAccessor; + setArgHelper(argIndex, DynLocalAccessorBase); +#endif } // set_arg for the raw_kernel_arg extension type. diff --git a/sycl/source/detail/graph_impl.cpp b/sycl/source/detail/graph_impl.cpp index 931816551d95e..f7cbe00aed29e 100644 --- a/sycl/source/detail/graph_impl.cpp +++ b/sycl/source/detail/graph_impl.cpp @@ -2006,6 +2006,10 @@ dynamic_parameter_base::dynamic_parameter_base() : impl(std::make_shared()) {} #endif +dynamic_parameter_base::dynamic_parameter_base( + const std::shared_ptr &impl) + : impl(impl) {} + dynamic_parameter_base::dynamic_parameter_base( command_graph) : impl(std::make_shared()) {} @@ -2027,8 +2031,30 @@ void dynamic_parameter_base::updateAccessor( impl->updateAccessor(Acc); } -void dynamic_parameter_base::updateWorkGroupMem(size_t BufferSize) { - impl->updateWorkGroupMem(BufferSize); +dynamic_work_group_memory_base::dynamic_work_group_memory_base( + experimental::command_graph Graph, + size_t BufferSizeInBytes) + : dynamic_parameter_base(std::make_shared( + sycl::detail::getSyclObjImpl(Graph), BufferSizeInBytes)) {} + +void dynamic_work_group_memory_base::updateWorkGroupMem( + size_t NewBufferSizeInBytes) { + static_cast(impl.get()) + ->updateWorkGroupMem(NewBufferSizeInBytes); +} + +dynamic_local_accessor_base::dynamic_local_accessor_base( + experimental::command_graph Graph, + sycl::range<3> AllocationSize, int Dims, int ElemSize, + const property_list &PropList) + : dynamic_parameter_base(std::make_shared( + sycl::detail::getSyclObjImpl(Graph), AllocationSize, Dims, ElemSize, + PropList)) {} + +void dynamic_local_accessor_base::updateLocalAccessor( + sycl::range<3> NewAllocationSize) { + static_cast(impl.get()) + ->updateLocalAccessor(NewAllocationSize); } void dynamic_parameter_impl::updateValue(const raw_kernel_arg *NewRawValue, @@ -2086,39 +2112,6 @@ void dynamic_parameter_impl::updateAccessor( sizeof(sycl::detail::AccessorBaseHost)); } -void dynamic_parameter_impl::updateWorkGroupMem(size_t BufferSize) { - for (auto &[NodeWeak, ArgIndex] : MNodes) { - auto NodeShared = NodeWeak.lock(); - if (NodeShared) { - dynamic_parameter_impl::updateCGWorkGroupMem(NodeShared->MCommandGroup, - ArgIndex, BufferSize); - } - } - - for (auto &DynCGInfo : MDynCGs) { - auto DynCG = DynCGInfo.DynCG.lock(); - if (DynCG) { - auto &CG = DynCG->MCommandGroups[DynCGInfo.CGIndex]; - dynamic_parameter_impl::updateCGWorkGroupMem(CG, DynCGInfo.ArgIndex, - BufferSize); - } - } -} - -void dynamic_parameter_impl::updateCGWorkGroupMem( - std::shared_ptr CG, int ArgIndex, size_t BufferSize) { - - auto &Args = static_cast(CG.get())->MArgs; - for (auto &Arg : Args) { - if (Arg.MIndex != ArgIndex) { - continue; - } - assert(Arg.MType == sycl::detail::kernel_param_kind_t::kind_std_layout); - Arg.MSize = BufferSize; - break; - } -} - void dynamic_parameter_impl::updateCGArgValue( std::shared_ptr CG, int ArgIndex, const void *NewValue, size_t Size) { @@ -2184,6 +2177,90 @@ void dynamic_parameter_impl::updateCGAccessor( } } +void dynamic_work_group_memory_impl::updateWorkGroupMem( + size_t NewBufferSizeInBytes) { + for (auto &[NodeWeak, ArgIndex] : MNodes) { + auto NodeShared = NodeWeak.lock(); + if (NodeShared) { + dynamic_work_group_memory_impl::updateCGWorkGroupMem( + NodeShared->MCommandGroup, ArgIndex, NewBufferSizeInBytes); + } + } + + for (auto &DynCGInfo : MDynCGs) { + auto DynCG = DynCGInfo.DynCG.lock(); + if (DynCG) { + auto &CG = DynCG->MCommandGroups[DynCGInfo.CGIndex]; + dynamic_work_group_memory_impl::updateCGWorkGroupMem( + CG, DynCGInfo.ArgIndex, NewBufferSizeInBytes); + } + } +} + +void dynamic_work_group_memory_impl::updateCGWorkGroupMem( + std::shared_ptr CG, int ArgIndex, + size_t NewBufferSizeInBytes) { + + auto &Args = static_cast(CG.get())->MArgs; + for (auto &Arg : Args) { + if (Arg.MIndex != ArgIndex) { + continue; + } + assert(Arg.MType == sycl::detail::kernel_param_kind_t::kind_std_layout); + Arg.MSize = NewBufferSizeInBytes; + break; + } +} + +dynamic_local_accessor_impl::dynamic_local_accessor_impl( + std::shared_ptr GraphImpl, sycl::range<3> AllocationSize, + int Dims, int ElemSize, const property_list &PropList) + : dynamic_parameter_impl(GraphImpl), + LAccImplHost(AllocationSize, Dims, ElemSize, {}) { + checkGraphPropertiesAndThrow(PropList); +} + +void dynamic_local_accessor_impl::updateLocalAccessor( + range<3> NewAllocationSize) { + for (auto &[NodeWeak, ArgIndex] : MNodes) { + auto NodeShared = NodeWeak.lock(); + if (NodeShared) { + dynamic_local_accessor_impl::updateCGLocalAccessor( + NodeShared->MCommandGroup, ArgIndex, NewAllocationSize); + } + } + + for (auto &DynCGInfo : MDynCGs) { + auto DynCG = DynCGInfo.DynCG.lock(); + if (DynCG) { + auto &CG = DynCG->MCommandGroups[DynCGInfo.CGIndex]; + dynamic_local_accessor_impl::updateCGLocalAccessor(CG, DynCGInfo.ArgIndex, + NewAllocationSize); + } + } +} + +void dynamic_local_accessor_impl::updateCGLocalAccessor( + std::shared_ptr CG, int ArgIndex, + range<3> NewAllocationSize) { + + auto &Args = static_cast(CG.get())->MArgs; + for (auto &Arg : Args) { + if (Arg.MIndex != ArgIndex) { + continue; + } + assert(Arg.MType == sycl::detail::kernel_param_kind_t::kind_std_layout); + + // Update the local memory Size Argument + Arg.MSize = NewAllocationSize.size() * LAccImplHost.MElemSize; + + // MSize is used as an argument to the AccField kernel parameters. + LAccImplHost.MSize = NewAllocationSize; + + break; + } +} + dynamic_command_group_impl::dynamic_command_group_impl( const command_graph &Graph) : MGraph{sycl::detail::getSyclObjImpl(Graph)}, MActiveCGF(0), diff --git a/sycl/source/detail/graph_impl.hpp b/sycl/source/detail/graph_impl.hpp index 1600b76f7b991..ad3512bcbe1cc 100644 --- a/sycl/source/detail/graph_impl.hpp +++ b/sycl/source/detail/graph_impl.hpp @@ -823,6 +823,7 @@ class graph_impl : public std::enable_shared_from_this { /// Verifies the CG is valid to add to the graph and returns set of /// dependent nodes if so. /// @param CommandGroup The command group to verify and retrieve edges for. + /// @param CommandGroup The command group to verify and retrieve edges for. /// @return Set of dependent nodes in the graph. std::set> getCGEdges(const std::shared_ptr &CommandGroup) const; @@ -1618,22 +1619,6 @@ class dynamic_parameter_impl { /// @param Acc The new accessor value void updateAccessor(const sycl::detail::AccessorBaseHost *Acc); - /// Update the internal value of this dynamic parameter as well as the value - /// of this parameter in all registered nodes and dynamic CGs. Should only be - /// called for dynamic_work_group_memory arguments parameter. - /// @param BufferSize The total size in bytes of the new work_group_memory - /// array - void updateWorkGroupMem(size_t BufferSize); - - /// Static helper function for updating command-group - /// dynamic_work_group_memory arguments. - /// @param CG The command-group to update the argument information for. - /// @param ArgIndex The argument index to update. - /// @param BufferSize The total size in bytes of the new work_group_memory - /// array - static void updateCGWorkGroupMem(std::shared_ptr CG, - int ArgIndex, size_t BufferSize); - /// Static helper function for updating command-group value arguments. /// @param CG The command-group to update the argument information for. /// @param ArgIndex The argument index to update. @@ -1664,6 +1649,61 @@ class dynamic_parameter_impl { inline static std::atomic NextAvailableID = 0; }; +class dynamic_work_group_memory_impl : public dynamic_parameter_impl { + +public: + dynamic_work_group_memory_impl(std::shared_ptr GraphImpl, + size_t BufferSizeInBytes) + : dynamic_parameter_impl(GraphImpl), + BufferSizeInBytes(BufferSizeInBytes) {} + + virtual ~dynamic_work_group_memory_impl() = default; + + /// Update the internal value of this dynamic parameter as well as the value + /// of this parameter in all registered nodes and dynamic CGs. + /// @param NewBufferSizeInBytes The total size in bytes of the new + /// work_group_memory array. + void updateWorkGroupMem(size_t NewBufferSizeInBytes); + + /// Static helper function for updating command-group + /// dynamic_work_group_memory arguments. + /// @param CG The command-group to update the argument information for. + /// @param ArgIndex The argument index to update. + /// @param NewBufferSizeInBytes The total size in bytes of the new + /// work_group_memory array. + void updateCGWorkGroupMem(std::shared_ptr CG, int ArgIndex, + size_t NewBufferSizeInBytes); + + size_t BufferSizeInBytes; +}; + +class dynamic_local_accessor_impl : public dynamic_parameter_impl { + +public: + dynamic_local_accessor_impl(std::shared_ptr GraphImpl, + sycl::range<3> AllocationSize, int Dims, + int ElemSize, const property_list &PropList); + + virtual ~dynamic_local_accessor_impl() = default; + + /// Update the internal value of this dynamic parameter as well as the value + /// of this parameter in all registered nodes and dynamic CGs. + /// @param NewAllocationSize The new allocation size for the + /// dynamic_local_accessor. + void updateLocalAccessor(range<3> NewAllocationSize); + + /// Static helper function for updating command-group dynamic_local_accessor + /// arguments. + /// @param CG The command-group to update the argument information for. + /// @param ArgIndex The argument index to update. + /// @param NewAllocationSize The new allocation size for the + /// dynamic_local_accessor. + void updateCGLocalAccessor(std::shared_ptr CG, int ArgIndex, + range<3> NewAllocationSize); + + detail::LocalAccessorImplHost LAccImplHost; +}; + class dynamic_command_group_impl : public std::enable_shared_from_this { public: diff --git a/sycl/source/detail/scheduler/commands.cpp b/sycl/source/detail/scheduler/commands.cpp index 783ce3b1412bb..7a6b110b0b853 100644 --- a/sycl/source/detail/scheduler/commands.cpp +++ b/sycl/source/detail/scheduler/commands.cpp @@ -2309,6 +2309,7 @@ void SetArgBasedOnType( break; case kernel_param_kind_t::kind_stream: break; + case kernel_param_kind_t::kind_dynamic_accessor: case kernel_param_kind_t::kind_accessor: { Requirement *Req = (Requirement *)(Arg.MPtr); diff --git a/sycl/source/handler.cpp b/sycl/source/handler.cpp index 1c604405d14eb..d923d8ca921ae 100644 --- a/sycl/source/handler.cpp +++ b/sycl/source/handler.cpp @@ -107,9 +107,9 @@ fill_image_type(const ext::oneapi::experimental::image_descriptor &Desc, UrDesc.type = Desc.type == sycl::ext::oneapi::experimental::image_type::cubemap ? UR_MEM_TYPE_IMAGE_CUBEMAP_EXP - : Desc.type == sycl::ext::oneapi::experimental::image_type::gather - ? UR_MEM_TYPE_IMAGE_GATHER_EXP - : UrDesc.type; + : Desc.type == sycl::ext::oneapi::experimental::image_type::gather + ? UR_MEM_TYPE_IMAGE_GATHER_EXP + : UrDesc.type; return Desc.array_size; } @@ -432,7 +432,7 @@ event handler::finalize() { // Check associated accessors bool AccFound = false; for (detail::ArgDesc &Acc : impl->MAssociatedAccesors) { - if (Acc.MType == detail::kernel_param_kind_t::kind_accessor && + if ((Acc.MType == detail::kernel_param_kind_t::kind_accessor) && static_cast(Acc.MPtr) == AccImpl) { AccFound = true; break; @@ -938,6 +938,41 @@ static void addArgsForGlobalAccessor(detail::Requirement *AccImpl, size_t Index, } } +static void addArgsForLocalAccessor(detail::LocalAccessorImplHost *LAcc, size_t Index, + size_t &IndexShift, + bool IsKernelCreatedFromSource, + std::vector &Args, + bool IsESIMD) { + using detail::kernel_param_kind_t; + + range<3> &LAccSize = LAcc->MSize; + const int Dims = LAcc->MDims; + int SizeInBytes = LAcc->MElemSize; + for (int I = 0; I < Dims; ++I) + SizeInBytes *= LAccSize[I]; + + // Some backends do not accept zero-sized local memory arguments, so we + // make it a minimum allocation of 1 byte. + SizeInBytes = std::max(SizeInBytes, 1); + Args.emplace_back(kernel_param_kind_t::kind_std_layout, nullptr, + SizeInBytes, Index + IndexShift); + // TODO ESIMD currently does not suport MSize field passing yet + // accessor::init for ESIMD-mode accessor has a single field, translated + // to a single kernel argument set above. + if (!IsESIMD && !IsKernelCreatedFromSource) { + ++IndexShift; + const size_t SizeAccField = (Dims == 0 ? 1 : Dims) * sizeof(LAccSize[0]); + Args.emplace_back(kernel_param_kind_t::kind_std_layout, &LAccSize, SizeAccField, + Index + IndexShift); + ++IndexShift; + Args.emplace_back(kernel_param_kind_t::kind_std_layout, &LAccSize, SizeAccField, + Index + IndexShift); + ++IndexShift; + Args.emplace_back(kernel_param_kind_t::kind_std_layout, &LAccSize, SizeAccField, + Index + IndexShift); + } +} + void handler::processArg(void *Ptr, const detail::kernel_param_kind_t &Kind, const int Size, const size_t Index, size_t &IndexShift, bool IsKernelCreatedFromSource, bool IsESIMD) { @@ -1008,34 +1043,11 @@ void handler::processArg(void *Ptr, const detail::kernel_param_kind_t &Kind, break; } case access::target::local: { - detail::LocalAccessorImplHost *LAcc = + detail::LocalAccessorImplHost *LAccImpl = static_cast(Ptr); - range<3> &Size = LAcc->MSize; - const int Dims = LAcc->MDims; - int SizeInBytes = LAcc->MElemSize; - for (int I = 0; I < Dims; ++I) - SizeInBytes *= Size[I]; - // Some backends do not accept zero-sized local memory arguments, so we - // make it a minimum allocation of 1 byte. - SizeInBytes = std::max(SizeInBytes, 1); - impl->MArgs.emplace_back(kernel_param_kind_t::kind_std_layout, nullptr, - SizeInBytes, Index + IndexShift); - // TODO ESIMD currently does not suport MSize field passing yet - // accessor::init for ESIMD-mode accessor has a single field, translated - // to a single kernel argument set above. - if (!IsESIMD && !IsKernelCreatedFromSource) { - ++IndexShift; - const size_t SizeAccField = (Dims == 0 ? 1 : Dims) * sizeof(Size[0]); - addArg(kernel_param_kind_t::kind_std_layout, &Size, SizeAccField, - Index + IndexShift); - ++IndexShift; - addArg(kernel_param_kind_t::kind_std_layout, &Size, SizeAccField, - Index + IndexShift); - ++IndexShift; - addArg(kernel_param_kind_t::kind_std_layout, &Size, SizeAccField, - Index + IndexShift); - } + addArgsForLocalAccessor(LAccImpl, Index, IndexShift, + IsKernelCreatedFromSource, impl->MArgs, IsESIMD); break; } case access::target::image: @@ -1058,19 +1070,40 @@ void handler::processArg(void *Ptr, const detail::kernel_param_kind_t &Kind, } break; } + case kernel_param_kind_t::kind_dynamic_accessor: { + const access::target AccTarget = + static_cast(Size & AccessTargetMask); + switch (AccTarget) { + case access::target::local: { + + auto *DynParamImpl = static_cast< + ext::oneapi::experimental::detail::dynamic_parameter_impl *>(Ptr); + + registerDynamicParameter(DynParamImpl, Index + IndexShift); + + auto *DynLocalAccessorImpl = static_cast(DynParamImpl); + + addArgsForLocalAccessor( + &DynLocalAccessorImpl->LAccImplHost, Index, IndexShift, IsKernelCreatedFromSource, impl->MArgs, IsESIMD); + break; + } + default: { + assert(false && "Unsupported dynamic accessor target"); + } + } + break; + } case kernel_param_kind_t::kind_dynamic_work_group_memory: { - auto *DynBase = static_cast< - ext::oneapi::experimental::detail::dynamic_parameter_base *>(Ptr); + auto *DynParamImpl = static_cast< + ext::oneapi::experimental::detail::dynamic_parameter_impl *>(Ptr); - auto *DynWorkGroupBase = static_cast< - ext::oneapi::experimental::detail::dynamic_work_group_memory_base *>( - Ptr); + registerDynamicParameter(DynParamImpl, Index + IndexShift); - registerDynamicParameter(*DynBase, Index + IndexShift); + auto *DynWorkGroupImpl = static_cast(DynParamImpl); addArg(kernel_param_kind_t::kind_std_layout, nullptr, - DynWorkGroupBase->BufferSize, Index + IndexShift); + DynWorkGroupImpl->BufferSizeInBytes, Index + IndexShift); break; } case kernel_param_kind_t::kind_work_group_memory: { @@ -1103,19 +1136,6 @@ void handler::setArgHelper(int ArgIndex, detail::work_group_memory_impl &Arg) { impl->MWorkGroupMemoryObjects.back().get(), 0, ArgIndex); } -void handler::setArgHelper( - int ArgIndex, - ext::oneapi::experimental::detail::dynamic_work_group_memory_base - &DynWorkGroupBase) { - - addArg(detail::kernel_param_kind_t::kind_dynamic_work_group_memory, - &DynWorkGroupBase, 0, ArgIndex); - - // Register the dynamic parameter with the handler for later association - // with the node being added - registerDynamicParameter(DynWorkGroupBase, ArgIndex); -} - // The argument can take up more space to store additional information about // MAccessRange, MMemoryRange, and MOffset added with addArgsForGlobalAccessor. // We use the worst-case estimate because the lifetime of the vector is short. @@ -1179,7 +1199,28 @@ void handler::extractArgsAndReqsFromLambda( static_cast(Ptr); Ptr = detail::getSyclObjImpl(*LocalAccBase).get(); } + } else if (Kind == detail::kernel_param_kind_t::kind_dynamic_accessor) { + // For args kind of accessor Size is information about accessor. + // The first 11 bits of Size encodes the accessor target. + const access::target AccTarget = + static_cast(Size & AccessTargetMask); + // Only local targets are supported for dynamic accessors. + assert(AccTarget == access::target::local); + + ext::oneapi::experimental::detail::dynamic_parameter_base + *DynamicParamBase = static_cast< + ext::oneapi::experimental::detail::dynamic_parameter_base *>( + Ptr); + Ptr = detail::getSyclObjImpl(*DynamicParamBase).get(); + } else if (Kind == + detail::kernel_param_kind_t::kind_dynamic_work_group_memory) { + ext::oneapi::experimental::detail::dynamic_parameter_base + *DynamicParamBase = + static_cast(Ptr); + Ptr = detail::getSyclObjImpl(*DynamicParamBase).get(); } + processArg(Ptr, Kind, Size, I, IndexShift, /*IsKernelCreatedFromSource=*/false, IsESIMD); } @@ -2162,8 +2203,9 @@ void handler::setNDRangeUsed(bool Value) { (void)Value; } #endif void handler::registerDynamicParameter( - ext::oneapi::experimental::detail::dynamic_parameter_base &DynamicParamBase, + ext::oneapi::experimental::detail::dynamic_parameter_impl *DynamicParamImpl, int ArgIndex) { + if (MQueue && MQueue->hasCommandGraph()) { throw sycl::exception( make_error_code(errc::invalid), @@ -2175,8 +2217,7 @@ void handler::registerDynamicParameter( "Dynamic Parameters cannot be used with normal SYCL submissions"); } - auto Paraimpl = detail::getSyclObjImpl(DynamicParamBase); - impl->MDynamicParameters.emplace_back(Paraimpl.get(), ArgIndex); + impl->MDynamicParameters.emplace_back(DynamicParamImpl, ArgIndex); } bool handler::eventNeeded() const { return impl->MEventNeeded; } diff --git a/sycl/test-e2e/Graph/Update/dyn_cgf_with_dyn_local_accessor.cpp b/sycl/test-e2e/Graph/Update/dyn_cgf_with_dyn_local_accessor.cpp new file mode 100644 index 0000000000000..47d51d2aea10c --- /dev/null +++ b/sycl/test-e2e/Graph/Update/dyn_cgf_with_dyn_local_accessor.cpp @@ -0,0 +1,92 @@ +// RUN: %{build} -o %t.out +// RUN: %{run} %t.out +// Extra run to check for leaks in Level Zero using UR_L0_LEAKS_DEBUG +// RUN: %if level_zero %{env SYCL_PI_LEVEL_ZERO_USE_IMMEDIATE_COMMANDLISTS=0 %{l0_leak_check} %{run} %t.out 2>&1 | FileCheck %s --implicit-check-not=LEAK %} +// Extra run to check for immediate-command-list in Level Zero +// RUN: %if level_zero %{env SYCL_PI_LEVEL_ZERO_USE_IMMEDIATE_COMMANDLISTS=1 %{l0_leak_check} %{run} %t.out 2>&1 | FileCheck %s --implicit-check-not=LEAK %} + +// Tests using a dynamic command-group object with dynamic_work_group_memory. + +#include "../graph_common.hpp" +#include + +int main() { + queue Queue{}; + exp_ext::command_graph Graph{Queue.get_context(), Queue.get_device()}; + + constexpr int LocalSizeA{16}; + constexpr int LocalSizeB{64}; + + int *PtrA = malloc_device(Size, Queue); + int *PtrB = malloc_device(Size, Queue); + + std::vector HostDataA(Size); + std::vector HostDataB(Size); + + exp_ext::dynamic_local_accessor DynLocalMem(Graph, LocalSizeA); + + nd_range<1> NDrangeA{Size, LocalSizeA}; + auto CGFA = [&](handler &CGH) { + CGH.parallel_for(NDrangeA, [=](nd_item<1> Item) { + size_t GlobalID = Item.get_global_id(); + auto LocalRange = Item.get_local_range(0); + auto LocalMem = DynLocalMem.get(); + + LocalMem[Item.get_local_id()] = LocalRange; + group_barrier(Item.get_group()); + + for (size_t i{0}; i < LocalRange; ++i) { + PtrA[GlobalID] += LocalMem[i]; + } + }); + }; + + nd_range<1> NDrangeB{Size, LocalSizeB}; + auto CGFB = [&](handler &CGH) { + CGH.parallel_for(NDrangeB, [=](nd_item<1> Item) { + size_t GlobalID = Item.get_global_id(); + auto LocalRange = Item.get_local_range(0); + auto LocalMem = DynLocalMem.get(); + + LocalMem[Item.get_local_id()] = LocalRange; + group_barrier(Item.get_group()); + + for (size_t i{0}; i < LocalRange; ++i) { + PtrB[GlobalID] += LocalMem[i]; + } + }); + }; + + auto DynamicCG = exp_ext::dynamic_command_group(Graph, {CGFA, CGFB}); + auto DynamicCGNode = Graph.add(DynamicCG); + + auto ExecGraph = Graph.finalize(exp_ext::property::graph::updatable{}); + + auto ExecuteGraphAndVerifyResults = [&](bool A, bool B, const int LocalSize) { + Queue.memset(PtrA, 0, Size * sizeof(int)); + Queue.memset(PtrB, 0, Size * sizeof(int)); + Queue.wait(); + + Queue.ext_oneapi_graph(ExecGraph).wait(); + + Queue.copy(PtrA, HostDataA.data(), Size); + Queue.copy(PtrB, HostDataB.data(), Size); + Queue.wait(); + + for (size_t i = 0; i < Size; i++) { + assert(HostDataA[i] == (A ? LocalSize * LocalSize : 0)); + assert(HostDataB[i] == (B ? LocalSize * LocalSize : 0)); + } + }; + ExecuteGraphAndVerifyResults(true, false, LocalSizeA); + + DynamicCG.set_active_index(1); + DynLocalMem.update(LocalSizeB); + ExecGraph.update(DynamicCGNode); + ExecuteGraphAndVerifyResults(false, true, LocalSizeB); + + sycl::free(PtrA, Queue); + sycl::free(PtrB, Queue); + + return 0; +} diff --git a/sycl/test-e2e/Graph/Update/dyn_local_accessor_basic.cpp b/sycl/test-e2e/Graph/Update/dyn_local_accessor_basic.cpp new file mode 100644 index 0000000000000..9e0ded56853db --- /dev/null +++ b/sycl/test-e2e/Graph/Update/dyn_local_accessor_basic.cpp @@ -0,0 +1,71 @@ +// RUN: %{build} -o %t.out +// RUN: %{run} %t.out +// Extra run to check for leaks in Level Zero using UR_L0_LEAKS_DEBUG +// RUN: %if level_zero %{env SYCL_PI_LEVEL_ZERO_USE_IMMEDIATE_COMMANDLISTS=0 %{l0_leak_check} %{run} %t.out 2>&1 | FileCheck %s --implicit-check-not=LEAK %} +// Extra run to check for immediate-command-list in Level Zero +// RUN: %if level_zero %{env SYCL_PI_LEVEL_ZERO_USE_IMMEDIATE_COMMANDLISTS=1 %{l0_leak_check} %{run} %t.out 2>&1 | FileCheck %s --implicit-check-not=LEAK %} + +// Tests updating dynamic_work_group_memory with a new size. + +#include "../graph_common.hpp" +#include + +int main() { + queue Queue{}; + constexpr int LocalSize{16}; + + using T = int; + + exp_ext::command_graph Graph{Queue.get_context(), Queue.get_device()}; + + int *PtrA = malloc_device(Size, Queue); + std::vector HostDataA(Size); + + exp_ext::dynamic_local_accessor DynLocalMem{Graph, LocalSize}; + + Queue.memset(PtrA, 0, Size * sizeof(T)).wait(); + + nd_range<1> NDRange{range{Size}, range{LocalSize}}; + + auto KernelNode = Graph.add([&](handler &cgh) { + cgh.parallel_for(NDRange, [=](nd_item<1> Item) { + size_t GlobalID = Item.get_global_id(); + auto LocalRange = Item.get_local_range(0); + auto LocalMem = DynLocalMem.get(); + + LocalMem[Item.get_local_id()] = LocalRange; + group_barrier(Item.get_group()); + + for (size_t i{0}; i < LocalRange; ++i) { + PtrA[GlobalID] += LocalMem[i]; + } + }); + }); + + auto ExecGraph = Graph.finalize(exp_ext::property::graph::updatable{}); + + Queue.ext_oneapi_graph(ExecGraph).wait(); + + Queue.copy(PtrA, HostDataA.data(), Size).wait(); + for (size_t i = 0; i < Size; i++) { + assert(HostDataA[i] == LocalSize * LocalSize); + } + + Queue.memset(PtrA, 0, Size * sizeof(T)).wait(); + + constexpr int NewLocalSize{32}; + + DynLocalMem.update(NewLocalSize); + KernelNode.update_nd_range(nd_range<1>{range{Size}, range{NewLocalSize}}); + ExecGraph.update(KernelNode); + + Queue.ext_oneapi_graph(ExecGraph).wait(); + + Queue.copy(PtrA, HostDataA.data(), Size).wait(); + for (size_t i = 0; i < Size; i++) { + assert(HostDataA[i] == NewLocalSize * NewLocalSize); + } + + free(PtrA, Queue); + return 0; +} diff --git a/sycl/test-e2e/Graph/Update/dyn_local_accessor_multiple.cpp b/sycl/test-e2e/Graph/Update/dyn_local_accessor_multiple.cpp new file mode 100644 index 0000000000000..0501ada14faad --- /dev/null +++ b/sycl/test-e2e/Graph/Update/dyn_local_accessor_multiple.cpp @@ -0,0 +1,77 @@ +// RUN: %{build} -o %t.out +// RUN: %{run} %t.out +// Extra run to check for leaks in Level Zero using UR_L0_LEAKS_DEBUG +// RUN: %if level_zero %{env SYCL_PI_LEVEL_ZERO_USE_IMMEDIATE_COMMANDLISTS=0 %{l0_leak_check} %{run} %t.out 2>&1 | FileCheck %s --implicit-check-not=LEAK %} +// Extra run to check for immediate-command-list in Level Zero +// RUN: %if level_zero %{env SYCL_PI_LEVEL_ZERO_USE_IMMEDIATE_COMMANDLISTS=1 %{l0_leak_check} %{run} %t.out 2>&1 | FileCheck %s --implicit-check-not=LEAK %} + +// Tests using more than one dynamic_work_group_memory object in the same node. + +#include "../graph_common.hpp" +#include + +int main() { + queue Queue{}; + + constexpr int LocalSize{16}; + + using T = int; + + exp_ext::command_graph Graph{Queue.get_context(), Queue.get_device()}; + + int *PtrA = malloc_device(Size, Queue); + std::vector HostDataA(Size); + + exp_ext::dynamic_local_accessor DynLocalMemA{Graph, LocalSize}; + exp_ext::dynamic_local_accessor DynLocalMemB{Graph, LocalSize}; + + Queue.memset(PtrA, 0, Size * sizeof(T)).wait(); + + nd_range<1> NDRange{range{Size}, range{LocalSize}}; + + auto KernelNode = Graph.add([&](handler &cgh) { + cgh.parallel_for(NDRange, [=](nd_item<1> Item) { + size_t GlobalID = Item.get_global_id(); + auto LocalRange = Item.get_local_range(0); + + auto LocalMemA = DynLocalMemA.get(); + auto LocalMemB = DynLocalMemB.get(); + + LocalMemA[Item.get_local_id()] = LocalRange; + LocalMemB[Item.get_local_id()] = LocalRange; + group_barrier(Item.get_group()); + + for (size_t i{0}; i < LocalRange; ++i) { + PtrA[GlobalID] += (T)(LocalMemA[i] + LocalMemB[i]); + } + }); + }); + + auto ExecGraph = Graph.finalize(exp_ext::property::graph::updatable{}); + + Queue.ext_oneapi_graph(ExecGraph).wait(); + + Queue.copy(PtrA, HostDataA.data(), Size).wait(); + for (size_t i = 0; i < Size; i++) { + assert(HostDataA[i] == LocalSize * LocalSize * 2); + } + + Queue.memset(PtrA, 0, Size * sizeof(T)).wait(); + + constexpr int NewLocalSize{32}; + + DynLocalMemA.update(NewLocalSize); + DynLocalMemB.update(NewLocalSize); + KernelNode.update_nd_range(nd_range<1>{range{Size}, range{NewLocalSize}}); + ExecGraph.update(KernelNode); + + Queue.ext_oneapi_graph(ExecGraph).wait(); + + Queue.copy(PtrA, HostDataA.data(), Size).wait(); + for (size_t i = 0; i < Size; i++) { + assert(HostDataA[i] == NewLocalSize * NewLocalSize * 2); + } + + free(PtrA, Queue); + return 0; +} diff --git a/sycl/test-e2e/Graph/Update/dyn_local_accessor_multiple_nodes.cpp b/sycl/test-e2e/Graph/Update/dyn_local_accessor_multiple_nodes.cpp new file mode 100644 index 0000000000000..e0755b9bd22f1 --- /dev/null +++ b/sycl/test-e2e/Graph/Update/dyn_local_accessor_multiple_nodes.cpp @@ -0,0 +1,129 @@ +// RUN: %{build} -o %t.out +// RUN: %{run} %t.out +// Extra run to check for leaks in Level Zero using UR_L0_LEAKS_DEBUG +// RUN: %if level_zero %{env SYCL_PI_LEVEL_ZERO_USE_IMMEDIATE_COMMANDLISTS=0 %{l0_leak_check} %{run} %t.out 2>&1 | FileCheck %s --implicit-check-not=LEAK %} +// Extra run to check for immediate-command-list in Level Zero +// RUN: %if level_zero %{env SYCL_PI_LEVEL_ZERO_USE_IMMEDIATE_COMMANDLISTS=1 %{l0_leak_check} %{run} %t.out 2>&1 | FileCheck %s --implicit-check-not=LEAK %} + +// Tests using a dynamic_work_group_memory with multiple nodes. + +#include "../graph_common.hpp" +#include + +int main() { + queue Queue{}; + + constexpr int LocalSize{16}; + + using T = int; + + exp_ext::command_graph Graph{Queue.get_context(), Queue.get_device()}; + + int *PtrA = malloc_device(Size * Size, Queue); + std::vector HostDataA(Size * Size); + + exp_ext::dynamic_local_accessor DynLocalMemA{Graph, range<2>{LocalSize, LocalSize}}; + exp_ext::dynamic_local_accessor DynLocalMemC{Graph, range<1>{LocalSize}}; + + Queue.memset(PtrA, 0, Size * Size * sizeof(T)).wait(); + + nd_range<2> NDRange2D{range<2>{Size, Size}, range<2>{LocalSize, LocalSize}}; + + auto KernelNodeA = Graph.add([&](handler &cgh) { + cgh.parallel_for(NDRange2D, [=](nd_item<2> Item) { + size_t GlobalID = Item.get_global_linear_id(); + auto LocalRange = Item.get_local_range(0); + const auto i = Item.get_local_id()[0]; + const auto j = Item.get_local_id()[1]; + + auto LocalMemA = DynLocalMemA.get(); + + LocalMemA[i][j] = LocalRange; + group_barrier(Item.get_group()); + + for (size_t k{0}; k < LocalRange; ++k) { + for (size_t z{0}; z < LocalRange; ++z) { + PtrA[GlobalID] += (T)(LocalMemA[k][z]); + } + } + }); + }); + + auto KernelNodeB = Graph.add( + [&](handler &cgh) { + cgh.parallel_for(NDRange2D, [=](nd_item<2> Item) { + size_t GlobalID = Item.get_global_linear_id(); + auto LocalRange = Item.get_local_range(0); + const auto i = Item.get_local_id()[0]; + const auto j = Item.get_local_id()[1]; + + auto LocalMemA = DynLocalMemA.get(); + + LocalMemA[i][j] = LocalRange; + group_barrier(Item.get_group()); + + // Substracting what was added in NodeA gives 0. + for (size_t k{0}; k < LocalRange; ++k) { + for (size_t z{0}; z < LocalRange; ++z) { + PtrA[GlobalID] -= (T)(LocalMemA[k][z]); + } + } + }); + }, + exp_ext::property::node::depends_on{KernelNodeA}); + + nd_range<1> NDRange{Size * Size, LocalSize}; + auto KernelNodeC = Graph.add( + [&](handler &cgh) { + cgh.parallel_for(NDRange, [=](nd_item<1> Item) { + size_t GlobalID = Item.get_global_id(); + auto LocalRange = Item.get_local_range(0); + + auto LocalMemC = DynLocalMemC.get(); + + LocalMemC[Item.get_local_id()] = LocalRange; + group_barrier(Item.get_group()); + + for (size_t i{0}; i < LocalRange; ++i) { + PtrA[GlobalID] += (T)(LocalMemC[i]); + } + }); + }, + exp_ext::property::node::depends_on{KernelNodeB}); + + auto ExecGraph = Graph.finalize(exp_ext::property::graph::updatable{}); + + Queue.ext_oneapi_graph(ExecGraph).wait(); + + Queue.copy(PtrA, HostDataA.data(), Size * Size).wait(); + for (size_t i = 0; i < Size * Size; i++) { + assert(HostDataA[i] == LocalSize * LocalSize); + } + + Queue.memset(PtrA, 0, Size * Size * sizeof(T)).wait(); + + constexpr size_t NewLocalSize{32}; + + DynLocalMemA.update(range<2>{NewLocalSize, NewLocalSize}); + DynLocalMemC.update(range<1>{NewLocalSize}); + + KernelNodeA.update_nd_range( + nd_range<2>{range<2>{Size, Size}, range<2>{NewLocalSize, NewLocalSize}}); + KernelNodeB.update_nd_range( + nd_range<2>{range<2>{Size, Size}, range<2>{NewLocalSize, NewLocalSize}}); + KernelNodeC.update_nd_range(nd_range<1>{Size * Size, NewLocalSize}); + + ExecGraph.update(KernelNodeA); + ExecGraph.update(KernelNodeB); + ExecGraph.update(KernelNodeC); + + Queue.ext_oneapi_graph(ExecGraph).wait(); + + Queue.copy(PtrA, HostDataA.data(), Size * Size).wait(); + for (size_t i = 0; i < Size * Size; i++) { + assert(HostDataA[i] == NewLocalSize * NewLocalSize); + } + + free(PtrA, Queue); + return 0; +} \ No newline at end of file diff --git a/sycl/test/abi/sycl_symbols_linux.dump b/sycl/test/abi/sycl_symbols_linux.dump index 1e1ca428eff80..70715d28926e7 100644 --- a/sycl/test/abi/sycl_symbols_linux.dump +++ b/sycl/test/abi/sycl_symbols_linux.dump @@ -3086,7 +3086,6 @@ _ZN4sycl3_V13ext6oneapi12experimental6detail19compile_from_sourceERNS0_13kernel_ _ZN4sycl3_V13ext6oneapi12experimental6detail22dynamic_parameter_base11updateValueEPKNS3_14raw_kernel_argEm _ZN4sycl3_V13ext6oneapi12experimental6detail22dynamic_parameter_base11updateValueEPKvm _ZN4sycl3_V13ext6oneapi12experimental6detail22dynamic_parameter_base14updateAccessorEPKNS0_6detail16AccessorBaseHostE -_ZN4sycl3_V13ext6oneapi12experimental6detail22dynamic_parameter_base18updateWorkGroupMemEm _ZN4sycl3_V13ext6oneapi12experimental6detail22dynamic_parameter_baseC1ENS3_13command_graphILNS3_11graph_stateE0EEE _ZN4sycl3_V13ext6oneapi12experimental6detail22dynamic_parameter_baseC1ENS3_13command_graphILNS3_11graph_stateE0EEEmPKv _ZN4sycl3_V13ext6oneapi12experimental6detail22dynamic_parameter_baseC2ENS3_13command_graphILNS3_11graph_stateE0EEE @@ -3520,7 +3519,14 @@ _ZN4sycl3_V17handler11saveCodeLocENS0_6detail13code_locationE _ZN4sycl3_V17handler11saveCodeLocENS0_6detail13code_locationEb _ZN4sycl3_V17handler11storeRawArgEPKvm _ZN4sycl3_V17handler12addReductionERKSt10shared_ptrIKvE -_ZN4sycl3_V17handler12setArgHelperEiRNS0_3ext6oneapi12experimental6detail30dynamic_work_group_memory_baseE +_ZN4sycl3_V13ext6oneapi12experimental6detail30dynamic_work_group_memory_base18updateWorkGroupMemEm +_ZN4sycl3_V13ext6oneapi12experimental6detail30dynamic_work_group_memory_baseC1ENS3_13command_graphILNS3_11graph_stateE0EEEm +_ZN4sycl3_V13ext6oneapi12experimental6detail27dynamic_local_accessor_baseC2ENS3_13command_graphILNS3_11graph_stateE0EEENS0_5rangeILi3EEEiiRKNS0_13property_listE +_ZN4sycl3_V13ext6oneapi12experimental6detail30dynamic_work_group_memory_baseC2ENS3_13command_graphILNS3_11graph_stateE0EEEm +_ZN4sycl3_V13ext6oneapi12experimental6detail27dynamic_local_accessor_baseC1ENS3_13command_graphILNS3_11graph_stateE0EEENS0_5rangeILi3EEEiiRKNS0_13property_listE +_ZN4sycl3_V13ext6oneapi12experimental6detail22dynamic_parameter_baseC1ERKSt10shared_ptrINS4_22dynamic_parameter_implEE +_ZN4sycl3_V13ext6oneapi12experimental6detail27dynamic_local_accessor_base19updateLocalAccessorENS0_5rangeILi3EEE +_ZN4sycl3_V13ext6oneapi12experimental6detail22dynamic_parameter_baseC2ERKSt10shared_ptrINS4_22dynamic_parameter_implEE _ZN4sycl3_V17handler12setArgHelperEiRNS0_6detail22work_group_memory_implE _ZN4sycl3_V17handler13getKernelNameEv _ZN4sycl3_V17handler14addAccessorReqESt10shared_ptrINS0_6detail16AccessorImplHostEE @@ -3568,7 +3574,7 @@ _ZN4sycl3_V17handler24GetRangeRoundingSettingsERmS2_S2_ _ZN4sycl3_V17handler24ext_intel_read_host_pipeENS0_6detail11string_viewEPvmb _ZN4sycl3_V17handler24ext_oneapi_memcpy2d_implEPvmPKvmmm _ZN4sycl3_V17handler24ext_oneapi_memset2d_implEPvmimm -_ZN4sycl3_V17handler24registerDynamicParameterERNS0_3ext6oneapi12experimental6detail22dynamic_parameter_baseEi +_ZN4sycl3_V17handler24registerDynamicParameterEPNS0_3ext6oneapi12experimental6detail22dynamic_parameter_implEi _ZN4sycl3_V17handler25ext_intel_write_host_pipeENS0_6detail11string_viewEPvmb _ZN4sycl3_V17handler26associateWithHandlerCommonESt10shared_ptrINS0_6detail16AccessorImplHostEEi _ZN4sycl3_V17handler26setNDRangeDescriptorPaddedENS0_5rangeILi3EEENS0_2idILi3EEEi diff --git a/sycl/unittests/Extensions/CommandGraph/CommandGraph.cpp b/sycl/unittests/Extensions/CommandGraph/CommandGraph.cpp index 453e1beb72adf..dfb100acec848 100644 --- a/sycl/unittests/Extensions/CommandGraph/CommandGraph.cpp +++ b/sycl/unittests/Extensions/CommandGraph/CommandGraph.cpp @@ -689,3 +689,18 @@ TEST_F(CommandGraphTest, DynamicWorkGroupMemoryGet) { Graph, LocalSize}; ASSERT_ANY_THROW(DynLocalMem.get()); } + +// Tests that dynamic_local_accessor.get() will throw on the host side. +TEST_F(CommandGraphTest, DynamicLocalAccessorGet) { + device Dev; + context Ctx{{Dev}}; + queue Queue{Ctx, Dev}; + constexpr int LocalSize{32}; + + ext::oneapi::experimental::command_graph Graph{Queue.get_context(), + Queue.get_device()}; + + ext::oneapi::experimental::dynamic_local_accessor DynLocalMem{ + Graph, LocalSize}; + ASSERT_ANY_THROW(DynLocalMem.get()); +} diff --git a/sycl/unittests/Extensions/CommandGraph/CommonReferenceSemantics.cpp b/sycl/unittests/Extensions/CommandGraph/CommonReferenceSemantics.cpp index 9451bf1334b12..c6a9333cb02a5 100644 --- a/sycl/unittests/Extensions/CommandGraph/CommonReferenceSemantics.cpp +++ b/sycl/unittests/Extensions/CommandGraph/CommonReferenceSemantics.cpp @@ -100,6 +100,28 @@ TEST_F(CommandGraphTest, DynamicParamSemantics) { testSemantics>(Factory)); } +TEST_F(CommandGraphTest, DynamicWorkGroupMemorySemantics) { + sycl::queue Queue; + experimental::command_graph Graph(Queue.get_context(), Queue.get_device()); + + auto Factory = [&]() { + return experimental::dynamic_work_group_memory(Graph, 1); + }; + ASSERT_NO_FATAL_FAILURE( + testSemantics>(Factory)); +} + +TEST_F(CommandGraphTest, DynamicLocalAccessorSemantics) { + sycl::queue Queue; + experimental::command_graph Graph(Queue.get_context(), Queue.get_device()); + + auto Factory = [&]() { + return experimental::dynamic_local_accessor(Graph, 1); + }; + ASSERT_NO_FATAL_FAILURE( + (testSemantics>(Factory))); +} + /** * Checks for potential hash collisions in the hash implementations of graph * related classes. diff --git a/sycl/unittests/Extensions/CommandGraph/Update.cpp b/sycl/unittests/Extensions/CommandGraph/Update.cpp index db0e04c0cccaa..109adf2df7cd0 100644 --- a/sycl/unittests/Extensions/CommandGraph/Update.cpp +++ b/sycl/unittests/Extensions/CommandGraph/Update.cpp @@ -26,20 +26,29 @@ TEST_F(CommandGraphTest, UpdatableException) { EXPECT_ANY_THROW(ExecGraphNoUpdatable.update(Node)); } -TEST_F(CommandGraphTest, DynamicParamRegister) { - // Check that registering a dynamic param with a node from a graph that was - // not passed to its constructor does not throw. +TEST_F(CommandGraphTest, DynamicObjRegister) { + // Check that registering a dynamic object with a node from a graph that was + // not passed to its constructor throws. + auto CheckRegisterWrongGraph = [&](auto& DynObj) { + auto OtherGraph = + experimental::command_graph(Queue.get_context(), Queue.get_device()); + auto Node = OtherGraph.add([&](sycl::handler &cgh) { + // This should not throw + EXPECT_NO_THROW(cgh.set_arg(0, DynamicParam)); + cgh.single_task>([]() {}); + }); + }; + // TODO: Update test when deprecated constructors that take a graph have been // removed. - experimental::dynamic_parameter DynamicParam(Graph, int{}); + experimental::dynamic_parameter DynamicParam{Graph, int{}}; + ASSERT_NO_FATAL_FAILURE(CheckRegisterWrongGraph(DynamicParam)); - auto OtherGraph = - experimental::command_graph(Queue.get_context(), Queue.get_device()); - auto Node = OtherGraph.add([&](sycl::handler &cgh) { - // This should not throw - EXPECT_NO_THROW(cgh.set_arg(0, DynamicParam)); - cgh.single_task>([]() {}); - }); + experimental::dynamic_work_group_memory DynamicWorkGroupMem{Graph, 1}; + ASSERT_NO_FATAL_FAILURE(CheckRegisterWrongGraph(DynamicWorkGroupMem)); + + experimental::dynamic_local_accessor DynamicLocalAcc{Graph, 1}; + ASSERT_NO_FATAL_FAILURE(CheckRegisterWrongGraph(DynamicLocalAcc)); } TEST_F(CommandGraphTest, UpdateNodeNotInGraph) { @@ -67,73 +76,82 @@ TEST_F(CommandGraphTest, UpdateWithUnchangedNode) { } TEST_F(CommandGraphTest, UpdateNodeTypeExceptions) { - // Check that registering a dynamic parameter with various node types either + // Check that registering a dynamic object with various node types either // throws or does not throw as appropriate - // Allocate some pointers for memory nodes - int *PtrA = malloc_device(16, Queue); - int *PtrB = malloc_device(16, Queue); + auto CheckNodeCompatibility = [&](auto& DynObj) { + // Allocate some pointers for memory nodes + int *PtrA = malloc_device(16, Queue); + int *PtrB = malloc_device(16, Queue); + + ASSERT_NO_THROW(auto NodeKernel = Graph.add([&](sycl::handler &cgh) { + cgh.set_arg(0, DynObj); + cgh.single_task>([]() {}); + })); + + ASSERT_ANY_THROW(auto NodeMemcpy = Graph.add([&](sycl::handler &cgh) { + cgh.set_arg(0, DynObj); + cgh.memcpy(PtrA, PtrB, 16 * sizeof(int)); + })); + + ASSERT_ANY_THROW(auto NodeMemset = Graph.add([&](sycl::handler &cgh) { + cgh.set_arg(0, DynObj); + cgh.memset(PtrB, 7, 16 * sizeof(int)); + })); + + ASSERT_ANY_THROW(auto NodeMemfill = Graph.add([&](sycl::handler &cgh) { + cgh.set_arg(0, DynObj); + cgh.fill(PtrB, 7, 16); + })); + + ASSERT_ANY_THROW(auto NodePrefetch = Graph.add([&](sycl::handler &cgh) { + cgh.set_arg(0, DynObj); + cgh.prefetch(PtrA, 16 * sizeof(int)); + })); + + ASSERT_ANY_THROW(auto NodeMemadvise = Graph.add([&](sycl::handler &cgh) { + cgh.set_arg(0, DynObj); + cgh.mem_advise(PtrA, 16 * sizeof(int), 1); + })); + + ASSERT_ANY_THROW(auto NodeHostTask = Graph.add([&](sycl::handler &cgh) { + cgh.set_arg(0, DynObj); + cgh.host_task([]() {}); + })); + + ASSERT_ANY_THROW(auto NodeBarreriTask = Graph.add([&](sycl::handler &cgh) { + cgh.set_arg(0, DynObj); + cgh.ext_oneapi_barrier(); + })); + + Graph.begin_recording(Queue); + ASSERT_ANY_THROW(auto NodeBarrierTask = Graph.add([&](sycl::handler &cgh) { + cgh.set_arg(0, DynObj); + cgh.ext_oneapi_barrier(); + })); + Graph.end_recording(Queue); + + auto NodeEmpty = Graph.add(); + + experimental::command_graph Subgraph(Queue.get_context(), Queue.get_device()); + // Add an empty node to the subgraph + Subgraph.add(); + + auto SubgraphExec = Subgraph.finalize(); + ASSERT_ANY_THROW(auto NodeSubgraph = Graph.add([&](sycl::handler &cgh) { + cgh.set_arg(0, DynObj); + cgh.ext_oneapi_graph(SubgraphExec); + })); + }; experimental::dynamic_parameter DynamicParam{Graph, int{}}; + ASSERT_NO_FATAL_FAILURE(CheckNodeCompatibility(DynamicParam)); - ASSERT_NO_THROW(auto NodeKernel = Graph.add([&](sycl::handler &cgh) { - cgh.set_arg(0, DynamicParam); - cgh.single_task>([]() {}); - })); - - ASSERT_ANY_THROW(auto NodeMemcpy = Graph.add([&](sycl::handler &cgh) { - cgh.set_arg(0, DynamicParam); - cgh.memcpy(PtrA, PtrB, 16 * sizeof(int)); - })); - - ASSERT_ANY_THROW(auto NodeMemset = Graph.add([&](sycl::handler &cgh) { - cgh.set_arg(0, DynamicParam); - cgh.memset(PtrB, 7, 16 * sizeof(int)); - })); - - ASSERT_ANY_THROW(auto NodeMemfill = Graph.add([&](sycl::handler &cgh) { - cgh.set_arg(0, DynamicParam); - cgh.fill(PtrB, 7, 16); - })); - - ASSERT_ANY_THROW(auto NodePrefetch = Graph.add([&](sycl::handler &cgh) { - cgh.set_arg(0, DynamicParam); - cgh.prefetch(PtrA, 16 * sizeof(int)); - })); - - ASSERT_ANY_THROW(auto NodeMemadvise = Graph.add([&](sycl::handler &cgh) { - cgh.set_arg(0, DynamicParam); - cgh.mem_advise(PtrA, 16 * sizeof(int), 1); - })); - - ASSERT_ANY_THROW(auto NodeHostTask = Graph.add([&](sycl::handler &cgh) { - cgh.set_arg(0, DynamicParam); - cgh.host_task([]() {}); - })); - - ASSERT_ANY_THROW(auto NodeBarreriTask = Graph.add([&](sycl::handler &cgh) { - cgh.set_arg(0, DynamicParam); - cgh.ext_oneapi_barrier(); - })); - - Graph.begin_recording(Queue); - ASSERT_ANY_THROW(auto NodeBarrierTask = Graph.add([&](sycl::handler &cgh) { - cgh.set_arg(0, DynamicParam); - cgh.ext_oneapi_barrier(); - })); - Graph.end_recording(Queue); - - auto NodeEmpty = Graph.add(); - - experimental::command_graph Subgraph(Queue.get_context(), Dev); - // Add an empty node to the subgraph - Subgraph.add(); + experimental::dynamic_work_group_memory DynamicWorkGroupMem{Graph, 1}; + ASSERT_NO_FATAL_FAILURE(CheckNodeCompatibility(DynamicWorkGroupMem)); - auto SubgraphExec = Subgraph.finalize(); - ASSERT_ANY_THROW(auto NodeSubgraph = Graph.add([&](sycl::handler &cgh) { - cgh.set_arg(0, DynamicParam); - cgh.ext_oneapi_graph(SubgraphExec); - })); + experimental::dynamic_local_accessor DynamicLocalAcc{Graph, 1}; + ASSERT_NO_FATAL_FAILURE(CheckNodeCompatibility(DynamicLocalAcc)); } TEST_F(CommandGraphTest, UpdateRangeErrors) {