diff --git a/llvm/include/llvm/SYCLLowerIR/DeviceConfigFile.td b/llvm/include/llvm/SYCLLowerIR/DeviceConfigFile.td index 839db6cfc3717..286546e0dbc9b 100644 --- a/llvm/include/llvm/SYCLLowerIR/DeviceConfigFile.td +++ b/llvm/include/llvm/SYCLLowerIR/DeviceConfigFile.td @@ -98,6 +98,7 @@ def Aspectext_oneapi_clock_sub_group : Aspect<"ext_oneapi_clock_sub_group">; def Aspectext_oneapi_clock_work_group : Aspect<"ext_oneapi_clock_work_group">; def Aspectext_oneapi_clock_device : Aspect<"ext_oneapi_clock_device">; def Aspectext_oneapi_is_integrated_gpu : Aspect<"ext_oneapi_is_integrated_gpu">; +def Aspectext_oneapi_ipc_memory : Aspect<"ext_oneapi_ipc_memory">; // Deprecated aspects def AspectInt64_base_atomics : Aspect<"int64_base_atomics">; @@ -176,7 +177,8 @@ def : TargetInfo<"__TestAspectList", Aspectext_oneapi_clock_sub_group, Aspectext_oneapi_clock_work_group, Aspectext_oneapi_clock_device, - Aspectext_oneapi_is_integrated_gpu], + Aspectext_oneapi_is_integrated_gpu, + Aspectext_oneapi_ipc_memory], []>; // This definition serves the only purpose of testing whether the deprecated aspect list defined in here and in SYCL RT // match. diff --git a/sycl/doc/extensions/experimental/sycl_ext_oneapi_inter_process_communication.asciidoc b/sycl/doc/extensions/experimental/sycl_ext_oneapi_inter_process_communication.asciidoc new file mode 100644 index 0000000000000..d590550f209a2 --- /dev/null +++ b/sycl/doc/extensions/experimental/sycl_ext_oneapi_inter_process_communication.asciidoc @@ -0,0 +1,393 @@ += sycl_ext_oneapi_inter_process_communication + +:source-highlighter: coderay +:coderay-linenums-mode: table + +// This section needs to be after the document title. +:doctype: book +:toc2: +:toc: left +:encoding: utf-8 +:lang: en +:dpcpp: pass:[DPC++] +:endnote: —{nbsp}end{nbsp}note + +// Set the default source code type in this document to C++, +// for syntax highlighting purposes. This is needed because +// docbook uses c++ and html5 uses cpp. +:language: {basebackend@docbook:c++:cpp} + + +== Notice + +[%hardbreaks] +Copyright (C) 2025 Intel Corporation. All rights reserved. + +Khronos(R) is a registered trademark and SYCL(TM) and SPIR(TM) are trademarks +of The Khronos Group Inc. OpenCL(TM) is a trademark of Apple Inc. used by +permission by Khronos. + + +== Contact + +To report problems with this extension, please open a new issue at: + +https://github.com/intel/llvm/issues + + +== Dependencies + +:khr-default-context: https://registry.khronos.org/SYCL/specs/sycl-2020/html/sycl-2020.html#sec:khr-default-context + +This extension is written against the SYCL 2020 revision 10 specification. All +references below to the "core SYCL specification" or to section numbers in the +SYCL specification refer to that revision. + +[_Note:_ The APIs in this extension uses the concept of a per-platform +default context as specified in section 4.6.2 "Platform class" of the core SYCL +specification. +As a convenience, this extension specification describes the behavior of its +APIs by using the `khr_get_default_context` function from {khr-default-context}[ +sycl_khr_default_context], however there is no true dependency on that +extension. +An implementation could still implement +sycl_ext_oneapi_inter_process_communication even without implementing +sycl_khr_default_context because the core SYCL specification still requires +there to be a per-platform default context even if the core SYCL specification +does not provide a convenient way to get it. +_{endnote}_] + + +== Status + +This is an experimental extension specification, intended to provide early +access to features and gather community feedback. Interfaces defined in this +specification are implemented in {dpcpp}, but they are not finalized and may +change incompatibly in future versions of {dpcpp} without prior notice. +*Shipping software products should not rely on APIs defined in this +specification.* + + +== Overview + +This extension adds the ability for SYCL programs to share device USM memory +allocations between processes. This is done by the allocating process creating +a new IPC memory handle through the new free functions and transferring the +returned handle data to the other processes. The other processes can use the +handle data to retrieve the corresponding device USM memory. + + +== Specification + +=== Feature test macro + +This extension provides a feature-test macro as described in the core SYCL +specification. An implementation supporting this extension must predefine the +macro `SYCL_EXT_ONEAPI_INTER_PROCESS_COMMUNICATION` to one of the values defined +in the table below. Applications can test for the existence of this macro to +determine if the implementation supports this feature, or applications can test +the macro's value to determine which of the extension's features the +implementation supports. + +[%header,cols="1,5"] +|=== +|Value +|Description + +|1 +|The APIs of this experimental extension are not versioned, so the + feature-test macro always has this value. +|=== + +=== Extension to `enum class aspect` + +[source] +---- +namespace sycl { +enum class aspect { + ... + ext_oneapi_ipc_memory +} +} +---- + +If a SYCL device has this aspect, that device supports the `get` and `open` +functions specified in the following section. + + +=== Inter-process communicable memory + +This extension adds a new type alias `handle_data_t` and a new IPC memory handle +type `handle` under the `ipc_memory` experimental namespace. + +``` +namespace sycl::ext::oneapi::experimental::ipc_memory { + +using handle_data_t = sycl::span; + +struct handle { + handle_data_t data() const; +}; + +} +``` + +|==== +a| +[frame=all,grid=none] +!==== +a! +[source] +---- +handle_data_t data() const; +---- +!==== + +_Preconditions:_ The `put` function has not previously been called with this +handle and the USM device memory associated with this handle has not been freed. + +_Returns:_ The handle data associated with the IPC handle object. This data can +be transferred to other processes, but cannot be used to recreate a `handle` +object. + +!==== + +Additionally, this extension adds new free functions under the `ipc_memory` +experimental namespace. + +``` +namespace sycl::ext::oneapi::experimental::ipc_memory { + +handle get(void *ptr, const sycl::context &ctx); + +handle get(void *ptr); + +void put(handle &ipc_handle, const sycl::context &ctx); + +void put(handle &ipc_handle); + +void *open(const handle_data_t &handle_data, const sycl::context &ctx, + const sycl::device &dev); + +void *open(const handle_data_t &handle_data, const sycl::device &dev); + +void *open(const handle_data_t &handle_data); + +void close(void *ptr, const sycl::context &ctx); + +void close(void *ptr); + +} +``` + +|==== +a| +[frame=all,grid=none] +!==== +a! +[source] +---- +handle get(void *ptr, const sycl::context &ctx) +---- +!==== + +_Preconditions:_ `ptr` is a pointer to USM device memory on some device _D_, and +`ctx` is the same context as `ptr` was allocated against, using the USM device +memory allocation routines. + +_Returns:_ An IPC "handle" to this USM memory allocation. The bytes of this +handle can be transferred to another process on the same system, and the other +process can use the handle to get a pointer to the same USM allocation through a +call to the `open` function. + +_Throws:_ An exception with the `errc::feature_not_supported` error code if +device _D_ does not have `aspect::ext_oneapi_ipc_memory`. + +!==== +a! +[source] +---- +handle get(void *ptr) +---- +!==== + +_Effects_: Equivalent to: + +[source,c++,indent=2] +---- +sycl::device d; +sycl::context ctxt = d.get_platform().khr_get_default_context(); +return ipc_memory::get(ptr, ctxt); +---- + +!==== +a! +[source] +---- +void put(handle &ipc_handle, const sycl::context &ctx) +---- +!==== + +_Preconditions:_ `ipc_handle` is the IPC "handle" to USM device memory that was +returned from a call to `get` either in this process or in some other process on +the same system. The `put` function has not been previously called on the +handle. + +_Effects:_ Deallocates resources associated with the handle. These resources are +automatically deallocated when the USM device memory is freed, so it is not +strictly necessary to call the `put` function. After the resources associated +with the handle have been deallocated, i.e. through a call to the `put` function +or through freeing the USM device memory, the handle data is invalid and using +it in the `put` and `open` functions will result in undefined behavior. + +[_Note:_ Any pointers retrieved through a call to the `open` function in any +process on the system will still be valid after a call to the `put` function and +must still be freed through calls to the `close` function. +_{endnote}_] + +!==== +a! +[source] +---- +void put(handle &ipc_handle) +---- +!==== + +_Effects_: Equivalent to: + +[source,c++,indent=2] +---- +sycl::device d; +sycl::context ctxt = d.get_platform().khr_get_default_context(); +ipc_memory::put(handle_data, ctxt); +---- + +!==== +a! +[source] +---- +void *open(const handle_data_t &handle_data, const sycl::context &ctx, + const sycl::device &dev) +---- +!==== + +_Preconditions:_ `handle_data` is the IPC "handle" to USM device memory that was +returned from a call to the `get` function either in this process or in some +other process on the same system. That USM device memory is accessible on device +`dev`. + +_Returns:_ A pointer to the same USM device memory represented by `handle_data`. +The returned pointer is associated with context `ctx`. It can be used wherever a +USM device pointer for device `dev` and context `ctx` is expected, except it +cannot be passed to `sycl::free`. Instead, use the `close` function to free this +memory pointer. + +[_Note:_ The `open` function can be called multiple times on the same handle +within the same process. Each call to the `open` function may return a unique +pointer value even for the same handle, therefore each call to the `open` +function must have a matching call to the `close` function. +_{endnote}_] + +[_Note:_ The pointer returned from a call to the `open` function is no longer +valid if the associated USM device memory is freed through a call to the +`sycl::free` function. +_{endnote}_] + +_Throws:_ + + * An exception with the `errc::feature_not_supported` error code if device + `dev` does not have `aspect::ext_oneapi_ipc_memory`. + * An exception with the `errc::invalid` error code if the handle data + `handle_data` has an unexpected number of bytes. + +!==== +a! +[source] +---- +void *open(const handle_data_t &handle_data, const sycl::device &dev) +---- +!==== + +_Effects_: Equivalent to: + +[source,c++,indent=2] +---- +sycl::context ctxt = dev.get_platform().khr_get_default_context(); +return ipc_memory::put(handle_data, ctxt, dev); +---- + +!==== +a! +[source] +---- +void *open(const handle_data_t &handle_data, const sycl::context &ctx, + const sycl::device &dev) +---- +!==== + +_Effects_: Equivalent to: + +[source,c++,indent=2] +---- +sycl::device d; +sycl::context ctxt = d.get_platform().khr_get_default_context(); +return ipc_memory::open(handle_data, ctxt, d); +---- + +!==== +a! +[source] +---- +void close(void *ptr, const sycl::context &ctx) +---- +!==== + +_Precondition:_ `ptr` was previously returned from a call to the `open` function +in this same process, where `ctx` was passed as the context. This `ptr` value +has not yet been closed by calling the `close` function. + +_Effects:_ Closes a device USM pointer previously returned by a call to +the `open` function. + +!==== +a! +[source] +---- +void close(void *ptr) +---- +!==== + +_Effects_: Equivalent to: + +[source,c++,indent=2] +---- +sycl::device d; +sycl::context ctxt = d.get_platform().khr_get_default_context(); +ipc_memory::close(ptr, ctxt); +---- + +|==== + + +== Issues + +=== Level Zero file descriptor duplication dependency + +The IPC memory APIs in Level Zero on Linux currently requires the ability to +duplicate file descriptors between processes. For security this is not allowed +by default on Linux-based systems, so in order for the IPC memory APIs to work +with Level Zero on Linux the user must either call `prctl(PR_SET_PTRACER, ...)` +in the IPC handle owner process or enable the functionality globally using + +```bash +sudo bash -c "echo 0 > /proc/sys/kernel/yama/ptrace_scope" +``` + +See also https://github.com/oneapi-src/unified-memory-framework/tree/main?tab=readme-ov-file#level-zero-memory-provider. + + +=== Level Zero IPC memory Windows support + +The new IPC memory APIs are not currently supported on the Level Zero backend on +Windows systems. + diff --git a/sycl/include/sycl/ext/oneapi/experimental/ipc_memory.hpp b/sycl/include/sycl/ext/oneapi/experimental/ipc_memory.hpp new file mode 100644 index 0000000000000..20b9b65cc3bd3 --- /dev/null +++ b/sycl/include/sycl/ext/oneapi/experimental/ipc_memory.hpp @@ -0,0 +1,87 @@ +//==------- ipc_memory.hpp --- SYCL inter-process communicable memory ------==// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#pragma once + +#if (!defined(_HAS_STD_BYTE) || _HAS_STD_BYTE != 0) + +#include +#include +#include +#include +#include +#include + +#include + +namespace sycl { +inline namespace _V1 { + +namespace ext::oneapi::experimental::ipc_memory { + +using handle_data_t = sycl::span; + +struct handle { +public: + handle_data_t data() const { + return {reinterpret_cast(MData), MSize}; + } + +private: + handle(void *Data, size_t Size) : MData{Data}, MSize{Size} {} + + void *MData; + size_t MSize; + + friend __SYCL_EXPORT handle get(void *Ptr, const sycl::context &Ctx); + friend __SYCL_EXPORT void put(handle &HandleData, const sycl::context &Ctx); +}; + +__SYCL_EXPORT handle get(void *Ptr, const sycl::context &Ctx); + +inline handle get(void *Ptr) { + sycl::device Dev; + sycl::context Ctx = Dev.get_platform().khr_get_default_context(); + return ipc_memory::get(Ptr, Ctx); +} + +__SYCL_EXPORT void put(handle &HandleData, const sycl::context &Ctx); + +inline void put(handle &HandleData) { + sycl::device Dev; + sycl::context Ctx = Dev.get_platform().khr_get_default_context(); + ipc_memory::put(HandleData, Ctx); +} + +__SYCL_EXPORT void *open(handle_data_t HandleData, const sycl::context &Ctx, + const sycl::device &Dev); + +inline void *open(handle_data_t HandleData, const sycl::device &Dev) { + sycl::context Ctx = Dev.get_platform().khr_get_default_context(); + return ipc_memory::open(HandleData, Ctx, Dev); +} + +inline void *open(handle_data_t HandleData) { + sycl::device Dev; + sycl::context Ctx = Dev.get_platform().khr_get_default_context(); + return ipc_memory::open(HandleData, Ctx, Dev); +} + +__SYCL_EXPORT void close(void *Ptr, const sycl::context &Ctx); + +inline void close(void *Ptr) { + sycl::device Dev; + sycl::context Ctx = Dev.get_platform().khr_get_default_context(); + ipc_memory::close(Ptr, Ctx); +} + +} // namespace ext::oneapi::experimental::ipc_memory +} // namespace _V1 +} // namespace sycl + +#endif diff --git a/sycl/include/sycl/info/aspects.def b/sycl/include/sycl/info/aspects.def index 4c5cb0a3ec8b2..b4c6839dd8b86 100644 --- a/sycl/include/sycl/info/aspects.def +++ b/sycl/include/sycl/info/aspects.def @@ -84,3 +84,4 @@ __SYCL_ASPECT(ext_oneapi_clock_sub_group, 91) __SYCL_ASPECT(ext_oneapi_clock_work_group, 92) __SYCL_ASPECT(ext_oneapi_clock_device, 93) __SYCL_ASPECT(ext_oneapi_is_integrated_gpu, 94) +__SYCL_ASPECT(ext_oneapi_ipc_memory, 95) diff --git a/sycl/include/sycl/sycl.hpp b/sycl/include/sycl/sycl.hpp index ec3708a32cf63..ab6c294a38a11 100644 --- a/sycl/include/sycl/sycl.hpp +++ b/sycl/include/sycl/sycl.hpp @@ -131,6 +131,7 @@ can be disabled by setting SYCL_DISABLE_FSYCL_SYCLHPP_WARNING macro.") #include #include #include +#include #include #include #include diff --git a/sycl/source/CMakeLists.txt b/sycl/source/CMakeLists.txt index 4159303f87d19..7815aa511a866 100644 --- a/sycl/source/CMakeLists.txt +++ b/sycl/source/CMakeLists.txt @@ -322,6 +322,7 @@ set(SYCL_COMMON_SOURCES "handler.cpp" "image.cpp" "interop_handle.cpp" + "ipc_memory.cpp" "kernel.cpp" "kernel_bundle.cpp" "physical_mem.cpp" diff --git a/sycl/source/detail/device_impl.hpp b/sycl/source/detail/device_impl.hpp index 13c5f5c16ce3c..9eaa57a2e0174 100644 --- a/sycl/source/detail/device_impl.hpp +++ b/sycl/source/detail/device_impl.hpp @@ -1597,6 +1597,10 @@ class device_impl : public std::enable_shared_from_this { get_info_impl_nocheck().value_or( 0); } + CASE(ext_oneapi_ipc_memory) { + return get_info_impl_nocheck() + .value_or(0); + } else { return false; // This device aspect has not been implemented yet. } diff --git a/sycl/source/detail/ur_device_info_ret_types.inc b/sycl/source/detail/ur_device_info_ret_types.inc index 9c7a12379efdc..4d0b86f56ed99 100644 --- a/sycl/source/detail/ur_device_info_ret_types.inc +++ b/sycl/source/detail/ur_device_info_ret_types.inc @@ -162,6 +162,7 @@ MAP(UR_DEVICE_INFO_NODE_MASK, uint32_t) // These aren't present in the specification, extracted from ur_api.h // instead. MAP(UR_DEVICE_INFO_2D_BLOCK_ARRAY_CAPABILITIES_EXP, ur_exp_device_2d_block_array_capability_flags_t) +MAP(UR_DEVICE_INFO_IPC_MEMORY_SUPPORT_EXP, ur_bool_t) MAP(UR_DEVICE_INFO_ASYNC_USM_ALLOCATIONS_SUPPORT_EXP, ur_bool_t) MAP(UR_DEVICE_INFO_BINDLESS_IMAGES_1D_USM_SUPPORT_EXP, ur_bool_t) MAP(UR_DEVICE_INFO_BINDLESS_IMAGES_2D_USM_SUPPORT_EXP, ur_bool_t) diff --git a/sycl/source/feature_test.hpp.in b/sycl/source/feature_test.hpp.in index f4e21f60ee760..ba700c812c6dd 100644 --- a/sycl/source/feature_test.hpp.in +++ b/sycl/source/feature_test.hpp.in @@ -127,6 +127,7 @@ inline namespace _V1 { #define SYCL_KHR_DEFAULT_CONTEXT 1 #define SYCL_EXT_INTEL_EVENT_MODE 1 #define SYCL_EXT_ONEAPI_TANGLE 1 +#define SYCL_EXT_ONEAPI_INTER_PROCESS_COMMUNICATION 1 // Unfinished KHR extensions. These extensions are only available if the // __DPCPP_ENABLE_UNFINISHED_KHR_EXTENSIONS macro is defined. diff --git a/sycl/source/ipc_memory.cpp b/sycl/source/ipc_memory.cpp new file mode 100644 index 0000000000000..5ab0512e69683 --- /dev/null +++ b/sycl/source/ipc_memory.cpp @@ -0,0 +1,89 @@ +//==------- ipc_memory.cpp --- SYCL inter-process communicable memory ------==// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include +#include +#include +#include +#include + +namespace sycl { +inline namespace _V1 { +namespace ext::oneapi::experimental::ipc_memory { + +__SYCL_EXPORT handle get(void *Ptr, const sycl::context &Ctx) { + auto CtxImpl = sycl::detail::getSyclObjImpl(Ctx); + sycl::detail::adapter_impl &Adapter = CtxImpl->getAdapter(); + + // If the API fails, check that the device actually supported it. We only do + // this if UR fails to avoid the device-lookup overhead. + auto CheckDeviceSupport = [Ptr, &Ctx]() { + sycl::device Dev = get_pointer_device(Ptr, Ctx); + if (!Dev.has(aspect::ext_oneapi_ipc_memory)) + throw sycl::exception( + sycl::make_error_code(errc::feature_not_supported), + "Device does not support aspect::ext_oneapi_ipc_memory."); + }; + + void *HandlePtr = nullptr; + size_t HandleSize = 0; + auto UrRes = + Adapter.call_nocheck( + CtxImpl->getHandleRef(), Ptr, &HandlePtr, &HandleSize); + if (UrRes != UR_RESULT_SUCCESS) { + CheckDeviceSupport(); + Adapter.checkUrResult(UrRes); + } + return {HandlePtr, HandleSize}; +} + +__SYCL_EXPORT void put(handle &Handle, const sycl::context &Ctx) { + auto CtxImpl = sycl::detail::getSyclObjImpl(Ctx); + CtxImpl->getAdapter().call( + CtxImpl->getHandleRef(), Handle.MData); +} + +__SYCL_EXPORT void *open(handle_data_t HandleData, const sycl::context &Ctx, + const sycl::device &Dev) { + if (!Dev.has(aspect::ext_oneapi_ipc_memory)) + throw sycl::exception( + sycl::make_error_code(errc::feature_not_supported), + "Device does not support aspect::ext_oneapi_ipc_memory."); + + auto CtxImpl = sycl::detail::getSyclObjImpl(Ctx); + sycl::detail::adapter_impl &Adapter = CtxImpl->getAdapter(); + + // TODO: UMF and UR currently requires the handle data to be non-const, so we + // need const-cast the data pointer. Once this has been changed, the + // const-cast can be removed. + // CMPLRLLVM-71181 + // https://github.com/oneapi-src/unified-memory-framework/issues/1536 + std::byte *HandleDataPtr = const_cast(HandleData.data()); + + void *Ptr = nullptr; + ur_result_t UrRes = + Adapter.call_nocheck( + CtxImpl->getHandleRef(), getSyclObjImpl(Dev)->getHandleRef(), + HandleDataPtr, HandleData.size(), &Ptr); + if (UrRes == UR_RESULT_ERROR_INVALID_VALUE) + throw sycl::exception(sycl::make_error_code(errc::invalid), + "HandleData data size does not correspond " + "to the target platform's IPC memory handle size."); + Adapter.checkUrResult(UrRes); + return Ptr; +} + +__SYCL_EXPORT void close(void *Ptr, const sycl::context &Ctx) { + auto CtxImpl = sycl::detail::getSyclObjImpl(Ctx); + CtxImpl->getAdapter().call( + CtxImpl->getHandleRef(), Ptr); +} + +} // namespace ext::oneapi::experimental::ipc_memory +} // namespace _V1 +} // namespace sycl diff --git a/sycl/test-e2e/Experimental/ipc_memory.cpp b/sycl/test-e2e/Experimental/ipc_memory.cpp new file mode 100644 index 0000000000000..0e329b25b447b --- /dev/null +++ b/sycl/test-e2e/Experimental/ipc_memory.cpp @@ -0,0 +1,121 @@ +// REQUIRES: aspect-usm_device_allocations && aspect-ext_oneapi_ipc_memory + +// UNSUPPORTED: level_zero && windows +// UNSUPPORTED-TRACKER: UMFW-348 + +// RUN: %{build} -o %t.out +// RUN: %{run} %t.out + +#include +#include +#include + +#include +#include +#include + +#if defined(__linux__) +#include +#include +#include +#endif // defined(__linux__) + +namespace syclexp = sycl::ext::oneapi::experimental; + +constexpr size_t N = 32; +constexpr const char *CommsFile = "ipc_comms.txt"; + +int spawner(int argc, char *argv[]) { + assert(argc == 1); + sycl::queue Q; + +#if defined(__linux__) + // UMF currently requires ptrace permissions to be set for the spawner. As + // such we need to set it until this limitation has been addressed. + // https://github.com/oneapi-src/unified-memory-framework/tree/main?tab=readme-ov-file#level-zero-memory-provider + if (Q.get_backend() == sycl::backend::ext_oneapi_level_zero && + prctl(PR_SET_PTRACER, getppid()) == -1) { + std::cout << "Failed to set ptracer permissions!" << std::endl; + return 1; + } +#endif // defined(__linux__) + + int *DataPtr = sycl::malloc_device(N, Q); + Q.parallel_for(N, [=](sycl::item<1> I) { + DataPtr[I] = static_cast(I.get_linear_id()); + }).wait(); + + { + // Write handle data to file. + { + syclexp::ipc_memory::handle Handle = + syclexp::ipc_memory::get(DataPtr, Q.get_context()); + syclexp::ipc_memory::handle_data_t HandleData = Handle.data(); + size_t HandleDataSize = HandleData.size(); + std::fstream FS(CommsFile, std::ios_base::out | std::ios_base::binary); + FS.write(reinterpret_cast(&HandleDataSize), sizeof(size_t)); + FS.write(reinterpret_cast(HandleData.data()), + HandleDataSize); + } + + // Spawn other process with an argument. + std::string Cmd = std::string{argv[0]} + " 1"; + std::cout << "Spawning: " << Cmd << std::endl; + std::system(Cmd.c_str()); + } + + int Failures = 0; + int Read[N] = {0}; + Q.copy(DataPtr, Read, N).wait(); + for (size_t I = 0; I < N; ++I) { + if (Read[I] != (N - I)) { + ++Failures; + std::cout << "Failed from spawner: Result at " << I + << " unexpected: " << Read[I] << " != " << (N - I) << std::endl; + } + } + sycl::free(DataPtr, Q); + return Failures; +} + +int consumer() { + sycl::queue Q; + + // Read the handle data. + std::fstream FS(CommsFile, std::ios_base::in | std::ios_base::binary); + size_t HandleSize = 0; + FS.read(reinterpret_cast(&HandleSize), sizeof(size_t)); + std::unique_ptr HandleData{new std::byte[HandleSize]}; + FS.read(reinterpret_cast(HandleData.get()), HandleSize); + + // Open IPC handle. + syclexp::ipc_memory::handle_data_t Handle{HandleData.get(), + HandleData.get() + HandleSize}; + int *DataPtr = reinterpret_cast( + syclexp::ipc_memory::open(Handle, Q.get_context(), Q.get_device())); + + // Test the data already in the USM pointer. + int Failures = 0; + int Read[N] = {0}; + Q.copy(DataPtr, Read, N).wait(); + for (size_t I = 0; I < N; ++I) { + if (Read[I] != I) { + ++Failures; + std::cout << "Failed from consumer: Result at " << I + << " unexpected: " << Read[I] << " != " << I << std::endl; + } + } + + Q.parallel_for(N, [=](sycl::item<1> I) { + DataPtr[I] = static_cast(N - I.get_linear_id()); + }).wait(); + + // Close the IPC pointer. + syclexp::ipc_memory::close(DataPtr, Q.get_context()); + + return Failures; +} + +int main(int argc, char *argv[]) { + return argc == 1 ? spawner(argc, argv) : consumer(); +} diff --git a/sycl/test-e2e/Experimental/ipc_put_after_free.cpp b/sycl/test-e2e/Experimental/ipc_put_after_free.cpp new file mode 100644 index 0000000000000..2851a2589eb7f --- /dev/null +++ b/sycl/test-e2e/Experimental/ipc_put_after_free.cpp @@ -0,0 +1,49 @@ +// REQUIRES: aspect-usm_device_allocations && aspect-ext_oneapi_ipc_memory + +// UNSUPPORTED: level_zero && windows +// UNSUPPORTED-TRACKER: UMFW-348 + +// RUN: %{build} -o %t.out +// RUN: %{run} %t.out + +#include +#include +#include + +#include +#include + +#if defined(__linux__) +#include +#include +#include +#endif // defined(__linux__) + +namespace syclexp = sycl::ext::oneapi::experimental; + +int main() { + sycl::queue Q; + +#if defined(__linux__) + // UMF currently requires ptrace permissions to be set for the spawner. As + // such we need to set it until this limitation has been addressed. + // https://github.com/oneapi-src/unified-memory-framework/tree/main?tab=readme-ov-file#level-zero-memory-provider + if (Q.get_backend() == sycl::backend::ext_oneapi_level_zero && + prctl(PR_SET_PTRACER, getppid()) == -1) { + std::cout << "Failed to set ptracer permissions!" << std::endl; + return 1; + } +#endif // defined(__linux__) + + int *DataPtr = sycl::malloc_device(32, Q); + syclexp::ipc_memory::handle Handle = + syclexp::ipc_memory::get(DataPtr, Q.get_context()); + + // Free data before put. + sycl::free(DataPtr, Q); + + // Try calling put after free. + syclexp::ipc_memory::put(Handle, Q.get_context()); + + return 0; +} diff --git a/sycl/test/abi/sycl_symbols_linux.dump b/sycl/test/abi/sycl_symbols_linux.dump index 032b82ae74293..888f29a8d5a13 100644 --- a/sycl/test/abi/sycl_symbols_linux.dump +++ b/sycl/test/abi/sycl_symbols_linux.dump @@ -2997,6 +2997,10 @@ _ZN4sycl3_V13ext5intel12experimental9pipe_base18get_pipe_name_implEPKv _ZN4sycl3_V13ext6oneapi10level_zero6detail11make_deviceERKNS0_8platformEm _ZN4sycl3_V13ext6oneapi12experimental10async_freeERKNS0_5queueEPvRKNS0_6detail13code_locationE _ZN4sycl3_V13ext6oneapi12experimental10async_freeERNS0_7handlerEPv +_ZN4sycl3_V13ext6oneapi12experimental10ipc_memory3getEPvRKNS0_7contextE +_ZN4sycl3_V13ext6oneapi12experimental10ipc_memory3putERNS4_6handleERKNS0_7contextE +_ZN4sycl3_V13ext6oneapi12experimental10ipc_memory4openENS0_4spanIKSt4byteLm18446744073709551615EEERKNS0_7contextERKNS0_6deviceE +_ZN4sycl3_V13ext6oneapi12experimental10ipc_memory5closeEPvRKNS0_7contextE _ZN4sycl3_V13ext6oneapi12experimental10mem_adviseENS0_5queueEPvmiRKNS0_6detail13code_locationE _ZN4sycl3_V13ext6oneapi12experimental11memory_pool21increase_threshold_toEm _ZN4sycl3_V13ext6oneapi12experimental11memory_poolC1ERKNS0_7contextERKNS0_6deviceENS0_3usm5allocENS4_15pool_propertiesE diff --git a/sycl/test/abi/sycl_symbols_windows.dump b/sycl/test/abi/sycl_symbols_windows.dump index d5f53a5bbb505..f1bd48d03ac26 100644 --- a/sycl/test/abi/sycl_symbols_windows.dump +++ b/sycl/test/abi/sycl_symbols_windows.dump @@ -3837,6 +3837,7 @@ ?category@exception@_V1@sycl@@QEBAAEBVerror_category@std@@XZ ?checkNodePropertiesAndThrow@modifiable_command_graph@detail@experimental@oneapi@ext@_V1@sycl@@KAXAEBVproperty_list@67@@Z ?clearArgs@handler@_V1@sycl@@AEAAXXZ +?close@ipc_memory@experimental@oneapi@ext@_V1@sycl@@YAXPEAXAEBVcontext@56@@Z ?code@exception@_V1@sycl@@QEBAAEBVerror_code@std@@XZ ?compile_from_source@detail@experimental@oneapi@ext@_V1@sycl@@YA?AV?$kernel_bundle@$00@56@AEAV?$kernel_bundle@$02@56@AEBV?$vector@Vdevice@_V1@sycl@@V?$allocator@Vdevice@_V1@sycl@@@std@@@std@@AEBV?$vector@Vstring_view@detail@_V1@sycl@@V?$allocator@Vstring_view@detail@_V1@sycl@@@std@@@std@@PEAVstring@156@2@Z ?compile_impl@detail@_V1@sycl@@YA?AV?$shared_ptr@Vkernel_bundle_impl@detail@_V1@sycl@@@std@@AEBV?$kernel_bundle@$0A@@23@AEBV?$vector@Vdevice@_V1@sycl@@V?$allocator@Vdevice@_V1@sycl@@@std@@@5@AEBVproperty_list@23@@Z @@ -4050,6 +4051,7 @@ ?frexp_impl@detail@_V1@sycl@@YANNPEAH@Z ?get@context@_V1@sycl@@QEBAPEAU_cl_context@@XZ ?get@device@_V1@sycl@@QEBAPEAU_cl_device_id@@XZ +?get@ipc_memory@experimental@oneapi@ext@_V1@sycl@@YA?AV?$vector@W4byte@std@@V?$allocator@W4byte@std@@@2@@std@@PEAXAEBVcontext@56@@Z ?get@kernel@_V1@sycl@@QEBAPEAU_cl_kernel@@XZ ?get@platform@_V1@sycl@@QEBAPEAU_cl_platform_id@@XZ ?get@queue@_V1@sycl@@QEBAPEAU_cl_command_queue@@XZ @@ -4357,6 +4359,7 @@ ?modf_impl@detail@_V1@sycl@@YANNPEAN@Z ?name@SYCLCategory@detail@_V1@sycl@@UEBAPEBDXZ ?native_specialization_constant@kernel_bundle_plain@detail@_V1@sycl@@QEBA_NXZ +?open@ipc_memory@experimental@oneapi@ext@_V1@sycl@@YAPEAXAEBV?$vector@W4byte@std@@V?$allocator@W4byte@std@@@2@@std@@AEBVcontext@56@AEBVdevice@56@@Z ?parallel_for@handler@_V1@sycl@@QEAAXV?$range@$00@23@Vkernel@23@@Z ?parallel_for@handler@_V1@sycl@@QEAAXV?$range@$01@23@Vkernel@23@@Z ?parallel_for@handler@_V1@sycl@@QEAAXV?$range@$02@23@Vkernel@23@@Z @@ -4376,6 +4379,7 @@ ?print_graph@modifiable_command_graph@detail@experimental@oneapi@ext@_V1@sycl@@IEBAXVstring_view@267@_N@Z ?print_graph@modifiable_command_graph@detail@experimental@oneapi@ext@_V1@sycl@@QEBAXV?$basic_string@DU?$char_traits@D@std@@V?$allocator@D@2@@std@@_N@Z ?processArg@handler@_V1@sycl@@AEAAXPEAXAEBW4kernel_param_kind_t@detail@23@H_KAEA_K_N4@Z +?put@ipc_memory@experimental@oneapi@ext@_V1@sycl@@YAXAEBV?$vector@W4byte@std@@V?$allocator@W4byte@std@@@2@@std@@AEBVcontext@56@@Z ?query@tls_code_loc_t@detail@_V1@sycl@@QEAAAEBUcode_location@234@XZ ?reduComputeWGSize@detail@_V1@sycl@@YA_K_K0AEA_K@Z ?reduGetMaxNumConcurrentWorkGroups@detail@_V1@sycl@@YAIAEAVhandler@23@@Z diff --git a/sycl/unittests/Extensions/CMakeLists.txt b/sycl/unittests/Extensions/CMakeLists.txt index db0bc0120e8db..acdac92fd666a 100644 --- a/sycl/unittests/Extensions/CMakeLists.txt +++ b/sycl/unittests/Extensions/CMakeLists.txt @@ -26,6 +26,7 @@ add_sycl_unittest(ExtensionsTests OBJECT DeviceInfo.cpp RootGroup.cpp USMPrefetch.cpp + IPC.cpp ) add_subdirectory(BindlessImages) diff --git a/sycl/unittests/Extensions/IPC.cpp b/sycl/unittests/Extensions/IPC.cpp new file mode 100644 index 0000000000000..9b5acdbfc2d9f --- /dev/null +++ b/sycl/unittests/Extensions/IPC.cpp @@ -0,0 +1,169 @@ +//==------------------------------ IPC.cpp ---------------------------------==// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include + +#include +#include +#include +#include +#include + +namespace syclexp = sycl::ext::oneapi::experimental; + +namespace { + +int DummyInt = 42; +void *DummyPtr = &DummyInt; + +constexpr size_t DummyHandleDataSize = 10; +std::byte DummyHandleData[DummyHandleDataSize] = { + std::byte{9}, std::byte{8}, std::byte{7}, std::byte{6}, std::byte{5}, + std::byte{4}, std::byte{3}, std::byte{2}, std::byte{1}, std::byte{0}}; + +thread_local int urIPCGetMemHandleExp_counter = 0; +thread_local int urIPCPutMemHandleExp_counter = 0; +thread_local int urIPCOpenMemHandleExp_counter = 0; +thread_local int urIPCCloseMemHandleExp_counter = 0; + +ur_result_t replace_urIPCGetMemHandleExp(void *pParams) { + ++urIPCGetMemHandleExp_counter; + auto params = *static_cast(pParams); + EXPECT_EQ(*params.ppMem, DummyPtr); + if (*params.ppIPCMemHandleDataSizeRet) + **params.ppIPCMemHandleDataSizeRet = DummyHandleDataSize; + if (*params.pppIPCMemHandleData) + **params.pppIPCMemHandleData = DummyHandleData; + return UR_RESULT_SUCCESS; +} + +ur_result_t replace_urIPCPutMemHandleExp(void *pParams) { + ++urIPCPutMemHandleExp_counter; + auto params = *static_cast(pParams); + EXPECT_EQ(*params.ppIPCMemHandleData, DummyHandleData); + return UR_RESULT_SUCCESS; +} + +ur_result_t replace_urIPCOpenMemHandleExp(void *pParams) { + ++urIPCOpenMemHandleExp_counter; + auto params = *static_cast(pParams); + EXPECT_EQ( + memcmp(*params.ppIPCMemHandleData, DummyHandleData, DummyHandleDataSize), + 0); + EXPECT_EQ(*params.pipcMemHandleDataSize, DummyHandleDataSize); + **params.pppMem = DummyPtr; + return UR_RESULT_SUCCESS; +} + +ur_result_t replace_urIPCCloseMemHandleExp(void *pParams) { + ++urIPCCloseMemHandleExp_counter; + auto params = *static_cast(pParams); + EXPECT_EQ(*params.ppMem, DummyPtr); + return UR_RESULT_SUCCESS; +} + +ur_result_t after_urDeviceGetInfo(void *pParams) { + auto params = *static_cast(pParams); + switch (*params.ppropName) { + case UR_DEVICE_INFO_IPC_MEMORY_SUPPORT_EXP: + if (*params.ppPropSizeRet) + **params.ppPropSizeRet = sizeof(ur_bool_t); + if (*params.ppPropValue) + *static_cast(*params.ppPropValue) = ur_bool_t{true}; + return UR_RESULT_SUCCESS; + default: + return UR_RESULT_SUCCESS; + } +} + +class IPCTests : public ::testing::Test { +public: + IPCTests() : Mock{}, Ctxt(sycl::platform()) {} + +protected: + void SetUp() override { + urIPCGetMemHandleExp_counter = 0; + urIPCPutMemHandleExp_counter = 0; + urIPCOpenMemHandleExp_counter = 0; + urIPCCloseMemHandleExp_counter = 0; + + mock::getCallbacks().set_replace_callback("urIPCGetMemHandleExp", + replace_urIPCGetMemHandleExp); + mock::getCallbacks().set_replace_callback("urIPCPutMemHandleExp", + replace_urIPCPutMemHandleExp); + mock::getCallbacks().set_replace_callback("urIPCOpenMemHandleExp", + replace_urIPCOpenMemHandleExp); + mock::getCallbacks().set_replace_callback("urIPCCloseMemHandleExp", + replace_urIPCCloseMemHandleExp); + mock::getCallbacks().set_after_callback("urDeviceGetInfo", + after_urDeviceGetInfo); + } + + sycl::unittest::UrMock<> Mock; + sycl::context Ctxt; +}; + +TEST_F(IPCTests, IPCGetPutImplicit) { + syclexp::ipc_memory::handle IPCMemHandle = + syclexp::ipc_memory::get(DummyPtr, Ctxt); + syclexp::ipc_memory::handle_data_t IPCMemHandleData = IPCMemHandle.data(); + EXPECT_EQ(IPCMemHandleData.size(), DummyHandleDataSize); + EXPECT_EQ(IPCMemHandleData.data(), DummyHandleData); + + // Creating the IPC memory from a pointer should only call "get". + EXPECT_EQ(urIPCGetMemHandleExp_counter, 1); + EXPECT_EQ(urIPCPutMemHandleExp_counter, 0); + EXPECT_EQ(urIPCOpenMemHandleExp_counter, 0); + EXPECT_EQ(urIPCCloseMemHandleExp_counter, 0); +} + +TEST_F(IPCTests, IPCGetPutExplicit) { + syclexp::ipc_memory::handle IPCMemHandle = + syclexp::ipc_memory::get(DummyPtr, Ctxt); + syclexp::ipc_memory::handle_data_t IPCMemHandleData = IPCMemHandle.data(); + EXPECT_EQ(IPCMemHandleData.size(), DummyHandleDataSize); + EXPECT_EQ(IPCMemHandleData.data(), DummyHandleData); + + // Creating the IPC memory from a pointer should only call "get". + EXPECT_EQ(urIPCGetMemHandleExp_counter, 1); + EXPECT_EQ(urIPCPutMemHandleExp_counter, 0); + EXPECT_EQ(urIPCOpenMemHandleExp_counter, 0); + EXPECT_EQ(urIPCCloseMemHandleExp_counter, 0); + + syclexp::ipc_memory::put(IPCMemHandle, Ctxt); + + // Calling "put" explicitly should call the UR function. + EXPECT_EQ(urIPCGetMemHandleExp_counter, 1); + EXPECT_EQ(urIPCPutMemHandleExp_counter, 1); + EXPECT_EQ(urIPCOpenMemHandleExp_counter, 0); + EXPECT_EQ(urIPCCloseMemHandleExp_counter, 0); +} + +TEST_F(IPCTests, IPCOpenClose) { + syclexp::ipc_memory::handle_data_t HandleData{ + DummyHandleData, DummyHandleData + DummyHandleDataSize}; + void *Ptr = + syclexp::ipc_memory::open(HandleData, Ctxt, Ctxt.get_devices()[0]); + EXPECT_EQ(Ptr, DummyPtr); + + // Opening an IPC handle should call open. + EXPECT_EQ(urIPCGetMemHandleExp_counter, 0); + EXPECT_EQ(urIPCPutMemHandleExp_counter, 0); + EXPECT_EQ(urIPCOpenMemHandleExp_counter, 1); + EXPECT_EQ(urIPCCloseMemHandleExp_counter, 0); + + syclexp::ipc_memory::close(Ptr, Ctxt); + + // When we close an IPC memory pointer, it should call close. + EXPECT_EQ(urIPCGetMemHandleExp_counter, 0); + EXPECT_EQ(urIPCPutMemHandleExp_counter, 0); + EXPECT_EQ(urIPCOpenMemHandleExp_counter, 1); + EXPECT_EQ(urIPCCloseMemHandleExp_counter, 1); +} + +} // namespace diff --git a/unified-runtime/include/ur_api.h b/unified-runtime/include/ur_api.h index 38b6a73144e73..142b4c52ccb40 100644 --- a/unified-runtime/include/ur_api.h +++ b/unified-runtime/include/ur_api.h @@ -12502,7 +12502,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urEnqueueTimestampRecordingExp( /// + `NULL == hContext` /// - ::UR_RESULT_ERROR_INVALID_CONTEXT /// - ::UR_RESULT_ERROR_INVALID_NULL_POINTER -/// + `NULL == pIPCMemHandleData` +/// + `NULL == ppIPCMemHandleData` /// + `NULL == pIPCMemHandleDataSizeRet` /// - ::UR_RESULT_ERROR_OUT_OF_HOST_MEMORY /// - ::UR_RESULT_ERROR_OUT_OF_RESOURCES @@ -12512,7 +12512,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urIPCGetMemHandleExp( /// [in] pointer to device USM memory void *pMem, /// [out][optional] a pointer to the IPC memory handle data - void *pIPCMemHandleData, + void **ppIPCMemHandleData, /// [out][optional] size of the resulting IPC memory handle data size_t *pIPCMemHandleDataSizeRet); @@ -15715,7 +15715,7 @@ typedef struct ur_command_buffer_get_native_handle_exp_params_t { typedef struct ur_ipc_get_mem_handle_exp_params_t { ur_context_handle_t *phContext; void **ppMem; - void **ppIPCMemHandleData; + void ***pppIPCMemHandleData; size_t **ppIPCMemHandleDataSizeRet; } ur_ipc_get_mem_handle_exp_params_t; diff --git a/unified-runtime/include/ur_ddi.h b/unified-runtime/include/ur_ddi.h index 0cb139a2e93da..4702f04ead5ed 100644 --- a/unified-runtime/include/ur_ddi.h +++ b/unified-runtime/include/ur_ddi.h @@ -1796,7 +1796,7 @@ typedef ur_result_t(UR_APICALL *ur_pfnGetCommandBufferExpProcAddrTable_t)( /////////////////////////////////////////////////////////////////////////////// /// @brief Function-pointer for urIPCGetMemHandleExp typedef ur_result_t(UR_APICALL *ur_pfnIPCGetMemHandleExp_t)(ur_context_handle_t, - void *, void *, + void *, void **, size_t *); /////////////////////////////////////////////////////////////////////////////// diff --git a/unified-runtime/include/ur_print.hpp b/unified-runtime/include/ur_print.hpp index 23bb5bf89c654..878d5313f5b81 100644 --- a/unified-runtime/include/ur_print.hpp +++ b/unified-runtime/include/ur_print.hpp @@ -20544,9 +20544,9 @@ inline std::ostream &operator<<( os << *(params->ppMem); os << ", "; - os << ".pIPCMemHandleData = "; + os << ".ppIPCMemHandleData = "; - ur::details::printPtr(os, *(params->ppIPCMemHandleData)); + ur::details::printPtr(os, *(params->pppIPCMemHandleData)); os << ", "; os << ".pIPCMemHandleDataSizeRet = "; diff --git a/unified-runtime/scripts/core/exp-inter-process-communication.yml b/unified-runtime/scripts/core/exp-inter-process-communication.yml index 117e66d815ba9..f48a77b3aca62 100644 --- a/unified-runtime/scripts/core/exp-inter-process-communication.yml +++ b/unified-runtime/scripts/core/exp-inter-process-communication.yml @@ -36,8 +36,8 @@ params: - type: void * name: pMem desc: "[in] pointer to device USM memory" - - type: void* - name: pIPCMemHandleData + - type: void** + name: ppIPCMemHandleData desc: "[out][optional] a pointer to the IPC memory handle data" - type: size_t* name: pIPCMemHandleDataSizeRet @@ -47,7 +47,7 @@ returns: - $X_RESULT_ERROR_INVALID_NULL_HANDLE: - "`NULL == hContext`" - $X_RESULT_ERROR_INVALID_NULL_POINTER: - - "`NULL == pIPCMemHandleData`" + - "`NULL == ppIPCMemHandleData`" - "`NULL == pIPCMemHandleDataSizeRet`" - $X_RESULT_ERROR_OUT_OF_HOST_MEMORY - $X_RESULT_ERROR_OUT_OF_RESOURCES diff --git a/unified-runtime/source/adapters/cuda/memory.cpp b/unified-runtime/source/adapters/cuda/memory.cpp index 4477173cb83cc..6eb0c8e12ad3a 100644 --- a/unified-runtime/source/adapters/cuda/memory.cpp +++ b/unified-runtime/source/adapters/cuda/memory.cpp @@ -592,7 +592,7 @@ CUsurfObject SurfaceMem::getSurface(const ur_device_handle_t Device) { } UR_APIEXPORT ur_result_t UR_APICALL -urIPCGetMemHandleExp(ur_context_handle_t, void *pMem, void *pIPCMemHandleData, +urIPCGetMemHandleExp(ur_context_handle_t, void *pMem, void **ppIPCMemHandleData, size_t *pIPCMemHandleDataSizeRet) { umf_memory_pool_handle_t umfPool; auto urRet = umf::umf2urResult(umfPoolByPtr(pMem, &umfPool)); @@ -600,7 +600,7 @@ urIPCGetMemHandleExp(ur_context_handle_t, void *pMem, void *pIPCMemHandleData, return urRet; // Fast path for returning the size of the handle only. - if (!pIPCMemHandleData) + if (!ppIPCMemHandleData) return umf::umf2urResult( umfPoolGetIPCHandleSize(umfPool, pIPCMemHandleDataSizeRet)); @@ -608,12 +608,9 @@ urIPCGetMemHandleExp(ur_context_handle_t, void *pMem, void *pIPCMemHandleData, size_t *umfHandleSize = pIPCMemHandleDataSizeRet != nullptr ? pIPCMemHandleDataSizeRet : &fallbackUMFHandleSize; - umf_ipc_handle_t umfHandle; - urRet = umf::umf2urResult(umfGetIPCHandle(pMem, &umfHandle, umfHandleSize)); - if (urRet) - return urRet; - std::memcpy(pIPCMemHandleData, umfHandle, *umfHandleSize); - return UR_RESULT_SUCCESS; + return umf::umf2urResult(umfGetIPCHandle( + pMem, reinterpret_cast(ppIPCMemHandleData), + umfHandleSize)); } UR_APIEXPORT ur_result_t UR_APICALL diff --git a/unified-runtime/source/adapters/hip/memory.cpp b/unified-runtime/source/adapters/hip/memory.cpp index 4c646ed64a1e4..bec39baa474e1 100644 --- a/unified-runtime/source/adapters/hip/memory.cpp +++ b/unified-runtime/source/adapters/hip/memory.cpp @@ -642,7 +642,7 @@ hipSurfaceObject_t SurfaceMem::getSurface(const ur_device_handle_t Device) { } UR_APIEXPORT ur_result_t UR_APICALL urIPCGetMemHandleExp(ur_context_handle_t, - void *, void *, + void *, void **, size_t *) { return UR_RESULT_ERROR_UNSUPPORTED_FEATURE; } diff --git a/unified-runtime/source/adapters/level_zero/memory.cpp b/unified-runtime/source/adapters/level_zero/memory.cpp index 127629734bfaf..1a31be1e57595 100644 --- a/unified-runtime/source/adapters/level_zero/memory.cpp +++ b/unified-runtime/source/adapters/level_zero/memory.cpp @@ -1953,7 +1953,7 @@ ur_result_t urEnqueueWriteHostPipe( } ur_result_t urIPCGetMemHandleExp(ur_context_handle_t, void *pMem, - void *pIPCMemHandleData, + void **ppIPCMemHandleData, size_t *pIPCMemHandleDataSizeRet) { umf_memory_pool_handle_t umfPool; auto urRet = umf::umf2urResult(umfPoolByPtr(pMem, &umfPool)); @@ -1961,7 +1961,7 @@ ur_result_t urIPCGetMemHandleExp(ur_context_handle_t, void *pMem, return urRet; // Fast path for returning the size of the handle only. - if (!pIPCMemHandleData) + if (!ppIPCMemHandleData) return umf::umf2urResult( umfPoolGetIPCHandleSize(umfPool, pIPCMemHandleDataSizeRet)); @@ -1969,12 +1969,9 @@ ur_result_t urIPCGetMemHandleExp(ur_context_handle_t, void *pMem, size_t *umfHandleSize = pIPCMemHandleDataSizeRet != nullptr ? pIPCMemHandleDataSizeRet : &fallbackUMFHandleSize; - umf_ipc_handle_t umfHandle; - urRet = umf::umf2urResult(umfGetIPCHandle(pMem, &umfHandle, umfHandleSize)); - if (urRet) - return urRet; - std::memcpy(pIPCMemHandleData, umfHandle, *umfHandleSize); - return UR_RESULT_SUCCESS; + return umf::umf2urResult(umfGetIPCHandle( + pMem, reinterpret_cast(ppIPCMemHandleData), + umfHandleSize)); } ur_result_t urIPCPutMemHandleExp(ur_context_handle_t, void *pIPCMemHandleData) { diff --git a/unified-runtime/source/adapters/level_zero/ur_interface_loader.hpp b/unified-runtime/source/adapters/level_zero/ur_interface_loader.hpp index 160e0ea77f188..ca54e5489469c 100644 --- a/unified-runtime/source/adapters/level_zero/ur_interface_loader.hpp +++ b/unified-runtime/source/adapters/level_zero/ur_interface_loader.hpp @@ -775,7 +775,7 @@ ur_result_t urEnqueueTimestampRecordingExp( ur_queue_handle_t hQueue, bool blocking, uint32_t numEventsInWaitList, const ur_event_handle_t *phEventWaitList, ur_event_handle_t *phEvent); ur_result_t urIPCGetMemHandleExp(ur_context_handle_t hContext, void *pMem, - void *pIPCMemHandleData, + void **ppIPCMemHandleData, size_t *pIPCMemHandleDataSizeRet); ur_result_t urIPCPutMemHandleExp(ur_context_handle_t hContext, void *pIPCMemHandleData); diff --git a/unified-runtime/source/adapters/level_zero/v2/memory.cpp b/unified-runtime/source/adapters/level_zero/v2/memory.cpp index 941d73be8de3e..6473ebf69fdae 100644 --- a/unified-runtime/source/adapters/level_zero/v2/memory.cpp +++ b/unified-runtime/source/adapters/level_zero/v2/memory.cpp @@ -782,7 +782,7 @@ ur_result_t urMemImageGetInfo(ur_mem_handle_t /*hMemory*/, } ur_result_t urIPCGetMemHandleExp(ur_context_handle_t, void *pMem, - void *pIPCMemHandleData, + void **ppIPCMemHandleData, size_t *pIPCMemHandleDataSizeRet) { umf_memory_pool_handle_t umfPool; auto urRet = umf::umf2urResult(umfPoolByPtr(pMem, &umfPool)); @@ -790,7 +790,7 @@ ur_result_t urIPCGetMemHandleExp(ur_context_handle_t, void *pMem, return urRet; // Fast path for returning the size of the handle only. - if (!pIPCMemHandleData) + if (!ppIPCMemHandleData) return umf::umf2urResult( umfPoolGetIPCHandleSize(umfPool, pIPCMemHandleDataSizeRet)); @@ -798,12 +798,9 @@ ur_result_t urIPCGetMemHandleExp(ur_context_handle_t, void *pMem, size_t *umfHandleSize = pIPCMemHandleDataSizeRet != nullptr ? pIPCMemHandleDataSizeRet : &fallbackUMFHandleSize; - umf_ipc_handle_t umfHandle; - urRet = umf::umf2urResult(umfGetIPCHandle(pMem, &umfHandle, umfHandleSize)); - if (urRet) - return urRet; - std::memcpy(pIPCMemHandleData, umfHandle, *umfHandleSize); - return UR_RESULT_SUCCESS; + return umf::umf2urResult(umfGetIPCHandle( + pMem, reinterpret_cast(ppIPCMemHandleData), + umfHandleSize)); } ur_result_t urIPCPutMemHandleExp(ur_context_handle_t, void *pIPCMemHandleData) { diff --git a/unified-runtime/source/adapters/mock/ur_mockddi.cpp b/unified-runtime/source/adapters/mock/ur_mockddi.cpp index cbb5cbc26e1f6..6e0f7d96d3843 100644 --- a/unified-runtime/source/adapters/mock/ur_mockddi.cpp +++ b/unified-runtime/source/adapters/mock/ur_mockddi.cpp @@ -11405,13 +11405,13 @@ __urdlllocal ur_result_t UR_APICALL urIPCGetMemHandleExp( /// [in] pointer to device USM memory void *pMem, /// [out][optional] a pointer to the IPC memory handle data - void *pIPCMemHandleData, + void **ppIPCMemHandleData, /// [out][optional] size of the resulting IPC memory handle data size_t *pIPCMemHandleDataSizeRet) try { ur_result_t result = UR_RESULT_SUCCESS; ur_ipc_get_mem_handle_exp_params_t params = { - &hContext, &pMem, &pIPCMemHandleData, &pIPCMemHandleDataSizeRet}; + &hContext, &pMem, &ppIPCMemHandleData, &pIPCMemHandleDataSizeRet}; auto beforeCallback = reinterpret_cast( mock::getCallbacks().get_before_callback("urIPCGetMemHandleExp")); diff --git a/unified-runtime/source/adapters/native_cpu/memory.cpp b/unified-runtime/source/adapters/native_cpu/memory.cpp index 15eef80246e66..d38ca812d7676 100644 --- a/unified-runtime/source/adapters/native_cpu/memory.cpp +++ b/unified-runtime/source/adapters/native_cpu/memory.cpp @@ -140,7 +140,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urMemImageGetInfo( } UR_APIEXPORT ur_result_t UR_APICALL urIPCGetMemHandleExp(ur_context_handle_t, - void *, void *, + void *, void **, size_t *) { return UR_RESULT_ERROR_UNSUPPORTED_FEATURE; } diff --git a/unified-runtime/source/adapters/offload/memory.cpp b/unified-runtime/source/adapters/offload/memory.cpp index 2ec4c8e471547..f2c8d49004a11 100644 --- a/unified-runtime/source/adapters/offload/memory.cpp +++ b/unified-runtime/source/adapters/offload/memory.cpp @@ -209,7 +209,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urMemImageGetInfo(ur_mem_handle_t, } UR_APIEXPORT ur_result_t UR_APICALL urIPCGetMemHandleExp(ur_context_handle_t, - void *, void *, + void *, void **, size_t *) { return UR_RESULT_ERROR_UNSUPPORTED_FEATURE; } diff --git a/unified-runtime/source/adapters/opencl/memory.cpp b/unified-runtime/source/adapters/opencl/memory.cpp index cda15dfa4db81..394af5f441a85 100644 --- a/unified-runtime/source/adapters/opencl/memory.cpp +++ b/unified-runtime/source/adapters/opencl/memory.cpp @@ -581,7 +581,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urMemRelease(ur_mem_handle_t hMem) { } UR_APIEXPORT ur_result_t UR_APICALL urIPCGetMemHandleExp(ur_context_handle_t, - void *, void *, + void *, void **, size_t *) { return UR_RESULT_ERROR_UNSUPPORTED_FEATURE; } diff --git a/unified-runtime/source/loader/layers/tracing/ur_trcddi.cpp b/unified-runtime/source/loader/layers/tracing/ur_trcddi.cpp index 917b853246975..6e0ce13570684 100644 --- a/unified-runtime/source/loader/layers/tracing/ur_trcddi.cpp +++ b/unified-runtime/source/loader/layers/tracing/ur_trcddi.cpp @@ -9659,7 +9659,7 @@ __urdlllocal ur_result_t UR_APICALL urIPCGetMemHandleExp( /// [in] pointer to device USM memory void *pMem, /// [out][optional] a pointer to the IPC memory handle data - void *pIPCMemHandleData, + void **ppIPCMemHandleData, /// [out][optional] size of the resulting IPC memory handle data size_t *pIPCMemHandleDataSizeRet) { auto pfnGetMemHandleExp = getContext()->urDdiTable.IPCExp.pfnGetMemHandleExp; @@ -9668,14 +9668,14 @@ __urdlllocal ur_result_t UR_APICALL urIPCGetMemHandleExp( return UR_RESULT_ERROR_UNSUPPORTED_FEATURE; ur_ipc_get_mem_handle_exp_params_t params = { - &hContext, &pMem, &pIPCMemHandleData, &pIPCMemHandleDataSizeRet}; + &hContext, &pMem, &ppIPCMemHandleData, &pIPCMemHandleDataSizeRet}; uint64_t instance = getContext()->notify_begin( UR_FUNCTION_IPC_GET_MEM_HANDLE_EXP, "urIPCGetMemHandleExp", ¶ms); auto &logger = getContext()->logger; UR_LOG_L(logger, INFO, " ---> urIPCGetMemHandleExp\n"); - ur_result_t result = pfnGetMemHandleExp(hContext, pMem, pIPCMemHandleData, + ur_result_t result = pfnGetMemHandleExp(hContext, pMem, ppIPCMemHandleData, pIPCMemHandleDataSizeRet); getContext()->notify_end(UR_FUNCTION_IPC_GET_MEM_HANDLE_EXP, diff --git a/unified-runtime/source/loader/layers/validation/ur_valddi.cpp b/unified-runtime/source/loader/layers/validation/ur_valddi.cpp index cda6c4849301d..d8ceff0228227 100644 --- a/unified-runtime/source/loader/layers/validation/ur_valddi.cpp +++ b/unified-runtime/source/loader/layers/validation/ur_valddi.cpp @@ -10414,7 +10414,7 @@ __urdlllocal ur_result_t UR_APICALL urIPCGetMemHandleExp( /// [in] pointer to device USM memory void *pMem, /// [out][optional] a pointer to the IPC memory handle data - void *pIPCMemHandleData, + void **ppIPCMemHandleData, /// [out][optional] size of the resulting IPC memory handle data size_t *pIPCMemHandleDataSizeRet) { auto pfnGetMemHandleExp = getContext()->urDdiTable.IPCExp.pfnGetMemHandleExp; @@ -10424,7 +10424,7 @@ __urdlllocal ur_result_t UR_APICALL urIPCGetMemHandleExp( } if (getContext()->enableParameterValidation) { - if (NULL == pIPCMemHandleData) + if (NULL == ppIPCMemHandleData) return UR_RESULT_ERROR_INVALID_NULL_POINTER; if (NULL == pIPCMemHandleDataSizeRet) @@ -10439,7 +10439,7 @@ __urdlllocal ur_result_t UR_APICALL urIPCGetMemHandleExp( URLOG_CTX_INVALID_REFERENCE(hContext); } - ur_result_t result = pfnGetMemHandleExp(hContext, pMem, pIPCMemHandleData, + ur_result_t result = pfnGetMemHandleExp(hContext, pMem, ppIPCMemHandleData, pIPCMemHandleDataSizeRet); return result; diff --git a/unified-runtime/source/loader/ur_ldrddi.cpp b/unified-runtime/source/loader/ur_ldrddi.cpp index e5fb500a0fcfb..b1a109a84bab5 100644 --- a/unified-runtime/source/loader/ur_ldrddi.cpp +++ b/unified-runtime/source/loader/ur_ldrddi.cpp @@ -5499,7 +5499,7 @@ __urdlllocal ur_result_t UR_APICALL urIPCGetMemHandleExp( /// [in] pointer to device USM memory void *pMem, /// [out][optional] a pointer to the IPC memory handle data - void *pIPCMemHandleData, + void **ppIPCMemHandleData, /// [out][optional] size of the resulting IPC memory handle data size_t *pIPCMemHandleDataSizeRet) { @@ -5510,7 +5510,7 @@ __urdlllocal ur_result_t UR_APICALL urIPCGetMemHandleExp( return UR_RESULT_ERROR_UNINITIALIZED; // forward to device-platform - return pfnGetMemHandleExp(hContext, pMem, pIPCMemHandleData, + return pfnGetMemHandleExp(hContext, pMem, ppIPCMemHandleData, pIPCMemHandleDataSizeRet); } diff --git a/unified-runtime/source/loader/ur_libapi.cpp b/unified-runtime/source/loader/ur_libapi.cpp index 210fcb3009805..2b649113c111d 100644 --- a/unified-runtime/source/loader/ur_libapi.cpp +++ b/unified-runtime/source/loader/ur_libapi.cpp @@ -10097,7 +10097,7 @@ ur_result_t UR_APICALL urEnqueueTimestampRecordingExp( /// + `NULL == hContext` /// - ::UR_RESULT_ERROR_INVALID_CONTEXT /// - ::UR_RESULT_ERROR_INVALID_NULL_POINTER -/// + `NULL == pIPCMemHandleData` +/// + `NULL == ppIPCMemHandleData` /// + `NULL == pIPCMemHandleDataSizeRet` /// - ::UR_RESULT_ERROR_OUT_OF_HOST_MEMORY /// - ::UR_RESULT_ERROR_OUT_OF_RESOURCES @@ -10107,7 +10107,7 @@ ur_result_t UR_APICALL urIPCGetMemHandleExp( /// [in] pointer to device USM memory void *pMem, /// [out][optional] a pointer to the IPC memory handle data - void *pIPCMemHandleData, + void **ppIPCMemHandleData, /// [out][optional] size of the resulting IPC memory handle data size_t *pIPCMemHandleDataSizeRet) try { auto pfnGetMemHandleExp = @@ -10115,7 +10115,7 @@ ur_result_t UR_APICALL urIPCGetMemHandleExp( if (nullptr == pfnGetMemHandleExp) return UR_RESULT_ERROR_UNINITIALIZED; - return pfnGetMemHandleExp(hContext, pMem, pIPCMemHandleData, + return pfnGetMemHandleExp(hContext, pMem, ppIPCMemHandleData, pIPCMemHandleDataSizeRet); } catch (...) { return exceptionToResult(std::current_exception()); diff --git a/unified-runtime/source/ur_api.cpp b/unified-runtime/source/ur_api.cpp index fafc7e205b5cb..2c3b06c939da9 100644 --- a/unified-runtime/source/ur_api.cpp +++ b/unified-runtime/source/ur_api.cpp @@ -8793,7 +8793,7 @@ ur_result_t UR_APICALL urEnqueueTimestampRecordingExp( /// + `NULL == hContext` /// - ::UR_RESULT_ERROR_INVALID_CONTEXT /// - ::UR_RESULT_ERROR_INVALID_NULL_POINTER -/// + `NULL == pIPCMemHandleData` +/// + `NULL == ppIPCMemHandleData` /// + `NULL == pIPCMemHandleDataSizeRet` /// - ::UR_RESULT_ERROR_OUT_OF_HOST_MEMORY /// - ::UR_RESULT_ERROR_OUT_OF_RESOURCES @@ -8803,7 +8803,7 @@ ur_result_t UR_APICALL urIPCGetMemHandleExp( /// [in] pointer to device USM memory void *pMem, /// [out][optional] a pointer to the IPC memory handle data - void *pIPCMemHandleData, + void **ppIPCMemHandleData, /// [out][optional] size of the resulting IPC memory handle data size_t *pIPCMemHandleDataSizeRet) { ur_result_t result = UR_RESULT_SUCCESS;