From c0cbcbc209167ab955bfb14b229681ee461d295a Mon Sep 17 00:00:00 2001
From: Joseph Schuchart
Date: Tue, 26 Nov 2024 09:22:21 -0500
Subject: [PATCH 1/2] Provide host memory allocation/release callbacks to the copies

Allow PaRSEC to allocate host memory on demand, e.g., when data is
evicted or we move data to a host task. Most data may never be
needed on the host so it is wasteful to allocate it eagerly.

Signed-off-by: Joseph Schuchart
---
 ttg/ttg/parsec/buffer.h     | 53 +++++++++++++++++++++++++++++--------
 ttg/ttg/parsec/devicefunc.h |  7 ++++-
 2 files changed, 48 insertions(+), 12 deletions(-)

diff --git a/ttg/ttg/parsec/buffer.h b/ttg/ttg/parsec/buffer.h
index 62be4270ce..2fdeb5a839 100644
--- a/ttg/ttg/parsec/buffer.h
+++ b/ttg/ttg/parsec/buffer.h
@@ -72,18 +72,32 @@ namespace detail {
       PtrT m_ptr; // keep a reference if PtrT is a shared_ptr
       std::size_t m_size;
 
-      void allocate(std::size_t size) {
+      void do_allocate() {
         if constexpr (std::is_pointer_v) {
-          m_ptr = allocator_traits::allocate(m_allocator, size);
+          m_ptr = allocator_traits::allocate(m_allocator, m_size);
         }
         this->device_private = m_ptr;
-        m_size = size;
       }
 
-      void deallocate() {
-        allocator_traits::deallocate(m_allocator, static_cast(this->device_private), this->m_size);
-        this->device_private = nullptr;
-        this->m_size = 0;
+      void do_deallocate() {
+        if constexpr (std::is_pointer_v) {
+          if (this->device_private != nullptr) {
+            auto ptr = m_ptr;
+            this->device_private = nullptr;
+            this->m_ptr = nullptr;
+            allocator_traits::deallocate(m_allocator, ptr, this->m_size);
+          }
+        }
+      }
+
+      static void allocate(parsec_data_copy_t *parsec_copy, int device) {
+        data_copy_type* copy = static_cast(parsec_copy);
+        copy->do_allocate();
+      }
+
+      static void deallocate(parsec_data_copy_t *parsec_copy, int device) {
+        data_copy_type* copy = static_cast(parsec_copy);
+        copy->do_deallocate();
       }
 
     public:
@@ -100,20 +114,37 @@ namespace detail {
         constexpr const bool is_empty_allocator = std::is_same_v>;
         assert(is_empty_allocator);
         m_ptr = std::move(ptr);
+        this->m_size = size;
+        this->dtt = parsec_datatype_int8_t;
         this->device_private = const_cast(to_address(m_ptr));
       }
 
       void construct(std::size_t size,
+                     ttg::scope scope,
                      const allocator_type& alloc = allocator_type()) {
         constexpr const bool is_empty_allocator = std::is_same_v>;
         assert(!is_empty_allocator);
         m_allocator = alloc;
-        allocate(size);
-        this->device_private = m_ptr;
+        this->m_size = size;
+        this->dtt = parsec_datatype_int8_t;
+        if (scope == ttg::scope::Allocate) {
+          /* if the user only requests an allocation on the device
+           * we don't allocate host memory but provide PaRSEC with
+           * a way to request host memory from us. */
+          this->alloc_cb = &allocate;
+          this->release_cb = &deallocate;
+        } else {
+          /* the user requested that the data be sync'ed into the device
+           * so we need to provide host memory for the user to fill prior */
+          do_allocate();
+          this->device_private = m_ptr;
+        }
       }
 
       ~data_copy_type() {
-        this->deallocate();
+        this->alloc_cb = nullptr;
+        this->release_cb = nullptr;
+        this->do_deallocate();
       }
     };
 
@@ -143,7 +174,7 @@ namespace detail {
 
       /* create the host copy and allocate host memory */
       data_copy_type *copy = PARSEC_OBJ_NEW(data_copy_type);
-      copy->construct(size, allocator);
+      copy->construct(size, scope, allocator);
       parsec_data_copy_attach(data, copy, 0);
 
       /* adjust data flags */
diff --git a/ttg/ttg/parsec/devicefunc.h b/ttg/ttg/parsec/devicefunc.h
index 38682ca9df..82cc4ac866 100644
--- a/ttg/ttg/parsec/devicefunc.h
+++ b/ttg/ttg/parsec/devicefunc.h
@@ -201,11 +201,16 @@ namespace ttg_parsec {
       /* enqueue the transfer into the compute stream to come back once the compute and transfer are complete */
       if (data->owner_device != 0) {
         parsec_device_gpu_module_t *device_module = detail::parsec_ttg_caller->dev_ptr->device;
+        if (nullptr == data->device_copies[0]->device_private) {
+          assert(nullptr != data->device_copies[0]->alloc_cb);
+          data->device_copies[0]->alloc_cb(data->device_copies[0], 0);
+        }
+
         int ret = device_module->memcpy_async(device_module, stream,
                                               data->device_copies[0]->device_private,
                                               data->device_copies[data->owner_device]->device_private,
                                               data->nb_elts, parsec_device_gpu_transfer_direction_d2h);
-        assert(ret == PARSEC_SUCCESS);
+        if (ret != PARSEC_SUCCESS) throw std::runtime_error("Failed to copy data from device to host!");
       }
       if constexpr (sizeof...(Is) > 0) {
         // recursion

From c86c55dc18bbc442511e823246d22d45921eabfe Mon Sep 17 00:00:00 2001
From: Joseph Schuchart
Date: Fri, 14 Feb 2025 18:12:37 -0500
Subject: [PATCH 2/2] Switch PaRSEC reference to dedicated branch

This branch contains features needed now and in the future by TTG.
Once they have been merged into PaRSEC mainline we can switch back.

Signed-off-by: Joseph Schuchart
---
 cmake/modules/ExternalDependenciesVersions.cmake | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/cmake/modules/ExternalDependenciesVersions.cmake b/cmake/modules/ExternalDependenciesVersions.cmake
index b408fac581..61115082ae 100644
--- a/cmake/modules/ExternalDependenciesVersions.cmake
+++ b/cmake/modules/ExternalDependenciesVersions.cmake
@@ -4,7 +4,7 @@
 set(TTG_TRACKED_VG_CMAKE_KIT_TAG cda539db32be6e8171f5cbebdb1a7c38d5ab4b34) # provides FindOrFetchLinalgPP and "real" FindOrFetchBoost
 set(TTG_TRACKED_CATCH2_VERSION 3.5.0)
 set(TTG_TRACKED_MADNESS_TAG 93a9a5cec2a8fa87fba3afe8056607e6062a9058)
-set(TTG_TRACKED_PARSEC_TAG 996dda4c0ff3120bc65385f86e999befd4b3fe7a)
+set(TTG_TRACKED_PARSEC_TAG parsec-for-ttg)
 set(TTG_TRACKED_BTAS_TAG c25b0a11d2a76190bfb13fa72f9e9dc3e57c3c2f)
 set(TTG_TRACKED_TILEDARRAY_TAG 5944bdba3266a3fa19f1809c8e2accf3dad4d815)
 