Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion cmake/modules/ExternalDependenciesVersions.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@
set(TTG_TRACKED_VG_CMAKE_KIT_TAG cda539db32be6e8171f5cbebdb1a7c38d5ab4b34) # provides FindOrFetchLinalgPP and "real" FindOrFetchBoost
set(TTG_TRACKED_CATCH2_VERSION 3.5.0)
set(TTG_TRACKED_MADNESS_TAG 93a9a5cec2a8fa87fba3afe8056607e6062a9058)
# NOTE(review): the two TTG_TRACKED_PARSEC_TAG lines below appear to be the before/after
# sides of this diff hunk (1 addition & 1 deletion). The second value, parsec-for-ttg, is a
# symbolic ref name, whereas every other *_TAG entry here is a 40-hex commit id. If it is a
# branch, the dependency is no longer pinned -- confirm, and prefer a commit id before merging.
set(TTG_TRACKED_PARSEC_TAG 996dda4c0ff3120bc65385f86e999befd4b3fe7a)
set(TTG_TRACKED_PARSEC_TAG parsec-for-ttg)
set(TTG_TRACKED_BTAS_TAG c25b0a11d2a76190bfb13fa72f9e9dc3e57c3c2f)
set(TTG_TRACKED_TILEDARRAY_TAG 5944bdba3266a3fa19f1809c8e2accf3dad4d815)

Expand Down
53 changes: 42 additions & 11 deletions ttg/ttg/parsec/buffer.h
Original file line number Diff line number Diff line change
Expand Up @@ -72,18 +72,32 @@ namespace detail {
PtrT m_ptr; // keep a reference if PtrT is a shared_ptr
std::size_t m_size;

/* Allocation helper for the copy's buffer.
 * NOTE(review): this span is a rendered diff hunk with both sides interleaved. The
 * `allocate(std::size_t size)` signature, the `allocate(m_allocator, size)` call and the
 * `m_size = size;` store appear to be the pre-change lines; `do_allocate()` takes no
 * argument and sizes the request from m_size instead -- confirm against the real file. */
void allocate(std::size_t size) {
void do_allocate() {
/* memory is requested from the allocator only when PtrT is a raw pointer type */
if constexpr (std::is_pointer_v<PtrT>) {
m_ptr = allocator_traits::allocate(m_allocator, size);
m_ptr = allocator_traits::allocate(m_allocator, m_size);
}
/* publish the pointer through device_private */
this->device_private = m_ptr;
m_size = size;
}

void deallocate() {
allocator_traits::deallocate(m_allocator, static_cast<value_type*>(this->device_private), this->m_size);
this->device_private = nullptr;
this->m_size = 0;
void do_deallocate() {
if constexpr (std::is_pointer_v<PtrT>) {
if (this->device_private != nullptr) {
auto ptr = m_ptr;
this->device_private = nullptr;
this->m_ptr = nullptr;
allocator_traits::deallocate(m_allocator, ptr, this->m_size);
}
}
}

/* Static entry point taking a parsec_data_copy_t*: downcasts it to data_copy_type
 * and forwards to do_allocate(). The `device` argument is not consulted. */
static void allocate(parsec_data_copy_t *parsec_copy, int device) {
  static_cast<data_copy_type*>(parsec_copy)->do_allocate();
}

/* Static entry point taking a parsec_data_copy_t*: downcasts it to data_copy_type
 * and forwards to do_deallocate(). The `device` argument is not consulted. */
static void deallocate(parsec_data_copy_t *parsec_copy, int device) {
  static_cast<data_copy_type*>(parsec_copy)->do_deallocate();
}

public:
Expand All @@ -100,20 +114,37 @@ namespace detail {
constexpr const bool is_empty_allocator = std::is_same_v<Allocator, empty_allocator<value_type>>;
assert(is_empty_allocator);
m_ptr = std::move(ptr);
this->m_size = size;
this->dtt = parsec_datatype_int8_t;
this->device_private = const_cast<value_type*>(to_address(m_ptr));
}

/* Set up this copy for a buffer of `size` using allocator `alloc`.
 * Asserts that Allocator is not empty_allocator<value_type>, then records the
 * allocator, the size and the datatype (parsec_datatype_int8_t).
 * With scope == ttg::scope::Allocate nothing is allocated here; alloc_cb and
 * release_cb are installed instead. For any other scope do_allocate() runs immediately.
 * NOTE(review): this span is a rendered diff hunk with both sides interleaved;
 * `allocate(size);` and the `this->device_private = m_ptr;` directly after it look
 * like pre-change lines -- confirm against the real file. */
void construct(std::size_t size,
ttg::scope scope,
const allocator_type& alloc = allocator_type()) {
constexpr const bool is_empty_allocator = std::is_same_v<Allocator, empty_allocator<value_type>>;
assert(!is_empty_allocator);
m_allocator = alloc;
allocate(size);
this->device_private = m_ptr;
this->m_size = size;
this->dtt = parsec_datatype_int8_t;
if (scope == ttg::scope::Allocate) {
/* if the user only requests an allocation on the device
* we don't allocate host memory but provide PaRSEC with
* a way to request host memory from us. */
this->alloc_cb = &allocate;
this->release_cb = &deallocate;
} else {
/* the user requested that the data be sync'ed into the device
* so we need to provide host memory for the user to fill prior */
do_allocate();
this->device_private = m_ptr;
}
}

/* Destructor: clears alloc_cb and release_cb, then releases any held buffer
 * through do_deallocate().
 * NOTE(review): `this->deallocate();` looks like the pre-change line of this diff
 * hunk, replaced by the three statements that follow -- confirm against the real file. */
~data_copy_type() {
this->deallocate();
this->alloc_cb = nullptr;
this->release_cb = nullptr;
this->do_deallocate();
}
};

Expand Down Expand Up @@ -143,7 +174,7 @@ namespace detail {

/* create the host copy and allocate host memory */
data_copy_type *copy = PARSEC_OBJ_NEW(data_copy_type);
copy->construct(size, allocator);
copy->construct(size, scope, allocator);
parsec_data_copy_attach(data, copy, 0);

/* adjust data flags */
Expand Down
7 changes: 6 additions & 1 deletion ttg/ttg/parsec/devicefunc.h
Original file line number Diff line number Diff line change
Expand Up @@ -201,11 +201,16 @@ namespace ttg_parsec {
/* enqueue the transfer into the compute stream to come back once the compute and transfer are complete */
if (data->owner_device != 0) {
parsec_device_gpu_module_t *device_module = detail::parsec_ttg_caller->dev_ptr->device;
if (nullptr == data->device_copies[0]->device_private) {
assert(nullptr != data->device_copies[0]->alloc_cb);
data->device_copies[0]->alloc_cb(data->device_copies[0], 0);
}

int ret = device_module->memcpy_async(device_module, stream,
data->device_copies[0]->device_private,
data->device_copies[data->owner_device]->device_private,
data->nb_elts, parsec_device_gpu_transfer_direction_d2h);
assert(ret == PARSEC_SUCCESS);
if (ret != PARSEC_SUCCESS) throw std::runtime_error("Failed to copy data from device to host!");
}
if constexpr (sizeof...(Is) > 0) {
// recursion
Expand Down
Loading