diff --git a/CMakeLists.txt b/CMakeLists.txt index 0e328a043..4175ab2cd 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -2,6 +2,7 @@ # Copyright (c) 2010-2024 The University of Tennessee and The University # of Tennessee Research Foundation. All rights # reserved. +# Copyright (c) 2025 NVIDIA Corporation. All rights reserved. # cmake_minimum_required (VERSION 3.21) project (PARSEC C) @@ -536,6 +537,7 @@ if( NOT PARSEC_HAVE_RUSAGE_THREAD ) endif( NOT PARSEC_HAVE_RUSAGE_THREAD) check_include_files(limits.h PARSEC_HAVE_LIMITS_H) check_include_files(string.h PARSEC_HAVE_STRING_H) +check_include_files(strings.h PARSEC_HAVE_STRINGS_H) check_include_files(libgen.h PARSEC_HAVE_GEN_H) check_include_files(complex.h PARSEC_HAVE_COMPLEX_H) check_include_files(sys/param.h PARSEC_HAVE_SYS_PARAM_H) diff --git a/parsec/arena.c b/parsec/arena.c index ca7def080..0c56060c2 100644 --- a/parsec/arena.c +++ b/parsec/arena.c @@ -2,6 +2,7 @@ * Copyright (c) 2010-2023 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. + * Copyright (c) 2025 NVIDIA Corporation. All rights reserved. */ #include "parsec/parsec_config.h" @@ -96,7 +97,7 @@ int parsec_arena_construct(parsec_arena_t* arena, parsec_arena_max_cached_memory); } -static void parsec_arena_destructor(parsec_arena_t* arena) +static int parsec_arena_destructor(parsec_arena_t* arena) { parsec_list_item_t* item; @@ -116,6 +117,7 @@ static void parsec_arena_destructor(parsec_arena_t* arena) } PARSEC_OBJ_DESTRUCT(&arena->area_lifo); } + return 0; } PARSEC_OBJ_CLASS_INSTANCE(parsec_arena_t, parsec_object_t, NULL, parsec_arena_destructor); @@ -223,7 +225,7 @@ int parsec_arena_allocate_device_private(parsec_data_copy_t *copy, assert(0 == (((ptrdiff_t)chunk->data) % arena->alignment)); assert((arena->elem_size + (ptrdiff_t)chunk->data) <= (size + (ptrdiff_t)chunk)); - data->nb_elts = count * arena->elem_size; + data->span = count * arena->elem_size; copy->flags = PARSEC_DATA_FLAG_ARENA | PARSEC_DATA_FLAG_PARSEC_OWNED | diff --git a/parsec/class/info.c b/parsec/class/info.c index 789f77f0d..955a7f7b6 100644 --- a/parsec/class/info.c +++ b/parsec/class/info.c @@ -2,7 +2,7 @@ * Copyright (c) 2020-2024 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. - * Copyright (c) 2024 NVIDIA Corporation. All rights reserved. + * Copyright (c) 2024-2025 NVIDIA Corporation. All rights reserved. 
*/ #include "parsec/parsec_config.h" @@ -21,7 +21,7 @@ static void parsec_info_constructor(parsec_object_t *obj) PARSEC_OBJ_CONSTRUCT(&nfo->ioa_list, parsec_list_t); } -static void parsec_info_destructor(parsec_object_t *obj) +static int parsec_info_destructor(parsec_object_t *obj) { parsec_info_t *nfo = (parsec_info_t*)obj; parsec_list_item_t *item, *next; @@ -34,6 +34,7 @@ static void parsec_info_destructor(parsec_object_t *obj) } PARSEC_OBJ_DESTRUCT(&nfo->ioa_list); /* nfo->info_list is the parent and will be destructed at exit */ + return 0; } PARSEC_OBJ_CLASS_INSTANCE(parsec_info_t, parsec_list_t, parsec_info_constructor, parsec_info_destructor); @@ -223,7 +224,7 @@ void parsec_info_object_array_init(parsec_info_object_array_t *oa, parsec_info_t oa->cons_obj = cons_obj; } -static void parsec_info_object_array_destructor(parsec_object_t *obj) +static int parsec_info_object_array_destructor(parsec_object_t *obj) { parsec_list_item_t *next, *item; parsec_info_object_array_t *oa = (parsec_info_object_array_t*)obj; @@ -246,6 +247,7 @@ static void parsec_info_object_array_destructor(parsec_object_t *obj) oa->info_objects = NULL; oa->infos = NULL; oa->known_infos = -1; + return 0; } PARSEC_OBJ_CLASS_INSTANCE(parsec_info_object_array_t, parsec_list_item_t, diff --git a/parsec/class/parsec_datacopy_future.c b/parsec/class/parsec_datacopy_future.c index c7cef96fd..35022ec96 100644 --- a/parsec/class/parsec_datacopy_future.c +++ b/parsec/class/parsec_datacopy_future.c @@ -2,7 +2,7 @@ * Copyright (c) 2018-2024 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. - * Copyright (c) 2023 NVIDIA CORPORATION. All rights reserved. + * Copyright (c) 2023-2025 NVIDIA CORPORATION. All rights reserved. */ #include "parsec/parsec_config.h" #include "parsec/class/parsec_future.h" @@ -12,7 +12,7 @@ static void parsec_datacopy_future_construct(parsec_base_future_t* future); static void parsec_datacopy_future_cleanup_nested(parsec_base_future_t* future); -static void parsec_datacopy_future_destruct(parsec_base_future_t* future); +static int parsec_datacopy_future_destruct(parsec_base_future_t* future); static void parsec_datacopy_future_init(parsec_base_future_t* future, parsec_future_cb_fulfill cb, ...); @@ -295,7 +295,7 @@ static void parsec_datacopy_future_cleanup_nested(parsec_base_future_t* future) * * @param[in] future to be destructed. */ -static void parsec_datacopy_future_destruct(parsec_base_future_t* future) +static int parsec_datacopy_future_destruct(parsec_base_future_t* future) { parsec_datacopy_future_t* d_fut = (parsec_datacopy_future_t*)future; @@ -313,6 +313,7 @@ static void parsec_datacopy_future_destruct(parsec_base_future_t* future) if(d_fut->cb_cleanup != NULL){ d_fut->cb_cleanup(future); } + return 0; } PARSEC_OBJ_CLASS_INSTANCE(parsec_datacopy_future_t, parsec_base_future_t, diff --git a/parsec/class/parsec_hash_table.c b/parsec/class/parsec_hash_table.c index b39960878..7db104e4a 100644 --- a/parsec/class/parsec_hash_table.c +++ b/parsec/class/parsec_hash_table.c @@ -2,6 +2,7 @@ * Copyright (c) 2009-2023 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. + * Copyright (c) 2025 NVIDIA Corporation. All rights reserved. 
*/ #include @@ -261,7 +262,7 @@ void parsec_hash_table_unlock_bucket_handle_impl(parsec_hash_table_t *ht, } -void parsec_hash_table_fini(parsec_hash_table_t *ht) +int parsec_hash_table_fini(parsec_hash_table_t *ht) { parsec_hash_table_head_t *head, *next; head = ht->rw_hash; @@ -279,6 +280,7 @@ void parsec_hash_table_fini(parsec_hash_table_t *ht) head = next; } ht->rw_hash = NULL; + return 0; } void parsec_hash_table_nolock_insert(parsec_hash_table_t *ht, parsec_hash_table_item_t *item) diff --git a/parsec/class/parsec_hash_table.h b/parsec/class/parsec_hash_table.h index 7c6eb3d40..5c7727517 100644 --- a/parsec/class/parsec_hash_table.h +++ b/parsec/class/parsec_hash_table.h @@ -2,6 +2,7 @@ * Copyright (c) 2009-2022 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. + * Copyright (c) 2025 NVIDIA Corporation. All rights reserved. */ #ifndef _parsec_hash_table_h @@ -210,8 +211,11 @@ void parsec_hash_table_unlock_bucket_handle_impl(parsec_hash_table_t *ht, * Releases the resources allocated by the hash table. * In debug mode, will assert if the hash table is not empty * @arg[inout] ht the hash table to release + * @return As all destructors, it shall return 0 to continue the chain of destructors and + * 1 to stop the chain and save the object from oblivion. The user will then be in + * charge of freeing the object. */ -void parsec_hash_table_fini(parsec_hash_table_t *ht); +int parsec_hash_table_fini(parsec_hash_table_t *ht); /** * @brief Insert element in a hash table without diff --git a/parsec/class/parsec_list.c b/parsec/class/parsec_list.c index 11366ce4c..39c4666cb 100644 --- a/parsec/class/parsec_list.c +++ b/parsec/class/parsec_list.c @@ -2,6 +2,7 @@ * Copyright (c) 2013-2021 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. + * Copyright (c) 2025 NVIDIA Corporation. All rights reserved. */ #include "parsec/parsec_config.h" @@ -39,10 +40,11 @@ parsec_list_construct( parsec_list_t* list ) parsec_atomic_lock_init(&list->atomic_lock); } -static inline void +static inline int parsec_list_destruct( parsec_list_t* list ) { assert(parsec_list_is_empty(list)); (void)list; + return 0; } PARSEC_OBJ_CLASS_INSTANCE(parsec_list_t, parsec_object_t, diff --git a/parsec/class/parsec_object.c b/parsec/class/parsec_object.c index 14f3956b5..b6c181898 100644 --- a/parsec/class/parsec_object.c +++ b/parsec/class/parsec_object.c @@ -9,6 +9,7 @@ * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. + * Copyright (c) 2025 NVIDIA Corporation. All rights reserved. * $COPYRIGHT$ * * Additional copyrights may follow @@ -35,15 +36,15 @@ * and no constructor or destructor. 
*/ parsec_class_t parsec_object_t_class = { - "parsec_object_t", /* name */ - NULL, /* parent class */ - NULL, /* constructor */ - NULL, /* destructor */ - 1, /* initialized -- this class is preinitialized */ - 0, /* class hierarchy depth */ - NULL, /* array of constructors */ - NULL, /* array of destructors */ - sizeof(parsec_object_t) /* size of the opal object */ + "parsec_object_t", /* name */ + NULL, /* parent class */ + NULL, /* constructor */ + NULL, /* destructor */ + 1, /* initialized -- this class is preinitialized */ + 0, /* class hierarchy depth */ + NULL, /* array of constructors */ + NULL, /* array of destructors */ + sizeof(parsec_object_t) /* size of the opal object */ }; /* @@ -135,7 +136,7 @@ void parsec_class_initialize(parsec_class_t *cls) exit(-1); } cls->cls_destruct_array = - cls->cls_construct_array + cls_construct_array_count + 1; + (parsec_destruct_t*)(cls->cls_construct_array + cls_construct_array_count + 1); /* * The constructor array is reversed, so start at the end diff --git a/parsec/class/parsec_object.h b/parsec/class/parsec_object.h index b94151ab7..d8828fc69 100644 --- a/parsec/class/parsec_object.h +++ b/parsec/class/parsec_object.h @@ -10,6 +10,7 @@ * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. * Copyright (c) 2007 Cisco Systems, Inc. All rights reserved. + * Copyright (c) 2025 NVIDIA Corporation. All rights reserved. * $COPYRIGHT$ * * Additional copyrights may follow @@ -136,7 +137,8 @@ BEGIN_C_DECLS typedef struct parsec_object_t parsec_object_t; typedef struct parsec_class_t parsec_class_t; typedef void (*parsec_construct_t) (parsec_object_t *); -typedef void (*parsec_destruct_t) (parsec_object_t *); +/* Class destructor: Returns 0 to continue through the class inheritance, or 1 to stop */ +typedef int (*parsec_destruct_t) (parsec_object_t *); /* types **************************************************************/ @@ -306,26 +308,28 @@ static inline parsec_object_t *parsec_obj_new_debug(parsec_class_t* type, const * @param object Pointer to the object */ #if defined(PARSEC_DEBUG_PARANOID) -#define PARSEC_OBJ_RELEASE(object) \ - do { \ - assert(NULL != ((parsec_object_t *) (object))->obj_class); \ - assert(PARSEC_OBJ_MAGIC_ID == ((parsec_object_t *) (object))->obj_magic_id); \ - if (0 == parsec_obj_update((parsec_object_t *) (object), -1)) { \ - parsec_obj_run_destructors((parsec_object_t *) (object)); \ - PARSEC_OBJ_SET_MAGIC_ID((object), 0); \ - PARSEC_OBJ_REMEMBER_FILE_AND_LINENO( object, __FILE__, __LINE__ ); \ - free(object); \ - object = NULL; \ - } \ +#define PARSEC_OBJ_RELEASE(object) \ + do \ + { \ + assert(NULL != ((parsec_object_t *)(object))->obj_class); \ + assert(PARSEC_OBJ_MAGIC_ID == ((parsec_object_t *)(object))->obj_magic_id); \ + if (0 == parsec_obj_update((parsec_object_t *)(object), -1)) \ + { \ + PARSEC_OBJ_SET_MAGIC_ID((object), 0); \ + PARSEC_OBJ_REMEMBER_FILE_AND_LINENO(object, __FILE__, __LINE__); \ + if (0 == parsec_obj_run_destructors((parsec_object_t *)(object))) \ + free(object); \ + object = NULL; \ + } \ } while (0) #else -#define PARSEC_OBJ_RELEASE(object) \ - do { \ - if (0 == parsec_obj_update((parsec_object_t *) (object), -1)) { \ - parsec_obj_run_destructors((parsec_object_t *) (object)); \ - free(object); \ - object = NULL; \ - } \ +#define PARSEC_OBJ_RELEASE(object) \ + do { \ + if (0 == parsec_obj_update((parsec_object_t *) (object), -1)) { \ + if (0 == parsec_obj_run_destructors((parsec_object_t *) (object))) \ + free(object); \ + object = NULL; \ + } \ } while 
(0) #endif @@ -435,7 +439,7 @@ static inline void parsec_obj_run_constructors(parsec_object_t * object) * * @param object Pointer to the object. */ -static inline void parsec_obj_run_destructors(parsec_object_t * object) +static inline int parsec_obj_run_destructors(parsec_object_t * object) { parsec_destruct_t* cls_destruct; @@ -443,9 +447,15 @@ static inline void parsec_obj_run_destructors(parsec_object_t * object) cls_destruct = object->obj_class->cls_destruct_array; while( NULL != *cls_destruct ) { - (*cls_destruct)(object); + /* Any destructor is allowed to withdraw the object from the complete release, and thus prevent the + * base object management from freeing the object. Instead, the destructor will have to recycle the object + * by some internal means, and free it later. + */ + if (0 != (*cls_destruct)(object)) + return 1; cls_destruct++; } + return 0; } diff --git a/parsec/class/parsec_value_array.c b/parsec/class/parsec_value_array.c index 6818c4ab9..3a4b63d00 100644 --- a/parsec/class/parsec_value_array.c +++ b/parsec/class/parsec_value_array.c @@ -9,6 +9,7 @@ * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. + * Copyright (c) 2025 NVIDIA Corporation. All rights reserved. * $COPYRIGHT$ * * Additional copyrights may follow @@ -28,10 +29,11 @@ static void parsec_value_array_construct(parsec_value_array_t* array) array->array_alloc_size = 0; } -static void parsec_value_array_destruct(parsec_value_array_t* array) +static int parsec_value_array_destruct(parsec_value_array_t* array) { if (NULL != array->array_items) free(array->array_items); + return 0; } PARSEC_OBJ_CLASS_INSTANCE( diff --git a/parsec/compound.c b/parsec/compound.c index 7aab685ef..6d683ac45 100644 --- a/parsec/compound.c +++ b/parsec/compound.c @@ -2,6 +2,7 @@ * Copyright (c) 2019-2023 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. + * Copyright (c) 2025 NVIDIA Corporation. All rights reserved. */ #include "parsec/parsec_config.h" @@ -65,7 +66,7 @@ parsec_compound_taskpool_startup( parsec_context_t *context, (void)startup_list; } -static void +static int __parsec_compound_taskpool_destructor( parsec_compound_taskpool_t* compound ) { assert(PARSEC_TASKPOOL_TYPE_COMPOUND == compound->super.taskpool_type); @@ -76,6 +77,7 @@ __parsec_compound_taskpool_destructor( parsec_compound_taskpool_t* compound ) free(compound->super.taskpool_name); compound->super.taskpool_name = NULL; } + return 0; } static void diff --git a/parsec/data.c b/parsec/data.c index 8dffaa027..4b89fd254 100644 --- a/parsec/data.c +++ b/parsec/data.c @@ -2,6 +2,7 @@ * Copyright (c) 2012-2024 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. + * Copyright (c) 2025 NVIDIA Corporation. All rights reserved. 
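The new destructor contract introduced above (destructors now return int, and PARSEC_OBJ_RELEASE only frees the object when the destructor chain returns 0) can be illustrated with a minimal sketch; the my_cached_item_t class and its freelist are hypothetical and are not part of this patch:

#include "parsec/class/parsec_object.h"

/* Hypothetical class used only to illustrate the int-returning destructor protocol. */
typedef struct my_cached_item_s {
    parsec_object_t super;
    struct my_cached_item_s *next_free;
    int payload;
} my_cached_item_t;

/* Simple single-threaded freelist, assumed for the sake of the example. */
static my_cached_item_t *my_freelist = NULL;

static int my_cached_item_destructor(my_cached_item_t *item)
{
    /* Returning 1 stops the destructor chain: the remaining (parent) destructors
     * do not run and PARSEC_OBJ_RELEASE skips the free(). The class now owns the
     * memory and recycles it; returning 0 instead keeps the historical
     * release-and-free behavior. */
    item->next_free = my_freelist;
    my_freelist = item;
    return 1;
}

PARSEC_OBJ_CLASS_INSTANCE(my_cached_item_t, parsec_object_t,
                          NULL, my_cached_item_destructor);

With such a class, PARSEC_OBJ_RELEASE(item) only drops the reference count and hands the storage back to the class instead of calling free().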
*/ #include "parsec/parsec_config.h" @@ -36,7 +37,7 @@ static void parsec_data_copy_construct(parsec_data_copy_t* obj) PARSEC_DEBUG_VERBOSE(20, parsec_debug_output, "Allocate data copy %p", obj); } -static void parsec_data_copy_destruct(parsec_data_copy_t* obj) +static int parsec_data_copy_destruct(parsec_data_copy_t* obj) { PARSEC_DEBUG_VERBOSE(20, parsec_debug_output, "Destruct data copy %p (attached to %p)", obj, obj->original); @@ -52,6 +53,7 @@ static void parsec_data_copy_destruct(parsec_data_copy_t* obj) * obj is already detached from obj->original, but this frees the arena chunk */ parsec_arena_release(obj); } + return 0; } PARSEC_OBJ_CLASS_INSTANCE(parsec_data_copy_t, parsec_list_item_t, @@ -63,7 +65,7 @@ static void parsec_data_construct(parsec_data_t* obj ) obj->owner_device = -1; obj->preferred_device = -1; obj->key = 0; - obj->nb_elts = 0; + obj->span = 0; for( uint32_t i = 0; i < parsec_nb_devices; obj->device_copies[i] = NULL, i++ ); obj->dc = NULL; @@ -71,7 +73,7 @@ static void parsec_data_construct(parsec_data_t* obj ) PARSEC_DEBUG_VERBOSE(20, parsec_debug_output, "Allocate data %p", obj); } -static void parsec_data_destruct(parsec_data_t* obj ) +static int parsec_data_destruct(parsec_data_t* obj ) { PARSEC_DEBUG_VERBOSE(20, parsec_debug_output, "Destruct data %p", obj); for( uint32_t i = 0; i < parsec_nb_devices; i++ ) { @@ -103,6 +105,7 @@ static void parsec_data_destruct(parsec_data_t* obj ) } assert(NULL == obj->device_copies[i]); } + return 0; } PARSEC_OBJ_CLASS_INSTANCE(parsec_data_t, parsec_object_t, @@ -503,7 +506,7 @@ parsec_data_create( parsec_data_t **holder, data->owner_device = 0; data->key = key; data->dc = desc; - data->nb_elts = size; + data->span = size; parsec_data_copy_attach(data, data_copy, 0); if( !parsec_atomic_cas_ptr(holder, NULL, data) ) { @@ -540,7 +543,7 @@ parsec_data_create_with_type( parsec_data_collection_t *desc, clone->owner_device = 0; clone->key = key; clone->dc = desc; - clone->nb_elts = size; + clone->span = size; parsec_data_copy_attach(clone, data_copy, 0); return clone; diff --git a/parsec/data_dist/matrix/broadcast.jdf b/parsec/data_dist/matrix/broadcast.jdf index 6938132d6..714f2289b 100644 --- a/parsec/data_dist/matrix/broadcast.jdf +++ b/parsec/data_dist/matrix/broadcast.jdf @@ -4,6 +4,7 @@ extern "C" %{ * Copyright (c) 2011-2021 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. + * Copyright (c) 2025 NVIDIA Corporation. All rights reserved. */ #include "parsec/data_internal.h" @@ -59,7 +60,7 @@ static parsec_data_t* data_of(parsec_data_collection_t *desc, ...) data->owner_device = 0; data->key = k; data->dc = (parsec_data_collection_t*)desc; - data->nb_elts = 1; + data->span = 1; parsec_data_copy_t* data_copy = (parsec_data_copy_t*)PARSEC_OBJ_NEW(parsec_data_copy_t); parsec_data_copy_attach(data, data_copy, 0); data_copy->device_private = NULL; diff --git a/parsec/data_dist/matrix/map_operator.c b/parsec/data_dist/matrix/map_operator.c index fe7846f8f..c1196346f 100644 --- a/parsec/data_dist/matrix/map_operator.c +++ b/parsec/data_dist/matrix/map_operator.c @@ -2,7 +2,7 @@ * Copyright (c) 2011-2023 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. - * Copyright (c) 2024 NVIDIA Corporation. All rights reserved. + * Copyright (c) 2024-2025 NVIDIA Corporation. All rights reserved. 
*/ #include "parsec/runtime.h" @@ -513,13 +513,14 @@ __parsec_map_operator_constructor(parsec_map_operator_taskpool_t* tp ) #endif /* defined(PARSEC_PROF_TRACE) */ } -static void +static int __parsec_map_operator_destructor(parsec_map_operator_taskpool_t* tp) { if( NULL != tp->super.taskpool_name ) { free(tp->super.taskpool_name); tp->super.taskpool_name = NULL; } + return 0; } PARSEC_OBJ_CLASS_INSTANCE(parsec_map_operator_taskpool_t, parsec_taskpool_t, diff --git a/parsec/data_dist/matrix/redistribute/redistribute_wrapper.c b/parsec/data_dist/matrix/redistribute/redistribute_wrapper.c index 333800558..ba6a16999 100644 --- a/parsec/data_dist/matrix/redistribute/redistribute_wrapper.c +++ b/parsec/data_dist/matrix/redistribute/redistribute_wrapper.c @@ -2,6 +2,7 @@ * Copyright (c) 2017-2023 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. + * Copyright (c) 2025 NVIDIA Corporation. All rights reserved. */ #include "redistribute_internal.h" #include "redistribute.h" @@ -143,7 +144,7 @@ parsec_redistribute_New(parsec_tiled_matrix_t *dcY, /** * @param [inout] the parsec object to destroy */ -static void +static int __parsec_redistribute_destructor(parsec_redistribute_taskpool_t *redistribute_taskpool) { /* Optimized version: tile sizes of source and target ar the same, @@ -163,6 +164,7 @@ __parsec_redistribute_destructor(parsec_redistribute_taskpool_t *redistribute_ta // parsec_del2arena(&redistribute_taskpool->arenas_datatypes[PARSEC_redistribute_SOURCE_ADT_IDX]); parsec_del2arena(&redistribute_taskpool->arenas_datatypes[PARSEC_redistribute_INNER_ADT_IDX]); } + return 0; } PARSEC_OBJ_CLASS_INSTANCE(parsec_redistribute_taskpool_t, parsec_taskpool_t, diff --git a/parsec/data_dist/matrix/reduce_wrapper.c b/parsec/data_dist/matrix/reduce_wrapper.c index a0cae6996..a518911a9 100644 --- a/parsec/data_dist/matrix/reduce_wrapper.c +++ b/parsec/data_dist/matrix/reduce_wrapper.c @@ -2,6 +2,7 @@ * Copyright (c) 2011-2022 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. + * Copyright (c) 2025 NVIDIA Corporation. All rights reserved. */ #include "parsec/parsec_config.h" @@ -12,10 +13,11 @@ #include "reduce_col.h" #include "reduce_row.h" -static void +static int __parsec_reduce_col_destructor(parsec_reduce_col_taskpool_t* tp) { parsec_type_free(&tp->arenas_datatypes[PARSEC_reduce_col_DEFAULT_ADT_IDX].opaque_dtt); + return 0; } PARSEC_OBJ_CLASS_INSTANCE(parsec_reduce_col_taskpool_t, parsec_taskpool_t, @@ -49,10 +51,11 @@ parsec_reduce_col_New( const parsec_tiled_matrix_t* src, -static void +static int __parsec_reduce_row_destructor(parsec_reduce_row_taskpool_t* tp) { parsec_type_free(&tp->arenas_datatypes[PARSEC_reduce_row_DEFAULT_ADT_IDX].opaque_dtt); + return 0; } PARSEC_OBJ_CLASS_INSTANCE(parsec_reduce_row_taskpool_t, parsec_taskpool_t, diff --git a/parsec/data_internal.h b/parsec/data_internal.h index 4b4a396d2..b9cf6c5e0 100644 --- a/parsec/data_internal.h +++ b/parsec/data_internal.h @@ -2,6 +2,7 @@ * Copyright (c) 2015-2024 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. + * Copyright (c) 2025 NVIDIA Corporation. All rights reserved. */ #if !defined(PARSEC_CONFIG_H_HAS_BEEN_INCLUDED) @@ -36,7 +37,7 @@ struct parsec_data_s { * which device this data should be modified RW when there * are multiple choices. -1 means no preference. 
 */ struct parsec_data_collection_s* dc; - size_t nb_elts; /* size in bytes of the memory layout */ + size_t span; /* size in bytes of the memory layout */ struct parsec_data_copy_s *device_copies[]; /* this array allocated according to the number of devices * (parsec_nb_devices). It points to the most recent * version of the data. diff --git a/parsec/interfaces/dtd/insert_function.c b/parsec/interfaces/dtd/insert_function.c index af84dba73..629567722 100644 --- a/parsec/interfaces/dtd/insert_function.c +++ b/parsec/interfaces/dtd/insert_function.c @@ -2,7 +2,7 @@ * Copyright (c) 2013-2024 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. - * Copyright (c) 2023-2024 NVIDIA Corporation. All rights reserved. + * Copyright (c) 2023-2025 NVIDIA Corporation. All rights reserved. */ /* **************************************************************************** */ @@ -352,7 +352,7 @@ void parsec_dtd_taskpool_constructor(parsec_dtd_taskpool_t *tp) * @ingroup DTD_INTERFACE_INTERNAL * ******************************************************************************/ -void +int parsec_dtd_taskpool_destructor(parsec_dtd_taskpool_t *tp) { uint32_t i; @@ -424,6 +424,8 @@ parsec_dtd_taskpool_destructor(parsec_dtd_taskpool_t *tp) parsec_hash_table_fini(tp->function_h_table); PARSEC_OBJ_RELEASE(tp->function_h_table); + + return 0; } /* To create object of class parsec_dtd_taskpool_t that inherits parsec_taskpool_t @@ -2293,7 +2295,7 @@ static parsec_hook_return_t parsec_dtd_gpu_task_submit(parsec_execution_stream_t if(flow->op_type & PARSEC_PUSHOUT) gpu_task->pushout |= 1<<i; gpu_task->flow[i] = dtd_tc->super.in[i]; - gpu_task->flow_nb_elts[i] = this_task->data[i].data_in->original->nb_elts; + gpu_task->flow_span[i] = this_task->data[i].data_in->original->span; } parsec_device_module_t *device = this_task->selected_device; diff --git a/parsec/interfaces/ptg/ptg-compiler/jdf2c.c b/parsec/interfaces/ptg/ptg-compiler/jdf2c.c index 3c81a0158..eeea0cb0f 100644 --- a/parsec/interfaces/ptg/ptg-compiler/jdf2c.c +++ b/parsec/interfaces/ptg/ptg-compiler/jdf2c.c @@ -2,7 +2,7 @@ * Copyright (c) 2009-2024 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. - * Copyright (c) 2024 NVIDIA Corporation. All rights reserved. + * Copyright (c) 2024-2025 NVIDIA Corporation. All rights reserved.
*/ #include "parsec/parsec_config.h" @@ -4534,7 +4534,7 @@ static void jdf_generate_destructor( const jdf_t *jdf ) string_arena_t *sa1 = string_arena_new(64); jdf_function_entry_t* f; - coutput("static void __parsec_%s_internal_destructor( __parsec_%s_internal_taskpool_t *__parsec_tp )\n" + coutput("static int __parsec_%s_internal_destructor( __parsec_%s_internal_taskpool_t *__parsec_tp )\n" "{\n" " uint32_t i;\n", jdf_basename, jdf_basename); @@ -4630,9 +4630,9 @@ static void jdf_generate_destructor( const jdf_t *jdf ) " if( PARSEC_SUCCESS != device->taskpool_unregister(device, &__parsec_tp->super.super) ) continue;\n" " }\n"); - coutput(" free(__parsec_tp->super.super.taskpool_name); __parsec_tp->super.super.taskpool_name = NULL;\n"); - - coutput("}\n\n"); + coutput(" free(__parsec_tp->super.super.taskpool_name); __parsec_tp->super.super.taskpool_name = NULL;\n" + " return 0;\n" + "}\n\n"); string_arena_free(sa); string_arena_free(sa1); @@ -6858,17 +6858,17 @@ static void jdf_generate_code_hook_gpu(const jdf_t *jdf, fl->varname, JDF_OBJECT_LINENO(fl)); exit(-1); } - coutput(" gpu_task->flow_nb_elts[%d] = 0;\n", di); + coutput(" gpu_task->flow_span[%d] = 0;\n", di); }else{ coutput(" // A shortcut to check if the flow exists\n"); coutput(" if (gpu_task->ec->data[%d].data_in != NULL) {\n", di); if(size_property == NULL){ - coutput(" gpu_task->flow_nb_elts[%d] = gpu_task->ec->data[%d].data_in->original->nb_elts;\n", di, di); + coutput(" gpu_task->flow_span[%d] = gpu_task->ec->data[%d].data_in->original->span;\n", di, di); }else{ - coutput(" gpu_task->flow_nb_elts[%d] = %s;\n", + coutput(" gpu_task->flow_span[%d] = %s;\n", di, dump_expr((void**)size_property->expr, &info)); if( (stage_in_property == NULL) || ( stage_out_property == NULL )){ - coutput(" assert(gpu_task->ec->data[%d].data_in->original->nb_elts <= %s);\n", + coutput(" assert(gpu_task->ec->data[%d].data_in->original->span <= %s);\n", di, dump_expr((void**)size_property->expr, &info)); } diff --git a/parsec/mca/device/device_gpu.c b/parsec/mca/device/device_gpu.c index d932e975e..53578a460 100644 --- a/parsec/mca/device/device_gpu.c +++ b/parsec/mca/device/device_gpu.c @@ -3,7 +3,7 @@ * Copyright (c) 2021-2024 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. - * Copyright (c) 2024 NVIDIA Corporation. All rights reserved. + * Copyright (c) 2024-2025 NVIDIA Corporation. All rights reserved. 
*/ #include "parsec/parsec_config.h" @@ -474,7 +474,7 @@ parsec_device_data_advise(parsec_device_module_t *dev, parsec_data_t *data, int PARSEC_OBJ_CONSTRUCT(gpu_task->ec, parsec_task_t); gpu_task->ec->task_class = &parsec_device_data_prefetch_tc; gpu_task->flow[0] = &parsec_device_data_prefetch_flow; - gpu_task->flow_nb_elts[0] = data->device_copies[ data->owner_device ]->original->nb_elts; + gpu_task->flow_span[0] = data->device_copies[ data->owner_device ]->original->span; gpu_task->stage_in = parsec_default_gpu_stage_in; gpu_task->stage_out = parsec_default_gpu_stage_out; PARSEC_DEBUG_VERBOSE(20, parsec_debug_output, "Retain data copy %p [ref_count %d]", @@ -900,12 +900,12 @@ parsec_device_data_reserve_space( parsec_device_gpu_module_t* gpu_device, PARSEC_DEBUG_VERBOSE(20, parsec_gpu_output_stream, "GPU[%d:%s]:%s: Allocate GPU copy %p sz %zu [ref_count %d] for data %p", gpu_device->super.device_index, gpu_device->super.name, task_name, - gpu_elem, gpu_task->flow_nb_elts[i], gpu_elem->super.super.obj_reference_count, master); + gpu_elem, gpu_task->flow_span[i], gpu_elem->super.super.obj_reference_count, master); gpu_elem->flags = PARSEC_DATA_FLAG_PARSEC_OWNED | PARSEC_DATA_FLAG_PARSEC_MANAGED; malloc_data: copy_readers_update = 0; assert(0 != (gpu_elem->flags & PARSEC_DATA_FLAG_PARSEC_OWNED) ); - gpu_elem->device_private = zone_malloc(gpu_device->memory, gpu_task->flow_nb_elts[i]); + gpu_elem->device_private = zone_malloc(gpu_device->memory, gpu_task->flow_span[i]); if( NULL == gpu_elem->device_private ) { #endif @@ -1110,7 +1110,7 @@ parsec_device_data_reserve_space( parsec_device_gpu_module_t* gpu_device, parsec_profiling_trace_flags(gpu_device->exec_stream[0]->profiling, parsec_gpu_allocate_memory_key, (int64_t)gpu_elem->device_private, gpu_device->super.device_index, - &gpu_task->flow_nb_elts[i], PARSEC_PROFILING_EVENT_COUNTER|PARSEC_PROFILING_EVENT_HAS_INFO); + &gpu_task->flow_span[i], PARSEC_PROFILING_EVENT_COUNTER|PARSEC_PROFILING_EVENT_HAS_INFO); } #endif #else @@ -1189,8 +1189,8 @@ parsec_default_gpu_stage_in(parsec_gpu_task_t *gtask, dir = parsec_device_gpu_transfer_direction_h2d; } - count = (source->original->nb_elts <= dest->original->nb_elts) ? - source->original->nb_elts : dest->original->nb_elts; + count = (source->original->span <= dest->original->span) ? + source->original->span : dest->original->span; ret = dst_dev->memcpy_async( dst_dev, gpu_stream, dest->device_private, source->device_private, @@ -1231,8 +1231,8 @@ parsec_default_gpu_stage_out(parsec_gpu_task_t *gtask, dst_dev = (parsec_device_gpu_module_t*)parsec_mca_device_get(dest->device_index); src_dev = (parsec_device_gpu_module_t*)parsec_mca_device_get(source->device_index); - count = (source->original->nb_elts <= dest->original->nb_elts) ? source->original->nb_elts : - dest->original->nb_elts; + count = (source->original->span <= dest->original->span) ? 
source->original->span : + dest->original->span; if( src_dev->super.type == dst_dev->super.type ) { assert( src_dev->peer_access_mask & (1 << dst_dev->super.device_index) ); dir = parsec_device_gpu_transfer_direction_d2d; @@ -1271,7 +1271,7 @@ parsec_device_data_stage_in( parsec_device_gpu_module_t* gpu_device, parsec_data_copy_t *candidate = task_data->data_in; /* best candidate for now */ parsec_data_t* original = candidate->original; parsec_gpu_data_copy_t* gpu_elem = task_data->data_out; - size_t nb_elts = gpu_task->flow_nb_elts[flow->flow_index]; + size_t span = gpu_task->flow_span[flow->flow_index]; int transfer_from = -1; if( gpu_task->task_type == PARSEC_GPU_TASK_TYPE_PREFETCH ) { @@ -1316,7 +1316,7 @@ parsec_device_data_stage_in( parsec_device_gpu_module_t* gpu_device, transfer_from = -1; /* Update the transferred required_data_in size */ - gpu_device->super.required_data_in += original->nb_elts; + gpu_device->super.required_data_in += original->span; if( -1 == transfer_from ) { /* Do not need to be transferred */ gpu_elem->data_transfer_status = PARSEC_DATA_STATUS_COMPLETE_TRANSFER; @@ -1341,7 +1341,7 @@ parsec_device_data_stage_in( parsec_device_gpu_module_t* gpu_device, PARSEC_DEBUG_VERBOSE(10, parsec_gpu_output_stream, "GPU[%d:%s]:\t\tMove data copy %p [ref_count %d, key %x] of %zu bytes: data copy is already under transfer, ignoring double request", gpu_device->super.device_index, gpu_device->super.name, - gpu_elem, gpu_elem->super.super.obj_reference_count, original->key, nb_elts); + gpu_elem, gpu_elem->super.super.obj_reference_count, original->key, span); parsec_atomic_unlock( &original->lock ); return 1; /* positive returns have special meaning and are used for optimizations */ } @@ -1446,7 +1446,7 @@ parsec_device_data_stage_in( parsec_device_gpu_module_t* gpu_device, "GPU[%d:%s]:\t\tMove %s data copy %p [ref_count %d, key %x] of %zu bytes\t(src dev: %d, v:%d, ptr:%p, copy:%p [ref_count %d, under_transfer: %d, coherency_state: %d] / dst dev: %d, v:%d, ptr:%p)", gpu_device->super.device_index, gpu_device->super.name, PARSEC_DEV_IS_GPU(candidate_dev->super.type) ? "D2D": "H2D", - gpu_elem, gpu_elem->super.super.obj_reference_count, original->key, nb_elts, + gpu_elem, gpu_elem->super.super.obj_reference_count, original->key, span, candidate_dev->super.device_index, candidate->version, (void*)candidate->device_private, candidate, candidate->super.super.obj_reference_count, candidate->data_transfer_status, candidate->coherency_state, gpu_device->super.device_index, gpu_elem->version, (void*)gpu_elem->device_private); @@ -1484,7 +1484,7 @@ parsec_device_data_stage_in( parsec_device_gpu_module_t* gpu_device, } if(gpu_device->trackable_events & PARSEC_PROFILE_GPU_TRACK_MEM_USE) { parsec_device_gpu_memory_prof_info_t _info; - _info.size = (uint64_t)nb_elts; + _info.size = (uint64_t)span; _info.data_key = gpu_elem->original->key; _info.dc_id = (uint64_t)(gpu_elem->original->dc); parsec_profiling_trace_flags(gpu_stream->profiling, @@ -1504,15 +1504,15 @@ parsec_device_data_stage_in( parsec_device_gpu_module_t* gpu_device, gpu_device->super.device_index, gpu_device->super.name, rc, __func__, __LINE__, candidate->device_private, candidate_dev->super.device_index, candidate_dev->super.name, gpu_elem->device_private, gpu_device->super.device_index, gpu_device->super.name, - nb_elts, (candidate_dev->super.type != gpu_device->super.type)? "H2D": "D2D"); + span, (candidate_dev->super.type != gpu_device->super.type)? 
"H2D": "D2D"); parsec_atomic_unlock( &original->lock ); assert(0); return PARSEC_HOOK_RETURN_ERROR; } assert(candidate_dev->super.device_index < gpu_device->super.data_in_array_size); - gpu_device->super.data_in_from_device[candidate_dev->super.device_index] += nb_elts; + gpu_device->super.data_in_from_device[candidate_dev->super.device_index] += span; if( PARSEC_GPU_TASK_TYPE_KERNEL == gpu_task->task_type ) - gpu_device->super.nb_data_faults += nb_elts; + gpu_device->super.nb_data_faults += span; /* We assign the version of the data preemptively (i.e. before the task is executing) * For read-only data, the GPU copy will get the same version as the source @@ -1597,7 +1597,7 @@ parsec_device_send_transfercomplete_cmd_to_device(parsec_data_copy_t *copy, PARSEC_OBJ_CONSTRUCT(gpu_task->ec, parsec_task_t); gpu_task->ec->task_class = &parsec_device_d2d_complete_tc; gpu_task->flow[0] = &parsec_device_d2d_complete_flow; - gpu_task->flow_nb_elts[0] = copy->original->nb_elts; + gpu_task->flow_span[0] = copy->original->span; gpu_task->stage_in = parsec_default_gpu_stage_in; gpu_task->stage_out = parsec_default_gpu_stage_out; gpu_task->ec->data[0].data_in = copy; /* We need to set not-null in data_in, so that the fake flow is @@ -2104,7 +2104,7 @@ parsec_device_kernel_pop( parsec_device_gpu_module_t *gpu_device, parsec_task_t *this_task = gpu_task->ec; parsec_gpu_data_copy_t *gpu_copy; parsec_data_t *original; - size_t nb_elts; + size_t span; const parsec_flow_t *flow; int return_code = 0, rc, how_many = 0, i, update_data_epoch = 0; #if defined(PARSEC_DEBUG_NOISIER) @@ -2151,7 +2151,7 @@ parsec_device_kernel_pop( parsec_device_gpu_module_t *gpu_device, if( 0 == (gpu_copy->flags & PARSEC_DATA_FLAG_PARSEC_OWNED) ) continue; original = gpu_copy->original; - nb_elts = gpu_task->flow_nb_elts[i]; + span = gpu_task->flow_span[i]; assert( this_task->data[i].data_in == NULL || original == this_task->data[i].data_in->original ); @@ -2201,7 +2201,7 @@ parsec_device_kernel_pop( parsec_device_gpu_module_t *gpu_device, gpu_device->super.device_index, gpu_device->super.name, gpu_copy, gpu_copy->super.super.obj_reference_count, flow->name); /* Stage the transfer of the data back to main memory */ - gpu_device->super.required_data_out += nb_elts; + gpu_device->super.required_data_out += span; assert( ((parsec_list_item_t*)gpu_copy)->list_next == (parsec_list_item_t*)gpu_copy ); assert( ((parsec_list_item_t*)gpu_copy)->list_prev == (parsec_list_item_t*)gpu_copy ); @@ -2249,7 +2249,7 @@ parsec_device_kernel_pop( parsec_device_gpu_module_t *gpu_device, parsec_atomic_unlock(&original->lock); goto release_and_return_error; } - gpu_device->super.data_out_to_host += nb_elts; /* TODO: not hardcoded, use datatype size */ + gpu_device->super.data_out_to_host += span; /* TODO: not hardcoded, use datatype size */ how_many++; } else { assert( 0 == gpu_copy->readers ); diff --git a/parsec/mca/device/device_gpu.h b/parsec/mca/device/device_gpu.h index fa25b87a3..35432f056 100644 --- a/parsec/mca/device/device_gpu.h +++ b/parsec/mca/device/device_gpu.h @@ -2,7 +2,7 @@ * Copyright (c) 2021-2024 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. - * Copyright (c) 2024 NVIDIA Corporation. All rights reserved. + * Copyright (c) 2024-2025 NVIDIA Corporation. All rights reserved. 
*/ #ifndef PARSEC_DEVICE_GPU_H @@ -105,7 +105,7 @@ struct parsec_gpu_task_s { const parsec_flow_t *flow[MAX_PARAM_COUNT]; /* There is no consistent way to access the flows from the task_class, * so the DSL need to provide these flows here. */ - size_t flow_nb_elts[MAX_PARAM_COUNT]; /* for each flow, size of the data to be allocated + size_t flow_span[MAX_PARAM_COUNT]; /* for each flow, size of the data to be allocated * on the GPU. */ parsec_data_collection_t *flow_dc[MAX_PARAM_COUNT]; /* for each flow, data collection from which the data diff --git a/parsec/mca/device/transfer_gpu.c b/parsec/mca/device/transfer_gpu.c index 5d0afb61b..f0f90766e 100644 --- a/parsec/mca/device/transfer_gpu.c +++ b/parsec/mca/device/transfer_gpu.c @@ -2,7 +2,7 @@ * Copyright (c) 2016-2024 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. - * Copyright (c) 2024 NVIDIA Corporation. All rights reserved. + * Copyright (c) 2024-2025 NVIDIA Corporation. All rights reserved. */ #include "parsec/parsec_config.h" @@ -309,7 +309,7 @@ int parsec_gpu_complete_w2r_task(parsec_device_gpu_module_t *gpu_device, parsec_atomic_lock(&gpu_copy->original->lock); gpu_copy->readers--; gpu_copy->data_transfer_status = PARSEC_DATA_STATUS_COMPLETE_TRANSFER; - gpu_device->super.data_out_to_host += gpu_copy->original->nb_elts; /* TODO: not hardcoded, use datatype size */ + gpu_device->super.data_out_to_host += gpu_copy->original->span; /* TODO: not hardcoded, use datatype size */ assert(gpu_copy->readers >= 0); original = gpu_copy->original; diff --git a/parsec/mca/sched/spq/sched_spq_module.c b/parsec/mca/sched/spq/sched_spq_module.c index 0143b73ec..a490054f4 100644 --- a/parsec/mca/sched/spq/sched_spq_module.c +++ b/parsec/mca/sched/spq/sched_spq_module.c @@ -2,6 +2,7 @@ * Copyright (c) 2017-2022 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. + * Copyright (c) 2025 NVIDIA Corporation. All rights reserved. * $COPYRIGHT$ * * Additional copyrights may follow @@ -40,7 +41,7 @@ typedef struct parsec_spq_priority_list_s { PARSEC_DECLSPEC PARSEC_OBJ_CLASS_DECLARATION(parsec_spq_priority_list_t); static inline void parsec_spq_priority_list_construct( parsec_spq_priority_list_t*plist ); -static inline void parsec_spq_priority_list_destruct( parsec_spq_priority_list_t *plist ); +static inline int parsec_spq_priority_list_destruct( parsec_spq_priority_list_t *plist ); PARSEC_OBJ_CLASS_INSTANCE(parsec_spq_priority_list_t, parsec_list_item_t, parsec_spq_priority_list_construct, parsec_spq_priority_list_destruct); @@ -51,9 +52,10 @@ static inline void parsec_spq_priority_list_construct( parsec_spq_priority_list_ plist->prio = -1; } -static inline void parsec_spq_priority_list_destruct( parsec_spq_priority_list_t*plist ) +static inline int parsec_spq_priority_list_destruct( parsec_spq_priority_list_t*plist ) { PARSEC_OBJ_DESTRUCT(&plist->tasks); + return 0; } /* Since we're locking the list for all operations anyway, diff --git a/parsec/parsec.c b/parsec/parsec.c index 3557b15d5..b7d803bd6 100644 --- a/parsec/parsec.c +++ b/parsec/parsec.c @@ -2,6 +2,7 @@ * Copyright (c) 2009-2024 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. + * Copyright (c) 2025 NVIDIA Corporation. All rights reserved. 
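Outside of the PTG- and DTD-generated code touched by this patch, DSLs that assemble a parsec_gpu_task_t by hand fill the renamed field the same way; a minimal sketch follows (the helper name and the nb_flows argument are assumptions for illustration, and the includes assume a build inside the PaRSEC source tree):

#include "parsec/data_internal.h"
#include "parsec/mca/device/device_gpu.h"

/* Illustrative helper: size each flow's GPU allocation from the span (in bytes)
 * of the data attached to the task, mirroring the flow_nb_elts -> flow_span rename. */
static void my_fill_gpu_task_spans(parsec_gpu_task_t *gpu_task,
                                   parsec_task_t *this_task, int nb_flows)
{
    for( int i = 0; i < nb_flows; i++ ) {
        if( NULL == this_task->data[i].data_in ) {
            gpu_task->flow_span[i] = 0;  /* flow unused by this task */
            continue;
        }
        gpu_task->flow_span[i] = this_task->data[i].data_in->original->span;
    }
}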
*/ #include "parsec/parsec_config.h" @@ -21,6 +22,7 @@ #if defined(PARSEC_HAVE_GETOPT_H) #include #endif /* defined(PARSEC_HAVE_GETOPT_H) */ +#include #include "parsec/ayudame.h" #include "parsec/mca/pins/pins.h" @@ -192,7 +194,7 @@ static void __parsec_taskpool_constructor(parsec_taskpool_t* tp) tp->tdm.module = NULL; } -static void __parsec_taskpool_destructor(parsec_taskpool_t* tp) +static int __parsec_taskpool_destructor(parsec_taskpool_t* tp) { if( NULL != tp->context ) { parsec_context_remove_taskpool(tp); @@ -200,6 +202,7 @@ static void __parsec_taskpool_destructor(parsec_taskpool_t* tp) if( NULL != tp->taskpool_name ) { free(tp->taskpool_name); } + return 0; } /* To create object of class parsec_taskpool_t that inherits parsec_list_t @@ -209,7 +212,7 @@ PARSEC_OBJ_CLASS_INSTANCE(parsec_taskpool_t, parsec_list_item_t, __parsec_taskpool_constructor, __parsec_taskpool_destructor); static void __parsec_task_constructor(parsec_task_t* task) { - /* no allocation here, only initalizations: the task_t will be constructed + /* no allocation here, only initializations: the task_t will be constructed * multiple times when push-poped from the mempool */ task->selected_device = NULL; task->selected_chore = -1; @@ -2136,13 +2139,6 @@ void parsec_taskpool_sync_ids_context( intptr_t comm ) parsec_atomic_unlock( &taskpool_array_lock ); } -/* globally synchronize taskpool id's so that next register generates the same - * id at all ranks. */ -void parsec_taskpool_sync_ids( void ) -{ - parsec_taskpool_sync_ids_context( (intptr_t)MPI_COMM_WORLD ); -} - /* Unregister the taskpool with the engine. This make the taskpool_id available for * future taskpools. Beware that in a distributed environment the connected taskpools * must have the same ID. diff --git a/parsec/remote_dep.h b/parsec/remote_dep.h index 931053b7d..aba1a7586 100644 --- a/parsec/remote_dep.h +++ b/parsec/remote_dep.h @@ -2,7 +2,7 @@ * Copyright (c) 2009-2023 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. - * Copyright (c) 2023 NVIDIA CORPORATION. All rights reserved. + * Copyright (c) 2023-2025 NVIDIA CORPORATION. All rights reserved. */ #ifndef __USE_PARSEC_REMOTE_DEP_H__ #define __USE_PARSEC_REMOTE_DEP_H__ @@ -426,18 +426,30 @@ extern int parsec_comm_puts; static inline void remote_dep_rank_to_bit(int rank, uint32_t *bank, uint32_t *bit, int root) { +#ifdef DISTRIBUTED uint32_t nb_nodes = parsec_remote_dep_context.max_nodes_number; uint32_t _rank = (rank + nb_nodes - root) % nb_nodes; *bank = _rank / (8 * sizeof(uint32_t)); *bit = _rank % (8 * sizeof(uint32_t)); +#else + /* it's a lonely world ! */ + *bank = *bit = 0; + (void)rank; (void)root; +#endif /* DISTRIBUTED */ } static inline void remote_dep_bit_to_rank(int *rank, uint32_t bank, uint32_t bit, int root) { +#ifdef DISTRIBUTED int nb_nodes = parsec_remote_dep_context.max_nodes_number; uint32_t _rank = bank * (8 * sizeof(uint32_t)) + bit; *rank = (_rank + root) % nb_nodes; +#else + /* it's a lonely world ! */ + *rank = 0; + (void)bank; (void)bit; (void)root; +#endif /* DISTRIBUTED */ } #endif /* __USE_PARSEC_REMOTE_DEP_H__ */ diff --git a/parsec/remote_dep_mpi.c b/parsec/remote_dep_mpi.c index a06a2088c..28f69e83d 100644 --- a/parsec/remote_dep_mpi.c +++ b/parsec/remote_dep_mpi.c @@ -2,7 +2,7 @@ * Copyright (c) 2009-2024 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. - * Copyright (c) 2023 NVIDIA CORPORATION. All rights reserved. 
+ * Copyright (c) 2023-2025 NVIDIA CORPORATION. All rights reserved. */ #include "parsec/parsec_config.h" @@ -579,6 +579,7 @@ remote_dep_copy_allocate(parsec_dep_type_description_t* data) assert(0 == data->dst_count); return NULL; } + /* predict where the data needs to be located */ dc = parsec_arena_get_copy(data->arena, data->dst_count, 0, data->dst_datatype); dc->coherency_state = PARSEC_DATA_COHERENCY_EXCLUSIVE; @@ -793,7 +794,20 @@ remote_dep_mpi_retrieve_datatype(parsec_execution_stream_t *eu, parsec_ce.pack_size(&parsec_ce, output->data.remote.dst_count, output->data.remote.dst_datatype, &dsize); output->data.remote.src_count = output->data.remote.dst_count = dsize; output->data.remote.src_datatype = output->data.remote.dst_datatype = PARSEC_DATATYPE_PACKED; - + /* Predict where the incoming temporary should be located, by using the data_affinity. + * This only works if the task affinity is linked to the output location of the task, which + * is mostly true for owner-compute type of algorithms. + */ + if (NULL != fct->data_affinity ) { + parsec_data_ref_t dref; + fct->data_affinity(newcontext, &dref); + parsec_data_t* data = dref.dc->data_of_key(dref.dc, dref.key); + if (-1 != data->preferred_device ) { + printf("Data %p[%llx] prefers device %d\n", dref.dc, dref.key, data->preferred_device); + } else { + printf("Data %p[%llx] is owned by device %d\n", dref.dc, dref.key, data->owner_device); + } + } return PARSEC_ITERATE_STOP; } } diff --git a/parsec/runtime.h b/parsec/runtime.h index 55bc4f569..98526a052 100644 --- a/parsec/runtime.h +++ b/parsec/runtime.h @@ -2,6 +2,7 @@ * Copyright (c) 2009-2023 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. + * Copyright (c) 2025 NVIDIA Corporation. All rights reserved. */ #ifndef PARSEC_RUNTIME_H_HAS_BEEN_INCLUDED @@ -561,17 +562,6 @@ void parsec_taskpool_unregister(parsec_taskpool_t* tp); */ void parsec_taskpool_sync_ids_context( intptr_t comm ); -/** - * @brief Globally synchronize taskpool IDs. - * - * @details - * Globally synchronize taskpool IDs so that next register generates the same - * id at all ranks. This is a collective over the communication object - * associated with PaRSEC, and can be used to resolve discrepancies introduced by - * taskpools not registered over all ranks. -*/ -void parsec_taskpool_sync_ids(void); - /** * @brief Returns the execution stream that corresponds to the calling thread * diff --git a/parsec/utils/cmd_line.c b/parsec/utils/cmd_line.c index d10c3fa2f..f63f31623 100644 --- a/parsec/utils/cmd_line.c +++ b/parsec/utils/cmd_line.c @@ -12,6 +12,7 @@ * Copyright (c) 2012 Los Alamos National Security, LLC. * All rights reserved. * Copyright (c) 2012-2013 Cisco Systems, Inc. All rights reserved. + * Copyright (c) 2025 NVIDIA Corporation. All rights reserved.
* $COPYRIGHT$ * * Additional copyrights may follow @@ -69,7 +70,7 @@ struct cmd_line_option_t { }; typedef struct cmd_line_option_t cmd_line_option_t; static void option_constructor(cmd_line_option_t *cmd); -static void option_destructor(cmd_line_option_t *cmd); +static int option_destructor(cmd_line_option_t *cmd); PARSEC_OBJ_CLASS_INSTANCE(cmd_line_option_t, parsec_list_item_t, @@ -101,7 +102,7 @@ struct cmd_line_param_t { }; typedef struct cmd_line_param_t cmd_line_param_t; static void param_constructor(cmd_line_param_t *cmd); -static void param_destructor(cmd_line_param_t *cmd); +static int param_destructor(cmd_line_param_t *cmd); PARSEC_OBJ_CLASS_INSTANCE(cmd_line_param_t, parsec_list_item_t, param_constructor, param_destructor); @@ -110,7 +111,7 @@ PARSEC_OBJ_CLASS_INSTANCE(cmd_line_param_t, * Instantiate the parsec_cmd_line_t class */ static void cmd_line_constructor(parsec_cmd_line_t *cmd); -static void cmd_line_destructor(parsec_cmd_line_t *cmd); +static int cmd_line_destructor(parsec_cmd_line_t *cmd); PARSEC_OBJ_CLASS_INSTANCE(parsec_cmd_line_t, parsec_object_t, cmd_line_constructor, @@ -870,7 +871,7 @@ static void option_constructor(cmd_line_option_t *o) } -static void option_destructor(cmd_line_option_t *o) +static int option_destructor(cmd_line_option_t *o) { if (NULL != o->clo_single_dash_name) { free(o->clo_single_dash_name); @@ -884,6 +885,7 @@ static void option_destructor(cmd_line_option_t *o) if (NULL != o->clo_mca_param_env_var) { free(o->clo_mca_param_env_var); } + return 0; } @@ -896,11 +898,12 @@ static void param_constructor(cmd_line_param_t *p) } -static void param_destructor(cmd_line_param_t *p) +static int param_destructor(cmd_line_param_t *p) { if (NULL != p->clp_argv) { parsec_argv_free(p->clp_argv); } + return 0; } @@ -926,7 +929,7 @@ static void cmd_line_constructor(parsec_cmd_line_t *cmd) } -static void cmd_line_destructor(parsec_cmd_line_t *cmd) +static int cmd_line_destructor(parsec_cmd_line_t *cmd) { parsec_list_item_t *item; @@ -947,6 +950,7 @@ static void cmd_line_destructor(parsec_cmd_line_t *cmd) PARSEC_OBJ_DESTRUCT(&cmd->lcl_options); PARSEC_OBJ_DESTRUCT(&cmd->lcl_params); + return 0; } diff --git a/parsec/utils/mca_param.c b/parsec/utils/mca_param.c index 9baedaa72..d886295fd 100644 --- a/parsec/utils/mca_param.c +++ b/parsec/utils/mca_param.c @@ -12,6 +12,7 @@ * Copyright (c) 2008-2011 Cisco Systems, Inc. All rights reserved. * Copyright (c) 2012 Los Alamos National Security, LLC. All rights * reserved. + * Copyright (c) 2025 NVIDIA Corporation. All rights reserved. 
* $COPYRIGHT$ * * Additional copyrights may follow @@ -135,13 +136,13 @@ static bool lookup_default(parsec_mca_param_t *param, static bool set(parsec_mca_param_type_t type, parsec_mca_param_storage_t *dest, parsec_mca_param_storage_t *src); static void param_constructor(parsec_mca_param_t *p); -static void param_destructor(parsec_mca_param_t *p); +static int param_destructor(parsec_mca_param_t *p); static void fv_constructor(parsec_mca_param_file_value_t *p); -static void fv_destructor(parsec_mca_param_file_value_t *p); +static int fv_destructor(parsec_mca_param_file_value_t *p); static void info_constructor(parsec_mca_param_info_t *p); -static void info_destructor(parsec_mca_param_info_t *p); +static int info_destructor(parsec_mca_param_info_t *p); static void syn_info_constructor(parsec_syn_info_t *si); -static void syn_info_destructor(parsec_syn_info_t *si); +static int syn_info_destructor(parsec_syn_info_t *si); static parsec_mca_param_type_t param_type_from_index (size_t index); /* @@ -1835,7 +1836,7 @@ static void param_constructor(parsec_mca_param_t *p) /* * Free all the contents of a param container */ -static void param_destructor(parsec_mca_param_t *p) +static int param_destructor(parsec_mca_param_t *p) { parsec_list_item_t *item; @@ -1891,6 +1892,7 @@ static void param_destructor(parsec_mca_param_t *p) /* Cheap trick to reset everything to NULL */ param_constructor(p); #endif + return 0; } @@ -1902,7 +1904,7 @@ static void fv_constructor(parsec_mca_param_file_value_t *f) } -static void fv_destructor(parsec_mca_param_file_value_t *f) +static int fv_destructor(parsec_mca_param_file_value_t *f) { if (NULL != f->mbpfv_param) { free(f->mbpfv_param); @@ -1914,6 +1916,7 @@ static void fv_destructor(parsec_mca_param_file_value_t *f) free(f->mbpfv_file); } fv_constructor(f); + return 0; } static void info_constructor(parsec_mca_param_info_t *p) @@ -1936,7 +1939,7 @@ static void info_constructor(parsec_mca_param_info_t *p) p->mbpp_help_msg = NULL; } -static void info_destructor(parsec_mca_param_info_t *p) +static int info_destructor(parsec_mca_param_info_t *p) { if (NULL != p->mbpp_synonyms) { free(p->mbpp_synonyms); @@ -1945,6 +1948,7 @@ static void info_destructor(parsec_mca_param_info_t *p) by value from their corresponding parameter registration */ info_constructor(p); + return 0; } static void syn_info_constructor(parsec_syn_info_t *si) @@ -1954,7 +1958,7 @@ static void syn_info_constructor(parsec_syn_info_t *si) si->si_deprecated = si->si_deprecated_warning_shown = false; } -static void syn_info_destructor(parsec_syn_info_t *si) +static int syn_info_destructor(parsec_syn_info_t *si) { if (NULL != si->si_type_name) { free(si->si_type_name); @@ -1973,6 +1977,7 @@ static void syn_info_destructor(parsec_syn_info_t *si) } syn_info_constructor(si); + return 0; } int parsec_mca_param_find_int_name(const char *type, diff --git a/parsec/utils/process_name.c b/parsec/utils/process_name.c index 4704f3d3f..04595d672 100644 --- a/parsec/utils/process_name.c +++ b/parsec/utils/process_name.c @@ -2,10 +2,14 @@ * Copyright (c) 2014-2024 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. + * Copyright (c) 2025 NVIDIA Corporation. All rights reserved. 
*/ #include "parsec/parsec_config.h" #include #include +#if defined(PARSEC_HAVE_STRINGS_H) +#include +#endif /* defined(PARSEC_HAVE_STRINGS_H) */ #include #include diff --git a/tests/apps/generalized_reduction/BT_reduction_wrapper.c b/tests/apps/generalized_reduction/BT_reduction_wrapper.c index cfd813850..507f407e2 100644 --- a/tests/apps/generalized_reduction/BT_reduction_wrapper.c +++ b/tests/apps/generalized_reduction/BT_reduction_wrapper.c @@ -2,6 +2,7 @@ * Copyright (c) 2009-2022 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. + * Copyright (c) 2025 NVIDIA Corporation. All rights reserved. */ #include "parsec/runtime.h" @@ -19,10 +20,11 @@ static parsec_datatype_t block; #include "BT_reduction_wrapper.h" #include "parsec/data_dist/matrix/two_dim_rectangle_cyclic.h" -static void +static int __parsec_taskpool_BT_reduction_destruct(parsec_BT_reduction_taskpool_t *tp) { parsec_type_free( &(tp->arenas_datatypes[PARSEC_BT_reduction_DEFAULT_ADT_IDX].opaque_dtt) ); + return 0; } PARSEC_OBJ_CLASS_INSTANCE(parsec_BT_reduction_taskpool_t, parsec_taskpool_t, diff --git a/tests/apps/pingpong/rtt_wrapper.c b/tests/apps/pingpong/rtt_wrapper.c index 4c282b301..cb6843f96 100644 --- a/tests/apps/pingpong/rtt_wrapper.c +++ b/tests/apps/pingpong/rtt_wrapper.c @@ -1,8 +1,8 @@ - /* * Copyright (c) 2009-2021 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. + * Copyright (c) 2025 NVIDIA Corporation. All rights reserved. */ #include "parsec/runtime.h" @@ -17,13 +17,14 @@ #include "rtt.h" #include "rtt_wrapper.h" -static void +static int __parsec_rtt_taskpool_destructor(parsec_rtt_taskpool_t *rtt_tp) { /* We have created our own datatype, instead of using a predefined one * so we need to clean up. */ parsec_type_free( &(rtt_tp->arenas_datatypes[PARSEC_rtt_DEFAULT_ADT_IDX].opaque_dtt) ); + return 0; } PARSEC_OBJ_CLASS_INSTANCE(parsec_rtt_taskpool_t, parsec_taskpool_t, diff --git a/tests/collections/redistribute/redistribute_check.jdf b/tests/collections/redistribute/redistribute_check.jdf index daf8317bb..024e55ade 100644 --- a/tests/collections/redistribute/redistribute_check.jdf +++ b/tests/collections/redistribute/redistribute_check.jdf @@ -3,6 +3,7 @@ extern "C" %{ * Copyright (c) 2017-2021 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. + * Copyright (c) 2025 NVIDIA Corporation. All rights reserved. */ #include "redistribute_test.h" @@ -207,11 +208,12 @@ parsec_redistribute_check_New(parsec_tiled_matrix_t *Y, return redistribute_check_taskpool; } -static void +static int __parsec_taskpool_redistribute_check_destructor(parsec_redistribute_check_taskpool_t *redistribute_check_taskpool) { parsec_del2arena(&redistribute_check_taskpool->arenas_datatypes[PARSEC_redistribute_check_ORIGIN_ADT_IDX]); parsec_del2arena(&redistribute_check_taskpool->arenas_datatypes[PARSEC_redistribute_check_TARGET_ADT_IDX]); + return 0; } PARSEC_OBJ_CLASS_INSTANCE(parsec_redistribute_check_taskpool_t, parsec_taskpool_t, diff --git a/tests/collections/redistribute/redistribute_check2.jdf b/tests/collections/redistribute/redistribute_check2.jdf index 87e064b90..49c66602b 100644 --- a/tests/collections/redistribute/redistribute_check2.jdf +++ b/tests/collections/redistribute/redistribute_check2.jdf @@ -3,7 +3,7 @@ extern "C" %{ * Copyright (c) 2017-2021 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. 
- * Copyright (c) 2024 NVIDIA Corporation. All rights reserved.
+ * Copyright (c) 2024-2025 NVIDIA Corporation. All rights reserved.
 */

 #include "redistribute_test.h"
@@ -114,10 +114,11 @@ parsec_redistribute_check2_New(parsec_tiled_matrix_t *dcY,
     return redistribute_check2_taskpool;
 }

-static void
+static int
 __parsec_taskpool_redistribute_check2_destructor(parsec_redistribute_check2_taskpool_t *redistribute_check2_taskpool)
 {
     parsec_del2arena(&redistribute_check2_taskpool->arenas_datatypes[PARSEC_redistribute_check2_DEFAULT_ADT_IDX]);
+    return 0;
 }

 PARSEC_OBJ_CLASS_INSTANCE(parsec_redistribute_check2_taskpool_t, parsec_taskpool_t,
diff --git a/tests/collections/redistribute/redistribute_no_optimization.jdf b/tests/collections/redistribute/redistribute_no_optimization.jdf
index f37ea0586..fab2303a6 100644
--- a/tests/collections/redistribute/redistribute_no_optimization.jdf
+++ b/tests/collections/redistribute/redistribute_no_optimization.jdf
@@ -3,6 +3,7 @@ extern "C" %{
 * Copyright (c) 2017-2021 The University of Tennessee and The University
 * of Tennessee Research Foundation. All rights
 * reserved.
+ * Copyright (c) 2025 NVIDIA Corporation. All rights reserved.
 *
 */
 #include "redistribute_test.h"
@@ -444,11 +445,12 @@ parsec_redistribute_no_optimization_New(parsec_tiled_matrix_t *dcY,
 /**
  * @param [inout] the parsec object to destroy
  */
-static void
+static int
 __parsec_taskpool_redistribute_no_optimization_destructor(parsec_redistribute_no_optimization_taskpool_t *redistribute_no_optimization_taskpool)
 {
     parsec_del2arena(&redistribute_no_optimization_taskpool->arenas_datatypes[PARSEC_redistribute_no_optimization_TARGET_ADT_IDX]);
     parsec_del2arena(&redistribute_no_optimization_taskpool->arenas_datatypes[PARSEC_redistribute_no_optimization_SOURCE_ADT_IDX]);
+    return 0;
 }

 PARSEC_OBJ_CLASS_INSTANCE(parsec_redistribute_no_optimization_taskpool_t, parsec_taskpool_t,
diff --git a/tests/dsl/ptg/branching/branching_wrapper.c b/tests/dsl/ptg/branching/branching_wrapper.c
index aed89073a..a7af00987 100644
--- a/tests/dsl/ptg/branching/branching_wrapper.c
+++ b/tests/dsl/ptg/branching/branching_wrapper.c
@@ -2,6 +2,7 @@
 * Copyright (c) 2009-2023 The University of Tennessee and The University
 * of Tennessee Research Foundation. All rights
 * reserved.
+ * Copyright (c) 2025 NVIDIA Corporation. All rights reserved.
 */

 #include "parsec/runtime.h"
@@ -16,14 +17,8 @@
 #include "branching.h"
 #include "branching_wrapper.h"

-static void
-__parsec_taskpool_branching_destructor(parsec_branching_taskpool_t* tp)
-{
-    (void)tp;
-}
-
 PARSEC_OBJ_CLASS_INSTANCE(parsec_branching_taskpool_t, parsec_taskpool_t,
-                          NULL, __parsec_taskpool_branching_destructor);
+                          NULL, NULL);

 /**
  * @param [IN] A the data, already distributed and allocated
diff --git a/tests/dsl/ptg/choice/choice_data.c b/tests/dsl/ptg/choice/choice_data.c
index a6c7dcd77..0d0ff50df 100644
--- a/tests/dsl/ptg/choice/choice_data.c
+++ b/tests/dsl/ptg/choice/choice_data.c
@@ -2,6 +2,7 @@
 * Copyright (c) 2009-2021 The University of Tennessee and The University
 * of Tennessee Research Foundation. All rights
 * reserved.
+ * Copyright (c) 2025 NVIDIA Corporation. All rights reserved.
 */

 #include "parsec/runtime.h"
@@ -50,7 +51,7 @@ get_or_create_data(my_datatype_t* dat, uint32_t pos)
     data->owner_device = 0;
     data->key = pos;
-    data->nb_elts = 1;
+    data->span = 1;
     data->device_copies[0] = data_copy;

     if( !parsec_atomic_cas_ptr(&dat->data_map[pos], NULL, data) ) {
diff --git a/tests/dsl/ptg/choice/choice_wrapper.c b/tests/dsl/ptg/choice/choice_wrapper.c
index 62b97bfc5..02b706325 100644
--- a/tests/dsl/ptg/choice/choice_wrapper.c
+++ b/tests/dsl/ptg/choice/choice_wrapper.c
@@ -2,6 +2,7 @@
 * Copyright (c) 2009-2021 The University of Tennessee and The University
 * of Tennessee Research Foundation. All rights
 * reserved.
+ * Copyright (c) 2025 NVIDIA Corporation. All rights reserved.
 */

 #include "parsec/runtime.h"
@@ -16,13 +17,14 @@
 #include "choice.h"
 #include "choice_wrapper.h"

-static void
+static int
 __parsec_taskpool_choice_destructor(parsec_choice_taskpool_t *tp)
 {
     /* We have created our own datatype, instead of using a predefined one
      * so we need to clean up. */
     parsec_type_free(&(tp->arenas_datatypes[PARSEC_choice_DEFAULT_ADT_IDX].opaque_dtt));
+    return 0;
 }

 PARSEC_OBJ_CLASS_INSTANCE(parsec_choice_taskpool_t, parsec_taskpool_t,
diff --git a/tests/dsl/ptg/controlgather/ctlgat_wrapper.c b/tests/dsl/ptg/controlgather/ctlgat_wrapper.c
index c7b5a0fd5..596811de5 100644
--- a/tests/dsl/ptg/controlgather/ctlgat_wrapper.c
+++ b/tests/dsl/ptg/controlgather/ctlgat_wrapper.c
@@ -2,6 +2,7 @@
 * Copyright (c) 2009-2021 The University of Tennessee and The University
 * of Tennessee Research Foundation. All rights
 * reserved.
+ * Copyright (c) 2025 NVIDIA Corporation. All rights reserved.
 */

 #include "parsec/runtime.h"
@@ -18,13 +19,14 @@
 static parsec_datatype_t block;

-static void
+static int
 __parsec_taskpool_ctlgat_destructor(parsec_ctlgat_taskpool_t *tp)
 {
     /* We have created our own datatype, instead of using a predefined one
      * so we need to clean up. */
     parsec_type_free(&(tp->arenas_datatypes[PARSEC_ctlgat_DEFAULT_ADT_IDX].opaque_dtt));
+    return 0;
 }

 PARSEC_OBJ_CLASS_INSTANCE(parsec_ctlgat_taskpool_t, parsec_taskpool_t,
diff --git a/tests/runtime/cuda/get_best_device_check.jdf b/tests/runtime/cuda/get_best_device_check.jdf
index 7a1efa7fb..5313432b3 100644
--- a/tests/runtime/cuda/get_best_device_check.jdf
+++ b/tests/runtime/cuda/get_best_device_check.jdf
@@ -3,6 +3,7 @@ extern "C" %{
 * Copyright (c) 2021-2023 The University of Tennessee and The University
 * of Tennessee Research Foundation. All rights
 * reserved.
+ * Copyright (c) 2025 NVIDIA Corporation. All rights reserved.
 */

 #include "cuda_test_internal.h"
@@ -180,7 +181,7 @@ parsec_get_best_device_check_New(parsec_tiled_matrix_t *dcA, int *info)
 /**
  * @param [inout] the parsec object to destroy
  */
-static void
+static int
 __parsec_taskpool_get_best_device_check_destructor(parsec_get_best_device_check_taskpool_t *get_best_device_check_taskpool)
 {
     parsec_del2arena(&get_best_device_check_taskpool->arenas_datatypes[PARSEC_get_best_device_check_DEFAULT_ADT_IDX]);
@@ -188,6 +189,7 @@ __parsec_taskpool_get_best_device_check_destructor(parsec_get_best_device_check_
     if( NULL != get_best_device_check_taskpool->_g_cuda_device_index )
         free(get_best_device_check_taskpool->_g_cuda_device_index);
 #endif
+    return 0;
 }

 PARSEC_OBJ_CLASS_INSTANCE(parsec_get_best_device_check_taskpool_t, parsec_taskpool_t,
diff --git a/tests/runtime/cuda/nvlink_wrapper.c b/tests/runtime/cuda/nvlink_wrapper.c
index abc4b19c9..2b3955119 100644
--- a/tests/runtime/cuda/nvlink_wrapper.c
+++ b/tests/runtime/cuda/nvlink_wrapper.c
@@ -2,7 +2,7 @@
 * Copyright (c) 2019-2024 The University of Tennessee and The University
 * of Tennessee Research Foundation. All rights
 * reserved.
- * Copyright (c) 2024 NVIDIA Corporation. All rights reserved.
+ * Copyright (c) 2024-2025 NVIDIA Corporation. All rights reserved.
 */

 #include "parsec.h"
@@ -57,7 +57,7 @@ static void destroy_cublas_handle(void *_h, void *_n)
     (void)_h;
 }

-static void
+static int
 __parsec_nvlink_destructor( parsec_nvlink_taskpool_t* nvlink_taskpool)
 {
     int g, dev;
@@ -89,6 +89,7 @@ __parsec_nvlink_destructor( parsec_nvlink_taskpool_t* nvlink_taskpool)

     free(dcA);
     free(userM);
+    return 0;
 }

 PARSEC_OBJ_CLASS_INSTANCE(parsec_nvlink_taskpool_t, parsec_taskpool_t,
@@ -189,7 +190,7 @@ parsec_taskpool_t* testing_nvlink_New( parsec_context_t *ctx, int depth, int mb
         /* And copy the tile from CPU to GPU */
         status = (cudaError_t)cudaMemcpy( gpu_copy->device_private,
                                           cpu_copy->device_private,
-                                          dta->nb_elts,
+                                          dta->span,
                                           cudaMemcpyHostToDevice );
         PARSEC_CUDA_CHECK_ERROR( "(nvlink_wrapper) cudaMemcpy", status, {return NULL;} );
         g++;
diff --git a/tests/runtime/cuda/stage_custom.jdf b/tests/runtime/cuda/stage_custom.jdf
index 7df99800f..e907740dd 100644
--- a/tests/runtime/cuda/stage_custom.jdf
+++ b/tests/runtime/cuda/stage_custom.jdf
@@ -3,7 +3,7 @@ extern "C" %{
 * Copyright (c) 2019-2024 The University of Tennessee and The University
 * of Tennessee Research Foundation. All rights
 * reserved.
- * Copyright (c) 2024 NVIDIA Corporation. All rights reserved.
+ * Copyright (c) 2024-2025 NVIDIA Corporation. All rights reserved.
 */

 #include "parsec/parsec_config.h"
@@ -62,7 +62,7 @@ stage_stride_in(parsec_gpu_task_t *gtask,
     }else{
         ret = (cudaError_t)cudaMemcpyAsync( copy_out->device_private,
                                             copy_in->device_private,
-                                            copy_in->original->nb_elts,
+                                            copy_in->original->span,
                                             cudaMemcpyDeviceToDevice,
                                             cuda_stream->cuda_stream );
         PARSEC_CUDA_CHECK_ERROR( "cudaMemcpyAsync", ret, { return PARSEC_ERROR; } );
@@ -312,7 +312,7 @@ parsec_taskpool_t* testing_stage_custom_New( parsec_context_t *ctx, int M, int N
     return &testing_handle->super;
 }

-static void
+static int
 __parsec_taskpool_stage_custom_destructor(parsec_stage_custom_taskpool_t *stage_custom_taskpool)
 {
     parsec_matrix_block_cyclic_t *descA = (parsec_matrix_block_cyclic_t*)stage_custom_taskpool->_g_descA;
@@ -324,6 +324,7 @@ __parsec_taskpool_stage_custom_destructor(parsec_stage_custom_taskpool_t *stage_
     parsec_tiled_matrix_destroy( (parsec_tiled_matrix_t*)stage_custom_taskpool->_g_descB );
     free(descA);
     free(descB);
+    return 0;
 }

 PARSEC_OBJ_CLASS_INSTANCE(parsec_stage_custom_taskpool_t, parsec_taskpool_t,
diff --git a/tests/runtime/cuda/stress_wrapper.c b/tests/runtime/cuda/stress_wrapper.c
index 50877c7d3..7e2ad3f03 100644
--- a/tests/runtime/cuda/stress_wrapper.c
+++ b/tests/runtime/cuda/stress_wrapper.c
@@ -2,6 +2,7 @@
 * Copyright (c) 2019-2024 The University of Tennessee and The University
 * of Tennessee Research Foundation. All rights
 * reserved.
+ * Copyright (c) 2025 NVIDIA Corporation. All rights reserved.
 */
 #include "parsec.h"
 #include "parsec/execution_stream.h"
@@ -11,7 +12,7 @@

 #include "stress.h"

-static void __parsec_stress_destructor( parsec_taskpool_t *tp )
+static int __parsec_stress_destructor( parsec_taskpool_t *tp )
 {
     parsec_stress_taskpool_t *stress_taskpool = (parsec_stress_taskpool_t *)tp;
     parsec_matrix_block_cyclic_t *dcA;
@@ -21,6 +22,7 @@ static void __parsec_stress_destructor( parsec_taskpool_t *tp )
     parsec_tiled_matrix_destroy( (parsec_tiled_matrix_t*)stress_taskpool->_g_descA );
     free(dcA);
     free(stress_taskpool->_g_cuda_device_index);
+    return 0;
 }

 PARSEC_OBJ_CLASS_INSTANCE(parsec_stress_taskpool_t, parsec_taskpool_t,