From 956b2ab6b51cb31123f4a264c0b0db3166c4586b Mon Sep 17 00:00:00 2001 From: Thomas Herault Date: Wed, 7 Aug 2024 10:40:41 -0400 Subject: [PATCH 1/2] Add a function to pre-set infos for all streams/devices/... Lazy/dynamic allocation of infos can be convenient for the users, but also detrimental to predictable behavior. Examples of drawbacks are when testing memory constrained setups (e.g., when the test uses zone_malloc to dynamically allocate objects in the info object constructor, that may fail nondeterministically because it depends on how many streams call the constructor), or small scale performance runs where the number of cublas objects to initialize varies depending on the run. Calling the new function incurs deterministic overheads. --- parsec/class/info.c | 15 +++++++++++++++ parsec/class/info.h | 15 ++++++++++++++- tests/runtime/cuda/nvlink_wrapper.c | 2 ++ 3 files changed, 31 insertions(+), 1 deletion(-) diff --git a/parsec/class/info.c b/parsec/class/info.c index e9c45cb51..396fd8169 100644 --- a/parsec/class/info.c +++ b/parsec/class/info.c @@ -318,3 +318,18 @@ void *parsec_info_get(parsec_info_object_array_t *oa, parsec_info_id_t iid) } return ret; } + +void parsec_info_set_all(parsec_info_t *nfo, parsec_info_id_t iid) +{ + parsec_list_item_t *li; + parsec_info_object_array_t *oa; + + parsec_list_lock(&nfo->ioa_list); + for(li = PARSEC_LIST_ITERATOR_FIRST(&nfo->ioa_list); + li != PARSEC_LIST_ITERATOR_END(&nfo->ioa_list); + li = PARSEC_LIST_ITERATOR_NEXT(li)) { + oa = (parsec_info_object_array_t*)li; + parsec_info_get(oa, iid); + } + parsec_list_unlock(&nfo->ioa_list); +} diff --git a/parsec/class/info.h b/parsec/class/info.h index c231e93ba..244af81c4 100644 --- a/parsec/class/info.h +++ b/parsec/class/info.h @@ -150,7 +150,7 @@ parsec_info_id_t parsec_info_register(parsec_info_t *nfo, const char *name, parsec_info_destructor_t destructor, void *des_data, parsec_info_constructor_t constructor, void *cons_data, void *cb_data); - + /** * @brief unregisters 
an info key using its ID. * @@ -235,6 +235,19 @@ void *parsec_info_test_and_set(parsec_info_object_array_t *oa, parsec_info_id_t */ void *parsec_info_get(parsec_info_object_array_t *oa, parsec_info_id_t info_id); +/** + * @brief (pre)set the info object for all registered stream/device/... + * + * @details + * @param[IN] nfo: the info collection that needs to be pre-set + * @param[IN] iid: the index of the info to set + * This will call @fn parsec_info_get on each stream or device registered + * with @p nfo for the index @p iid, potentially calling the @p constructor + * callback for the objects that do not have an entry in the object array + * already. + */ +void parsec_info_set_all(parsec_info_t *nfo, parsec_info_id_t iid); + END_C_DECLS #endif /* PARSEC_INFO_H_HAS_BEEN_INCLUDED */ diff --git a/tests/runtime/cuda/nvlink_wrapper.c b/tests/runtime/cuda/nvlink_wrapper.c index abc4b19c9..c3894c224 100644 --- a/tests/runtime/cuda/nvlink_wrapper.c +++ b/tests/runtime/cuda/nvlink_wrapper.c @@ -119,6 +119,8 @@ parsec_taskpool_t* testing_nvlink_New( parsec_context_t *ctx, int depth, int mb create_cublas_handle, NULL, NULL); assert(CuHI != -1); + /* Pre-set the cublas handle for all streams */ + parsec_info_set_all(&parsec_per_stream_infos, CuHI); #else int CuHI = -1; #endif From 8279fbd31e54c5b53de25b185d6b81b19139bdd1 Mon Sep 17 00:00:00 2001 From: Thomas Herault Date: Wed, 7 Aug 2024 13:35:28 -0400 Subject: [PATCH 2/2] In multi-GPU runs, we need to set the target GPU before calling the user's object info constructor (and potentially destructor). Allow the device to define a function to do so in order to keep the user code unchanged. 
This adds an additional function pointer call, but this call only happens once per info and per stream, ideally during warmup/initialization time --- parsec/class/info.c | 6 ++++- parsec/class/info.h | 23 +++++++++++++++++-- parsec/mca/device/cuda/device_cuda_module.c | 2 +- parsec/mca/device/device.c | 14 ++++++++++- .../level_zero/device_level_zero_module.c | 2 +- 5 files changed, 41 insertions(+), 6 deletions(-) diff --git a/parsec/class/info.c b/parsec/class/info.c index 396fd8169..7967769f3 100644 --- a/parsec/class/info.c +++ b/parsec/class/info.c @@ -125,6 +125,7 @@ parsec_info_id_t parsec_info_unregister(parsec_info_t *nfo, parsec_info_id_t iid item2 = PARSEC_LIST_ITERATOR_NEXT(item2)) { ioa = (parsec_info_object_array_t*)item2; if(iid < ioa->known_infos && NULL != ioa->info_objects[iid]) { + if(NULL != ioa->ctx_set) ioa->ctx_set(ioa->ctx_set_obj); ie->destructor(ioa->info_objects[iid], ie->des_data); ioa->info_objects[iid] = NULL; } @@ -210,7 +211,7 @@ static void parsec_info_object_array_constructor(parsec_object_t *obj) /* The constructor cannot set the info, as it does not take additional * parameters. Thus, it is needed to call init after constructing the * info_object_array. 
*/ -void parsec_info_object_array_init(parsec_info_object_array_t *oa, parsec_info_t *nfo, void *cons_obj) +void parsec_info_object_array_init(parsec_info_object_array_t *oa, parsec_info_t *nfo, void *cons_obj, parsec_info_set_ctx_fn ctx_set, void *ctx_set_param) { oa->known_infos = nfo->max_id+1; parsec_list_push_front(&nfo->ioa_list, &oa->list_item); @@ -220,6 +221,8 @@ void parsec_info_object_array_init(parsec_info_object_array_t *oa, parsec_info_t oa->info_objects = calloc(sizeof(void*), oa->known_infos); oa->infos = nfo; oa->cons_obj = cons_obj; + oa->ctx_set = ctx_set; + oa->ctx_set_obj = ctx_set_param; } static void parsec_info_object_array_destructor(parsec_object_t *obj) @@ -311,6 +314,7 @@ void *parsec_info_get(parsec_info_object_array_t *oa, parsec_info_id_t iid) ie = parsec_info_lookup_by_iid(oa->infos, iid); if(NULL == ie->constructor) return ret; + if(NULL != oa->ctx_set) oa->ctx_set(oa->ctx_set_obj); nio = ie->constructor(oa->cons_obj, ie->cons_data); ret = parsec_info_test_and_set(oa, iid, nio, NULL); if(ret != nio && NULL != ie->destructor) { diff --git a/parsec/class/info.h b/parsec/class/info.h index 244af81c4..f319ed819 100644 --- a/parsec/class/info.h +++ b/parsec/class/info.h @@ -82,6 +82,20 @@ typedef void (*parsec_info_destructor_t)(void *elt, void *cb_data); */ typedef void *(*parsec_info_constructor_t)(void *obj, void *cb_data); +/** + * @brief Prototype of a context setter function + * + * @details + * A function with this prototype will be called each time a + * user callback is called, before it is called, to set the + * context in which the user callback is supposed to execute + * (this is dependent on the level of software that defines + * the info) + * + * @param[inout] cb_data: opaque pointer provided by the user + */ +typedef int (*parsec_info_set_ctx_fn)(void *cb_data); + /** * @brief The descriptor of a single info */ @@ -100,7 +114,7 @@ struct parsec_info_entry_s { /** * @brief An array of info objects - * + * * @details This 
structure holds the info objects * in an array indexed by the iid */ @@ -113,6 +127,8 @@ struct parsec_info_object_array_s { void **info_objects; /**< Info objects are stored in this array indexed by * info_entry->iid */ void *cons_obj; /**< obj pointer for the constructor */ + parsec_info_set_ctx_fn ctx_set; /**< Callback to set the context of the ioa */ + void *ctx_set_obj; /**< Parameter passed to ctx_set */ }; PARSEC_OBJ_CLASS_DECLARATION(parsec_info_object_array_t); @@ -183,6 +199,8 @@ parsec_info_id_t parsec_info_lookup(parsec_info_t *nfo, const char *name, void * * @param[IN] nfo: the info collection that defines its keys * @param[IN] cons_obj: pointer to the object holding the object array (passed as * first argument to constructor calls) + * @param[IN] ctx_set: how to set the context for this ioa + * @param[IN] ctx_set_param: what parameter to pass to ctx_set * * @remark when constructing an object array with PARSE_OBJ_CONSTRUCT or PARSEC_OBJ_NEW, * the parsec_info_t cannot be associated to the object array automatically as the constructor @@ -190,7 +208,8 @@ parsec_info_id_t parsec_info_lookup(parsec_info_t *nfo, const char *name, void * * after it is constructed. 
*/ void parsec_info_object_array_init(parsec_info_object_array_t *oa, parsec_info_t *nfo, - void *cons_obj); + void *cons_obj, parsec_info_set_ctx_fn ctx_set, + void *ctx_set_obj); /** * @brief Set an info in an array of objects diff --git a/parsec/mca/device/cuda/device_cuda_module.c b/parsec/mca/device/cuda/device_cuda_module.c index 312775719..cf2785586 100644 --- a/parsec/mca/device/cuda/device_cuda_module.c +++ b/parsec/mca/device/cuda/device_cuda_module.c @@ -470,7 +470,7 @@ parsec_cuda_module_init( int dev_id, parsec_device_module_t** module ) {goto release_device;} ); exec_stream->workspace = NULL; PARSEC_OBJ_CONSTRUCT(&exec_stream->infos, parsec_info_object_array_t); - parsec_info_object_array_init(&exec_stream->infos, &parsec_per_stream_infos, exec_stream); + parsec_info_object_array_init(&exec_stream->infos, &parsec_per_stream_infos, exec_stream, (parsec_info_set_ctx_fn)parsec_cuda_set_device, gpu_device); exec_stream->max_events = PARSEC_MAX_EVENTS_PER_STREAM; exec_stream->executed = 0; exec_stream->start = 0; diff --git a/parsec/mca/device/device.c b/parsec/mca/device/device.c index d7ea17a99..2ce9f2591 100644 --- a/parsec/mca/device/device.c +++ b/parsec/mca/device/device.c @@ -14,6 +14,7 @@ #include "parsec/execution_stream.h" #include "parsec/utils/argv.h" #include "parsec/parsec_internal.h" +#include "parsec/mca/device/device_gpu.h" #include #if defined(PARSEC_HAVE_ERRNO_H) @@ -1022,6 +1023,17 @@ int parsec_mca_device_attach(parsec_context_t* context) return PARSEC_SUCCESS; } +static int parsec_device_set_ctx(void *_device) +{ + parsec_device_module_t *device = (parsec_device_module_t*)_device; + if(PARSEC_DEV_IS_GPU(device->type)) { + parsec_device_gpu_module_t *gpu_device = (parsec_device_gpu_module_t *)device; + gpu_device->set_device(gpu_device); + } + /* Nothing to do for non-GPU devices */ + return PARSEC_SUCCESS; +} + int parsec_mca_device_add(parsec_context_t* context, parsec_device_module_t* device) { if( parsec_mca_device_are_freezed ) { 
@@ -1045,7 +1057,7 @@ int parsec_mca_device_add(parsec_context_t* context, parsec_device_module_t* dev device->context = context; parsec_atomic_unlock(&parsec_devices_mutex); /* CRITICAL SECTION: END */ PARSEC_OBJ_CONSTRUCT(&device->infos, parsec_info_object_array_t); - parsec_info_object_array_init(&device->infos, &parsec_per_device_infos, device); + parsec_info_object_array_init(&device->infos, &parsec_per_device_infos, device, parsec_device_set_ctx, device); return device->device_index; } diff --git a/parsec/mca/device/level_zero/device_level_zero_module.c b/parsec/mca/device/level_zero/device_level_zero_module.c index b1ea18758..d72bab0f2 100644 --- a/parsec/mca/device/level_zero/device_level_zero_module.c +++ b/parsec/mca/device/level_zero/device_level_zero_module.c @@ -308,7 +308,7 @@ int parsec_level_zero_module_init( int dev_id, parsec_device_level_zero_driver_t PARSEC_LEVEL_ZERO_CHECK_ERROR( "zeCommandQueueCreate ", ze_rc, {goto release_device;} ); exec_stream->workspace = NULL; PARSEC_OBJ_CONSTRUCT(&exec_stream->infos, parsec_info_object_array_t); - parsec_info_object_array_init(&exec_stream->infos, &parsec_per_stream_infos, exec_stream); + parsec_info_object_array_init(&exec_stream->infos, &parsec_per_stream_infos, exec_stream, (parsec_info_set_ctx_fn)parsec_level_zero_set_device, gpu_device); exec_stream->max_events = PARSEC_MAX_EVENTS_PER_STREAM; exec_stream->executed = 0; exec_stream->start = 0;